1//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10// both before and after the DAG is legalized.
11//
12// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13// primarily intended to handle simplification opportunities that are implicit
14// in the LLVM IR and exposed by the various codegen lowering phases.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/APFloat.h"
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/DenseMap.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/SetVector.h"
27#include "llvm/ADT/SmallSet.h"
29#include "llvm/ADT/Statistic.h"
51#include "llvm/IR/Attributes.h"
52#include "llvm/IR/Constant.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Function.h"
56#include "llvm/IR/Metadata.h"
61#include "llvm/Support/Debug.h"
69#include <algorithm>
70#include <cassert>
71#include <cstdint>
72#include <functional>
73#include <iterator>
74#include <optional>
75#include <string>
76#include <tuple>
77#include <utility>
78#include <variant>
79
80#include "MatchContext.h"
81
82using namespace llvm;
83using namespace llvm::SDPatternMatch;
84
85#define DEBUG_TYPE "dagcombine"
86
87STATISTIC(NodesCombined , "Number of dag nodes combined");
88STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
89STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
90STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
91STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
92 STATISTIC(SlicedLoads, "Number of loads sliced");
93STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
94
95DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
96 "Controls whether a DAG combine is performed for a node");
97
98static cl::opt<bool>
99CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
100 cl::desc("Enable DAG combiner's use of IR alias analysis"));
101
102static cl::opt<bool>
103UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
104 cl::desc("Enable DAG combiner's use of TBAA"));
105
106 #ifndef NDEBUG
107 static cl::opt<std::string>
108 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
109 cl::desc("Only use DAG-combiner alias analysis in this"
110 " function"));
111#endif
112
113/// Hidden option to stress test load slicing, i.e., when this option
114/// is enabled, load slicing bypasses most of its profitability guards.
115static cl::opt<bool>
116StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
117 cl::desc("Bypass the profitability model of load slicing"),
118 cl::init(false));
119
120static cl::opt<bool>
121 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
122 cl::desc("DAG combiner may split indexing from loads"));
123
124static cl::opt<bool>
125 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
126 cl::desc("DAG combiner enable merging multiple stores "
127 "into a wider store"));
128
129 static cl::opt<unsigned> TokenFactorInlineLimit(
130     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
131 cl::desc("Limit the number of operands to inline for Token Factors"));
132
133 static cl::opt<unsigned> StoreMergeDependenceLimit(
134     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
135 cl::desc("Limit the number of times for the same StoreNode and RootNode "
136 "to bail out in store merging dependence check"));
137
138 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
139     "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
140 cl::desc("DAG combiner enable reducing the width of load/op/store "
141 "sequence"));
142
143 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
144     "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
145 cl::desc("DAG combiner enable load/<replace bytes>/store with "
146 "a narrower store"));
147
148 static cl::opt<bool> EnableVectorFCopySignExtendRound(
149     "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
150 cl::desc(
151 "Enable merging extends and rounds into FCOPYSIGN on vector types"));
152
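// Illustrative usage sketch: the cl::opt flags above are hidden command-line
// options, so a typical way to exercise them is via llc when reproducing a
// combine-related issue (spellings follow the option strings passed above):
//
//   llc -O2 -combiner-store-merging=false -combiner-use-tbaa=false foo.ll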
153namespace {
154
155 class DAGCombiner {
156 SelectionDAG &DAG;
157 const TargetLowering &TLI;
158 const SelectionDAGTargetInfo *STI;
159 CombineLevel Level = BeforeLegalizeTypes;
160 CodeGenOptLevel OptLevel;
161 bool LegalDAG = false;
162 bool LegalOperations = false;
163 bool LegalTypes = false;
164 bool ForCodeSize;
165 bool DisableGenericCombines;
166
167 /// Worklist of all of the nodes that need to be simplified.
168 ///
169 /// This must behave as a stack -- new nodes to process are pushed onto the
170 /// back and when processing we pop off of the back.
171 ///
172 /// The worklist will not contain duplicates but may contain null entries
173 /// due to nodes being deleted from the underlying DAG.
174 SmallVector<SDNode *, 64> Worklist;
175 
176 /// Mapping from an SDNode to its position on the worklist.
177 ///
178 /// This is used to find and remove nodes from the worklist (by nulling
179 /// them) when they are deleted from the underlying DAG. It relies on
180 /// stable indices of nodes within the worklist.
181 DenseMap<SDNode *, unsigned> WorklistMap;
182 
183 /// This records all nodes attempted to be added to the worklist since we
184 /// considered a new worklist entry. Since we do not add duplicate nodes
185 /// to the worklist, this is different from the tail of the worklist.
186 SmallSetVector<SDNode *, 32> PruningList;
187 
188 /// Set of nodes which have been combined (at least once).
189 ///
190 /// This is used to allow us to reliably add any operands of a DAG node
191 /// which have not yet been combined to the worklist.
192 SmallPtrSet<SDNode *, 32> CombinedNodes;
193
194 /// Map from candidate StoreNode to the pair of RootNode and count.
195 /// The count is used to track how many times we have seen the StoreNode
196 /// with the same RootNode bail out in dependence check. If we have seen
197 /// the bail out for the same pair many times over a limit, we won't
198 /// consider the StoreNode with the same RootNode as store merging
199 /// candidate again.
200 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
201 
202 // AA - Used for DAG load/store alias analysis.
203 AliasAnalysis *AA;
204
205 /// When an instruction is simplified, add all users of the instruction to
206 /// the work lists because they might get more simplified now.
207 void AddUsersToWorklist(SDNode *N) {
208 for (SDNode *Node : N->uses())
209 AddToWorklist(Node);
210 }
211
212 /// Convenient shorthand to add a node and all of its users to the worklist.
213 void AddToWorklistWithUsers(SDNode *N) {
214 AddUsersToWorklist(N);
215 AddToWorklist(N);
216 }
217
218 // Prune potentially dangling nodes. This is called after
219 // any visit to a node, but should also be called during a visit after any
220 // failed combine which may have created a DAG node.
221 void clearAddedDanglingWorklistEntries() {
222 // Check any nodes added to the worklist to see if they are prunable.
223 while (!PruningList.empty()) {
224 auto *N = PruningList.pop_back_val();
225 if (N->use_empty())
226 recursivelyDeleteUnusedNodes(N);
227 }
228 }
229
230 SDNode *getNextWorklistEntry() {
231 // Before we do any work, remove nodes that are not in use.
232 clearAddedDanglingWorklistEntries();
233 SDNode *N = nullptr;
234 // The Worklist holds the SDNodes in order, but it may contain null
235 // entries.
236 while (!N && !Worklist.empty()) {
237 N = Worklist.pop_back_val();
238 }
239
240 if (N) {
241 bool GoodWorklistEntry = WorklistMap.erase(N);
242 (void)GoodWorklistEntry;
243 assert(GoodWorklistEntry &&
244 "Found a worklist entry without a corresponding map entry!");
245 }
246 return N;
247 }
248
249 /// Call the node-specific routine that folds each particular type of node.
250 SDValue visit(SDNode *N);
251
252 public:
253 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
254 : DAG(D), TLI(D.getTargetLoweringInfo()),
255 STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
256 ForCodeSize = DAG.shouldOptForSize();
257 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
258
259 MaximumLegalStoreInBits = 0;
260 // We use the minimum store size here, since that's all we can guarantee
261 // for the scalable vector types.
262 for (MVT VT : MVT::all_valuetypes())
263 if (EVT(VT).isSimple() && VT != MVT::Other &&
264 TLI.isTypeLegal(EVT(VT)) &&
265 VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
266 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
267 }
268
269 void ConsiderForPruning(SDNode *N) {
270 // Mark this for potential pruning.
271 PruningList.insert(N);
272 }
273
274 /// Add to the worklist, making sure its instance is at the back (next to
275 /// be processed).
276 void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
277 assert(N->getOpcode() != ISD::DELETED_NODE &&
278 "Deleted Node added to Worklist");
279
280 // Skip handle nodes as they can't usefully be combined and confuse the
281 // zero-use deletion strategy.
282 if (N->getOpcode() == ISD::HANDLENODE)
283 return;
284
285 if (IsCandidateForPruning)
286 ConsiderForPruning(N);
287
288 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
289 Worklist.push_back(N);
290 }
291
292 /// Remove all instances of N from the worklist.
293 void removeFromWorklist(SDNode *N) {
294 CombinedNodes.erase(N);
295 PruningList.remove(N);
296 StoreRootCountMap.erase(N);
297
298 auto It = WorklistMap.find(N);
299 if (It == WorklistMap.end())
300 return; // Not in the worklist.
301
302 // Null out the entry rather than erasing it to avoid a linear operation.
303 Worklist[It->second] = nullptr;
304 WorklistMap.erase(It);
305 }
306
307 void deleteAndRecombine(SDNode *N);
308 bool recursivelyDeleteUnusedNodes(SDNode *N);
309
310 /// Replaces all uses of the results of one DAG node with new values.
311 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
312 bool AddTo = true);
313
314 /// Replaces all uses of the results of one DAG node with new values.
315 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
316 return CombineTo(N, &Res, 1, AddTo);
317 }
318
319 /// Replaces all uses of the results of one DAG node with new values.
320 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
321 bool AddTo = true) {
322 SDValue To[] = { Res0, Res1 };
323 return CombineTo(N, To, 2, AddTo);
324 }
325
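// Illustrative usage sketch (values are hypothetical): a visit routine builds
// replacement values and hands them to one of the CombineTo overloads above,
// which perform the use replacement and worklist bookkeeping:
//
//   SDValue NewVal   = DAG.getNode(...);    // replacement for result 0
//   SDValue NewChain = ...;                 // replacement for result 1
//   return CombineTo(N, NewVal, NewChain);  // returns SDValue(N, 0)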
326 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
327
328 private:
329 unsigned MaximumLegalStoreInBits;
330
331 /// Check the specified integer node value to see if it can be simplified or
332 /// if things it uses can be simplified by bit propagation.
333 /// If so, return true.
334 bool SimplifyDemandedBits(SDValue Op) {
335 unsigned BitWidth = Op.getScalarValueSizeInBits();
336 APInt DemandedBits = APInt::getAllOnes(BitWidth);
337 return SimplifyDemandedBits(Op, DemandedBits);
338 }
339
340 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
341 EVT VT = Op.getValueType();
342 APInt DemandedElts = VT.isFixedLengthVector()
343     ? APInt::getAllOnes(VT.getVectorNumElements())
344     : APInt(1, 1);
345 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, false);
346 }
347
348 /// Check the specified vector node value to see if it can be simplified or
349 /// if things it uses can be simplified as it only uses some of the
350 /// elements. If so, return true.
351 bool SimplifyDemandedVectorElts(SDValue Op) {
352 // TODO: For now just pretend it cannot be simplified.
353 if (Op.getValueType().isScalableVector())
354 return false;
355
356 unsigned NumElts = Op.getValueType().getVectorNumElements();
357 APInt DemandedElts = APInt::getAllOnes(NumElts);
358 return SimplifyDemandedVectorElts(Op, DemandedElts);
359 }
360
361 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
362 const APInt &DemandedElts,
363 bool AssumeSingleUse = false);
364 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
365 bool AssumeSingleUse = false);
366
367 bool CombineToPreIndexedLoadStore(SDNode *N);
368 bool CombineToPostIndexedLoadStore(SDNode *N);
369 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
370 bool SliceUpLoad(SDNode *N);
371
372 // Looks up the chain to find a unique (unaliased) store feeding the passed
373 // load. If no such store is found, returns a nullptr.
374 // Note: This will look past a CALLSEQ_START if the load is chained to it so
375 // that it can find stack stores for byval params.
376 StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
377 // Scalars have size 0 to distinguish from singleton vectors.
378 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
379 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
380 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
381
382 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
383 /// load.
384 ///
385 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
386 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
387 /// \param EltNo index of the vector element to load.
388 /// \param OriginalLoad load that EVE came from to be replaced.
389 /// \returns EVE on success, SDValue() on failure.
390 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
391 SDValue EltNo,
392 LoadSDNode *OriginalLoad);
393 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
394 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
395 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
396 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
397 SDValue PromoteIntBinOp(SDValue Op);
398 SDValue PromoteIntShiftOp(SDValue Op);
399 SDValue PromoteExtend(SDValue Op);
400 bool PromoteLoad(SDValue Op);
401
402 SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
403 SDValue RHS, SDValue True, SDValue False,
404     ISD::CondCode CC);
405 
406 /// Call the node-specific routine that knows how to fold each
407 /// particular type of node. If that doesn't do anything, try the
408 /// target-specific DAG combines.
409 SDValue combine(SDNode *N);
410
411 // Visitation implementation - Implement dag node combining for different
412 // node types. The semantics are as follows:
413 // Return Value:
414 // SDValue.getNode() == 0 - No change was made
415 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
416 // otherwise - N should be replaced by the returned Operand.
417 //
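// Illustrative sketch of the protocol above (the routine name and the fold
// shown, x + 0 --> x, are hypothetical):
//
//   SDValue DAGCombiner::visitExampleADD(SDNode *N) {
//     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
//     if (isNullConstant(N1))
//       return N0;          // N should be replaced by the returned operand.
//     return SDValue();     // getNode() == 0: no change was made.
//   }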
418 SDValue visitTokenFactor(SDNode *N);
419 SDValue visitMERGE_VALUES(SDNode *N);
420 SDValue visitADD(SDNode *N);
421 SDValue visitADDLike(SDNode *N);
422 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
423 SDValue visitSUB(SDNode *N);
424 SDValue visitADDSAT(SDNode *N);
425 SDValue visitSUBSAT(SDNode *N);
426 SDValue visitADDC(SDNode *N);
427 SDValue visitADDO(SDNode *N);
428 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
429 SDValue visitSUBC(SDNode *N);
430 SDValue visitSUBO(SDNode *N);
431 SDValue visitADDE(SDNode *N);
432 SDValue visitUADDO_CARRY(SDNode *N);
433 SDValue visitSADDO_CARRY(SDNode *N);
434 SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
435 SDNode *N);
436 SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
437 SDNode *N);
438 SDValue visitSUBE(SDNode *N);
439 SDValue visitUSUBO_CARRY(SDNode *N);
440 SDValue visitSSUBO_CARRY(SDNode *N);
441 SDValue visitMUL(SDNode *N);
442 SDValue visitMULFIX(SDNode *N);
443 SDValue useDivRem(SDNode *N);
444 SDValue visitSDIV(SDNode *N);
445 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
446 SDValue visitUDIV(SDNode *N);
447 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
448 SDValue visitREM(SDNode *N);
449 SDValue visitMULHU(SDNode *N);
450 SDValue visitMULHS(SDNode *N);
451 SDValue visitAVG(SDNode *N);
452 SDValue visitABD(SDNode *N);
453 SDValue visitSMUL_LOHI(SDNode *N);
454 SDValue visitUMUL_LOHI(SDNode *N);
455 SDValue visitMULO(SDNode *N);
456 SDValue visitIMINMAX(SDNode *N);
457 SDValue visitAND(SDNode *N);
458 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
459 SDValue visitOR(SDNode *N);
460 SDValue visitORLike(SDValue N0, SDValue N1, const SDLoc &DL);
461 SDValue visitXOR(SDNode *N);
462 SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
463 SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
464 SDValue visitSHL(SDNode *N);
465 SDValue visitSRA(SDNode *N);
466 SDValue visitSRL(SDNode *N);
467 SDValue visitFunnelShift(SDNode *N);
468 SDValue visitSHLSAT(SDNode *N);
469 SDValue visitRotate(SDNode *N);
470 SDValue visitABS(SDNode *N);
471 SDValue visitBSWAP(SDNode *N);
472 SDValue visitBITREVERSE(SDNode *N);
473 SDValue visitCTLZ(SDNode *N);
474 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
475 SDValue visitCTTZ(SDNode *N);
476 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
477 SDValue visitCTPOP(SDNode *N);
478 SDValue visitSELECT(SDNode *N);
479 SDValue visitVSELECT(SDNode *N);
480 SDValue visitVP_SELECT(SDNode *N);
481 SDValue visitSELECT_CC(SDNode *N);
482 SDValue visitSETCC(SDNode *N);
483 SDValue visitSETCCCARRY(SDNode *N);
484 SDValue visitSIGN_EXTEND(SDNode *N);
485 SDValue visitZERO_EXTEND(SDNode *N);
486 SDValue visitANY_EXTEND(SDNode *N);
487 SDValue visitAssertExt(SDNode *N);
488 SDValue visitAssertAlign(SDNode *N);
489 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
490 SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
491 SDValue visitTRUNCATE(SDNode *N);
492 SDValue visitBITCAST(SDNode *N);
493 SDValue visitFREEZE(SDNode *N);
494 SDValue visitBUILD_PAIR(SDNode *N);
495 SDValue visitFADD(SDNode *N);
496 SDValue visitVP_FADD(SDNode *N);
497 SDValue visitVP_FSUB(SDNode *N);
498 SDValue visitSTRICT_FADD(SDNode *N);
499 SDValue visitFSUB(SDNode *N);
500 SDValue visitFMUL(SDNode *N);
501 template <class MatchContextClass> SDValue visitFMA(SDNode *N);
502 SDValue visitFMAD(SDNode *N);
503 SDValue visitFDIV(SDNode *N);
504 SDValue visitFREM(SDNode *N);
505 SDValue visitFSQRT(SDNode *N);
506 SDValue visitFCOPYSIGN(SDNode *N);
507 SDValue visitFPOW(SDNode *N);
508 SDValue visitSINT_TO_FP(SDNode *N);
509 SDValue visitUINT_TO_FP(SDNode *N);
510 SDValue visitFP_TO_SINT(SDNode *N);
511 SDValue visitFP_TO_UINT(SDNode *N);
512 SDValue visitXRINT(SDNode *N);
513 SDValue visitFP_ROUND(SDNode *N);
514 SDValue visitFP_EXTEND(SDNode *N);
515 SDValue visitFNEG(SDNode *N);
516 SDValue visitFABS(SDNode *N);
517 SDValue visitFCEIL(SDNode *N);
518 SDValue visitFTRUNC(SDNode *N);
519 SDValue visitFFREXP(SDNode *N);
520 SDValue visitFFLOOR(SDNode *N);
521 SDValue visitFMinMax(SDNode *N);
522 SDValue visitBRCOND(SDNode *N);
523 SDValue visitBR_CC(SDNode *N);
524 SDValue visitLOAD(SDNode *N);
525
526 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
527 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
528 SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
529
530 bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
531
532 SDValue visitSTORE(SDNode *N);
533 SDValue visitATOMIC_STORE(SDNode *N);
534 SDValue visitLIFETIME_END(SDNode *N);
535 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
536 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
537 SDValue visitBUILD_VECTOR(SDNode *N);
538 SDValue visitCONCAT_VECTORS(SDNode *N);
539 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
540 SDValue visitVECTOR_SHUFFLE(SDNode *N);
541 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
542 SDValue visitINSERT_SUBVECTOR(SDNode *N);
543 SDValue visitMLOAD(SDNode *N);
544 SDValue visitMSTORE(SDNode *N);
545 SDValue visitMGATHER(SDNode *N);
546 SDValue visitMSCATTER(SDNode *N);
547 SDValue visitVPGATHER(SDNode *N);
548 SDValue visitVPSCATTER(SDNode *N);
549 SDValue visitVP_STRIDED_LOAD(SDNode *N);
550 SDValue visitVP_STRIDED_STORE(SDNode *N);
551 SDValue visitFP_TO_FP16(SDNode *N);
552 SDValue visitFP16_TO_FP(SDNode *N);
553 SDValue visitFP_TO_BF16(SDNode *N);
554 SDValue visitBF16_TO_FP(SDNode *N);
555 SDValue visitVECREDUCE(SDNode *N);
556 SDValue visitVPOp(SDNode *N);
557 SDValue visitGET_FPENV_MEM(SDNode *N);
558 SDValue visitSET_FPENV_MEM(SDNode *N);
559
560 template <class MatchContextClass>
561 SDValue visitFADDForFMACombine(SDNode *N);
562 template <class MatchContextClass>
563 SDValue visitFSUBForFMACombine(SDNode *N);
564 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
565
566 SDValue XformToShuffleWithZero(SDNode *N);
567 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
568 const SDLoc &DL,
569 SDNode *N,
570 SDValue N0,
571 SDValue N1);
572 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
573 SDValue N1, SDNodeFlags Flags);
574 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
575 SDValue N1, SDNodeFlags Flags);
576 SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
577 EVT VT, SDValue N0, SDValue N1,
578 SDNodeFlags Flags = SDNodeFlags());
579
580 SDValue visitShiftByConstant(SDNode *N);
581
582 SDValue foldSelectOfConstants(SDNode *N);
583 SDValue foldVSelectOfConstants(SDNode *N);
584 SDValue foldBinOpIntoSelect(SDNode *BO);
585 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
586 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
587 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
588 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
589     SDValue N2, SDValue N3, ISD::CondCode CC,
590     bool NotExtCompare = false);
591 SDValue convertSelectOfFPConstantsToLoadOffset(
592 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
593     ISD::CondCode CC);
594 SDValue foldSignChangeInBitcast(SDNode *N);
595 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
596     SDValue N2, SDValue N3, ISD::CondCode CC);
597 SDValue foldSelectOfBinops(SDNode *N);
598 SDValue foldSextSetcc(SDNode *N);
599 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
600 const SDLoc &DL);
601 SDValue foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL);
602 SDValue foldABSToABD(SDNode *N, const SDLoc &DL);
603 SDValue unfoldMaskedMerge(SDNode *N);
604 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
605 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
606 const SDLoc &DL, bool foldBooleans);
607 SDValue rebuildSetCC(SDValue N);
608
609 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
610 SDValue &CC, bool MatchStrict = false) const;
611 bool isOneUseSetCC(SDValue N) const;
612
613 SDValue foldAddToAvg(SDNode *N, const SDLoc &DL);
614 SDValue foldSubToAvg(SDNode *N, const SDLoc &DL);
615
616 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
617 unsigned HiOp);
618 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
619 SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
620 const TargetLowering &TLI);
621
622 SDValue CombineExtLoad(SDNode *N);
623 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
624 SDValue combineRepeatedFPDivisors(SDNode *N);
625 SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
626 SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
627 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
628 SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
629 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
630 SDValue BuildSDIV(SDNode *N);
631 SDValue BuildSDIVPow2(SDNode *N);
632 SDValue BuildUDIV(SDNode *N);
633 SDValue BuildSREMPow2(SDNode *N);
634 SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
635 SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
636 bool KnownNeverZero = false,
637 bool InexpensiveOnly = false,
638 std::optional<EVT> OutVT = std::nullopt);
639 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
640 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
641 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
642 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
643 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
644 SDNodeFlags Flags, bool Reciprocal);
645 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
646 SDNodeFlags Flags, bool Reciprocal);
647 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
648 bool DemandHighBits = true);
649 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
650 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
651 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
652 unsigned PosOpcode, unsigned NegOpcode,
653 const SDLoc &DL);
654 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
655 SDValue InnerPos, SDValue InnerNeg, bool HasPos,
656 unsigned PosOpcode, unsigned NegOpcode,
657 const SDLoc &DL);
658 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
659 SDValue MatchLoadCombine(SDNode *N);
660 SDValue mergeTruncStores(StoreSDNode *N);
661 SDValue reduceLoadWidth(SDNode *N);
662 SDValue ReduceLoadOpStoreWidth(SDNode *N);
664 SDValue TransformFPLoadStorePair(SDNode *N);
665 SDValue convertBuildVecZextToZext(SDNode *N);
666 SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
667 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
668 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
669 SDValue reduceBuildVecToShuffle(SDNode *N);
670 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
671 ArrayRef<int> VectorMask, SDValue VecIn1,
672 SDValue VecIn2, unsigned LeftIdx,
673 bool DidSplitVec);
674 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
675
676 /// Walk up chain skipping non-aliasing memory nodes,
677 /// looking for aliasing nodes and adding them to the Aliases vector.
678 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
679 SmallVectorImpl<SDValue> &Aliases);
680
681 /// Return true if there is any possibility that the two addresses overlap.
682 bool mayAlias(SDNode *Op0, SDNode *Op1) const;
683
684 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
685 /// chain (aliasing node.)
686 SDValue FindBetterChain(SDNode *N, SDValue Chain);
687
688 /// Try to replace a store and any possibly adjacent stores on
689 /// consecutive chains with better chains. Return true only if St is
690 /// replaced.
691 ///
692 /// Notice that other chains may still be replaced even if the function
693 /// returns false.
694 bool findBetterNeighborChains(StoreSDNode *St);
695
696 // Helper for findBetterNeighborChains. Walks up the store chain, adding
697 // additional chained stores that do not overlap and can be parallelized.
698 bool parallelizeChainedStores(StoreSDNode *St);
699
700 /// Holds a pointer to an LSBaseSDNode as well as information on where it
701 /// is located in a sequence of memory operations connected by a chain.
702 struct MemOpLink {
703 // Ptr to the mem node.
704 LSBaseSDNode *MemNode;
705
706 // Offset from the base ptr.
707 int64_t OffsetFromBase;
708
709 MemOpLink(LSBaseSDNode *N, int64_t Offset)
710 : MemNode(N), OffsetFromBase(Offset) {}
711 };
712
713 // Classify the origin of a stored value.
714 enum class StoreSource { Unknown, Constant, Extract, Load };
715 StoreSource getStoreSource(SDValue StoreVal) {
716 switch (StoreVal.getOpcode()) {
717 case ISD::Constant:
718 case ISD::ConstantFP:
719 return StoreSource::Constant;
720 case ISD::BUILD_VECTOR:
721 if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
722     ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
723 return StoreSource::Constant;
724 return StoreSource::Unknown;
725 case ISD::EXTRACT_VECTOR_ELT:
726 case ISD::EXTRACT_SUBVECTOR:
727 return StoreSource::Extract;
728 case ISD::LOAD:
729 return StoreSource::Load;
730 default:
731 return StoreSource::Unknown;
732 }
733 }
734
735 /// This is a helper function for visitMUL to check the profitability
736 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
737 /// MulNode is the original multiply, AddNode is (add x, c1),
738 /// and ConstNode is c2.
739 bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
740 SDValue ConstNode);
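// Worked instance of the fold above (constants chosen for illustration):
// with c1 = 3 and c2 = 5,
//   (mul (add x, 3), 5) --> (add (mul x, 5), 15)
// which only pays off if the new constant 15 folds into surrounding code
// (e.g. an addressing mode) or is cheap to materialize.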
741
742 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
743 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
744 /// the type of the loaded value to be extended.
745 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
746 EVT LoadResultTy, EVT &ExtVT);
747
748 /// Helper function to calculate whether the given Load/Store can have its
749 /// width reduced to ExtVT.
750 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
751 EVT &MemVT, unsigned ShAmt = 0);
752
753 /// Used by BackwardsPropagateMask to find suitable loads.
754 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
755 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
756 ConstantSDNode *Mask, SDNode *&NodeToMask);
757 /// Attempt to propagate a given AND node back to load leaves so that they
758 /// can be combined into narrow loads.
759 bool BackwardsPropagateMask(SDNode *N);
760
761 /// Helper function for mergeConsecutiveStores which merges the component
762 /// store chains.
763 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
764 unsigned NumStores);
765
766 /// Helper function for mergeConsecutiveStores which checks if all the store
767 /// nodes have the same underlying object. We can still reuse the first
768 /// store's pointer info if all the stores are from the same object.
769 bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
770
771 /// This is a helper function for mergeConsecutiveStores. When the source
772 /// elements of the consecutive stores are all constants or all extracted
773 /// vector elements, try to merge them into one larger store introducing
774 /// bitcasts if necessary. \return True if a merged store was created.
775 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
776 EVT MemVT, unsigned NumStores,
777 bool IsConstantSrc, bool UseVector,
778 bool UseTrunc);
779
780 /// This is a helper function for mergeConsecutiveStores. Stores that
781 /// potentially may be merged with St are placed in StoreNodes. RootNode is
782 /// a chain predecessor to all store candidates.
783 void getStoreMergeCandidates(StoreSDNode *St,
784 SmallVectorImpl<MemOpLink> &StoreNodes,
785 SDNode *&Root);
786
787 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
788 /// have indirect dependency through their operands. RootNode is the
789 /// predecessor to all stores calculated by getStoreMergeCandidates and is
790 /// used to prune the dependency check. \return True if safe to merge.
791 bool checkMergeStoreCandidatesForDependencies(
792 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
793 SDNode *RootNode);
794
795 /// This is a helper function for mergeConsecutiveStores. Given a list of
796 /// store candidates, find the first N that are consecutive in memory.
797 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
798 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
799 int64_t ElementSizeBytes) const;
800
801 /// This is a helper function for mergeConsecutiveStores. It is used for
802 /// store chains that are composed entirely of constant values.
803 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
804 unsigned NumConsecutiveStores,
805 EVT MemVT, SDNode *Root, bool AllowVectors);
806
807 /// This is a helper function for mergeConsecutiveStores. It is used for
808 /// store chains that are composed entirely of extracted vector elements.
809 /// When extracting multiple vector elements, try to store them in one
810 /// vector store rather than a sequence of scalar stores.
811 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
812 unsigned NumConsecutiveStores, EVT MemVT,
813 SDNode *Root);
814
815 /// This is a helper function for mergeConsecutiveStores. It is used for
816 /// store chains that are composed entirely of loaded values.
817 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
818 unsigned NumConsecutiveStores, EVT MemVT,
819 SDNode *Root, bool AllowVectors,
820 bool IsNonTemporalStore, bool IsNonTemporalLoad);
821
822 /// Merge consecutive store operations into a wide store.
823 /// This optimization uses wide integers or vectors when possible.
824 /// \return true if stores were merged.
825 bool mergeConsecutiveStores(StoreSDNode *St);
826
827 /// Try to transform a truncation where C is a constant:
828 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
829 ///
830 /// \p N needs to be a truncation and its first operand an AND. Other
831 /// requirements are checked by the function (e.g. that trunc is
832 /// single-use); an empty SDValue is returned if they are not met.
833 SDValue distributeTruncateThroughAnd(SDNode *N);
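// Concrete instance of the truncation fold above (types chosen for
// illustration):
//   (i32 trunc (i64 and X, 255)) --> (i32 and (i32 trunc X), 255)
// exposing the narrower AND to further i32 combines.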
834
835 /// Helper function to determine whether the target supports operation
836 /// given by \p Opcode for type \p VT, that is, whether the operation
837 /// is legal or custom before legalizing operations, and whether it is
838 /// legal (but not custom) after legalization.
839 bool hasOperation(unsigned Opcode, EVT VT) {
840 return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
841 }
842
843 public:
844 /// Runs the dag combiner on all nodes in the work list
845 void Run(CombineLevel AtLevel);
846
847 SelectionDAG &getDAG() const { return DAG; }
848
849 /// Returns a type large enough to hold any valid shift amount - before type
850 /// legalization these can be huge.
851 EVT getShiftAmountTy(EVT LHSTy) {
852 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
853 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
854 }
855
856 /// This method returns true if we are running before type legalization or
857 /// if the specified VT is legal.
858 bool isTypeLegal(const EVT &VT) {
859 if (!LegalTypes) return true;
860 return TLI.isTypeLegal(VT);
861 }
862
863 /// Convenience wrapper around TargetLowering::getSetCCResultType
864 EVT getSetCCResultType(EVT VT) const {
865 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
866 }
867
868 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
869 SDValue OrigLoad, SDValue ExtLoad,
870 ISD::NodeType ExtType);
871 };
872
873/// This class is a DAGUpdateListener that removes any deleted
874/// nodes from the worklist.
875class WorklistRemover : public SelectionDAG::DAGUpdateListener {
876 DAGCombiner &DC;
877
878public:
879 explicit WorklistRemover(DAGCombiner &dc)
880 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
881
882 void NodeDeleted(SDNode *N, SDNode *E) override {
883 DC.removeFromWorklist(N);
884 }
885};
886
887class WorklistInserter : public SelectionDAG::DAGUpdateListener {
888 DAGCombiner &DC;
889
890public:
891 explicit WorklistInserter(DAGCombiner &dc)
892 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
893
894 // FIXME: Ideally we could add N to the worklist, but this causes exponential
895 // compile time costs in large DAGs, e.g. Halide.
896 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
897};
898
899} // end anonymous namespace
900
901//===----------------------------------------------------------------------===//
902// TargetLowering::DAGCombinerInfo implementation
903//===----------------------------------------------------------------------===//
904
905 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
906   ((DAGCombiner*)DC)->AddToWorklist(N);
907}
908
909 SDValue TargetLowering::DAGCombinerInfo::
910 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
911 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
912}
913
914 SDValue TargetLowering::DAGCombinerInfo::
915 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
916 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
917}
918
919 SDValue TargetLowering::DAGCombinerInfo::
920 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
921 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
922}
923
924 bool TargetLowering::DAGCombinerInfo::
925 recursivelyDeleteUnusedNodes(SDNode *N) {
926   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
927}
928
929 void TargetLowering::DAGCombinerInfo::
930 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
931   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
932}
933
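// Illustrative sketch of how a target hook drives these wrappers; the target
// class and helper below are hypothetical:
//
//   SDValue XYZTargetLowering::PerformDAGCombine(SDNode *N,
//                                                DAGCombinerInfo &DCI) const {
//     if (SDValue Folded = tryXYZSpecificFold(N, DCI.DAG)) {
//       DCI.AddToWorklist(Folded.getNode());
//       return Folded; // The generic combiner replaces N with this value.
//     }
//     return SDValue();
//   }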
934//===----------------------------------------------------------------------===//
935// Helper Functions
936//===----------------------------------------------------------------------===//
937
938void DAGCombiner::deleteAndRecombine(SDNode *N) {
939 removeFromWorklist(N);
940
941 // If the operands of this node are only used by the node, they will now be
942 // dead. Make sure to re-visit them and recursively delete dead nodes.
943 for (const SDValue &Op : N->ops())
944 // For an operand generating multiple values, one of the values may
945 // become dead allowing further simplification (e.g. split index
946 // arithmetic from an indexed load).
947 if (Op->hasOneUse() || Op->getNumValues() > 1)
948 AddToWorklist(Op.getNode());
949
950 DAG.DeleteNode(N);
951}
952
953 // APInts must be the same size for most operations; this helper
954// function zero extends the shorter of the pair so that they match.
955// We provide an Offset so that we can create bitwidths that won't overflow.
956static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
957 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
958 LHS = LHS.zext(Bits);
959 RHS = RHS.zext(Bits);
960}
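// Worked example (widths chosen for illustration): with an 8-bit LHS, a
// 16-bit RHS and Offset = 1, both values are zero-extended to
// 1 + max(8, 16) = 17 bits, leaving one bit of headroom for a following
// operation that might otherwise overflow.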
961
962// Return true if this node is a setcc, or is a select_cc
963// that selects between the target values used for true and false, making it
964// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
965// the appropriate nodes based on the type of node we are checking. This
966// simplifies life a bit for the callers.
967bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
968 SDValue &CC, bool MatchStrict) const {
969 if (N.getOpcode() == ISD::SETCC) {
970 LHS = N.getOperand(0);
971 RHS = N.getOperand(1);
972 CC = N.getOperand(2);
973 return true;
974 }
975
976 if (MatchStrict &&
977 (N.getOpcode() == ISD::STRICT_FSETCC ||
978 N.getOpcode() == ISD::STRICT_FSETCCS)) {
979 LHS = N.getOperand(1);
980 RHS = N.getOperand(2);
981 CC = N.getOperand(3);
982 return true;
983 }
984
985 if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
986 !TLI.isConstFalseVal(N.getOperand(3)))
987 return false;
988
989 if (TLI.getBooleanContents(N.getValueType()) ==
990     TargetLowering::UndefinedBooleanContent)
991 return false;
992
993 LHS = N.getOperand(0);
994 RHS = N.getOperand(1);
995 CC = N.getOperand(4);
996 return true;
997}
998
999/// Return true if this is a SetCC-equivalent operation with only one use.
1000/// If this is true, it allows the users to invert the operation for free when
1001/// it is profitable to do so.
1002bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1003 SDValue N0, N1, N2;
1004 if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
1005 return true;
1006 return false;
1007}
1008
1009 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
1010 if (!ScalarTy.isSimple())
1011 return false;
1012
1013 uint64_t MaskForTy = 0ULL;
1014 switch (ScalarTy.getSimpleVT().SimpleTy) {
1015 case MVT::i8:
1016 MaskForTy = 0xFFULL;
1017 break;
1018 case MVT::i16:
1019 MaskForTy = 0xFFFFULL;
1020 break;
1021 case MVT::i32:
1022 MaskForTy = 0xFFFFFFFFULL;
1023 break;
1024 default:
1025 return false;
1026 break;
1027 }
1028
1029 APInt Val;
1030 if (ISD::isConstantSplatVector(N, Val))
1031 return Val.getLimitedValue() == MaskForTy;
1032
1033 return false;
1034}
1035
1036// Determines if it is a constant integer or a splat/build vector of constant
1037// integers (and undefs).
1038// Do not permit build vector implicit truncation.
1039static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1040 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1041 return !(Const->isOpaque() && NoOpaques);
1042 if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
1043 return false;
1044 unsigned BitWidth = N.getScalarValueSizeInBits();
1045 for (const SDValue &Op : N->op_values()) {
1046 if (Op.isUndef())
1047 continue;
1048 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1049 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1050 (Const->isOpaque() && NoOpaques))
1051 return false;
1052 }
1053 return true;
1054}
1055
1056// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1057// undef's.
1058static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1059 if (V.getOpcode() != ISD::BUILD_VECTOR)
1060 return false;
1061 return isConstantOrConstantVector(V, NoOpaques) ||
1062     ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1063 }
1064
1065 // Determine if this is an indexed load with an opaque target constant index.
1066static bool canSplitIdx(LoadSDNode *LD) {
1067 return MaySplitLoadIndex &&
1068 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1069 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1070}
1071
1072bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1073 const SDLoc &DL,
1074 SDNode *N,
1075 SDValue N0,
1076 SDValue N1) {
1077 // Currently this only tries to ensure we don't undo the GEP splits done by
1078 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1079 // we check if the following transformation would be problematic:
1080 // (load/store (add, (add, x, offset1), offset2)) ->
1081 // (load/store (add, x, offset1+offset2)).
1082
1083 // (load/store (add, (add, x, y), offset2)) ->
1084 // (load/store (add, (add, x, offset2), y)).
1085
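// For example (offsets chosen for illustration): if CodeGenPrepare split a
// large offset so that offset1 and offset2 each fit the target's immediate
// range, re-merging them into offset1+offset2 may exceed that range and
// force a separate address computation in front of every load/store user.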
1086 if (N0.getOpcode() != ISD::ADD)
1087 return false;
1088
1089 // Check for vscale addressing modes.
1090 // (load/store (add/sub (add x, y), vscale))
1091 // (load/store (add/sub (add x, y), (lsl vscale, C)))
1092 // (load/store (add/sub (add x, y), (mul vscale, C)))
1093 if ((N1.getOpcode() == ISD::VSCALE ||
1094 ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::MUL) &&
1095 N1.getOperand(0).getOpcode() == ISD::VSCALE &&
1096 isa<ConstantSDNode>(N1.getOperand(1)))) &&
1097 N1.getValueType().getFixedSizeInBits() <= 64) {
1098 int64_t ScalableOffset = N1.getOpcode() == ISD::VSCALE
1099 ? N1.getConstantOperandVal(0)
1100 : (N1.getOperand(0).getConstantOperandVal(0) *
1101 (N1.getOpcode() == ISD::SHL
1102 ? (1LL << N1.getConstantOperandVal(1))
1103 : N1.getConstantOperandVal(1)));
1104 if (Opc == ISD::SUB)
1105 ScalableOffset = -ScalableOffset;
1106 if (all_of(N->uses(), [&](SDNode *Node) {
1107 if (auto *LoadStore = dyn_cast<MemSDNode>(Node);
1108 LoadStore && LoadStore->getBasePtr().getNode() == N) {
1109 TargetLoweringBase::AddrMode AM;
1110 AM.HasBaseReg = true;
1111 AM.ScalableOffset = ScalableOffset;
1112 EVT VT = LoadStore->getMemoryVT();
1113 unsigned AS = LoadStore->getAddressSpace();
1114 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1115 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy,
1116 AS);
1117 }
1118 return false;
1119 }))
1120 return true;
1121 }
1122
1123 if (Opc != ISD::ADD)
1124 return false;
1125
1126 auto *C2 = dyn_cast<ConstantSDNode>(N1);
1127 if (!C2)
1128 return false;
1129
1130 const APInt &C2APIntVal = C2->getAPIntValue();
1131 if (C2APIntVal.getSignificantBits() > 64)
1132 return false;
1133
1134 if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1135 if (N0.hasOneUse())
1136 return false;
1137
1138 const APInt &C1APIntVal = C1->getAPIntValue();
1139 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1140 if (CombinedValueIntVal.getSignificantBits() > 64)
1141 return false;
1142 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1143
1144 for (SDNode *Node : N->uses()) {
1145 if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1146 // Is x[offset2] already not a legal addressing mode? If so then
1147 // reassociating the constants breaks nothing (we test offset2 because
1148 // that's the one we hope to fold into the load or store).
1149 TargetLoweringBase::AddrMode AM;
1150 AM.HasBaseReg = true;
1151 AM.BaseOffs = C2APIntVal.getSExtValue();
1152 EVT VT = LoadStore->getMemoryVT();
1153 unsigned AS = LoadStore->getAddressSpace();
1154 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1155 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1156 continue;
1157
1158 // Would x[offset1+offset2] still be a legal addressing mode?
1159 AM.BaseOffs = CombinedValue;
1160 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1161 return true;
1162 }
1163 }
1164 } else {
1165 if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1166 if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1167 return false;
1168
1169 for (SDNode *Node : N->uses()) {
1170 auto *LoadStore = dyn_cast<MemSDNode>(Node);
1171 if (!LoadStore)
1172 return false;
1173
1174 // Is x[offset2] a legal addressing mode? If so then reassociating
1175 // the constants breaks the address pattern.
1176 TargetLoweringBase::AddrMode AM;
1177 AM.HasBaseReg = true;
1178 AM.BaseOffs = C2APIntVal.getSExtValue();
1179 EVT VT = LoadStore->getMemoryVT();
1180 unsigned AS = LoadStore->getAddressSpace();
1181 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1182 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1183 return false;
1184 }
1185 return true;
1186 }
1187
1188 return false;
1189}
1190
1191/// Helper for DAGCombiner::reassociateOps. Try to reassociate (Opc N0, N1) if
1192/// \p N0 is the same kind of operation as \p Opc.
1193SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1194 SDValue N0, SDValue N1,
1195 SDNodeFlags Flags) {
1196 EVT VT = N0.getValueType();
1197
1198 if (N0.getOpcode() != Opc)
1199 return SDValue();
1200
1201 SDValue N00 = N0.getOperand(0);
1202 SDValue N01 = N0.getOperand(1);
1203
1204 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1205 SDNodeFlags NewFlags;
1206 if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1207 Flags.hasNoUnsignedWrap())
1208 NewFlags.setNoUnsignedWrap(true);
1209
1210 if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
1211 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1212 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1213 return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags);
1214 return SDValue();
1215 }
1216 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1217 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1218 // iff (op x, c1) has one use
1219 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1220 return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1221 }
1222 }
1223
1224 // Check for repeated operand logic simplifications.
1225 if (Opc == ISD::AND || Opc == ISD::OR) {
1226 // (N00 & N01) & N00 --> N00 & N01
1227 // (N00 & N01) & N01 --> N00 & N01
1228 // (N00 | N01) | N00 --> N00 | N01
1229 // (N00 | N01) | N01 --> N00 | N01
1230 if (N1 == N00 || N1 == N01)
1231 return N0;
1232 }
1233 if (Opc == ISD::XOR) {
1234 // (N00 ^ N01) ^ N00 --> N01
1235 if (N1 == N00)
1236 return N01;
1237 // (N00 ^ N01) ^ N01 --> N00
1238 if (N1 == N01)
1239 return N00;
1240 }
1241
1242 if (TLI.isReassocProfitable(DAG, N0, N1)) {
1243 if (N1 != N01) {
1244 // Reassociate if (op N00, N1) already exists
1245 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
1246 // If (Op (Op N00, N1), N01) already exists,
1247 // we need to stop reassociating to avoid an infinite loop.
1248 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1249 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1250 }
1251 }
1252
1253 if (N1 != N00) {
1254 // Reassociate if (op N01, N1) already exists
1255 if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
1256 // If (Op (Op N01, N1), N00) already exists,
1257 // we need to stop reassociating to avoid an infinite loop.
1258 if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1259 return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1260 }
1261 }
1262
1263 // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
1264 // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
1265 // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
1266 // comparisons with the same predicate. This enables optimizations as the
1267 // following one:
1268 // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1269 // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1270 if (Opc == ISD::AND || Opc == ISD::OR) {
1271 if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1272 N01->getOpcode() == ISD::SETCC) {
1273 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1274 ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1275 ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1276 if (CC1 == CC00 && CC1 != CC01) {
1277 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1278 return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1279 }
1280 if (CC1 == CC01 && CC1 != CC00) {
1281 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1282 return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1283 }
1284 }
1285 }
1286 }
1287
1288 return SDValue();
1289}
1290
1291/// Try to reassociate commutative (Opc N0, N1) if either \p N0 or \p N1 is the
1292/// same kind of operation as \p Opc.
1293SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1294 SDValue N1, SDNodeFlags Flags) {
1295 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1296
1297 // Floating-point reassociation is not allowed without loose FP math.
1298 if (N0.getValueType().isFloatingPoint() ||
1299     N1.getValueType().isFloatingPoint())
1300 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1301 return SDValue();
1302
1303 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1304 return Combined;
1305 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1306 return Combined;
1307 return SDValue();
1308}
1309
1310// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1311// Note that we only expect Flags to be passed from FP operations. For integer
1312// operations they need to be dropped.
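// Concrete instance (opcodes chosen for illustration): with
// RedOpc = ISD::VECREDUCE_ADD and Opc = ISD::ADD this rewrites
//   add (vecreduce_add x), (vecreduce_add y) --> vecreduce_add (add x, y)
// trading two reductions for one at the cost of a single vector add.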
1313SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1314 const SDLoc &DL, EVT VT, SDValue N0,
1315 SDValue N1, SDNodeFlags Flags) {
1316 if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1317 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1318 N0->hasOneUse() && N1->hasOneUse() &&
1320 TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1321 SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1322 return DAG.getNode(RedOpc, DL, VT,
1323 DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1324 N0.getOperand(0), N1.getOperand(0)));
1325 }
1326 return SDValue();
1327}
1328
1329SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1330 bool AddTo) {
1331 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1332 ++NodesCombined;
1333 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1334 To[0].dump(&DAG);
1335 dbgs() << " and " << NumTo - 1 << " other values\n");
1336 for (unsigned i = 0, e = NumTo; i != e; ++i)
1337 assert((!To[i].getNode() ||
1338 N->getValueType(i) == To[i].getValueType()) &&
1339 "Cannot combine value to value of different type!");
1340
1341 WorklistRemover DeadNodes(*this);
1342 DAG.ReplaceAllUsesWith(N, To);
1343 if (AddTo) {
1344 // Push the new nodes and any users onto the worklist
1345 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1346 if (To[i].getNode())
1347 AddToWorklistWithUsers(To[i].getNode());
1348 }
1349 }
1350
1351 // Finally, if the node is now dead, remove it from the graph. The node
1352 // may not be dead if the replacement process recursively simplified to
1353 // something else needing this node.
1354 if (N->use_empty())
1355 deleteAndRecombine(N);
1356 return SDValue(N, 0);
1357}
1358
1359void DAGCombiner::
1360CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1361 // Replace the old value with the new one.
1362 ++NodesCombined;
1363 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1364 dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1365
1366 // Replace all uses.
1367 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1368
1369 // Push the new node and any (possibly new) users onto the worklist.
1370 AddToWorklistWithUsers(TLO.New.getNode());
1371
1372 // Finally, if the node is now dead, remove it from the graph.
1373 recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1374}
1375
1376/// Check the specified integer node value to see if it can be simplified or if
1377/// things it uses can be simplified by bit propagation. If so, return true.
1378bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1379 const APInt &DemandedElts,
1380 bool AssumeSingleUse) {
1381 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1382 KnownBits Known;
1383 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1384 AssumeSingleUse))
1385 return false;
1386
1387 // Revisit the node.
1388 AddToWorklist(Op.getNode());
1389
1390 CommitTargetLoweringOpt(TLO);
1391 return true;
1392}
1393
1394/// Check the specified vector node value to see if it can be simplified or
1395/// if things it uses can be simplified as it only uses some of the elements.
1396/// If so, return true.
1397bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1398 const APInt &DemandedElts,
1399 bool AssumeSingleUse) {
1400 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1401 APInt KnownUndef, KnownZero;
1402 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1403 TLO, 0, AssumeSingleUse))
1404 return false;
1405
1406 // Revisit the node.
1407 AddToWorklist(Op.getNode());
1408
1409 CommitTargetLoweringOpt(TLO);
1410 return true;
1411}
1412
1413void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1414 SDLoc DL(Load);
1415 EVT VT = Load->getValueType(0);
1416 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1417
1418 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1419 Trunc.dump(&DAG); dbgs() << '\n');
1420
1421 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1422 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1423
1424 AddToWorklist(Trunc.getNode());
1425 recursivelyDeleteUnusedNodes(Load);
1426}
1427
1428SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1429 Replace = false;
1430 SDLoc DL(Op);
1431 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1432 LoadSDNode *LD = cast<LoadSDNode>(Op);
1433 EVT MemVT = LD->getMemoryVT();
1434 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1435                                                    : LD->getExtensionType();
1436 Replace = true;
1437 return DAG.getExtLoad(ExtType, DL, PVT,
1438 LD->getChain(), LD->getBasePtr(),
1439 MemVT, LD->getMemOperand());
1440 }
1441
1442 unsigned Opc = Op.getOpcode();
1443 switch (Opc) {
1444 default: break;
1445 case ISD::AssertSext:
1446 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1447 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1448 break;
1449 case ISD::AssertZext:
1450 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1451 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1452 break;
1453 case ISD::Constant: {
1454 unsigned ExtOpc =
1455 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1456 return DAG.getNode(ExtOpc, DL, PVT, Op);
1457 }
1458 }
1459
1460 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1461 return SDValue();
1462 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1463}
1464
1465SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1466 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1467 return SDValue();
1468 EVT OldVT = Op.getValueType();
1469 SDLoc DL(Op);
1470 bool Replace = false;
1471 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1472 if (!NewOp.getNode())
1473 return SDValue();
1474 AddToWorklist(NewOp.getNode());
1475
1476 if (Replace)
1477 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1478 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1479 DAG.getValueType(OldVT));
1480}
1481
1482SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1483 EVT OldVT = Op.getValueType();
1484 SDLoc DL(Op);
1485 bool Replace = false;
1486 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1487 if (!NewOp.getNode())
1488 return SDValue();
1489 AddToWorklist(NewOp.getNode());
1490
1491 if (Replace)
1492 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1493 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1494}
1495
1496/// Promote the specified integer binary operation if the target indicates it is
1497/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1498/// i32 since i16 instructions are longer.
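// Concrete instance (types chosen for illustration): on a target where i16
// arithmetic is undesirable, an (i16 add a, b) is rewritten as
//   (i16 trunc (i32 add (any_extend a), (any_extend b)))
// so the arithmetic itself happens in the wider, cheaper type.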
1499SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1500 if (!LegalOperations)
1501 return SDValue();
1502
1503 EVT VT = Op.getValueType();
1504 if (VT.isVector() || !VT.isInteger())
1505 return SDValue();
1506
1507 // If operation type is 'undesirable', e.g. i16 on x86, consider
1508 // promoting it.
1509 unsigned Opc = Op.getOpcode();
1510 if (TLI.isTypeDesirableForOp(Opc, VT))
1511 return SDValue();
1512
1513 EVT PVT = VT;
1514 // Consult target whether it is a good idea to promote this operation and
1515 // what's the right type to promote it to.
1516 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1517 assert(PVT != VT && "Don't know what type to promote to!");
1518
1519 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1520
1521 bool Replace0 = false;
1522 SDValue N0 = Op.getOperand(0);
1523 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1524
1525 bool Replace1 = false;
1526 SDValue N1 = Op.getOperand(1);
1527 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1528 SDLoc DL(Op);
1529
1530 SDValue RV =
1531 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1532
1533 // We are always replacing N0/N1's use in N and only need additional
1534 // replacements if there are additional uses.
1535 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1536 // (SDValue) here because the node may reference multiple values
1537 // (for example, the chain value of a load node).
1538 Replace0 &= !N0->hasOneUse();
1539 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1540
1541 // Combine Op here so it is preserved past replacements.
1542 CombineTo(Op.getNode(), RV);
1543
1544 // If operands have a use ordering, make sure we deal with
1545 // predecessor first.
1546 if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1547 std::swap(N0, N1);
1548 std::swap(NN0, NN1);
1549 }
1550
1551 if (Replace0) {
1552 AddToWorklist(NN0.getNode());
1553 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1554 }
1555 if (Replace1) {
1556 AddToWorklist(NN1.getNode());
1557 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1558 }
1559 return Op;
1560 }
1561 return SDValue();
1562}
1563
1564/// Promote the specified integer shift operation if the target indicates it is
1565/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1566/// i32 since i16 instructions are longer.
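/// The shifted value is extended as the shift kind requires, while the shift
/// amount is left unchanged. Illustrative, assuming i32 is the desirable type:
///   (i16 srl x, c) -> (i16 trunc (i32 srl (i32 zext x), c))
///   (i16 sra x, c) -> (i16 trunc (i32 sra (i32 sext x), c))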
1567SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1568 if (!LegalOperations)
1569 return SDValue();
1570
1571 EVT VT = Op.getValueType();
1572 if (VT.isVector() || !VT.isInteger())
1573 return SDValue();
1574
1575 // If operation type is 'undesirable', e.g. i16 on x86, consider
1576 // promoting it.
1577 unsigned Opc = Op.getOpcode();
1578 if (TLI.isTypeDesirableForOp(Opc, VT))
1579 return SDValue();
1580
1581 EVT PVT = VT;
1582 // Consult target whether it is a good idea to promote this operation and
1583 // what's the right type to promote it to.
1584 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1585 assert(PVT != VT && "Don't know what type to promote to!");
1586
1587 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1588
1589 bool Replace = false;
1590 SDValue N0 = Op.getOperand(0);
1591 if (Opc == ISD::SRA)
1592 N0 = SExtPromoteOperand(N0, PVT);
1593 else if (Opc == ISD::SRL)
1594 N0 = ZExtPromoteOperand(N0, PVT);
1595 else
1596 N0 = PromoteOperand(N0, PVT, Replace);
1597
1598 if (!N0.getNode())
1599 return SDValue();
1600
1601 SDLoc DL(Op);
1602 SDValue N1 = Op.getOperand(1);
1603 SDValue RV =
1604 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1605
1606 if (Replace)
1607 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1608
1609 // Deal with Op being deleted.
1610 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1611 return RV;
1612 }
1613 return SDValue();
1614}
1615
1616SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1617 if (!LegalOperations)
1618 return SDValue();
1619
1620 EVT VT = Op.getValueType();
1621 if (VT.isVector() || !VT.isInteger())
1622 return SDValue();
1623
1624 // If operation type is 'undesirable', e.g. i16 on x86, consider
1625 // promoting it.
1626 unsigned Opc = Op.getOpcode();
1627 if (TLI.isTypeDesirableForOp(Opc, VT))
1628 return SDValue();
1629
1630 EVT PVT = VT;
1631 // Consult target whether it is a good idea to promote this operation and
1632 // what's the right type to promote it to.
1633 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1634 assert(PVT != VT && "Don't know what type to promote to!");
1635 // fold (aext (aext x)) -> (aext x)
1636 // fold (aext (zext x)) -> (zext x)
1637 // fold (aext (sext x)) -> (sext x)
1638 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1639 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1640 }
1641 return SDValue();
1642}
1643
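/// Promote an integer load whose value type is undesirable for the target: the
/// load is rewritten as an extending load of the promoted type and a TRUNCATE
/// back to the original type, e.g. (i16 load p) -> (i16 trunc (i32 extload p)).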
1644bool DAGCombiner::PromoteLoad(SDValue Op) {
1645 if (!LegalOperations)
1646 return false;
1647
1648 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1649 return false;
1650
1651 EVT VT = Op.getValueType();
1652 if (VT.isVector() || !VT.isInteger())
1653 return false;
1654
1655 // If operation type is 'undesirable', e.g. i16 on x86, consider
1656 // promoting it.
1657 unsigned Opc = Op.getOpcode();
1658 if (TLI.isTypeDesirableForOp(Opc, VT))
1659 return false;
1660
1661 EVT PVT = VT;
1662 // Consult target whether it is a good idea to promote this operation and
1663 // what's the right type to promote it to.
1664 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1665 assert(PVT != VT && "Don't know what type to promote to!");
1666
1667 SDLoc DL(Op);
1668 SDNode *N = Op.getNode();
1669 LoadSDNode *LD = cast<LoadSDNode>(N);
1670 EVT MemVT = LD->getMemoryVT();
1671 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1672 : LD->getExtensionType();
1673 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1674 LD->getChain(), LD->getBasePtr(),
1675 MemVT, LD->getMemOperand());
1676 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1677
1678 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1679 Result.dump(&DAG); dbgs() << '\n');
1680
1681 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1682 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1683
1684 AddToWorklist(Result.getNode());
1685 recursivelyDeleteUnusedNodes(N);
1686 return true;
1687 }
1688
1689 return false;
1690}
1691
1692/// Recursively delete a node which has no uses and any operands for
1693/// which it is the only use.
1694///
1695/// Note that this both deletes the nodes and removes them from the worklist.
1696 /// It also adds any nodes that have had a user deleted to the worklist, as
1697 /// they may now have only one use and be subject to other combines.
1698bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1699 if (!N->use_empty())
1700 return false;
1701
1702 SmallSetVector<SDNode *, 16> Nodes;
1703 Nodes.insert(N);
1704 do {
1705 N = Nodes.pop_back_val();
1706 if (!N)
1707 continue;
1708
1709 if (N->use_empty()) {
1710 for (const SDValue &ChildN : N->op_values())
1711 Nodes.insert(ChildN.getNode());
1712
1713 removeFromWorklist(N);
1714 DAG.DeleteNode(N);
1715 } else {
1716 AddToWorklist(N);
1717 }
1718 } while (!Nodes.empty());
1719 return true;
1720}
1721
1722//===----------------------------------------------------------------------===//
1723// Main DAG Combiner implementation
1724//===----------------------------------------------------------------------===//
1725
1726void DAGCombiner::Run(CombineLevel AtLevel) {
1727 // set the instance variables, so that the various visit routines may use it.
1728 Level = AtLevel;
1729 LegalDAG = Level >= AfterLegalizeDAG;
1730 LegalOperations = Level >= AfterLegalizeVectorOps;
1731 LegalTypes = Level >= AfterLegalizeTypes;
1732
1733 WorklistInserter AddNodes(*this);
1734
1735 // Add all the dag nodes to the worklist.
1736 //
1737 // Note: Not all nodes are added to the PruningList here because the only
1738 // nodes which can be deleted are those which have no uses, and all other
1739 // nodes which would otherwise be added to the worklist by the first call to
1740 // getNextWorklistEntry are already present in it.
1741 for (SDNode &Node : DAG.allnodes())
1742 AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1743
1744 // Create a dummy node (which is not added to allnodes), that adds a reference
1745 // to the root node, preventing it from being deleted, and tracking any
1746 // changes of the root.
1747 HandleSDNode Dummy(DAG.getRoot());
1748
1749 // While we have a valid worklist entry node, try to combine it.
1750 while (SDNode *N = getNextWorklistEntry()) {
1751 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1752 // N is deleted from the DAG, since they too may now be dead or may have a
1753 // reduced number of uses, allowing other xforms.
1754 if (recursivelyDeleteUnusedNodes(N))
1755 continue;
1756
1757 WorklistRemover DeadNodes(*this);
1758
1759 // If this combine is running after legalizing the DAG, re-legalize any
1760 // nodes pulled off the worklist.
1761 if (LegalDAG) {
1762 SmallSetVector<SDNode *, 16> UpdatedNodes;
1763 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1764
1765 for (SDNode *LN : UpdatedNodes)
1766 AddToWorklistWithUsers(LN);
1767
1768 if (!NIsValid)
1769 continue;
1770 }
1771
1772 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1773
1774 // Add any operands of the new node which have not yet been combined to the
1775 // worklist as well. Because the worklist uniques things already, this
1776 // won't repeatedly process the same operand.
1777 for (const SDValue &ChildN : N->op_values())
1778 if (!CombinedNodes.count(ChildN.getNode()))
1779 AddToWorklist(ChildN.getNode());
1780
1781 CombinedNodes.insert(N);
1782 SDValue RV = combine(N);
1783
1784 if (!RV.getNode())
1785 continue;
1786
1787 ++NodesCombined;
1788
1789 // If we get back the same node we passed in, rather than a new node or
1790 // zero, we know that the node must have defined multiple values and
1791 // CombineTo was used. Since CombineTo takes care of the worklist
1792 // mechanics for us, we have no work to do in this case.
1793 if (RV.getNode() == N)
1794 continue;
1795
1796 assert(N->getOpcode() != ISD::DELETED_NODE &&
1797 RV.getOpcode() != ISD::DELETED_NODE &&
1798 "Node was deleted but visit returned new node!");
1799
1800 LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1801
1802 if (N->getNumValues() == RV->getNumValues())
1803 DAG.ReplaceAllUsesWith(N, RV.getNode());
1804 else {
1805 assert(N->getValueType(0) == RV.getValueType() &&
1806 N->getNumValues() == 1 && "Type mismatch");
1807 DAG.ReplaceAllUsesWith(N, &RV);
1808 }
1809
1810 // Push the new node and any users onto the worklist. Omit this if the
1811 // new node is the EntryToken (e.g. if a store managed to get optimized
1812 // out), because re-visiting the EntryToken and its users will not uncover
1813 // any additional opportunities, but there may be a large number of such
1814 // users, potentially causing compile time explosion.
1815 if (RV.getOpcode() != ISD::EntryToken)
1816 AddToWorklistWithUsers(RV.getNode());
1817
1818 // Finally, if the node is now dead, remove it from the graph. The node
1819 // may not be dead if the replacement process recursively simplified to
1820 // something else needing this node. This will also take care of adding any
1821 // operands which have lost a user to the worklist.
1822 recursivelyDeleteUnusedNodes(N);
1823 }
1824
1825 // If the root changed (e.g. it was a dead load), update the root.
1826 DAG.setRoot(Dummy.getValue());
1827 DAG.RemoveDeadNodes();
1828}
1829
1830SDValue DAGCombiner::visit(SDNode *N) {
1831 // clang-format off
1832 switch (N->getOpcode()) {
1833 default: break;
1834 case ISD::TokenFactor: return visitTokenFactor(N);
1835 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1836 case ISD::ADD: return visitADD(N);
1837 case ISD::SUB: return visitSUB(N);
1838 case ISD::SADDSAT:
1839 case ISD::UADDSAT: return visitADDSAT(N);
1840 case ISD::SSUBSAT:
1841 case ISD::USUBSAT: return visitSUBSAT(N);
1842 case ISD::ADDC: return visitADDC(N);
1843 case ISD::SADDO:
1844 case ISD::UADDO: return visitADDO(N);
1845 case ISD::SUBC: return visitSUBC(N);
1846 case ISD::SSUBO:
1847 case ISD::USUBO: return visitSUBO(N);
1848 case ISD::ADDE: return visitADDE(N);
1849 case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
1850 case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
1851 case ISD::SUBE: return visitSUBE(N);
1852 case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
1853 case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
1854 case ISD::SMULFIX:
1855 case ISD::SMULFIXSAT:
1856 case ISD::UMULFIX:
1857 case ISD::UMULFIXSAT: return visitMULFIX(N);
1858 case ISD::MUL: return visitMUL(N);
1859 case ISD::SDIV: return visitSDIV(N);
1860 case ISD::UDIV: return visitUDIV(N);
1861 case ISD::SREM:
1862 case ISD::UREM: return visitREM(N);
1863 case ISD::MULHU: return visitMULHU(N);
1864 case ISD::MULHS: return visitMULHS(N);
1865 case ISD::AVGFLOORS:
1866 case ISD::AVGFLOORU:
1867 case ISD::AVGCEILS:
1868 case ISD::AVGCEILU: return visitAVG(N);
1869 case ISD::ABDS:
1870 case ISD::ABDU: return visitABD(N);
1871 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1872 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1873 case ISD::SMULO:
1874 case ISD::UMULO: return visitMULO(N);
1875 case ISD::SMIN:
1876 case ISD::SMAX:
1877 case ISD::UMIN:
1878 case ISD::UMAX: return visitIMINMAX(N);
1879 case ISD::AND: return visitAND(N);
1880 case ISD::OR: return visitOR(N);
1881 case ISD::XOR: return visitXOR(N);
1882 case ISD::SHL: return visitSHL(N);
1883 case ISD::SRA: return visitSRA(N);
1884 case ISD::SRL: return visitSRL(N);
1885 case ISD::ROTR:
1886 case ISD::ROTL: return visitRotate(N);
1887 case ISD::FSHL:
1888 case ISD::FSHR: return visitFunnelShift(N);
1889 case ISD::SSHLSAT:
1890 case ISD::USHLSAT: return visitSHLSAT(N);
1891 case ISD::ABS: return visitABS(N);
1892 case ISD::BSWAP: return visitBSWAP(N);
1893 case ISD::BITREVERSE: return visitBITREVERSE(N);
1894 case ISD::CTLZ: return visitCTLZ(N);
1895 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1896 case ISD::CTTZ: return visitCTTZ(N);
1897 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1898 case ISD::CTPOP: return visitCTPOP(N);
1899 case ISD::SELECT: return visitSELECT(N);
1900 case ISD::VSELECT: return visitVSELECT(N);
1901 case ISD::SELECT_CC: return visitSELECT_CC(N);
1902 case ISD::SETCC: return visitSETCC(N);
1903 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1904 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1905 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1906 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1907 case ISD::AssertSext:
1908 case ISD::AssertZext: return visitAssertExt(N);
1909 case ISD::AssertAlign: return visitAssertAlign(N);
1910 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1911 case ISD::SIGN_EXTEND_VECTOR_INREG:
1912 case ISD::ZERO_EXTEND_VECTOR_INREG:
1913 case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1914 case ISD::TRUNCATE: return visitTRUNCATE(N);
1915 case ISD::BITCAST: return visitBITCAST(N);
1916 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1917 case ISD::FADD: return visitFADD(N);
1918 case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
1919 case ISD::FSUB: return visitFSUB(N);
1920 case ISD::FMUL: return visitFMUL(N);
1921 case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
1922 case ISD::FMAD: return visitFMAD(N);
1923 case ISD::FDIV: return visitFDIV(N);
1924 case ISD::FREM: return visitFREM(N);
1925 case ISD::FSQRT: return visitFSQRT(N);
1926 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1927 case ISD::FPOW: return visitFPOW(N);
1928 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1929 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1930 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1931 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1932 case ISD::LRINT:
1933 case ISD::LLRINT: return visitXRINT(N);
1934 case ISD::FP_ROUND: return visitFP_ROUND(N);
1935 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1936 case ISD::FNEG: return visitFNEG(N);
1937 case ISD::FABS: return visitFABS(N);
1938 case ISD::FFLOOR: return visitFFLOOR(N);
1939 case ISD::FMINNUM:
1940 case ISD::FMAXNUM:
1941 case ISD::FMINIMUM:
1942 case ISD::FMAXIMUM: return visitFMinMax(N);
1943 case ISD::FCEIL: return visitFCEIL(N);
1944 case ISD::FTRUNC: return visitFTRUNC(N);
1945 case ISD::FFREXP: return visitFFREXP(N);
1946 case ISD::BRCOND: return visitBRCOND(N);
1947 case ISD::BR_CC: return visitBR_CC(N);
1948 case ISD::LOAD: return visitLOAD(N);
1949 case ISD::STORE: return visitSTORE(N);
1950 case ISD::ATOMIC_STORE: return visitATOMIC_STORE(N);
1951 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1952 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1953 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1954 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1955 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1956 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1957 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1958 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1959 case ISD::MGATHER: return visitMGATHER(N);
1960 case ISD::MLOAD: return visitMLOAD(N);
1961 case ISD::MSCATTER: return visitMSCATTER(N);
1962 case ISD::MSTORE: return visitMSTORE(N);
1963 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1964 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1965 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1966 case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
1967 case ISD::BF16_TO_FP: return visitBF16_TO_FP(N);
1968 case ISD::FREEZE: return visitFREEZE(N);
1969 case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
1970 case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
1971 case ISD::VECREDUCE_FADD:
1972 case ISD::VECREDUCE_FMUL:
1973 case ISD::VECREDUCE_ADD:
1974 case ISD::VECREDUCE_MUL:
1975 case ISD::VECREDUCE_AND:
1976 case ISD::VECREDUCE_OR:
1977 case ISD::VECREDUCE_XOR:
1978 case ISD::VECREDUCE_SMAX:
1979 case ISD::VECREDUCE_SMIN:
1980 case ISD::VECREDUCE_UMAX:
1981 case ISD::VECREDUCE_UMIN:
1982 case ISD::VECREDUCE_FMAX:
1983 case ISD::VECREDUCE_FMIN:
1984 case ISD::VECREDUCE_FMAXIMUM:
1985 case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
1986#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1987#include "llvm/IR/VPIntrinsics.def"
1988 return visitVPOp(N);
1989 }
1990 // clang-format on
1991 return SDValue();
1992}
1993
1994SDValue DAGCombiner::combine(SDNode *N) {
1995 if (!DebugCounter::shouldExecute(DAGCombineCounter))
1996 return SDValue();
1997
1998 SDValue RV;
1999 if (!DisableGenericCombines)
2000 RV = visit(N);
2001
2002 // If nothing happened, try a target-specific DAG combine.
2003 if (!RV.getNode()) {
2004 assert(N->getOpcode() != ISD::DELETED_NODE &&
2005 "Node was deleted but visit returned NULL!");
2006
2007 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2008 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2009
2010 // Expose the DAG combiner to the target combiner impls.
2011 TargetLowering::DAGCombinerInfo
2012 DagCombineInfo(DAG, Level, false, this);
2013
2014 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2015 }
2016 }
2017
2018 // If nothing happened still, try promoting the operation.
2019 if (!RV.getNode()) {
2020 switch (N->getOpcode()) {
2021 default: break;
2022 case ISD::ADD:
2023 case ISD::SUB:
2024 case ISD::MUL:
2025 case ISD::AND:
2026 case ISD::OR:
2027 case ISD::XOR:
2028 RV = PromoteIntBinOp(SDValue(N, 0));
2029 break;
2030 case ISD::SHL:
2031 case ISD::SRA:
2032 case ISD::SRL:
2033 RV = PromoteIntShiftOp(SDValue(N, 0));
2034 break;
2035 case ISD::SIGN_EXTEND:
2036 case ISD::ZERO_EXTEND:
2037 case ISD::ANY_EXTEND:
2038 RV = PromoteExtend(SDValue(N, 0));
2039 break;
2040 case ISD::LOAD:
2041 if (PromoteLoad(SDValue(N, 0)))
2042 RV = SDValue(N, 0);
2043 break;
2044 }
2045 }
2046
2047 // If N is a commutative binary node, try to eliminate it if the commuted
2048 // version is already present in the DAG.
2049 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2050 SDValue N0 = N->getOperand(0);
2051 SDValue N1 = N->getOperand(1);
2052
2053 // Constant operands are canonicalized to RHS.
2054 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2055 SDValue Ops[] = {N1, N0};
2056 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2057 N->getFlags());
2058 if (CSENode)
2059 return SDValue(CSENode, 0);
2060 }
2061 }
2062
2063 return RV;
2064}
2065
2066/// Given a node, return its input chain if it has one, otherwise return a null
2067/// sd operand.
2068 static SDValue getInputChainForNode(SDNode *N) {
2069 if (unsigned NumOps = N->getNumOperands()) {
2070 if (N->getOperand(0).getValueType() == MVT::Other)
2071 return N->getOperand(0);
2072 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2073 return N->getOperand(NumOps-1);
2074 for (unsigned i = 1; i < NumOps-1; ++i)
2075 if (N->getOperand(i).getValueType() == MVT::Other)
2076 return N->getOperand(i);
2077 }
2078 return SDValue();
2079}
2080
2081SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2082 // If N has two operands, where one has an input chain equal to the other,
2083 // the 'other' chain is redundant.
2084 if (N->getNumOperands() == 2) {
2085 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2086 return N->getOperand(0);
2087 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2088 return N->getOperand(1);
2089 }
2090
2091 // Don't simplify token factors if optnone.
2092 if (OptLevel == CodeGenOptLevel::None)
2093 return SDValue();
2094
2095 // Don't simplify the token factor if the node itself has too many operands.
2096 if (N->getNumOperands() > TokenFactorInlineLimit)
2097 return SDValue();
2098
2099 // If the sole user is a token factor, we should make sure we have a
2100 // chance to merge them together. This prevents TF chains from inhibiting
2101 // optimizations.
2102 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2103 AddToWorklist(*(N->use_begin()));
2104
2105 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
2106 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
2107 SmallPtrSet<SDNode *, 16> SeenOps;
2108 bool Changed = false; // If we should replace this token factor.
2109
2110 // Start out with this token factor.
2111 TFs.push_back(N);
2112
2113 // Iterate through token factors. The TFs list grows when new token factors
2114 // are encountered.
2115 for (unsigned i = 0; i < TFs.size(); ++i) {
2116 // Limit number of nodes to inline, to avoid quadratic compile times.
2117 // We have to add the outstanding Token Factors to Ops, otherwise we might
2118 // drop Ops from the resulting Token Factors.
2119 if (Ops.size() > TokenFactorInlineLimit) {
2120 for (unsigned j = i; j < TFs.size(); j++)
2121 Ops.emplace_back(TFs[j], 0);
2122 // Drop unprocessed Token Factors from TFs, so we do not add them to the
2123 // combiner worklist later.
2124 TFs.resize(i);
2125 break;
2126 }
2127
2128 SDNode *TF = TFs[i];
2129 // Check each of the operands.
2130 for (const SDValue &Op : TF->op_values()) {
2131 switch (Op.getOpcode()) {
2132 case ISD::EntryToken:
2133 // Entry tokens don't need to be added to the list. They are
2134 // redundant.
2135 Changed = true;
2136 break;
2137
2138 case ISD::TokenFactor:
2139 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2140 // Queue up for processing.
2141 TFs.push_back(Op.getNode());
2142 Changed = true;
2143 break;
2144 }
2145 [[fallthrough]];
2146
2147 default:
2148 // Only add if it isn't already in the list.
2149 if (SeenOps.insert(Op.getNode()).second)
2150 Ops.push_back(Op);
2151 else
2152 Changed = true;
2153 break;
2154 }
2155 }
2156 }
2157
2158 // Re-visit inlined Token Factors, to clean them up in case they have been
2159 // removed. Skip the first Token Factor, as this is the current node.
2160 for (unsigned i = 1, e = TFs.size(); i < e; i++)
2161 AddToWorklist(TFs[i]);
2162
2163 // Remove Nodes that are chained to another node in the list. Do so
2164 // by walking up chains breadth-first, stopping when we've seen
2165 // another operand. In general we must climb to the EntryNode, but we can exit
2166 // early if we find all remaining work is associated with just one operand as
2167 // no further pruning is possible.
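// For example, in a TokenFactor(A, B) where A is reachable by walking up B's
// chain, A is redundant: B already orders after A, so A can be pruned.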
2168
2169 // List of nodes to search through and original Ops from which they originate.
2170 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2171 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2172 SmallPtrSet<SDNode *, 16> SeenChains;
2173 bool DidPruneOps = false;
2174
2175 unsigned NumLeftToConsider = 0;
2176 for (const SDValue &Op : Ops) {
2177 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2178 OpWorkCount.push_back(1);
2179 }
2180
2181 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2182 // If this is an Op, we can remove the op from the list. Re-mark any
2183 // search associated with it as being from the current OpNumber.
2184 if (SeenOps.contains(Op)) {
2185 Changed = true;
2186 DidPruneOps = true;
2187 unsigned OrigOpNumber = 0;
2188 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2189 OrigOpNumber++;
2190 assert((OrigOpNumber != Ops.size()) &&
2191 "expected to find TokenFactor Operand");
2192 // Re-mark worklist from OrigOpNumber to OpNumber
2193 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2194 if (Worklist[i].second == OrigOpNumber) {
2195 Worklist[i].second = OpNumber;
2196 }
2197 }
2198 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2199 OpWorkCount[OrigOpNumber] = 0;
2200 NumLeftToConsider--;
2201 }
2202 // Add if it's a new chain
2203 if (SeenChains.insert(Op).second) {
2204 OpWorkCount[OpNumber]++;
2205 Worklist.push_back(std::make_pair(Op, OpNumber));
2206 }
2207 };
2208
2209 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2210 // We need to consider at least 2 Ops to prune.
2211 if (NumLeftToConsider <= 1)
2212 break;
2213 auto CurNode = Worklist[i].first;
2214 auto CurOpNumber = Worklist[i].second;
2215 assert((OpWorkCount[CurOpNumber] > 0) &&
2216 "Node should not appear in worklist");
2217 switch (CurNode->getOpcode()) {
2218 case ISD::EntryToken:
2219 // Hitting EntryToken is the only way for the search to terminate without
2220 // hitting another operand's search. Prevent us from marking this operand
2221 // considered.
2223 NumLeftToConsider++;
2224 break;
2225 case ISD::TokenFactor:
2226 for (const SDValue &Op : CurNode->op_values())
2227 AddToWorklist(i, Op.getNode(), CurOpNumber);
2228 break;
2229 case ISD::LIFETIME_START:
2230 case ISD::LIFETIME_END:
2231 case ISD::CopyFromReg:
2232 case ISD::CopyToReg:
2233 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2234 break;
2235 default:
2236 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2237 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2238 break;
2239 }
2240 OpWorkCount[CurOpNumber]--;
2241 if (OpWorkCount[CurOpNumber] == 0)
2242 NumLeftToConsider--;
2243 }
2244
2245 // If we've changed things around then replace token factor.
2246 if (Changed) {
2247 SDValue Result;
2248 if (Ops.empty()) {
2249 // The entry token is the only possible outcome.
2250 Result = DAG.getEntryNode();
2251 } else {
2252 if (DidPruneOps) {
2253 SmallVector<SDValue, 8> PrunedOps;
2254 // Keep only the ops that were not reached while walking up another op's chain.
2255 for (const SDValue &Op : Ops) {
2256 if (SeenChains.count(Op.getNode()) == 0)
2257 PrunedOps.push_back(Op);
2258 }
2259 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2260 } else {
2261 Result = DAG.getTokenFactor(SDLoc(N), Ops);
2262 }
2263 }
2264 return Result;
2265 }
2266 return SDValue();
2267}
2268
2269/// MERGE_VALUES can always be eliminated.
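/// Each result value i of the node is replaced by the node's operand i, after
/// which the node is dead and can be deleted.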
2270SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2271 WorklistRemover DeadNodes(*this);
2272 // Replacing results may cause a different MERGE_VALUES to suddenly
2273 // be CSE'd with N, and carry its uses with it. Iterate until no
2274 // uses remain, to ensure that the node can be safely deleted.
2275 // First add the users of this node to the work list so that they
2276 // can be tried again once they have new operands.
2277 AddUsersToWorklist(N);
2278 do {
2279 // Do as a single replacement to avoid rewalking use lists.
2280 SmallVector<SDValue, 8> Ops;
2281 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2282 Ops.push_back(N->getOperand(i));
2283 DAG.ReplaceAllUsesWith(N, Ops.data());
2284 } while (!N->use_empty());
2285 deleteAndRecombine(N);
2286 return SDValue(N, 0); // Return N so it doesn't get rechecked!
2287}
2288
2289/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2290/// ConstantSDNode pointer else nullptr.
2291 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2292 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2293 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2294}
2295
2296// isTruncateOf - If N is a truncate of some other value, return true, record
2297// the value being truncated in Op and which of Op's bits are zero/one in Known.
2298// This function computes KnownBits to avoid a duplicated call to
2299// computeKnownBits in the caller.
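// An i1 (setcc X, 0, ne) where X is known to have no bits set above bit 0 is
// treated the same way, since it is equivalent to a truncate of X to i1.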
2300 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2301 KnownBits &Known) {
2302 if (N->getOpcode() == ISD::TRUNCATE) {
2303 Op = N->getOperand(0);
2304 Known = DAG.computeKnownBits(Op);
2305 return true;
2306 }
2307
2308 if (N.getOpcode() != ISD::SETCC ||
2309 N.getValueType().getScalarType() != MVT::i1 ||
2310 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2311 return false;
2312
2313 SDValue Op0 = N->getOperand(0);
2314 SDValue Op1 = N->getOperand(1);
2315 assert(Op0.getValueType() == Op1.getValueType());
2316
2317 if (isNullOrNullSplat(Op0))
2318 Op = Op1;
2319 else if (isNullOrNullSplat(Op1))
2320 Op = Op0;
2321 else
2322 return false;
2323
2324 Known = DAG.computeKnownBits(Op);
2325
2326 return (Known.Zero | 1).isAllOnes();
2327}
2328
2329/// Return true if 'Use' is a load or a store that uses N as its base pointer
2330/// and that N may be folded in the load / store addressing mode.
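/// For example (illustrative), an (add BasePtr, 16) used as the address of a
/// load can be folded if the target reports a legal [reg + imm] addressing mode
/// for that load's memory type and address space.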
2331 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2332 const TargetLowering &TLI) {
2333 EVT VT;
2334 unsigned AS;
2335
2336 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2337 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2338 return false;
2339 VT = LD->getMemoryVT();
2340 AS = LD->getAddressSpace();
2341 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2342 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2343 return false;
2344 VT = ST->getMemoryVT();
2345 AS = ST->getAddressSpace();
2346 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2347 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2348 return false;
2349 VT = LD->getMemoryVT();
2350 AS = LD->getAddressSpace();
2351 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2352 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2353 return false;
2354 VT = ST->getMemoryVT();
2355 AS = ST->getAddressSpace();
2356 } else {
2357 return false;
2358 }
2359
2360 TargetLowering::AddrMode AM;
2361 if (N->getOpcode() == ISD::ADD) {
2362 AM.HasBaseReg = true;
2363 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2364 if (Offset)
2365 // [reg +/- imm]
2366 AM.BaseOffs = Offset->getSExtValue();
2367 else
2368 // [reg +/- reg]
2369 AM.Scale = 1;
2370 } else if (N->getOpcode() == ISD::SUB) {
2371 AM.HasBaseReg = true;
2372 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2373 if (Offset)
2374 // [reg +/- imm]
2375 AM.BaseOffs = -Offset->getSExtValue();
2376 else
2377 // [reg +/- reg]
2378 AM.Scale = 1;
2379 } else {
2380 return false;
2381 }
2382
2383 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2384 VT.getTypeForEVT(*DAG.getContext()), AS);
2385}
2386
2387/// This inverts a canonicalization in IR that replaces a variable select arm
2388/// with an identity constant. Codegen improves if we re-use the variable
2389/// operand rather than load a constant. This can also be converted into a
2390/// masked vector operation if the target supports it.
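/// For example, with 0 as the additive identity:
///   add X, (vselect Cond, 0, Y) --> vselect Cond, X, (add X, Y)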
2391 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2392 bool ShouldCommuteOperands) {
2393 // Match a select as operand 1. The identity constant that we are looking for
2394 // is only valid as operand 1 of a non-commutative binop.
2395 SDValue N0 = N->getOperand(0);
2396 SDValue N1 = N->getOperand(1);
2397 if (ShouldCommuteOperands)
2398 std::swap(N0, N1);
2399
2400 // TODO: Should this apply to scalar select too?
2401 if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2402 return SDValue();
2403
2404 // We can't hoist all instructions because of immediate UB (not speculatable).
2405 // For example div/rem by zero.
2406 if (!DAG.isSafeToSpeculativelyExecute(N->getOpcode()))
2407 return SDValue();
2408
2409 unsigned Opcode = N->getOpcode();
2410 EVT VT = N->getValueType(0);
2411 SDValue Cond = N1.getOperand(0);
2412 SDValue TVal = N1.getOperand(1);
2413 SDValue FVal = N1.getOperand(2);
2414
2415 // This transform increases uses of N0, so freeze it to be safe.
2416 // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2417 unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2418 if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2419 SDValue F0 = DAG.getFreeze(N0);
2420 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2421 return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2422 }
2423 // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2424 if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2425 SDValue F0 = DAG.getFreeze(N0);
2426 SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2427 return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2428 }
2429
2430 return SDValue();
2431}
2432
2433SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2434 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2435 "Unexpected binary operator");
2436
2437 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2438 auto BinOpcode = BO->getOpcode();
2439 EVT VT = BO->getValueType(0);
2440 if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2441 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2442 return Sel;
2443
2444 if (TLI.isCommutativeBinOp(BO->getOpcode()))
2445 if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2446 return Sel;
2447 }
2448
2449 // Don't do this unless the old select is going away. We want to eliminate the
2450 // binary operator, not replace a binop with a select.
2451 // TODO: Handle ISD::SELECT_CC.
2452 unsigned SelOpNo = 0;
2453 SDValue Sel = BO->getOperand(0);
2454 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2455 SelOpNo = 1;
2456 Sel = BO->getOperand(1);
2457
2458 // Peek through trunc to shift amount type.
2459 if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2460 BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2461 // This is valid when the truncated bits of x are already zero.
2462 SDValue Op;
2463 KnownBits Known;
2464 if (isTruncateOf(DAG, Sel, Op, Known) &&
2465 Known.countMaxActiveBits() <= Sel.getScalarValueSizeInBits())
2466 Sel = Op;
2467 }
2468 }
2469
2470 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2471 return SDValue();
2472
2473 SDValue CT = Sel.getOperand(1);
2474 if (!isConstantOrConstantVector(CT, true) &&
2475 !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2476 return SDValue();
2477
2478 SDValue CF = Sel.getOperand(2);
2479 if (!isConstantOrConstantVector(CF, true) &&
2480 !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2481 return SDValue();
2482
2483 // Bail out if any constants are opaque because we can't constant fold those.
2484 // The exception is "and" and "or" with either 0 or -1 in which case we can
2485 // propagate non constant operands into select. I.e.:
2486 // and (select Cond, 0, -1), X --> select Cond, 0, X
2487 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2488 bool CanFoldNonConst =
2489 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2490 ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2491 (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2492
2493 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2494 if (!CanFoldNonConst &&
2495 !isConstantOrConstantVector(CBO, true) &&
2496 !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2497 return SDValue();
2498
2499 SDLoc DL(Sel);
2500 SDValue NewCT, NewCF;
2501
2502 if (CanFoldNonConst) {
2503 // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2504 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2505 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2506 NewCT = CT;
2507 else
2508 NewCT = CBO;
2509
2510 if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2511 (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2512 NewCF = CF;
2513 else
2514 NewCF = CBO;
2515 } else {
2516 // We have a select-of-constants followed by a binary operator with a
2517 // constant. Eliminate the binop by pulling the constant math into the
2518 // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
2519 // CBO, CF + CBO
2520 NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2521 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2522 if (!NewCT)
2523 return SDValue();
2524
2525 NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2526 : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2527 if (!NewCF)
2528 return SDValue();
2529 }
2530
2531 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2532 SelectOp->setFlags(BO->getFlags());
2533 return SelectOp;
2534}
2535
2536 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL,
2537 SelectionDAG &DAG) {
2538 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2539 "Expecting add or sub");
2540
2541 // Match a constant operand and a zext operand for the math instruction:
2542 // add Z, C
2543 // sub C, Z
2544 bool IsAdd = N->getOpcode() == ISD::ADD;
2545 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2546 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2547 auto *CN = dyn_cast<ConstantSDNode>(C);
2548 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2549 return SDValue();
2550
2551 // Match the zext operand as a setcc of a boolean.
2552 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2553 Z.getOperand(0).getValueType() != MVT::i1)
2554 return SDValue();
2555
2556 // Match the compare as: setcc (X & 1), 0, eq.
2557 SDValue SetCC = Z.getOperand(0);
2558 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2559 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2560 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2561 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2562 return SDValue();
2563
2564 // We are adding/subtracting a constant and an inverted low bit. Turn that
2565 // into a subtract/add of the low bit with incremented/decremented constant:
2566 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2567 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2568 EVT VT = C.getValueType();
2569 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2570 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2571 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2572 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2573}
2574
2575// Attempt to form avgceil(A, B) from (A | B) - ((A ^ B) >> 1)
2576SDValue DAGCombiner::foldSubToAvg(SDNode *N, const SDLoc &DL) {
2577 SDValue N0 = N->getOperand(0);
2578 EVT VT = N0.getValueType();
2579 SDValue A, B;
2580
2581 if (hasOperation(ISD::AVGCEILU, VT) &&
2582 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2583 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2584 m_SpecificInt(1))))) {
2585 return DAG.getNode(ISD::AVGCEILU, DL, VT, A, B);
2586 }
2587 if (hasOperation(ISD::AVGCEILS, VT) &&
2588 sd_match(N, m_Sub(m_Or(m_Value(A), m_Value(B)),
2589 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2590 m_SpecificInt(1))))) {
2591 return DAG.getNode(ISD::AVGCEILS, DL, VT, A, B);
2592 }
2593 return SDValue();
2594}
2595
2596/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2597/// a shift and add with a different constant.
2598 static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL,
2599 SelectionDAG &DAG) {
2600 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2601 "Expecting add or sub");
2602
2603 // We need a constant operand for the add/sub, and the other operand is a
2604 // logical shift right: add (srl), C or sub C, (srl).
2605 bool IsAdd = N->getOpcode() == ISD::ADD;
2606 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2607 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2608 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2609 ShiftOp.getOpcode() != ISD::SRL)
2610 return SDValue();
2611
2612 // The shift must be of a 'not' value.
2613 SDValue Not = ShiftOp.getOperand(0);
2614 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2615 return SDValue();
2616
2617 // The shift must be moving the sign bit to the least-significant-bit.
2618 EVT VT = ShiftOp.getValueType();
2619 SDValue ShAmt = ShiftOp.getOperand(1);
2620 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2621 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2622 return SDValue();
2623
2624 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2625 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2626 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2627 if (SDValue NewC = DAG.FoldConstantArithmetic(
2628 IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2629 {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2630 SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2631 Not.getOperand(0), ShAmt);
2632 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2633 }
2634
2635 return SDValue();
2636}
2637
2638static bool
2639 areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
2640 return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2641 (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2642}
2643
2644/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2645/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2646/// are no common bits set in the operands).
2647SDValue DAGCombiner::visitADDLike(SDNode *N) {
2648 SDValue N0 = N->getOperand(0);
2649 SDValue N1 = N->getOperand(1);
2650 EVT VT = N0.getValueType();
2651 SDLoc DL(N);
2652
2653 // fold (add x, undef) -> undef
2654 if (N0.isUndef())
2655 return N0;
2656 if (N1.isUndef())
2657 return N1;
2658
2659 // fold (add c1, c2) -> c1+c2
2660 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2661 return C;
2662
2663 // canonicalize constant to RHS
2664 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2665 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2666 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2667
2668 if (areBitwiseNotOfEachother(N0, N1))
2669 return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
2670 SDLoc(N), VT);
2671
2672 // fold vector ops
2673 if (VT.isVector()) {
2674 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2675 return FoldedVOp;
2676
2677 // fold (add x, 0) -> x, vector edition
2678 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2679 return N0;
2680 }
2681
2682 // fold (add x, 0) -> x
2683 if (isNullConstant(N1))
2684 return N0;
2685
2686 if (N0.getOpcode() == ISD::SUB) {
2687 SDValue N00 = N0.getOperand(0);
2688 SDValue N01 = N0.getOperand(1);
2689
2690 // fold ((A-c1)+c2) -> (A+(c2-c1))
2691 if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2692 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2693
2694 // fold ((c1-A)+c2) -> (c1+c2)-A
2695 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2696 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2697 }
2698
2699 // add (sext i1 X), 1 -> zext (not i1 X)
2700 // We don't transform this pattern:
2701 // add (zext i1 X), -1 -> sext (not i1 X)
2702 // because most (?) targets generate better code for the zext form.
2703 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2704 isOneOrOneSplat(N1)) {
2705 SDValue X = N0.getOperand(0);
2706 if ((!LegalOperations ||
2707 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2708 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2709 X.getScalarValueSizeInBits() == 1) {
2710 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2711 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2712 }
2713 }
2714
2715 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2716 // iff (or x, c0) is equivalent to (add x, c0).
2717 // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2718 // iff (xor x, c0) is equivalent to (add x, c0).
2719 if (DAG.isADDLike(N0)) {
2720 SDValue N01 = N0.getOperand(1);
2721 if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2722 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2723 }
2724
2725 if (SDValue NewSel = foldBinOpIntoSelect(N))
2726 return NewSel;
2727
2728 // reassociate add
2729 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2730 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2731 return RADD;
2732
2733 // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
2734 // equivalent to (add x, c).
2735 // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
2736 // equivalent to (add x, c).
2737 // Do this optimization only when adding c does not introduce instructions
2738 // for adding carries.
2739 auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2740 if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2741 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2742 // If N0's type does not split or is a sign mask, it does not introduce
2743 // add carry.
2744 auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2745 bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2746 TyActn == TargetLoweringBase::TypePromoteInteger ||
2747 isMinSignedConstant(N0.getOperand(1));
2748 if (NoAddCarry)
2749 return DAG.getNode(
2750 ISD::ADD, DL, VT,
2751 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2752 N0.getOperand(1));
2753 }
2754 return SDValue();
2755 };
2756 if (SDValue Add = ReassociateAddOr(N0, N1))
2757 return Add;
2758 if (SDValue Add = ReassociateAddOr(N1, N0))
2759 return Add;
2760
2761 // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2762 if (SDValue SD =
2763 reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2764 return SD;
2765 }
2766
2767 SDValue A, B, C;
2768
2769 // fold ((0-A) + B) -> B-A
2770 if (sd_match(N0, m_Neg(m_Value(A))))
2771 return DAG.getNode(ISD::SUB, DL, VT, N1, A);
2772
2773 // fold (A + (0-B)) -> A-B
2774 if (sd_match(N1, m_Neg(m_Value(B))))
2775 return DAG.getNode(ISD::SUB, DL, VT, N0, B);
2776
2777 // fold (A+(B-A)) -> B
2778 if (sd_match(N1, m_Sub(m_Value(B), m_Specific(N0))))
2779 return B;
2780
2781 // fold ((B-A)+A) -> B
2782 if (sd_match(N0, m_Sub(m_Value(B), m_Specific(N1))))
2783 return B;
2784
2785 // fold ((A-B)+(C-A)) -> (C-B)
2786 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2787 sd_match(N1, m_Sub(m_Value(C), m_Specific(A))))
2788 return DAG.getNode(ISD::SUB, DL, VT, C, B);
2789
2790 // fold ((A-B)+(B-C)) -> (A-C)
2791 if (sd_match(N0, m_Sub(m_Value(A), m_Value(B))) &&
2792 sd_match(N1, m_Sub(m_Specific(B), m_Value(C))))
2793 return DAG.getNode(ISD::SUB, DL, VT, A, C);
2794
2795 // fold (A+(B-(A+C))) to (B-C)
2796 // fold (A+(B-(C+A))) to (B-C)
2797 if (sd_match(N1, m_Sub(m_Value(B), m_Add(m_Specific(N0), m_Value(C)))))
2798 return DAG.getNode(ISD::SUB, DL, VT, B, C);
2799
2800 // fold (A+((B-A)+or-C)) to (B+or-C)
2801 if (sd_match(N1,
2802 m_AnyOf(m_Add(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)),
2803 m_Sub(m_Sub(m_Value(B), m_Specific(N0)), m_Value(C)))))
2804 return DAG.getNode(N1.getOpcode(), DL, VT, B, C);
2805
2806 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2807 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2808 N0->hasOneUse() && N1->hasOneUse()) {
2809 SDValue N00 = N0.getOperand(0);
2810 SDValue N01 = N0.getOperand(1);
2811 SDValue N10 = N1.getOperand(0);
2812 SDValue N11 = N1.getOperand(1);
2813
2814 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2815 return DAG.getNode(ISD::SUB, DL, VT,
2816 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2817 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2818 }
2819
2820 // fold (add (umax X, C), -C) --> (usubsat X, C)
2821 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2822 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2823 return (!Max && !Op) ||
2824 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2825 };
2826 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2827 /*AllowUndefs*/ true))
2828 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2829 N0.getOperand(1));
2830 }
2831
2832 if (SimplifyDemandedBits(SDValue(N, 0)))
2833 return SDValue(N, 0);
2834
2835 if (isOneOrOneSplat(N1)) {
2836 // fold (add (xor a, -1), 1) -> (sub 0, a)
2837 if (isBitwiseNot(N0))
2838 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2839 N0.getOperand(0));
2840
2841 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2842 if (N0.getOpcode() == ISD::ADD) {
2843 SDValue A, Xor;
2844
2845 if (isBitwiseNot(N0.getOperand(0))) {
2846 A = N0.getOperand(1);
2847 Xor = N0.getOperand(0);
2848 } else if (isBitwiseNot(N0.getOperand(1))) {
2849 A = N0.getOperand(0);
2850 Xor = N0.getOperand(1);
2851 }
2852
2853 if (Xor)
2854 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2855 }
2856
2857 // Look for:
2858 // add (add x, y), 1
2859 // And if the target does not like this form then turn into:
2860 // sub y, (xor x, -1)
2861 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2862 N0.hasOneUse() &&
2863 // Limit this to after legalization if the add has wrap flags
2864 (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2865 !N->getFlags().hasNoSignedWrap()))) {
2866 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
2867 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2868 }
2869 }
2870
2871 // (x - y) + -1 -> add (xor y, -1), x
2872 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2873 isAllOnesOrAllOnesSplat(N1, /*AllowUndefs=*/true)) {
2874 SDValue Not = DAG.getNOT(DL, N0.getOperand(1), VT);
2875 return DAG.getNode(ISD::ADD, DL, VT, Not, N0.getOperand(0));
2876 }
2877
2878 // Fold add(mul(add(A, CA), CM), CB) -> add(mul(A, CM), CM*CA+CB).
2879 // This can help if the inner add has multiple uses.
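// e.g. add (mul (add X, 1), 3), 5 --> add (mul X, 3), 8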
2880 APInt CM, CA;
2881 if (ConstantSDNode *CB = dyn_cast<ConstantSDNode>(N1)) {
2882 if (VT.getScalarSizeInBits() <= 64) {
2883 if (sd_match(N0, m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2884 m_ConstInt(CM)))) &&
2885 TLI.isLegalAddImmediate(
2886 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2887 SDNodeFlags Flags;
2888 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2889 // are _also_ nsw the outputs can be too.
2890 if (N->getFlags().hasNoUnsignedWrap() &&
2891 N0->getFlags().hasNoUnsignedWrap() &&
2892 N0.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2893 Flags.setNoUnsignedWrap(true);
2894 if (N->getFlags().hasNoSignedWrap() &&
2895 N0->getFlags().hasNoSignedWrap() &&
2896 N0.getOperand(0)->getFlags().hasNoSignedWrap())
2897 Flags.setNoSignedWrap(true);
2898 }
2899 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2900 DAG.getConstant(CM, DL, VT), Flags);
2901 return DAG.getNode(
2902 ISD::ADD, DL, VT, Mul,
2903 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2904 }
2905 // Also look in case there is an intermediate add.
2906 if (sd_match(N0, m_OneUse(m_Add(
2907 m_OneUse(m_Mul(m_Add(m_Value(A), m_ConstInt(CA)),
2908 m_ConstInt(CM))),
2909 m_Value(B)))) &&
2910 TLI.isLegalAddImmediate(
2911 (CA * CM + CB->getAPIntValue()).getSExtValue())) {
2912 SDNodeFlags Flags;
2913 // If all the inputs are nuw, the outputs can be nuw. If all the inputs
2914 // are _also_ nsw the outputs can be too.
2915 SDValue OMul =
2916 N0.getOperand(0) == B ? N0.getOperand(1) : N0.getOperand(0);
2917 if (N->getFlags().hasNoUnsignedWrap() &&
2918 N0->getFlags().hasNoUnsignedWrap() &&
2919 OMul->getFlags().hasNoUnsignedWrap() &&
2920 OMul.getOperand(0)->getFlags().hasNoUnsignedWrap()) {
2921 Flags.setNoUnsignedWrap(true);
2922 if (N->getFlags().hasNoSignedWrap() &&
2923 N0->getFlags().hasNoSignedWrap() &&
2924 OMul->getFlags().hasNoSignedWrap() &&
2925 OMul.getOperand(0)->getFlags().hasNoSignedWrap())
2926 Flags.setNoSignedWrap(true);
2927 }
2928 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N1), VT, A,
2929 DAG.getConstant(CM, DL, VT), Flags);
2930 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N1), VT, Mul, B, Flags);
2931 return DAG.getNode(
2932 ISD::ADD, DL, VT, Add,
2933 DAG.getConstant(CA * CM + CB->getAPIntValue(), DL, VT), Flags);
2934 }
2935 }
2936 }
2937
2938 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2939 return Combined;
2940
2941 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2942 return Combined;
2943
2944 return SDValue();
2945}
2946
2947// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
2948SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
2949 SDValue N0 = N->getOperand(0);
2950 EVT VT = N0.getValueType();
2951 SDValue A, B;
2952
2953 if (hasOperation(ISD::AVGFLOORU, VT) &&
2954 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2955 m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)),
2956 m_SpecificInt(1))))) {
2957 return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
2958 }
2959 if (hasOperation(ISD::AVGFLOORS, VT) &&
2960 sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
2961 m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)),
2962 m_SpecificInt(1))))) {
2963 return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
2964 }
2965
2966 return SDValue();
2967}
2968
2969SDValue DAGCombiner::visitADD(SDNode *N) {
2970 SDValue N0 = N->getOperand(0);
2971 SDValue N1 = N->getOperand(1);
2972 EVT VT = N0.getValueType();
2973 SDLoc DL(N);
2974
2975 if (SDValue Combined = visitADDLike(N))
2976 return Combined;
2977
2978 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
2979 return V;
2980
2981 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
2982 return V;
2983
2984 // Try to match AVGFLOOR fixedwidth pattern
2985 if (SDValue V = foldAddToAvg(N, DL))
2986 return V;
2987
2988 // fold (a+b) -> (a|b) iff a and b share no bits.
2989 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2990 DAG.haveNoCommonBitsSet(N0, N1)) {
2991 SDNodeFlags Flags;
2992 Flags.setDisjoint(true);
2993 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
2994 }
2995
2996 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2997 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2998 const APInt &C0 = N0->getConstantOperandAPInt(0);
2999 const APInt &C1 = N1->getConstantOperandAPInt(0);
3000 return DAG.getVScale(DL, VT, C0 + C1);
3001 }
3002
3003 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
3004 if (N0.getOpcode() == ISD::ADD &&
3005 N0.getOperand(1).getOpcode() == ISD::VSCALE &&
3006 N1.getOpcode() == ISD::VSCALE) {
3007 const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3008 const APInt &VS1 = N1->getConstantOperandAPInt(0);
3009 SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
3010 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
3011 }
3012
3013 // Fold (add step_vector(c1), step_vector(c2) to step_vector(c1+c2))
3014 if (N0.getOpcode() == ISD::STEP_VECTOR &&
3015 N1.getOpcode() == ISD::STEP_VECTOR) {
3016 const APInt &C0 = N0->getConstantOperandAPInt(0);
3017 const APInt &C1 = N1->getConstantOperandAPInt(0);
3018 APInt NewStep = C0 + C1;
3019 return DAG.getStepVector(DL, VT, NewStep);
3020 }
3021
3022 // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3023 if (N0.getOpcode() == ISD::ADD &&
3024 N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3025 N1.getOpcode() == ISD::STEP_VECTOR) {
3026 const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3027 const APInt &SV1 = N1->getConstantOperandAPInt(0);
3028 APInt NewStep = SV0 + SV1;
3029 SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3030 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3031 }
3032
3033 return SDValue();
3034}
3035
3036SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3037 unsigned Opcode = N->getOpcode();
3038 SDValue N0 = N->getOperand(0);
3039 SDValue N1 = N->getOperand(1);
3040 EVT VT = N0.getValueType();
3041 bool IsSigned = Opcode == ISD::SADDSAT;
3042 SDLoc DL(N);
3043
3044 // fold (add_sat x, undef) -> -1
3045 if (N0.isUndef() || N1.isUndef())
3046 return DAG.getAllOnesConstant(DL, VT);
3047
3048 // fold (add_sat c1, c2) -> c3
3049 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3050 return C;
3051
3052 // canonicalize constant to RHS
3053 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3054 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3055 return DAG.getNode(Opcode, DL, VT, N1, N0);
3056
3057 // fold vector ops
3058 if (VT.isVector()) {
3059 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3060 return FoldedVOp;
3061
3062 // fold (add_sat x, 0) -> x, vector edition
3063 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3064 return N0;
3065 }
3066
3067 // fold (add_sat x, 0) -> x
3068 if (isNullConstant(N1))
3069 return N0;
3070
3071 // If it cannot overflow, transform into an add.
3072 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3073 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3074
3075 return SDValue();
3076}
3077
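/// Peel off TRUNCATE/ZERO_EXTEND and and-with-1 wrappers and return V if it is
/// the carry/borrow result (result #1) of a UADDO/USUBO/UADDO_CARRY/USUBO_CARRY
/// node that is legal or custom for the target and whose boolean value is known
/// to be 0/1; otherwise return an empty SDValue.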
3078 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3079 bool ForceCarryReconstruction = false) {
3080 bool Masked = false;
3081
3082 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3083 while (true) {
3084 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3085 V = V.getOperand(0);
3086 continue;
3087 }
3088
3089 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3090 if (ForceCarryReconstruction)
3091 return V;
3092
3093 Masked = true;
3094 V = V.getOperand(0);
3095 continue;
3096 }
3097
3098 if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3099 return V;
3100
3101 break;
3102 }
3103
3104 // If this is not a carry, return.
3105 if (V.getResNo() != 1)
3106 return SDValue();
3107
3108 if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3109 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3110 return SDValue();
3111
3112 EVT VT = V->getValueType(0);
3113 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3114 return SDValue();
3115
3116 // If the result is masked, then no matter what kind of bool it is we can
3117 // return. If it isn't, then we need to make sure the bool type is either 0 or
3118 // 1 and not other values.
3119 if (Masked ||
3120 TLI.getBooleanContents(V.getValueType()) ==
3121 TargetLowering::ZeroOrOneBooleanContent)
3122 return V;
3123
3124 return SDValue();
3125}
3126
3127/// Given the operands of an add/sub operation, see if the 2nd operand is a
3128/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3129/// the opcode and bypass the mask operation.
3130static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3131 SelectionDAG &DAG, const SDLoc &DL) {
3132 if (N1.getOpcode() == ISD::ZERO_EXTEND)
3133 N1 = N1.getOperand(0);
3134
3135 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3136 return SDValue();
3137
3138 EVT VT = N0.getValueType();
3139 SDValue N10 = N1.getOperand(0);
3140 if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3141 N10 = N10.getOperand(0);
3142
3143 if (N10.getValueType() != VT)
3144 return SDValue();
3145
3146 if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3147 return SDValue();
3148
3149 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3150 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3151 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3152}
3153
3154/// Helper for doing combines based on N0 and N1 being added to each other.
3155SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
3156 SDNode *LocReference) {
3157 EVT VT = N0.getValueType();
3158 SDLoc DL(LocReference);
3159
3160 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3161 SDValue Y, N;
3162 if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N))))
3163 return DAG.getNode(ISD::SUB, DL, VT, N0,
3164 DAG.getNode(ISD::SHL, DL, VT, Y, N));
3165
3166 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3167 return V;
3168
3169 // Look for:
3170 // add (add x, 1), y
3171 // And if the target does not like this form then turn into:
3172 // sub y, (xor x, -1)
3173 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3174 N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3175 // Limit this to after legalization if the add has wrap flags
3176 (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3177 !N0->getFlags().hasNoSignedWrap()))) {
3178 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
3179 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3180 }
3181
3182 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3183 // Hoist one-use subtraction by non-opaque constant:
3184 // (x - C) + y -> (x + y) - C
3185 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3186 if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3187 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3188 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3189 }
3190 // Hoist one-use subtraction from non-opaque constant:
3191 // (C - x) + y -> (y - x) + C
3192 if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3193 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3194 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3195 }
3196 }
3197
3198 // add (mul x, C), x -> mul x, C+1
3199 if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3200 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3201 N0.hasOneUse()) {
3202 SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3203 DAG.getConstant(1, DL, VT));
3204 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3205 }
3206
3207 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3208 // rather than 'add 0/-1' (the zext should get folded).
3209 // add (sext i1 Y), X --> sub X, (zext i1 Y)
3210 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3211 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3213 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3214 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3215 }
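  // For example, with an i1 value Y: if Y == 1 then (sext Y) == -1 and
  // (zext Y) == 1, so X + (sext Y) == X - (zext Y); if Y == 0 both sides are X.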
3216
3217 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3218 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3219 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3220 if (TN->getVT() == MVT::i1) {
3221 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3222 DAG.getConstant(1, DL, VT));
3223 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3224 }
3225 }
3226
3227 // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3228 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3229 N1.getResNo() == 0)
3230 return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3231 N0, N1.getOperand(0), N1.getOperand(2));
3232
3233 // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3235 if (SDValue Carry = getAsCarry(TLI, N1))
3236 return DAG.getNode(ISD::UADDO_CARRY, DL,
3237 DAG.getVTList(VT, Carry.getValueType()), N0,
3238 DAG.getConstant(0, DL, VT), Carry);
3239
3240 return SDValue();
3241}
3242
3243SDValue DAGCombiner::visitADDC(SDNode *N) {
3244 SDValue N0 = N->getOperand(0);
3245 SDValue N1 = N->getOperand(1);
3246 EVT VT = N0.getValueType();
3247 SDLoc DL(N);
3248
3249 // If the flag result is dead, turn this into an ADD.
3250 if (!N->hasAnyUseOfValue(1))
3251 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3252 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3253
3254 // canonicalize constant to RHS.
3255 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3256 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3257 if (N0C && !N1C)
3258 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3259
3260 // fold (addc x, 0) -> x + no carry out
3261 if (isNullConstant(N1))
3262 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3263 DL, MVT::Glue));
3264
3265 // If it cannot overflow, transform into an add.
3267 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3268 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3269
3270 return SDValue();
3271}
3272
3273/**
 3274 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3275 * then the flip also occurs if computing the inverse is the same cost.
3276 * This function returns an empty SDValue in case it cannot flip the boolean
3277 * without increasing the cost of the computation. If you want to flip a boolean
3278 * no matter what, use DAG.getLogicalNOT.
3279 */
3281 const TargetLowering &TLI,
3282 bool Force) {
3283 if (Force && isa<ConstantSDNode>(V))
3284 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3285
3286 if (V.getOpcode() != ISD::XOR)
3287 return SDValue();
3288
3289 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3290 if (!Const)
3291 return SDValue();
3292
3293 EVT VT = V.getValueType();
3294
3295 bool IsFlip = false;
3296 switch(TLI.getBooleanContents(VT)) {
3298 IsFlip = Const->isOne();
3299 break;
3301 IsFlip = Const->isAllOnes();
3302 break;
3304 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3305 break;
3306 }
3307
3308 if (IsFlip)
3309 return V.getOperand(0);
3310 if (Force)
3311 return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3312 return SDValue();
3313}
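// For example, with zero-or-one boolean contents, (xor b, 1) is the logical
// NOT of b, so extractBooleanFlip returns b and the caller can invert the
// downstream logic instead of materializing the xor, as in the
// (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) fold below.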
3314
3315SDValue DAGCombiner::visitADDO(SDNode *N) {
3316 SDValue N0 = N->getOperand(0);
3317 SDValue N1 = N->getOperand(1);
3318 EVT VT = N0.getValueType();
3319 bool IsSigned = (ISD::SADDO == N->getOpcode());
3320
3321 EVT CarryVT = N->getValueType(1);
3322 SDLoc DL(N);
3323
3324 // If the flag result is dead, turn this into an ADD.
3325 if (!N->hasAnyUseOfValue(1))
3326 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3327 DAG.getUNDEF(CarryVT));
3328
3329 // canonicalize constant to RHS.
3332 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3333
3334 // fold (addo x, 0) -> x + no carry out
3335 if (isNullOrNullSplat(N1))
3336 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3337
3338 // If it cannot overflow, transform into an add.
3339 if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3340 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3341 DAG.getConstant(0, DL, CarryVT));
3342
3343 if (IsSigned) {
3344 // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3345 if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3346 return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3347 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3348 } else {
3349 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3350 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3351 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3352 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3353 return CombineTo(
3354 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3355 }
3356
3357 if (SDValue Combined = visitUADDOLike(N0, N1, N))
3358 return Combined;
3359
3360 if (SDValue Combined = visitUADDOLike(N1, N0, N))
3361 return Combined;
3362 }
3363
3364 return SDValue();
3365}
3366
3367SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3368 EVT VT = N0.getValueType();
3369 if (VT.isVector())
3370 return SDValue();
3371
3372 // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3373 // If Y + 1 cannot overflow.
3374 if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3375 SDValue Y = N1.getOperand(0);
3376 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3378 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3379 N1.getOperand(2));
3380 }
3381
3382 // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3384 if (SDValue Carry = getAsCarry(TLI, N1))
3385 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3386 DAG.getConstant(0, SDLoc(N), VT), Carry);
3387
3388 return SDValue();
3389}
3390
3391SDValue DAGCombiner::visitADDE(SDNode *N) {
3392 SDValue N0 = N->getOperand(0);
3393 SDValue N1 = N->getOperand(1);
3394 SDValue CarryIn = N->getOperand(2);
3395
3396 // canonicalize constant to RHS
3397 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3398 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3399 if (N0C && !N1C)
3400 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3401 N1, N0, CarryIn);
3402
3403 // fold (adde x, y, false) -> (addc x, y)
3404 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3405 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3406
3407 return SDValue();
3408}
3409
3410SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3411 SDValue N0 = N->getOperand(0);
3412 SDValue N1 = N->getOperand(1);
3413 SDValue CarryIn = N->getOperand(2);
3414 SDLoc DL(N);
3415
3416 // canonicalize constant to RHS
3417 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3418 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3419 if (N0C && !N1C)
3420 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3421
3422 // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3423 if (isNullConstant(CarryIn)) {
3424 if (!LegalOperations ||
3425 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3426 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3427 }
3428
3429 // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3430 if (isNullConstant(N0) && isNullConstant(N1)) {
3431 EVT VT = N0.getValueType();
3432 EVT CarryVT = CarryIn.getValueType();
3433 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3434 AddToWorklist(CarryExt.getNode());
3435 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3436 DAG.getConstant(1, DL, VT)),
3437 DAG.getConstant(0, DL, CarryVT));
3438 }
3439
3440 if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3441 return Combined;
3442
3443 if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3444 return Combined;
3445
3446 // We want to avoid useless duplication.
3447 // TODO: This is done automatically for binary operations. As UADDO_CARRY is
 3448 // not a binary operation, it is not really possible to leverage this
 3449 // existing mechanism for it. However, if more operations require the same
 3450 // deduplication logic, then it may be worth generalizing.
3451 SDValue Ops[] = {N1, N0, CarryIn};
3452 SDNode *CSENode =
3453 DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3454 if (CSENode)
3455 return SDValue(CSENode, 0);
3456
3457 return SDValue();
3458}
3459
3460/**
 3461 * If we are facing some sort of diamond carry propagation pattern, try to
3462 * break it up to generate something like:
3463 * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3464 *
 3465 * The end result is usually an increase in the number of operations required, but because the
3466 * carry is now linearized, other transforms can kick in and optimize the DAG.
3467 *
3468 * Patterns typically look something like
3469 * (uaddo A, B)
3470 * / \
3471 * Carry Sum
3472 * | \
3473 * | (uaddo_carry *, 0, Z)
3474 * | /
3475 * \ Carry
3476 * | /
3477 * (uaddo_carry X, *, *)
3478 *
 3479 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3480 * produce a combine with a single path for carry propagation.
3481 */
3483 SelectionDAG &DAG, SDValue X,
3484 SDValue Carry0, SDValue Carry1,
3485 SDNode *N) {
3486 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3487 return SDValue();
3488 if (Carry1.getOpcode() != ISD::UADDO)
3489 return SDValue();
3490
3491 SDValue Z;
3492
3493 /**
3494 * First look for a suitable Z. It will present itself in the form of
3495 * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3496 */
3497 if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3498 isNullConstant(Carry0.getOperand(1))) {
3499 Z = Carry0.getOperand(2);
3500 } else if (Carry0.getOpcode() == ISD::UADDO &&
3501 isOneConstant(Carry0.getOperand(1))) {
3502 EVT VT = Carry0->getValueType(1);
3503 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3504 } else {
3505 // We couldn't find a suitable Z.
3506 return SDValue();
3507 }
3508
3509
3510 auto cancelDiamond = [&](SDValue A,SDValue B) {
3511 SDLoc DL(N);
3512 SDValue NewY =
3513 DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3514 Combiner.AddToWorklist(NewY.getNode());
3515 return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3516 DAG.getConstant(0, DL, X.getValueType()),
3517 NewY.getValue(1));
3518 };
3519
3520 /**
3521 * (uaddo A, B)
3522 * |
3523 * Sum
3524 * |
3525 * (uaddo_carry *, 0, Z)
3526 */
3527 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3528 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3529 }
3530
3531 /**
3532 * (uaddo_carry A, 0, Z)
3533 * |
3534 * Sum
3535 * |
3536 * (uaddo *, B)
3537 */
3538 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3539 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3540 }
3541
3542 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3543 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3544 }
3545
3546 return SDValue();
3547}
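// For example, in the first diamond shown above, the (uaddo A, B) sum feeds
// (uaddo_carry Sum, 0, Z); cancelDiamond rewrites this as
// NewY = (uaddo_carry A, B, Z) and the final node becomes
// (uaddo_carry X, 0, NewY:Carry), so the carry now propagates along a single
// chain.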
3548
 3549 // If we are facing some sort of diamond carry/borrow in/out pattern, try to
3550// match patterns like:
3551//
3552// (uaddo A, B) CarryIn
3553// | \ |
3554// | \ |
3555// PartialSum PartialCarryOutX /
3556// | | /
3557// | ____|____________/
3558// | / |
3559// (uaddo *, *) \________
3560// | \ \
3561// | \ |
3562// | PartialCarryOutY |
3563// | \ |
3564// | \ /
3565// AddCarrySum | ______/
3566// | /
3567// CarryOut = (or *, *)
3568//
3569// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3570//
3571// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3572//
3573// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3574// with a single path for carry/borrow out propagation.
3576 SDValue N0, SDValue N1, SDNode *N) {
3577 SDValue Carry0 = getAsCarry(TLI, N0);
3578 if (!Carry0)
3579 return SDValue();
3580 SDValue Carry1 = getAsCarry(TLI, N1);
3581 if (!Carry1)
3582 return SDValue();
3583
3584 unsigned Opcode = Carry0.getOpcode();
3585 if (Opcode != Carry1.getOpcode())
3586 return SDValue();
3587 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3588 return SDValue();
3589 // Guarantee identical type of CarryOut
3590 EVT CarryOutType = N->getValueType(0);
3591 if (CarryOutType != Carry0.getValue(1).getValueType() ||
3592 CarryOutType != Carry1.getValue(1).getValueType())
3593 return SDValue();
3594
3595 // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3596 // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3597 if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3598 std::swap(Carry0, Carry1);
3599
3600 // Check if nodes are connected in expected way.
3601 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3602 Carry1.getOperand(1) != Carry0.getValue(0))
3603 return SDValue();
3604
 3605 // The carry-in value must be on the right-hand side for subtraction.
3606 unsigned CarryInOperandNum =
3607 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3608 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3609 return SDValue();
3610 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3611
3612 unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3613 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3614 return SDValue();
3615
3616 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3617 CarryIn = getAsCarry(TLI, CarryIn, true);
3618 if (!CarryIn)
3619 return SDValue();
3620
3621 SDLoc DL(N);
3622 CarryIn = DAG.getBoolExtOrTrunc(CarryIn, DL, Carry1->getValueType(1),
3623 Carry1->getValueType(0));
3624 SDValue Merged =
3625 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3626 Carry0.getOperand(1), CarryIn);
3627
 3628 // Please note that because the result of the UADDO/USUBO of A and B feeds
 3629 // into the UADDO/USUBO that consumes the carry/borrow in, we can prove that
 3630 // if the first UADDO/USUBO overflows, the second
 3631 // UADDO/USUBO cannot. For example, consider 8-bit numbers where 0xFF is the
3632 // maximum value.
3633 //
3634 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3635 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3636 //
3637 // This is important because it means that OR and XOR can be used to merge
3638 // carry flags; and that AND can return a constant zero.
3639 //
3640 // TODO: match other operations that can merge flags (ADD, etc)
3641 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3642 if (N->getOpcode() == ISD::AND)
3643 return DAG.getConstant(0, DL, CarryOutType);
3644 return Merged.getValue(1);
3645}
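// Worked 8-bit instance of the argument above: A = B = 0xFF, CarryIn = 1.
//   (uaddo 0xFF, 0xFF)          -> {0xFE, carry = 1}
//   (uaddo 0xFE, 1)             -> {0xFF, carry = 0}
//   CarryOut = (or 1, 0)        -> 1
//   (uaddo_carry 0xFF, 0xFF, 1) -> {0xFF, carry = 1}, matching both results.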
3646
3647SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3648 SDValue CarryIn, SDNode *N) {
3649 // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3650 // carry.
3651 if (isBitwiseNot(N0))
3652 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3653 SDLoc DL(N);
3654 SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3655 N0.getOperand(0), NotC);
3656 return CombineTo(
3657 N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3658 }
3659
3660 // Iff the flag result is dead:
3661 // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3662 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3663 // or the dependency between the instructions.
3664 if ((N0.getOpcode() == ISD::ADD ||
3665 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3666 N0.getValue(1) != CarryIn)) &&
3667 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3668 return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3669 N0.getOperand(0), N0.getOperand(1), CarryIn);
3670
3671 /**
 3672 * When one of the uaddo_carry arguments is itself a carry, we may be facing
 3673 * a diamond carry propagation, in which case we try to transform the DAG
 3674 * to ensure linear carry propagation if that is possible.
3675 */
3676 if (auto Y = getAsCarry(TLI, N1)) {
3677 // Because both are carries, Y and Z can be swapped.
3678 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3679 return R;
3680 if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3681 return R;
3682 }
3683
3684 return SDValue();
3685}
3686
3687SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3688 SDValue CarryIn, SDNode *N) {
3689 // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3690 if (isBitwiseNot(N0)) {
3691 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3692 return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3693 N0.getOperand(0), NotC);
3694 }
3695
3696 return SDValue();
3697}
3698
3699SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3700 SDValue N0 = N->getOperand(0);
3701 SDValue N1 = N->getOperand(1);
3702 SDValue CarryIn = N->getOperand(2);
3703 SDLoc DL(N);
3704
3705 // canonicalize constant to RHS
3706 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3707 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3708 if (N0C && !N1C)
3709 return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3710
3711 // fold (saddo_carry x, y, false) -> (saddo x, y)
3712 if (isNullConstant(CarryIn)) {
3713 if (!LegalOperations ||
3714 TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3715 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3716 }
3717
3718 if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3719 return Combined;
3720
3721 if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3722 return Combined;
3723
3724 return SDValue();
3725}
3726
3727// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3728// clamp/truncation if necessary.
3729static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3730 SDValue RHS, SelectionDAG &DAG,
3731 const SDLoc &DL) {
3732 assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3733 "Illegal truncation");
3734
3735 if (DstVT == SrcVT)
3736 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3737
3738 // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3739 // clamping RHS.
3741 DstVT.getScalarSizeInBits());
3742 if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3743 return SDValue();
3744
3745 SDValue SatLimit =
3747 DstVT.getScalarSizeInBits()),
3748 DL, SrcVT);
3749 RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3750 RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3751 LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3752 return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3753}
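// For example, truncating an i16 USUBSAT to i8 when the LHS is known to fit
// in 8 bits: usubsat(i16 100, i16 300) == 0, and after clamping
// RHS = umin(300, 255) = 255 we still get usubsat(i8 100, i8 255) == 0;
// with RHS = 50 both forms yield 100 - 50 = 50.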
3754
3755// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3756// usubsat(a,b), optionally as a truncated type.
3757SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N, const SDLoc &DL) {
3758 if (N->getOpcode() != ISD::SUB ||
3759 !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3760 return SDValue();
3761
3762 EVT SubVT = N->getValueType(0);
3763 SDValue Op0 = N->getOperand(0);
3764 SDValue Op1 = N->getOperand(1);
3765
3766 // Try to find umax(a,b) - b or a - umin(a,b) patterns
 3767 // that may be converted to usubsat(a,b).
3768 if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3769 SDValue MaxLHS = Op0.getOperand(0);
3770 SDValue MaxRHS = Op0.getOperand(1);
3771 if (MaxLHS == Op1)
3772 return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, DL);
3773 if (MaxRHS == Op1)
3774 return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, DL);
3775 }
3776
3777 if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3778 SDValue MinLHS = Op1.getOperand(0);
3779 SDValue MinRHS = Op1.getOperand(1);
3780 if (MinLHS == Op0)
3781 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, DL);
3782 if (MinRHS == Op0)
3783 return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, DL);
3784 }
3785
3786 // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3787 if (Op1.getOpcode() == ISD::TRUNCATE &&
3788 Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3789 Op1.getOperand(0).hasOneUse()) {
3790 SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3791 SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3792 if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3793 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3794 DAG, DL);
3795 if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3796 return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3797 DAG, DL);
3798 }
3799
3800 return SDValue();
3801}
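// These folds hold because, for unsigned a and b:
//   umax(a,b) - b == a - b when a >= b, and 0 otherwise  == usubsat(a,b)
//   a - umin(a,b) == a - b when a >= b, and a - a == 0 otherwise == usubsat(a,b)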
3802
 3803 // Since it may not be valid to emit a fold to zero for vector initializers,
3804// check if we can before folding.
3805static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3806 SelectionDAG &DAG, bool LegalOperations) {
3807 if (!VT.isVector())
3808 return DAG.getConstant(0, DL, VT);
3809 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3810 return DAG.getConstant(0, DL, VT);
3811 return SDValue();
3812}
3813
3814SDValue DAGCombiner::visitSUB(SDNode *N) {
3815 SDValue N0 = N->getOperand(0);
3816 SDValue N1 = N->getOperand(1);
3817 EVT VT = N0.getValueType();
3818 unsigned BitWidth = VT.getScalarSizeInBits();
3819 SDLoc DL(N);
3820
3821 auto PeekThroughFreeze = [](SDValue N) {
3822 if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3823 return N->getOperand(0);
3824 return N;
3825 };
3826
3827 // fold (sub x, x) -> 0
3828 // FIXME: Refactor this and xor and other similar operations together.
3829 if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3830 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3831
3832 // fold (sub c1, c2) -> c3
3833 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3834 return C;
3835
3836 // fold vector ops
3837 if (VT.isVector()) {
3838 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3839 return FoldedVOp;
3840
3841 // fold (sub x, 0) -> x, vector edition
3843 return N0;
3844 }
3845
3846 if (SDValue NewSel = foldBinOpIntoSelect(N))
3847 return NewSel;
3848
3849 // fold (sub x, c) -> (add x, -c)
3851 return DAG.getNode(ISD::ADD, DL, VT, N0,
3852 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3853
3854 if (isNullOrNullSplat(N0)) {
3855 // Right-shifting everything out but the sign bit followed by negation is
3856 // the same as flipping arithmetic/logical shift type without the negation:
3857 // -(X >>u 31) -> (X >>s 31)
3858 // -(X >>s 31) -> (X >>u 31)
3859 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3861 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3862 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3863 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3864 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3865 }
3866 }
3867
3868 // 0 - X --> 0 if the sub is NUW.
3869 if (N->getFlags().hasNoUnsignedWrap())
3870 return N0;
3871
3873 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3874 // N1 must be 0 because negating the minimum signed value is undefined.
3875 if (N->getFlags().hasNoSignedWrap())
3876 return N0;
3877
3878 // 0 - X --> X if X is 0 or the minimum signed value.
3879 return N1;
3880 }
3881
3882 // Convert 0 - abs(x).
3883 if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3885 if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3886 return Result;
3887
 3888 // Fold neg(splat(neg(x))) -> splat(x)
3889 if (VT.isVector()) {
3890 SDValue N1S = DAG.getSplatValue(N1, true);
3891 if (N1S && N1S.getOpcode() == ISD::SUB &&
3892 isNullConstant(N1S.getOperand(0)))
3893 return DAG.getSplat(VT, DL, N1S.getOperand(1));
3894 }
3895 }
3896
3897 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3899 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3900
3901 // fold (A - (0-B)) -> A+B
3902 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3903 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3904
3905 // fold A-(A-B) -> B
3906 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3907 return N1.getOperand(1);
3908
3909 // fold (A+B)-A -> B
3910 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3911 return N0.getOperand(1);
3912
3913 // fold (A+B)-B -> A
3914 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3915 return N0.getOperand(0);
3916
3917 // fold (A+C1)-C2 -> A+(C1-C2)
3918 if (N0.getOpcode() == ISD::ADD) {
3919 SDValue N01 = N0.getOperand(1);
3920 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3921 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3922 }
3923
3924 // fold C2-(A+C1) -> (C2-C1)-A
3925 if (N1.getOpcode() == ISD::ADD) {
3926 SDValue N11 = N1.getOperand(1);
3927 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3928 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3929 }
3930
3931 // fold (A-C1)-C2 -> A-(C1+C2)
3932 if (N0.getOpcode() == ISD::SUB) {
3933 SDValue N01 = N0.getOperand(1);
3934 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3935 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3936 }
3937
3938 // fold (c1-A)-c2 -> (c1-c2)-A
3939 if (N0.getOpcode() == ISD::SUB) {
3940 SDValue N00 = N0.getOperand(0);
3941 if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3942 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3943 }
3944
3945 SDValue A, B, C;
3946
3947 // fold ((A+(B+C))-B) -> A+C
3948 if (sd_match(N0, m_Add(m_Value(A), m_Add(m_Specific(N1), m_Value(C)))))
3949 return DAG.getNode(ISD::ADD, DL, VT, A, C);
3950
3951 // fold ((A+(B-C))-B) -> A-C
3952 if (sd_match(N0, m_Add(m_Value(A), m_Sub(m_Specific(N1), m_Value(C)))))
3953 return DAG.getNode(ISD::SUB, DL, VT, A, C);
3954
3955 // fold ((A-(B-C))-C) -> A-B
3956 if (sd_match(N0, m_Sub(m_Value(A), m_Sub(m_Value(B), m_Specific(N1)))))
3957 return DAG.getNode(ISD::SUB, DL, VT, A, B);
3958
3959 // fold (A-(B-C)) -> A+(C-B)
3960 if (sd_match(N1, m_OneUse(m_Sub(m_Value(B), m_Value(C)))))
3961 return DAG.getNode(ISD::ADD, DL, VT, N0,
3962 DAG.getNode(ISD::SUB, DL, VT, C, B));
3963
3964 // A - (A & B) -> A & (~B)
3965 if (sd_match(N1, m_And(m_Specific(N0), m_Value(B))) &&
3966 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true)))
3967 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getNOT(DL, B, VT));
3968
3969 // fold (A - (-B * C)) -> (A + (B * C))
3970 if (sd_match(N1, m_OneUse(m_Mul(m_Neg(m_Value(B)), m_Value(C)))))
3971 return DAG.getNode(ISD::ADD, DL, VT, N0,
3972 DAG.getNode(ISD::MUL, DL, VT, B, C));
3973
3974 // If either operand of a sub is undef, the result is undef
3975 if (N0.isUndef())
3976 return N0;
3977 if (N1.isUndef())
3978 return N1;
3979
3980 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DL, DAG))
3981 return V;
3982
3983 if (SDValue V = foldAddSubOfSignBit(N, DL, DAG))
3984 return V;
3985
3986 // Try to match AVGCEIL fixedwidth pattern
3987 if (SDValue V = foldSubToAvg(N, DL))
3988 return V;
3989
3990 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, DL))
3991 return V;
3992
3993 if (SDValue V = foldSubToUSubSat(VT, N, DL))
3994 return V;
3995
3996 // (A - B) - 1 -> add (xor B, -1), A
3998 return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));
3999
4000 // Look for:
4001 // sub y, (xor x, -1)
4002 // And if the target does not like this form then turn into:
4003 // add (add x, y), 1
4004 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4005 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4006 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4007 }
4008
4009 // Hoist one-use addition by non-opaque constant:
4010 // (x + C) - y -> (x - y) + C
4011 if (!reassociationCanBreakAddressingModePattern(ISD::SUB, DL, N, N0, N1) &&
4012 N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4013 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4014 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4015 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4016 }
4017 // y - (x + C) -> (y - x) - C
4018 if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4019 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4020 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4021 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4022 }
4023 // (x - C) - y -> (x - y) - C
4024 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4025 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4026 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4027 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4028 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4029 }
4030 // (C - x) - y -> C - (x + y)
4031 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4032 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4033 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4034 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4035 }
4036
4037 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4038 // rather than 'sub 0/1' (the sext should get folded).
4039 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4040 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4041 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4042 TLI.getBooleanContents(VT) ==
4044 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4045 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4046 }
4047
4048 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
4049 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
4050 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
4051 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
4052 SDValue S0 = N1.getOperand(0);
4053 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
4055 if (C->getAPIntValue() == (BitWidth - 1))
4056 return DAG.getNode(ISD::ABS, DL, VT, S0);
4057 }
4058 }
4059
4060 // If the relocation model supports it, consider symbol offsets.
4061 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4062 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4063 // fold (sub Sym+c1, Sym+c2) -> c1-c2
4064 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4065 if (GA->getGlobal() == GB->getGlobal())
4066 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4067 DL, VT);
4068 }
4069
4070 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
4071 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4072 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4073 if (TN->getVT() == MVT::i1) {
4074 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4075 DAG.getConstant(1, DL, VT));
4076 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4077 }
4078 }
4079
4080 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4081 if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4082 const APInt &IntVal = N1.getConstantOperandAPInt(0);
4083 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4084 }
4085
4086 // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4087 if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4088 APInt NewStep = -N1.getConstantOperandAPInt(0);
4089 return DAG.getNode(ISD::ADD, DL, VT, N0,
4090 DAG.getStepVector(DL, VT, NewStep));
4091 }
4092
4093 // Prefer an add for more folding potential and possibly better codegen:
4094 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
4095 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4096 SDValue ShAmt = N1.getOperand(1);
4097 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4098 if (ShAmtC && ShAmtC->getAPIntValue() == (BitWidth - 1)) {
4099 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4100 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4101 }
4102 }
4103
4104 // As with the previous fold, prefer add for more folding potential.
4105 // Subtracting SMIN/0 is the same as adding SMIN/0:
4106 // N0 - (X << BW-1) --> N0 + (X << BW-1)
4107 if (N1.getOpcode() == ISD::SHL) {
4109 if (ShlC && ShlC->getAPIntValue() == (BitWidth - 1))
4110 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4111 }
4112
4113 // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4114 if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4115 N0.getResNo() == 0 && N0.hasOneUse())
4116 return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4117 N0.getOperand(0), N1, N0.getOperand(2));
4118
4120 // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
4121 if (SDValue Carry = getAsCarry(TLI, N0)) {
4122 SDValue X = N1;
4123 SDValue Zero = DAG.getConstant(0, DL, VT);
4124 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4125 return DAG.getNode(ISD::UADDO_CARRY, DL,
4126 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4127 Carry);
4128 }
4129 }
4130
4131 // If there's no chance of borrowing from adjacent bits, then sub is xor:
4132 // sub C0, X --> xor X, C0
4133 if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4134 if (!C0->isOpaque()) {
4135 const APInt &C0Val = C0->getAPIntValue();
4136 const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4137 if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4138 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4139 }
4140 }
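  // For example, C0 = 0b1010 and X known to have set bits only within 0b0010:
  // C0 - 0b0010 == 0b1000 == C0 ^ 0b0010, so no borrow can ever occur and
  // (sub 0b1010, X) computes the same value as (xor X, 0b1010) for X in {0, 2}.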
4141
4142 // smax(a,b) - smin(a,b) --> abds(a,b)
4143 if (hasOperation(ISD::ABDS, VT) &&
4144 sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
4146 return DAG.getNode(ISD::ABDS, DL, VT, A, B);
4147
4148 // umax(a,b) - umin(a,b) --> abdu(a,b)
4149 if (hasOperation(ISD::ABDU, VT) &&
4150 sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
4152 return DAG.getNode(ISD::ABDU, DL, VT, A, B);
4153
4154 return SDValue();
4155}
4156
4157SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4158 unsigned Opcode = N->getOpcode();
4159 SDValue N0 = N->getOperand(0);
4160 SDValue N1 = N->getOperand(1);
4161 EVT VT = N0.getValueType();
4162 bool IsSigned = Opcode == ISD::SSUBSAT;
4163 SDLoc DL(N);
4164
4165 // fold (sub_sat x, undef) -> 0
4166 if (N0.isUndef() || N1.isUndef())
4167 return DAG.getConstant(0, DL, VT);
4168
4169 // fold (sub_sat x, x) -> 0
4170 if (N0 == N1)
4171 return DAG.getConstant(0, DL, VT);
4172
4173 // fold (sub_sat c1, c2) -> c3
4174 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4175 return C;
4176
4177 // fold vector ops
4178 if (VT.isVector()) {
4179 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4180 return FoldedVOp;
4181
4182 // fold (sub_sat x, 0) -> x, vector edition
4184 return N0;
4185 }
4186
4187 // fold (sub_sat x, 0) -> x
4188 if (isNullConstant(N1))
4189 return N0;
4190
 4191 // If it cannot overflow, transform into a sub.
4192 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4193 return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4194
4195 return SDValue();
4196}
4197
4198SDValue DAGCombiner::visitSUBC(SDNode *N) {
4199 SDValue N0 = N->getOperand(0);
4200 SDValue N1 = N->getOperand(1);
4201 EVT VT = N0.getValueType();
4202 SDLoc DL(N);
4203
4204 // If the flag result is dead, turn this into an SUB.
4205 if (!N->hasAnyUseOfValue(1))
4206 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4207 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4208
4209 // fold (subc x, x) -> 0 + no borrow
4210 if (N0 == N1)
4211 return CombineTo(N, DAG.getConstant(0, DL, VT),
4212 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4213
4214 // fold (subc x, 0) -> x + no borrow
4215 if (isNullConstant(N1))
4216 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4217
4218 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4219 if (isAllOnesConstant(N0))
4220 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4221 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4222
4223 return SDValue();
4224}
4225
4226SDValue DAGCombiner::visitSUBO(SDNode *N) {
4227 SDValue N0 = N->getOperand(0);
4228 SDValue N1 = N->getOperand(1);
4229 EVT VT = N0.getValueType();
4230 bool IsSigned = (ISD::SSUBO == N->getOpcode());
4231
4232 EVT CarryVT = N->getValueType(1);
4233 SDLoc DL(N);
4234
4235 // If the flag result is dead, turn this into an SUB.
4236 if (!N->hasAnyUseOfValue(1))
4237 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4238 DAG.getUNDEF(CarryVT));
4239
4240 // fold (subo x, x) -> 0 + no borrow
4241 if (N0 == N1)
4242 return CombineTo(N, DAG.getConstant(0, DL, VT),
4243 DAG.getConstant(0, DL, CarryVT));
4244
 4245 // fold (subo x, c) -> (addo x, -c)
4247 if (IsSigned && !N1C->isMinSignedValue())
4248 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4249 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4250
4251 // fold (subo x, 0) -> x + no borrow
4252 if (isNullOrNullSplat(N1))
4253 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4254
 4255 // If it cannot overflow, transform into a sub.
4256 if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4257 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4258 DAG.getConstant(0, DL, CarryVT));
4259
4260 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4261 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4262 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4263 DAG.getConstant(0, DL, CarryVT));
4264
4265 return SDValue();
4266}
4267
4268SDValue DAGCombiner::visitSUBE(SDNode *N) {
4269 SDValue N0 = N->getOperand(0);
4270 SDValue N1 = N->getOperand(1);
4271 SDValue CarryIn = N->getOperand(2);
4272
4273 // fold (sube x, y, false) -> (subc x, y)
4274 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4275 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4276
4277 return SDValue();
4278}
4279
4280SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4281 SDValue N0 = N->getOperand(0);
4282 SDValue N1 = N->getOperand(1);
4283 SDValue CarryIn = N->getOperand(2);
4284
4285 // fold (usubo_carry x, y, false) -> (usubo x, y)
4286 if (isNullConstant(CarryIn)) {
4287 if (!LegalOperations ||
4288 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4289 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4290 }
4291
4292 return SDValue();
4293}
4294
4295SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4296 SDValue N0 = N->getOperand(0);
4297 SDValue N1 = N->getOperand(1);
4298 SDValue CarryIn = N->getOperand(2);
4299
4300 // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4301 if (isNullConstant(CarryIn)) {
4302 if (!LegalOperations ||
4303 TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4304 return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4305 }
4306
4307 return SDValue();
4308}
4309
4310// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4311// UMULFIXSAT here.
4312SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4313 SDValue N0 = N->getOperand(0);
4314 SDValue N1 = N->getOperand(1);
4315 SDValue Scale = N->getOperand(2);
4316 EVT VT = N0.getValueType();
4317
4318 // fold (mulfix x, undef, scale) -> 0
4319 if (N0.isUndef() || N1.isUndef())
4320 return DAG.getConstant(0, SDLoc(N), VT);
4321
4322 // Canonicalize constant to RHS (vector doesn't have to splat)
4325 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4326
4327 // fold (mulfix x, 0, scale) -> 0
4328 if (isNullConstant(N1))
4329 return DAG.getConstant(0, SDLoc(N), VT);
4330
4331 return SDValue();
4332}
4333
4334SDValue DAGCombiner::visitMUL(SDNode *N) {
4335 SDValue N0 = N->getOperand(0);
4336 SDValue N1 = N->getOperand(1);
4337 EVT VT = N0.getValueType();
4338 SDLoc DL(N);
4339
4340 // fold (mul x, undef) -> 0
4341 if (N0.isUndef() || N1.isUndef())
4342 return DAG.getConstant(0, DL, VT);
4343
4344 // fold (mul c1, c2) -> c1*c2
4345 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4346 return C;
4347
4348 // canonicalize constant to RHS (vector doesn't have to splat)
4351 return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4352
4353 bool N1IsConst = false;
4354 bool N1IsOpaqueConst = false;
4355 APInt ConstValue1;
4356
4357 // fold vector ops
4358 if (VT.isVector()) {
4359 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4360 return FoldedVOp;
4361
4362 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4363 assert((!N1IsConst ||
4364 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4365 "Splat APInt should be element width");
4366 } else {
4367 N1IsConst = isa<ConstantSDNode>(N1);
4368 if (N1IsConst) {
4369 ConstValue1 = N1->getAsAPIntVal();
4370 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4371 }
4372 }
4373
4374 // fold (mul x, 0) -> 0
4375 if (N1IsConst && ConstValue1.isZero())
4376 return N1;
4377
4378 // fold (mul x, 1) -> x
4379 if (N1IsConst && ConstValue1.isOne())
4380 return N0;
4381
4382 if (SDValue NewSel = foldBinOpIntoSelect(N))
4383 return NewSel;
4384
4385 // fold (mul x, -1) -> 0-x
4386 if (N1IsConst && ConstValue1.isAllOnes())
4387 return DAG.getNegative(N0, DL, VT);
4388
4389 // fold (mul x, (1 << c)) -> x << c
4390 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4391 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4392 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4393 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4394 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4395 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4396 }
4397 }
4398
4399 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4400 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4401 unsigned Log2Val = (-ConstValue1).logBase2();
4402 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4403
4404 // FIXME: If the input is something that is easily negated (e.g. a
4405 // single-use add), we should put the negate there.
4406 return DAG.getNode(ISD::SUB, DL, VT,
4407 DAG.getConstant(0, DL, VT),
4408 DAG.getNode(ISD::SHL, DL, VT, N0,
4409 DAG.getConstant(Log2Val, DL, ShiftVT)));
4410 }
4411
4412 // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4413 // hi result is in use in case we hit this mid-legalization.
4414 for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4415 if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4416 SDVTList LoHiVT = DAG.getVTList(VT, VT);
4417 // TODO: Can we match commutable operands with getNodeIfExists?
4418 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4419 if (LoHi->hasAnyUseOfValue(1))
4420 return SDValue(LoHi, 0);
4421 if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4422 if (LoHi->hasAnyUseOfValue(1))
4423 return SDValue(LoHi, 0);
4424 }
4425 }
4426
4427 // Try to transform:
4428 // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4429 // mul x, (2^N + 1) --> add (shl x, N), x
4430 // mul x, (2^N - 1) --> sub (shl x, N), x
4431 // Examples: x * 33 --> (x << 5) + x
4432 // x * 15 --> (x << 4) - x
4433 // x * -33 --> -((x << 5) + x)
4434 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4435 // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4436 // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4437 // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4438 // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4439 // x * 0xf800 --> (x << 16) - (x << 11)
4440 // x * -0x8800 --> -((x << 15) + (x << 11))
4441 // x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4442 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4443 // TODO: We could handle more general decomposition of any constant by
4444 // having the target set a limit on number of ops and making a
4445 // callback to determine that sequence (similar to sqrt expansion).
4446 unsigned MathOp = ISD::DELETED_NODE;
4447 APInt MulC = ConstValue1.abs();
4448 // The constant `2` should be treated as (2^0 + 1).
4449 unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4450 MulC.lshrInPlace(TZeros);
4451 if ((MulC - 1).isPowerOf2())
4452 MathOp = ISD::ADD;
4453 else if ((MulC + 1).isPowerOf2())
4454 MathOp = ISD::SUB;
4455
4456 if (MathOp != ISD::DELETED_NODE) {
4457 unsigned ShAmt =
4458 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4459 ShAmt += TZeros;
4460 assert(ShAmt < VT.getScalarSizeInBits() &&
4461 "multiply-by-constant generated out of bounds shift");
4462 SDValue Shl =
4463 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4464 SDValue R =
4465 TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4466 DAG.getNode(ISD::SHL, DL, VT, N0,
4467 DAG.getConstant(TZeros, DL, VT)))
4468 : DAG.getNode(MathOp, DL, VT, Shl, N0);
4469 if (ConstValue1.isNegative())
4470 R = DAG.getNegative(R, DL, VT);
4471 return R;
4472 }
4473 }
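  // For example, x * 20: MulC = 20, TZeros = 2, MulC >>= 2 gives 5, and
  // (5 - 1) is a power of 2, so MathOp = ADD with ShAmt = log2(4) + 2 = 4,
  // producing (add (shl x, 4), (shl x, 2)) == 16*x + 4*x == 20*x.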
4474
4475 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4476 if (N0.getOpcode() == ISD::SHL) {
4477 SDValue N01 = N0.getOperand(1);
4478 if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4479 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4480 }
4481
4482 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4483 // use.
4484 {
4485 SDValue Sh, Y;
4486
4487 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
4488 if (N0.getOpcode() == ISD::SHL &&
4490 Sh = N0; Y = N1;
4491 } else if (N1.getOpcode() == ISD::SHL &&
4493 N1->hasOneUse()) {
4494 Sh = N1; Y = N0;
4495 }
4496
4497 if (Sh.getNode()) {
4498 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4499 return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4500 }
4501 }
4502
4503 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4504 if (N0.getOpcode() == ISD::ADD &&
4508 return DAG.getNode(
4509 ISD::ADD, DL, VT,
4510 DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4511 DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4512
4513 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4515 if (N0.getOpcode() == ISD::VSCALE && NC1) {
4516 const APInt &C0 = N0.getConstantOperandAPInt(0);
4517 const APInt &C1 = NC1->getAPIntValue();
4518 return DAG.getVScale(DL, VT, C0 * C1);
4519 }
4520
4521 // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4522 APInt MulVal;
4523 if (N0.getOpcode() == ISD::STEP_VECTOR &&
4524 ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4525 const APInt &C0 = N0.getConstantOperandAPInt(0);
4526 APInt NewStep = C0 * MulVal;
4527 return DAG.getStepVector(DL, VT, NewStep);
4528 }
4529
 4530 // Fold (mul x, 0/undef) -> 0 and
 4531 // (mul x, 1) -> x
 4532 // into and(x, mask).
4533 // We can replace vectors with '0' and '1' factors with a clearing mask.
4534 if (VT.isFixedLengthVector()) {
4535 unsigned NumElts = VT.getVectorNumElements();
4536 SmallBitVector ClearMask;
4537 ClearMask.reserve(NumElts);
4538 auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4539 if (!V || V->isZero()) {
4540 ClearMask.push_back(true);
4541 return true;
4542 }
4543 ClearMask.push_back(false);
4544 return V->isOne();
4545 };
4546 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4547 ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4548 assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4549 EVT LegalSVT = N1.getOperand(0).getValueType();
4550 SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4551 SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4553 for (unsigned I = 0; I != NumElts; ++I)
4554 if (ClearMask[I])
4555 Mask[I] = Zero;
4556 return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4557 }
4558 }
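  // For example, N1 = <1, 0, undef, 1> gives ClearMask = <0, 1, 1, 0>, so the
  // multiply becomes (and x, <-1, 0, 0, -1>): lanes multiplied by 1 are kept
  // and lanes multiplied by 0/undef are cleared.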
4559
4560 // reassociate mul
4561 if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4562 return RMUL;
4563
4564 // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4565 if (SDValue SD =
4566 reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4567 return SD;
4568
4569 // Simplify the operands using demanded-bits information.
4571 return SDValue(N, 0);
4572
4573 return SDValue();
4574}
4575
 4576 /// Return true if a divmod libcall is available.
4578 const TargetLowering &TLI) {
4579 RTLIB::Libcall LC;
4580 EVT NodeType = Node->getValueType(0);
4581 if (!NodeType.isSimple())
4582 return false;
4583 switch (NodeType.getSimpleVT().SimpleTy) {
4584 default: return false; // No libcall for vector types.
4585 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
4586 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4587 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4588 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4589 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4590 }
4591
4592 return TLI.getLibcallName(LC) != nullptr;
4593}
4594
4595/// Issue divrem if both quotient and remainder are needed.
4596SDValue DAGCombiner::useDivRem(SDNode *Node) {
4597 if (Node->use_empty())
4598 return SDValue(); // This is a dead node, leave it alone.
4599
4600 unsigned Opcode = Node->getOpcode();
4601 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4602 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4603
4604 // DivMod lib calls can still work on non-legal types if using lib-calls.
4605 EVT VT = Node->getValueType(0);
4606 if (VT.isVector() || !VT.isInteger())
4607 return SDValue();
4608
4609 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4610 return SDValue();
4611
4612 // If DIVREM is going to get expanded into a libcall,
4613 // but there is no libcall available, then don't combine.
4614 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4616 return SDValue();
4617
4618 // If div is legal, it's better to do the normal expansion
4619 unsigned OtherOpcode = 0;
4620 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4621 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4622 if (TLI.isOperationLegalOrCustom(Opcode, VT))
4623 return SDValue();
4624 } else {
4625 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4626 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4627 return SDValue();
4628 }
4629
4630 SDValue Op0 = Node->getOperand(0);
4631 SDValue Op1 = Node->getOperand(1);
4632 SDValue combined;
4633 for (SDNode *User : Op0->uses()) {
4634 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4635 User->use_empty())
4636 continue;
4637 // Convert the other matching node(s), too;
4638 // otherwise, the DIVREM may get target-legalized into something
4639 // target-specific that we won't be able to recognize.
4640 unsigned UserOpc = User->getOpcode();
4641 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4642 User->getOperand(0) == Op0 &&
4643 User->getOperand(1) == Op1) {
4644 if (!combined) {
4645 if (UserOpc == OtherOpcode) {
4646 SDVTList VTs = DAG.getVTList(VT, VT);
4647 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4648 } else if (UserOpc == DivRemOpc) {
4649 combined = SDValue(User, 0);
4650 } else {
4651 assert(UserOpc == Opcode);
4652 continue;
4653 }
4654 }
4655 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4656 CombineTo(User, combined);
4657 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4658 CombineTo(User, combined.getValue(1));
4659 }
4660 }
4661 return combined;
4662}
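// For example, if a function computes both a / b and a % b, the DAG contains
// an SDIV and an SREM with identical operands; useDivRem replaces both with
// the two results of a single SDIVREM node (quotient = value 0,
// remainder = value 1).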
4663
4665 SDValue N0 = N->getOperand(0);
4666 SDValue N1 = N->getOperand(1);
4667 EVT VT = N->getValueType(0);
4668 SDLoc DL(N);
4669
4670 unsigned Opc = N->getOpcode();
4671 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4673
4674 // X / undef -> undef
4675 // X % undef -> undef
4676 // X / 0 -> undef
4677 // X % 0 -> undef
4678 // NOTE: This includes vectors where any divisor element is zero/undef.
4679 if (DAG.isUndef(Opc, {N0, N1}))
4680 return DAG.getUNDEF(VT);
4681
4682 // undef / X -> 0
4683 // undef % X -> 0
4684 if (N0.isUndef())
4685 return DAG.getConstant(0, DL, VT);
4686
4687 // 0 / X -> 0
4688 // 0 % X -> 0
4690 if (N0C && N0C->isZero())
4691 return N0;
4692
4693 // X / X -> 1
4694 // X % X -> 0
4695 if (N0 == N1)
4696 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4697
4698 // X / 1 -> X
4699 // X % 1 -> 0
4700 // If this is a boolean op (single-bit element type), we can't have
4701 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4702 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4703 // it's a 1.
4704 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4705 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4706
4707 return SDValue();
4708}
4709
4710SDValue DAGCombiner::visitSDIV(SDNode *N) {
4711 SDValue N0 = N->getOperand(0);
4712 SDValue N1 = N->getOperand(1);
4713 EVT VT = N->getValueType(0);
4714 EVT CCVT = getSetCCResultType(VT);
4715 SDLoc DL(N);
4716
4717 // fold (sdiv c1, c2) -> c1/c2
4718 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4719 return C;
4720
4721 // fold vector ops
4722 if (VT.isVector())
4723 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4724 return FoldedVOp;
4725
4726 // fold (sdiv X, -1) -> 0-X
4728 if (N1C && N1C->isAllOnes())
4729 return DAG.getNegative(N0, DL, VT);
4730
4731 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4732 if (N1C && N1C->isMinSignedValue())
4733 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4734 DAG.getConstant(1, DL, VT),
4735 DAG.getConstant(0, DL, VT));
4736
4737 if (SDValue V = simplifyDivRem(N, DAG))
4738 return V;
4739
4740 if (SDValue NewSel = foldBinOpIntoSelect(N))
4741 return NewSel;
4742
4743 // If we know the sign bits of both operands are zero, strength reduce to a
4744 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
4745 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4746 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4747
4748 if (SDValue V = visitSDIVLike(N0, N1, N)) {
4749 // If the corresponding remainder node exists, update its users with
 4750 // (Dividend - (Quotient * Divisor)).
4751 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4752 { N0, N1 })) {
4753 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4754 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4755 AddToWorklist(Mul.getNode());
4756 AddToWorklist(Sub.getNode());
4757 CombineTo(RemNode, Sub);
4758 }
4759 return V;
4760 }
4761
4762 // sdiv, srem -> sdivrem
4763 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4764 // true. Otherwise, we break the simplification logic in visitREM().
4766 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4767 if (SDValue DivRem = useDivRem(N))
4768 return DivRem;
4769
4770 return SDValue();
4771}
4772
4773static bool isDivisorPowerOfTwo(SDValue Divisor) {
4774 // Helper for determining whether a value is a power-2 constant scalar or a
4775 // vector of such elements.
4776 auto IsPowerOfTwo = [](ConstantSDNode *C) {
4777 if (C->isZero() || C->isOpaque())
4778 return false;
4779 if (C->getAPIntValue().isPowerOf2())
4780 return true;
4781 if (C->getAPIntValue().isNegatedPowerOf2())
4782 return true;
4783 return false;
4784 };
4785
4786 return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4787}
4788
4789SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4790 SDLoc DL(N);
4791 EVT VT = N->getValueType(0);
4792 EVT CCVT = getSetCCResultType(VT);
4793 unsigned BitWidth = VT.getScalarSizeInBits();
4794
4795 // fold (sdiv X, pow2) -> simple ops after legalize
4796 // FIXME: We check for the exact bit here because the generic lowering gives
4797 // better results in that case. The target-specific lowering should learn how
4798 // to handle exact sdivs efficiently.
4799 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4800 // Target-specific implementation of sdiv x, pow2.
4801 if (SDValue Res = BuildSDIVPow2(N))
4802 return Res;
4803
4804 // Create constants that are functions of the shift amount value.
4805 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4806 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4807 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4808 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4809 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4810 if (!isConstantOrConstantVector(Inexact))
4811 return SDValue();
4812
4813 // Splat the sign bit into the register
4814 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4815 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4816 AddToWorklist(Sign.getNode());
4817
4818 // Add (N0 < 0) ? abs2 - 1 : 0;
4819 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4820 AddToWorklist(Srl.getNode());
4821 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4822 AddToWorklist(Add.getNode());
4823 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4824 AddToWorklist(Sra.getNode());
4825
4826 // Special case: (sdiv X, 1) -> X
4827 // Special Case: (sdiv X, -1) -> 0-X
4828 SDValue One = DAG.getConstant(1, DL, VT);
4829 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4830 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4831 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4832 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4833 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4834
4835 // If dividing by a positive value, we're done. Otherwise, the result must
4836 // be negated.
4837 SDValue Zero = DAG.getConstant(0, DL, VT);
4838 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4839
4840 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4841 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4842 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4843 return Res;
4844 }
4845
4846 // If integer divide is expensive and we satisfy the requirements, emit an
4847 // alternate sequence. Targets may check function attributes for size/speed
4848 // trade-offs.
4849 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4850 if (isConstantOrConstantVector(N1) &&
4851 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4852 if (SDValue Op = BuildSDIV(N))
4853 return Op;
4854
4855 return SDValue();
4856}
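// A minimal standalone sketch of the power-of-two expansion built above for a
// positive divisor (sign-splat, bias, arithmetic shift); the select for
// divisors 1/-1 and the final negate for negative divisors are omitted. The
// helper name and the fixed 32-bit width are illustrative assumptions.
static int32_t sdivPow2Sketch(int32_t X, unsigned Log2) {
  // Requires 1 <= Log2 <= 31; assumes arithmetic right shift of negatives.
  int32_t Sign = X >> 31;                          // splat the sign bit
  uint32_t Bias = uint32_t(Sign) >> (32 - Log2);   // 2^Log2 - 1 iff X < 0
  return (X + int32_t(Bias)) >> Log2;              // rounds toward zero
}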
4857
4858SDValue DAGCombiner::visitUDIV(SDNode *N) {
4859 SDValue N0 = N->getOperand(0);
4860 SDValue N1 = N->getOperand(1);
4861 EVT VT = N->getValueType(0);
4862 EVT CCVT = getSetCCResultType(VT);
4863 SDLoc DL(N);
4864
4865 // fold (udiv c1, c2) -> c1/c2
4866 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4867 return C;
4868
4869 // fold vector ops
4870 if (VT.isVector())
4871 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4872 return FoldedVOp;
4873
4874 // fold (udiv X, -1) -> select(X == -1, 1, 0)
4875 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4876 if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4877 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4878 DAG.getConstant(1, DL, VT),
4879 DAG.getConstant(0, DL, VT));
4880 }
4881
4882 if (SDValue V = simplifyDivRem(N, DAG))
4883 return V;
4884
4885 if (SDValue NewSel = foldBinOpIntoSelect(N))
4886 return NewSel;
4887
4888 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4889 // If the corresponding remainder node exists, update its users with
4890 // (Dividend - (Quotient * Divisor)).
4891 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4892 { N0, N1 })) {
4893 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4894 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4895 AddToWorklist(Mul.getNode());
4896 AddToWorklist(Sub.getNode());
4897 CombineTo(RemNode, Sub);
4898 }
4899 return V;
4900 }
4901
4902 // udiv, urem -> udivrem
4903 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4904 // true. Otherwise, we break the simplification logic in visitREM().
4905 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4906 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4907 if (SDValue DivRem = useDivRem(N))
4908 return DivRem;
4909
4910 return SDValue();
4911}
4912
4913SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4914 SDLoc DL(N);
4915 EVT VT = N->getValueType(0);
4916
4917 // fold (udiv x, (1 << c)) -> x >>u c
4918 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4919 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4920 AddToWorklist(LogBase2.getNode());
4921
4922 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4923 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4924 AddToWorklist(Trunc.getNode());
4925 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4926 }
4927 }
4928
4929 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4930 if (N1.getOpcode() == ISD::SHL) {
4931 SDValue N10 = N1.getOperand(0);
4932 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4933 if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4934 AddToWorklist(LogBase2.getNode());
4935
4936 EVT ADDVT = N1.getOperand(1).getValueType();
4937 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4938 AddToWorklist(Trunc.getNode());
4939 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4940 AddToWorklist(Add.getNode());
4941 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4942 }
4943 }
4944 }
4945
4946 // fold (udiv x, c) -> alternate
4947 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4948 if (isConstantOrConstantVector(N1) &&
4949 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4950 if (SDValue Op = BuildUDIV(N))
4951 return Op;
4952
4953 return SDValue();
4954}
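// A standalone sketch of the two shift folds above on plain 32-bit scalars.
// The helper names and the fixed width are illustrative assumptions; the real
// transform operates on SDNodes of any legal type.
static uint32_t udivByPow2Sketch(uint32_t X, unsigned Log2C) {
  return X >> Log2C;                       // udiv x, (1 << Log2C), Log2C < 32
}
static uint32_t udivByShiftedPow2Sketch(uint32_t X, unsigned Log2C, unsigned Y) {
  return X >> (Log2C + Y);                 // udiv x, ((1 << Log2C) << Y), sum < 32
}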
4955
4956SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4957 if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4958 !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4959 // Target-specific implementation of srem x, pow2.
4960 if (SDValue Res = BuildSREMPow2(N))
4961 return Res;
4962 }
4963 return SDValue();
4964}
4965
4966// handles ISD::SREM and ISD::UREM
4967SDValue DAGCombiner::visitREM(SDNode *N) {
4968 unsigned Opcode = N->getOpcode();
4969 SDValue N0 = N->getOperand(0);
4970 SDValue N1 = N->getOperand(1);
4971 EVT VT = N->getValueType(0);
4972 EVT CCVT = getSetCCResultType(VT);
4973
4974 bool isSigned = (Opcode == ISD::SREM);
4975 SDLoc DL(N);
4976
4977 // fold (rem c1, c2) -> c1%c2
4978 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4979 return C;
4980
4981 // fold (urem X, -1) -> select(FX == -1, 0, FX)
4982 // Freeze the numerator to avoid a miscompile with an undefined value.
4983 if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
4984 CCVT.isVector() == VT.isVector()) {
4985 SDValue F0 = DAG.getFreeze(N0);
4986 SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4987 return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4988 }
4989
4990 if (SDValue V = simplifyDivRem(N, DAG))
4991 return V;
4992
4993 if (SDValue NewSel = foldBinOpIntoSelect(N))
4994 return NewSel;
4995
4996 if (isSigned) {
4997 // If we know the sign bits of both operands are zero, strength reduce to a
4998 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4999 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5000 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5001 } else {
5002 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5003 // fold (urem x, pow2) -> (and x, pow2-1)
5004 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5005 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5006 AddToWorklist(Add.getNode());
5007 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5008 }
5009 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5010 // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5011 // TODO: We should sink the following into isKnownToBePowerOfTwo
5012 // using a OrZero parameter analogous to our handling in ValueTracking.
5013 if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5014 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5015 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5016 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5017 AddToWorklist(Add.getNode());
5018 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5019 }
5020 }
5021
5022 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5023
5024 // If X/C can be simplified by the division-by-constant logic, lower
5025 // X%C to the equivalent of X-X/C*C.
5026 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5027 // speculative DIV must not cause a DIVREM conversion. We guard against this
5028 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
5029 // combine will not return a DIVREM. Regardless, checking cheapness here
5030 // makes sense since the simplification results in fatter code.
5031 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5032 if (isSigned) {
5033 // check if we can build faster implementation for srem
5034 if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5035 return OptimizedRem;
5036 }
5037
5038 SDValue OptimizedDiv =
5039 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5040 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5041 // If the equivalent Div node also exists, update its users.
5042 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5043 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5044 { N0, N1 }))
5045 CombineTo(DivNode, OptimizedDiv);
5046 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5047 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5048 AddToWorklist(OptimizedDiv.getNode());
5049 AddToWorklist(Mul.getNode());
5050 return Sub;
5051 }
5052 }
5053
5054 // srem -> sdivrem, urem -> udivrem
5055 if (SDValue DivRem = useDivRem(N))
5056 return DivRem.getValue(1);
5057
5058 return SDValue();
5059}
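// A standalone sketch of the REM rewrites above: masking for an unsigned
// power-of-two divisor, and the generic X - (X / C) * C form used when the
// speculative divide simplifies. Names and the 32-bit width are assumptions.
static uint32_t uremPow2Sketch(uint32_t X, uint32_t Pow2) {
  return X & (Pow2 - 1);                   // urem x, pow2 -> and x, pow2-1
}
static uint32_t remViaDivSketch(uint32_t X, uint32_t C) {
  return X - (X / C) * C;                  // requires C != 0
}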
5060
5061SDValue DAGCombiner::visitMULHS(SDNode *N) {
5062 SDValue N0 = N->getOperand(0);
5063 SDValue N1 = N->getOperand(1);
5064 EVT VT = N->getValueType(0);
5065 SDLoc DL(N);
5066
5067 // fold (mulhs c1, c2)
5068 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5069 return C;
5070
5071 // canonicalize constant to RHS.
5072 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5073 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5074 return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5075
5076 if (VT.isVector()) {
5077 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5078 return FoldedVOp;
5079
5080 // fold (mulhs x, 0) -> 0
5081 // do not return N1, because undef node may exist.
5082 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5083 return DAG.getConstant(0, DL, VT);
5084 }
5085
5086 // fold (mulhs x, 0) -> 0
5087 if (isNullConstant(N1))
5088 return N1;
5089
5090 // fold (mulhs x, 1) -> (sra x, size(x)-1)
5091 if (isOneConstant(N1))
5092 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
5093 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
5094 getShiftAmountTy(N0.getValueType())));
5095
5096 // fold (mulhs x, undef) -> 0
5097 if (N0.isUndef() || N1.isUndef())
5098 return DAG.getConstant(0, DL, VT);
5099
5100 // If the type twice as wide is legal, transform the mulhs to a wider multiply
5101 // plus a shift.
5102 if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5103 !VT.isVector()) {
5104 MVT Simple = VT.getSimpleVT();
5105 unsigned SimpleSize = Simple.getSizeInBits();
5106 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5107 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5108 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5109 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5110 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5111 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5112 DAG.getConstant(SimpleSize, DL,
5113 getShiftAmountTy(N1.getValueType())));
5114 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5115 }
5116 }
5117
5118 return SDValue();
5119}
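// A standalone sketch of the widening rewrite above for a 16-bit MULHS when a
// 32-bit multiply is legal: sign-extend both operands, multiply, and keep the
// high half. The helper name and the concrete widths are illustrative
// assumptions.
static int16_t mulhsWidenSketch(int16_t A, int16_t B) {
  int32_t Wide = int32_t(A) * int32_t(B);  // sign-extended full product
  return int16_t(Wide >> 16);              // high half always fits in int16_t
}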
5120
5121SDValue DAGCombiner::visitMULHU(SDNode *N) {
5122 SDValue N0 = N->getOperand(0);
5123 SDValue N1 = N->getOperand(1);
5124 EVT VT = N->getValueType(0);
5125 SDLoc DL(N);
5126
5127 // fold (mulhu c1, c2)
5128 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5129 return C;
5130
5131 // canonicalize constant to RHS.
5132 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5133 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5134 return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5135
5136 if (VT.isVector()) {
5137 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5138 return FoldedVOp;
5139
5140 // fold (mulhu x, 0) -> 0
5141 // do not return N1, because undef node may exist.
5142 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5143 return DAG.getConstant(0, DL, VT);
5144 }
5145
5146 // fold (mulhu x, 0) -> 0
5147 if (isNullConstant(N1))
5148 return N1;
5149
5150 // fold (mulhu x, 1) -> 0
5151 if (isOneConstant(N1))
5152 return DAG.getConstant(0, DL, N0.getValueType());
5153
5154 // fold (mulhu x, undef) -> 0
5155 if (N0.isUndef() || N1.isUndef())
5156 return DAG.getConstant(0, DL, VT);
5157
5158 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
5159 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5160 hasOperation(ISD::SRL, VT)) {
5161 if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5162 unsigned NumEltBits = VT.getScalarSizeInBits();
5163 SDValue SRLAmt = DAG.getNode(
5164 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5165 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5166 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5167 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5168 }
5169 }
5170
5171 // If the type twice as wide is legal, transform the mulhu to a wider multiply
5172 // plus a shift.
5173 if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5174 !VT.isVector()) {
5175 MVT Simple = VT.getSimpleVT();
5176 unsigned SimpleSize = Simple.getSizeInBits();
5177 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5178 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5179 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5180 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5181 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5182 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5183 DAG.getConstant(SimpleSize, DL,
5184 getShiftAmountTy(N1.getValueType())));
5185 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5186 }
5187 }
5188
5189 // Simplify the operands using demanded-bits information.
5190 // We don't have demanded bits support for MULHU so this just enables constant
5191 // folding based on known bits.
5192 if (SimplifyDemandedBits(SDValue(N, 0)))
5193 return SDValue(N, 0);
5194
5195 return SDValue();
5196}
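// A standalone sketch of the MULHU folds above on 32-bit scalars: the high
// half of x * (1 << C) is x >> (32 - C), and a legal 64-bit multiply yields
// the high half directly. Names and widths are illustrative assumptions.
static uint32_t mulhuPow2Sketch(uint32_t X, unsigned C) {
  return X >> (32 - C);                    // requires 1 <= C <= 31
}
static uint32_t mulhuWidenSketch(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * B) >> 32);
}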
5197
5198SDValue DAGCombiner::visitAVG(SDNode *N) {
5199 unsigned Opcode = N->getOpcode();
5200 SDValue N0 = N->getOperand(0);
5201 SDValue N1 = N->getOperand(1);
5202 EVT VT = N->getValueType(0);
5203 SDLoc DL(N);
5204
5205 // fold (avg c1, c2)
5206 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5207 return C;
5208
5209 // canonicalize constant to RHS.
5210 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5211 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5212 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5213
5214 if (VT.isVector()) {
5215 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5216 return FoldedVOp;
5217
5218 // fold (avgfloor x, 0) -> x >> 1
5220 if (Opcode == ISD::AVGFLOORS)
5221 return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5222 if (Opcode == ISD::AVGFLOORU)
5223 return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5224 }
5225 }
5226
5227 // fold (avg x, undef) -> x
5228 if (N0.isUndef())
5229 return N1;
5230 if (N1.isUndef())
5231 return N0;
5232
5233 // Fold (avg x, x) --> x
5234 if (N0 == N1 && Level >= AfterLegalizeTypes)
5235 return N0;
5236
5237 // TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5238
5239 return SDValue();
5240}
5241
5242SDValue DAGCombiner::visitABD(SDNode *N) {
5243 unsigned Opcode = N->getOpcode();
5244 SDValue N0 = N->getOperand(0);
5245 SDValue N1 = N->getOperand(1);
5246 EVT VT = N->getValueType(0);
5247 SDLoc DL(N);
5248
5249 // fold (abd c1, c2)
5250 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5251 return C;
5252
5253 // canonicalize constant to RHS.
5254 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5255 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5256 return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5257
5258 if (VT.isVector()) {
5259 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5260 return FoldedVOp;
5261
5262 // fold (abds x, 0) -> abs x
5263 // fold (abdu x, 0) -> x
5264 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5265 if (Opcode == ISD::ABDS)
5266 return DAG.getNode(ISD::ABS, DL, VT, N0);
5267 if (Opcode == ISD::ABDU)
5268 return N0;
5269 }
5270 }
5271
5272 // fold (abd x, undef) -> 0
5273 if (N0.isUndef() || N1.isUndef())
5274 return DAG.getConstant(0, DL, VT);
5275
5276 // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5277 if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5278 DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5279 return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5280
5281 return SDValue();
5282}
5283
5284/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
5285/// give the opcodes for the two computations that are being performed. Return
5286 /// the combined value if a simplification was made, or a null SDValue otherwise.
5287SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5288 unsigned HiOp) {
5289 // If the high half is not needed, just compute the low half.
5290 bool HiExists = N->hasAnyUseOfValue(1);
5291 if (!HiExists && (!LegalOperations ||
5292 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5293 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5294 return CombineTo(N, Res, Res);
5295 }
5296
5297 // If the low half is not needed, just compute the high half.
5298 bool LoExists = N->hasAnyUseOfValue(0);
5299 if (!LoExists && (!LegalOperations ||
5300 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5301 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5302 return CombineTo(N, Res, Res);
5303 }
5304
5305 // If both halves are used, return as it is.
5306 if (LoExists && HiExists)
5307 return SDValue();
5308
5309 // If the two computed results can be simplified separately, separate them.
5310 if (LoExists) {
5311 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5312 AddToWorklist(Lo.getNode());
5313 SDValue LoOpt = combine(Lo.getNode());
5314 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5315 (!LegalOperations ||
5316 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5317 return CombineTo(N, LoOpt, LoOpt);
5318 }
5319
5320 if (HiExists) {
5321 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5322 AddToWorklist(Hi.getNode());
5323 SDValue HiOpt = combine(Hi.getNode());
5324 if (HiOpt.getNode() && HiOpt != Hi &&
5325 (!LegalOperations ||
5326 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5327 return CombineTo(N, HiOpt, HiOpt);
5328 }
5329
5330 return SDValue();
5331}
5332
5333SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5334 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5335 return Res;
5336
5337 SDValue N0 = N->getOperand(0);
5338 SDValue N1 = N->getOperand(1);
5339 EVT VT = N->getValueType(0);
5340 SDLoc DL(N);
5341
5342 // Constant fold.
5343 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5344 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5345
5346 // canonicalize constant to RHS (vector doesn't have to splat)
5347 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5348 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5349 return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5350
5351 // If the type twice as wide is legal, transform the multiply to a wider
5352 // multiply plus a shift.
5353 if (VT.isSimple() && !VT.isVector()) {
5354 MVT Simple = VT.getSimpleVT();
5355 unsigned SimpleSize = Simple.getSizeInBits();
5356 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5357 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5358 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5359 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5360 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5361 // Compute the high part as N1.
5362 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5363 DAG.getConstant(SimpleSize, DL,
5364 getShiftAmountTy(Lo.getValueType())));
5365 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5366 // Compute the low part as N0.
5367 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5368 return CombineTo(N, Lo, Hi);
5369 }
5370 }
5371
5372 return SDValue();
5373}
5374
5375SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5376 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5377 return Res;
5378
5379 SDValue N0 = N->getOperand(0);
5380 SDValue N1 = N->getOperand(1);
5381 EVT VT = N->getValueType(0);
5382 SDLoc DL(N);
5383
5384 // Constant fold.
5385 if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5386 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5387
5388 // canonicalize constant to RHS (vector doesn't have to splat)
5389 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5390 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5391 return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5392
5393 // (umul_lohi N0, 0) -> (0, 0)
5394 if (isNullConstant(N1)) {
5395 SDValue Zero = DAG.getConstant(0, DL, VT);
5396 return CombineTo(N, Zero, Zero);
5397 }
5398
5399 // (umul_lohi N0, 1) -> (N0, 0)
5400 if (isOneConstant(N1)) {
5401 SDValue Zero = DAG.getConstant(0, DL, VT);
5402 return CombineTo(N, N0, Zero);
5403 }
5404
5405 // If the type twice as wide is legal, transform the multiply to a wider
5406 // multiply plus a shift.
5407 if (VT.isSimple() && !VT.isVector()) {
5408 MVT Simple = VT.getSimpleVT();
5409 unsigned SimpleSize = Simple.getSizeInBits();
5410 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5411 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5412 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5413 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5414 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
5415 // Compute the high part as N1.
5416 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5417 DAG.getConstant(SimpleSize, DL,
5418 getShiftAmountTy(Lo.getValueType())));
5419 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
5420 // Compute the low part as N0.
5421 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5422 return CombineTo(N, Lo, Hi);
5423 }
5424 }
5425
5426 return SDValue();
5427}
5428
5429SDValue DAGCombiner::visitMULO(SDNode *N) {
5430 SDValue N0 = N->getOperand(0);
5431 SDValue N1 = N->getOperand(1);
5432 EVT VT = N0.getValueType();
5433 bool IsSigned = (ISD::SMULO == N->getOpcode());
5434
5435 EVT CarryVT = N->getValueType(1);
5436 SDLoc DL(N);
5437
5438 ConstantSDNode *N0C = isConstOrConstSplat(N0);
5439 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5440
5441 // fold operation with constant operands.
5442 // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5443 // multiple results.
5444 if (N0C && N1C) {
5445 bool Overflow;
5446 APInt Result =
5447 IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5448 : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5449 return CombineTo(N, DAG.getConstant(Result, DL, VT),
5450 DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5451 }
5452
5453 // canonicalize constant to RHS.
5454 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5455 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5456 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5457
5458 // fold (mulo x, 0) -> 0 + no carry out
5459 if (isNullOrNullSplat(N1))
5460 return CombineTo(N, DAG.getConstant(0, DL, VT),
5461 DAG.getConstant(0, DL, CarryVT));
5462
5463 // (mulo x, 2) -> (addo x, x)
5464 // FIXME: This needs a freeze.
5465 if (N1C && N1C->getAPIntValue() == 2 &&
5466 (!IsSigned || VT.getScalarSizeInBits() > 2))
5467 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5468 N->getVTList(), N0, N0);
5469
5470 // A 1 bit SMULO overflows if both inputs are 1.
5471 if (IsSigned && VT.getScalarSizeInBits() == 1) {
5472 SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5473 SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5474 DAG.getConstant(0, DL, VT), ISD::SETNE);
5475 return CombineTo(N, And, Cmp);
5476 }
5477
5478 // If it cannot overflow, transform into a mul.
5479 if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5480 return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5481 DAG.getConstant(0, DL, CarryVT));
5482 return SDValue();
5483}
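// A standalone sketch of the constant-fold path above for a 32-bit SMULO:
// widen to 64 bits, multiply, and flag overflow when the result does not
// round-trip through 32 bits (analogous to APInt::smul_ov). The helper name
// is an illustrative assumption; narrowing assumes two's-complement wrap.
static int32_t smuloSketch(int32_t A, int32_t B, bool &Overflow) {
  int64_t Wide = int64_t(A) * int64_t(B);
  int32_t Narrow = int32_t(Wide);
  Overflow = (int64_t(Narrow) != Wide);
  return Narrow;
}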
5484
5485// Function to calculate whether the Min/Max pair of SDNodes (potentially
5486// swapped around) make a signed saturate pattern, clamping to between a signed
5487// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and 2^BW.
5488// Returns the node being clamped and the bitwidth of the clamp in BW. Should
5489// work with both SMIN/SMAX nodes and setcc/select combo. The operands are the
5490 // same as SimplifySelectCC. N0<N1 ? N2 : N3.
5491 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5492 SDValue N3, ISD::CondCode CC, unsigned &BW,
5493 bool &Unsigned, SelectionDAG &DAG) {
5494 auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5495 ISD::CondCode CC) {
5496 // The compare and select operand should be the same or the select operands
5497 // should be truncated versions of the comparison.
5498 if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5499 return 0;
5500 // The constants need to be the same or a truncated version of each other.
5501 ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5502 ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5503 if (!N1C || !N3C)
5504 return 0;
5505 const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5506 const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5507 if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5508 return 0;
5509 return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5510 };
5511
5512 // Check the initial value is a SMIN/SMAX equivalent.
5513 unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5514 if (!Opcode0)
5515 return SDValue();
5516
5517 // We could only need one range check, if the fptosi could never produce
5518 // the upper value.
5519 if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5520 if (isNullOrNullSplat(N3)) {
5521 EVT IntVT = N0.getValueType().getScalarType();
5522 EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5523 if (FPVT.isSimple()) {
5524 Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5525 const fltSemantics &Semantics = InputTy->getFltSemantics();
5526 uint32_t MinBitWidth =
5527 APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5528 if (IntVT.getSizeInBits() >= MinBitWidth) {
5529 Unsigned = true;
5530 BW = PowerOf2Ceil(MinBitWidth);
5531 return N0;
5532 }
5533 }
5534 }
5535 }
5536
5537 SDValue N00, N01, N02, N03;
5538 ISD::CondCode N0CC;
5539 switch (N0.getOpcode()) {
5540 case ISD::SMIN:
5541 case ISD::SMAX:
5542 N00 = N02 = N0.getOperand(0);
5543 N01 = N03 = N0.getOperand(1);
5544 N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5545 break;
5546 case ISD::SELECT_CC:
5547 N00 = N0.getOperand(0);
5548 N01 = N0.getOperand(1);
5549 N02 = N0.getOperand(2);
5550 N03 = N0.getOperand(3);
5551 N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5552 break;
5553 case ISD::SELECT:
5554 case ISD::VSELECT:
5555 if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5556 return SDValue();
5557 N00 = N0.getOperand(0).getOperand(0);
5558 N01 = N0.getOperand(0).getOperand(1);
5559 N02 = N0.getOperand(1);
5560 N03 = N0.getOperand(2);
5561 N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5562 break;
5563 default:
5564 return SDValue();
5565 }
5566
5567 unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5568 if (!Opcode1 || Opcode0 == Opcode1)
5569 return SDValue();
5570
5571 ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5572 ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5573 if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5574 return SDValue();
5575
5576 const APInt &MinC = MinCOp->getAPIntValue();
5577 const APInt &MaxC = MaxCOp->getAPIntValue();
5578 APInt MinCPlus1 = MinC + 1;
5579 if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5580 BW = MinCPlus1.exactLogBase2() + 1;
5581 Unsigned = false;
5582 return N02;
5583 }
5584
5585 if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5586 BW = MinCPlus1.exactLogBase2();
5587 Unsigned = true;
5588 return N02;
5589 }
5590
5591 return SDValue();
5592}
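// A standalone sketch of the signed clamp shape this helper recognizes, for a
// bit width BW between 1 and 63: smin/smax (or the equivalent setcc/select
// pair) clamping to [-2^(BW-1), 2^(BW-1)-1]. The helper name is an assumption.
static int64_t signedSatClampSketch(int64_t X, unsigned BW) {
  int64_t Max = (int64_t(1) << (BW - 1)) - 1;
  int64_t Min = -Max - 1;
  return X < Min ? Min : (X > Max ? Max : X);
}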
5593
5594 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5595 SDValue N3, ISD::CondCode CC,
5596 SelectionDAG &DAG) {
5597 unsigned BW;
5598 bool Unsigned;
5599 SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5600 if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5601 return SDValue();
5602 EVT FPVT = Fp.getOperand(0).getValueType();
5603 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5604 if (FPVT.isVector())
5605 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5606 FPVT.getVectorElementCount());
5607 unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5608 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5609 return SDValue();
5610 SDLoc DL(Fp);
5611 SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5612 DAG.getValueType(NewVT.getScalarType()));
5613 return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5614}
5615
5616 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5617 SDValue N3, ISD::CondCode CC,
5618 SelectionDAG &DAG) {
5619 // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
5620 // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5621 // be truncated versions of the setcc (N0/N1).
5622 if ((N0 != N2 &&
5623 (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5624 N0.getOpcode() != ISD::FP_TO_UINT)
5625 return SDValue();
5626 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5627 ConstantSDNode *N3C = isConstOrConstSplat(N3);
5628 if (!N1C || !N3C)
5629 return SDValue();
5630 const APInt &C1 = N1C->getAPIntValue();
5631 const APInt &C3 = N3C->getAPIntValue();
5632 if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5633 C1 != C3.zext(C1.getBitWidth()))
5634 return SDValue();
5635
5636 unsigned BW = (C1 + 1).exactLogBase2();
5637 EVT FPVT = N0.getOperand(0).getValueType();
5638 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5639 if (FPVT.isVector())
5640 NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5641 FPVT.getVectorElementCount());
5642 if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5643 FPVT, NewVT))
5644 return SDValue();
5645
5646 SDValue Sat =
5647 DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5648 DAG.getValueType(NewVT.getScalarType()));
5649 return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5650}
5651
5652SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5653 SDValue N0 = N->getOperand(0);
5654 SDValue N1 = N->getOperand(1);
5655 EVT VT = N0.getValueType();
5656 unsigned Opcode = N->getOpcode();
5657 SDLoc DL(N);
5658
5659 // fold operation with constant operands.
5660 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5661 return C;
5662
5663 // If the operands are the same, this is a no-op.
5664 if (N0 == N1)
5665 return N0;
5666
5667 // canonicalize constant to RHS
5668 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5669 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5670 return DAG.getNode(Opcode, DL, VT, N1, N0);
5671
5672 // fold vector ops
5673 if (VT.isVector())
5674 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5675 return FoldedVOp;
5676
5677 // reassociate minmax
5678 if (SDValue RMINMAX = reassociateOps(Opcode, DL, N0, N1, N->getFlags()))
5679 return RMINMAX;
5680
5681 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5682 // Only do this if:
5683 // 1. The current op isn't legal and the flipped is.
5684 // 2. The saturation pattern is broken by canonicalization in InstCombine.
5685 bool IsOpIllegal = !TLI.isOperationLegal(Opcode, VT);
5686 bool IsSatBroken = Opcode == ISD::UMIN && N0.getOpcode() == ISD::SMAX;
5687 if ((IsSatBroken || IsOpIllegal) && (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5688 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5689 unsigned AltOpcode;
5690 switch (Opcode) {
5691 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5692 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5693 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5694 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5695 default: llvm_unreachable("Unknown MINMAX opcode");
5696 }
5697 if ((IsSatBroken && IsOpIllegal) || TLI.isOperationLegal(AltOpcode, VT))
5698 return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5699 }
5700
5701 if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5702 if (SDValue S = PerformMinMaxFpToSatCombine(
5703 N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5704 return S;
5705 if (Opcode == ISD::UMIN)
5706 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5707 return S;
5708
5709 // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5710 auto ReductionOpcode = [](unsigned Opcode) {
5711 switch (Opcode) {
5712 case ISD::SMIN:
5713 return ISD::VECREDUCE_SMIN;
5714 case ISD::SMAX:
5715 return ISD::VECREDUCE_SMAX;
5716 case ISD::UMIN:
5717 return ISD::VECREDUCE_UMIN;
5718 case ISD::UMAX:
5719 return ISD::VECREDUCE_UMAX;
5720 default:
5721 llvm_unreachable("Unexpected opcode");
5722 }
5723 };
5724 if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5725 SDLoc(N), VT, N0, N1))
5726 return SD;
5727
5728 // Simplify the operands using demanded-bits information.
5729 if (SimplifyDemandedBits(SDValue(N, 0)))
5730 return SDValue(N, 0);
5731
5732 return SDValue();
5733}
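// A standalone sketch of the signed/unsigned flip above: when the sign bits
// of both operands are known zero, the signed and unsigned orderings agree,
// so SMIN may be rewritten as UMIN (and similarly for the other pairs) when
// the flipped opcode is the legal one. The name and width are assumptions.
static uint32_t sminAsUminSketch(uint32_t A, uint32_t B) {
  // Precondition: the top bit of A and of B is clear.
  return A < B ? A : B;                    // equals the signed minimum here
}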
5734
5735/// If this is a bitwise logic instruction and both operands have the same
5736/// opcode, try to sink the other opcode after the logic instruction.
5737SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5738 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5739 EVT VT = N0.getValueType();
5740 unsigned LogicOpcode = N->getOpcode();
5741 unsigned HandOpcode = N0.getOpcode();
5742 assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5743 assert(HandOpcode == N1.getOpcode() && "Bad input!");
5744
5745 // Bail early if none of these transforms apply.
5746 if (N0.getNumOperands() == 0)
5747 return SDValue();
5748
5749 // FIXME: We should check number of uses of the operands to not increase
5750 // the instruction count for all transforms.
5751
5752 // Handle size-changing casts (or sign_extend_inreg).
5753 SDValue X = N0.getOperand(0);
5754 SDValue Y = N1.getOperand(0);
5755 EVT XVT = X.getValueType();
5756 SDLoc DL(N);
5757 if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5758 (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5759 N0.getOperand(1) == N1.getOperand(1))) {
5760 // If both operands have other uses, this transform would create extra
5761 // instructions without eliminating anything.
5762 if (!N0.hasOneUse() && !N1.hasOneUse())
5763 return SDValue();
5764 // We need matching integer source types.
5765 if (XVT != Y.getValueType())
5766 return SDValue();
5767 // Don't create an illegal op during or after legalization. Don't ever
5768 // create an unsupported vector op.
5769 if ((VT.isVector() || LegalOperations) &&
5770 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5771 return SDValue();
5772 // Avoid infinite looping with PromoteIntBinOp.
5773 // TODO: Should we apply desirable/legal constraints to all opcodes?
5774 if ((HandOpcode == ISD::ANY_EXTEND ||
5775 HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5776 LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5777 return SDValue();
5778 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5779 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5780 if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5781 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5782 return DAG.getNode(HandOpcode, DL, VT, Logic);
5783 }
5784
5785 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5786 if (HandOpcode == ISD::TRUNCATE) {
5787 // If both operands have other uses, this transform would create extra
5788 // instructions without eliminating anything.
5789 if (!N0.hasOneUse() && !N1.hasOneUse())
5790 return SDValue();
5791 // We need matching source types.
5792 if (XVT != Y.getValueType())
5793 return SDValue();
5794 // Don't create an illegal op during or after legalization.
5795 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5796 return SDValue();
5797 // Be extra careful sinking truncate. If it's free, there's no benefit in
5798 // widening a binop. Also, don't create a logic op on an illegal type.
5799 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5800 return SDValue();
5801 if (!TLI.isTypeLegal(XVT))
5802 return SDValue();
5803 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5804 return DAG.getNode(HandOpcode, DL, VT, Logic);
5805 }
5806
5807 // For binops SHL/SRL/SRA/AND:
5808 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5809 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5810 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5811 N0.getOperand(1) == N1.getOperand(1)) {
5812 // If either operand has other uses, this transform is not an improvement.
5813 if (!N0.hasOneUse() || !N1.hasOneUse())
5814 return SDValue();
5815 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5816 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5817 }
5818
5819 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5820 if (HandOpcode == ISD::BSWAP) {
5821 // If either operand has other uses, this transform is not an improvement.
5822 if (!N0.hasOneUse() || !N1.hasOneUse())
5823 return SDValue();
5824 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5825 return DAG.getNode(HandOpcode, DL, VT, Logic);
5826 }
5827
5828 // For funnel shifts FSHL/FSHR:
5829 // logic_op (OP x, x1, s), (OP y, y1, s) -->
5830 // --> OP (logic_op x, y), (logic_op, x1, y1), s
5831 if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5832 N0.getOperand(2) == N1.getOperand(2)) {
5833 if (!N0.hasOneUse() || !N1.hasOneUse())
5834 return SDValue();
5835 SDValue X1 = N0.getOperand(1);
5836 SDValue Y1 = N1.getOperand(1);
5837 SDValue S = N0.getOperand(2);
5838 SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5839 SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5840 return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5841 }
5842
5843 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5844 // Only perform this optimization up until type legalization, before
5845 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5846 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5847 // we don't want to undo this promotion.
5848 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5849 // on scalars.
5850 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5851 Level <= AfterLegalizeTypes) {
5852 // Input types must be integer and the same.
5853 if (XVT.isInteger() && XVT == Y.getValueType() &&
5854 !(VT.isVector() && TLI.isTypeLegal(VT) &&
5855 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5856 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5857 return DAG.getNode(HandOpcode, DL, VT, Logic);
5858 }
5859 }
5860
5861 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5862 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5863 // If both shuffles use the same mask, and both shuffle within a single
5864 // vector, then it is worthwhile to move the swizzle after the operation.
5865 // The type-legalizer generates this pattern when loading illegal
5866 // vector types from memory. In many cases this allows additional shuffle
5867 // optimizations.
5868 // There are other cases where moving the shuffle after the xor/and/or
5869 // is profitable even if shuffles don't perform a swizzle.
5870 // If both shuffles use the same mask, and both shuffles have the same first
5871 // or second operand, then it might still be profitable to move the shuffle
5872 // after the xor/and/or operation.
5873 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5874 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5875 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5876 assert(X.getValueType() == Y.getValueType() &&
5877 "Inputs to shuffles are not the same type");
5878
5879 // Check that both shuffles use the same mask. The masks are known to be of
5880 // the same length because the result vector type is the same.
5881 // Check also that shuffles have only one use to avoid introducing extra
5882 // instructions.
5883 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5884 !SVN0->getMask().equals(SVN1->getMask()))
5885 return SDValue();
5886
5887 // Don't try to fold this node if it requires introducing a
5888 // build vector of all zeros that might be illegal at this stage.
5889 SDValue ShOp = N0.getOperand(1);
5890 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5891 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5892
5893 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5894 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5895 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5896 N0.getOperand(0), N1.getOperand(0));
5897 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5898 }
5899
5900 // Don't try to fold this node if it requires introducing a
5901 // build vector of all zeros that might be illegal at this stage.
5902 ShOp = N0.getOperand(0);
5903 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5904 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5905
5906 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5907 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5908 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5909 N1.getOperand(1));
5910 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5911 }
5912 }
5913
5914 return SDValue();
5915}
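// A standalone sketch of the reassociation above for the zero-extend case:
// (zext X) & (zext Y) equals zext (X & Y), so the logic op can be done in the
// narrow type and extended once. Names and widths are illustrative.
static uint32_t hoistLogicThroughZextSketch(uint8_t X, uint8_t Y) {
  return uint32_t(uint8_t(X & Y));         // == (uint32_t(X) & uint32_t(Y))
}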
5916
5917/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5918SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5919 const SDLoc &DL) {
5920 SDValue LL, LR, RL, RR, N0CC, N1CC;
5921 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5922 !isSetCCEquivalent(N1, RL, RR, N1CC))
5923 return SDValue();
5924
5925 assert(N0.getValueType() == N1.getValueType() &&
5926 "Unexpected operand types for bitwise logic op");
5927 assert(LL.getValueType() == LR.getValueType() &&
5928 RL.getValueType() == RR.getValueType() &&
5929 "Unexpected operand types for setcc");
5930
5931 // If we're here post-legalization or the logic op type is not i1, the logic
5932 // op type must match a setcc result type. Also, all folds require new
5933 // operations on the left and right operands, so those types must match.
5934 EVT VT = N0.getValueType();
5935 EVT OpVT = LL.getValueType();
5936 if (LegalOperations || VT.getScalarType() != MVT::i1)
5937 if (VT != getSetCCResultType(OpVT))
5938 return SDValue();
5939 if (OpVT != RL.getValueType())
5940 return SDValue();
5941
5942 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5943 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5944 bool IsInteger = OpVT.isInteger();
5945 if (LR == RR && CC0 == CC1 && IsInteger) {
5946 bool IsZero = isNullOrNullSplat(LR);
5947 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5948
5949 // All bits clear?
5950 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5951 // All sign bits clear?
5952 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5953 // Any bits set?
5954 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5955 // Any sign bits set?
5956 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5957
5958 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
5959 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5960 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
5961 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
5962 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5963 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5964 AddToWorklist(Or.getNode());
5965 return DAG.getSetCC(DL, VT, Or, LR, CC1);
5966 }
5967
5968 // All bits set?
5969 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5970 // All sign bits set?
5971 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5972 // Any bits clear?
5973 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5974 // Any sign bits clear?
5975 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5976
5977 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5978 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
5979 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5980 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
5981 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5982 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5983 AddToWorklist(And.getNode());
5984 return DAG.getSetCC(DL, VT, And, LR, CC1);
5985 }
5986 }
5987
5988 // TODO: What is the 'or' equivalent of this fold?
5989 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5990 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5991 IsInteger && CC0 == ISD::SETNE &&
5992 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5993 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5994 SDValue One = DAG.getConstant(1, DL, OpVT);
5995 SDValue Two = DAG.getConstant(2, DL, OpVT);
5996 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5997 AddToWorklist(Add.getNode());
5998 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5999 }
6000
6001 // Try more general transforms if the predicates match and the only user of
6002 // the compares is the 'and' or 'or'.
6003 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6004 N0.hasOneUse() && N1.hasOneUse()) {
6005 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6006 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6007 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6008 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6009 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6010 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6011 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6012 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6013 }
6014
6015 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6016 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6017 // Match a shared variable operand and 2 non-opaque constant operands.
6018 auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6019 // The difference of the constants must be a single bit.
6020 const APInt &CMax =
6021 APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6022 const APInt &CMin =
6023 APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6024 return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6025 };
6026 if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6027 // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6028 // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6029 SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6030 SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6031 SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6032 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6033 SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6034 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6035 SDValue Zero = DAG.getConstant(0, DL, OpVT);
6036 return DAG.getSetCC(DL, VT, And, Zero, CC0);
6037 }
6038 }
6039 }
6040
6041 // Canonicalize equivalent operands to LL == RL.
6042 if (LL == RR && LR == RL) {
6043 CC1 = ISD::getSetCCSwappedOperands(CC1);
6044 std::swap(RL, RR);
6045 }
6046
6047 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6048 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6049 if (LL == RL && LR == RR) {
6050 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6051 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6052 if (NewCC != ISD::SETCC_INVALID &&
6053 (!LegalOperations ||
6054 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6055 TLI.isOperationLegal(ISD::SETCC, OpVT))))
6056 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6057 }
6058
6059 return SDValue();
6060}
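// A standalone sketch of two of the folds above on 32-bit scalars. Names are
// illustrative assumptions; the real code builds the equivalent SDNodes.
static bool bothZeroSketch(uint32_t X, uint32_t Y) {
  return (X | Y) == 0;                     // == (X == 0) && (Y == 0)
}
static bool neitherConstSketch(uint32_t X, uint32_t CMin, uint32_t CMax) {
  // Requires CMax - CMin to be a power of two (the MatchDiffPow2 guard).
  return ((X - CMin) & ~(CMax - CMin)) != 0;  // == (X != CMin) && (X != CMax)
}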
6061
6062static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6063 SelectionDAG &DAG) {
6064 return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6065}
6066
6067static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6068 SelectionDAG &DAG) {
6069 return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6070}
6071
6072static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6073 ISD::CondCode CC, unsigned OrAndOpcode,
6074 SelectionDAG &DAG,
6075 bool isFMAXNUMFMINNUM_IEEE,
6076 bool isFMAXNUMFMINNUM) {
6077 // The optimization cannot be applied for all the predicates because
6078 // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6079 // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6080 // applied at all if one of the operands is a signaling NaN.
6081
6082 // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
6083 // are non NaN values.
6084 if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6085 ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6086 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6087 isFMAXNUMFMINNUM_IEEE
6088 ? ISD::FMINNUM_IEEE
6089 : ISD::DELETED_NODE;
6090 else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6091 (OrAndOpcode == ISD::OR)) ||
6092 ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6093 (OrAndOpcode == ISD::AND)))
6094 return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6095 isFMAXNUMFMINNUM_IEEE
6096 ? ISD::FMAXNUM_IEEE
6097 : ISD::DELETED_NODE;
6098 // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6099 // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6100 // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6101 // that there are not any sNaNs, then the optimization is not valid
6102 // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6103 // the optimization using FMINNUM/FMAXNUM for the following cases. If
6104 // we can prove that we do not have any sNaNs, then we can do the
6105 // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6106 // cases.
6107 else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6108 (OrAndOpcode == ISD::OR)) ||
6109 ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6110 (OrAndOpcode == ISD::AND)))
6111 return isFMAXNUMFMINNUM ? ISD::FMINNUM
6112 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6113 isFMAXNUMFMINNUM_IEEE
6114 ? ISD::FMINNUM_IEEE
6115 : ISD::DELETED_NODE;
6116 else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6117 (OrAndOpcode == ISD::OR)) ||
6118 ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6119 (OrAndOpcode == ISD::AND)))
6120 return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6121 : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6122 isFMAXNUMFMINNUM_IEEE
6123 ? ISD::FMAXNUM_IEEE
6124 : ISD::DELETED_NODE;
6125 return ISD::DELETED_NODE;
6126}
6127
6128 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6129 using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6130 assert(
6131 (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6132 "Invalid Op to combine SETCC with");
6133
6134 // TODO: Search past casts/truncates.
6135 SDValue LHS = LogicOp->getOperand(0);
6136 SDValue RHS = LogicOp->getOperand(1);
6137 if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6138 !LHS->hasOneUse() || !RHS->hasOneUse())
6139 return SDValue();
6140
6141 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6142 AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6143 LogicOp, LHS.getNode(), RHS.getNode());
6144
6145 SDValue LHS0 = LHS->getOperand(0);
6146 SDValue RHS0 = RHS->getOperand(0);
6147 SDValue LHS1 = LHS->getOperand(1);
6148 SDValue RHS1 = RHS->getOperand(1);
6149 // TODO: We don't actually need a splat here, for vectors we just need the
6150 // invariants to hold for each element.
6151 auto *LHS1C = isConstOrConstSplat(LHS1);
6152 auto *RHS1C = isConstOrConstSplat(RHS1);
6153 ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6154 ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6155 EVT VT = LogicOp->getValueType(0);
6156 EVT OpVT = LHS0.getValueType();
6157 SDLoc DL(LogicOp);
6158
6159 // Check if the operands of an and/or operation are comparisons and if they
6160 // compare against the same value. Replace the and/or-cmp-cmp sequence with
6161 // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
6162 // sequence will be replaced with min-cmp sequence:
6163 // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
6164 // and and-cmp-cmp will be replaced with max-cmp sequence:
6165 // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
6166 // The optimization does not work for `==` or `!=`.
6167 // The two comparisons should have either the same predicate or the
6168 // predicate of one of the comparisons is the opposite of the other one.
6169 bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6170 TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6171 bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6172 TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6173 if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6174 TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6175 TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6176 TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6177 (OpVT.isFloatingPoint() &&
6178 (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6179 !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6180 CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6181 CCL != ISD::SETTRUE &&
6182 (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6183
6184 SDValue CommonValue, Operand1, Operand2;
6185 ISD::CondCode CC = ISD::SETCC_INVALID;
6186 if (CCL == CCR) {
6187 if (LHS0 == RHS0) {
6188 CommonValue = LHS0;
6189 Operand1 = LHS1;
6190 Operand2 = RHS1;
6191 CC = CCL;
6192 } else if (LHS1 == RHS1) {
6193 CommonValue = LHS1;
6194 Operand1 = LHS0;
6195 Operand2 = RHS0;
6196 CC = CCL;
6197 }
6198 } else {
6199 assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6200 if (LHS0 == RHS1) {
6201 CommonValue = LHS0;
6202 Operand1 = LHS1;
6203 Operand2 = RHS0;
6204 CC = CCR;
6205 } else if (RHS0 == LHS1) {
6206 CommonValue = LHS1;
6207 Operand1 = LHS0;
6208 Operand2 = RHS1;
6209 CC = CCL;
6210 }
6211 }
6212
6213 // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6214 // handle it using OR/AND.
6215 if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6216 CC = ISD::SETCC_INVALID;
6217 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6218 CC = ISD::SETCC_INVALID;
6219
6220 if (CC != ISD::SETCC_INVALID) {
6221 unsigned NewOpcode = ISD::DELETED_NODE;
6222 bool IsSigned = isSignedIntSetCC(CC);
6223 if (OpVT.isInteger()) {
6224 bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6225 CC == ISD::SETLT || CC == ISD::SETULT);
6226 bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6227 if (IsLess == IsOr)
6228 NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6229 else
6230 NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6231 } else if (OpVT.isFloatingPoint())
6232 NewOpcode =
6233 getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6234 DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6235
6236 if (NewOpcode != ISD::DELETED_NODE) {
6237 SDValue MinMaxValue =
6238 DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6239 return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6240 }
6241 }
6242 }
6243
6244 if (TargetPreference == AndOrSETCCFoldKind::None)
6245 return SDValue();
6246
6247 if (CCL == CCR &&
6248 CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6249 LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6250 const APInt &APLhs = LHS1C->getAPIntValue();
6251 const APInt &APRhs = RHS1C->getAPIntValue();
6252
6253 // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6254 // case this is just a compare).
6255 if (APLhs == (-APRhs) &&
6256 ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6257 DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6258 const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6259 // (icmp eq A, C) | (icmp eq A, -C)
6260 // -> (icmp eq Abs(A), C)
6261 // (icmp ne A, C) & (icmp ne A, -C)
6262 // -> (icmp ne Abs(A), C)
6263 SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6264 return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6265 DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6266 } else if (TargetPreference &
6267 (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6268
6269 // AndOrSETCCFoldKind::AddAnd:
6270 // A == C0 | A == C1
6271 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6272 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6273 // A != C0 & A != C1
6274 // IF IsPow2(smax(C0, C1)-smin(C0, C1))
6275 // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6276
6277 // AndOrSETCCFoldKind::NotAnd:
6278 // A == C0 | A == C1
6279 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6280 // -> ~A & smin(C0, C1) == 0
6281 // A != C0 & A != C1
6282 // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6283 // -> ~A & smin(C0, C1) != 0
6284
6285 const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6286 const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6287 APInt Dif = MaxC - MinC;
6288 if (!Dif.isZero() && Dif.isPowerOf2()) {
6289 if (MaxC.isAllOnes() &&
6290 (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6291 SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6292 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6293 DAG.getConstant(MinC, DL, OpVT));
6294 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6295 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6296 } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6297
6298 SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6299 DAG.getConstant(-MinC, DL, OpVT));
6300 SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6301 DAG.getConstant(~Dif, DL, OpVT));
6302 return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6303 DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6304 }
6305 }
6306 }
6307 }
6308
6309 return SDValue();
6310}
6311
6312// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6313// We canonicalize to the `select` form in the middle end, but the `and` form
6314// gets better codegen on all tested targets (arm, x86, riscv).
6315static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6316 const SDLoc &DL, SelectionDAG &DAG) {
6317 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6318 if (!isNullConstant(F))
6319 return SDValue();
6320
6321 EVT CondVT = Cond.getValueType();
6322 if (TLI.getBooleanContents(CondVT) !=
6323 TargetLowering::ZeroOrOneBooleanContent)
6324 return SDValue();
6325
6326 if (T.getOpcode() != ISD::AND)
6327 return SDValue();
6328
6329 if (!isOneConstant(T.getOperand(1)))
6330 return SDValue();
6331
6332 EVT OpVT = T.getValueType();
6333
6334 SDValue CondMask =
6335 OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6336 return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6337}
6338
6339/// This contains all DAGCombine rules which reduce two values combined by
6340/// an And operation to a single value. This makes them reusable in the context
6341/// of visitSELECT(). Rules involving constants are not included as
6342/// visitSELECT() already handles those cases.
6343SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6344 EVT VT = N1.getValueType();
6345 SDLoc DL(N);
6346
6347 // fold (and x, undef) -> 0
6348 if (N0.isUndef() || N1.isUndef())
6349 return DAG.getConstant(0, DL, VT);
6350
6351 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6352 return V;
6353
6354 // Canonicalize:
6355 // and(x, add) -> and(add, x)
6356 if (N1.getOpcode() == ISD::ADD)
6357 std::swap(N0, N1);
6358
6359 // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6360 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6361 VT.isScalarInteger() && VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6362 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6363 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6364 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6365 // immediate for an add, but it is legal if its top c2 bits are set,
6366 // transform the ADD so the immediate doesn't need to be materialized
6367 // in a register.
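// For example (illustrative values), if c1 = 0x00000000FFFFF000 cannot be
// encoded directly but c2 = 32, the (lshr y, 32) operand already zeroes the
// top 32 bits of the AND result, so c1 may be replaced by the sign-extended
// 0xFFFFFFFFFFFFF000, which some targets can encode as an add immediate.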
6368 APInt ADDC = ADDI->getAPIntValue();
6369 APInt SRLC = SRLI->getAPIntValue();
6370 if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6371 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6372 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6373 SRLC.getZExtValue());
6374 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6375 ADDC |= Mask;
6376 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6377 SDLoc DL0(N0);
6378 SDValue NewAdd =
6379 DAG.getNode(ISD::ADD, DL0, VT,
6380 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6381 CombineTo(N0.getNode(), NewAdd);
6382 // Return N so it doesn't get rechecked!
6383 return SDValue(N, 0);
6384 }
6385 }
6386 }
6387 }
6388 }
6389 }
6390
6391 return SDValue();
6392}
6393
6394bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6395 EVT LoadResultTy, EVT &ExtVT) {
6396 if (!AndC->getAPIntValue().isMask())
6397 return false;
6398
6399 unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6400
6401 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6402 EVT LoadedVT = LoadN->getMemoryVT();
6403
6404 if (ExtVT == LoadedVT &&
6405 (!LegalOperations ||
6406 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6407 // ZEXTLOAD will match without needing to change the size of the value being
6408 // loaded.
6409 return true;
6410 }
6411
6412 // Do not change the width of volatile or atomic loads.
6413 if (!LoadN->isSimple())
6414 return false;
6415
6416 // Do not generate loads of non-round integer types since these can
6417 // be expensive (and would be wrong if the type is not byte sized).
6418 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6419 return false;
6420
6421 if (LegalOperations &&
6422 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6423 return false;
6424
6425 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6426 return false;
6427
6428 return true;
6429}
6430
6431bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6432 ISD::LoadExtType ExtType, EVT &MemVT,
6433 unsigned ShAmt) {
6434 if (!LDST)
6435 return false;
6436 // Only allow byte offsets.
6437 if (ShAmt % 8)
6438 return false;
6439
6440 // Do not generate loads of non-round integer types since these can
6441 // be expensive (and would be wrong if the type is not byte sized).
6442 if (!MemVT.isRound())
6443 return false;
6444
6446 // Don't change the width of volatile or atomic loads.
6446 if (!LDST->isSimple())
6447 return false;
6448
6449 EVT LdStMemVT = LDST->getMemoryVT();
6450
6451 // Bail out when changing the scalable property, since we can't be sure that
6452 // we're actually narrowing here.
6453 if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6454 return false;
6455
6456 // Verify that we are actually reducing a load width here.
6457 if (LdStMemVT.bitsLT(MemVT))
6458 return false;
6459
6460 // Ensure that this isn't going to produce an unsupported memory access.
6461 if (ShAmt) {
6462 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6463 const unsigned ByteShAmt = ShAmt / 8;
6464 const Align LDSTAlign = LDST->getAlign();
6465 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6466 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6467 LDST->getAddressSpace(), NarrowAlign,
6468 LDST->getMemOperand()->getFlags()))
6469 return false;
6470 }
6471
6472 // It's not possible to generate a constant of extended or untyped type.
6473 EVT PtrType = LDST->getBasePtr().getValueType();
6474 if (PtrType == MVT::Untyped || PtrType.isExtended())
6475 return false;
6476
6477 if (isa<LoadSDNode>(LDST)) {
6478 LoadSDNode *Load = cast<LoadSDNode>(LDST);
6479 // Don't transform one with multiple uses, this would require adding a new
6480 // load.
6481 if (!SDValue(Load, 0).hasOneUse())
6482 return false;
6483
6484 if (LegalOperations &&
6485 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6486 return false;
6487
6488 // For the transform to be legal, the load must produce only two values
6489 // (the value loaded and the chain). Don't transform a pre-increment
6490 // load, for example, which produces an extra value. Otherwise the
6491 // transformation is not equivalent, and the downstream logic to replace
6492 // uses gets things wrong.
6493 if (Load->getNumValues() > 2)
6494 return false;
6495
6496 // If the load that we're shrinking is an extload and we're not just
6497 // discarding the extension we can't simply shrink the load. Bail.
6498 // TODO: It would be possible to merge the extensions in some cases.
6499 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6500 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6501 return false;
6502
6503 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6504 return false;
6505 } else {
6506 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6507 StoreSDNode *Store = cast<StoreSDNode>(LDST);
6508 // Can't write outside the original store
6509 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6510 return false;
6511
6512 if (LegalOperations &&
6513 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6514 return false;
6515 }
6516 return true;
6517}
6518
6519bool DAGCombiner::SearchForAndLoads(SDNode *N,
6520 SmallVectorImpl<LoadSDNode*> &Loads,
6521 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6522 ConstantSDNode *Mask,
6523 SDNode *&NodeToMask) {
6524 // Recursively search for the operands, looking for loads which can be
6525 // narrowed.
6526 for (SDValue Op : N->op_values()) {
6527 if (Op.getValueType().isVector())
6528 return false;
6529
6530 // Some constants may need fixing up later if they are too large.
6531 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6532 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6533 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6534 NodesWithConsts.insert(N);
6535 continue;
6536 }
6537
6538 if (!Op.hasOneUse())
6539 return false;
6540
6541 switch(Op.getOpcode()) {
6542 case ISD::LOAD: {
6543 auto *Load = cast<LoadSDNode>(Op);
6544 EVT ExtVT;
6545 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6546 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6547
6548 // ZEXTLOAD is already small enough.
6549 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6550 ExtVT.bitsGE(Load->getMemoryVT()))
6551 continue;
6552
6553 // Use LE to convert equal sized loads to zext.
6554 if (ExtVT.bitsLE(Load->getMemoryVT()))
6555 Loads.push_back(Load);
6556
6557 continue;
6558 }
6559 return false;
6560 }
6561 case ISD::ZERO_EXTEND:
6562 case ISD::AssertZext: {
6563 unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6564 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6565 EVT VT = Op.getOpcode() == ISD::AssertZext ?
6566 cast<VTSDNode>(Op.getOperand(1))->getVT() :
6567 Op.getOperand(0).getValueType();
6568
6569 // We can accept extending nodes if the mask is wider or an equal
6570 // width to the original type.
6571 if (ExtVT.bitsGE(VT))
6572 continue;
6573 break;
6574 }
6575 case ISD::OR:
6576 case ISD::XOR:
6577 case ISD::AND:
6578 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6579 NodeToMask))
6580 return false;
6581 continue;
6582 }
6583
6584 // Allow one node which will be masked along with any loads found.
6585 if (NodeToMask)
6586 return false;
6587
6588 // Also ensure that the node to be masked only produces one data result.
6589 NodeToMask = Op.getNode();
6590 if (NodeToMask->getNumValues() > 1) {
6591 bool HasValue = false;
6592 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6593 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6594 if (VT != MVT::Glue && VT != MVT::Other) {
6595 if (HasValue) {
6596 NodeToMask = nullptr;
6597 return false;
6598 }
6599 HasValue = true;
6600 }
6601 }
6602 assert(HasValue && "Node to be masked has no data result?");
6603 }
6604 }
6605 return true;
6606}
6607
6608bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6609 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6610 if (!Mask)
6611 return false;
6612
6613 if (!Mask->getAPIntValue().isMask())
6614 return false;
6615
6616 // No need to do anything if the and directly uses a load.
6617 if (isa<LoadSDNode>(N->getOperand(0)))
6618 return false;
6619
6620 SmallVector<LoadSDNode*, 8> Loads;
6621 SmallPtrSet<SDNode*, 2> NodesWithConsts;
6622 SDNode *FixupNode = nullptr;
6623 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6624 if (Loads.empty())
6625 return false;
6626
6627 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6628 SDValue MaskOp = N->getOperand(1);
6629
6630 // If it exists, fixup the single node we allow in the tree that needs
6631 // masking.
6632 if (FixupNode) {
6633 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6634 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6635 FixupNode->getValueType(0),
6636 SDValue(FixupNode, 0), MaskOp);
6637 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6638 if (And.getOpcode() == ISD::AND)
6639 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6640 }
6641
6642 // Narrow any constants that need it.
6643 for (auto *LogicN : NodesWithConsts) {
6644 SDValue Op0 = LogicN->getOperand(0);
6645 SDValue Op1 = LogicN->getOperand(1);
6646
6647 if (isa<ConstantSDNode>(Op0))
6648 Op0 =
6649 DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6650
6651 if (isa<ConstantSDNode>(Op1))
6652 Op1 =
6653 DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6654
6655 if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6656 std::swap(Op0, Op1);
6657
6658 DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6659 }
6660
6661 // Create narrow loads.
6662 for (auto *Load : Loads) {
6663 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6664 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6665 SDValue(Load, 0), MaskOp);
6666 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6667 if (And.getOpcode() == ISD::AND)
6668 And = SDValue(
6669 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6670 SDValue NewLoad = reduceLoadWidth(And.getNode());
6671 assert(NewLoad &&
6672 "Shouldn't be masking the load if it can't be narrowed");
6673 CombineTo(Load, NewLoad, NewLoad.getValue(1));
6674 }
6675 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6676 return true;
6677 }
6678 return false;
6679}
6680
6681// Unfold
6682// x & (-1 'logical shift' y)
6683// To
6684// (x 'opposite logical shift' y) 'logical shift' y
6685// if it is better for performance.
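// For example, x & (-1 << y), which clears the y lowest bits, becomes
// (x >> y) << y, and x & (-1 >> y), which clears the y highest bits,
// becomes (x << y) >> y, so the shifted all-ones mask never needs to be
// materialized.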
6686SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6687 assert(N->getOpcode() == ISD::AND);
6688
6689 SDValue N0 = N->getOperand(0);
6690 SDValue N1 = N->getOperand(1);
6691
6692 // Do we actually prefer shifts over mask?
6693 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6694 return SDValue();
6695
6696 // Try to match (-1 '[outer] logical shift' y)
6697 unsigned OuterShift;
6698 unsigned InnerShift; // The opposite direction to the OuterShift.
6699 SDValue Y; // Shift amount.
6700 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6701 if (!M.hasOneUse())
6702 return false;
6703 OuterShift = M->getOpcode();
6704 if (OuterShift == ISD::SHL)
6705 InnerShift = ISD::SRL;
6706 else if (OuterShift == ISD::SRL)
6707 InnerShift = ISD::SHL;
6708 else
6709 return false;
6710 if (!isAllOnesConstant(M->getOperand(0)))
6711 return false;
6712 Y = M->getOperand(1);
6713 return true;
6714 };
6715
6716 SDValue X;
6717 if (matchMask(N1))
6718 X = N0;
6719 else if (matchMask(N0))
6720 X = N1;
6721 else
6722 return SDValue();
6723
6724 SDLoc DL(N);
6725 EVT VT = N->getValueType(0);
6726
6727 // tmp = x 'opposite logical shift' y
6728 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6729 // ret = tmp 'logical shift' y
6730 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6731
6732 return T1;
6733}
6734
6735/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6736/// For a target with a bit test, this is expected to become test + set and save
6737/// at least 1 instruction.
6738static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6739 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6740
6741 // Look through an optional extension.
6742 SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6743 if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6744 And0 = And0.getOperand(0);
6745 if (!isOneConstant(And1) || !And0.hasOneUse())
6746 return SDValue();
6747
6748 SDValue Src = And0;
6749
6750 // Attempt to find a 'not' op.
6751 // TODO: Should we favor test+set even without the 'not' op?
6752 bool FoundNot = false;
6753 if (isBitwiseNot(Src)) {
6754 FoundNot = true;
6755 Src = Src.getOperand(0);
6756
6757 // Look through an optional truncation. The source operand may not be the
6758 // same type as the original 'and', but that is ok because we are masking
6759 // off everything but the low bit.
6760 if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6761 Src = Src.getOperand(0);
6762 }
6763
6764 // Match a shift-right by constant.
6765 if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6766 return SDValue();
6767
6768 // This is probably not worthwhile without a supported type.
6769 EVT SrcVT = Src.getValueType();
6770 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6771 if (!TLI.isTypeLegal(SrcVT))
6772 return SDValue();
6773
6774 // We might have looked through casts that make this transform invalid.
6775 unsigned BitWidth = SrcVT.getScalarSizeInBits();
6776 SDValue ShiftAmt = Src.getOperand(1);
6777 auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6778 if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6779 return SDValue();
6780
6781 // Set source to shift source.
6782 Src = Src.getOperand(0);
6783
6784 // Try again to find a 'not' op.
6785 // TODO: Should we favor test+set even with two 'not' ops?
6786 if (!FoundNot) {
6787 if (!isBitwiseNot(Src))
6788 return SDValue();
6789 Src = Src.getOperand(0);
6790 }
6791
6792 if (!TLI.hasBitTest(Src, ShiftAmt))
6793 return SDValue();
6794
6795 // Turn this into a bit-test pattern using mask op + setcc:
6796 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6797 // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
6798 SDLoc DL(And);
6799 SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6800 EVT CCVT =
6801 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6802 SDValue Mask = DAG.getConstant(
6803 APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6804 SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6805 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6806 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6807 return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6808}
6809
6810/// For targets that support usubsat, match a bit-hack form of that operation
6811/// that ends in 'and' and convert it.
6812static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL) {
6813 EVT VT = N->getValueType(0);
6814 unsigned BitWidth = VT.getScalarSizeInBits();
6815 APInt SignMask = APInt::getSignMask(BitWidth);
6816
6817 // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6818 // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6819 // xor/add with SMIN (signmask) are logically equivalent.
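// When X is below the sign mask the arithmetic shift produces 0, so the AND
// is 0; otherwise the shift produces all-ones and X ^ SignMask equals
// X - SignMask, so the pattern computes max(X - SignMask, 0), i.e.
// usubsat(X, SignMask).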
6820 SDValue X;
6821 if (!sd_match(N, m_And(m_OneUse(m_Xor(m_Value(X), m_SpecificInt(SignMask))),
6822 m_OneUse(m_Sra(m_Deferred(X),
6823 m_SpecificInt(BitWidth - 1))))) &&
6824 !sd_match(N, m_And(m_OneUse(m_Add(m_Value(X), m_SpecificInt(SignMask))),
6825 m_OneUse(m_Sra(m_Deferred(X),
6826 m_SpecificInt(BitWidth - 1))))))
6827 return SDValue();
6828
6829 return DAG.getNode(ISD::USUBSAT, DL, VT, X,
6830 DAG.getConstant(SignMask, DL, VT));
6831}
6832
6833/// Given a bitwise logic operation N with a matching bitwise logic operand,
6834/// fold a pattern where 2 of the source operands are identically shifted
6835/// values. For example:
6836/// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6837static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6838 SelectionDAG &DAG) {
6839 unsigned LogicOpcode = N->getOpcode();
6840 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6841 "Expected bitwise logic operation");
6842
6843 if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6844 return SDValue();
6845
6846 // Match another bitwise logic op and a shift.
6847 unsigned ShiftOpcode = ShiftOp.getOpcode();
6848 if (LogicOp.getOpcode() != LogicOpcode ||
6849 !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6850 ShiftOpcode == ISD::SRA))
6851 return SDValue();
6852
6853 // Match another shift op inside the first logic operand. Handle both commuted
6854 // possibilities.
6855 // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6856 // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6857 SDValue X1 = ShiftOp.getOperand(0);
6858 SDValue Y = ShiftOp.getOperand(1);
6859 SDValue X0, Z;
6860 if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6861 LogicOp.getOperand(0).getOperand(1) == Y) {
6862 X0 = LogicOp.getOperand(0).getOperand(0);
6863 Z = LogicOp.getOperand(1);
6864 } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6865 LogicOp.getOperand(1).getOperand(1) == Y) {
6866 X0 = LogicOp.getOperand(1).getOperand(0);
6867 Z = LogicOp.getOperand(0);
6868 } else {
6869 return SDValue();
6870 }
6871
6872 EVT VT = N->getValueType(0);
6873 SDLoc DL(N);
6874 SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6875 SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6876 return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6877}
6878
6879/// Given a tree of logic operations with shape like
6880/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6881/// try to match and fold shift operations with the same shift amount.
6882/// For example:
6883/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6884/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6885static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6886 SDValue RightHand, SelectionDAG &DAG) {
6887 unsigned LogicOpcode = N->getOpcode();
6888 assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6889 "Expected bitwise logic operation");
6890 if (LeftHand.getOpcode() != LogicOpcode ||
6891 RightHand.getOpcode() != LogicOpcode)
6892 return SDValue();
6893 if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6894 return SDValue();
6895
6896 // Try to match one of following patterns:
6897 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6898 // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6899 // Note that foldLogicOfShifts will handle commuted versions of the left hand
6900 // itself.
6901 SDValue CombinedShifts, W;
6902 SDValue R0 = RightHand.getOperand(0);
6903 SDValue R1 = RightHand.getOperand(1);
6904 if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6905 W = R1;
6906 else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6907 W = R0;
6908 else
6909 return SDValue();
6910
6911 EVT VT = N->getValueType(0);
6912 SDLoc DL(N);
6913 return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6914}
6915
6916SDValue DAGCombiner::visitAND(SDNode *N) {
6917 SDValue N0 = N->getOperand(0);
6918 SDValue N1 = N->getOperand(1);
6919 EVT VT = N1.getValueType();
6920 SDLoc DL(N);
6921
6922 // x & x --> x
6923 if (N0 == N1)
6924 return N0;
6925
6926 // fold (and c1, c2) -> c1&c2
6927 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, DL, VT, {N0, N1}))
6928 return C;
6929
6930 // canonicalize constant to RHS
6931 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6932 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6933 return DAG.getNode(ISD::AND, DL, VT, N1, N0);
6934
6935 if (areBitwiseNotOfEachother(N0, N1))
6936 return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), DL, VT);
6937
6938 // fold vector ops
6939 if (VT.isVector()) {
6940 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
6941 return FoldedVOp;
6942
6943 // fold (and x, 0) -> 0, vector edition
6944 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6945 // do not return N1, because undef node may exist in N1
6946 return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), DL,
6947 N1.getValueType());
6948
6949 // fold (and x, -1) -> x, vector edition
6950 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6951 return N0;
6952
6953 // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6954 auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6955 ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6956 if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6957 N1.hasOneUse()) {
6958 EVT LoadVT = MLoad->getMemoryVT();
6959 EVT ExtVT = VT;
6960 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6961 // For this AND to be a zero extension of the masked load the elements
6962 // of the BuildVec must mask the bottom bits of the extended element
6963 // type
6964 uint64_t ElementSize =
6965 LoadVT.getVectorElementType().getScalarSizeInBits();
6966 if (Splat->getAPIntValue().isMask(ElementSize)) {
6967 SDValue NewLoad = DAG.getMaskedLoad(
6968 ExtVT, DL, MLoad->getChain(), MLoad->getBasePtr(),
6969 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6970 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6971 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6972 bool LoadHasOtherUsers = !N0.hasOneUse();
6973 CombineTo(N, NewLoad);
6974 if (LoadHasOtherUsers)
6975 CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
6976 return SDValue(N, 0);
6977 }
6978 }
6979 }
6980 }
6981
6982 // fold (and x, -1) -> x
6983 if (isAllOnesConstant(N1))
6984 return N0;
6985
6986 // if (and x, c) is known to be zero, return 0
6987 unsigned BitWidth = VT.getScalarSizeInBits();
6988 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6989 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6990 return DAG.getConstant(0, DL, VT);
6991
6992 if (SDValue R = foldAndOrOfSETCC(N, DAG))
6993 return R;
6994
6995 if (SDValue NewSel = foldBinOpIntoSelect(N))
6996 return NewSel;
6997
6998 // reassociate and
6999 if (SDValue RAND = reassociateOps(ISD::AND, DL, N0, N1, N->getFlags()))
7000 return RAND;
7001
7002 // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7003 if (SDValue SD =
7004 reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, DL, VT, N0, N1))
7005 return SD;
7006
7007 // fold (and (or x, C), D) -> D if (C & D) == D
7008 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7009 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7010 };
7011 if (N0.getOpcode() == ISD::OR &&
7012 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7013 return N1;
7014
7015 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7016 SDValue N0Op0 = N0.getOperand(0);
7017 EVT SrcVT = N0Op0.getValueType();
7018 unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7019 APInt Mask = ~N1C->getAPIntValue();
7020 Mask = Mask.trunc(SrcBitWidth);
7021
7022 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7023 if (DAG.MaskedValueIsZero(N0Op0, Mask))
7024 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0Op0);
7025
7026 // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7027 if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7028 TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7029 TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7030 TLI.isNarrowingProfitable(VT, SrcVT))
7031 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7032 DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7033 DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7034 }
7035
7036 // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7037 if (ISD::isExtOpcode(N0.getOpcode())) {
7038 unsigned ExtOpc = N0.getOpcode();
7039 SDValue N0Op0 = N0.getOperand(0);
7040 if (N0Op0.getOpcode() == ISD::AND &&
7041 (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7042 DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7043 DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7044 N0->hasOneUse() && N0Op0->hasOneUse()) {
7045 SDValue NewMask =
7046 DAG.getNode(ISD::AND, DL, VT, N1,
7047 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7048 return DAG.getNode(ISD::AND, DL, VT,
7049 DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7050 NewMask);
7051 }
7052 }
7053
7054 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7055 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7056 // already be zero by virtue of the width of the base type of the load.
7057 //
7058 // the 'X' node here can either be nothing or an extract_vector_elt to catch
7059 // more cases.
7060 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7062 N0.getOperand(0).getOpcode() == ISD::LOAD &&
7063 N0.getOperand(0).getResNo() == 0) ||
7064 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7065 auto *Load =
7066 cast<LoadSDNode>((N0.getOpcode() == ISD::LOAD) ? N0 : N0.getOperand(0));
7067
7068 // Get the constant (if applicable) the zero'th operand is being ANDed with.
7069 // This can be a pure constant or a vector splat, in which case we treat the
7070 // vector as a scalar and use the splat value.
7071 APInt Constant = APInt::getZero(1);
7072 if (const ConstantSDNode *C = isConstOrConstSplat(
7073 N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7074 Constant = C->getAPIntValue();
7075 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7076 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7077 APInt SplatValue, SplatUndef;
7078 unsigned SplatBitSize;
7079 bool HasAnyUndefs;
7080 // Endianness should not matter here. Code below makes sure that we only
7081 // use the result if the SplatBitSize is a multiple of the vector element
7082 // size. And after that we AND all element sized parts of the splat
7083 // together. So the end result should be the same regardless of in which
7084 // order we do those operations.
7085 const bool IsBigEndian = false;
7086 bool IsSplat =
7087 Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7088 HasAnyUndefs, EltBitWidth, IsBigEndian);
7089
7090 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7091 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
7092 if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7093 // Undef bits can contribute to a possible optimisation if set, so
7094 // set them.
7095 SplatValue |= SplatUndef;
7096
7097 // The splat value may be something like "0x00FFFFFF", which means 0 for
7098 // the first vector value and FF for the rest, repeating. We need a mask
7099 // that will apply equally to all members of the vector, so AND all the
7100 // lanes of the constant together.
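// E.g. a 0x00FFFFFF splat seen as i8 lanes folds to
// 0xFF & 0xFF & 0xFF & 0x00 = 0x00, so the AND is correctly kept.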
7101 Constant = APInt::getAllOnes(EltBitWidth);
7102 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7103 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7104 }
7105 }
7106
7107 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7108 // actually legal and isn't going to get expanded, else this is a false
7109 // optimisation.
7110 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7111 Load->getValueType(0),
7112 Load->getMemoryVT());
7113
7114 // Resize the constant to the same size as the original memory access before
7115 // extension. If it is still the AllOnesValue then this AND is completely
7116 // unneeded.
7117 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7118
7119 bool B;
7120 switch (Load->getExtensionType()) {
7121 default: B = false; break;
7122 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7123 case ISD::ZEXTLOAD:
7124 case ISD::NON_EXTLOAD: B = true; break;
7125 }
7126
7127 if (B && Constant.isAllOnes()) {
7128 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7129 // preserve semantics once we get rid of the AND.
7130 SDValue NewLoad(Load, 0);
7131
7132 // Fold the AND away. NewLoad may get replaced immediately.
7133 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7134
7135 if (Load->getExtensionType() == ISD::EXTLOAD) {
7136 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7137 Load->getValueType(0), SDLoc(Load),
7138 Load->getChain(), Load->getBasePtr(),
7139 Load->getOffset(), Load->getMemoryVT(),
7140 Load->getMemOperand());
7141 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7142 if (Load->getNumValues() == 3) {
7143 // PRE/POST_INC loads have 3 values.
7144 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7145 NewLoad.getValue(2) };
7146 CombineTo(Load, To, 3, true);
7147 } else {
7148 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7149 }
7150 }
7151
7152 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7153 }
7154 }
7155
7156 // Try to convert a constant mask AND into a shuffle clear mask.
7157 if (VT.isVector())
7158 if (SDValue Shuffle = XformToShuffleWithZero(N))
7159 return Shuffle;
7160
7161 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7162 return Combined;
7163
7164 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7165 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7166 SDValue Ext = N0.getOperand(0);
7167 EVT ExtVT = Ext->getValueType(0);
7168 SDValue Extendee = Ext->getOperand(0);
7169
7170 unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7171 if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7172 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7173 // (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7174 // => (extract_subvector (iN_zeroext v))
7175 SDValue ZeroExtExtendee =
7176 DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Extendee);
7177
7178 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ZeroExtExtendee,
7179 N0.getOperand(1));
7180 }
7181 }
7182
7183 // fold (and (masked_gather x)) -> (zext_masked_gather x)
7184 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7185 EVT MemVT = GN0->getMemoryVT();
7186 EVT ScalarVT = MemVT.getScalarType();
7187
7188 if (SDValue(GN0, 0).hasOneUse() &&
7189 isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7190 TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7191 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
7192 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
7193
7194 SDValue ZExtLoad = DAG.getMaskedGather(
7195 DAG.getVTList(VT, MVT::Other), MemVT, DL, Ops, GN0->getMemOperand(),
7196 GN0->getIndexType(), ISD::ZEXTLOAD);
7197
7198 CombineTo(N, ZExtLoad);
7199 AddToWorklist(ZExtLoad.getNode());
7200 // Avoid recheck of N.
7201 return SDValue(N, 0);
7202 }
7203 }
7204
7205 // fold (and (load x), 255) -> (zextload x, i8)
7206 // fold (and (extload x, i16), 255) -> (zextload x, i8)
7207 if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7208 if (SDValue Res = reduceLoadWidth(N))
7209 return Res;
7210
7211 if (LegalTypes) {
7212 // Attempt to propagate the AND back up to the leaves which, if they're
7213 // loads, can be combined to narrow loads and the AND node can be removed.
7214 // Perform after legalization so that extend nodes will already be
7215 // combined into the loads.
7216 if (BackwardsPropagateMask(N))
7217 return SDValue(N, 0);
7218 }
7219
7220 if (SDValue Combined = visitANDLike(N0, N1, N))
7221 return Combined;
7222
7223 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
7224 if (N0.getOpcode() == N1.getOpcode())
7225 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7226 return V;
7227
7228 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7229 return R;
7230 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7231 return R;
7232
7233 // Masking the negated extension of a boolean is just the zero-extended
7234 // boolean:
7235 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7236 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
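// (sub 0, zext(bool)) is 0 or -1, so masking with 1 restores the 0/1
// value; (sub 0, sext(bool)) is already 0 or 1, so only the type changes.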
7237 //
7238 // Note: the SimplifyDemandedBits fold below can make an information-losing
7239 // transform, and then we have no way to find this better fold.
7240 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7241 if (isNullOrNullSplat(N0.getOperand(0))) {
7242 SDValue SubRHS = N0.getOperand(1);
7243 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7244 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7245 return SubRHS;
7246 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7247 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7248 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SubRHS.getOperand(0));
7249 }
7250 }
7251
7252 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7253 // fold (and (sra)) -> (and (srl)) when possible.
7254 if (SimplifyDemandedBits(SDValue(N, 0)))
7255 return SDValue(N, 0);
7256
7257 // fold (zext_inreg (extload x)) -> (zextload x)
7258 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
7259 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7260 (ISD::isEXTLoad(N0.getNode()) ||
7261 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7262 auto *LN0 = cast<LoadSDNode>(N0);
7263 EVT MemVT = LN0->getMemoryVT();
7264 // If we zero all the possible extended bits, then we can turn this into
7265 // a zextload if we are running before legalize or the operation is legal.
7266 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7267 unsigned MemBitSize = MemVT.getScalarSizeInBits();
7268 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7269 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7270 ((!LegalOperations && LN0->isSimple()) ||
7271 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7272 SDValue ExtLoad =
7273 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7274 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7275 AddToWorklist(N);
7276 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7277 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7278 }
7279 }
7280
7281 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7282 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7283 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7284 N0.getOperand(1), false))
7285 return BSwap;
7286 }
7287
7288 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7289 return Shifts;
7290
7291 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7292 return V;
7293
7294 // Recognize the following pattern:
7295 //
7296 // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7297 //
7298 // where bitmask is a mask that clears the upper bits of AndVT. The
7299 // number of bits in bitmask must be a power of two.
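// For example, (and (sign_extend X:i8 to i32), 0xFF) is replaced below by
// (zero_extend X:i8 to i32).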
7300 auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7301 if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7302 return false;
7303
7304 auto *C = dyn_cast<ConstantSDNode>(RHS);
7305 if (!C)
7306 return false;
7307
7308 if (!C->getAPIntValue().isMask(
7309 LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7310 return false;
7311
7312 return true;
7313 };
7314
7315 // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7316 if (IsAndZeroExtMask(N0, N1))
7317 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
7318
7319 if (hasOperation(ISD::USUBSAT, VT))
7320 if (SDValue V = foldAndToUsubsat(N, DAG, DL))
7321 return V;
7322
7323 // Postpone until legalization completed to avoid interference with bswap
7324 // folding
7325 if (LegalOperations || VT.isVector())
7326 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7327 return R;
7328
7329 return SDValue();
7330}
7331
7332/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
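/// For i32 a = 0xAABBCCDD the masked pattern yields 0x0000DDCC, matching
/// (bswap a) >> 16 = 0xDDCCBBAA >> 16.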
7333SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7334 bool DemandHighBits) {
7335 if (!LegalOperations)
7336 return SDValue();
7337
7338 EVT VT = N->getValueType(0);
7339 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7340 return SDValue();
7341 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7342 return SDValue();
7343
7344 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7345 bool LookPassAnd0 = false;
7346 bool LookPassAnd1 = false;
7347 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7348 std::swap(N0, N1);
7349 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7350 std::swap(N0, N1);
7351 if (N0.getOpcode() == ISD::AND) {
7352 if (!N0->hasOneUse())
7353 return SDValue();
7354 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7355 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7356 // This is needed for X86.
7357 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7358 N01C->getZExtValue() != 0xFFFF))
7359 return SDValue();
7360 N0 = N0.getOperand(0);
7361 LookPassAnd0 = true;
7362 }
7363
7364 if (N1.getOpcode() == ISD::AND) {
7365 if (!N1->hasOneUse())
7366 return SDValue();
7367 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7368 if (!N11C || N11C->getZExtValue() != 0xFF)
7369 return SDValue();
7370 N1 = N1.getOperand(0);
7371 LookPassAnd1 = true;
7372 }
7373
7374 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7375 std::swap(N0, N1);
7376 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7377 return SDValue();
7378 if (!N0->hasOneUse() || !N1->hasOneUse())
7379 return SDValue();
7380
7381 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7382 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7383 if (!N01C || !N11C)
7384 return SDValue();
7385 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7386 return SDValue();
7387
7388 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7389 SDValue N00 = N0->getOperand(0);
7390 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7391 if (!N00->hasOneUse())
7392 return SDValue();
7393 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7394 if (!N001C || N001C->getZExtValue() != 0xFF)
7395 return SDValue();
7396 N00 = N00.getOperand(0);
7397 LookPassAnd0 = true;
7398 }
7399
7400 SDValue N10 = N1->getOperand(0);
7401 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7402 if (!N10->hasOneUse())
7403 return SDValue();
7404 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7405 // Also allow 0xFFFF since the bits will be shifted out. This is needed
7406 // for X86.
7407 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7408 N101C->getZExtValue() != 0xFFFF))
7409 return SDValue();
7410 N10 = N10.getOperand(0);
7411 LookPassAnd1 = true;
7412 }
7413
7414 if (N00 != N10)
7415 return SDValue();
7416
7417 // Make sure everything beyond the low halfword gets set to zero since the SRL
7418 // 16 will clear the top bits.
7419 unsigned OpSizeInBits = VT.getSizeInBits();
7420 if (OpSizeInBits > 16) {
7421 // If the left-shift isn't masked out then the only way this is a bswap is
7422 // if all bits beyond the low 8 are 0. In that case the entire pattern
7423 // reduces to a left shift anyway: leave it for other parts of the combiner.
7424 if (DemandHighBits && !LookPassAnd0)
7425 return SDValue();
7426
7427 // However, if the right shift isn't masked out then it might be because
7428 // it's not needed. See if we can spot that too. If the high bits aren't
7429 // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7430 // upper bits to be zero.
7431 if (!LookPassAnd1) {
7432 unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7433 if (!DAG.MaskedValueIsZero(N10,
7434 APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7435 return SDValue();
7436 }
7437 }
7438
7439 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7440 if (OpSizeInBits > 16) {
7441 SDLoc DL(N);
7442 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7443 DAG.getConstant(OpSizeInBits - 16, DL,
7444 getShiftAmountTy(VT)));
7445 }
7446 return Res;
7447}
7448
7449/// Return true if the specified node is an element that makes up a 32-bit
7450/// packed halfword byteswap.
7451/// ((x & 0x000000ff) << 8) |
7452/// ((x & 0x0000ff00) >> 8) |
7453/// ((x & 0x00ff0000) << 8) |
7454/// ((x & 0xff000000) >> 8)
7455static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7456 if (!N->hasOneUse())
7457 return false;
7458
7459 unsigned Opc = N.getOpcode();
7460 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7461 return false;
7462
7463 SDValue N0 = N.getOperand(0);
7464 unsigned Opc0 = N0.getOpcode();
7465 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7466 return false;
7467
7468 ConstantSDNode *N1C = nullptr;
7469 // SHL or SRL: look upstream for AND mask operand
7470 if (Opc == ISD::AND)
7471 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7472 else if (Opc0 == ISD::AND)
7473 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7474 if (!N1C)
7475 return false;
7476
7477 unsigned MaskByteOffset;
7478 switch (N1C->getZExtValue()) {
7479 default:
7480 return false;
7481 case 0xFF: MaskByteOffset = 0; break;
7482 case 0xFF00: MaskByteOffset = 1; break;
7483 case 0xFFFF:
7484 // In case demanded bits didn't clear the bits that will be shifted out.
7485 // This is needed for X86.
7486 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7487 MaskByteOffset = 1;
7488 break;
7489 }
7490 return false;
7491 case 0xFF0000: MaskByteOffset = 2; break;
7492 case 0xFF000000: MaskByteOffset = 3; break;
7493 }
7494
7495 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7496 if (Opc == ISD::AND) {
7497 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7498 // (x >> 8) & 0xff
7499 // (x >> 8) & 0xff0000
7500 if (Opc0 != ISD::SRL)
7501 return false;
7502 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7503 if (!C || C->getZExtValue() != 8)
7504 return false;
7505 } else {
7506 // (x << 8) & 0xff00
7507 // (x << 8) & 0xff000000
7508 if (Opc0 != ISD::SHL)
7509 return false;
7510 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7511 if (!C || C->getZExtValue() != 8)
7512 return false;
7513 }
7514 } else if (Opc == ISD::SHL) {
7515 // (x & 0xff) << 8
7516 // (x & 0xff0000) << 8
7517 if (MaskByteOffset != 0 && MaskByteOffset != 2)
7518 return false;
7519 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7520 if (!C || C->getZExtValue() != 8)
7521 return false;
7522 } else { // Opc == ISD::SRL
7523 // (x & 0xff00) >> 8
7524 // (x & 0xff000000) >> 8
7525 if (MaskByteOffset != 1 && MaskByteOffset != 3)
7526 return false;
7527 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7528 if (!C || C->getZExtValue() != 8)
7529 return false;
7530 }
7531
7532 if (Parts[MaskByteOffset])
7533 return false;
7534
7535 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7536 return true;
7537}
7538
7539// Match 2 elements of a packed halfword bswap.
7540static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7541 if (N.getOpcode() == ISD::OR)
7542 return isBSwapHWordElement(N.getOperand(0), Parts) &&
7543 isBSwapHWordElement(N.getOperand(1), Parts);
7544
7545 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7546 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7547 if (!C || C->getAPIntValue() != 16)
7548 return false;
7549 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7550 return true;
7551 }
7552
7553 return false;
7554}
7555
7556// Match this pattern:
7557// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7558// And rewrite this to:
7559// (rotr (bswap A), 16)
7560static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7561 SelectionDAG &DAG, SDNode *N, SDValue N0,
7562 SDValue N1, EVT VT, EVT ShiftAmountTy) {
7563 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7564 "MatchBSwapHWordOrAndAnd: expecting i32");
7565 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7566 return SDValue();
7567 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7568 return SDValue();
7569 // TODO: this is too restrictive; lifting this restriction requires more tests
7570 if (!N0->hasOneUse() || !N1->hasOneUse())
7571 return SDValue();
7572 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7573 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7574 if (!Mask0 || !Mask1)
7575 return SDValue();
7576 if (Mask0->getAPIntValue() != 0xff00ff00 ||
7577 Mask1->getAPIntValue() != 0x00ff00ff)
7578 return SDValue();
7579 SDValue Shift0 = N0.getOperand(0);
7580 SDValue Shift1 = N1.getOperand(0);
7581 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7582 return SDValue();
7583 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7584 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7585 if (!ShiftAmt0 || !ShiftAmt1)
7586 return SDValue();
7587 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7588 return SDValue();
7589 if (Shift0.getOperand(0) != Shift1.getOperand(0))
7590 return SDValue();
7591
7592 SDLoc DL(N);
7593 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7594 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7595 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7596}
7597
7598/// Match a 32-bit packed halfword bswap. That is
7599/// ((x & 0x000000ff) << 8) |
7600/// ((x & 0x0000ff00) >> 8) |
7601/// ((x & 0x00ff0000) << 8) |
7602/// ((x & 0xff000000) >> 8)
7603/// => (rotl (bswap x), 16)
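/// For example, x = 0xAABBCCDD maps to 0xBBAADDCC, which equals
/// rotl(bswap(x) = 0xDDCCBBAA, 16).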
7604SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7605 if (!LegalOperations)
7606 return SDValue();
7607
7608 EVT VT = N->getValueType(0);
7609 if (VT != MVT::i32)
7610 return SDValue();
7611 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7612 return SDValue();
7613
7614 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7615 getShiftAmountTy(VT)))
7616 return BSwap;
7617
7618 // Try again with commuted operands.
7619 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7620 getShiftAmountTy(VT)))
7621 return BSwap;
7622
7623
7624 // Look for either
7625 // (or (bswaphpair), (bswaphpair))
7626 // (or (or (bswaphpair), (and)), (and))
7627 // (or (or (and), (bswaphpair)), (and))
7628 SDNode *Parts[4] = {};
7629
7630 if (isBSwapHWordPair(N0, Parts)) {
7631 // (or (or (and), (and)), (or (and), (and)))
7632 if (!isBSwapHWordPair(N1, Parts))
7633 return SDValue();
7634 } else if (N0.getOpcode() == ISD::OR) {
7635 // (or (or (or (and), (and)), (and)), (and))
7636 if (!isBSwapHWordElement(N1, Parts))
7637 return SDValue();
7638 SDValue N00 = N0.getOperand(0);
7639 SDValue N01 = N0.getOperand(1);
7640 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7641 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7642 return SDValue();
7643 } else {
7644 return SDValue();
7645 }
7646
7647 // Make sure the parts are all coming from the same node.
7648 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7649 return SDValue();
7650
7651 SDLoc DL(N);
7652 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7653 SDValue(Parts[0], 0));
7654
7655 // Result of the bswap should be rotated by 16. If it's not legal, then
7656 // do (x << 16) | (x >> 16).
7657 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7658 if (hasOperation(ISD::ROTL, VT))
7659 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7660 if (hasOperation(ISD::ROTR, VT))
7661 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7662 return DAG.getNode(ISD::OR, DL, VT,
7663 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7664 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7665}
7666
7667/// This contains all DAGCombine rules which reduce two values combined by
7668/// an Or operation to a single value \see visitANDLike().
7669SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
7670 EVT VT = N1.getValueType();
7671
7672 // fold (or x, undef) -> -1
7673 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7674 return DAG.getAllOnesConstant(DL, VT);
7675
7676 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7677 return V;
7678
7679 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
7680 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7681 // Don't increase # computations.
7682 (N0->hasOneUse() || N1->hasOneUse())) {
7683 // We can only do this xform if we know that bits from X that are set in C2
7684 // but not in C1 are already zero. Likewise for Y.
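// For example, (or (and X, 0xF0), (and Y, 0x0F)) can become
// (and (or X, Y), 0xFF) once the 0x0F bits of X and the 0xF0 bits of Y are
// known to be zero.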
7686 if (const ConstantSDNode *N0O1C =
7687 getAsNonOpaqueConstant(N0.getOperand(1)))
7688 if (const ConstantSDNode *N1O1C =
7689 getAsNonOpaqueConstant(N1.getOperand(1))) {
7689 // We can only do this xform if we know that bits from X that are set in
7690 // C2 but not in C1 are already zero. Likewise for Y.
7691 const APInt &LHSMask = N0O1C->getAPIntValue();
7692 const APInt &RHSMask = N1O1C->getAPIntValue();
7693
7694 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7695 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7696 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7697 N0.getOperand(0), N1.getOperand(0));
7698 return DAG.getNode(ISD::AND, DL, VT, X,
7699 DAG.getConstant(LHSMask | RHSMask, DL, VT));
7700 }
7701 }
7702 }
7703 }
7704
7705 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7706 if (N0.getOpcode() == ISD::AND &&
7707 N1.getOpcode() == ISD::AND &&
7708 N0.getOperand(0) == N1.getOperand(0) &&
7709 // Don't increase # computations.
7710 (N0->hasOneUse() || N1->hasOneUse())) {
7711 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7712 N0.getOperand(1), N1.getOperand(1));
7713 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7714 }
7715
7716 return SDValue();
7717}
7718
7719/// OR combines for which the commuted variant will be tried as well.
7720static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7721 SDNode *N) {
7722 EVT VT = N0.getValueType();
7723 unsigned BW = VT.getScalarSizeInBits();
7724 SDLoc DL(N);
7725
7726 auto peekThroughResize = [](SDValue V) {
7727 if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7728 return V->getOperand(0);
7729 return V;
7730 };
7731
7732 SDValue N0Resized = peekThroughResize(N0);
7733 if (N0Resized.getOpcode() == ISD::AND) {
7734 SDValue N1Resized = peekThroughResize(N1);
7735 SDValue N00 = N0Resized.getOperand(0);
7736 SDValue N01 = N0Resized.getOperand(1);
7737
7738 // fold or (and x, y), x --> x
7739 if (N00 == N1Resized || N01 == N1Resized)
7740 return N1;
7741
7742 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7743 // TODO: Set AllowUndefs = true.
7744 if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7745 /* AllowUndefs */ false)) {
7746 if (peekThroughResize(NotOperand) == N1Resized)
7747 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N00, DL, VT),
7748 N1);
7749 }
7750
7751 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7752 if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7753 /* AllowUndefs */ false)) {
7754 if (peekThroughResize(NotOperand) == N1Resized)
7755 return DAG.getNode(ISD::OR, DL, VT, DAG.getZExtOrTrunc(N01, DL, VT),
7756 N1);
7757 }
7758 }
7759
7760 SDValue X, Y;
7761
7762 // fold or (xor X, N1), N1 --> or X, N1
7763 if (sd_match(N0, m_Xor(m_Value(X), m_Specific(N1))))
7764 return DAG.getNode(ISD::OR, DL, VT, X, N1);
7765
7766 // fold or (xor x, y), (x and/or y) --> or x, y
7767 if (sd_match(N0, m_Xor(m_Value(X), m_Value(Y))) &&
7768 (sd_match(N1, m_And(m_Specific(X), m_Specific(Y))) ||
7769 sd_match(N1, m_Or(m_Specific(X), m_Specific(Y)))))
7770 return DAG.getNode(ISD::OR, DL, VT, X, Y);
7771
7772 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7773 return R;
7774
7775 auto peekThroughZext = [](SDValue V) {
7776 if (V->getOpcode() == ISD::ZERO_EXTEND)
7777 return V->getOperand(0);
7778 return V;
7779 };
7780
7781 // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7782 if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7783 N0.getOperand(0) == N1.getOperand(0) &&
7784 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7785 return N0;
7786
7787 // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7788 if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7789 N0.getOperand(1) == N1.getOperand(0) &&
7790 peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7791 return N0;
7792
7793 // Attempt to match a legalized build_pair-esque pattern:
7794 // or(shl(aext(Hi),BW/2),zext(Lo))
7795 SDValue Lo, Hi;
7796 if (sd_match(N0,
7797 m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
7798 sd_match(N1, m_ZExt(m_Value(Lo))) &&
7799 Lo.getScalarValueSizeInBits() == (BW / 2) &&
7800 Lo.getValueType() == Hi.getValueType()) {
7801 // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
7802 SDValue NotLo, NotHi;
7803 if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
7804 sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
7805 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
7806 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
7807 Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
7808 DAG.getShiftAmountConstant(BW / 2, VT, DL));
7809 return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
7810 }
7811 }
7812
7813 return SDValue();
7814}
7815
7816SDValue DAGCombiner::visitOR(SDNode *N) {
7817 SDValue N0 = N->getOperand(0);
7818 SDValue N1 = N->getOperand(1);
7819 EVT VT = N1.getValueType();
7820 SDLoc DL(N);
7821
7822 // x | x --> x
7823 if (N0 == N1)
7824 return N0;
7825
7826 // fold (or c1, c2) -> c1|c2
7827 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N0, N1}))
7828 return C;
7829
7830 // canonicalize constant to RHS
7831 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7832 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7833 return DAG.getNode(ISD::OR, DL, VT, N1, N0);
7834
7835 // fold vector ops
7836 if (VT.isVector()) {
7837 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
7838 return FoldedVOp;
7839
7840 // fold (or x, 0) -> x, vector edition
7841 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7842 return N0;
7843
7844 // fold (or x, -1) -> -1, vector edition
7845 if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7846 // do not return N1, because undef node may exist in N1
7847 return DAG.getAllOnesConstant(DL, N1.getValueType());
7848
7849 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7850 // Do this only if the resulting type / shuffle is legal.
7851 auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7852 auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7853 if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7854 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7855 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7856 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7857 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7858 // Ensure both shuffles have a zero input.
7859 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7860 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7861 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7862 bool CanFold = true;
7863 int NumElts = VT.getVectorNumElements();
7864 SmallVector<int, 4> Mask(NumElts, -1);
7865
7866 for (int i = 0; i != NumElts; ++i) {
7867 int M0 = SV0->getMaskElt(i);
7868 int M1 = SV1->getMaskElt(i);
7869
7870 // Determine if either index is pointing to a zero vector.
7871 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7872 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7873
7874 // If one element is zero and the other side is undef, keep undef.
7875 // This also handles the case that both are undef.
7876 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7877 continue;
7878
7879 // Make sure only one of the elements is zero.
7880 if (M0Zero == M1Zero) {
7881 CanFold = false;
7882 break;
7883 }
7884
7885 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7886
7887 // We have a zero and non-zero element. If the non-zero came from
7888 // SV0 make the index a LHS index. If it came from SV1, make it
7889 // a RHS index. We need to mod by NumElts because we don't care
7890 // which operand it came from in the original shuffles.
7891 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7892 }
7893
7894 if (CanFold) {
7895 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7896 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7897 SDValue LegalShuffle =
7898 TLI.buildLegalVectorShuffle(VT, DL, NewLHS, NewRHS, Mask, DAG);
7899 if (LegalShuffle)
7900 return LegalShuffle;
7901 }
7902 }
7903 }
7904 }
7905
7906 // fold (or x, 0) -> x
7907 if (isNullConstant(N1))
7908 return N0;
7909
7910 // fold (or x, -1) -> -1
7911 if (isAllOnesConstant(N1))
7912 return N1;
7913
7914 if (SDValue NewSel = foldBinOpIntoSelect(N))
7915 return NewSel;
7916
7917 // fold (or x, c) -> c iff (x & ~c) == 0
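// e.g. if x is known to have only its low 8 bits possibly set,
// (or x, 0xFF) -> 0xFF.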
7918 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7919 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7920 return N1;
7921
7922 if (SDValue R = foldAndOrOfSETCC(N, DAG))
7923 return R;
7924
7925 if (SDValue Combined = visitORLike(N0, N1, DL))
7926 return Combined;
7927
7928 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7929 return Combined;
7930
7931 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7932 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7933 return BSwap;
7934 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7935 return BSwap;
7936
7937 // reassociate or
7938 if (SDValue ROR = reassociateOps(ISD::OR, DL, N0, N1, N->getFlags()))
7939 return ROR;
7940
7941 // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7942 if (SDValue SD =
7943 reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, DL, VT, N0, N1))
7944 return SD;
7945
7946 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7947 // iff (c1 & c2) != 0 or c1/c2 are undef.
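// e.g. (or (and X, 0xFF00), 0x0FF0) -> (and (or X, 0x0FF0), 0xFFF0).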
7948 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7949 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7950 };
7951 if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7952 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7953 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7954 {N1, N0.getOperand(1)})) {
7955 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7956 AddToWorklist(IOR.getNode());
7957 return DAG.getNode(ISD::AND, DL, VT, COR, IOR);
7958 }
7959 }
7960
7961 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7962 return Combined;
7963 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7964 return Combined;
7965
7966 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
7967 if (N0.getOpcode() == N1.getOpcode())
7968 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7969 return V;
7970
7971 // See if this is some rotate idiom.
7972 if (SDValue Rot = MatchRotate(N0, N1, DL))
7973 return Rot;
7974
7975 if (SDValue Load = MatchLoadCombine(N))
7976 return Load;
7977
7978 // Simplify the operands using demanded-bits information.
7979 if (SimplifyDemandedBits(SDValue(N, 0)))
7980 return SDValue(N, 0);
7981
7982 // If OR can be rewritten into ADD, try combines based on ADD.
7983 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
7984 DAG.isADDLike(SDValue(N, 0)))
7985 if (SDValue Combined = visitADDLike(N))
7986 return Combined;
7987
7988 // Postpone until legalization completed to avoid interference with bswap
7989 // folding
7990 if (LegalOperations || VT.isVector())
7991 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7992 return R;
7993
7994 return SDValue();
7995}
7996
7997 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
7998 SDValue &Mask) {
7999 if (Op.getOpcode() == ISD::AND &&
8000 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8001 Mask = Op.getOperand(1);
8002 return Op.getOperand(0);
8003 }
8004 return Op;
8005}
8006
8007/// Match "(X shl/srl V1) & V2" where V2 may not be present.
8008static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8009 SDValue &Mask) {
8010 Op = stripConstantMask(DAG, Op, Mask);
8011 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8012 Shift = Op;
8013 return true;
8014 }
8015 return false;
8016}
8017
8018/// Helper function for visitOR to extract the needed side of a rotate idiom
8019/// from a shl/srl/mul/udiv. This is meant to handle cases where
8020/// InstCombine merged some outside op with one of the shifts from
8021/// the rotate pattern.
8022/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8023/// Otherwise, returns an expansion of \p ExtractFrom based on the following
8024/// patterns:
8025///
8026/// (or (add v v) (shrl v bitwidth-1)):
8027/// expands (add v v) -> (shl v 1)
8028///
8029/// (or (mul v c0) (shrl (mul v c1) c2)):
8030/// expands (mul v c0) -> (shl (mul v c1) c3)
8031///
8032/// (or (udiv v c0) (shl (udiv v c1) c2)):
8033/// expands (udiv v c0) -> (shrl (udiv v c1) c3)
8034///
8035/// (or (shl v c0) (shrl (shl v c1) c2)):
8036/// expands (shl v c0) -> (shl (shl v c1) c3)
8037///
8038/// (or (shrl v c0) (shl (shrl v c1) c2)):
8039/// expands (shrl v c0) -> (shrl (shrl v c1) c3)
8040///
8041/// Such that in all cases, c3+c2==bitwidth(op v c1).
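///
/// e.g. for i32 v: (or (mul v, 4), (srl (mul v, 2), 31)) rewrites
/// (mul v, 4) as (shl (mul v, 2), 1), so the OR becomes
/// (or (shl (mul v, 2), 1), (srl (mul v, 2), 31)), i.e. (rotl (mul v, 2), 1).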
8042 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8043 SDValue ExtractFrom, SDValue &Mask,
8044 const SDLoc &DL) {
8045 assert(OppShift && ExtractFrom && "Empty SDValue");
8046 if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8047 return SDValue();
8048
8049 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8050
8051 // Value and Type of the shift.
8052 SDValue OppShiftLHS = OppShift.getOperand(0);
8053 EVT ShiftedVT = OppShiftLHS.getValueType();
8054
8055 // Amount of the existing shift.
8056 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8057
8058 // (add v v) -> (shl v 1)
8059 // TODO: Should this be a general DAG canonicalization?
8060 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8061 ExtractFrom.getOpcode() == ISD::ADD &&
8062 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8063 ExtractFrom.getOperand(0) == OppShiftLHS &&
8064 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8065 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8066 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8067
8068 // Preconditions:
8069 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8070 //
8071 // Find opcode of the needed shift to be extracted from (op0 v c0).
8072 unsigned Opcode = ISD::DELETED_NODE;
8073 bool IsMulOrDiv = false;
8074 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8075 // opcode or its arithmetic (mul or udiv) variant.
8076 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8077 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8078 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8079 return false;
8080 Opcode = NeededShift;
8081 return true;
8082 };
8083 // op0 must be either the needed shift opcode or the mul/udiv equivalent
8084 // that the needed shift can be extracted from.
8085 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8086 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8087 return SDValue();
8088
8089 // op0 must be the same opcode on both sides, have the same LHS argument,
8090 // and produce the same value type.
8091 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8092 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8093 ShiftedVT != ExtractFrom.getValueType())
8094 return SDValue();
8095
8096 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8097 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8098 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8099 ConstantSDNode *ExtractFromCst =
8100 isConstOrConstSplat(ExtractFrom.getOperand(1));
8101 // TODO: We should be able to handle non-uniform constant vectors for these values
8102 // Check that we have constant values.
8103 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8104 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8105 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8106 return SDValue();
8107
8108 // Compute the shift amount we need to extract to complete the rotate.
8109 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8110 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8111 return SDValue();
8112 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8113 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8114 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8115 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8116 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8117
8118 // Now try extract the needed shift from the ExtractFrom op and see if the
8119 // result matches up with the existing shift's LHS op.
8120 if (IsMulOrDiv) {
8121 // Op to extract from is a mul or udiv by a constant.
8122 // Check:
8123 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
8124 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
8125 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8126 NeededShiftAmt.getZExtValue());
8127 APInt ResultAmt;
8128 APInt Rem;
8129 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8130 if (Rem != 0 || ResultAmt != OppLHSAmt)
8131 return SDValue();
8132 } else {
8133 // Op to extract from is a shift by a constant.
8134 // Check:
8135 // c2 - (bitwidth(op0 v c0) - c1) == c0
8136 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8137 ExtractFromAmt.getBitWidth()))
8138 return SDValue();
8139 }
8140
8141 // Return the expanded shift op that should allow a rotate to be formed.
8142 EVT ShiftVT = OppShift.getOperand(1).getValueType();
8143 EVT ResVT = ExtractFrom.getValueType();
8144 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8145 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8146}
8147
8148// Return true if we can prove that, whenever Neg and Pos are both in the
8149// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
8150// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8151//
8152// (or (shift1 X, Neg), (shift2 X, Pos))
8153//
8154// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8155// in direction shift1 by Neg. The range [0, EltSize) means that we only need
8156// to consider shift amounts with defined behavior.
8157//
8158// The IsRotate flag should be set when the LHS of both shifts is the same.
8159// Otherwise if matching a general funnel shift, it should be clear.
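// e.g. for EltSize == 32: Neg == (sub 32, Pos) always satisfies this, and
// Neg == (and (sub 0, Pos), 31) does too, since only the low 5 bits of the
// shift amount matter for a rotate.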
8160static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8161 SelectionDAG &DAG, bool IsRotate) {
8162 const auto &TLI = DAG.getTargetLoweringInfo();
8163 // If EltSize is a power of 2 then:
8164 //
8165 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8166 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8167 //
8168 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8169 // for the stronger condition:
8170 //
8171 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
8172 //
8173 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8174 // we can just replace Neg with Neg' for the rest of the function.
8175 //
8176 // In other cases we check for the even stronger condition:
8177 //
8178 // Neg == EltSize - Pos [B]
8179 //
8180 // for all Neg and Pos. Note that the (or ...) then invokes undefined
8181 // behavior if Pos == 0 (and consequently Neg == EltSize).
8182 //
8183 // We could actually use [A] whenever EltSize is a power of 2, but the
8184 // only extra cases that it would match are those uninteresting ones
8185 // where Neg and Pos are never in range at the same time. E.g. for
8186 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8187 // as well as (sub 32, Pos), but:
8188 //
8189 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8190 //
8191 // always invokes undefined behavior for 32-bit X.
8192 //
8193 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8194 // This allows us to peek through any operations that only affect Mask's
8195 // un-demanded bits.
8196 //
8197 // NOTE: We can only do this when matching operations which won't modify the
8198 // least Log2(EltSize) significant bits and not a general funnel shift.
8199 unsigned MaskLoBits = 0;
8200 if (IsRotate && isPowerOf2_64(EltSize)) {
8201 unsigned Bits = Log2_64(EltSize);
8202 unsigned NegBits = Neg.getScalarValueSizeInBits();
8203 if (NegBits >= Bits) {
8204 APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8205 if (SDValue Inner =
8206 TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8207 Neg = Inner;
8208 MaskLoBits = Bits;
8209 }
8210 }
8211 }
8212
8213 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8214 if (Neg.getOpcode() != ISD::SUB)
8215 return false;
8216 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8217 if (!NegC)
8218 return false;
8219 SDValue NegOp1 = Neg.getOperand(1);
8220
8221 // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8222 // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8223 // are redundant for the purpose of the equality.
8224 if (MaskLoBits) {
8225 unsigned PosBits = Pos.getScalarValueSizeInBits();
8226 if (PosBits >= MaskLoBits) {
8227 APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8228 if (SDValue Inner =
8229 TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8230 Pos = Inner;
8231 }
8232 }
8233 }
8234
8235 // The condition we need is now:
8236 //
8237 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8238 //
8239 // If NegOp1 == Pos then we need:
8240 //
8241 // EltSize & Mask == NegC & Mask
8242 //
8243 // (because "x & Mask" is a truncation and distributes through subtraction).
8244 //
8245 // We also need to account for a potential truncation of NegOp1 if the amount
8246 // has already been legalized to a shift amount type.
8247 APInt Width;
8248 if ((Pos == NegOp1) ||
8249 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8250 Width = NegC->getAPIntValue();
8251
8252 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8253 // Then the condition we want to prove becomes:
8254 //
8255 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8256 //
8257 // which, again because "x & Mask" is a truncation, becomes:
8258 //
8259 // NegC & Mask == (EltSize - PosC) & Mask
8260 // EltSize & Mask == (NegC + PosC) & Mask
8261 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8262 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8263 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8264 else
8265 return false;
8266 } else
8267 return false;
8268
8269 // Now we just need to check that EltSize & Mask == Width & Mask.
8270 if (MaskLoBits)
8271 // EltSize & Mask is 0 since Mask is EltSize - 1.
8272 return Width.getLoBits(MaskLoBits) == 0;
8273 return Width == EltSize;
8274}
8275
8276// A subroutine of MatchRotate used once we have found an OR of two opposite
8277// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
8278// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8279// former being preferred if supported. InnerPos and InnerNeg are Pos and
8280// Neg with outer conversions stripped away.
8281SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8282 SDValue Neg, SDValue InnerPos,
8283 SDValue InnerNeg, bool HasPos,
8284 unsigned PosOpcode, unsigned NegOpcode,
8285 const SDLoc &DL) {
8286 // fold (or (shl x, (*ext y)),
8287 // (srl x, (*ext (sub 32, y)))) ->
8288 // (rotl x, y) or (rotr x, (sub 32, y))
8289 //
8290 // fold (or (shl x, (*ext (sub 32, y))),
8291 // (srl x, (*ext y))) ->
8292 // (rotr x, y) or (rotl x, (sub 32, y))
8293 EVT VT = Shifted.getValueType();
8294 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8295 /*IsRotate*/ true)) {
8296 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8297 HasPos ? Pos : Neg);
8298 }
8299
8300 return SDValue();
8301}
8302
8303// A subroutine of MatchRotate used once we have found an OR of two opposite
8304// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
8305// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8306// former being preferred if supported. InnerPos and InnerNeg are Pos and
8307// Neg with outer conversions stripped away.
8308// TODO: Merge with MatchRotatePosNeg.
8309SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8310 SDValue Neg, SDValue InnerPos,
8311 SDValue InnerNeg, bool HasPos,
8312 unsigned PosOpcode, unsigned NegOpcode,
8313 const SDLoc &DL) {
8314 EVT VT = N0.getValueType();
8315 unsigned EltBits = VT.getScalarSizeInBits();
8316
8317 // fold (or (shl x0, (*ext y)),
8318 // (srl x1, (*ext (sub 32, y)))) ->
8319 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8320 //
8321 // fold (or (shl x0, (*ext (sub 32, y))),
8322 // (srl x1, (*ext y))) ->
8323 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8324 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8325 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8326 HasPos ? Pos : Neg);
8327 }
8328
8329 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
8330 // so for now just use the PosOpcode case if it's legal.
8331 // TODO: When can we use the NegOpcode case?
8332 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8333 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8334 if (Op.getOpcode() != BinOpc)
8335 return false;
8336 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8337 return Cst && (Cst->getAPIntValue() == Imm);
8338 };
8339
8340 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8341 // -> (fshl x0, x1, y)
8342 if (IsBinOpImm(N1, ISD::SRL, 1) &&
8343 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8344 InnerPos == InnerNeg.getOperand(0) &&
8345 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8346 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8347 }
8348
8349 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8350 // -> (fshr x0, x1, y)
8351 if (IsBinOpImm(N0, ISD::SHL, 1) &&
8352 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8353 InnerNeg == InnerPos.getOperand(0) &&
8354 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8355 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8356 }
8357
8358 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8359 // -> (fshr x0, x1, y)
8360 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8361 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8362 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8363 InnerNeg == InnerPos.getOperand(0) &&
8364 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8365 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8366 }
8367 }
8368
8369 return SDValue();
8370}
8371
8372// MatchRotate - Handle an 'or' of two operands. If this is one of the many
8373// idioms for rotate, and if the target supports rotation instructions, generate
8374// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8375// with different shifted sources.
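// e.g. for i32: (or (shl x, 3), (srl x, 29)) -> (rotl x, 3), and with
// different sources, (or (shl a, 3), (srl b, 29)) -> (fshl a, b, 3).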
8376SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8377 EVT VT = LHS.getValueType();
8378
8379 // The target must have at least one rotate/funnel flavor.
8380 // We still try to match rotate by constant pre-legalization.
8381 // TODO: Support pre-legalization funnel-shift by constant.
8382 bool HasROTL = hasOperation(ISD::ROTL, VT);
8383 bool HasROTR = hasOperation(ISD::ROTR, VT);
8384 bool HasFSHL = hasOperation(ISD::FSHL, VT);
8385 bool HasFSHR = hasOperation(ISD::FSHR, VT);
8386
8387 // If the type is going to be promoted and the target has enabled custom
8388 // lowering for rotate, allow matching rotate by non-constants. Only allow
8389 // this for scalar types.
8390 if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8391 TargetLowering::TypePromoteInteger) {
8392 HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8393 HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8394 }
8395
8396 if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8397 return SDValue();
8398
8399 // Check for truncated rotate.
8400 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8401 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8402 assert(LHS.getValueType() == RHS.getValueType());
8403 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8404 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8405 }
8406 }
8407
8408 // Match "(X shl/srl V1) & V2" where V2 may not be present.
8409 SDValue LHSShift; // The shift.
8410 SDValue LHSMask; // AND value if any.
8411 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8412
8413 SDValue RHSShift; // The shift.
8414 SDValue RHSMask; // AND value if any.
8415 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8416
8417 // If neither side matched a rotate half, bail
8418 if (!LHSShift && !RHSShift)
8419 return SDValue();
8420
8421 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8422 // side of the rotate, so try to handle that here. In all cases we need to
8423 // pass the matched shift from the opposite side to compute the opcode and
8424 // needed shift amount to extract. We still want to do this if both sides
8425 // matched a rotate half because one half may be a potential overshift that
8426 // can be broken down (ie if InstCombine merged two shl or srl ops into a
8427 // single one).
8428
8429 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8430 if (LHSShift)
8431 if (SDValue NewRHSShift =
8432 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8433 RHSShift = NewRHSShift;
8434 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8435 if (RHSShift)
8436 if (SDValue NewLHSShift =
8437 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8438 LHSShift = NewLHSShift;
8439
8440 // If a side is still missing, nothing else we can do.
8441 if (!RHSShift || !LHSShift)
8442 return SDValue();
8443
8444 // At this point we've matched or extracted a shift op on each side.
8445
8446 if (LHSShift.getOpcode() == RHSShift.getOpcode())
8447 return SDValue(); // Shifts must disagree.
8448
8449 // Canonicalize shl to left side in a shl/srl pair.
8450 if (RHSShift.getOpcode() == ISD::SHL) {
8451 std::swap(LHS, RHS);
8452 std::swap(LHSShift, RHSShift);
8453 std::swap(LHSMask, RHSMask);
8454 }
8455
8456 // Something has gone wrong - we've lost the shl/srl pair - bail.
8457 if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8458 return SDValue();
8459
8460 unsigned EltSizeInBits = VT.getScalarSizeInBits();
8461 SDValue LHSShiftArg = LHSShift.getOperand(0);
8462 SDValue LHSShiftAmt = LHSShift.getOperand(1);
8463 SDValue RHSShiftArg = RHSShift.getOperand(0);
8464 SDValue RHSShiftAmt = RHSShift.getOperand(1);
8465
8466 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8467 ConstantSDNode *RHS) {
8468 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8469 };
8470
8471 auto ApplyMasks = [&](SDValue Res) {
8472 // If there is an AND of either shifted operand, apply it to the result.
8473 if (LHSMask.getNode() || RHSMask.getNode()) {
8474 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8475 SDValue Mask = AllOnes;
8476
8477 if (LHSMask.getNode()) {
8478 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8479 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8480 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8481 }
8482 if (RHSMask.getNode()) {
8483 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8484 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8485 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8486 }
8487
8488 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8489 }
8490
8491 return Res;
8492 };
8493
8494 // TODO: Support pre-legalization funnel-shift by constant.
8495 bool IsRotate = LHSShiftArg == RHSShiftArg;
8496 if (!IsRotate && !(HasFSHL || HasFSHR)) {
8497 if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8498 ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8499 // Look for a disguised rotate by constant.
8500 // The common shifted operand X may be hidden inside another 'or'.
8501 SDValue X, Y;
8502 auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8503 if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8504 return false;
8505 if (CommonOp == Or.getOperand(0)) {
8506 X = CommonOp;
8507 Y = Or.getOperand(1);
8508 return true;
8509 }
8510 if (CommonOp == Or.getOperand(1)) {
8511 X = CommonOp;
8512 Y = Or.getOperand(0);
8513 return true;
8514 }
8515 return false;
8516 };
8517
8518 SDValue Res;
8519 if (matchOr(LHSShiftArg, RHSShiftArg)) {
8520 // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8521 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8522 SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8523 Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8524 } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8525 // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8526 SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8527 SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8528 Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8529 } else {
8530 return SDValue();
8531 }
8532
8533 return ApplyMasks(Res);
8534 }
8535
8536 return SDValue(); // Requires funnel shift support.
8537 }
8538
8539 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8540 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8541 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8542 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8543 // iff C1+C2 == EltSizeInBits
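// e.g. for i32: (or (shl x, 20), (srl x, 12)) -> (rotl x, 20) == (rotr x, 12).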
8544 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8545 SDValue Res;
8546 if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8547 bool UseROTL = !LegalOperations || HasROTL;
8548 Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8549 UseROTL ? LHSShiftAmt : RHSShiftAmt);
8550 } else {
8551 bool UseFSHL = !LegalOperations || HasFSHL;
8552 Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8553 RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8554 }
8555
8556 return ApplyMasks(Res);
8557 }
8558
8559 // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8560 // shift.
8561 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8562 return SDValue();
8563
8564 // If there is a mask here, and we have a variable shift, we can't be sure
8565 // that we're masking out the right stuff.
8566 if (LHSMask.getNode() || RHSMask.getNode())
8567 return SDValue();
8568
8569 // If the shift amount is sign/zext/any-extended just peel it off.
8570 SDValue LExtOp0 = LHSShiftAmt;
8571 SDValue RExtOp0 = RHSShiftAmt;
8572 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8573 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8574 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8575 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8576 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8577 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8578 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8579 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8580 LExtOp0 = LHSShiftAmt.getOperand(0);
8581 RExtOp0 = RHSShiftAmt.getOperand(0);
8582 }
8583
8584 if (IsRotate && (HasROTL || HasROTR)) {
8585 SDValue TryL =
8586 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8587 RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8588 if (TryL)
8589 return TryL;
8590
8591 SDValue TryR =
8592 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8593 LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8594 if (TryR)
8595 return TryR;
8596 }
8597
8598 SDValue TryL =
8599 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8600 LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8601 if (TryL)
8602 return TryL;
8603
8604 SDValue TryR =
8605 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8606 RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8607 if (TryR)
8608 return TryR;
8609
8610 return SDValue();
8611}
8612
8613/// Recursively traverses the expression calculating the origin of the requested
8614/// byte of the given value. Returns std::nullopt if the provider can't be
8615/// calculated.
8616///
8617/// For all the values except the root of the expression, we verify that the
8618/// value has exactly one use and if not then return std::nullopt. This way if
8619/// the origin of the byte is returned it's guaranteed that the values which
8620/// contribute to the byte are not used outside of this expression.
8621
8622/// However, there is a special case when dealing with vector loads -- we allow
8623/// more than one use if the load is a vector type. Since the values that
8624/// contribute to the byte ultimately come from the ExtractVectorElements of the
8625/// Load, we don't care if the Load has uses other than ExtractVectorElements,
8626/// because those operations are independent from the pattern to be combined.
8627/// For vector loads, we simply care that the ByteProviders are adjacent
8628/// positions of the same vector, and their index matches the byte that is being
8629/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8630/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8631/// byte position we are trying to provide for the LoadCombine. If these do
8632/// not match, then we can not combine the vector loads. \p Index uses the
8633/// byte position we are trying to provide for and is matched against the
8634/// shl and load size. The \p Index algorithm ensures the requested byte is
8635/// provided for by the pattern, and the pattern does not over provide bytes.
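///
/// e.g. for (or (zext i8 (load p)), (shl (zext i8 (load p+1)), 8)), byte 0
/// (the least significant byte) of the OR is provided by the load at p, and
/// byte 1 by the load at p+1.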
8636///
8637///
8638/// The supported LoadCombine pattern for vector loads is as follows
8639/// or
8640/// / \
8641/// or shl
8642/// / \ |
8643/// or shl zext
8644/// / \ | |
8645/// shl zext zext EVE*
8646/// | | | |
8647/// zext EVE* EVE* LOAD
8648/// | | |
8649/// EVE* LOAD LOAD
8650/// |
8651/// LOAD
8652///
8653/// *ExtractVectorElement
8655
8656static std::optional<SDByteProvider>
8657 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8658 std::optional<uint64_t> VectorIndex,
8659 unsigned StartingIndex = 0) {
8660
8661 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
8662 if (Depth == 10)
8663 return std::nullopt;
8664
8665 // Only allow multiple uses if the instruction is a vector load (in which
8666 // case we will use the load for every ExtractVectorElement)
8667 if (Depth && !Op.hasOneUse() &&
8668 (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8669 return std::nullopt;
8670
8671 // Fail to combine if we have encountered anything but a LOAD after handling
8672 // an ExtractVectorElement.
8673 if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8674 return std::nullopt;
8675
8676 unsigned BitWidth = Op.getValueSizeInBits();
8677 if (BitWidth % 8 != 0)
8678 return std::nullopt;
8679 unsigned ByteWidth = BitWidth / 8;
8680 assert(Index < ByteWidth && "invalid index requested");
8681 (void) ByteWidth;
8682
8683 switch (Op.getOpcode()) {
8684 case ISD::OR: {
8685 auto LHS =
8686 calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8687 if (!LHS)
8688 return std::nullopt;
8689 auto RHS =
8690 calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8691 if (!RHS)
8692 return std::nullopt;
8693
8694 if (LHS->isConstantZero())
8695 return RHS;
8696 if (RHS->isConstantZero())
8697 return LHS;
8698 return std::nullopt;
8699 }
8700 case ISD::SHL: {
8701 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8702 if (!ShiftOp)
8703 return std::nullopt;
8704
8705 uint64_t BitShift = ShiftOp->getZExtValue();
8706
8707 if (BitShift % 8 != 0)
8708 return std::nullopt;
8709 uint64_t ByteShift = BitShift / 8;
8710
8711 // If we are shifting by an amount greater than the index we are trying to
8712 // provide, then do not provide anything. Otherwise, subtract the index by
8713 // the amount we shifted by.
8714 return Index < ByteShift
8715 ? std::optional<SDByteProvider>(SDByteProvider::getConstantZero())
8716 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8717 Depth + 1, VectorIndex, Index);
8718 }
8719 case ISD::ANY_EXTEND:
8720 case ISD::SIGN_EXTEND:
8721 case ISD::ZERO_EXTEND: {
8722 SDValue NarrowOp = Op->getOperand(0);
8723 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8724 if (NarrowBitWidth % 8 != 0)
8725 return std::nullopt;
8726 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8727
8728 if (Index >= NarrowByteWidth)
8729 return Op.getOpcode() == ISD::ZERO_EXTEND
8730 ? std::optional<SDByteProvider>(
8731 SDByteProvider::getConstantZero())
8732 : std::nullopt;
8733 return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8734 StartingIndex);
8735 }
8736 case ISD::BSWAP:
8737 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8738 Depth + 1, VectorIndex, StartingIndex);
8739 case ISD::EXTRACT_VECTOR_ELT: {
8740 auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8741 if (!OffsetOp)
8742 return std::nullopt;
8743
8744 VectorIndex = OffsetOp->getZExtValue();
8745
8746 SDValue NarrowOp = Op->getOperand(0);
8747 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8748 if (NarrowBitWidth % 8 != 0)
8749 return std::nullopt;
8750 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8751 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
8752 // type, leaving the high bits undefined.
8753 if (Index >= NarrowByteWidth)
8754 return std::nullopt;
8755
8756 // Check to see if the position of the element in the vector corresponds
8757 // with the byte we are trying to provide for. In the case of a vector of
8758 // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
8759 // the element will provide a range of bytes. For example, if we have a
8760 // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8761 // 3).
8762 if (*VectorIndex * NarrowByteWidth > StartingIndex)
8763 return std::nullopt;
8764 if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8765 return std::nullopt;
8766
8767 return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8768 VectorIndex, StartingIndex);
8769 }
8770 case ISD::LOAD: {
8771 auto L = cast<LoadSDNode>(Op.getNode());
8772 if (!L->isSimple() || L->isIndexed())
8773 return std::nullopt;
8774
8775 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8776 if (NarrowBitWidth % 8 != 0)
8777 return std::nullopt;
8778 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8779
8780 // If the width of the load does not reach the byte we are trying to provide for
8781 // and it is not a ZEXTLOAD, then the load does not provide for the byte in
8782 // question
8783 if (Index >= NarrowByteWidth)
8784 return L->getExtensionType() == ISD::ZEXTLOAD
8785 ? std::optional<SDByteProvider>(
8786 SDByteProvider::getConstantZero())
8787 : std::nullopt;
8788
8789 unsigned BPVectorIndex = VectorIndex.value_or(0U);
8790 return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8791 }
8792 }
8793
8794 return std::nullopt;
8795}
8796
8797static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8798 return i;
8799}
8800
8801static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8802 return BW - i - 1;
8803}
8804
8805// Check if the bytes offsets we are looking at match with either big or
8806// little endian value loaded. Return true for big endian, false for little
8807// endian, and std::nullopt if match failed.
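// e.g. offsets {0,1,2,3} relative to FirstOffset match little endian (returns
// false); offsets {3,2,1,0} match big endian (returns true).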
8808static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8809 int64_t FirstOffset) {
8810 // The endian can be decided only when it is 2 bytes at least.
8811 unsigned Width = ByteOffsets.size();
8812 if (Width < 2)
8813 return std::nullopt;
8814
8815 bool BigEndian = true, LittleEndian = true;
8816 for (unsigned i = 0; i < Width; i++) {
8817 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8818 LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8819 BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8820 if (!BigEndian && !LittleEndian)
8821 return std::nullopt;
8822 }
8823
8824 assert((BigEndian != LittleEndian) && "It should be either big endian or "
8825 "little endian");
8826 return BigEndian;
8827}
8828
8829// Look through one layer of truncate or extend.
8830 static SDValue stripTruncAndExt(SDValue Value) {
8831 switch (Value.getOpcode()) {
8832 case ISD::TRUNCATE:
8833 case ISD::ZERO_EXTEND:
8834 case ISD::SIGN_EXTEND:
8835 case ISD::ANY_EXTEND:
8836 return Value.getOperand(0);
8837 }
8838 return SDValue();
8839}
8840
8841/// Match a pattern where a wide type scalar value is stored by several narrow
8842 /// stores. Fold it into a single store or a BSWAP and a store if the target
8843 /// supports it.
8844///
8845/// Assuming little endian target:
8846/// i8 *p = ...
8847/// i32 val = ...
8848/// p[0] = (val >> 0) & 0xFF;
8849/// p[1] = (val >> 8) & 0xFF;
8850/// p[2] = (val >> 16) & 0xFF;
8851/// p[3] = (val >> 24) & 0xFF;
8852/// =>
8853/// *((i32)p) = val;
8854///
8855/// i8 *p = ...
8856/// i32 val = ...
8857/// p[0] = (val >> 24) & 0xFF;
8858/// p[1] = (val >> 16) & 0xFF;
8859/// p[2] = (val >> 8) & 0xFF;
8860/// p[3] = (val >> 0) & 0xFF;
8861/// =>
8862/// *((i32)p) = BSWAP(val);
8863SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8864 // The matching looks for "store (trunc x)" patterns that appear early but are
8865 // likely to be replaced by truncating store nodes during combining.
8866 // TODO: If there is evidence that running this later would help, this
8867 // limitation could be removed. Legality checks may need to be added
8868 // for the created store and optional bswap/rotate.
8869 if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8870 return SDValue();
8871
8872 // We only handle merging simple stores of 1-4 bytes.
8873 // TODO: Allow unordered atomics when wider type is legal (see D66309)
8874 EVT MemVT = N->getMemoryVT();
8875 if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8876 !N->isSimple() || N->isIndexed())
8877 return SDValue();
8878
8879 // Collect all of the stores in the chain, up to the maximum store width (i64).
8880 SDValue Chain = N->getChain();
8881 SmallVector<StoreSDNode *, 8> Stores = {N};
8882 unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8883 unsigned MaxWideNumBits = 64;
8884 unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8885 while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8886 // All stores must be the same size to ensure that we are writing all of the
8887 // bytes in the wide value.
8888 // This store should have exactly one use as a chain operand for another
8889 // store in the merging set. If there are other chain uses, then the
8890 // transform may not be safe because order of loads/stores outside of this
8891 // set may not be preserved.
8892 // TODO: We could allow multiple sizes by tracking each stored byte.
8893 if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8894 Store->isIndexed() || !Store->hasOneUse())
8895 return SDValue();
8896 Stores.push_back(Store);
8897 Chain = Store->getChain();
8898 if (MaxStores < Stores.size())
8899 return SDValue();
8900 }
8901 // There is no reason to continue if we do not have at least a pair of stores.
8902 if (Stores.size() < 2)
8903 return SDValue();
8904
8905 // Handle simple types only.
8906 LLVMContext &Context = *DAG.getContext();
8907 unsigned NumStores = Stores.size();
8908 unsigned WideNumBits = NumStores * NarrowNumBits;
8909 EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8910 if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8911 return SDValue();
8912
8913 // Check if all bytes of the source value that we are looking at are stored
8914 // to the same base address. Collect offsets from Base address into OffsetMap.
8915 SDValue SourceValue;
8916 SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8917 int64_t FirstOffset = INT64_MAX;
8918 StoreSDNode *FirstStore = nullptr;
8919 std::optional<BaseIndexOffset> Base;
8920 for (auto *Store : Stores) {
8921 // All the stores store different parts of the CombinedValue. A truncate is
8922 // required to get the partial value.
8923 SDValue Trunc = Store->getValue();
8924 if (Trunc.getOpcode() != ISD::TRUNCATE)
8925 return SDValue();
8926 // Other than the first/last part, a shift operation is required to get the
8927 // offset.
8928 int64_t Offset = 0;
8929 SDValue WideVal = Trunc.getOperand(0);
8930 if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8931 isa<ConstantSDNode>(WideVal.getOperand(1))) {
8932 // The shift amount must be a constant multiple of the narrow type.
8933 // It is translated to the offset address in the wide source value "y".
8934 //
8935 // x = srl y, ShiftAmtC
8936 // i8 z = trunc x
8937 // store z, ...
8938 uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8939 if (ShiftAmtC % NarrowNumBits != 0)
8940 return SDValue();
8941
8942 // Make sure we aren't reading bits that are shifted in.
8943 if (ShiftAmtC > WideVal.getScalarValueSizeInBits() - NarrowNumBits)
8944 return SDValue();
8945
8946 Offset = ShiftAmtC / NarrowNumBits;
8947 WideVal = WideVal.getOperand(0);
8948 }
8949
8950 // Stores must share the same source value with different offsets.
8951 if (!SourceValue)
8952 SourceValue = WideVal;
8953 else if (SourceValue != WideVal) {
8954 // Truncate and extends can be stripped to see if the values are related.
8955 if (stripTruncAndExt(SourceValue) != WideVal &&
8956 stripTruncAndExt(WideVal) != SourceValue)
8957 return SDValue();
8958
8959 if (WideVal.getScalarValueSizeInBits() >
8960 SourceValue.getScalarValueSizeInBits())
8961 SourceValue = WideVal;
8962
8963 // Give up if the source value type is smaller than the store size.
8964 if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8965 return SDValue();
8966 }
8967
8968 // Stores must share the same base address.
8969 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8970 int64_t ByteOffsetFromBase = 0;
8971 if (!Base)
8972 Base = Ptr;
8973 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8974 return SDValue();
8975
8976 // Remember the first store.
8977 if (ByteOffsetFromBase < FirstOffset) {
8978 FirstStore = Store;
8979 FirstOffset = ByteOffsetFromBase;
8980 }
8981 // Map the offset in the store and the offset in the combined value, and
8982 // early return if it has been set before.
8983 if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8984 return SDValue();
8985 OffsetMap[Offset] = ByteOffsetFromBase;
8986 }
8987
8988 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8989 assert(FirstStore && "First store must be set");
8990
8991 // Check that a store of the wide type is both allowed and fast on the target
8992 const DataLayout &Layout = DAG.getDataLayout();
8993 unsigned Fast = 0;
8994 bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
8995 *FirstStore->getMemOperand(), &Fast);
8996 if (!Allowed || !Fast)
8997 return SDValue();
8998
8999 // Check if the pieces of the value are going to the expected places in memory
9000 // to merge the stores.
9001 auto checkOffsets = [&](bool MatchLittleEndian) {
9002 if (MatchLittleEndian) {
9003 for (unsigned i = 0; i != NumStores; ++i)
9004 if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9005 return false;
9006 } else { // MatchBigEndian by reversing loop counter.
9007 for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9008 if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9009 return false;
9010 }
9011 return true;
9012 };
9013
9014 // Check if the offsets line up for the native data layout of this target.
9015 bool NeedBswap = false;
9016 bool NeedRotate = false;
9017 if (!checkOffsets(Layout.isLittleEndian())) {
9018 // Special-case: check if byte offsets line up for the opposite endian.
9019 if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9020 NeedBswap = true;
9021 else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9022 NeedRotate = true;
9023 else
9024 return SDValue();
9025 }
9026
9027 SDLoc DL(N);
9028 if (WideVT != SourceValue.getValueType()) {
9029 assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9030 "Unexpected store value to merge");
9031 SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9032 }
9033
9034 // Before legalize we can introduce illegal bswaps/rotates which will be later
9035 // converted to an explicit bswap sequence. This way we end up with a single
9036 // store and byte shuffling instead of several stores and byte shuffling.
9037 if (NeedBswap) {
9038 SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9039 } else if (NeedRotate) {
9040 assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9041 SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9042 SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9043 }
9044
9045 SDValue NewStore =
9046 DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9047 FirstStore->getPointerInfo(), FirstStore->getAlign());
9048
9049 // Rely on other DAG combine rules to remove the other individual stores.
9050 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9051 return NewStore;
9052}
9053
9054/// Match a pattern where a wide type scalar value is loaded by several narrow
9055/// loads and combined by shifts and ors. Fold it into a single load or a load
9056 /// and a BSWAP if the target supports it.
9057///
9058/// Assuming little endian target:
9059/// i8 *a = ...
9060/// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9061/// =>
9062/// i32 val = *((i32)a)
9063///
9064/// i8 *a = ...
9065/// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9066/// =>
9067/// i32 val = BSWAP(*((i32)a))
9068///
9069/// TODO: This rule matches complex patterns with OR node roots and doesn't
9070/// interact well with the worklist mechanism. When a part of the pattern is
9071/// updated (e.g. one of the loads) its direct users are put into the worklist,
9072/// but the root node of the pattern which triggers the load combine is not
9073/// necessarily a direct user of the changed node. For example, once the address
9074 /// of the t28 load is reassociated, load combine won't be triggered:
9075/// t25: i32 = add t4, Constant:i32<2>
9076/// t26: i64 = sign_extend t25
9077/// t27: i64 = add t2, t26
9078/// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9079/// t29: i32 = zero_extend t28
9080/// t32: i32 = shl t29, Constant:i8<8>
9081/// t33: i32 = or t23, t32
9082/// As a possible fix visitLoad can check if the load can be a part of a load
9083/// combine pattern and add corresponding OR roots to the worklist.
9084SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9085 assert(N->getOpcode() == ISD::OR &&
9086 "Can only match load combining against OR nodes");
9087
9088 // Handles simple types only
9089 EVT VT = N->getValueType(0);
9090 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9091 return SDValue();
9092 unsigned ByteWidth = VT.getSizeInBits() / 8;
9093
9094 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9095 auto MemoryByteOffset = [&](SDByteProvider P) {
9096 assert(P.hasSrc() && "Must be a memory byte provider");
9097 auto *Load = cast<LoadSDNode>(P.Src.value());
9098
9099 unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9100
9101 assert(LoadBitWidth % 8 == 0 &&
9102 "can only analyze providers for individual bytes not bit");
9103 unsigned LoadByteWidth = LoadBitWidth / 8;
9104 return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9105 : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9106 };
9107
9108 std::optional<BaseIndexOffset> Base;
9109 SDValue Chain;
9110
9111 SmallPtrSet<LoadSDNode *, 8> Loads;
9112 std::optional<SDByteProvider> FirstByteProvider;
9113 int64_t FirstOffset = INT64_MAX;
9114
9115 // Check if all the bytes of the OR we are looking at are loaded from the same
9116 // base address. Collect bytes offsets from Base address in ByteOffsets.
9117 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9118 unsigned ZeroExtendedBytes = 0;
9119 for (int i = ByteWidth - 1; i >= 0; --i) {
9120 auto P =
9121 calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9122 /*StartingIndex*/ i);
9123 if (!P)
9124 return SDValue();
9125
9126 if (P->isConstantZero()) {
9127 // It's OK for the N most significant bytes to be 0, we can just
9128 // zero-extend the load.
9129 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9130 return SDValue();
9131 continue;
9132 }
9133 assert(P->hasSrc() && "provenance should either be memory or zero");
9134 auto *L = cast<LoadSDNode>(P->Src.value());
9135
9136 // All loads must share the same chain
9137 SDValue LChain = L->getChain();
9138 if (!Chain)
9139 Chain = LChain;
9140 else if (Chain != LChain)
9141 return SDValue();
9142
9143 // Loads must share the same base address
9144 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9145 int64_t ByteOffsetFromBase = 0;
9146
9147 // For vector loads, the expected load combine pattern will have an
9148 // ExtractElement for each index in the vector. While each of these
9149 // ExtractElements will be accessing the same base address as determined
9150 // by the load instruction, the actual bytes they interact with will differ
9151 // due to different ExtractElement indices. To accurately determine the
9152 // byte position of an ExtractElement, we offset the base load ptr with
9153 // the index multiplied by the byte size of each element in the vector.
9154 if (L->getMemoryVT().isVector()) {
9155 unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9156 if (LoadWidthInBit % 8 != 0)
9157 return SDValue();
9158 unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9159 Ptr.addToOffset(ByteOffsetFromVector);
9160 }
9161
9162 if (!Base)
9163 Base = Ptr;
9164
9165 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9166 return SDValue();
9167
9168 // Calculate the offset of the current byte from the base address
9169 ByteOffsetFromBase += MemoryByteOffset(*P);
9170 ByteOffsets[i] = ByteOffsetFromBase;
9171
9172 // Remember the first byte load
9173 if (ByteOffsetFromBase < FirstOffset) {
9174 FirstByteProvider = P;
9175 FirstOffset = ByteOffsetFromBase;
9176 }
9177
9178 Loads.insert(L);
9179 }
9180
9181 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9182 "memory, so there must be at least one load which produces the value");
9183 assert(Base && "Base address of the accessed memory location must be set");
9184 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9185
9186 bool NeedsZext = ZeroExtendedBytes > 0;
9187
9188 EVT MemVT =
9189 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9190
9191 if (!MemVT.isSimple())
9192 return SDValue();
9193
9194 // Before legalize we can introduce too wide illegal loads which will be later
9195 // split into legal sized loads. This enables us to combine i64 load by i8
9196 // patterns to a couple of i32 loads on 32 bit targets.
9197 if (LegalOperations &&
9199 MemVT))
9200 return SDValue();
9201
9202 // Check if the bytes of the OR we are looking at match with either big or
9203 // little endian value load
9204 std::optional<bool> IsBigEndian = isBigEndian(
9205 ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9206 if (!IsBigEndian)
9207 return SDValue();
9208
9209 assert(FirstByteProvider && "must be set");
9210
9211 // Ensure that the first byte is loaded from zero offset of the first load.
9212 // So the combined value can be loaded from the first load address.
9213 if (MemoryByteOffset(*FirstByteProvider) != 0)
9214 return SDValue();
9215 auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9216
9217 // The node we are looking at matches with the pattern, check if we can
9218 // replace it with a single (possibly zero-extended) load and bswap + shift if
9219 // needed.
9220
9221 // If the load needs byte swap check if the target supports it
9222 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9223
9224 // Before legalize we can introduce illegal bswaps which will be later
9225 // converted to an explicit bswap sequence. This way we end up with a single
9226 // load and byte shuffling instead of several loads and byte shuffling.
9227 // We do not introduce illegal bswaps when zero-extending as this tends to
9228 // introduce too many arithmetic instructions.
9229 if (NeedsBswap && (LegalOperations || NeedsZext) &&
9230 !TLI.isOperationLegal(ISD::BSWAP, VT))
9231 return SDValue();
9232
9233 // If we need to bswap and zero extend, we have to insert a shift. Check that
9234 // it is legal.
9235 if (NeedsBswap && NeedsZext && LegalOperations &&
9236 !TLI.isOperationLegal(ISD::SHL, VT))
9237 return SDValue();
9238
9239 // Check that a load of the wide type is both allowed and fast on the target
9240 unsigned Fast = 0;
9241 bool Allowed =
9242 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9243 *FirstLoad->getMemOperand(), &Fast);
9244 if (!Allowed || !Fast)
9245 return SDValue();
9246
9247 SDValue NewLoad =
9248 DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9249 Chain, FirstLoad->getBasePtr(),
9250 FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9251
9252 // Transfer chain users from old loads to the new load.
9253 for (LoadSDNode *L : Loads)
9254 DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9255
9256 if (!NeedsBswap)
9257 return NewLoad;
9258
9259 SDValue ShiftedLoad =
9260 NeedsZext
9261 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9262 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9263 SDLoc(N), LegalOperations))
9264 : NewLoad;
9265 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9266}
9267
9268// If the target has andn, bsl, or a similar bit-select instruction,
9269// we want to unfold masked merge, with canonical pattern of:
9270// | A | |B|
9271// ((x ^ y) & m) ^ y
9272// | D |
9273// Into:
9274// (x & m) | (y & ~m)
9275// If y is a constant, m is not a 'not', and the 'andn' does not work with
9276// immediates, we unfold into a different pattern:
9277// ~(~x & m) & (m | y)
9278// If x is a constant, m is a 'not', and the 'andn' does not work with
9279// immediates, we unfold into a different pattern:
9280// (x | ~m) & ~(~m & ~y)
9281// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9282// the very least that breaks andnpd / andnps patterns, and because those
9283// patterns are simplified in IR and shouldn't be created in the DAG
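// Equivalence check: bit-by-bit, when m==1 both forms yield x, and when m==0
// both yield y, so ((x ^ y) & m) ^ y == (x & m) | (y & ~m).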
9284SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9285 assert(N->getOpcode() == ISD::XOR);
9286
9287 // Don't touch 'not' (i.e. where y = -1).
9288 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9289 return SDValue();
9290
9291 EVT VT = N->getValueType(0);
9292
9293 // There are 3 commutable operators in the pattern,
9294 // so we have to deal with 8 possible variants of the basic pattern.
9295 SDValue X, Y, M;
9296 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9297 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9298 return false;
9299 SDValue Xor = And.getOperand(XorIdx);
9300 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9301 return false;
9302 SDValue Xor0 = Xor.getOperand(0);
9303 SDValue Xor1 = Xor.getOperand(1);
9304 // Don't touch 'not' (i.e. where y = -1).
9305 if (isAllOnesOrAllOnesSplat(Xor1))
9306 return false;
9307 if (Other == Xor0)
9308 std::swap(Xor0, Xor1);
9309 if (Other != Xor1)
9310 return false;
9311 X = Xor0;
9312 Y = Xor1;
9313 M = And.getOperand(XorIdx ? 0 : 1);
9314 return true;
9315 };
9316
9317 SDValue N0 = N->getOperand(0);
9318 SDValue N1 = N->getOperand(1);
9319 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9320 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9321 return SDValue();
9322
9323 // Don't do anything if the mask is constant. This should not be reachable.
9324 // InstCombine should have already unfolded this pattern, and DAGCombiner
9325 // probably shouldn't produce it either.
9326 if (isa<ConstantSDNode>(M.getNode()))
9327 return SDValue();
9328
9329 // We can transform if the target has AndNot
9330 if (!TLI.hasAndNot(M))
9331 return SDValue();
9332
9333 SDLoc DL(N);
9334
9335 // If Y is a constant, check that 'andn' works with immediates. Unless M is
9336 // a bitwise not that would already allow ANDN to be used.
9337 if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9338 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9339 // If not, we need to do a bit more work to make sure andn is still used.
9340 SDValue NotX = DAG.getNOT(DL, X, VT);
9341 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9342 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9343 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9344 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9345 }
9346
9347 // If X is a constant and M is a bitwise not, check that 'andn' works with
9348 // immediates.
9349 if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9350 assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9351 // If not, we need to do a bit more work to make sure andn is still used.
9352 SDValue NotM = M.getOperand(0);
9353 SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9354 SDValue NotY = DAG.getNOT(DL, Y, VT);
9355 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9356 SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9357 return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9358 }
9359
9360 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9361 SDValue NotM = DAG.getNOT(DL, M, VT);
9362 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9363
9364 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9365}
9366
9367SDValue DAGCombiner::visitXOR(SDNode *N) {
9368 SDValue N0 = N->getOperand(0);
9369 SDValue N1 = N->getOperand(1);
9370 EVT VT = N0.getValueType();
9371 SDLoc DL(N);
9372
9373 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9374 if (N0.isUndef() && N1.isUndef())
9375 return DAG.getConstant(0, DL, VT);
9376
9377 // fold (xor x, undef) -> undef
9378 if (N0.isUndef())
9379 return N0;
9380 if (N1.isUndef())
9381 return N1;
9382
9383 // fold (xor c1, c2) -> c1^c2
9384 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9385 return C;
9386
9387 // canonicalize constant to RHS
9388 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9389 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9390 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9391
9392 // fold vector ops
9393 if (VT.isVector()) {
9394 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9395 return FoldedVOp;
9396
9397 // fold (xor x, 0) -> x, vector edition
9398 if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9399 return N0;
9400 }
9401
9402 // fold (xor x, 0) -> x
9403 if (isNullConstant(N1))
9404 return N0;
9405
9406 if (SDValue NewSel = foldBinOpIntoSelect(N))
9407 return NewSel;
9408
9409 // reassociate xor
9410 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9411 return RXOR;
9412
9413 // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9414 if (SDValue SD =
9415 reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9416 return SD;
9417
9418 // fold (a^b) -> (a|b) iff a and b share no bits.
9419 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9420 DAG.haveNoCommonBitsSet(N0, N1)) {
9421 SDNodeFlags Flags;
9422 Flags.setDisjoint(true);
9423 return DAG.getNode(ISD::OR, DL, VT, N0, N1, Flags);
9424 }
9425
9426 // look for 'add-like' folds:
9427 // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
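// (both forms only flip the sign bit of N0: adding MIN_SIGNED_VALUE can carry
// only out of the MSB, and that carry is discarded)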
9428 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9429 isMinSignedConstant(N1))
9430 if (SDValue Combined = visitADDLike(N))
9431 return Combined;
9432
9433 // fold !(x cc y) -> (x !cc y)
9434 unsigned N0Opcode = N0.getOpcode();
9435 SDValue LHS, RHS, CC;
9436 if (TLI.isConstTrueVal(N1) &&
9437 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9438 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9439 LHS.getValueType());
9440 if (!LegalOperations ||
9441 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9442 switch (N0Opcode) {
9443 default:
9444 llvm_unreachable("Unhandled SetCC Equivalent!");
9445 case ISD::SETCC:
9446 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9447 case ISD::SELECT_CC:
9448 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9449 N0.getOperand(3), NotCC);
9450 case ISD::STRICT_FSETCC:
9451 case ISD::STRICT_FSETCCS: {
9452 if (N0.hasOneUse()) {
9453 // FIXME Can we handle multiple uses? Could we token factor the chain
9454 // results from the new/old setcc?
9455 SDValue SetCC =
9456 DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9457 N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9458 CombineTo(N, SetCC);
9459 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9460 recursivelyDeleteUnusedNodes(N0.getNode());
9461 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9462 }
9463 break;
9464 }
9465 }
9466 }
9467 }
9468
9469 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9470 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9471 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9472 SDValue V = N0.getOperand(0);
9473 SDLoc DL0(N0);
9474 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9475 DAG.getConstant(1, DL0, V.getValueType()));
9476 AddToWorklist(V.getNode());
9477 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9478 }
9479
9480 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9481 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9482 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9483 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9484 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9485 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9486 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9487 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9488 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9489 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9490 }
9491 }
9492 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9493 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9494 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9495 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9496 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9497 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9498 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9499 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9500 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9501 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9502 }
9503 }
9504
9505 // fold (not (neg x)) -> (add X, -1)
9506 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9507 // Y is a constant or the subtract has a single use.
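// (follows from ~v == -v - 1: ~(Y - X) == X - Y - 1 == X + ~Y, and Y == 0
// gives X + (-1))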
9508 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9509 isNullConstant(N0.getOperand(0))) {
9510 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9511 DAG.getAllOnesConstant(DL, VT));
9512 }
9513
9514 // fold (not (add X, -1)) -> (neg X)
9515 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9516 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9517 return DAG.getNegative(N0.getOperand(0), DL, VT);
9518 }
9519
9520 // fold (xor (and x, y), y) -> (and (not x), y)
9521 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9522 SDValue X = N0.getOperand(0);
9523 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9524 AddToWorklist(NotX.getNode());
9525 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9526 }
9527
9528 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
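// (Y is 0 for non-negative X and -1 for negative X, so the expression is
// either X ^ 0 == X or (X - 1) ^ -1 == ~(X - 1) == -X, i.e. abs(X))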
9529 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9530 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9531 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9532 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9533 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9534 SDValue S0 = S.getOperand(0);
9535 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9536 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9537 if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9538 return DAG.getNode(ISD::ABS, DL, VT, S0);
9539 }
9540 }
9541
9542 // fold (xor x, x) -> 0
9543 if (N0 == N1)
9544 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9545
9546 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9547 // Here is a concrete example of this equivalence:
9548 // i16 x == 14
9549 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
9550 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9551 //
9552 // =>
9553 //
9554 // i16 ~1 == 0b1111111111111110
9555 // i16 rol(~1, 14) == 0b1011111111111111
9556 //
9557 // Some additional tips to help conceptualize this transform:
9558 // - Try to see the operation as placing a single zero in a value of all ones.
9559 // - There exists no value for x which would allow the result to contain zero.
9560 // - Values of x larger than the bitwidth are undefined and do not require a
9561 // consistent result.
9562 // - Pushing the zero left requires shifting one bits in from the right.
9563 // A rotate left of ~1 is a nice way of achieving the desired result.
9564 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9565 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9566 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9567 N0.getOperand(1));
9568 }
9569
9570 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
9571 if (N0Opcode == N1.getOpcode())
9572 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9573 return V;
9574
9575 if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9576 return R;
9577 if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9578 return R;
9579 if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9580 return R;
9581
9582 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
9583 if (SDValue MM = unfoldMaskedMerge(N))
9584 return MM;
9585
9586 // Simplify the expression using non-local knowledge.
9587 if (SimplifyDemandedBits(SDValue(N, 0)))
9588 return SDValue(N, 0);
9589
9590 if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9591 return Combined;
9592
9593 return SDValue();
9594}
9595
9596/// If we have a shift-by-constant of a bitwise logic op that itself has a
9597/// shift-by-constant operand with identical opcode, we may be able to convert
9598/// that into 2 independent shifts followed by the logic op. This is a
9599/// throughput improvement.
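/// For example, with constant shift amounts:
///   (shl (xor (shl X, 3), Y), 2) --> (xor (shl X, 5), (shl Y, 2))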
9600static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9601 // Match a one-use bitwise logic op.
9602 SDValue LogicOp = Shift->getOperand(0);
9603 if (!LogicOp.hasOneUse())
9604 return SDValue();
9605
9606 unsigned LogicOpcode = LogicOp.getOpcode();
9607 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9608 LogicOpcode != ISD::XOR)
9609 return SDValue();
9610
9611 // Find a matching one-use shift by constant.
9612 unsigned ShiftOpcode = Shift->getOpcode();
9613 SDValue C1 = Shift->getOperand(1);
9614 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9615 assert(C1Node && "Expected a shift with constant operand");
9616 const APInt &C1Val = C1Node->getAPIntValue();
9617 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9618 const APInt *&ShiftAmtVal) {
9619 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9620 return false;
9621
9622 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9623 if (!ShiftCNode)
9624 return false;
9625
9626 // Capture the shifted operand and shift amount value.
9627 ShiftOp = V.getOperand(0);
9628 ShiftAmtVal = &ShiftCNode->getAPIntValue();
9629
9630 // Shift amount types do not have to match their operand type, so check that
9631 // the constants are the same width.
9632 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9633 return false;
9634
9635 // The fold is not valid if the sum of the shift values doesn't fit in the
9636 // given shift amount type.
9637 bool Overflow = false;
9638 APInt NewShiftAmt = C1Val.uadd_ov(*ShiftAmtVal, Overflow);
9639 if (Overflow)
9640 return false;
9641
9642 // The fold is not valid if the sum of the shift values exceeds bitwidth.
9643 if (NewShiftAmt.uge(V.getScalarValueSizeInBits()))
9644 return false;
9645
9646 return true;
9647 };
9648
9649 // Logic ops are commutative, so check each operand for a match.
9650 SDValue X, Y;
9651 const APInt *C0Val;
9652 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9653 Y = LogicOp.getOperand(1);
9654 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9655 Y = LogicOp.getOperand(0);
9656 else
9657 return SDValue();
9658
9659 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9660 SDLoc DL(Shift);
9661 EVT VT = Shift->getValueType(0);
9662 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9663 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9664 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9665 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9666 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2,
9667 LogicOp->getFlags());
9668}
9669
9670/// Handle transforms common to the three shifts, when the shift amount is a
9671/// constant.
9672/// We are looking for: (shift being one of shl/sra/srl)
9673/// shift (binop X, C0), C1
9674/// And want to transform into:
9675/// binop (shift X, C1), (shift C0, C1)
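/// e.g. (shl (or X, 7), 4) --> (or (shl X, 4), 112)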
9676SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9677 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9678
9679 // Do not turn a 'not' into a regular xor.
9680 if (isBitwiseNot(N->getOperand(0)))
9681 return SDValue();
9682
9683 // The inner binop must be one-use, since we want to replace it.
9684 SDValue LHS = N->getOperand(0);
9685 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9686 return SDValue();
9687
9688 // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9689 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9690 return R;
9691
9692 // We want to pull some binops through shifts, so that we have (and (shift))
9693 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
9694 // thing happens with address calculations, so it's important to canonicalize
9695 // it.
9696 switch (LHS.getOpcode()) {
9697 default:
9698 return SDValue();
9699 case ISD::OR:
9700 case ISD::XOR:
9701 case ISD::AND:
9702 break;
9703 case ISD::ADD:
9704 if (N->getOpcode() != ISD::SHL)
9705 return SDValue(); // only shl(add) not sr[al](add).
9706 break;
9707 }
9708
9709 // FIXME: disable this unless the input to the binop is a shift by a constant
9710 // or is copy/select. Enable this in other cases when we figure out it is
9711 // exactly profitable.
9712 SDValue BinOpLHSVal = LHS.getOperand(0);
9713 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9714 BinOpLHSVal.getOpcode() == ISD::SRA ||
9715 BinOpLHSVal.getOpcode() == ISD::SRL) &&
9716 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9717 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9718 BinOpLHSVal.getOpcode() == ISD::SELECT;
9719
9720 if (!IsShiftByConstant && !IsCopyOrSelect)
9721 return SDValue();
9722
9723 if (IsCopyOrSelect && N->hasOneUse())
9724 return SDValue();
9725
9726 // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9727 SDLoc DL(N);
9728 EVT VT = N->getValueType(0);
9729 if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9730 N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9731 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9732 N->getOperand(1));
9733 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9734 }
9735
9736 return SDValue();
9737}
9738
9739SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9740 assert(N->getOpcode() == ISD::TRUNCATE);
9741 assert(N->getOperand(0).getOpcode() == ISD::AND);
9742
9743 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9744 EVT TruncVT = N->getValueType(0);
9745 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9746 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9747 SDValue N01 = N->getOperand(0).getOperand(1);
9748 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9749 SDLoc DL(N);
9750 SDValue N00 = N->getOperand(0).getOperand(0);
9751 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9752 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9753 AddToWorklist(Trunc00.getNode());
9754 AddToWorklist(Trunc01.getNode());
9755 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9756 }
9757 }
9758
9759 return SDValue();
9760}
9761
9762SDValue DAGCombiner::visitRotate(SDNode *N) {
9763 SDLoc dl(N);
9764 SDValue N0 = N->getOperand(0);
9765 SDValue N1 = N->getOperand(1);
9766 EVT VT = N->getValueType(0);
9767 unsigned Bitsize = VT.getScalarSizeInBits();
9768
9769 // fold (rot x, 0) -> x
9770 if (isNullOrNullSplat(N1))
9771 return N0;
9772
9773 // fold (rot x, c) -> x iff (c % BitSize) == 0
9774 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9775 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9776 if (DAG.MaskedValueIsZero(N1, ModuloMask))
9777 return N0;
9778 }
9779
9780 // fold (rot x, c) -> (rot x, c % BitSize)
9781 bool OutOfRange = false;
9782 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9783 OutOfRange |= C->getAPIntValue().uge(Bitsize);
9784 return true;
9785 };
9786 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9787 EVT AmtVT = N1.getValueType();
9788 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9789 if (SDValue Amt =
9790 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9791 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9792 }
9793
9794 // rot i16 X, 8 --> bswap X
9795 auto *RotAmtC = isConstOrConstSplat(N1);
9796 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9797 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9798 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9799
9800 // Simplify the operands using demanded-bits information.
9801 if (SimplifyDemandedBits(SDValue(N, 0)))
9802 return SDValue(N, 0);
9803
9804 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9805 if (N1.getOpcode() == ISD::TRUNCATE &&
9806 N1.getOperand(0).getOpcode() == ISD::AND) {
9807 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9808 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9809 }
9810
9811 unsigned NextOp = N0.getOpcode();
9812
9813 // fold (rot* (rot* x, c2), c1)
9814 // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
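// e.g. for i8: (rotl (rotr x, 3), 7) -> (rotl x, ((7 - 3 + 8) % 8)) == (rotl x, 4)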
9815 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9816 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9817 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9818 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9819 EVT ShiftVT = C1->getValueType(0);
9820 bool SameSide = (N->getOpcode() == NextOp);
9821 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9822 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9823 SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9824 {N1, BitsizeC});
9825 SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9826 {N0.getOperand(1), BitsizeC});
9827 if (Norm1 && Norm2)
9828 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9829 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9830 CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9831 {CombinedShift, BitsizeC});
9832 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9833 ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9834 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9835 CombinedShiftNorm);
9836 }
9837 }
9838 }
9839 return SDValue();
9840}
9841
9842SDValue DAGCombiner::visitSHL(SDNode *N) {
9843 SDValue N0 = N->getOperand(0);
9844 SDValue N1 = N->getOperand(1);
9845 if (SDValue V = DAG.simplifyShift(N0, N1))
9846 return V;
9847
9848 SDLoc DL(N);
9849 EVT VT = N0.getValueType();
9850 EVT ShiftVT = N1.getValueType();
9851 unsigned OpSizeInBits = VT.getScalarSizeInBits();
9852
9853 // fold (shl c1, c2) -> c1<<c2
9854 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N0, N1}))
9855 return C;
9856
9857 // fold vector ops
9858 if (VT.isVector()) {
9859 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9860 return FoldedVOp;
9861
9862 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9863 // If setcc produces all-one true value then:
9864 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9865 if (N1CV && N1CV->isConstant()) {
9866 if (N0.getOpcode() == ISD::AND) {
9867 SDValue N00 = N0->getOperand(0);
9868 SDValue N01 = N0->getOperand(1);
9869 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9870
9871 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9872 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9873 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9874 if (SDValue C =
9875 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N01, N1}))
9876 return DAG.getNode(ISD::AND, DL, VT, N00, C);
9877 }
9878 }
9879 }
9880 }
9881
9882 if (SDValue NewSel = foldBinOpIntoSelect(N))
9883 return NewSel;
9884
9885 // if (shl x, c) is known to be zero, return 0
9886 if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9887 return DAG.getConstant(0, DL, VT);
9888
9889 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9890 if (N1.getOpcode() == ISD::TRUNCATE &&
9891 N1.getOperand(0).getOpcode() == ISD::AND) {
9892 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9893 return DAG.getNode(ISD::SHL, DL, VT, N0, NewOp1);
9894 }
9895
9896 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
9897 if (N0.getOpcode() == ISD::SHL) {
9898 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9899 ConstantSDNode *RHS) {
9900 APInt c1 = LHS->getAPIntValue();
9901 APInt c2 = RHS->getAPIntValue();
9902 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9903 return (c1 + c2).uge(OpSizeInBits);
9904 };
9905 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9906 return DAG.getConstant(0, DL, VT);
9907
9908 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9909 ConstantSDNode *RHS) {
9910 APInt c1 = LHS->getAPIntValue();
9911 APInt c2 = RHS->getAPIntValue();
9912 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9913 return (c1 + c2).ult(OpSizeInBits);
9914 };
9915 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9916 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9917 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9918 }
9919 }
9920
9921 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9922 // For this to be valid, the second form must not preserve any of the bits
9923 // that are shifted out by the inner shift in the first form. This means
9924 // the outer shift size must be >= the number of bits added by the ext.
9925 // As a corollary, we don't care what kind of ext it is.
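// e.g. (shl (zext i8 (shl X, 3) to i32), 27) --> (shl (zext i8 X to i32), 30);
// in both forms only bits 30..31 of the result can come from X.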
9926 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9927 N0.getOpcode() == ISD::ANY_EXTEND ||
9928 N0.getOpcode() == ISD::SIGN_EXTEND) &&
9929 N0.getOperand(0).getOpcode() == ISD::SHL) {
9930 SDValue N0Op0 = N0.getOperand(0);
9931 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9932 EVT InnerVT = N0Op0.getValueType();
9933 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9934
9935 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9936 ConstantSDNode *RHS) {
9937 APInt c1 = LHS->getAPIntValue();
9938 APInt c2 = RHS->getAPIntValue();
9939 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9940 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9941 (c1 + c2).uge(OpSizeInBits);
9942 };
9943 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9944 /*AllowUndefs*/ false,
9945 /*AllowTypeMismatch*/ true))
9946 return DAG.getConstant(0, DL, VT);
9947
9948 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9949 ConstantSDNode *RHS) {
9950 APInt c1 = LHS->getAPIntValue();
9951 APInt c2 = RHS->getAPIntValue();
9952 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9953 return c2.uge(OpSizeInBits - InnerBitwidth) &&
9954 (c1 + c2).ult(OpSizeInBits);
9955 };
9956 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9957 /*AllowUndefs*/ false,
9958 /*AllowTypeMismatch*/ true)) {
9959 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9960 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9961 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9962 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9963 }
9964 }
9965
9966 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9967 // Only fold this if the inner zext has no other uses to avoid increasing
9968 // the total number of instructions.
9969 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9970 N0.getOperand(0).getOpcode() == ISD::SRL) {
9971 SDValue N0Op0 = N0.getOperand(0);
9972 SDValue InnerShiftAmt = N0Op0.getOperand(1);
9973
9974 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9975 APInt c1 = LHS->getAPIntValue();
9976 APInt c2 = RHS->getAPIntValue();
9977 zeroExtendToMatch(c1, c2);
9978 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9979 };
9980 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9981 /*AllowUndefs*/ false,
9982 /*AllowTypeMismatch*/ true)) {
9983 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9984 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9985 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9986 AddToWorklist(NewSHL.getNode());
9987 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9988 }
9989 }
9990
9991 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9992 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9993 ConstantSDNode *RHS) {
9994 const APInt &LHSC = LHS->getAPIntValue();
9995 const APInt &RHSC = RHS->getAPIntValue();
9996 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9997 LHSC.getZExtValue() <= RHSC.getZExtValue();
9998 };
9999
10000 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
10001 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10002 if (N0->getFlags().hasExact()) {
10003 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10004 /*AllowUndefs*/ false,
10005 /*AllowTypeMismatch*/ true)) {
10006 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10007 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10008 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10009 }
10010 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10011 /*AllowUndefs*/ false,
10012 /*AllowTypeMismatch*/ true)) {
10013 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10014 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10015 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10016 }
10017 }
10018
10019 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
10020 // (and (srl x, (sub c1, c2)), MASK)
10021 // Only fold this if the inner shift has no other uses -- if it does,
10022 // folding this will increase the total number of instructions.
10023 if (N0.getOpcode() == ISD::SRL &&
10024 (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10025 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10026 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10027 /*AllowUndefs*/ false,
10028 /*AllowTypeMismatch*/ true)) {
10029 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10030 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10031 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10032 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10033 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10034 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10035 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10036 }
10037 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10038 /*AllowUndefs*/ false,
10039 /*AllowTypeMismatch*/ true)) {
10040 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10041 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10042 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10043 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10044 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10045 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10046 }
10047 }
10048 }
10049
10050 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10051 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10052 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10053 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10054 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10055 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10056 }
10057
10058 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10059 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10060 // Variant of version done on multiply, except mul by a power of 2 is turned
10061 // into a shift.
10062 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10063 N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10064 SDValue N01 = N0.getOperand(1);
10065 if (SDValue Shl1 =
10066 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10067 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10068 AddToWorklist(Shl0.getNode());
10069 SDNodeFlags Flags;
10070 // Preserve the disjoint flag for Or.
10071 if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10072 Flags.setDisjoint(true);
10073 return DAG.getNode(N0.getOpcode(), DL, VT, Shl0, Shl1, Flags);
10074 }
10075 }
10076
10077 // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10078 // TODO: Add zext/add_nuw variant with suitable test coverage
10079 // TODO: Should we limit this with isLegalAddImmediate?
10080 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10081 N0.getOperand(0).getOpcode() == ISD::ADD &&
10082 N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10083 N0.getOperand(0)->hasOneUse() &&
10084 TLI.isDesirableToCommuteWithShift(N, Level)) {
10085 SDValue Add = N0.getOperand(0);
10086 SDLoc DL(N0);
10087 if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10088 {Add.getOperand(1)})) {
10089 if (SDValue ShlC =
10090 DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10091 SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10092 SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10093 return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10094 }
10095 }
10096 }
10097
10098 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10099 if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10100 SDValue N01 = N0.getOperand(1);
10101 if (SDValue Shl =
10102 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10103 return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), Shl);
10104 }
10105
10106 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10107 if (N1C && !N1C->isOpaque())
10108 if (SDValue NewSHL = visitShiftByConstant(N))
10109 return NewSHL;
10110
10111 if (SimplifyDemandedBits(SDValue(N, 0)))
10112 return SDValue(N, 0);
10113
10114 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10115 if (N0.getOpcode() == ISD::VSCALE && N1C) {
10116 const APInt &C0 = N0.getConstantOperandAPInt(0);
10117 const APInt &C1 = N1C->getAPIntValue();
10118 return DAG.getVScale(DL, VT, C0 << C1);
10119 }
10120
10121 // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10122 APInt ShlVal;
10123 if (N0.getOpcode() == ISD::STEP_VECTOR &&
10124 ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10125 const APInt &C0 = N0.getConstantOperandAPInt(0);
10126 if (ShlVal.ult(C0.getBitWidth())) {
10127 APInt NewStep = C0 << ShlVal;
10128 return DAG.getStepVector(DL, VT, NewStep);
10129 }
10130 }
10131
10132 return SDValue();
10133}
10134
10135// Transform a right shift of a multiply into a multiply-high.
10136// Examples:
10137// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
10138// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10139static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
10140 const TargetLowering &TLI) {
10141 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10142 "SRL or SRA node is required here!");
10143
10144 // Check the shift amount. Proceed with the transformation if the shift
10145 // amount is constant.
10146 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10147 if (!ShiftAmtSrc)
10148 return SDValue();
10149
10150 // The operation feeding into the shift must be a multiply.
10151 SDValue ShiftOperand = N->getOperand(0);
10152 if (ShiftOperand.getOpcode() != ISD::MUL)
10153 return SDValue();
10154
10155 // Both operands must be equivalent extend nodes.
10156 SDValue LeftOp = ShiftOperand.getOperand(0);
10157 SDValue RightOp = ShiftOperand.getOperand(1);
10158
10159 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10160 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10161
10162 if (!IsSignExt && !IsZeroExt)
10163 return SDValue();
10164
10165 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10166 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10167
10168 // return true if U may use the lower bits of its operands
10169 auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10170 if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10171 return true;
10172 }
10173 ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10174 if (!UShiftAmtSrc) {
10175 return true;
10176 }
10177 unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10178 return UShiftAmt < NarrowVTSize;
10179 };
10180
10181 // If the lower part of the MUL is also used and MUL_LOHI is supported
10182 // do not introduce the MULH in favor of MUL_LOHI
10183 unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10184 if (!ShiftOperand.hasOneUse() &&
10185 TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10186 llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10187 return SDValue();
10188 }
10189
10190 SDValue MulhRightOp;
10191 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10192 unsigned ActiveBits = IsSignExt
10193 ? Constant->getAPIntValue().getSignificantBits()
10194 : Constant->getAPIntValue().getActiveBits();
10195 if (ActiveBits > NarrowVTSize)
10196 return SDValue();
10197 MulhRightOp = DAG.getConstant(
10198 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10199 NarrowVT);
10200 } else {
10201 if (LeftOp.getOpcode() != RightOp.getOpcode())
10202 return SDValue();
10203 // Check that the two extend nodes are the same type.
10204 if (NarrowVT != RightOp.getOperand(0).getValueType())
10205 return SDValue();
10206 MulhRightOp = RightOp.getOperand(0);
10207 }
10208
10209 EVT WideVT = LeftOp.getValueType();
10210 // Proceed with the transformation if the wide types match.
10211 assert((WideVT == RightOp.getValueType()) &&
10212 "Cannot have a multiply node with two different operand types.");
10213
10214 // Proceed with the transformation if the wide type is twice as large
10215 // as the narrow type.
10216 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10217 return SDValue();
10218
10219 // Check the shift amount with the narrow type size.
10220 // Proceed with the transformation if the shift amount is the width
10221 // of the narrow type.
10222 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10223 if (ShiftAmt != NarrowVTSize)
10224 return SDValue();
10225
10226 // If the operation feeding into the MUL is a sign extend (sext),
10227 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10228 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10229
10230 // Combine to mulh if mulh is legal/custom for the narrow type on the target
10231 // or if it is a vector type then we could transform to an acceptable type and
10232 // rely on legalization to split/combine the result.
10233 if (NarrowVT.isVector()) {
10234 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10235 if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10236 !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10237 return SDValue();
10238 } else {
10239 if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10240 return SDValue();
10241 }
10242
10243 SDValue Result =
10244 DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10245 bool IsSigned = N->getOpcode() == ISD::SRA;
10246 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10247}
10248
10249// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
10250// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
10251static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10252 unsigned Opcode = N->getOpcode();
10253 if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10254 return SDValue();
10255
10256 SDValue N0 = N->getOperand(0);
10257 EVT VT = N->getValueType(0);
10258 SDLoc DL(N);
10259 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10260 SDValue OldLHS = N0.getOperand(0);
10261 SDValue OldRHS = N0.getOperand(1);
10262
10263 // If both operands are bswap/bitreverse, ignore the multiuse
10264 // Otherwise need to ensure logic_op and bswap/bitreverse(x) have one use.
10265 if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10266 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10267 OldRHS.getOperand(0));
10268 }
10269
10270 if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10271 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10272 return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10273 NewBitReorder);
10274 }
10275
10276 if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10277 SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10278 return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10279 OldRHS.getOperand(0));
10280 }
10281 }
10282 return SDValue();
10283}
10284
10285SDValue DAGCombiner::visitSRA(SDNode *N) {
10286 SDValue N0 = N->getOperand(0);
10287 SDValue N1 = N->getOperand(1);
10288 if (SDValue V = DAG.simplifyShift(N0, N1))
10289 return V;
10290
10291 SDLoc DL(N);
10292 EVT VT = N0.getValueType();
10293 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10294
10295 // fold (sra c1, c2) -> c1 >>s c2
10296 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, DL, VT, {N0, N1}))
10297 return C;
10298
10299 // Arithmetic shifting an all-sign-bit value is a no-op.
10300 // fold (sra 0, x) -> 0
10301 // fold (sra -1, x) -> -1
10302 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10303 return N0;
10304
10305 // fold vector ops
10306 if (VT.isVector())
10307 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10308 return FoldedVOp;
10309
10310 if (SDValue NewSel = foldBinOpIntoSelect(N))
10311 return NewSel;
10312
10313 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10314
10315 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10316 // clamp (add c1, c2) to max shift.
10317 if (N0.getOpcode() == ISD::SRA) {
10318 EVT ShiftVT = N1.getValueType();
10319 EVT ShiftSVT = ShiftVT.getScalarType();
10320 SmallVector<SDValue, 16> ShiftValues;
10321
10322 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10323 APInt c1 = LHS->getAPIntValue();
10324 APInt c2 = RHS->getAPIntValue();
10325 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10326 APInt Sum = c1 + c2;
10327 unsigned ShiftSum =
10328 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10329 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10330 return true;
10331 };
10332 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10333 SDValue ShiftValue;
10334 if (N1.getOpcode() == ISD::BUILD_VECTOR)
10335 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10336 else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10337 assert(ShiftValues.size() == 1 &&
10338 "Expected matchBinaryPredicate to return one element for "
10339 "SPLAT_VECTORs");
10340 ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10341 } else
10342 ShiftValue = ShiftValues[0];
10343 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10344 }
10345 }
10346
10347 // fold (sra (shl X, m), (sub result_size, n))
10348 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
10349 // result_size - n != m.
10350 // If truncate is free for the target sext(shl) is likely to result in better
10351 // code.
10352 if (N0.getOpcode() == ISD::SHL && N1C) {
10353 // Get the two constants of the shifts, CN0 = m, CN = n.
10354 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10355 if (N01C) {
10356 LLVMContext &Ctx = *DAG.getContext();
10357 // Determine what the truncate's result bitsize and type would be.
10358 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10359
10360 if (VT.isVector())
10361 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10362
10363 // Determine the residual right-shift amount.
10364 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10365
10366 // If the shift is not a no-op (in which case this should be just a sign
10367 // extend already), the truncated to type is legal, sign_extend is legal
10368 // on that type, and the truncate to that type is both legal and free,
10369 // perform the transform.
10370 if ((ShiftAmt > 0) &&
10371 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10372 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10373 TLI.isTruncateFree(VT, TruncVT)) {
10374 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
10375 getShiftAmountTy(N0.getOperand(0).getValueType()));
10376 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
10377 N0.getOperand(0), Amt);
10378 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
10379 Shift);
10380 return DAG.getNode(ISD::SIGN_EXTEND, DL,
10381 N->getValueType(0), Trunc);
10382 }
10383 }
10384 }
10385
10386 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10387 // sra (add (shl X, N1C), AddC), N1C -->
10388 // sext (add (trunc X to (width - N1C)), AddC')
10389 // sra (sub AddC, (shl X, N1C)), N1C -->
10390 // sext (sub AddC1',(trunc X to (width - N1C)))
10391 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10392 N0.hasOneUse()) {
10393 bool IsAdd = N0.getOpcode() == ISD::ADD;
10394 SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10395 if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10396 Shl.hasOneUse()) {
10397 // TODO: AddC does not need to be a splat.
10398 if (ConstantSDNode *AddC =
10399 isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10400 // Determine what the truncate's type would be and ask the target if
10401 // that is a free operation.
10402 LLVMContext &Ctx = *DAG.getContext();
10403 unsigned ShiftAmt = N1C->getZExtValue();
10404 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10405 if (VT.isVector())
10406 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10407
10408 // TODO: The simple type check probably belongs in the default hook
10409 // implementation and/or target-specific overrides (because
10410 // non-simple types likely require masking when legalized), but
10411 // that restriction may conflict with other transforms.
10412 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10413 TLI.isTruncateFree(VT, TruncVT)) {
10414 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10415 SDValue ShiftC =
10416 DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10417 TruncVT.getScalarSizeInBits()),
10418 DL, TruncVT);
10419 SDValue Add;
10420 if (IsAdd)
10421 Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10422 else
10423 Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10424 return DAG.getSExtOrTrunc(Add, DL, VT);
10425 }
10426 }
10427 }
10428 }
10429
10430 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10431 if (N1.getOpcode() == ISD::TRUNCATE &&
10432 N1.getOperand(0).getOpcode() == ISD::AND) {
10433 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10434 return DAG.getNode(ISD::SRA, DL, VT, N0, NewOp1);
10435 }
10436
10437 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10438 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10439 // if c1 is equal to the number of bits the trunc removes
10440 // TODO - support non-uniform vector shift amounts.
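// e.g. for i64 -> i32: (sra (trunc (srl X, 32)), 5) --> (trunc (sra X, 37))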
10441 if (N0.getOpcode() == ISD::TRUNCATE &&
10442 (N0.getOperand(0).getOpcode() == ISD::SRL ||
10443 N0.getOperand(0).getOpcode() == ISD::SRA) &&
10444 N0.getOperand(0).hasOneUse() &&
10445 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10446 SDValue N0Op0 = N0.getOperand(0);
10447 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10448 EVT LargeVT = N0Op0.getValueType();
10449 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10450 if (LargeShift->getAPIntValue() == TruncBits) {
10451 EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10452 SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10453 Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10454 DAG.getConstant(TruncBits, DL, LargeShiftVT));
10455 SDValue SRA =
10456 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10457 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10458 }
10459 }
10460 }
10461
10462 // Simplify, based on bits shifted out of the LHS.
10463 if (SimplifyDemandedBits(SDValue(N, 0)))
10464 return SDValue(N, 0);
10465
10466 // If the sign bit is known to be zero, switch this to a SRL.
10467 if (DAG.SignBitIsZero(N0))
10468 return DAG.getNode(ISD::SRL, DL, VT, N0, N1);
10469
10470 if (N1C && !N1C->isOpaque())
10471 if (SDValue NewSRA = visitShiftByConstant(N))
10472 return NewSRA;
10473
10474 // Try to transform this shift into a multiply-high if
10475 // it matches the appropriate pattern detected in combineShiftToMULH.
10476 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10477 return MULH;
10478
10479 // Attempt to convert a sra of a load into a narrower sign-extending load.
10480 if (SDValue NarrowLoad = reduceLoadWidth(N))
10481 return NarrowLoad;
10482
10483 return SDValue();
10484}
10485
10486SDValue DAGCombiner::visitSRL(SDNode *N) {
10487 SDValue N0 = N->getOperand(0);
10488 SDValue N1 = N->getOperand(1);
10489 if (SDValue V = DAG.simplifyShift(N0, N1))
10490 return V;
10491
10492 SDLoc DL(N);
10493 EVT VT = N0.getValueType();
10494 EVT ShiftVT = N1.getValueType();
10495 unsigned OpSizeInBits = VT.getScalarSizeInBits();
10496
10497 // fold (srl c1, c2) -> c1 >>u c2
10498 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, DL, VT, {N0, N1}))
10499 return C;
10500
10501 // fold vector ops
10502 if (VT.isVector())
10503 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
10504 return FoldedVOp;
10505
10506 if (SDValue NewSel = foldBinOpIntoSelect(N))
10507 return NewSel;
10508
10509 // if (srl x, c) is known to be zero, return 0
10510 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10511 if (N1C &&
10512 DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10513 return DAG.getConstant(0, DL, VT);
10514
10515 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
10516 if (N0.getOpcode() == ISD::SRL) {
10517 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10518 ConstantSDNode *RHS) {
10519 APInt c1 = LHS->getAPIntValue();
10520 APInt c2 = RHS->getAPIntValue();
10521 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10522 return (c1 + c2).uge(OpSizeInBits);
10523 };
10524 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10525 return DAG.getConstant(0, DL, VT);
10526
10527 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10528 ConstantSDNode *RHS) {
10529 APInt c1 = LHS->getAPIntValue();
10530 APInt c2 = RHS->getAPIntValue();
10531 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10532 return (c1 + c2).ult(OpSizeInBits);
10533 };
10534 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10535 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10536 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10537 }
10538 }
10539
10540 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10541 N0.getOperand(0).getOpcode() == ISD::SRL) {
10542 SDValue InnerShift = N0.getOperand(0);
10543 // TODO - support non-uniform vector shift amounts.
10544 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10545 uint64_t c1 = N001C->getZExtValue();
10546 uint64_t c2 = N1C->getZExtValue();
10547 EVT InnerShiftVT = InnerShift.getValueType();
10548 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10549 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10550 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
10551 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
10552 if (c1 + OpSizeInBits == InnerShiftSize) {
10553 if (c1 + c2 >= InnerShiftSize)
10554 return DAG.getConstant(0, DL, VT);
10555 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10556 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10557 InnerShift.getOperand(0), NewShiftAmt);
10558 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10559 }
10560 // In the more general case, we can clear the high bits after the shift:
10561 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
10562 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10563 c1 + c2 < InnerShiftSize) {
10564 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10565 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10566 InnerShift.getOperand(0), NewShiftAmt);
10567 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10568 OpSizeInBits - c2),
10569 DL, InnerShiftVT);
10570 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10571 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10572 }
10573 }
10574 }
10575
10576 // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
10577 // (and (srl x, (sub c2, c1)), MASK)
10578 if (N0.getOpcode() == ISD::SHL &&
10579 (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10580 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10581 auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10582 ConstantSDNode *RHS) {
10583 const APInt &LHSC = LHS->getAPIntValue();
10584 const APInt &RHSC = RHS->getAPIntValue();
10585 return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10586 LHSC.getZExtValue() <= RHSC.getZExtValue();
10587 };
10588 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10589 /*AllowUndefs*/ false,
10590 /*AllowTypeMismatch*/ true)) {
10591 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10592 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10593 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10594 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10595 Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10596 SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10597 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10598 }
10599 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10600 /*AllowUndefs*/ false,
10601 /*AllowTypeMismatch*/ true)) {
10602 SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10603 SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10604 SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10605 Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10606 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10607 return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10608 }
10609 }
10610
10611 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10612 // TODO - support non-uniform vector shift amounts.
10613 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10614 // Shifting in all undef bits?
10615 EVT SmallVT = N0.getOperand(0).getValueType();
10616 unsigned BitSize = SmallVT.getScalarSizeInBits();
10617 if (N1C->getAPIntValue().uge(BitSize))
10618 return DAG.getUNDEF(VT);
10619
10620 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10621 uint64_t ShiftAmt = N1C->getZExtValue();
10622 SDLoc DL0(N0);
10623 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10624 N0.getOperand(0),
10625 DAG.getConstant(ShiftAmt, DL0,
10626 getShiftAmountTy(SmallVT)));
10627 AddToWorklist(SmallShift.getNode());
10628 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10629 return DAG.getNode(ISD::AND, DL, VT,
10630 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10631 DAG.getConstant(Mask, DL, VT));
10632 }
10633 }
10634
10635 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
10636 // bit, which is unmodified by sra.
10637 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10638 if (N0.getOpcode() == ISD::SRA)
10639 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
10640 }
10641
10642 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x has a power
10643 // of two bitwidth. The "5" represents (log2 (bitwidth x)).
10644 if (N1C && N0.getOpcode() == ISD::CTLZ &&
10645 isPowerOf2_32(OpSizeInBits) &&
10646 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10647 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10648
10649 // If any of the input bits are KnownOne, then the input couldn't be all
10650 // zeros, thus the result of the srl will always be zero.
10651 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10652
10653 // If all of the bits input to the ctlz node are known to be zero, then
10654 // the result of the ctlz is "32" and the result of the shift is one.
10655 APInt UnknownBits = ~Known.Zero;
10656 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10657
10658 // Otherwise, check to see if there is exactly one bit input to the ctlz.
10659 if (UnknownBits.isPowerOf2()) {
10660 // Okay, we know that only the single bit specified by UnknownBits
10661 // could be set on input to the CTLZ node. If this bit is set, the SRL
10662 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
10663 // to an SRL/XOR pair, which is likely to simplify more.
10664 unsigned ShAmt = UnknownBits.countr_zero();
10665 SDValue Op = N0.getOperand(0);
10666
10667 if (ShAmt) {
10668 SDLoc DL(N0);
10669 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10670 DAG.getConstant(ShAmt, DL,
10671 getShiftAmountTy(Op.getValueType())));
10672 AddToWorklist(Op.getNode());
10673 }
10674 return DAG.getNode(ISD::XOR, DL, VT, Op, DAG.getConstant(1, DL, VT));
10675 }
10676 }
10677
10678 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10679 if (N1.getOpcode() == ISD::TRUNCATE &&
10680 N1.getOperand(0).getOpcode() == ISD::AND) {
10681 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10682 return DAG.getNode(ISD::SRL, DL, VT, N0, NewOp1);
10683 }
10684
10685 // fold operands of srl based on knowledge that the low bits are not
10686 // demanded.
10687 if (SimplifyDemandedBits(SDValue(N, 0)))
10688 return SDValue(N, 0);
10689
10690 if (N1C && !N1C->isOpaque())
10691 if (SDValue NewSRL = visitShiftByConstant(N))
10692 return NewSRL;
10693
10694 // Attempt to convert a srl of a load into a narrower zero-extending load.
10695 if (SDValue NarrowLoad = reduceLoadWidth(N))
10696 return NarrowLoad;
10697
10698 // Here is a common situation. We want to optimize:
10699 //
10700 // %a = ...
10701 // %b = and i32 %a, 2
10702 // %c = srl i32 %b, 1
10703 // brcond i32 %c ...
10704 //
10705 // into
10706 //
10707 // %a = ...
10708 // %b = and %a, 2
10709 // %c = setcc eq %b, 0
10710 // brcond %c ...
10711 //
10712 // However, after the source operand of the SRL is optimized into an AND, the SRL
10713 // itself may not be optimized further. Look for it and add the BRCOND into
10714 // the worklist.
10715 //
10716 // This also tends to happen for binary operations when SimplifyDemandedBits
10717 // is involved.
10718 //
10719 // FIXME: This is unnecessary if we process the DAG in topological order,
10720 // which we plan to do. This workaround can be removed once the DAG is
10721 // processed in topological order.
10722 if (N->hasOneUse()) {
10723 SDNode *Use = *N->use_begin();
10724
10725 // Look past the truncate.
10726 if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10727 Use = *Use->use_begin();
10728
10729 if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10730 Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10731 AddToWorklist(Use);
10732 }
10733
10734 // Try to transform this shift into a multiply-high if
10735 // it matches the appropriate pattern detected in combineShiftToMULH.
10736 if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
10737 return MULH;
10738
10739 return SDValue();
10740}
10741
10742SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10743 EVT VT = N->getValueType(0);
10744 SDValue N0 = N->getOperand(0);
10745 SDValue N1 = N->getOperand(1);
10746 SDValue N2 = N->getOperand(2);
10747 bool IsFSHL = N->getOpcode() == ISD::FSHL;
10748 unsigned BitWidth = VT.getScalarSizeInBits();
10749
10750 // fold (fshl N0, N1, 0) -> N0
10751 // fold (fshr N0, N1, 0) -> N1
10752 if (isPowerOf2_32(BitWidth))
10753 if (DAG.MaskedValueIsZero(
10754 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10755 return IsFSHL ? N0 : N1;
10756
10757 auto IsUndefOrZero = [](SDValue V) {
10758 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10759 };
10760
10761 // TODO - support non-uniform vector shift amounts.
10762 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10763 EVT ShAmtTy = N2.getValueType();
10764
10765 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
10766 if (Cst->getAPIntValue().uge(BitWidth)) {
10767 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10768 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10769 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10770 }
10771
10772 unsigned ShAmt = Cst->getZExtValue();
10773 if (ShAmt == 0)
10774 return IsFSHL ? N0 : N1;
10775
10776 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10777 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10778 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10779 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
10780 if (IsUndefOrZero(N0))
10781 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10782 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10783 SDLoc(N), ShAmtTy));
10784 if (IsUndefOrZero(N1))
10785 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10786 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10787 SDLoc(N), ShAmtTy));
10788
10789 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10790 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10791 // TODO - bigendian support once we have test coverage.
10792 // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10793 // TODO - permit LHS EXTLOAD if extensions are shifted out.
10794 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10795 !DAG.getDataLayout().isBigEndian()) {
10796 auto *LHS = dyn_cast<LoadSDNode>(N0);
10797 auto *RHS = dyn_cast<LoadSDNode>(N1);
10798 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10799 LHS->getAddressSpace() == RHS->getAddressSpace() &&
10800 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10801 ISD::isNON_EXTLoad(LHS)) {
10802 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10803 SDLoc DL(RHS);
10804 uint64_t PtrOff =
10805 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10806 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10807 unsigned Fast = 0;
10808 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10809 RHS->getAddressSpace(), NewAlign,
10810 RHS->getMemOperand()->getFlags(), &Fast) &&
10811 Fast) {
10812 SDValue NewPtr = DAG.getMemBasePlusOffset(
10813 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10814 AddToWorklist(NewPtr.getNode());
10815 SDValue Load = DAG.getLoad(
10816 VT, DL, RHS->getChain(), NewPtr,
10817 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10818 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10819 // Replace the old load's chain with the new load's chain.
10820 WorklistRemover DeadNodes(*this);
10821 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10822 return Load;
10823 }
10824 }
10825 }
10826 }
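// Editorial note (illustrative): on little-endian, with i32 loads ld0 at address p and ld1
// at p+4, (fshr ld1, ld0, 8) selects bytes p[1..4], so it becomes a single i32 load at p+1
// (PtrOff == ShAmt / 8 == 1 above).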
10827 }
10828
10829 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10830 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
10831 // iff we know the shift amount is in range.
10832 // TODO: when is it worth doing SUB(BW, N2) as well?
10833 if (isPowerOf2_32(BitWidth)) {
10834 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10835 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10836 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10837 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10838 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10839 }
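// Editorial note (illustrative): for BitWidth == 32, ~ModuloBits masks every bit above the
// low 5, so MaskedValueIsZero proves N2 u< 32 and no explicit modulo is required.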
10840
10841 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10842 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
10843 // TODO: Investigate flipping this rotate if only one is legal; if funnel shift
10844 // is legal as well, we might be better off avoiding non-constant (BW - N2).
10845 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10846 if (N0 == N1 && hasOperation(RotOpc, VT))
10847 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10848
10849 // Simplify, based on bits shifted out of N0/N1.
10850 if (SimplifyDemandedBits(SDValue(N, 0)))
10851 return SDValue(N, 0);
10852
10853 return SDValue();
10854}
10855
10856SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10857 SDValue N0 = N->getOperand(0);
10858 SDValue N1 = N->getOperand(1);
10859 if (SDValue V = DAG.simplifyShift(N0, N1))
10860 return V;
10861
10862 SDLoc DL(N);
10863 EVT VT = N0.getValueType();
10864
10865 // fold (*shlsat c1, c2) -> c1<<c2
10866 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
10867 return C;
10868
10870 ConstantSDNode *N1C = isConstOrConstSplat(N1);
10871 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10872 // fold (sshlsat x, c) -> (shl x, c)
10873 if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10874 N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10875 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10876
10877 // fold (ushlsat x, c) -> (shl x, c)
10878 if (N->getOpcode() == ISD::USHLSAT && N1C &&
10879 N1C->getAPIntValue().ule(
10880 DAG.computeKnownBits(N0).countMinLeadingZeros()))
10881 return DAG.getNode(ISD::SHL, DL, VT, N0, N1);
10882 }
10883
10884 return SDValue();
10885}
10886
10887 // Given an ABS node, detect the following patterns:
10888// (ABS (SUB (EXTEND a), (EXTEND b))).
10889// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
10890 // Generates a UABD/SABD instruction.
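// Editorial note (illustrative): ISD::ABDU/ABDS are the unsigned/signed absolute
// difference, e.g. abdu(i8 10, i8 250) == 240 and abds(i8 10, i8 -6) == 16.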
10891SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
10892 EVT SrcVT = N->getValueType(0);
10893
10894 if (N->getOpcode() == ISD::TRUNCATE)
10895 N = N->getOperand(0).getNode();
10896
10897 if (N->getOpcode() != ISD::ABS)
10898 return SDValue();
10899
10900 EVT VT = N->getValueType(0);
10901 SDValue AbsOp1 = N->getOperand(0);
10902 SDValue Op0, Op1;
10903
10904 if (AbsOp1.getOpcode() != ISD::SUB)
10905 return SDValue();
10906
10907 Op0 = AbsOp1.getOperand(0);
10908 Op1 = AbsOp1.getOperand(1);
10909
10910 unsigned Opc0 = Op0.getOpcode();
10911
10912 // Check if the operands of the sub are (zero|sign)-extended.
10913 // TODO: Should we use ValueTracking instead?
10914 if (Opc0 != Op1.getOpcode() ||
10915 (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10916 Opc0 != ISD::SIGN_EXTEND_INREG)) {
10917 // fold (abs (sub nsw x, y)) -> abds(x, y)
10918 if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10919 TLI.preferABDSToABSWithNSW(VT)) {
10920 SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10921 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10922 }
10923 return SDValue();
10924 }
10925
10926 EVT VT0, VT1;
10927 if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10928 VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10929 VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10930 } else {
10931 VT0 = Op0.getOperand(0).getValueType();
10932 VT1 = Op1.getOperand(0).getValueType();
10933 }
10934 unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10935
10936 // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10937 // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10938 EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10939 if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10940 (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10941 SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10942 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10943 DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10944 ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10945 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10946 }
10947
10948 // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10949 // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10950 if (hasOperation(ABDOpcode, VT)) {
10951 SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10952 return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10953 }
10954
10955 return SDValue();
10956}
10957
10958SDValue DAGCombiner::visitABS(SDNode *N) {
10959 SDValue N0 = N->getOperand(0);
10960 EVT VT = N->getValueType(0);
10961 SDLoc DL(N);
10962
10963 // fold (abs c1) -> c2
10964 if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, DL, VT, {N0}))
10965 return C;
10966 // fold (abs (abs x)) -> (abs x)
10967 if (N0.getOpcode() == ISD::ABS)
10968 return N0;
10969 // fold (abs x) -> x iff not-negative
10970 if (DAG.SignBitIsZero(N0))
10971 return N0;
10972
10973 if (SDValue ABD = foldABSToABD(N, DL))
10974 return ABD;
10975
10976 // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10977 // iff zero_extend/truncate are free.
10978 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10979 EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10980 if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10981 TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10982 hasOperation(ISD::ABS, ExtVT)) {
10983 return DAG.getNode(
10984 ISD::ZERO_EXTEND, DL, VT,
10985 DAG.getNode(ISD::ABS, DL, ExtVT,
10986 DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10987 }
10988 }
10989
10990 return SDValue();
10991}
10992
10993SDValue DAGCombiner::visitBSWAP(SDNode *N) {
10994 SDValue N0 = N->getOperand(0);
10995 EVT VT = N->getValueType(0);
10996 SDLoc DL(N);
10997
10998 // fold (bswap c1) -> c2
10999 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11000 return C;
11001 // fold (bswap (bswap x)) -> x
11002 if (N0.getOpcode() == ISD::BSWAP)
11003 return N0.getOperand(0);
11004
11005 // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11006 // isn't supported, it will be expanded to bswap followed by a manual reversal
11007 // of bits in each byte. By placing bswaps before bitreverse, we can remove
11008 // the two bswaps if the bitreverse gets expanded.
11009 if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11010 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11011 return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11012 }
11013
11014 // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
11015 // iff c >= bw/2 (i.e. the lower half of the result is known zero)
11016 unsigned BW = VT.getScalarSizeInBits();
11017 if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11018 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11019 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11020 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11021 ShAmt->getZExtValue() >= (BW / 2) &&
11022 (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11023 TLI.isTruncateFree(VT, HalfVT) &&
11024 (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11025 SDValue Res = N0.getOperand(0);
11026 if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11027 Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11028 DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
11029 Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11030 Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11031 return DAG.getZExtOrTrunc(Res, DL, VT);
11032 }
11033 }
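// Editorial note (illustrative): for i64 with a shift amount of 48, the fold above produces
// zext(bswap i32 (trunc i32 (shl x, 16))), since NewShAmt == 48 - 32 == 16.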
11034
11035 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11036 // inverse-shift-of-bswap:
11037 // bswap (X u<< C) --> (bswap X) u>> C
11038 // bswap (X u>> C) --> (bswap X) u<< C
11039 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11040 N0.hasOneUse()) {
11041 auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11042 if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11043 ShAmt->getZExtValue() % 8 == 0) {
11044 SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11045 unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11046 return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11047 }
11048 }
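// Editorial note (illustrative): e.g. bswap(x << 16) --> (bswap x) u>> 16; shifting by a
// whole number of bytes before or after the byte swap moves the same bytes.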
11049
11050 if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11051 return V;
11052
11053 return SDValue();
11054}
11055
11056SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11057 SDValue N0 = N->getOperand(0);
11058 EVT VT = N->getValueType(0);
11059 SDLoc DL(N);
11060
11061 // fold (bitreverse c1) -> c2
11062 if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11063 return C;
11064
11065 // fold (bitreverse (bitreverse x)) -> x
11066 if (N0.getOpcode() == ISD::BITREVERSE)
11067 return N0.getOperand(0);
11068
11069 SDValue X, Y;
11070
11071 // fold (bitreverse (lshr (bitreverse x), y)) -> (shl x, y)
11072 if ((!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11073 sd_match(N, m_BitReverse(m_Srl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11074 return DAG.getNode(ISD::SHL, DL, VT, X, Y);
11075
11076 // fold (bitreverse (shl (bitreverse x), y)) -> (lshr x, y)
11077 if ((!LegalOperations || TLI.isOperationLegal(ISD::SRL, VT)) &&
11078 sd_match(N, m_BitReverse(m_Shl(m_BitReverse(m_Value(X)), m_Value(Y)))))
11079 return DAG.getNode(ISD::SRL, DL, VT, X, Y);
11080
11081 return SDValue();
11082}
11083
11084SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11085 SDValue N0 = N->getOperand(0);
11086 EVT VT = N->getValueType(0);
11087 SDLoc DL(N);
11088
11089 // fold (ctlz c1) -> c2
11090 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11091 return C;
11092
11093 // If the value is known never to be zero, switch to the undef version.
11094 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11095 if (DAG.isKnownNeverZero(N0))
11096 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11097
11098 return SDValue();
11099}
11100
11101SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11102 SDValue N0 = N->getOperand(0);
11103 EVT VT = N->getValueType(0);
11104 SDLoc DL(N);
11105
11106 // fold (ctlz_zero_undef c1) -> c2
11107 if (SDValue C =
11108 DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11109 return C;
11110 return SDValue();
11111}
11112
11113SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11114 SDValue N0 = N->getOperand(0);
11115 EVT VT = N->getValueType(0);
11116 SDLoc DL(N);
11117
11118 // fold (cttz c1) -> c2
11119 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11120 return C;
11121
11122 // If the value is known never to be zero, switch to the undef version.
11123 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11124 if (DAG.isKnownNeverZero(N0))
11125 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11126
11127 return SDValue();
11128}
11129
11130SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11131 SDValue N0 = N->getOperand(0);
11132 EVT VT = N->getValueType(0);
11133 SDLoc DL(N);
11134
11135 // fold (cttz_zero_undef c1) -> c2
11136 if (SDValue C =
11137 DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11138 return C;
11139 return SDValue();
11140}
11141
11142SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11143 SDValue N0 = N->getOperand(0);
11144 EVT VT = N->getValueType(0);
11145 unsigned NumBits = VT.getScalarSizeInBits();
11146 SDLoc DL(N);
11147
11148 // fold (ctpop c1) -> c2
11149 if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11150 return C;
11151
11152 // If the source is being shifted, but doesn't affect any active bits,
11153 // then we can call CTPOP on the shift source directly.
11154 if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SHL) {
11155 if (ConstantSDNode *AmtC = isConstOrConstSplat(N0.getOperand(1))) {
11156 const APInt &Amt = AmtC->getAPIntValue();
11157 if (Amt.ult(NumBits)) {
11158 KnownBits KnownSrc = DAG.computeKnownBits(N0.getOperand(0));
11159 if ((N0.getOpcode() == ISD::SRL &&
11160 Amt.ule(KnownSrc.countMinTrailingZeros())) ||
11161 (N0.getOpcode() == ISD::SHL &&
11162 Amt.ule(KnownSrc.countMinLeadingZeros()))) {
11163 return DAG.getNode(ISD::CTPOP, DL, VT, N0.getOperand(0));
11164 }
11165 }
11166 }
11167 }
11168
11169 // If the upper bits are known to be zero, then see if it's profitable to
11170 // only count the lower bits.
11171 if (VT.isScalarInteger() && NumBits > 8 && (NumBits & 1) == 0) {
11172 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
11173 if (hasOperation(ISD::CTPOP, HalfVT) &&
11174 TLI.isTypeDesirableForOp(ISD::CTPOP, HalfVT) &&
11175 TLI.isTruncateFree(N0, HalfVT) && TLI.isZExtFree(HalfVT, VT)) {
11176 APInt UpperBits = APInt::getHighBitsSet(NumBits, NumBits / 2);
11177 if (DAG.MaskedValueIsZero(N0, UpperBits)) {
11178 SDValue PopCnt = DAG.getNode(ISD::CTPOP, DL, HalfVT,
11179 DAG.getZExtOrTrunc(N0, DL, HalfVT));
11180 return DAG.getZExtOrTrunc(PopCnt, DL, VT);
11181 }
11182 }
11183 }
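// Editorial note (illustrative): e.g. an i64 ctpop whose upper 32 bits are known zero
// becomes zext(ctpop i32 (trunc x)) when the narrower ctpop is legal and desirable.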
11184
11185 return SDValue();
11186}
11187
11188// FIXME: This should be checking for no signed zeros on individual operands, as
11189 // well as no nans.
11190 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11191 SDValue RHS,
11192 const TargetLowering &TLI) {
11193 const TargetOptions &Options = DAG.getTarget().Options;
11194 EVT VT = LHS.getValueType();
11195
11196 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11198 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11199}
11200
11201 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11202 SDValue RHS, SDValue True, SDValue False,
11203 ISD::CondCode CC,
11204 const TargetLowering &TLI,
11205 SelectionDAG &DAG) {
11206 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11207 switch (CC) {
11208 case ISD::SETOLT:
11209 case ISD::SETOLE:
11210 case ISD::SETLT:
11211 case ISD::SETLE:
11212 case ISD::SETULT:
11213 case ISD::SETULE: {
11214 // Since it's known never NaN to get here already, either fminnum or
11215 // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
11216 // expanded in terms of it.
11217 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11218 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11219 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11220
11221 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11222 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11223 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11224 return SDValue();
11225 }
11226 case ISD::SETOGT:
11227 case ISD::SETOGE:
11228 case ISD::SETGT:
11229 case ISD::SETGE:
11230 case ISD::SETUGT:
11231 case ISD::SETUGE: {
11232 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11233 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11234 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11235
11236 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11237 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11238 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11239 return SDValue();
11240 }
11241 default:
11242 return SDValue();
11243 }
11244}
11245
11246/// Generate Min/Max node
11247SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11248 SDValue RHS, SDValue True,
11249 SDValue False, ISD::CondCode CC) {
11250 if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11251 return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11252
11253 // If we can't directly match this, try to see if we can pull an fneg out of
11254 // the select.
11255 SDValue NegTrue = TLI.getCheaperNegatedExpression(
11256 True, DAG, LegalOperations, ForCodeSize);
11257 if (!NegTrue)
11258 return SDValue();
11259
11260 HandleSDNode NegTrueHandle(NegTrue);
11261
11262 // Try to unfold an fneg from the select if we are comparing the negated
11263 // constant.
11264 //
11265 // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11266 //
11267 // TODO: Handle fabs
11268 if (LHS == NegTrue) {
11269 // If we can't directly match this, try to see if we can pull an fneg out of
11270 // the select.
11271 SDValue NegRHS = TLI.getCheaperNegatedExpression(
11272 RHS, DAG, LegalOperations, ForCodeSize);
11273 if (NegRHS) {
11274 HandleSDNode NegRHSHandle(NegRHS);
11275 if (NegRHS == False) {
11276 SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11277 False, CC, TLI, DAG);
11278 if (Combined)
11279 return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11280 }
11281 }
11282 }
11283
11284 return SDValue();
11285}
11286
11287/// If a (v)select has a condition value that is a sign-bit test, try to smear
11288/// the condition operand sign-bit across the value width and use it as a mask.
11289 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL,
11290 SelectionDAG &DAG) {
11291 SDValue Cond = N->getOperand(0);
11292 SDValue C1 = N->getOperand(1);
11293 SDValue C2 = N->getOperand(2);
11294 if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11295 return SDValue();
11296
11297 EVT VT = N->getValueType(0);
11298 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11299 VT != Cond.getOperand(0).getValueType())
11300 return SDValue();
11301
11302 // The inverted-condition + commuted-select variants of these patterns are
11303 // canonicalized to these forms in IR.
11304 SDValue X = Cond.getOperand(0);
11305 SDValue CondC = Cond.getOperand(1);
11306 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11307 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11308 isAllOnesOrAllOnesSplat(C2)) {
11309 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11310 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11311 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11312 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11313 }
11314 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11315 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11316 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11317 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11318 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11319 }
11320 return SDValue();
11321}
11322
11323 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11324 const TargetLowering &TLI) {
11325 if (!TLI.convertSelectOfConstantsToMath(VT))
11326 return false;
11327
11328 if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11329 return true;
11330 if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11331 return true;
11332
11333 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11334 if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11335 return true;
11336 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11337 return true;
11338
11339 return false;
11340}
11341
11342SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11343 SDValue Cond = N->getOperand(0);
11344 SDValue N1 = N->getOperand(1);
11345 SDValue N2 = N->getOperand(2);
11346 EVT VT = N->getValueType(0);
11347 EVT CondVT = Cond.getValueType();
11348 SDLoc DL(N);
11349
11350 if (!VT.isInteger())
11351 return SDValue();
11352
11353 auto *C1 = dyn_cast<ConstantSDNode>(N1);
11354 auto *C2 = dyn_cast<ConstantSDNode>(N2);
11355 if (!C1 || !C2)
11356 return SDValue();
11357
11358 if (CondVT != MVT::i1 || LegalOperations) {
11359 // fold (select Cond, 0, 1) -> (xor Cond, 1)
11361 // We can't do this reliably if integer-based booleans have different contents
11362 // than floating-point-based booleans. This is because we can't tell whether we
11362 // have an integer-based boolean or a floating-point-based boolean unless we
11363 // can find the SETCC that produced it and inspect its operands. This is
11364 // fairly easy if C is the SETCC node, but it can potentially be
11365 // undiscoverable (or not reasonably discoverable). For example, it could be
11366 // in another basic block or it could require searching a complicated
11367 // expression.
11368 if (CondVT.isInteger() &&
11369 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11371 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11373 C1->isZero() && C2->isOne()) {
11374 SDValue NotCond =
11375 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11376 if (VT.bitsEq(CondVT))
11377 return NotCond;
11378 return DAG.getZExtOrTrunc(NotCond, DL, VT);
11379 }
11380
11381 return SDValue();
11382 }
11383
11384 // Only do this before legalization to avoid conflicting with target-specific
11385 // transforms in the other direction (create a select from a zext/sext). There
11386 // is also a target-independent combine here in DAGCombiner in the other
11387 // direction for (select Cond, -1, 0) when the condition is not i1.
11388 assert(CondVT == MVT::i1 && !LegalOperations);
11389
11390 // select Cond, 1, 0 --> zext (Cond)
11391 if (C1->isOne() && C2->isZero())
11392 return DAG.getZExtOrTrunc(Cond, DL, VT);
11393
11394 // select Cond, -1, 0 --> sext (Cond)
11395 if (C1->isAllOnes() && C2->isZero())
11396 return DAG.getSExtOrTrunc(Cond, DL, VT);
11397
11398 // select Cond, 0, 1 --> zext (!Cond)
11399 if (C1->isZero() && C2->isOne()) {
11400 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11401 NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11402 return NotCond;
11403 }
11404
11405 // select Cond, 0, -1 --> sext (!Cond)
11406 if (C1->isZero() && C2->isAllOnes()) {
11407 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11408 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11409 return NotCond;
11410 }
11411
11412 // Use a target hook because some targets may prefer to transform in the
11413 // other direction.
11414 if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11415 return SDValue();
11416
11417 // For any constants that differ by 1, we can transform the select into
11418 // an extend and add.
11419 const APInt &C1Val = C1->getAPIntValue();
11420 const APInt &C2Val = C2->getAPIntValue();
11421
11422 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11423 if (C1Val - 1 == C2Val) {
11424 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11425 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11426 }
11427
11428 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11429 if (C1Val + 1 == C2Val) {
11430 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11431 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11432 }
11433
11434 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11435 if (C1Val.isPowerOf2() && C2Val.isZero()) {
11436 Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11437 SDValue ShAmtC =
11438 DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11439 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11440 }
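// Editorial note (illustrative): e.g. select i1 %c, i32 16, i32 0 --> shl (zext %c), 4,
// since 16 == 1 << 4.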
11441
11442 // select Cond, -1, C --> or (sext Cond), C
11443 if (C1->isAllOnes()) {
11444 Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11445 return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11446 }
11447
11448 // select Cond, C, -1 --> or (sext (not Cond)), C
11449 if (C2->isAllOnes()) {
11450 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11451 NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11452 return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11453 }
11454
11454
11455 if (SDValue V = foldSelectOfConstantsUsingSra(N, DL, DAG))
11456 return V;
11457
11458 return SDValue();
11459}
11460
11461template <class MatchContextClass>
11462 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11463 assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
11464 N->getOpcode() == ISD::VP_SELECT) &&
11465 "Expected a (v)(vp.)select");
11466 SDValue Cond = N->getOperand(0);
11467 SDValue T = N->getOperand(1), F = N->getOperand(2);
11468 EVT VT = N->getValueType(0);
11469 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11470 MatchContextClass matcher(DAG, TLI, N);
11471
11472 if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11473 return SDValue();
11474
11475 // select Cond, Cond, F --> or Cond, F
11476 // select Cond, 1, F --> or Cond, F
11477 if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11478 return matcher.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11479
11480 // select Cond, T, Cond --> and Cond, T
11481 // select Cond, T, 0 --> and Cond, T
11482 if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11483 return matcher.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11484
11485 // select Cond, T, 1 --> or (not Cond), T
11486 if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11487 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11488 DAG.getAllOnesConstant(SDLoc(N), VT));
11489 return matcher.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11490 }
11491
11492 // select Cond, 0, F --> and (not Cond), F
11493 if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11494 SDValue NotCond = matcher.getNode(ISD::XOR, SDLoc(N), VT, Cond,
11495 DAG.getAllOnesConstant(SDLoc(N), VT));
11496 return matcher.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11497 }
11498
11499 return SDValue();
11500}
11501
11502 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11503 SDValue N0 = N->getOperand(0);
11504 SDValue N1 = N->getOperand(1);
11505 SDValue N2 = N->getOperand(2);
11506 EVT VT = N->getValueType(0);
11507 if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11508 return SDValue();
11509
11510 SDValue Cond0 = N0.getOperand(0);
11511 SDValue Cond1 = N0.getOperand(1);
11512 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11513 if (VT != Cond0.getValueType())
11514 return SDValue();
11515
11516 // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11517 // compare is inverted from that pattern ("Cond0 s> -1").
11518 if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11519 ; // This is the pattern we are looking for.
11520 else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11521 std::swap(N1, N2);
11522 else
11523 return SDValue();
11524
11525 // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11526 if (isNullOrNullSplat(N2)) {
11527 SDLoc DL(N);
11528 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11529 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11530 return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11531 }
11532
11533 // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11534 if (isAllOnesOrAllOnesSplat(N1)) {
11535 SDLoc DL(N);
11536 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11537 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11538 return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11539 }
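// Editorial note (illustrative): the arithmetic shift by BW-1 smears the sign bit into an
// all-ones or all-zeros mask, so e.g. (i32 X s< 0 ? -1 : N2) == (X s>> 31) | N2.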
11540
11541 // If we have to invert the sign bit mask, only do that transform if the
11542 // target has a bitwise 'and not' instruction (the invert is free).
11543 // (Cond0 s< -0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11544 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11545 if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11546 SDLoc DL(N);
11547 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11548 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11549 SDValue Not = DAG.getNOT(DL, Sra, VT);
11550 return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11551 }
11552
11553 // TODO: There's another pattern in this family, but it may require
11554 // implementing hasOrNot() to check for profitability:
11555 // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11556
11557 return SDValue();
11558}
11559
11560SDValue DAGCombiner::visitSELECT(SDNode *N) {
11561 SDValue N0 = N->getOperand(0);
11562 SDValue N1 = N->getOperand(1);
11563 SDValue N2 = N->getOperand(2);
11564 EVT VT = N->getValueType(0);
11565 EVT VT0 = N0.getValueType();
11566 SDLoc DL(N);
11567 SDNodeFlags Flags = N->getFlags();
11568
11569 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11570 return V;
11571
11572 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
11573 return V;
11574
11575 // select (not Cond), N1, N2 -> select Cond, N2, N1
11576 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11577 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11578 SelectOp->setFlags(Flags);
11579 return SelectOp;
11580 }
11581
11582 if (SDValue V = foldSelectOfConstants(N))
11583 return V;
11584
11585 // If we can fold this based on the true/false value, do so.
11586 if (SimplifySelectOps(N, N1, N2))
11587 return SDValue(N, 0); // Don't revisit N.
11588
11589 if (VT0 == MVT::i1) {
11590 // The code in this block deals with the following 2 equivalences:
11591 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11592 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11593 // The target can specify its preferred form with the
11594 // shouldNormalizeToSelectSequence() callback. However we always transform
11595 // to the right anyway if we find the inner select exists in the DAG anyway
11596 // and we always transform to the left side if we know that we can further
11597 // optimize the combination of the conditions.
11598 bool normalizeToSequence =
11599 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11600 // select (and Cond0, Cond1), X, Y
11601 // -> select Cond0, (select Cond1, X, Y), Y
11602 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11603 SDValue Cond0 = N0->getOperand(0);
11604 SDValue Cond1 = N0->getOperand(1);
11605 SDValue InnerSelect =
11606 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11607 if (normalizeToSequence || !InnerSelect.use_empty())
11608 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11609 InnerSelect, N2, Flags);
11610 // Cleanup on failure.
11611 if (InnerSelect.use_empty())
11612 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11613 }
11614 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11615 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11616 SDValue Cond0 = N0->getOperand(0);
11617 SDValue Cond1 = N0->getOperand(1);
11618 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11619 Cond1, N1, N2, Flags);
11620 if (normalizeToSequence || !InnerSelect.use_empty())
11621 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11622 InnerSelect, Flags);
11623 // Cleanup on failure.
11624 if (InnerSelect.use_empty())
11625 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11626 }
11627
11628 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11629 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11630 SDValue N1_0 = N1->getOperand(0);
11631 SDValue N1_1 = N1->getOperand(1);
11632 SDValue N1_2 = N1->getOperand(2);
11633 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11634 // Create the actual and node if we can generate good code for it.
11635 if (!normalizeToSequence) {
11636 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11637 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11638 N2, Flags);
11639 }
11640 // Otherwise see if we can optimize the "and" to a better pattern.
11641 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11642 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11643 N2, Flags);
11644 }
11645 }
11646 }
11647 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11648 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11649 SDValue N2_0 = N2->getOperand(0);
11650 SDValue N2_1 = N2->getOperand(1);
11651 SDValue N2_2 = N2->getOperand(2);
11652 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11653 // Create the actual or node if we can generate good code for it.
11654 if (!normalizeToSequence) {
11655 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11656 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11657 N2_2, Flags);
11658 }
11659 // Otherwise see if we can optimize to a better pattern.
11660 if (SDValue Combined = visitORLike(N0, N2_0, DL))
11661 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11662 N2_2, Flags);
11663 }
11664 }
11665 }
11666
11667 // Fold selects based on a setcc into other things, such as min/max/abs.
11668 if (N0.getOpcode() == ISD::SETCC) {
11669 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11670 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11671
11672 // select (fcmp lt x, y), x, y -> fminnum x, y
11673 // select (fcmp gt x, y), x, y -> fmaxnum x, y
11674 //
11675 // This is OK if we don't care what happens if either operand is a NaN.
11676 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11677 if (SDValue FMinMax =
11678 combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11679 return FMinMax;
11680
11681 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11682 // This is conservatively limited to pre-legal-operations to give targets
11683 // a chance to reverse the transform if they want to do that. Also, it is
11684 // unlikely that the pattern would be formed late, so it's probably not
11685 // worth going through the other checks.
11686 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11687 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11688 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11689 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11690 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11691 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11692 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11693 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11694 //
11695 // The IR equivalent of this transform would have this form:
11696 // %a = add %x, C
11697 // %c = icmp ugt %x, ~C
11698 // %r = select %c, -1, %a
11699 // =>
11700 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11701 // %u0 = extractvalue %u, 0
11702 // %u1 = extractvalue %u, 1
11703 // %r = select %u1, -1, %u0
11704 SDVTList VTs = DAG.getVTList(VT, VT0);
11705 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11706 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11707 }
11708 }
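// Editorial note (illustrative): e.g. for i8 with C == 10, (x u> 245 ? -1 : x + 10) is the
// saturating form: uaddo(x, 10) overflows exactly when x u> ~C, so the select picks -1 then.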
11709
11710 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11711 (!LegalOperations &&
11712 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11713 // Any flags available in a select/setcc fold will be on the setcc as they
11714 // migrated from fcmp.
11715 Flags = N0->getFlags();
11716 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11717 N2, N0.getOperand(2));
11718 SelectNode->setFlags(Flags);
11719 return SelectNode;
11720 }
11721
11722 if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11723 return NewSel;
11724 }
11725
11726 if (!VT.isVector())
11727 if (SDValue BinOp = foldSelectOfBinops(N))
11728 return BinOp;
11729
11730 if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11731 return R;
11732
11733 return SDValue();
11734}
11735
11736// This function assumes all the vselect's arguments are CONCAT_VECTOR
11737// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11738 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11739 SDLoc DL(N);
11740 SDValue Cond = N->getOperand(0);
11741 SDValue LHS = N->getOperand(1);
11742 SDValue RHS = N->getOperand(2);
11743 EVT VT = N->getValueType(0);
11744 int NumElems = VT.getVectorNumElements();
11745 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11746 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11747 Cond.getOpcode() == ISD::BUILD_VECTOR);
11748
11749 // CONCAT_VECTORS can take an arbitrary number of arguments. We only care about
11750 // binary ones here.
11751 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11752 return SDValue();
11753
11754 // We're sure we have an even number of elements due to the
11755 // concat_vectors we have as arguments to vselect.
11756 // Skip BV elements until we find one that's not an UNDEF
11757 // After we find an UNDEF element, keep looping until we get to half the
11758 // length of the BV and see if all the non-undef nodes are the same.
11759 ConstantSDNode *BottomHalf = nullptr;
11760 for (int i = 0; i < NumElems / 2; ++i) {
11761 if (Cond->getOperand(i)->isUndef())
11762 continue;
11763
11764 if (BottomHalf == nullptr)
11765 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11766 else if (Cond->getOperand(i).getNode() != BottomHalf)
11767 return SDValue();
11768 }
11769
11770 // Do the same for the second half of the BuildVector
11771 ConstantSDNode *TopHalf = nullptr;
11772 for (int i = NumElems / 2; i < NumElems; ++i) {
11773 if (Cond->getOperand(i)->isUndef())
11774 continue;
11775
11776 if (TopHalf == nullptr)
11777 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11778 else if (Cond->getOperand(i).getNode() != TopHalf)
11779 return SDValue();
11780 }
11781
11782 assert(TopHalf && BottomHalf &&
11783 "One half of the selector was all UNDEFs and the other was all the "
11784 "same value. This should have been addressed before this function.");
11785 return DAG.getNode(
11786 ISD::CONCAT_VECTORS, DL, VT,
11787 BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11788 TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11789}
11790
11791bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11792 SelectionDAG &DAG, const SDLoc &DL) {
11793
11794 // Only perform the transformation when existing operands can be reused.
11795 if (IndexIsScaled)
11796 return false;
11797
11798 if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11799 return false;
11800
11801 EVT VT = BasePtr.getValueType();
11802
11803 if (SDValue SplatVal = DAG.getSplatValue(Index);
11804 SplatVal && !isNullConstant(SplatVal) &&
11805 SplatVal.getValueType() == VT) {
11806 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11807 Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11808 return true;
11809 }
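// Editorial note (illustrative): a fully uniform index splat(p) is folded into BasePtr,
// leaving a zero index; the ISD::ADD cases below instead peel a splat addend off the index.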
11810
11811 if (Index.getOpcode() != ISD::ADD)
11812 return false;
11813
11814 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11815 SplatVal && SplatVal.getValueType() == VT) {
11816 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11817 Index = Index.getOperand(1);
11818 return true;
11819 }
11820 if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11821 SplatVal && SplatVal.getValueType() == VT) {
11822 BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11823 Index = Index.getOperand(0);
11824 return true;
11825 }
11826 return false;
11827}
11828
11829// Fold sext/zext of index into index type.
11830 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11831 SelectionDAG &DAG) {
11832 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11833
11834 // It's always safe to look through zero extends.
11835 if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11836 if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11837 IndexType = ISD::UNSIGNED_SCALED;
11838 Index = Index.getOperand(0);
11839 return true;
11840 }
11841 if (ISD::isIndexTypeSigned(IndexType)) {
11842 IndexType = ISD::UNSIGNED_SCALED;
11843 return true;
11844 }
11845 }
11846
11847 // It's only safe to look through sign extends when Index is signed.
11848 if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11849 ISD::isIndexTypeSigned(IndexType) &&
11850 TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11851 Index = Index.getOperand(0);
11852 return true;
11853 }
11854
11855 return false;
11856}
11857
11858SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11859 VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11860 SDValue Mask = MSC->getMask();
11861 SDValue Chain = MSC->getChain();
11862 SDValue Index = MSC->getIndex();
11863 SDValue Scale = MSC->getScale();
11864 SDValue StoreVal = MSC->getValue();
11865 SDValue BasePtr = MSC->getBasePtr();
11866 SDValue VL = MSC->getVectorLength();
11867 ISD::MemIndexType IndexType = MSC->getIndexType();
11868 SDLoc DL(N);
11869
11870 // Zap scatters with a zero mask.
11871 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11872 return Chain;
11873
11874 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11875 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11876 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11877 DL, Ops, MSC->getMemOperand(), IndexType);
11878 }
11879
11880 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11881 SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11882 return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11883 DL, Ops, MSC->getMemOperand(), IndexType);
11884 }
11885
11886 return SDValue();
11887}
11888
11889SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11890 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11891 SDValue Mask = MSC->getMask();
11892 SDValue Chain = MSC->getChain();
11893 SDValue Index = MSC->getIndex();
11894 SDValue Scale = MSC->getScale();
11895 SDValue StoreVal = MSC->getValue();
11896 SDValue BasePtr = MSC->getBasePtr();
11897 ISD::MemIndexType IndexType = MSC->getIndexType();
11898 SDLoc DL(N);
11899
11900 // Zap scatters with a zero mask.
11901 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11902 return Chain;
11903
11904 if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11905 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11906 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11907 DL, Ops, MSC->getMemOperand(), IndexType,
11908 MSC->isTruncatingStore());
11909 }
11910
11911 if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11912 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11913 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11914 DL, Ops, MSC->getMemOperand(), IndexType,
11915 MSC->isTruncatingStore());
11916 }
11917
11918 return SDValue();
11919}
11920
11921SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11922 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11923 SDValue Mask = MST->getMask();
11924 SDValue Chain = MST->getChain();
11925 SDValue Value = MST->getValue();
11926 SDValue Ptr = MST->getBasePtr();
11927 SDLoc DL(N);
11928
11929 // Zap masked stores with a zero mask.
11930 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11931 return Chain;
11932
11933 // Remove a masked store if base pointers and masks are equal.
11934 if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11935 if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11936 MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11937 !MST->getBasePtr().isUndef() &&
11938 ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11939 MST1->getMemoryVT().getStoreSize()) ||
11940 ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11941 TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11942 MST->getMemoryVT().getStoreSize())) {
11943 CombineTo(MST1, MST1->getChain());
11944 if (N->getOpcode() != ISD::DELETED_NODE)
11945 AddToWorklist(N);
11946 return SDValue(N, 0);
11947 }
11948 }
11949
11950 // If this is a masked store with an all-ones mask, we can use an unmasked store.
11951 // FIXME: Can we do this for indexed, compressing, or truncating stores?
11952 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11953 !MST->isCompressingStore() && !MST->isTruncatingStore())
11954 return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11955 MST->getBasePtr(), MST->getPointerInfo(),
11956 MST->getOriginalAlign(),
11957 MST->getMemOperand()->getFlags(), MST->getAAInfo());
11958
11959 // Try transforming N to an indexed store.
11960 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11961 return SDValue(N, 0);
11962
11963 if (MST->isTruncatingStore() && MST->isUnindexed() &&
11964 Value.getValueType().isInteger() &&
11965 (!isa<ConstantSDNode>(Value) ||
11966 !cast<ConstantSDNode>(Value)->isOpaque())) {
11967 APInt TruncDemandedBits =
11968 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11969 MST->getMemoryVT().getScalarSizeInBits());
11970
11971 // See if we can simplify the operation with
11972 // SimplifyDemandedBits, which only works if the value has a single use.
11973 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
11974 // Re-visit the store if anything changed and the store hasn't been merged
11975 // with another node (N is deleted). SimplifyDemandedBits will add Value's
11976 // node back to the worklist if necessary, but we also need to re-visit
11977 // the Store node itself.
11978 if (N->getOpcode() != ISD::DELETED_NODE)
11979 AddToWorklist(N);
11980 return SDValue(N, 0);
11981 }
11982 }
11983
11984 // If this is a TRUNC followed by a masked store, fold this into a masked
11985 // truncating store. We can do this even if this is already a masked
11986 // truncstore.
11987 // TODO: Try combining to a masked compress store if possible.
11988 if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11989 MST->isUnindexed() && !MST->isCompressingStore() &&
11990 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11991 MST->getMemoryVT(), LegalOperations)) {
11992 auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11993 Value.getOperand(0).getValueType());
11994 return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11995 MST->getOffset(), Mask, MST->getMemoryVT(),
11996 MST->getMemOperand(), MST->getAddressingMode(),
11997 /*IsTruncating=*/true);
11998 }
11999
12000 return SDValue();
12001}
12002
12003SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
12004 auto *SST = cast<VPStridedStoreSDNode>(N);
12005 EVT EltVT = SST->getValue().getValueType().getVectorElementType();
12006 // Combine strided stores with unit-stride to a regular VP store.
12007 if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
12008 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12009 return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
12010 SST->getBasePtr(), SST->getOffset(), SST->getMask(),
12011 SST->getVectorLength(), SST->getMemoryVT(),
12012 SST->getMemOperand(), SST->getAddressingMode(),
12013 SST->isTruncatingStore(), SST->isCompressingStore());
12014 }
12015 return SDValue();
12016}
12017
12018SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
12019 VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
12020 SDValue Mask = MGT->getMask();
12021 SDValue Chain = MGT->getChain();
12022 SDValue Index = MGT->getIndex();
12023 SDValue Scale = MGT->getScale();
12024 SDValue BasePtr = MGT->getBasePtr();
12025 SDValue VL = MGT->getVectorLength();
12026 ISD::MemIndexType IndexType = MGT->getIndexType();
12027 SDLoc DL(N);
12028
12029 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12030 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12031 return DAG.getGatherVP(
12032 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12033 Ops, MGT->getMemOperand(), IndexType);
12034 }
12035
12036 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12037 SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
12038 return DAG.getGatherVP(
12039 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12040 Ops, MGT->getMemOperand(), IndexType);
12041 }
12042
12043 return SDValue();
12044}
12045
12046SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12047 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12048 SDValue Mask = MGT->getMask();
12049 SDValue Chain = MGT->getChain();
12050 SDValue Index = MGT->getIndex();
12051 SDValue Scale = MGT->getScale();
12052 SDValue PassThru = MGT->getPassThru();
12053 SDValue BasePtr = MGT->getBasePtr();
12054 ISD::MemIndexType IndexType = MGT->getIndexType();
12055 SDLoc DL(N);
12056
12057 // Zap gathers with a zero mask.
12058 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12059 return CombineTo(N, PassThru, MGT->getChain());
12060
12061 if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12062 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12063 return DAG.getMaskedGather(
12064 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12065 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12066 }
12067
12068 if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12069 SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12070 return DAG.getMaskedGather(
12071 DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12072 Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12073 }
12074
12075 return SDValue();
12076}
12077
12078SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12079 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12080 SDValue Mask = MLD->getMask();
12081 SDLoc DL(N);
12082
12083 // Zap masked loads with a zero mask.
12084 if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12085 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12086
12087 // If this is a masked load with an all-ones mask, we can use an unmasked load.
12088 // FIXME: Can we do this for indexed, expanding, or extending loads?
12089 if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12090 !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12091 SDValue NewLd = DAG.getLoad(
12092 N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12093 MLD->getPointerInfo(), MLD->getOriginalAlign(),
12094 MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges());
12095 return CombineTo(N, NewLd, NewLd.getValue(1));
12096 }
12097
12098 // Try transforming N to an indexed load.
12099 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12100 return SDValue(N, 0);
12101
12102 return SDValue();
12103}
12104
12105SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12106 auto *SLD = cast<VPStridedLoadSDNode>(N);
12107 EVT EltVT = SLD->getValueType(0).getVectorElementType();
12108 // Combine strided loads with unit-stride to a regular VP load.
12109 if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12110 CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12111 SDValue NewLd = DAG.getLoadVP(
12112 SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12113 SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12114 SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12115 SLD->getMemOperand(), SLD->isExpandingLoad());
12116 return CombineTo(N, NewLd, NewLd.getValue(1));
12117 }
12118 return SDValue();
12119}
12120
12121/// A vector select of 2 constant vectors can be simplified to math/logic to
12122/// avoid a variable select instruction and possibly avoid constant loads.
12123SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12124 SDValue Cond = N->getOperand(0);
12125 SDValue N1 = N->getOperand(1);
12126 SDValue N2 = N->getOperand(2);
12127 EVT VT = N->getValueType(0);
12128 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12129 !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12130 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12131 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12132 return SDValue();
12133
12134 // Check if we can use the condition value to increment/decrement a single
12135 // constant value. This simplifies a select to an add and removes a constant
12136 // load/materialization from the general case.
12137 bool AllAddOne = true;
12138 bool AllSubOne = true;
12139 unsigned Elts = VT.getVectorNumElements();
12140 for (unsigned i = 0; i != Elts; ++i) {
12141 SDValue N1Elt = N1.getOperand(i);
12142 SDValue N2Elt = N2.getOperand(i);
12143 if (N1Elt.isUndef() || N2Elt.isUndef())
12144 continue;
12145 if (N1Elt.getValueType() != N2Elt.getValueType())
12146 continue;
12147
12148 const APInt &C1 = N1Elt->getAsAPIntVal();
12149 const APInt &C2 = N2Elt->getAsAPIntVal();
12150 if (C1 != C2 + 1)
12151 AllAddOne = false;
12152 if (C1 != C2 - 1)
12153 AllSubOne = false;
12154 }
12155
12156 // Further simplifications for the extra-special cases where the constants are
12157 // all 0 or all -1 should be implemented as folds of these patterns.
12158 SDLoc DL(N);
12159 if (AllAddOne || AllSubOne) {
12160 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12161 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12162 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12163 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12164 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12165 }
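// Editorial note (illustrative): e.g. vselect %c, <5,5,5,5>, <4,4,4,4> --> add (zext %c), <4,4,4,4>,
// because each true-arm constant is exactly one larger than the corresponding false-arm constant.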
12166
12167 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12168 APInt Pow2C;
12169 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12170 isNullOrNullSplat(N2)) {
12171 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12172 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12173 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12174 }
12175
12177 return V;
12178
12179 // The general case for select-of-constants:
12180 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12181 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12182 // leave that to a machine-specific pass.
12183 return SDValue();
12184}
12185
12186SDValue DAGCombiner::visitVP_SELECT(SDNode *N) {
12187 SDValue N0 = N->getOperand(0);
12188 SDValue N1 = N->getOperand(1);
12189 SDValue N2 = N->getOperand(2);
12190
12191 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12192 return V;
12193
12194 if (SDValue V = foldBoolSelectToLogic<VPMatchContext>(N, DAG))
12195 return V;
12196
12197 return SDValue();
12198}
12199
12200SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12201 SDValue N0 = N->getOperand(0);
12202 SDValue N1 = N->getOperand(1);
12203 SDValue N2 = N->getOperand(2);
12204 EVT VT = N->getValueType(0);
12205 SDLoc DL(N);
12206
12207 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12208 return V;
12209
12210 if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DAG))
12211 return V;
12212
12213 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12214 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12215 return DAG.getSelect(DL, VT, F, N2, N1);
12216
12217 // select (sext m), (add X, C), X --> (add X, (and C, (sext m))))
12218 if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N2 && N1->hasOneUse() &&
12221 TLI.getBooleanContents(N0.getValueType()) ==
12222 TargetLowering::ZeroOrNegativeOneBooleanContent) {
12223 return DAG.getNode(
12224 ISD::ADD, DL, N1.getValueType(), N2,
12225 DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
12226 }
12227
12228 // Canonicalize integer abs.
12229 // vselect (setg[te] X, 0), X, -X ->
12230 // vselect (setgt X, -1), X, -X ->
12231 // vselect (setl[te] X, 0), -X, X ->
12232 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12233 if (N0.getOpcode() == ISD::SETCC) {
12234 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12235 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12236 bool isAbs = false;
12237 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12238
12239 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12240 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12241 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12242 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12243 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12244 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12245 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12246
12247 if (isAbs) {
12248 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12249 return DAG.getNode(ISD::ABS, DL, VT, LHS);
12250
12251 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12252 DAG.getConstant(VT.getScalarSizeInBits() - 1,
12253 DL, getShiftAmountTy(VT)));
12254 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12255 AddToWorklist(Shift.getNode());
12256 AddToWorklist(Add.getNode());
12257 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12258 }
12259
12260 // vselect x, y (fcmp lt x, y) -> fminnum x, y
12261 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
12262 //
12263 // This is OK if we don't care about what happens if either operand is a
12264 // NaN.
12265 //
12266 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12267 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12268 return FMinMax;
12269 }
12270
12271 if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12272 return S;
12273 if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12274 return S;
12275
12276 // If this select has a condition (setcc) with narrower operands than the
12277 // select, try to widen the compare to match the select width.
12278 // TODO: This should be extended to handle any constant.
12279 // TODO: This could be extended to handle non-loading patterns, but that
12280 // requires thorough testing to avoid regressions.
12281 if (isNullOrNullSplat(RHS)) {
12282 EVT NarrowVT = LHS.getValueType();
12284 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12285 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12286 unsigned WideWidth = WideVT.getScalarSizeInBits();
12287 bool IsSigned = isSignedIntSetCC(CC);
12288 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12289 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12290 SetCCWidth != 1 && SetCCWidth < WideWidth &&
12291 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12292 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12293 // Both compare operands can be widened for free. The LHS can use an
12294 // extended load, and the RHS is a constant:
12295 // vselect (ext (setcc load(X), C)), N1, N2 -->
12296 // vselect (setcc extload(X), C'), N1, N2
12297 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12298 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12299 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12300 EVT WideSetCCVT = getSetCCResultType(WideVT);
12301 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12302 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12303 }
12304 }
12305
12306 // Match VSELECTs with absolute difference patterns.
12307 // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12308 // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12309 // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12310 // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
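// e.g. for signed a = 3, b = 7: setgt is false, so the vselect picks
// (sub b, a) = 4 = |a - b|, which is exactly abds(a, b).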
12311 if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12312 N1.getOperand(0) == N2.getOperand(1) &&
12313 N1.getOperand(1) == N2.getOperand(0)) {
12314 bool IsSigned = isSignedIntSetCC(CC);
12315 unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12316 if (hasOperation(ABDOpc, VT)) {
12317 switch (CC) {
12318 case ISD::SETGT:
12319 case ISD::SETGE:
12320 case ISD::SETUGT:
12321 case ISD::SETUGE:
12322 if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12323 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12324 break;
12325 case ISD::SETLT:
12326 case ISD::SETLE:
12327 case ISD::SETULT:
12328 case ISD::SETULE:
12329 if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12330 return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12331 break;
12332 default:
12333 break;
12334 }
12335 }
12336 }
12337
12338 // Match VSELECTs into add with unsigned saturation.
12339 if (hasOperation(ISD::UADDSAT, VT)) {
12340 // Check if one of the arms of the VSELECT is a vector with all bits set.
12341 // If it's on the left side, invert the predicate to simplify the logic below.
12342 SDValue Other;
12343 ISD::CondCode SatCC = CC;
12344 if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12345 Other = N2;
12346 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12347 } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12348 Other = N1;
12349 }
12350
12351 if (Other && Other.getOpcode() == ISD::ADD) {
12352 SDValue CondLHS = LHS, CondRHS = RHS;
12353 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12354
12355 // Canonicalize condition operands.
12356 if (SatCC == ISD::SETUGE) {
12357 std::swap(CondLHS, CondRHS);
12358 SatCC = ISD::SETULE;
12359 }
12360
12361 // We can test against either of the addition operands.
12362 // x <= x+y ? x+y : ~0 --> uaddsat x, y
12363 // x+y >= x ? x+y : ~0 --> uaddsat x, y
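// An unsigned add wraps exactly when x+y < x (equivalently x+y < y), and in
// that case the select already yields the saturated value ~0, so this is
// uaddsat.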
12364 if (SatCC == ISD::SETULE && Other == CondRHS &&
12365 (OpLHS == CondLHS || OpRHS == CondLHS))
12366 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12367
12368 if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12369 (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12370 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12371 CondLHS == OpLHS) {
12372 // If the RHS is a constant we have to reverse the const
12373 // canonicalization.
12374 // x >= ~C ? x+C : ~0 --> uaddsat x, C
12375 auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12376 return Cond->getAPIntValue() == ~Op->getAPIntValue();
12377 };
12378 if (SatCC == ISD::SETULE &&
12379 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12380 return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12381 }
12382 }
12383 }
12384
12385 // Match VSELECTs into sub with unsigned saturation.
12386 if (hasOperation(ISD::USUBSAT, VT)) {
12387 // Check if one of the arms of the VSELECT is a zero vector. If it's on
12388 // the left side, invert the predicate to simplify the logic below.
12389 SDValue Other;
12390 ISD::CondCode SatCC = CC;
12391 if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12392 Other = N2;
12393 SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12394 } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12395 Other = N1;
12396 }
12397
12398 // zext(x) >= y ? trunc(zext(x) - y) : 0
12399 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12400 // zext(x) > y ? trunc(zext(x) - y) : 0
12401 // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12402 if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12403 Other.getOperand(0).getOpcode() == ISD::SUB &&
12404 (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12405 SDValue OpLHS = Other.getOperand(0).getOperand(0);
12406 SDValue OpRHS = Other.getOperand(0).getOperand(1);
12407 if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12408 if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12409 DAG, DL))
12410 return R;
12411 }
12412
12413 if (Other && Other.getNumOperands() == 2) {
12414 SDValue CondRHS = RHS;
12415 SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12416
12417 if (OpLHS == LHS) {
12418 // Look for a general sub with unsigned saturation first.
12419 // x >= y ? x-y : 0 --> usubsat x, y
12420 // x > y ? x-y : 0 --> usubsat x, y
12421 if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12422 Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12423 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12424
12425 if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12426 OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12427 if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12428 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12429 // If the RHS is a constant we have to reverse the const
12430 // canonicalization.
12431 // x > C-1 ? x+-C : 0 --> usubsat x, C
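// (x > C-1 is equivalent to x >= C for nonzero C, so when the add arm is
// chosen the subtraction by C cannot wrap; otherwise the result is the
// clamped 0.)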
12432 auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12433 return (!Op && !Cond) ||
12434 (Op && Cond &&
12435 Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12436 };
12437 if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12438 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12439 /*AllowUndefs*/ true)) {
12440 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12441 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12442 }
12443
12444 // Another special case: If C was a sign bit, the sub has been
12445 // canonicalized into a xor.
12446 // FIXME: Would it be better to use computeKnownBits to
12447 // determine whether it's safe to decanonicalize the xor?
12448 // x s< 0 ? x^C : 0 --> usubsat x, C
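// When C is the sign-bit mask, x - C and x ^ C both just clear the sign
// bit for any x with the sign bit set, and for x s>= 0 usubsat(x, C) is 0,
// so the xor form is still a valid usubsat.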
12449 APInt SplatValue;
12450 if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12451 ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12452 ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12453 SplatValue.isSignMask()) {
12454 // Note that we have to rebuild the RHS constant here to
12455 // ensure we don't rely on particular values of undef lanes.
12456 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12457 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12458 }
12459 }
12460 }
12461 }
12462 }
12463 }
12464 }
12465
12466 if (SimplifySelectOps(N, N1, N2))
12467 return SDValue(N, 0); // Don't revisit N.
12468
12469 // Fold (vselect all_ones, N1, N2) -> N1
12470 if (ISD::isBuildVectorAllOnes(N0.getNode()))
12471 return N1;
12472 // Fold (vselect all_zeros, N1, N2) -> N2
12473 if (ISD::isBuildVectorAllZeros(N0.getNode()))
12474 return N2;
12475
12476 // The ConvertSelectToConcatVector function assumes both the above
12477 // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
12478 // and addressed.
12479 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12480 N2.getOpcode() == ISD::CONCAT_VECTORS &&
12481 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12482 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12483 return CV;
12484 }
12485
12486 if (SDValue V = foldVSelectOfConstants(N))
12487 return V;
12488
12489 if (hasOperation(ISD::SRA, VT))
12490 if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12491 return V;
12492
12493 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12494 return SDValue(N, 0);
12495
12496 return SDValue();
12497}
12498
12499SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12500 SDValue N0 = N->getOperand(0);
12501 SDValue N1 = N->getOperand(1);
12502 SDValue N2 = N->getOperand(2);
12503 SDValue N3 = N->getOperand(3);
12504 SDValue N4 = N->getOperand(4);
12505 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12506
12507 // fold select_cc lhs, rhs, x, x, cc -> x
12508 if (N2 == N3)
12509 return N2;
12510
12511 // select_cc bool, 0, x, y, seteq -> select bool, y, x
12512 if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12513 isNullConstant(N1))
12514 return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12515
12516 // Determine if the condition we're dealing with is constant
12517 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12518 CC, SDLoc(N), false)) {
12519 AddToWorklist(SCC.getNode());
12520
12521 // cond always true -> true val
12522 // cond always false -> false val
12523 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12524 return SCCC->isZero() ? N3 : N2;
12525
12526 // When the condition is UNDEF, just return the first operand. This is
12527 // coherent with DAG creation: no setcc node is created in this case.
12528 if (SCC->isUndef())
12529 return N2;
12530
12531 // Fold to a simpler select_cc
12532 if (SCC.getOpcode() == ISD::SETCC) {
12533 SDValue SelectOp = DAG.getNode(
12534 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12535 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12536 SelectOp->setFlags(SCC->getFlags());
12537 return SelectOp;
12538 }
12539 }
12540
12541 // If we can fold this based on the true/false value, do so.
12542 if (SimplifySelectOps(N, N2, N3))
12543 return SDValue(N, 0); // Don't revisit N.
12544
12545 // fold select_cc into other things, such as min/max/abs
12546 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12547}
12548
12549SDValue DAGCombiner::visitSETCC(SDNode *N) {
12550 // setcc is very commonly used as an argument to brcond. This pattern
12551 // also lends itself to numerous combines and, as a result, it is desirable
12552 // that we keep the argument to a brcond as a setcc as much as possible.
12553 bool PreferSetCC =
12554 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12555
12556 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12557 EVT VT = N->getValueType(0);
12558 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12559
12560 SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12561
12562 if (Combined) {
12563 // If we prefer to have a setcc, and we don't, we'll try our best to
12564 // recreate one using rebuildSetCC.
12565 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12566 SDValue NewSetCC = rebuildSetCC(Combined);
12567
12568 // We don't have anything interesting to combine to.
12569 if (NewSetCC.getNode() == N)
12570 return SDValue();
12571
12572 if (NewSetCC)
12573 return NewSetCC;
12574 }
12575 return Combined;
12576 }
12577
12578 // Optimize
12579 // 1) (icmp eq/ne (and X, C0), (shift X, C1))
12580 // or
12581 // 2) (icmp eq/ne X, (rotate X, C1))
12582 // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
12583 // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
12584 // Then:
12585 // If C1 is a power of 2, then the rotate and shift+and versions are
12586 // equivalent, so we can interchange them depending on target preference.
12587 // Otherwise, if we have the shift+and version we can interchange srl/shl,
12588 // which in turn affects the constant C0. We can use this to get better
12589 // constants, again determined by target preference.
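// For example, with x : i64, `(x & 0xFFFFFFFF) == (x >> 32)` and
// `x == (rotr x, 32)` both test that the low and high halves of x are equal,
// so we can emit whichever form the target prefers.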
12590 if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12591 auto IsAndWithShift = [](SDValue A, SDValue B) {
12592 return A.getOpcode() == ISD::AND &&
12593 (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12594 A.getOperand(0) == B.getOperand(0);
12595 };
12596 auto IsRotateWithOp = [](SDValue A, SDValue B) {
12597 return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12598 B.getOperand(0) == A;
12599 };
12600 SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12601 bool IsRotate = false;
12602
12603 // Find either shift+and or rotate pattern.
12604 if (IsAndWithShift(N0, N1)) {
12605 AndOrOp = N0;
12606 ShiftOrRotate = N1;
12607 } else if (IsAndWithShift(N1, N0)) {
12608 AndOrOp = N1;
12609 ShiftOrRotate = N0;
12610 } else if (IsRotateWithOp(N0, N1)) {
12611 IsRotate = true;
12612 AndOrOp = N0;
12613 ShiftOrRotate = N1;
12614 } else if (IsRotateWithOp(N1, N0)) {
12615 IsRotate = true;
12616 AndOrOp = N1;
12617 ShiftOrRotate = N0;
12618 }
12619
12620 if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12621 (IsRotate || AndOrOp.hasOneUse())) {
12622 EVT OpVT = N0.getValueType();
12623 // Get the constant shift/rotate amount and possibly the mask (if it's the
12624 // shift+and variant).
12625 auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12626 ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12627 /*AllowTrunc*/ false);
12628 if (CNode == nullptr)
12629 return std::nullopt;
12630 return CNode->getAPIntValue();
12631 };
12632 std::optional<APInt> AndCMask =
12633 IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12634 std::optional<APInt> ShiftCAmt =
12635 GetAPIntValue(ShiftOrRotate.getOperand(1));
12636 unsigned NumBits = OpVT.getScalarSizeInBits();
12637
12638 // We found constants.
12639 if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12640 unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12641 // Check that the constants meet the constraints.
12642 bool CanTransform = IsRotate;
12643 if (!CanTransform) {
12644 // Check that the mask and shift complement each other.
12645 CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12646 // Check that we are comparing all bits
12647 CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12648 // Check that the and mask is correct for the shift
12649 CanTransform &=
12650 ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12651 }
12652
12653 // See if target prefers another shift/rotate opcode.
12654 unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12655 OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12656 // Transform is valid and we have a new preference.
12657 if (CanTransform && NewShiftOpc != ShiftOpc) {
12658 SDLoc DL(N);
12659 SDValue NewShiftOrRotate =
12660 DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12661 ShiftOrRotate.getOperand(1));
12662 SDValue NewAndOrOp = SDValue();
12663
12664 if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12665 APInt NewMask =
12666 NewShiftOpc == ISD::SHL
12667 ? APInt::getHighBitsSet(NumBits,
12668 NumBits - ShiftCAmt->getZExtValue())
12669 : APInt::getLowBitsSet(NumBits,
12670 NumBits - ShiftCAmt->getZExtValue());
12671 NewAndOrOp =
12672 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12673 DAG.getConstant(NewMask, DL, OpVT));
12674 } else {
12675 NewAndOrOp = ShiftOrRotate.getOperand(0);
12676 }
12677
12678 return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12679 }
12680 }
12681 }
12682 }
12683 return SDValue();
12684}
12685
12686SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12687 SDValue LHS = N->getOperand(0);
12688 SDValue RHS = N->getOperand(1);
12689 SDValue Carry = N->getOperand(2);
12690 SDValue Cond = N->getOperand(3);
12691
12692 // If Carry is false, fold to a regular SETCC.
12693 if (isNullConstant(Carry))
12694 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12695
12696 return SDValue();
12697}
12698
12699/// Check if N satisfies:
12700/// N is used once.
12701/// N is a load.
12702/// The load is compatible with ExtOpcode, meaning that if the load has an
12703/// explicit zero/sign extension, ExtOpcode must have the same
12704/// extension.
12705/// Otherwise any ExtOpcode is compatible and this returns true.
12706static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12707 if (!N.hasOneUse())
12708 return false;
12709
12710 if (!isa<LoadSDNode>(N))
12711 return false;
12712
12713 LoadSDNode *Load = cast<LoadSDNode>(N);
12714 ISD::LoadExtType LoadExt = Load->getExtensionType();
12715 if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12716 return true;
12717
12718 // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12719 // extension.
12720 if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12721 (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12722 return false;
12723
12724 return true;
12725}
12726
12727/// Fold
12728/// (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12729/// (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12730/// (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12731/// This function is called by the DAGCombiner when visiting sext/zext/aext
12732/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12733static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12734 SelectionDAG &DAG,
12735 CombineLevel Level) {
12736 unsigned Opcode = N->getOpcode();
12737 SDValue N0 = N->getOperand(0);
12738 EVT VT = N->getValueType(0);
12739 SDLoc DL(N);
12740
12741 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12742 Opcode == ISD::ANY_EXTEND) &&
12743 "Expected EXTEND dag node in input!");
12744
12745 if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12746 !N0.hasOneUse())
12747 return SDValue();
12748
12749 SDValue Op1 = N0->getOperand(1);
12750 SDValue Op2 = N0->getOperand(2);
12751 if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12752 return SDValue();
12753
12754 auto ExtLoadOpcode = ISD::EXTLOAD;
12755 if (Opcode == ISD::SIGN_EXTEND)
12756 ExtLoadOpcode = ISD::SEXTLOAD;
12757 else if (Opcode == ISD::ZERO_EXTEND)
12758 ExtLoadOpcode = ISD::ZEXTLOAD;
12759
12760 // An illegal VSELECT may fail in ISel if it is created after legalization
12761 // (DAG Combine2), so we should conservatively check the OperationAction.
12762 LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12763 LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12764 if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12765 !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12766 (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12767 TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12768 return SDValue();
12769
12770 SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12771 SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12772 return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12773}
12774
12775/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12776/// a build_vector of constants.
12777/// This function is called by the DAGCombiner when visiting sext/zext/aext
12778/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12779/// Vector extends are not folded if operations are legal; this is to
12780/// avoid introducing illegal build_vector dag nodes.
12781static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL,
12782 const TargetLowering &TLI,
12783 SelectionDAG &DAG, bool LegalTypes) {
12784 unsigned Opcode = N->getOpcode();
12785 SDValue N0 = N->getOperand(0);
12786 EVT VT = N->getValueType(0);
12787
12788 assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12789 "Expected EXTEND dag node in input!");
12790
12791 // fold (sext c1) -> c1
12792 // fold (zext c1) -> c1
12793 // fold (aext c1) -> c1
12794 if (isa<ConstantSDNode>(N0))
12795 return DAG.getNode(Opcode, DL, VT, N0);
12796
12797 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12798 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12799 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12800 if (N0->getOpcode() == ISD::SELECT) {
12801 SDValue Op1 = N0->getOperand(1);
12802 SDValue Op2 = N0->getOperand(2);
12803 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12804 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12805 // For any_extend, choose sign extension of the constants to allow a
12806 // possible further transform to sign_extend_inreg.i.e.
12807 //
12808 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12809 // t2: i64 = any_extend t1
12810 // -->
12811 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12812 // -->
12813 // t4: i64 = sign_extend_inreg t3
12814 unsigned FoldOpc = Opcode;
12815 if (FoldOpc == ISD::ANY_EXTEND)
12816 FoldOpc = ISD::SIGN_EXTEND;
12817 return DAG.getSelect(DL, VT, N0->getOperand(0),
12818 DAG.getNode(FoldOpc, DL, VT, Op1),
12819 DAG.getNode(FoldOpc, DL, VT, Op2));
12820 }
12821 }
12822
12823 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
12824 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
12825 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
12826 EVT SVT = VT.getScalarType();
12827 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12828 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12829 return SDValue();
12830
12831 // We can fold this node into a build_vector.
12832 unsigned VTBits = SVT.getSizeInBits();
12833 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12834 SmallVector<SDValue, 8> Elts;
12835 unsigned NumElts = VT.getVectorNumElements();
12836
12837 for (unsigned i = 0; i != NumElts; ++i) {
12838 SDValue Op = N0.getOperand(i);
12839 if (Op.isUndef()) {
12840 if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12841 Elts.push_back(DAG.getUNDEF(SVT));
12842 else
12843 Elts.push_back(DAG.getConstant(0, DL, SVT));
12844 continue;
12845 }
12846
12847 SDLoc DL(Op);
12848 // Get the constant value and if needed trunc it to the size of the type.
12849 // Nodes like build_vector might have constants wider than the scalar type.
12850 APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12851 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12852 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12853 else
12854 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12855 }
12856
12857 return DAG.getBuildVector(VT, DL, Elts);
12858}
12859
12860// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
12861// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
12862// transformation. Returns true if the extensions are possible and the
12863// above-mentioned transformation is profitable.
12864static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12865 unsigned ExtOpc,
12866 SmallVectorImpl<SDNode *> &ExtendNodes,
12867 const TargetLowering &TLI) {
12868 bool HasCopyToRegUses = false;
12869 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12870 for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12871 ++UI) {
12872 SDNode *User = *UI;
12873 if (User == N)
12874 continue;
12875 if (UI.getUse().getResNo() != N0.getResNo())
12876 continue;
12877 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12878 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12879 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12880 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12881 // Sign bits will be lost after a zext.
12882 return false;
12883 bool Add = false;
12884 for (unsigned i = 0; i != 2; ++i) {
12885 SDValue UseOp = User->getOperand(i);
12886 if (UseOp == N0)
12887 continue;
12888 if (!isa<ConstantSDNode>(UseOp))
12889 return false;
12890 Add = true;
12891 }
12892 if (Add)
12893 ExtendNodes.push_back(User);
12894 continue;
12895 }
12896 // If truncates aren't free and there are users we can't
12897 // extend, it isn't worthwhile.
12898 if (!isTruncFree)
12899 return false;
12900 // Remember if this value is live-out.
12901 if (User->getOpcode() == ISD::CopyToReg)
12902 HasCopyToRegUses = true;
12903 }
12904
12905 if (HasCopyToRegUses) {
12906 bool BothLiveOut = false;
12907 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12908 UI != UE; ++UI) {
12909 SDUse &Use = UI.getUse();
12910 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12911 BothLiveOut = true;
12912 break;
12913 }
12914 }
12915 if (BothLiveOut)
12916 // Both unextended and extended values are live out. There had better be
12917 // a good reason for the transformation.
12918 return !ExtendNodes.empty();
12919 }
12920 return true;
12921}
12922
12923void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12924 SDValue OrigLoad, SDValue ExtLoad,
12925 ISD::NodeType ExtType) {
12926 // Extend SetCC uses if necessary.
12927 SDLoc DL(ExtLoad);
12928 for (SDNode *SetCC : SetCCs) {
12929 SmallVector<SDValue, 4> Ops;
12930
12931 for (unsigned j = 0; j != 2; ++j) {
12932 SDValue SOp = SetCC->getOperand(j);
12933 if (SOp == OrigLoad)
12934 Ops.push_back(ExtLoad);
12935 else
12936 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12937 }
12938
12939 Ops.push_back(SetCC->getOperand(2));
12940 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12941 }
12942}
12943
12944// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12945SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12946 SDValue N0 = N->getOperand(0);
12947 EVT DstVT = N->getValueType(0);
12948 EVT SrcVT = N0.getValueType();
12949
12950 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12951 N->getOpcode() == ISD::ZERO_EXTEND) &&
12952 "Unexpected node type (not an extend)!");
12953
12954 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12955 // For example, on a target with legal v4i32, but illegal v8i32, turn:
12956 // (v8i32 (sext (v8i16 (load x))))
12957 // into:
12958 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12959 // (v4i32 (sextload (x + 16)))))
12960 // Where uses of the original load, i.e.:
12961 // (v8i16 (load x))
12962 // are replaced with:
12963 // (v8i16 (truncate
12964 // (v8i32 (concat_vectors (v4i32 (sextload x)),
12965 // (v4i32 (sextload (x + 16)))))))
12966 //
12967 // This combine is only applicable to illegal, but splittable, vectors.
12968 // All legal types, and illegal non-vector types, are handled elsewhere.
12969 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12970 //
12971 if (N0->getOpcode() != ISD::LOAD)
12972 return SDValue();
12973
12974 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12975
12976 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12977 !N0.hasOneUse() || !LN0->isSimple() ||
12978 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12979 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12980 return SDValue();
12981
12982 SmallVector<SDNode *, 4> SetCCs;
12983 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12984 return SDValue();
12985
12986 ISD::LoadExtType ExtType =
12987 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12988
12989 // Try to split the vector types to get down to legal types.
12990 EVT SplitSrcVT = SrcVT;
12991 EVT SplitDstVT = DstVT;
12992 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12993 SplitSrcVT.getVectorNumElements() > 1) {
12994 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12995 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12996 }
12997
12998 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12999 return SDValue();
13000
13001 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
13002
13003 SDLoc DL(N);
13004 const unsigned NumSplits =
13005 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
13006 const unsigned Stride = SplitSrcVT.getStoreSize();
13007 SmallVector<SDValue, 4> Loads;
13008 SmallVector<SDValue, 4> Chains;
13009
13010 SDValue BasePtr = LN0->getBasePtr();
13011 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
13012 const unsigned Offset = Idx * Stride;
13013
13014 SDValue SplitLoad =
13015 DAG.getExtLoad(ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(),
13016 BasePtr, LN0->getPointerInfo().getWithOffset(Offset),
13017 SplitSrcVT, LN0->getOriginalAlign(),
13018 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13019
13020 BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
13021
13022 Loads.push_back(SplitLoad.getValue(0));
13023 Chains.push_back(SplitLoad.getValue(1));
13024 }
13025
13026 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
13027 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
13028
13029 // Simplify TF.
13030 AddToWorklist(NewChain.getNode());
13031
13032 CombineTo(N, NewValue);
13033
13034 // Replace uses of the original load (before extension)
13035 // with a truncate of the concatenated sextloaded vectors.
13036 SDValue Trunc =
13037 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
13038 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
13039 CombineTo(N0.getNode(), Trunc, NewChain);
13040 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13041}
13042
13043// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13044// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13045SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
13046 assert(N->getOpcode() == ISD::ZERO_EXTEND);
13047 EVT VT = N->getValueType(0);
13048 EVT OrigVT = N->getOperand(0).getValueType();
13049 if (TLI.isZExtFree(OrigVT, VT))
13050 return SDValue();
13051
13052 // and/or/xor
13053 SDValue N0 = N->getOperand(0);
13054 if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
13055 N0.getOperand(1).getOpcode() != ISD::Constant ||
13056 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
13057 return SDValue();
13058
13059 // shl/shr
13060 SDValue N1 = N0->getOperand(0);
13061 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
13062 N1.getOperand(1).getOpcode() != ISD::Constant ||
13063 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
13064 return SDValue();
13065
13066 // load
13067 if (!isa<LoadSDNode>(N1.getOperand(0)))
13068 return SDValue();
13069 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13070 EVT MemVT = Load->getMemoryVT();
13071 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13072 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13073 return SDValue();
13074
13075
13076 // If the shift op is SHL, the logic op must be AND, otherwise the result
13077 // will be wrong.
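// (A SHL in the wide type can move load bits up into the range the zext is
// supposed to clear; an AND with the zero-extended constant still clears
// them, but an OR/XOR would keep them and change the result.)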
13078 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13079 return SDValue();
13080
13081 if (!N0.hasOneUse() || !N1.hasOneUse())
13082 return SDValue();
13083
13083
13084 SmallVector<SDNode *, 4> SetCCs;
13085 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13086 ISD::ZERO_EXTEND, SetCCs, TLI))
13087 return SDValue();
13088
13089 // Actually do the transformation.
13090 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13091 Load->getChain(), Load->getBasePtr(),
13092 Load->getMemoryVT(), Load->getMemOperand());
13093
13094 SDLoc DL1(N1);
13095 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13096 N1.getOperand(1));
13097
13098 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13099 SDLoc DL0(N0);
13100 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13101 DAG.getConstant(Mask, DL0, VT));
13102
13103 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13104 CombineTo(N, And);
13105 if (SDValue(Load, 0).hasOneUse()) {
13106 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13107 } else {
13108 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13109 Load->getValueType(0), ExtLoad);
13110 CombineTo(Load, Trunc, ExtLoad.getValue(1));
13111 }
13112
13113 // N0 is dead at this point.
13114 recursivelyDeleteUnusedNodes(N0.getNode());
13115
13116 return SDValue(N,0); // Return N so it doesn't get rechecked!
13117}
13118
13119/// If we're narrowing or widening the result of a vector select and the final
13120/// size is the same size as a setcc (compare) feeding the select, then try to
13121/// apply the cast operation to the select's operands because matching vector
13122/// sizes for a select condition and other operands should be more efficient.
13123SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13124 unsigned CastOpcode = Cast->getOpcode();
13125 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13126 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13127 CastOpcode == ISD::FP_ROUND) &&
13128 "Unexpected opcode for vector select narrowing/widening");
13129
13130 // We only do this transform before legal ops because the pattern may be
13131 // obfuscated by target-specific operations after legalization. Do not create
13132 // an illegal select op, however, because that may be difficult to lower.
13133 EVT VT = Cast->getValueType(0);
13134 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13135 return SDValue();
13136
13137 SDValue VSel = Cast->getOperand(0);
13138 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13139 VSel.getOperand(0).getOpcode() != ISD::SETCC)
13140 return SDValue();
13141
13142 // Does the setcc have the same vector size as the casted select?
13143 SDValue SetCC = VSel.getOperand(0);
13144 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13145 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13146 return SDValue();
13147
13148 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13149 SDValue A = VSel.getOperand(1);
13150 SDValue B = VSel.getOperand(2);
13151 SDValue CastA, CastB;
13152 SDLoc DL(Cast);
13153 if (CastOpcode == ISD::FP_ROUND) {
13154 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13155 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13156 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13157 } else {
13158 CastA = DAG.getNode(CastOpcode, DL, VT, A);
13159 CastB = DAG.getNode(CastOpcode, DL, VT, B);
13160 }
13161 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13162}
13163
13164// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13165// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13166static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13167 const TargetLowering &TLI, EVT VT,
13168 bool LegalOperations, SDNode *N,
13169 SDValue N0, ISD::LoadExtType ExtLoadType) {
13170 SDNode *N0Node = N0.getNode();
13171 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13172 : ISD::isZEXTLoad(N0Node);
13173 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13174 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13175 return SDValue();
13176
13177 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13178 EVT MemVT = LN0->getMemoryVT();
13179 if ((LegalOperations || !LN0->isSimple() ||
13180 VT.isVector()) &&
13181 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13182 return SDValue();
13183
13184 SDValue ExtLoad =
13185 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13186 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13187 Combiner.CombineTo(N, ExtLoad);
13188 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13189 if (LN0->use_empty())
13190 Combiner.recursivelyDeleteUnusedNodes(LN0);
13191 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13192}
13193
13194// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13195// Only generate vector extloads when 1) they're legal, and 2) they are
13196// deemed desirable by the target. NonNegZExt can be set to true if a zero
13197// extend has the nonneg flag to allow use of sextload if profitable.
13198static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13199 const TargetLowering &TLI, EVT VT,
13200 bool LegalOperations, SDNode *N, SDValue N0,
13201 ISD::LoadExtType ExtLoadType,
13202 ISD::NodeType ExtOpc,
13203 bool NonNegZExt = false) {
13204 if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()))
13205 return {};
13206
13207 // If this is zext nneg, see if it would make sense to treat it as a sext.
13208 if (NonNegZExt) {
13209 assert(ExtLoadType == ISD::ZEXTLOAD && ExtOpc == ISD::ZERO_EXTEND &&
13210 "Unexpected load type or opcode");
13211 for (SDNode *User : N0->uses()) {
13212 if (User->getOpcode() == ISD::SETCC) {
13213 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
13214 if (ISD::isSignedIntSetCC(CC)) {
13215 ExtLoadType = ISD::SEXTLOAD;
13216 ExtOpc = ISD::SIGN_EXTEND;
13217 break;
13218 }
13219 }
13220 }
13221 }
13222
13223 // TODO: isFixedLengthVector() should be removed, with any negative effects
13224 // on code generation being the result of that target's implementation of
13225 // isVectorLoadExtDesirable().
13226 if ((LegalOperations || VT.isFixedLengthVector() ||
13227 !cast<LoadSDNode>(N0)->isSimple()) &&
13228 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))
13229 return {};
13230
13231 bool DoXform = true;
13232 SmallVector<SDNode *, 4> SetCCs;
13233 if (!N0.hasOneUse())
13234 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13235 if (VT.isVector())
13236 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13237 if (!DoXform)
13238 return {};
13239
13240 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13241 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13242 LN0->getBasePtr(), N0.getValueType(),
13243 LN0->getMemOperand());
13244 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13245 // If the load value is used only by N, replace it via CombineTo N.
13246 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13247 Combiner.CombineTo(N, ExtLoad);
13248 if (NoReplaceTrunc) {
13249 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13250 Combiner.recursivelyDeleteUnusedNodes(LN0);
13251 } else {
13252 SDValue Trunc =
13253 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13254 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13255 }
13256 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13257}
13258
13259static SDValue
13260tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13261 bool LegalOperations, SDNode *N, SDValue N0,
13262 ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13263 if (!N0.hasOneUse())
13264 return SDValue();
13265
13266 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13267 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13268 return SDValue();
13269
13270 if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13271 !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13272 return SDValue();
13273
13274 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13275 return SDValue();
13276
13277 SDLoc dl(Ld);
13278 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13279 SDValue NewLoad = DAG.getMaskedLoad(
13280 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13281 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13282 ExtLoadType, Ld->isExpandingLoad());
13283 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13284 return NewLoad;
13285}
13286
13287// fold ([s|z]ext (atomic_load)) -> ([s|z]ext (truncate ([s|z]ext atomic_load)))
13288static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG,
13289 const TargetLowering &TLI, EVT VT,
13290 SDValue N0,
13291 ISD::LoadExtType ExtLoadType) {
13292 auto *ALoad = dyn_cast<AtomicSDNode>(N0);
13293 if (!ALoad || ALoad->getOpcode() != ISD::ATOMIC_LOAD)
13294 return {};
13295 EVT MemoryVT = ALoad->getMemoryVT();
13296 if (!TLI.isAtomicLoadExtLegal(ExtLoadType, VT, MemoryVT))
13297 return {};
13298 // Can't fold into ALoad if it is already extending differently.
13299 ISD::LoadExtType ALoadExtTy = ALoad->getExtensionType();
13300 if ((ALoadExtTy == ISD::ZEXTLOAD && ExtLoadType == ISD::SEXTLOAD) ||
13301 (ALoadExtTy == ISD::SEXTLOAD && ExtLoadType == ISD::ZEXTLOAD))
13302 return {};
13303
13304 EVT OrigVT = ALoad->getValueType(0);
13305 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
13306 auto *NewALoad = cast<AtomicSDNode>(DAG.getAtomic(
13307 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
13308 ALoad->getBasePtr(), ALoad->getMemOperand()));
13309 NewALoad->setExtensionType(ExtLoadType);
13310 DAG.ReplaceAllUsesOfValueWith(
13311 SDValue(ALoad, 0),
13312 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
13313 // Update the chain uses.
13314 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
13315 return SDValue(NewALoad, 0);
13316}
13317
13318static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13319 bool LegalOperations) {
13320 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13321 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13322
13323 SDValue SetCC = N->getOperand(0);
13324 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13325 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13326 return SDValue();
13327
13328 SDValue X = SetCC.getOperand(0);
13329 SDValue Ones = SetCC.getOperand(1);
13330 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13331 EVT VT = N->getValueType(0);
13332 EVT XVT = X.getValueType();
13333 // setge X, C is canonicalized to setgt, so we do not need to match that
13334 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13335 // not require the 'not' op.
13336 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13337 // Invert and smear/shift the sign bit:
13338 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13339 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
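// e.g. for i32: when X >= 0 the sign bit of (not X) is 1, so the arithmetic
// shift produces all-ones (the sext of true) and the logical shift produces 1
// (the zext of true); when X < 0 both produce 0.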
13340 SDLoc DL(N);
13341 unsigned ShCt = VT.getSizeInBits() - 1;
13342 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13343 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13344 SDValue NotX = DAG.getNOT(DL, X, VT);
13345 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13346 auto ShiftOpcode =
13347 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13348 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13349 }
13350 }
13351 return SDValue();
13352}
13353
13354SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13355 SDValue N0 = N->getOperand(0);
13356 if (N0.getOpcode() != ISD::SETCC)
13357 return SDValue();
13358
13359 SDValue N00 = N0.getOperand(0);
13360 SDValue N01 = N0.getOperand(1);
13361 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13362 EVT VT = N->getValueType(0);
13363 EVT N00VT = N00.getValueType();
13364 SDLoc DL(N);
13365
13366 // Propagate fast-math-flags.
13367 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13368
13369 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13370 // the same size as the compared operands. Try to optimize sext(setcc())
13371 // if this is the case.
13372 if (VT.isVector() && !LegalOperations &&
13373 TLI.getBooleanContents(N00VT) ==
13374 TargetLowering::ZeroOrNegativeOneBooleanContent) {
13375 EVT SVT = getSetCCResultType(N00VT);
13376
13377 // If we already have the desired type, don't change it.
13378 if (SVT != N0.getValueType()) {
13379 // We know that the # elements of the results is the same as the
13380 // # elements of the compare (and the # elements of the compare result
13381 // for that matter). Check to see that they are the same size. If so,
13382 // we know that the element size of the sext'd result matches the
13383 // element size of the compare operands.
13384 if (VT.getSizeInBits() == SVT.getSizeInBits())
13385 return DAG.getSetCC(DL, VT, N00, N01, CC);
13386
13387 // If the desired elements are smaller or larger than the source
13388 // elements, we can use a matching integer vector type and then
13389 // truncate/sign extend.
13390 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13391 if (SVT == MatchingVecType) {
13392 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13393 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13394 }
13395 }
13396
13397 // Try to eliminate the sext of a setcc by zexting the compare operands.
13398 if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13399 !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13400 bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13401 unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13402 unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13403
13404 // We have an unsupported narrow vector compare op that would be legal
13405 // if extended to the destination type. See if the compare operands
13406 // can be freely extended to the destination type.
13407 auto IsFreeToExtend = [&](SDValue V) {
13408 if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13409 return true;
13410 // Match a simple, non-extended load that can be converted to a
13411 // legal {z/s}ext-load.
13412 // TODO: Allow widening of an existing {z/s}ext-load?
13413 if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13414 ISD::isUNINDEXEDLoad(V.getNode()) &&
13415 cast<LoadSDNode>(V)->isSimple() &&
13416 TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13417 return false;
13418
13419 // Non-chain users of this value must either be the setcc in this
13420 // sequence or extends that can be folded into the new {z/s}ext-load.
13421 for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13422 UI != UE; ++UI) {
13423 // Skip uses of the chain and the setcc.
13424 SDNode *User = *UI;
13425 if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13426 continue;
13427 // Extra users must have exactly the same cast we are about to create.
13428 // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13429 // is enhanced similarly.
13430 if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13431 return false;
13432 }
13433 return true;
13434 };
13435
13436 if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13437 SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13438 SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13439 return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13440 }
13441 }
13442 }
13443
13444 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13445 // Here, T can be 1 or -1, depending on the type of the setcc and
13446 // getBooleanContents().
13447 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13448
13449 // To determine the "true" side of the select, we need to know the high bit
13450 // of the value returned by the setcc if it evaluates to true.
13451 // If the type of the setcc is i1, then the true case of the select is just
13452 // sext(i1 1), that is, -1.
13453 // If the type of the setcc is larger (say, i8) then the value of the high
13454 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13455 // of the appropriate width.
13456 SDValue ExtTrueVal = (SetCCWidth == 1)
13457 ? DAG.getAllOnesConstant(DL, VT)
13458 : DAG.getBoolConstant(true, DL, VT, N00VT);
13459 SDValue Zero = DAG.getConstant(0, DL, VT);
13460 if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13461 return SCC;
13462
13463 if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13464 EVT SetCCVT = getSetCCResultType(N00VT);
13465 // Don't do this transform for i1 because there's a select transform
13466 // that would reverse it.
13467 // TODO: We should not do this transform at all without a target hook
13468 // because a sext is likely cheaper than a select?
13469 if (SetCCVT.getScalarSizeInBits() != 1 &&
13470 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13471 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13472 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13473 }
13474 }
13475
13476 return SDValue();
13477}
13478
13479SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13480 SDValue N0 = N->getOperand(0);
13481 EVT VT = N->getValueType(0);
13482 SDLoc DL(N);
13483
13484 if (VT.isVector())
13485 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13486 return FoldedVOp;
13487
13488 // sext(undef) = 0 because the top bit will all be the same.
13489 if (N0.isUndef())
13490 return DAG.getConstant(0, DL, VT);
13491
13492 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13493 return Res;
13494
13495 // fold (sext (sext x)) -> (sext x)
13496 // fold (sext (aext x)) -> (sext x)
13497 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13498 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13499
13500 // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13501 // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13502 if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
13503 N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG)
13504 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
13505 N0.getOperand(0));
13506
13507 // fold (sext (sext_inreg x)) -> (sext (trunc x))
13508 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13509 SDValue N00 = N0.getOperand(0);
13510 EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13511 if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13512 (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13513 SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13514 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13515 }
13516 }
13517
13518 if (N0.getOpcode() == ISD::TRUNCATE) {
13519 // fold (sext (truncate (load x))) -> (sext (smaller load x))
13520 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13521 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13522 SDNode *oye = N0.getOperand(0).getNode();
13523 if (NarrowLoad.getNode() != N0.getNode()) {
13524 CombineTo(N0.getNode(), NarrowLoad);
13525 // CombineTo deleted the truncate, if needed, but not what's under it.
13526 AddToWorklist(oye);
13527 }
13528 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13529 }
13530
13531 // See if the value being truncated is already sign extended. If so, just
13532 // eliminate the trunc/sext pair.
13533 SDValue Op = N0.getOperand(0);
13534 unsigned OpBits = Op.getScalarValueSizeInBits();
13535 unsigned MidBits = N0.getScalarValueSizeInBits();
13536 unsigned DestBits = VT.getScalarSizeInBits();
13537 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13538
13539 if (OpBits == DestBits) {
13540 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13541 // bits, it is already ready.
13542 if (NumSignBits > DestBits-MidBits)
13543 return Op;
13544 } else if (OpBits < DestBits) {
13545 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13546 // bits, just sext from i32.
13547 if (NumSignBits > OpBits-MidBits)
13548 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13549 } else {
13550 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13551 // bits, just truncate to i32.
13552 if (NumSignBits > OpBits-MidBits)
13553 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13554 }
13555
13556 // fold (sext (truncate x)) -> (sextinreg x).
13557 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13558 N0.getValueType())) {
13559 if (OpBits < DestBits)
13560 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13561 else if (OpBits > DestBits)
13562 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13563 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13564 DAG.getValueType(N0.getValueType()));
13565 }
13566 }
13567
13568 // Try to simplify (sext (load x)).
13569 if (SDValue foldedExt =
13570 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13571 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13572 return foldedExt;
13573
13574 if (SDValue foldedExt =
13575 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13576 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13577 return foldedExt;
13578
13579 // fold (sext (load x)) to multiple smaller sextloads.
13580 // Only on illegal but splittable vectors.
13581 if (SDValue ExtLoad = CombineExtLoad(N))
13582 return ExtLoad;
13583
13584 // Try to simplify (sext (sextload x)).
13585 if (SDValue foldedExt = tryToFoldExtOfExtload(
13586 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13587 return foldedExt;
13588
13589 // Try to simplify (sext (atomic_load x)).
13590 if (SDValue foldedExt =
13591 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::SEXTLOAD))
13592 return foldedExt;
13593
13594 // fold (sext (and/or/xor (load x), cst)) ->
13595 // (and/or/xor (sextload x), (sext cst))
13596 if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13597 isa<LoadSDNode>(N0.getOperand(0)) &&
13598 N0.getOperand(1).getOpcode() == ISD::Constant &&
13599 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13600 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13601 EVT MemVT = LN00->getMemoryVT();
13602 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
13603 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13604 SmallVector<SDNode*, 4> SetCCs;
13605 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13606 ISD::SIGN_EXTEND, SetCCs, TLI);
13607 if (DoXform) {
13608 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13609 LN00->getChain(), LN00->getBasePtr(),
13610 LN00->getMemoryVT(),
13611 LN00->getMemOperand());
13612 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13613 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13614 ExtLoad, DAG.getConstant(Mask, DL, VT));
13615 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13616 bool NoReplaceTruncAnd = !N0.hasOneUse();
13617 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13618 CombineTo(N, And);
13619 // If N0 has multiple uses, change other uses as well.
13620 if (NoReplaceTruncAnd) {
13621 SDValue TruncAnd =
13622 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13623 CombineTo(N0.getNode(), TruncAnd);
13624 }
13625 if (NoReplaceTrunc) {
13626 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13627 } else {
13628 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13629 LN00->getValueType(0), ExtLoad);
13630 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13631 }
13632 return SDValue(N,0); // Return N so it doesn't get rechecked!
13633 }
13634 }
13635 }
13636
13637 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13638 return V;
13639
13640 if (SDValue V = foldSextSetcc(N))
13641 return V;
13642
13643 // fold (sext x) -> (zext x) if the sign bit is known zero.
13644 if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13645 (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13646 DAG.SignBitIsZero(N0)) {
13647 SDNodeFlags Flags;
13648 Flags.setNonNeg(true);
13649 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13650 }
13651
13652 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13653 return NewVSel;
13654
13655 // Eliminate this sign extend by doing a negation in the destination type:
13656 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13657 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13661 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13662 return DAG.getNegative(Zext, DL, VT);
13663 }
13664 // Eliminate this sign extend by doing a decrement in the destination type:
13665 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13666 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13670 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13671 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13672 }
13673
13674 // fold sext (not i1 X) -> add (zext i1 X), -1
13675 // TODO: This could be extended to handle bool vectors.
13676 if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13677 (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13678 TLI.isOperationLegal(ISD::ADD, VT)))) {
13679 // If we can eliminate the 'not', the sext form should be better
13680 if (SDValue NewXor = visitXOR(N0.getNode())) {
13681 // Returning N0 is a form of in-visit replacement that may have
13682 // invalidated N0.
13683 if (NewXor.getNode() == N0.getNode()) {
13684 // Return SDValue here as the xor should have already been replaced in
13685 // this sext.
13686 return SDValue();
13687 }
13688
13689 // Return a new sext with the new xor.
13690 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13691 }
13692
13693 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13694 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13695 }
13696
13697 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13698 return Res;
13699
13700 return SDValue();
13701}
13702
13703/// Given an extending node with a pop-count operand, if the target does not
13704/// support a pop-count in the narrow source type but does support it in the
13705/// destination type, widen the pop-count to the destination type.
13706static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13707 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13708 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13709
13710 SDValue CtPop = Extend->getOperand(0);
13711 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13712 return SDValue();
13713
13714 EVT VT = Extend->getValueType(0);
13715 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13716 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13717 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13718 return SDValue();
13719
13720 // zext (ctpop X) --> ctpop (zext X)
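// (Zero-extending X only adds zero bits, which do not change the population
// count, so counting in the wider type is safe.)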
13721 SDLoc DL(Extend);
13722 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13723 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13724}
13725
13726// If we have (zext (abs X)) where X is a type that will be promoted by type
13727// legalization, convert to (abs (sext X)). But don't extend past a legal type.
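// Sign-extension preserves the signed value, so abs(sext X) equals |X|, which
// is exactly what zext(abs X) yields (including the INT_MIN wrap case, where
// both give 2^(n-1)).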
13728static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13729 assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13730
13731 EVT VT = Extend->getValueType(0);
13732 if (VT.isVector())
13733 return SDValue();
13734
13735 SDValue Abs = Extend->getOperand(0);
13736 if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13737 return SDValue();
13738
13739 EVT AbsVT = Abs.getValueType();
13740 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13741 if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13742 TargetLowering::TypePromoteInteger)
13743 return SDValue();
13744
13745 EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13746
13747 SDValue SExt =
13748 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13749 SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13750 return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13751}
13752
13753SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13754 SDValue N0 = N->getOperand(0);
13755 EVT VT = N->getValueType(0);
13756 SDLoc DL(N);
13757
13758 if (VT.isVector())
13759 if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13760 return FoldedVOp;
13761
13762 // zext(undef) = 0
13763 if (N0.isUndef())
13764 return DAG.getConstant(0, DL, VT);
13765
13766 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
13767 return Res;
13768
13769 // fold (zext (zext x)) -> (zext x)
13770 // fold (zext (aext x)) -> (zext x)
13771 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
13772 SDNodeFlags Flags;
13773 if (N0.getOpcode() == ISD::ZERO_EXTEND)
13774 Flags.setNonNeg(N0->getFlags().hasNonNeg());
13775 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0), Flags);
13776 }
13777
13778 // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13779 // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13780 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13781 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
13782 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
13783 N0.getOperand(0));
13784
13785 // fold (zext (truncate x)) -> (zext x) or
13786 // (zext (truncate x)) -> (truncate x)
13787 // This is valid when the truncated bits of x are already zero.
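// For example, if the upper 16 bits of X:i32 are known to be zero, then
// (i64 zext (i16 trunc X)) can simply become (i64 zext X).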
13788 SDValue Op;
13789 KnownBits Known;
13790 if (isTruncateOf(DAG, N0, Op, Known)) {
13791 APInt TruncatedBits =
13792 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13793 APInt(Op.getScalarValueSizeInBits(), 0) :
13794 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13795 N0.getScalarValueSizeInBits(),
13796 std::min(Op.getScalarValueSizeInBits(),
13797 VT.getScalarSizeInBits()));
13798 if (TruncatedBits.isSubsetOf(Known.Zero)) {
13799 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13800 DAG.salvageDebugInfo(*N0.getNode());
13801
13802 return ZExtOrTrunc;
13803 }
13804 }
13805
13806 // fold (zext (truncate x)) -> (and x, mask)
13807 if (N0.getOpcode() == ISD::TRUNCATE) {
13808 // fold (zext (truncate (load x))) -> (zext (smaller load x))
13809 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13810 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13811 SDNode *oye = N0.getOperand(0).getNode();
13812 if (NarrowLoad.getNode() != N0.getNode()) {
13813 CombineTo(N0.getNode(), NarrowLoad);
13814 // CombineTo deleted the truncate, if needed, but not what's under it.
13815 AddToWorklist(oye);
13816 }
13817 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13818 }
13819
13820 EVT SrcVT = N0.getOperand(0).getValueType();
13821 EVT MinVT = N0.getValueType();
13822
13823 if (N->getFlags().hasNonNeg()) {
13824 SDValue Op = N0.getOperand(0);
13825 unsigned OpBits = SrcVT.getScalarSizeInBits();
13826 unsigned MidBits = MinVT.getScalarSizeInBits();
13827 unsigned DestBits = VT.getScalarSizeInBits();
13828 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13829
13830 if (OpBits == DestBits) {
13831 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
13832 // bits, it can be used directly.
13833 if (NumSignBits > DestBits - MidBits)
13834 return Op;
13835 } else if (OpBits < DestBits) {
13836 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
13837 // bits, just sext from i32.
13838 // FIXME: This can probably be ZERO_EXTEND nneg?
13839 if (NumSignBits > OpBits - MidBits)
13840 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13841 } else {
13842 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
13843 // bits, just truncate to i32.
13844 if (NumSignBits > OpBits - MidBits)
13845 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13846 }
13847 }
13848
13849 // Try to mask before the extension to avoid having to generate a larger mask,
13850 // possibly over several sub-vectors.
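// For example (illustrative): for (v4i64 zext (v4i16 trunc X:v4i32)), masking
// X with 0xFFFF in v4i32 and then zero-extending avoids materializing the
// wider v4i64 mask.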
13851 if (SrcVT.bitsLT(VT) && VT.isVector()) {
13852 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13853 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13854 SDValue Op = N0.getOperand(0);
13855 Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13856 AddToWorklist(Op.getNode());
13857 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13858 // Transfer the debug info; the new node is equivalent to N0.
13859 DAG.transferDbgValues(N0, ZExtOrTrunc);
13860 return ZExtOrTrunc;
13861 }
13862 }
13863
13864 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13865 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13866 AddToWorklist(Op.getNode());
13867 SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13868 // We may safely transfer the debug info describing the truncate node over
13869 // to the equivalent and operation.
13870 DAG.transferDbgValues(N0, And);
13871 return And;
13872 }
13873 }
13874
13875 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13876 // if either of the casts is not free.
13877 if (N0.getOpcode() == ISD::AND &&
13878 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13879 N0.getOperand(1).getOpcode() == ISD::Constant &&
13880 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13881 !TLI.isZExtFree(N0.getValueType(), VT))) {
13882 SDValue X = N0.getOperand(0).getOperand(0);
13883 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13884 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13885 return DAG.getNode(ISD::AND, DL, VT,
13886 X, DAG.getConstant(Mask, DL, VT));
13887 }
13888
13889 // Try to simplify (zext (load x)).
13890 if (SDValue foldedExt = tryToFoldExtOfLoad(
13891 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD,
13892 ISD::ZERO_EXTEND, N->getFlags().hasNonNeg()))
13893 return foldedExt;
13894
13895 if (SDValue foldedExt =
13896 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13897 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13898 return foldedExt;
13899
13900 // fold (zext (load x)) to multiple smaller zextloads.
13901 // Only on illegal but splittable vectors.
13902 if (SDValue ExtLoad = CombineExtLoad(N))
13903 return ExtLoad;
13904
13905 // Try to simplify (zext (atomic_load x)).
13906 if (SDValue foldedExt =
13907 tryToFoldExtOfAtomicLoad(DAG, TLI, VT, N0, ISD::ZEXTLOAD))
13908 return foldedExt;
13909
13910 // fold (zext (and/or/xor (load x), cst)) ->
13911 // (and/or/xor (zextload x), (zext cst))
13912 // Unless (and (load x) cst) will match as a zextload already and has
13913 // additional users, or the zext is already free.
13914 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13915 isa<LoadSDNode>(N0.getOperand(0)) &&
13916 N0.getOperand(1).getOpcode() == ISD::Constant &&
13917 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13918 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13919 EVT MemVT = LN00->getMemoryVT();
13920 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13921 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13922 bool DoXform = true;
13923 SmallVector<SDNode *, 4> SetCCs;
13924 if (!N0.hasOneUse()) {
13925 if (N0.getOpcode() == ISD::AND) {
13926 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13927 EVT LoadResultTy = AndC->getValueType(0);
13928 EVT ExtVT;
13929 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13930 DoXform = false;
13931 }
13932 }
13933 if (DoXform)
13934 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13935 ISD::ZERO_EXTEND, SetCCs, TLI);
13936 if (DoXform) {
13937 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13938 LN00->getChain(), LN00->getBasePtr(),
13939 LN00->getMemoryVT(),
13940 LN00->getMemOperand());
13941 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13942 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13943 ExtLoad, DAG.getConstant(Mask, DL, VT));
13944 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13945 bool NoReplaceTruncAnd = !N0.hasOneUse();
13946 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13947 CombineTo(N, And);
13948 // If N0 has multiple uses, change other uses as well.
13949 if (NoReplaceTruncAnd) {
13950 SDValue TruncAnd =
13951 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13952 CombineTo(N0.getNode(), TruncAnd);
13953 }
13954 if (NoReplaceTrunc) {
13955 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13956 } else {
13957 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13958 LN00->getValueType(0), ExtLoad);
13959 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13960 }
13961 return SDValue(N,0); // Return N so it doesn't get rechecked!
13962 }
13963 }
13964 }
13965
13966 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13967 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13968 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13969 return ZExtLoad;
13970
13971 // Try to simplify (zext (zextload x)).
13972 if (SDValue foldedExt = tryToFoldExtOfExtload(
13973 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13974 return foldedExt;
13975
13976 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13977 return V;
13978
13979 if (N0.getOpcode() == ISD::SETCC) {
13980 // Propagate fast-math-flags.
13981 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13982
13983 // Only do this before legalize for now.
13984 if (!LegalOperations && VT.isVector() &&
13985 N0.getValueType().getVectorElementType() == MVT::i1) {
13986 EVT N00VT = N0.getOperand(0).getValueType();
13987 if (getSetCCResultType(N00VT) == N0.getValueType())
13988 return SDValue();
13989
13990 // We know that the # elements of the results is the same as the #
13991 // elements of the compare (and the # elements of the compare result for
13992 // that matter). Check to see that they are the same size. If so, we know
13993 // that the element size of the sext'd result matches the element size of
13994 // the compare operands.
13995 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13996 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13997 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13998 N0.getOperand(1), N0.getOperand(2));
13999 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
14000 }
14001
14002 // If the desired elements are smaller or larger than the source
14003 // elements we can use a matching integer vector type and then
14004 // truncate/any extend followed by zext_in_reg.
14005 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14006 SDValue VsetCC =
14007 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
14008 N0.getOperand(1), N0.getOperand(2));
14009 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
14010 N0.getValueType());
14011 }
14012
14013 // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
14014 EVT N0VT = N0.getValueType();
14015 EVT N00VT = N0.getOperand(0).getValueType();
14016 if (SDValue SCC = SimplifySelectCC(
14017 DL, N0.getOperand(0), N0.getOperand(1),
14018 DAG.getBoolConstant(true, DL, N0VT, N00VT),
14019 DAG.getBoolConstant(false, DL, N0VT, N00VT),
14020 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14021 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
14022 }
14023
14024 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
14025 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
14026 !TLI.isZExtFree(N0, VT)) {
14027 SDValue ShVal = N0.getOperand(0);
14028 SDValue ShAmt = N0.getOperand(1);
14029 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
14030 if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
14031 if (N0.getOpcode() == ISD::SHL) {
14032 // If the original shl may be shifting out bits, do not perform this
14033 // transformation.
14034 unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
14035 ShVal.getOperand(0).getValueSizeInBits();
14036 if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
14037 // If the shift is too large, then see if we can deduce that the
14038 // shift is safe anyway.
14039 // Create a mask that has ones for the bits being shifted out.
14040 APInt ShiftOutMask =
14041 APInt::getHighBitsSet(ShVal.getValueSizeInBits(),
14042 ShAmtC->getAPIntValue().getZExtValue());
14043
14044 // Check if the bits being shifted out are known to be zero.
14045 if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask))
14046 return SDValue();
14047 }
14048 }
14049
14050 // Ensure that the shift amount is wide enough for the shifted value.
14051 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
14052 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
14053
14054 return DAG.getNode(N0.getOpcode(), DL, VT,
14055 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
14056 }
14057 }
14058 }
14059
14060 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14061 return NewVSel;
14062
14063 if (SDValue NewCtPop = widenCtPop(N, DAG))
14064 return NewCtPop;
14065
14066 if (SDValue V = widenAbs(N, DAG))
14067 return V;
14068
14069 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14070 return Res;
14071
14072 // CSE zext nneg with sext if the zext is not free.
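// For example, if (sext i64 X) already exists, (zext nneg i64 X) computes the
// same value (the operand is known non-negative), so the sext node is reused.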
14073 if (N->getFlags().hasNonNeg() && !TLI.isZExtFree(N0.getValueType(), VT)) {
14074 SDNode *CSENode = DAG.getNodeIfExists(ISD::SIGN_EXTEND, N->getVTList(), N0);
14075 if (CSENode)
14076 return SDValue(CSENode, 0);
14077 }
14078
14079 return SDValue();
14080}
14081
14082SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
14083 SDValue N0 = N->getOperand(0);
14084 EVT VT = N->getValueType(0);
14085 SDLoc DL(N);
14086
14087 // aext(undef) = undef
14088 if (N0.isUndef())
14089 return DAG.getUNDEF(VT);
14090
14091 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14092 return Res;
14093
14094 // fold (aext (aext x)) -> (aext x)
14095 // fold (aext (zext x)) -> (zext x)
14096 // fold (aext (sext x)) -> (sext x)
14097 if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND ||
14098 N0.getOpcode() == ISD::SIGN_EXTEND) {
14099 SDNodeFlags Flags;
14100 if (N0.getOpcode() == ISD::ZERO_EXTEND)
14101 Flags.setNonNeg(N0->getFlags().hasNonNeg());
14102 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Flags);
14103 }
14104
14105 // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
14106 // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
14107 // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
14108 if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
14109 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
14110 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
14111 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14112
14113 // fold (aext (truncate (load x))) -> (aext (smaller load x))
14114 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
14115 if (N0.getOpcode() == ISD::TRUNCATE) {
14116 if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
14117 SDNode *oye = N0.getOperand(0).getNode();
14118 if (NarrowLoad.getNode() != N0.getNode()) {
14119 CombineTo(N0.getNode(), NarrowLoad);
14120 // CombineTo deleted the truncate, if needed, but not what's under it.
14121 AddToWorklist(oye);
14122 }
14123 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14124 }
14125 }
14126
14127 // fold (aext (truncate x))
14128 if (N0.getOpcode() == ISD::TRUNCATE)
14129 return DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
14130
14131 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
14132 // if the trunc is not free.
14133 if (N0.getOpcode() == ISD::AND &&
14134 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
14135 N0.getOperand(1).getOpcode() == ISD::Constant &&
14136 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
14137 SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
14138 SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
14139 assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
14140 return DAG.getNode(ISD::AND, DL, VT, X, Y);
14141 }
14142
14143 // fold (aext (load x)) -> (aext (truncate (extload x)))
14144 // None of the supported targets knows how to perform load and any_ext
14145 // on vectors in one instruction, so attempt to fold to zext instead.
14146 if (VT.isVector()) {
14147 // Try to simplify (zext (load x)).
14148 if (SDValue foldedExt =
14149 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
14150 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
14151 return foldedExt;
14152 } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
14153 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14154 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14155 bool DoXform = true;
14156 SmallVector<SDNode *, 4> SetCCs;
14157 if (!N0.hasOneUse())
14158 DoXform =
14159 ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
14160 if (DoXform) {
14161 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14162 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, LN0->getChain(),
14163 LN0->getBasePtr(), N0.getValueType(),
14164 LN0->getMemOperand());
14165 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
14166 // If the load value is used only by N, replace it via CombineTo N.
14167 bool NoReplaceTrunc = N0.hasOneUse();
14168 CombineTo(N, ExtLoad);
14169 if (NoReplaceTrunc) {
14170 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14171 recursivelyDeleteUnusedNodes(LN0);
14172 } else {
14173 SDValue Trunc =
14174 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14175 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14176 }
14177 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14178 }
14179 }
14180
14181 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14182 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14183 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
14184 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14185 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14186 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14187 ISD::LoadExtType ExtType = LN0->getExtensionType();
14188 EVT MemVT = LN0->getMemoryVT();
14189 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14190 SDValue ExtLoad =
14191 DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), LN0->getBasePtr(),
14192 MemVT, LN0->getMemOperand());
14193 CombineTo(N, ExtLoad);
14194 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14195 recursivelyDeleteUnusedNodes(LN0);
14196 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14197 }
14198 }
14199
14200 if (N0.getOpcode() == ISD::SETCC) {
14201 // Propagate fast-math-flags.
14202 SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14203
14204 // For vectors:
14205 // aext(setcc) -> vsetcc
14206 // aext(setcc) -> truncate(vsetcc)
14207 // aext(setcc) -> aext(vsetcc)
14208 // Only do this before legalize for now.
14209 if (VT.isVector() && !LegalOperations) {
14210 EVT N00VT = N0.getOperand(0).getValueType();
14211 if (getSetCCResultType(N00VT) == N0.getValueType())
14212 return SDValue();
14213
14214 // We know that the # elements of the results is the same as the
14215 // # elements of the compare (and the # elements of the compare result
14216 // for that matter). Check to see that they are the same size. If so,
14217 // we know that the element size of the sext'd result matches the
14218 // element size of the compare operands.
14219 if (VT.getSizeInBits() == N00VT.getSizeInBits())
14220 return DAG.getSetCC(DL, VT, N0.getOperand(0), N0.getOperand(1),
14221 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14222
14223 // If the desired elements are smaller or larger than the source
14224 // elements we can use a matching integer vector type and then
14225 // truncate/any extend
14226 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14227 SDValue VsetCC = DAG.getSetCC(
14228 DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1),
14229 cast<CondCodeSDNode>(N0.getOperand(2))->get());
14230 return DAG.getAnyExtOrTrunc(VsetCC, DL, VT);
14231 }
14232
14233 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14234 if (SDValue SCC = SimplifySelectCC(
14235 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14236 DAG.getConstant(0, DL, VT),
14237 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14238 return SCC;
14239 }
14240
14241 if (SDValue NewCtPop = widenCtPop(N, DAG))
14242 return NewCtPop;
14243
14244 if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14245 return Res;
14246
14247 return SDValue();
14248}
14249
14250SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14251 unsigned Opcode = N->getOpcode();
14252 SDValue N0 = N->getOperand(0);
14253 SDValue N1 = N->getOperand(1);
14254 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14255
14256 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14257 if (N0.getOpcode() == Opcode &&
14258 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14259 return N0;
14260
14261 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14262 N0.getOperand(0).getOpcode() == Opcode) {
14263 // We have an assert, truncate, assert sandwich. Make one stronger assert
14264 // by applying the smaller of the two asserted types to the larger source value.
14265 // This eliminates the later assert:
14266 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
14267 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14268 SDLoc DL(N);
14269 SDValue BigA = N0.getOperand(0);
14270 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14271 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14272 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14273 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14274 BigA.getOperand(0), MinAssertVTVal);
14275 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14276 }
14277
14278 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
14279 // than X. Just move the AssertZext in front of the truncate and drop the
14280 // AssertSExt.
14281 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14282 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14283 Opcode == ISD::AssertZext) {
14284 SDValue BigA = N0.getOperand(0);
14285 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14286 if (AssertVT.bitsLT(BigA_AssertVT)) {
14287 SDLoc DL(N);
14288 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14289 BigA.getOperand(0), N1);
14290 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14291 }
14292 }
14293
14294 return SDValue();
14295}
14296
14297SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14298 SDLoc DL(N);
14299
14300 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14301 SDValue N0 = N->getOperand(0);
14302
14303 // Fold (assertalign (assertalign x, AL0), AL1) ->
14304 // (assertalign x, max(AL0, AL1))
14305 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14306 return DAG.getAssertAlign(DL, N0.getOperand(0),
14307 std::max(AL, AAN->getAlign()));
14308
14309 // In rare cases, there are trivial arithmetic ops in source operands. Sink
14310 // this assert down to source operands so that those arithmetic ops could be
14311 // exposed to the DAG combining.
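// For example (illustrative): (assertalign (add x, 32), align 16) can become
// (add (assertalign x, align 16), 32), since the constant operand already
// preserves 16-byte alignment and the add is then exposed to other combines.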
14312 switch (N0.getOpcode()) {
14313 default:
14314 break;
14315 case ISD::ADD:
14316 case ISD::SUB: {
14317 unsigned AlignShift = Log2(AL);
14318 SDValue LHS = N0.getOperand(0);
14319 SDValue RHS = N0.getOperand(1);
14320 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14321 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14322 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14323 if (LHSAlignShift < AlignShift)
14324 LHS = DAG.getAssertAlign(DL, LHS, AL);
14325 if (RHSAlignShift < AlignShift)
14326 RHS = DAG.getAssertAlign(DL, RHS, AL);
14327 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14328 }
14329 break;
14330 }
14331 }
14332
14333 return SDValue();
14334}
14335
14336/// If the result of a load is shifted/masked/truncated to an effectively
14337/// narrower type, try to transform the load to a narrower type and/or
14338/// use an extending load.
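/// For example (illustrative, little-endian): (i32 (trunc (srl (i64 load p), 32)))
/// can become a plain i32 load from p+4.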
14339SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14340 unsigned Opc = N->getOpcode();
14341
14342 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14343 SDValue N0 = N->getOperand(0);
14344 EVT VT = N->getValueType(0);
14345 EVT ExtVT = VT;
14346
14347 // This transformation isn't valid for vector loads.
14348 if (VT.isVector())
14349 return SDValue();
14350
14351 // The ShAmt variable is used to indicate that we've consumed a right
14352 // shift. I.e. we want to narrow the width of the load by skipping to load the
14353 // ShAmt least significant bits.
14354 unsigned ShAmt = 0;
14355 // A special case is when the least significant bits from the load are masked
14356 // away, but using an AND rather than a right shift. ShiftedOffset is used
14357 // to indicate that the narrowed load should be left-shifted ShiftedOffset
14358 // bits to get the result.
14359 unsigned ShiftedOffset = 0;
14360 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14361 // extended to VT.
14362 if (Opc == ISD::SIGN_EXTEND_INREG) {
14363 ExtType = ISD::SEXTLOAD;
14364 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14365 } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14366 // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14367 // value, or it may be shifting a higher subword, half or byte into the
14368 // lowest bits.
14369
14370 // Only handle shift with constant shift amount, and the shiftee must be a
14371 // load.
14372 auto *LN = dyn_cast<LoadSDNode>(N0);
14373 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14374 if (!N1C || !LN)
14375 return SDValue();
14376 // If the shift amount is larger than the memory type then we're not
14377 // accessing any of the loaded bytes.
14378 ShAmt = N1C->getZExtValue();
14379 uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14380 if (MemoryWidth <= ShAmt)
14381 return SDValue();
14382 // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14383 ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14384 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14385 // If original load is a SEXTLOAD then we can't simply replace it by a
14386 // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
14387 // followed by a ZEXT, but that is not handled at the moment). Similarly if
14388 // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14389 if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14390 LN->getExtensionType() == ISD::ZEXTLOAD) &&
14391 LN->getExtensionType() != ExtType)
14392 return SDValue();
14393 } else if (Opc == ISD::AND) {
14394 // An AND with a constant mask is the same as a truncate + zero-extend.
14395 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14396 if (!AndC)
14397 return SDValue();
14398
14399 const APInt &Mask = AndC->getAPIntValue();
14400 unsigned ActiveBits = 0;
14401 if (Mask.isMask()) {
14402 ActiveBits = Mask.countr_one();
14403 } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14404 ShiftedOffset = ShAmt;
14405 } else {
14406 return SDValue();
14407 }
14408
14409 ExtType = ISD::ZEXTLOAD;
14410 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14411 }
14412
14413 // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14414 // a right shift. Here we redo some of those checks, to possibly adjust the
14415 // ExtVT even further based on "a masking AND". We could also end up here for
14416 // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14417 // need to be done here as well.
14418 if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14419 SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
14420 // Bail out when the SRL has more than one use. This is done for historical
14421 // (undocumented) reasons. Maybe the intent was to guard the AND-masking
14422 // check below? And maybe it could be unprofitable to do the transform in
14423 // case the SRL has multiple uses and we get here with Opc != ISD::SRL?
14424 // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
14425 if (!SRL.hasOneUse())
14426 return SDValue();
14427
14428 // Only handle shift with constant shift amount, and the shiftee must be a
14429 // load.
14430 auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14431 auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14432 if (!SRL1C || !LN)
14433 return SDValue();
14434
14435 // If the shift amount is larger than the input type then we're not
14436 // accessing any of the loaded bytes. If the load was a zextload/extload
14437 // then the result of the shift+trunc is zero/undef (handled elsewhere).
14438 ShAmt = SRL1C->getZExtValue();
14439 uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14440 if (ShAmt >= MemoryWidth)
14441 return SDValue();
14442
14443 // Because a SRL must be assumed to *need* to zero-extend the high bits
14444 // (as opposed to anyext the high bits), we can't combine the zextload
14445 // lowering of SRL and an sextload.
14446 if (LN->getExtensionType() == ISD::SEXTLOAD)
14447 return SDValue();
14448
14449 // Avoid reading outside the memory accessed by the original load (could
14450 // happen if we only adjust the load base pointer by ShAmt). Instead we
14451 // try to narrow the load even further. The typical scenario here is:
14452 // (i64 (truncate (i96 (srl (load x), 64)))) ->
14453 // (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14454 if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14455 // Don't replace sextload by zextload.
14456 if (ExtType == ISD::SEXTLOAD)
14457 return SDValue();
14458 // Narrow the load.
14459 ExtType = ISD::ZEXTLOAD;
14460 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14461 }
14462
14463 // If the SRL is only used by a masking AND, we may be able to adjust
14464 // the ExtVT to make the AND redundant.
14465 SDNode *Mask = *(SRL->use_begin());
14466 if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14467 isa<ConstantSDNode>(Mask->getOperand(1))) {
14468 unsigned Offset, ActiveBits;
14469 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14470 if (ShiftMask.isMask()) {
14471 EVT MaskedVT =
14472 EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14473 // If the mask is smaller, recompute the type.
14474 if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14475 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14476 ExtVT = MaskedVT;
14477 } else if (ExtType == ISD::ZEXTLOAD &&
14478 ShiftMask.isShiftedMask(Offset, ActiveBits) &&
14479 (Offset + ShAmt) < VT.getScalarSizeInBits()) {
14480 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14481 // If the mask is shifted we can use a narrower load and a shl to insert
14482 // the trailing zeros.
14483 if (((Offset + ActiveBits) <= ExtVT.getScalarSizeInBits()) &&
14484 TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT)) {
14485 ExtVT = MaskedVT;
14486 ShAmt = Offset + ShAmt;
14487 ShiftedOffset = Offset;
14488 }
14489 }
14490 }
14491
14492 N0 = SRL.getOperand(0);
14493 }
14494
14495 // If the load is shifted left (and the result isn't shifted back right), we
14496 // can fold a truncate through the shift. The typical scenario is that N
14497 // points at a TRUNCATE here so the attempted fold is:
14498 // (truncate (shl (load x), c))) -> (shl (narrow load x), c)
14499 // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14500 unsigned ShLeftAmt = 0;
14501 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14502 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14503 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14504 ShLeftAmt = N01->getZExtValue();
14505 N0 = N0.getOperand(0);
14506 }
14507 }
14508
14509 // If we haven't found a load, we can't narrow it.
14510 if (!isa<LoadSDNode>(N0))
14511 return SDValue();
14512
14513 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14514 // Reducing the width of a volatile load is illegal. For atomics, we may be
14515 // able to reduce the width provided we never widen again. (see D66309)
14516 if (!LN0->isSimple() ||
14517 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14518 return SDValue();
14519
14520 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14521 unsigned LVTStoreBits =
14522 LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14523 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14524 return LVTStoreBits - EVTStoreBits - ShAmt;
14525 };
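// Illustrative numbers for the big-endian adjustment above: narrowing an i64
// load to i16 with ShAmt == 16 yields 64 - 16 - 16 = 32, i.e. the wanted bits
// start 32 bits (4 bytes) into the in-memory value.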
14526
14527 // We need to adjust the pointer to the load by ShAmt bits in order to load
14528 // the correct bytes.
14529 unsigned PtrAdjustmentInBits =
14530 DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14531
14532 uint64_t PtrOff = PtrAdjustmentInBits / 8;
14533 SDLoc DL(LN0);
14534 // The original load itself didn't wrap, so an offset within it doesn't.
14535 SDNodeFlags Flags;
14536 Flags.setNoUnsignedWrap(true);
14537 SDValue NewPtr = DAG.getMemBasePlusOffset(
14538 LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14539 AddToWorklist(NewPtr.getNode());
14540
14541 SDValue Load;
14542 if (ExtType == ISD::NON_EXTLOAD)
14543 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14544 LN0->getPointerInfo().getWithOffset(PtrOff),
14545 LN0->getOriginalAlign(),
14546 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14547 else
14548 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14549 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14550 LN0->getOriginalAlign(),
14551 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14552
14553 // Replace the old load's chain with the new load's chain.
14554 WorklistRemover DeadNodes(*this);
14555 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14556
14557 // Shift the result left, if we've swallowed a left shift.
14558 SDValue Result = Load;
14559 if (ShLeftAmt != 0) {
14560 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14561 if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14562 ShImmTy = VT;
14563 // If the shift amount is as large as the result size (but, presumably,
14564 // no larger than the source) then the useful bits of the result are
14565 // zero; we can't simply return the shortened shift, because the result
14566 // of that operation is undefined.
14567 if (ShLeftAmt >= VT.getScalarSizeInBits())
14568 Result = DAG.getConstant(0, DL, VT);
14569 else
14570 Result = DAG.getNode(ISD::SHL, DL, VT,
14571 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14572 }
14573
14574 if (ShiftedOffset != 0) {
14575 // We're using a shifted mask, so the load now has an offset. This means
14576 // that the data has been loaded into lower bytes than it would have been
14577 // before, so we need to shl the loaded data into the correct position in the
14578 // register.
14579 SDValue ShiftC = DAG.getConstant(ShiftedOffset, DL, VT);
14580 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14581 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14582 }
14583
14584 // Return the new loaded value.
14585 return Result;
14586}
14587
14588SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14589 SDValue N0 = N->getOperand(0);
14590 SDValue N1 = N->getOperand(1);
14591 EVT VT = N->getValueType(0);
14592 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14593 unsigned VTBits = VT.getScalarSizeInBits();
14594 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14595
14596 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
14597 if (N0.isUndef())
14598 return DAG.getConstant(0, SDLoc(N), VT);
14599
14600 // fold (sext_in_reg c1) -> c1
14601 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14602 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14603
14604 // If the input is already sign extended, just drop the extension.
14605 if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14606 return N0;
14607
14608 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14609 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14610 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14611 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14612 N1);
14613
14614 // fold (sext_in_reg (sext x)) -> (sext x)
14615 // fold (sext_in_reg (aext x)) -> (sext x)
14616 // if x is small enough or if we know that x has more than 1 sign bit and the
14617 // sign_extend_inreg is extending from one of them.
14618 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14619 SDValue N00 = N0.getOperand(0);
14620 unsigned N00Bits = N00.getScalarValueSizeInBits();
14621 if ((N00Bits <= ExtVTBits ||
14622 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14623 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14624 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14625 }
14626
14627 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14628 // if x is small enough or if we know that x has more than 1 sign bit and the
14629 // sign_extend_inreg is extending from one of them.
14630 if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14631 SDValue N00 = N0.getOperand(0);
14632 unsigned N00Bits = N00.getScalarValueSizeInBits();
14633 unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14634 unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14635 bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14636 APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14637 if ((N00Bits == ExtVTBits ||
14638 (!IsZext && (N00Bits < ExtVTBits ||
14639 DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14640 (!LegalOperations ||
14641 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14642 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14643 }
14644
14645 // fold (sext_in_reg (zext x)) -> (sext x)
14646 // iff we are extending the source sign bit.
14647 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14648 SDValue N00 = N0.getOperand(0);
14649 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14650 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14651 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14652 }
14653
14654 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14655 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14656 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14657
14658 // fold operands of sext_in_reg based on knowledge that the top bits are not
14659 // demanded.
14660 if (SimplifyDemandedBits(SDValue(N, 0)))
14661 return SDValue(N, 0);
14662
14663 // fold (sext_in_reg (load x)) -> (smaller sextload x)
14664 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14665 if (SDValue NarrowLoad = reduceLoadWidth(N))
14666 return NarrowLoad;
14667
14668 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14669 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14670 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14671 if (N0.getOpcode() == ISD::SRL) {
14672 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14673 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14674 // We can turn this into an SRA iff the input to the SRL is already sign
14675 // extended enough.
14676 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14677 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14678 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14679 N0.getOperand(1));
14680 }
14681 }
14682
14683 // fold (sext_inreg (extload x)) -> (sextload x)
14684 // If sextload is not supported by target, we can only do the combine when
14685 // load has one use. Doing otherwise can block folding the extload with other
14686 // extends that the target does support.
14687 if (ISD::isEXTLoad(N0.getNode()) &&
14688 ISD::isUNINDEXEDLoad(N0.getNode()) &&
14689 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14690 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14691 N0.hasOneUse()) ||
14692 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14693 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14694 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14695 LN0->getChain(),
14696 LN0->getBasePtr(), ExtVT,
14697 LN0->getMemOperand());
14698 CombineTo(N, ExtLoad);
14699 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14700 AddToWorklist(ExtLoad.getNode());
14701 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14702 }
14703
14704 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14705 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14706 N0.hasOneUse() &&
14707 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14708 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14709 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14710 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14711 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14712 LN0->getChain(),
14713 LN0->getBasePtr(), ExtVT,
14714 LN0->getMemOperand());
14715 CombineTo(N, ExtLoad);
14716 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14717 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14718 }
14719
14720 // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14721 // ignore it if the masked load is already sign extended
14722 if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14723 if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14724 Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14725 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14726 SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14727 VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14728 Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14729 Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14730 CombineTo(N, ExtMaskedLoad);
14731 CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14732 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14733 }
14734 }
14735
14736 // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14737 if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14738 if (SDValue(GN0, 0).hasOneUse() &&
14739 ExtVT == GN0->getMemoryVT() &&
14740 TLI.isVectorLoadExtDesirable(SDValue(SDValue(GN0, 0)))) {
14741 SDValue Ops[] = {GN0->getChain(), GN0->getPassThru(), GN0->getMask(),
14742 GN0->getBasePtr(), GN0->getIndex(), GN0->getScale()};
14743
14744 SDValue ExtLoad = DAG.getMaskedGather(
14745 DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14746 GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14747
14748 CombineTo(N, ExtLoad);
14749 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14750 AddToWorklist(ExtLoad.getNode());
14751 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14752 }
14753 }
14754
14755 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14756 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14757 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14758 N0.getOperand(1), false))
14759 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14760 }
14761
14762 // Fold (iM_signext_inreg
14763 // (extract_subvector (zext|anyext|sext iN_v to _) _)
14764 // from iN)
14765 // -> (extract_subvector (signext iN_v to iM))
14766 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14767 ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14768 SDValue InnerExt = N0.getOperand(0);
14769 EVT InnerExtVT = InnerExt->getValueType(0);
14770 SDValue Extendee = InnerExt->getOperand(0);
14771
14772 if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14773 (!LegalOperations ||
14774 TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14775 SDValue SignExtExtendee =
14776 DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14777 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14778 N0.getOperand(1));
14779 }
14780 }
14781
14782 return SDValue();
14783}
14784
14785 static SDValue foldExtendVectorInregToExtendOfSubvector(
14786 SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG,
14787 bool LegalOperations) {
14788 unsigned InregOpcode = N->getOpcode();
14789 unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14790
14791 SDValue Src = N->getOperand(0);
14792 EVT VT = N->getValueType(0);
14793 EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14794 Src.getValueType().getVectorElementType(),
14795 VT.getVectorElementCount());
14796
14797 assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14798 "Expected EXTEND_VECTOR_INREG dag node in input!");
14799
14800 // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14801 // FIXME: one-use check may be overly restrictive
14802 if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14803 return SDValue();
14804
14805 // Profitability check: we must be extending exactly one of its operands.
14806 // FIXME: this is probably overly restrictive.
14807 Src = Src.getOperand(0);
14808 if (Src.getValueType() != SrcVT)
14809 return SDValue();
14810
14811 if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14812 return SDValue();
14813
14814 return DAG.getNode(Opcode, DL, VT, Src);
14815}
14816
14817SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14818 SDValue N0 = N->getOperand(0);
14819 EVT VT = N->getValueType(0);
14820 SDLoc DL(N);
14821
14822 if (N0.isUndef()) {
14823 // aext_vector_inreg(undef) = undef because the top bits are undefined.
14824 // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14825 return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14826 ? DAG.getUNDEF(VT)
14827 : DAG.getConstant(0, DL, VT);
14828 }
14829
14830 if (SDValue Res = tryToFoldExtendOfConstant(N, DL, TLI, DAG, LegalTypes))
14831 return Res;
14832
14833 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14834 return SDValue(N, 0);
14835
14836 if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, DL, TLI, DAG,
14837 LegalOperations))
14838 return R;
14839
14840 return SDValue();
14841}
14842
14843SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14844 SDValue N0 = N->getOperand(0);
14845 EVT VT = N->getValueType(0);
14846 EVT SrcVT = N0.getValueType();
14847 bool isLE = DAG.getDataLayout().isLittleEndian();
14848 SDLoc DL(N);
14849
14850 // trunc(undef) = undef
14851 if (N0.isUndef())
14852 return DAG.getUNDEF(VT);
14853
14854 // fold (truncate (truncate x)) -> (truncate x)
14855 if (N0.getOpcode() == ISD::TRUNCATE)
14856 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14857
14858 // fold (truncate c1) -> c1
14859 if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, DL, VT, {N0}))
14860 return C;
14861
14862 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14863 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14864 N0.getOpcode() == ISD::SIGN_EXTEND ||
14865 N0.getOpcode() == ISD::ANY_EXTEND) {
14866 // if the source is smaller than the dest, we still need an extend.
14867 if (N0.getOperand(0).getValueType().bitsLT(VT))
14868 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
14869 // if the source is larger than the dest, then we just need the truncate.
14870 if (N0.getOperand(0).getValueType().bitsGT(VT))
14871 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14872 // if the source and dest are the same type, we can drop both the extend
14873 // and the truncate.
14874 return N0.getOperand(0);
14875 }
14876
14877 // Try to narrow a truncate-of-sext_in_reg to the destination type:
14878 // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14879 if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14880 N0.hasOneUse()) {
14881 SDValue X = N0.getOperand(0);
14882 SDValue ExtVal = N0.getOperand(1);
14883 EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14884 if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14885 SDValue TrX = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
14886 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, TrX, ExtVal);
14887 }
14888 }
14889
14890 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14891 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14892 return SDValue();
14893
14894 // Fold extract-and-trunc into a narrow extract. For example:
14895 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14896 // i32 y = TRUNCATE(i64 x)
14897 // -- becomes --
14898 // v16i8 b = BITCAST (v2i64 val)
14899 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14900 //
14901 // Note: We only run this optimization after type legalization (which often
14902 // creates this pattern) and before operation legalization after which
14903 // we need to be more careful about the vector instructions that we generate.
14904 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14905 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14906 EVT VecTy = N0.getOperand(0).getValueType();
14907 EVT ExTy = N0.getValueType();
14908 EVT TrTy = N->getValueType(0);
14909
14910 auto EltCnt = VecTy.getVectorElementCount();
14911 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14912 auto NewEltCnt = EltCnt * SizeRatio;
14913
14914 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14915 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14916
14917 SDValue EltNo = N0->getOperand(1);
14918 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14919 int Elt = EltNo->getAsZExtVal();
14920 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14921 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14922 DAG.getBitcast(NVT, N0.getOperand(0)),
14923 DAG.getVectorIdxConstant(Index, DL));
14924 }
14925 }
14926
14927 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14928 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14929 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14930 TLI.isTruncateFree(SrcVT, VT)) {
14931 SDLoc SL(N0);
14932 SDValue Cond = N0.getOperand(0);
14933 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14934 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14935 return DAG.getNode(ISD::SELECT, DL, VT, Cond, TruncOp0, TruncOp1);
14936 }
14937 }
14938
14939 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14940 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14941 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14942 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14943 SDValue Amt = N0.getOperand(1);
14944 KnownBits Known = DAG.computeKnownBits(Amt);
14945 unsigned Size = VT.getScalarSizeInBits();
14946 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14947 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14948 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14949 if (AmtVT != Amt.getValueType()) {
14950 Amt = DAG.getZExtOrTrunc(Amt, DL, AmtVT);
14951 AddToWorklist(Amt.getNode());
14952 }
14953 return DAG.getNode(ISD::SHL, DL, VT, Trunc, Amt);
14954 }
14955 }
14956
14957 if (SDValue V = foldSubToUSubSat(VT, N0.getNode(), DL))
14958 return V;
14959
14960 if (SDValue ABD = foldABSToABD(N, DL))
14961 return ABD;
14962
14963 // Attempt to pre-truncate BUILD_VECTOR sources.
14964 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14965 N0.hasOneUse() &&
14966 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14967 // Avoid creating illegal types if running after type legalizer.
14968 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14969 EVT SVT = VT.getScalarType();
14970 SmallVector<SDValue, 8> TruncOps;
14971 for (const SDValue &Op : N0->op_values()) {
14972 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14973 TruncOps.push_back(TruncOp);
14974 }
14975 return DAG.getBuildVector(VT, DL, TruncOps);
14976 }
14977
14978 // trunc (splat_vector x) -> splat_vector (trunc x)
14979 if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14980 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14981 (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14982 EVT SVT = VT.getScalarType();
14983 return DAG.getSplatVector(
14984 VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14985 }
14986
14987 // Fold a series of buildvector, bitcast, and truncate if possible.
14988 // For example fold
14989 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14990 // (2xi32 (buildvector x, y)).
14991 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14992 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14993 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14994 N0.getOperand(0).hasOneUse()) {
14995 SDValue BuildVect = N0.getOperand(0);
14996 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14997 EVT TruncVecEltTy = VT.getVectorElementType();
14998
14999 // Check that the element types match.
15000 if (BuildVectEltTy == TruncVecEltTy) {
15001 // Now we only need to compute the offset of the truncated elements.
15002 unsigned BuildVecNumElts = BuildVect.getNumOperands();
15003 unsigned TruncVecNumElts = VT.getVectorNumElements();
15004 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
15005
15006 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
15007 "Invalid number of elements");
15008
15009 SmallVector<SDValue, 8> Opnds;
15010 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
15011 Opnds.push_back(BuildVect.getOperand(i));
15012
15013 return DAG.getBuildVector(VT, DL, Opnds);
15014 }
15015 }
15016
15017 // fold (truncate (load x)) -> (smaller load x)
15018 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
15019 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
15020 if (SDValue Reduced = reduceLoadWidth(N))
15021 return Reduced;
15022
15023 // Handle the case where the truncated result is at least as wide as the
15024 // loaded type.
15025 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
15026 auto *LN0 = cast<LoadSDNode>(N0);
15027 if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
15028 SDValue NewLoad = DAG.getExtLoad(
15029 LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
15030 LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
15031 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
15032 return NewLoad;
15033 }
15034 }
15035 }
15036
15037 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
15038 // where ... are all 'undef'.
15039 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
15040 SmallVector<EVT, 8> VTs;
15041 SDValue V;
15042 unsigned Idx = 0;
15043 unsigned NumDefs = 0;
15044
15045 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
15046 SDValue X = N0.getOperand(i);
15047 if (!X.isUndef()) {
15048 V = X;
15049 Idx = i;
15050 NumDefs++;
15051 }
15052 // Stop if more than one member is non-undef.
15053 if (NumDefs > 1)
15054 break;
15055
15056 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
15057 X.getValueType().getVectorElementType(),
15058 X.getValueType().getVectorElementCount()));
15059 }
15060
15061 if (NumDefs == 0)
15062 return DAG.getUNDEF(VT);
15063
15064 if (NumDefs == 1) {
15065 assert(V.getNode() && "The single defined operand is empty!");
15066 SmallVector<SDValue, 8> Opnds;
15067 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
15068 if (i != Idx) {
15069 Opnds.push_back(DAG.getUNDEF(VTs[i]));
15070 continue;
15071 }
15072 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
15073 AddToWorklist(NV.getNode());
15074 Opnds.push_back(NV);
15075 }
15076 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
15077 }
15078 }
15079
15080 // Fold truncate of a bitcast of a vector to an extract of the low vector
15081 // element.
15082 //
15083 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
15084 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
15085 SDValue VecSrc = N0.getOperand(0);
15086 EVT VecSrcVT = VecSrc.getValueType();
15087 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
15088 (!LegalOperations ||
15089 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
15090 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
15091 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VecSrc,
15092 DAG.getVectorIdxConstant(Idx, DL));
15093 }
15094 }
15095
15096 // Simplify the operands using demanded-bits information.
15097 if (SimplifyDemandedBits(SDValue(N, 0)))
15098 return SDValue(N, 0);
15099
15100 // fold (truncate (extract_subvector(ext x))) ->
15101 // (extract_subvector x)
15102 // TODO: This can be generalized to cover cases where the truncate and extract
15103 // do not fully cancel each other out.
15104 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
15105 SDValue N00 = N0.getOperand(0);
15106 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
15107 N00.getOpcode() == ISD::ZERO_EXTEND ||
15108 N00.getOpcode() == ISD::ANY_EXTEND) {
15109 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
15110 VT.getVectorElementType())
15111 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
15112 N00.getOperand(0), N0.getOperand(1));
15113 }
15114 }
15115
15116 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15117 return NewVSel;
15118
15119 // Narrow a suitable binary operation with a non-opaque constant operand by
15120 // moving it ahead of the truncate. This is limited to pre-legalization
15121 // because targets may prefer a wider type during later combines and invert
15122 // this transform.
15123 switch (N0.getOpcode()) {
15124 case ISD::ADD:
15125 case ISD::SUB:
15126 case ISD::MUL:
15127 case ISD::AND:
15128 case ISD::OR:
15129 case ISD::XOR:
15130 if (!LegalOperations && N0.hasOneUse() &&
15131 (isConstantOrConstantVector(N0.getOperand(0), true) ||
15132 isConstantOrConstantVector(N0.getOperand(1), true))) {
15133 // TODO: We already restricted this to pre-legalization, but for vectors
15134 // we are extra cautious to not create an unsupported operation.
15135 // Target-specific changes are likely needed to avoid regressions here.
15136 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
15137 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15138 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15139 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
15140 }
15141 }
15142 break;
15143 case ISD::ADDE:
15144 case ISD::UADDO_CARRY:
15145 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
15146 // (trunc uaddo_carry(X, Y, Carry)) ->
15147 // (uaddo_carry trunc(X), trunc(Y), Carry)
15148 // When the adde's carry is not used.
15149 // We only do this for uaddo_carry before operation legalization.
15150 if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
15151 TLI.isOperationLegal(N0.getOpcode(), VT)) &&
15152 N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
15153 SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
15154 SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
15155 SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
15156 return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
15157 }
15158 break;
15159 case ISD::USUBSAT:
15160 // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
15161 // enough to know that the upper bits are zero, we must also ensure that we
15162 // don't introduce an extra truncate.
15163 if (!LegalOperations && N0.hasOneUse() &&
15164 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
15165 N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
15166 VT.getScalarSizeInBits() &&
15167 hasOperation(N0.getOpcode(), VT)) {
15168 return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
15169 DAG, DL);
15170 }
15171 break;
15172 }
15173
15174 return SDValue();
15175}
15176
15177static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15178 SDValue Elt = N->getOperand(i);
15179 if (Elt.getOpcode() != ISD::MERGE_VALUES)
15180 return Elt.getNode();
15181 return Elt.getOperand(Elt.getResNo()).getNode();
15182}
15183
15184/// build_pair (load, load) -> load
15185/// if load locations are consecutive.
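/// e.g. on a little-endian target, (build_pair (i32 load [p]), (i32 load [p+4]))
/// can become a single i64 load from [p] (illustrative; subject to the legality
/// and fast-access checks below).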
15186SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15187 assert(N->getOpcode() == ISD::BUILD_PAIR);
15188
15189 auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15190 auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15191
15192 // A BUILD_PAIR always has the least significant part in elt 0 and the
15193 // most significant part in elt 1. So when combining into one large load, we
15194 // need to consider the endianness.
15195 if (DAG.getDataLayout().isBigEndian())
15196 std::swap(LD1, LD2);
15197
15198 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15199 !LD1->hasOneUse() || !LD2->hasOneUse() ||
15200 LD1->getAddressSpace() != LD2->getAddressSpace())
15201 return SDValue();
15202
15203 unsigned LD1Fast = 0;
15204 EVT LD1VT = LD1->getValueType(0);
15205 unsigned LD1Bytes = LD1VT.getStoreSize();
15206 if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15207 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15208 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15209 *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15210 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15211 LD1->getPointerInfo(), LD1->getAlign());
15212
15213 return SDValue();
15214}
15215
15216static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15217 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15218 // and Lo parts; on big-endian machines it doesn't.
15219 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15220}
15221
15222SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15223 const TargetLowering &TLI) {
15224 // If this is not a bitcast to an FP type or if the target doesn't have
15225 // IEEE754-compliant FP logic, we're done.
15226 EVT VT = N->getValueType(0);
15227 SDValue N0 = N->getOperand(0);
15228 EVT SourceVT = N0.getValueType();
15229
15230 if (!VT.isFloatingPoint())
15231 return SDValue();
15232
15233 // TODO: Handle cases where the integer constant is a different scalar
15234 // bitwidth to the FP.
15235 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15236 return SDValue();
15237
15238 unsigned FPOpcode;
15239 APInt SignMask;
15240 switch (N0.getOpcode()) {
15241 case ISD::AND:
15242 FPOpcode = ISD::FABS;
15243 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15244 break;
15245 case ISD::XOR:
15246 FPOpcode = ISD::FNEG;
15247 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15248 break;
15249 case ISD::OR:
15250 FPOpcode = ISD::FABS;
15251 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15252 break;
15253 default:
15254 return SDValue();
15255 }
15256
15257 if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15258 return SDValue();
15259
15260 // This needs to be the inverse of logic in foldSignChangeInBitcast.
15261 // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15262 // removing this would require more changes.
15263 auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15264 if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15265 return true;
15266
15267 return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15268 };
15269
15270 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15271 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15272 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15273 // fneg (fabs X)
15274 SDValue LogicOp0 = N0.getOperand(0);
15275 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15276 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15277 IsBitCastOrFree(LogicOp0, VT)) {
15278 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15279 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15280 NumFPLogicOpsConv++;
15281 if (N0.getOpcode() == ISD::OR)
15282 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15283 return FPOp;
15284 }
15285
15286 return SDValue();
15287}
15288
15289SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15290 SDValue N0 = N->getOperand(0);
15291 EVT VT = N->getValueType(0);
15292
15293 if (N0.isUndef())
15294 return DAG.getUNDEF(VT);
15295
15296 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15297 // Only do this before legalize types, unless both types are integer and the
15298 // scalar type is legal. Only do this before legalize ops, since the target
15299 // may be depending on the bitcast.
15300 // First check to see if this is all constant.
15301 // TODO: Support FP bitcasts after legalize types.
15302 if (VT.isVector() &&
15303 (!LegalTypes ||
15304 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15305 TLI.isTypeLegal(VT.getVectorElementType()))) &&
15306 N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15307 cast<BuildVectorSDNode>(N0)->isConstant())
15308 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15309 VT.getVectorElementType());
15310
15311 // If the input is a constant, let getNode fold it.
15312 if (isIntOrFPConstant(N0)) {
15313 // If we can't allow illegal operations, we need to check that this is just
15314 // an fp -> int or int -> fp conversion and that the resulting operation will
15315 // be legal.
15316 if (!LegalOperations ||
15317 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15318 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15319 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15320 TLI.isOperationLegal(ISD::Constant, VT))) {
15321 SDValue C = DAG.getBitcast(VT, N0);
15322 if (C.getNode() != N)
15323 return C;
15324 }
15325 }
15326
15327 // (conv (conv x, t1), t2) -> (conv x, t2)
15328 if (N0.getOpcode() == ISD::BITCAST)
15329 return DAG.getBitcast(VT, N0.getOperand(0));
15330
15331 // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15332 // iff the current bitwise logicop type isn't legal
15333 if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15334 !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15335 auto IsFreeBitcast = [VT](SDValue V) {
15336 return (V.getOpcode() == ISD::BITCAST &&
15337 V.getOperand(0).getValueType() == VT) ||
15338 (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15339 V->hasOneUse());
15340 };
15341 if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15342 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15343 DAG.getBitcast(VT, N0.getOperand(0)),
15344 DAG.getBitcast(VT, N0.getOperand(1)));
15345 }
15346
15347 // fold (conv (load x)) -> (load (conv*)x)
15348 // If the resultant load doesn't need a higher alignment than the original!
15349 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15350 // Do not remove the cast if the types differ in endian layout.
15351 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15352 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15353 // If the load is volatile, we only want to change the load type if the
15354 // resulting load is legal. Otherwise we might increase the number of
15355 // memory accesses. We don't care if the original type was legal or not
15356 // as we assume software couldn't rely on the number of accesses of an
15357 // illegal type.
15358 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15359 TLI.isOperationLegal(ISD::LOAD, VT))) {
15360 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15361
15362 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15363 *LN0->getMemOperand())) {
15364 SDValue Load =
15365 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15366 LN0->getMemOperand());
15367 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15368 return Load;
15369 }
15370 }
15371
15372 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15373 return V;
15374
15375 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15376 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15377 //
15378 // For ppc_fp128:
15379 // fold (bitcast (fneg x)) ->
15380 // flipbit = signbit
15381 // (xor (bitcast x) (build_pair flipbit, flipbit))
15382 //
15383 // fold (bitcast (fabs x)) ->
15384 // flipbit = (and (extract_element (bitcast x), 0), signbit)
15385 // (xor (bitcast x) (build_pair flipbit, flipbit))
15386 // This often reduces constant pool loads.
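// Illustrative example for f32/i32: (i32 (bitcast (fneg f32:x)))
// -> (xor (i32 (bitcast x)), 0x80000000); for fabs the mask becomes
// (and (i32 (bitcast x)), 0x7fffffff).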
15387 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15388 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15389 N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15390 !N0.getValueType().isVector()) {
15391 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15392 AddToWorklist(NewConv.getNode());
15393
15394 SDLoc DL(N);
15395 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15396 assert(VT.getSizeInBits() == 128);
15397 SDValue SignBit = DAG.getConstant(
15398 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15399 SDValue FlipBit;
15400 if (N0.getOpcode() == ISD::FNEG) {
15401 FlipBit = SignBit;
15402 AddToWorklist(FlipBit.getNode());
15403 } else {
15404 assert(N0.getOpcode() == ISD::FABS);
15405 SDValue Hi =
15406 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15407 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15408 SDLoc(NewConv)));
15409 AddToWorklist(Hi.getNode());
15410 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15411 AddToWorklist(FlipBit.getNode());
15412 }
15413 SDValue FlipBits =
15414 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15415 AddToWorklist(FlipBits.getNode());
15416 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15417 }
15418 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15419 if (N0.getOpcode() == ISD::FNEG)
15420 return DAG.getNode(ISD::XOR, DL, VT,
15421 NewConv, DAG.getConstant(SignBit, DL, VT));
15422 assert(N0.getOpcode() == ISD::FABS);
15423 return DAG.getNode(ISD::AND, DL, VT,
15424 NewConv, DAG.getConstant(~SignBit, DL, VT));
15425 }
15426
15427 // fold (bitconvert (fcopysign cst, x)) ->
15428 // (or (and (bitconvert x), sign), (and cst, (not sign)))
15429 // Note that we don't handle (copysign x, cst) because this can always be
15430 // folded to an fneg or fabs.
15431 //
15432 // For ppc_fp128:
15433 // fold (bitcast (fcopysign cst, x)) ->
15434 // flipbit = (and (extract_element
15435 // (xor (bitcast cst), (bitcast x)), 0),
15436 // signbit)
15437 // (xor (bitcast cst) (build_pair flipbit, flipbit))
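// Illustrative non-ppcf128 example (f32/i32, cst == 2.0, bitcast 2.0 == 0x40000000):
// (i32 (bitcast (fcopysign 2.0, f32:x)))
// -> (or (and (i32 (bitcast x)), 0x80000000), 0x40000000)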
15438 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15439 isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15440 !VT.isVector()) {
15441 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15442 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15443 if (isTypeLegal(IntXVT)) {
15444 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15445 AddToWorklist(X.getNode());
15446
15447 // If X has a different width than the result/lhs, sext it or truncate it.
15448 unsigned VTWidth = VT.getSizeInBits();
15449 if (OrigXWidth < VTWidth) {
15450 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15451 AddToWorklist(X.getNode());
15452 } else if (OrigXWidth > VTWidth) {
15453 // To get the sign bit in the right place, we have to shift it right
15454 // before truncating.
15455 SDLoc DL(X);
15456 X = DAG.getNode(ISD::SRL, DL,
15457 X.getValueType(), X,
15458 DAG.getConstant(OrigXWidth-VTWidth, DL,
15459 X.getValueType()));
15460 AddToWorklist(X.getNode());
15461 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15462 AddToWorklist(X.getNode());
15463 }
15464
15465 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15466 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15467 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15468 AddToWorklist(Cst.getNode());
15469 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15470 AddToWorklist(X.getNode());
15471 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15472 AddToWorklist(XorResult.getNode());
15473 SDValue XorResult64 = DAG.getNode(
15474 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15475 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15476 SDLoc(XorResult)));
15477 AddToWorklist(XorResult64.getNode());
15478 SDValue FlipBit =
15479 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15480 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15481 AddToWorklist(FlipBit.getNode());
15482 SDValue FlipBits =
15483 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15484 AddToWorklist(FlipBits.getNode());
15485 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15486 }
15487 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15488 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15489 X, DAG.getConstant(SignBit, SDLoc(X), VT));
15490 AddToWorklist(X.getNode());
15491
15492 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15493 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15494 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15495 AddToWorklist(Cst.getNode());
15496
15497 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15498 }
15499 }
15500
15501 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15502 if (N0.getOpcode() == ISD::BUILD_PAIR)
15503 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15504 return CombineLD;
15505
15506 // Remove double bitcasts from shuffles - this is often a legacy of
15507 // XformToShuffleWithZero being used to combine bitmaskings (of
15508 // float vectors bitcast to integer vectors) into shuffles.
15509 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
15510 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15511 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15512 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15513 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15514 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15515
15516 // If operands are a bitcast, peek through if it casts the original VT.
15517 // If operands are a constant, just bitcast back to original VT.
15518 auto PeekThroughBitcast = [&](SDValue Op) {
15519 if (Op.getOpcode() == ISD::BITCAST &&
15520 Op.getOperand(0).getValueType() == VT)
15521 return SDValue(Op.getOperand(0));
15522 if (Op.isUndef() || isAnyConstantBuildVector(Op))
15523 return DAG.getBitcast(VT, Op);
15524 return SDValue();
15525 };
15526
15527 // FIXME: If either input vector is bitcast, try to convert the shuffle to
15528 // the result type of this bitcast. This would eliminate at least one
15529 // bitcast. See the transform in InstCombine.
15530 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15531 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15532 if (!(SV0 && SV1))
15533 return SDValue();
15534
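// The shuffle mask is rescaled to the narrower element count: bitcasting e.g.
// a v2i64 shuffle back to v4i32 turns each mask element M into {2*M, 2*M+1}.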
15535 int MaskScale =
15536 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15537 SmallVector<int, 8> NewMask;
15538 for (int M : SVN->getMask())
15539 for (int i = 0; i != MaskScale; ++i)
15540 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15541
15542 SDValue LegalShuffle =
15543 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15544 if (LegalShuffle)
15545 return LegalShuffle;
15546 }
15547
15548 return SDValue();
15549}
15550
15551SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15552 EVT VT = N->getValueType(0);
15553 return CombineConsecutiveLoads(N, VT);
15554}
15555
15556SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15557 SDValue N0 = N->getOperand(0);
15558
15559 if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15560 return N0;
15561
15562 // We currently avoid folding freeze over SRA/SRL, due to the problems seen
15563 // with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
15564 // example https://reviews.llvm.org/D136529#4120959.
15565 if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
15566 return SDValue();
15567
15568 // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15569 // Try to push freeze through instructions that propagate but don't produce
15570 // poison as far as possible. If the operand of the freeze satisfies three
15571 // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
15572 // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
15573 // or similar), then push the freeze through to the non-guaranteed operands.
15574 // NOTE: we will strip poison-generating flags, so ignore them here.
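// Illustrative example: (freeze (add x, 1)) -> (add (freeze x), 1); the
// constant operand is known non-poison and plain add cannot create poison
// once its nsw/nuw flags are dropped.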
15575 if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15576 /*ConsiderFlags*/ false) ||
15577 N0->getNumValues() != 1 || !N0->hasOneUse())
15578 return SDValue();
15579
15580 bool AllowMultipleMaybePoisonOperands =
15581 N0.getOpcode() == ISD::BUILD_VECTOR ||
15582 N0.getOpcode() == ISD::BUILD_PAIR ||
15583 N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
15584 N0.getOpcode() == ISD::CONCAT_VECTORS;
15585
15586 // Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
15587 // ones" or "constant" into something that depends on FrozenUndef. We can
15588 // instead pick undef values to keep those properties, while at the same time
15589 // folding away the freeze.
15590 // If we implement a more general solution for folding away freeze(undef) in
15591 // the future, then this special handling can be removed.
15592 if (N0.getOpcode() == ISD::BUILD_VECTOR) {
15593 SDLoc DL(N0);
15594 EVT VT = N0.getValueType();
15595 if (ISD::isBuildVectorAllOnes(N0.getNode()))
15596 return DAG.getAllOnesConstant(DL, VT);
15597 if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
15598 SmallVector<SDValue, 16> NewVecC;
15599 for (const SDValue &Op : N0->op_values())
15600 NewVecC.push_back(
15601 Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
15602 return DAG.getBuildVector(VT, DL, NewVecC);
15603 }
15604 }
15605
15606 SmallSetVector<SDValue, 8> MaybePoisonOperands;
15607 for (SDValue Op : N0->ops()) {
15608 if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15609 /*Depth*/ 1))
15610 continue;
15611 bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15612 bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15613 if (!HadMaybePoisonOperands)
15614 continue;
15615 if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15616 // Multiple maybe-poison ops when not allowed - bail out.
15617 return SDValue();
15618 }
15619 }
15620 // NOTE: the whole op may be not guaranteed to not be undef or poison because
15621 // it could create undef or poison due to its poison-generating flags.
15622 // So not finding any maybe-poison operands is fine.
15623
15624 for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15625 // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15626 if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15627 continue;
15628 // First, freeze each offending operand.
15629 SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15630 // Then, change all other uses of unfrozen operand to use frozen operand.
15631 DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15632 if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15633 FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15634 // But, that also updated the use in the freeze we just created, thus
15635 // creating a cycle in a DAG. Let's undo that by mutating the freeze.
15636 DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15637 MaybePoisonOperand);
15638 }
15639 }
15640
15641 // This node has been merged with another.
15642 if (N->getOpcode() == ISD::DELETED_NODE)
15643 return SDValue(N, 0);
15644
15645 // The whole node may have been updated, so the value we were holding
15646 // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15647 N0 = N->getOperand(0);
15648
15649 // Finally, recreate the node; its operands were updated to use
15650 // frozen operands, so we just need to use its "original" operands.
15651 SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
15652 // Special-handle ISD::UNDEF, each single one of them can be its own thing.
15653 for (SDValue &Op : Ops) {
15654 if (Op.getOpcode() == ISD::UNDEF)
15655 Op = DAG.getFreeze(Op);
15656 }
15657
15658 SDValue R;
15659 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N0)) {
15660 // Special case handling for ShuffleVectorSDNode nodes.
15661 R = DAG.getVectorShuffle(N0.getValueType(), SDLoc(N0), Ops[0], Ops[1],
15662 SVN->getMask());
15663 } else {
15664 // NOTE: this strips poison generating flags.
15665 R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15666 }
15667 assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15668 "Can't create node that may be undef/poison!");
15669 return R;
15670}
15671
15672/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15673/// operands. DstEltVT indicates the destination element value type.
15674SDValue DAGCombiner::
15675ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15676 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15677
15678 // If this is already the right type, we're done.
15679 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15680
15681 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15682 unsigned DstBitSize = DstEltVT.getSizeInBits();
15683
15684 // If this is a conversion of N elements of one type to N elements of another
15685 // type, convert each element. This handles FP<->INT cases.
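// e.g. a v4f32 constant BUILD_VECTOR becomes a v4i32 BUILD_VECTOR whose
// elements are the bitcast i32 values (illustrative).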
15686 if (SrcBitSize == DstBitSize) {
15687 SmallVector<SDValue, 8> Ops;
15688 for (SDValue Op : BV->op_values()) {
15689 // If the vector element type is not legal, the BUILD_VECTOR operands
15690 // are promoted and implicitly truncated. Make that explicit here.
15691 if (Op.getValueType() != SrcEltVT)
15692 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15693 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15694 AddToWorklist(Ops.back().getNode());
15695 }
15696 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15697 BV->getValueType(0).getVectorNumElements());
15698 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15699 }
15700
15701 // Otherwise, we're growing or shrinking the elements. To avoid having to
15702 // handle annoying details of growing/shrinking FP values, we convert them to
15703 // int first.
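// e.g. v2f64 -> v4i32 is handled as v2f64 -> v2i64 -> v4i32, so only the
// integer raw-bit splitting below has to deal with the size change
// (illustrative).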
15704 if (SrcEltVT.isFloatingPoint()) {
15705 // Convert the input float vector to an int vector where the elements are
15706 // the same size.
15707 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15708 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15709 SrcEltVT = IntVT;
15710 }
15711
15712 // Now we know the input is an integer vector. If the output is a FP type,
15713 // convert to integer first, then to FP of the right size.
15714 if (DstEltVT.isFloatingPoint()) {
15715 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15716 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15717
15718 // Next, convert to FP elements of the same size.
15719 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15720 }
15721
15722 // Okay, we know the src/dst types are both integers of differing types.
15723 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15724
15725 // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15726 // BuildVectorSDNode?
15727 auto *BVN = cast<BuildVectorSDNode>(BV);
15728
15729 // Extract the constant raw bit data.
15730 BitVector UndefElements;
15731 SmallVector<APInt> RawBits;
15732 bool IsLE = DAG.getDataLayout().isLittleEndian();
15733 if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15734 return SDValue();
15735
15736 SDLoc DL(BV);
15737 SmallVector<SDValue, 8> Ops;
15738 for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15739 if (UndefElements[I])
15740 Ops.push_back(DAG.getUNDEF(DstEltVT));
15741 else
15742 Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15743 }
15744
15745 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15746 return DAG.getBuildVector(VT, DL, Ops);
15747}
15748
15749// Returns true if floating point contraction is allowed on the FMUL-SDValue
15750// `N`
15751 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15752 assert(N.getOpcode() == ISD::FMUL);
15753
15754 return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15755 N->getFlags().hasAllowContract();
15756}
15757
15758// Returns true if `N` can assume no infinities involved in its computation.
15759 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15760 return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15761}
15762
15763/// Try to perform FMA combining on a given FADD node.
15764template <class MatchContextClass>
15765SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15766 SDValue N0 = N->getOperand(0);
15767 SDValue N1 = N->getOperand(1);
15768 EVT VT = N->getValueType(0);
15769 SDLoc SL(N);
15770 MatchContextClass matcher(DAG, TLI, N);
15771 const TargetOptions &Options = DAG.getTarget().Options;
15772
15773 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15774
15775 // Floating-point multiply-add with intermediate rounding.
15776 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15777 // FIXME: Add VP_FMAD opcode.
15778 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15779
15780 // Floating-point multiply-add without intermediate rounding.
15781 bool HasFMA =
15782 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15783 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15784
15785 // No valid opcode, do not combine.
15786 if (!HasFMAD && !HasFMA)
15787 return SDValue();
15788
15789 bool CanReassociate =
15790 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15791 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15792 Options.UnsafeFPMath || HasFMAD);
15793 // If the addition is not contractable, do not combine.
15794 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15795 return SDValue();
15796
15797 // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15798 // beneficial. It does not reduce latency. It increases register pressure. It
15799 // replaces an fadd with an fma which is a more complex instruction, so is
15800 // likely to have a larger encoding, use more functional units, etc.
15801 if (N0 == N1)
15802 return SDValue();
15803
15804 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15805 return SDValue();
15806
15807 // Always prefer FMAD to FMA for precision.
15808 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15809 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15810
15811 auto isFusedOp = [&](SDValue N) {
15812 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15813 };
15814
15815 // Is the node an FMUL and contractable either due to global flags or
15816 // SDNodeFlags.
15817 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15818 if (!matcher.match(N, ISD::FMUL))
15819 return false;
15820 return AllowFusionGlobally || N->getFlags().hasAllowContract();
15821 };
15822 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15823 // prefer to fold the multiply with fewer uses.
15824 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15825 if (N0->use_size() > N1->use_size())
15826 std::swap(N0, N1);
15827 }
15828
15829 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15830 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15831 return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15832 N0.getOperand(1), N1);
15833 }
15834
15835 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15836 // Note: Commutes FADD operands.
15837 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15838 return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15839 N1.getOperand(1), N0);
15840 }
15841
15842 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15843 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15844 // This also works with nested fma instructions:
15845 // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G -->
15846 // fma A, B, (fma C, D, fma (E, F, G))
15847 // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) -->
15848 // fma A, B, (fma C, D, fma (E, F, G)).
15849 // This requires reassociation because it changes the order of operations.
15850 if (CanReassociate) {
15851 SDValue FMA, E;
15852 if (isFusedOp(N0) && N0.hasOneUse()) {
15853 FMA = N0;
15854 E = N1;
15855 } else if (isFusedOp(N1) && N1.hasOneUse()) {
15856 FMA = N1;
15857 E = N0;
15858 }
15859
15860 SDValue TmpFMA = FMA;
15861 while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15862 SDValue FMul = TmpFMA->getOperand(2);
15863 if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15864 SDValue C = FMul.getOperand(0);
15865 SDValue D = FMul.getOperand(1);
15866 SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15867 DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15868 // Replacing the inner FMul could cause the outer FMA to be simplified
15869 // away.
15870 return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15871 }
15872
15873 TmpFMA = TmpFMA->getOperand(2);
15874 }
15875 }
15876
15877 // Look through FP_EXTEND nodes to do more combining.
15878
15879 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15880 if (matcher.match(N0, ISD::FP_EXTEND)) {
15881 SDValue N00 = N0.getOperand(0);
15882 if (isContractableFMUL(N00) &&
15883 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15884 N00.getValueType())) {
15885 return matcher.getNode(
15886 PreferredFusedOpcode, SL, VT,
15887 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15888 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15889 }
15890 }
15891
15892 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15893 // Note: Commutes FADD operands.
15894 if (matcher.match(N1, ISD::FP_EXTEND)) {
15895 SDValue N10 = N1.getOperand(0);
15896 if (isContractableFMUL(N10) &&
15897 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15898 N10.getValueType())) {
15899 return matcher.getNode(
15900 PreferredFusedOpcode, SL, VT,
15901 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15902 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15903 }
15904 }
15905
15906 // More folding opportunities when target permits.
15907 if (Aggressive) {
15908 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15909 // -> (fma x, y, (fma (fpext u), (fpext v), z))
15910 auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15911 SDValue Z) {
15912 return matcher.getNode(
15913 PreferredFusedOpcode, SL, VT, X, Y,
15914 matcher.getNode(PreferredFusedOpcode, SL, VT,
15915 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15916 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15917 };
15918 if (isFusedOp(N0)) {
15919 SDValue N02 = N0.getOperand(2);
15920 if (matcher.match(N02, ISD::FP_EXTEND)) {
15921 SDValue N020 = N02.getOperand(0);
15922 if (isContractableFMUL(N020) &&
15923 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15924 N020.getValueType())) {
15925 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15926 N020.getOperand(0), N020.getOperand(1),
15927 N1);
15928 }
15929 }
15930 }
15931
15932 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15933 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15934 // FIXME: This turns two single-precision and one double-precision
15935 // operation into two double-precision operations, which might not be
15936 // interesting for all targets, especially GPUs.
15937 auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15938 SDValue Z) {
15939 return matcher.getNode(
15940 PreferredFusedOpcode, SL, VT,
15941 matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15942 matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15943 matcher.getNode(PreferredFusedOpcode, SL, VT,
15944 matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15945 matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15946 };
15947 if (N0.getOpcode() == ISD::FP_EXTEND) {
15948 SDValue N00 = N0.getOperand(0);
15949 if (isFusedOp(N00)) {
15950 SDValue N002 = N00.getOperand(2);
15951 if (isContractableFMUL(N002) &&
15952 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15953 N00.getValueType())) {
15954 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15955 N002.getOperand(0), N002.getOperand(1),
15956 N1);
15957 }
15958 }
15959 }
15960
15961 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
15962 // -> (fma y, z, (fma (fpext u), (fpext v), x))
15963 if (isFusedOp(N1)) {
15964 SDValue N12 = N1.getOperand(2);
15965 if (N12.getOpcode() == ISD::FP_EXTEND) {
15966 SDValue N120 = N12.getOperand(0);
15967 if (isContractableFMUL(N120) &&
15968 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15969 N120.getValueType())) {
15970 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15971 N120.getOperand(0), N120.getOperand(1),
15972 N0);
15973 }
15974 }
15975 }
15976
15977 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
15978 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15979 // FIXME: This turns two single-precision and one double-precision
15980 // operation into two double-precision operations, which might not be
15981 // interesting for all targets, especially GPUs.
15982 if (N1.getOpcode() == ISD::FP_EXTEND) {
15983 SDValue N10 = N1.getOperand(0);
15984 if (isFusedOp(N10)) {
15985 SDValue N102 = N10.getOperand(2);
15986 if (isContractableFMUL(N102) &&
15987 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15988 N10.getValueType())) {
15989 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15990 N102.getOperand(0), N102.getOperand(1),
15991 N0);
15992 }
15993 }
15994 }
15995 }
15996
15997 return SDValue();
15998}
15999
16000/// Try to perform FMA combining on a given FSUB node.
16001template <class MatchContextClass>
16002SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
16003 SDValue N0 = N->getOperand(0);
16004 SDValue N1 = N->getOperand(1);
16005 EVT VT = N->getValueType(0);
16006 SDLoc SL(N);
16007 MatchContextClass matcher(DAG, TLI, N);
16008 const TargetOptions &Options = DAG.getTarget().Options;
16009
16010 bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
16011
16012 // Floating-point multiply-add with intermediate rounding.
16013 // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
16014 // FIXME: Add VP_FMAD opcode.
16015 bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
16016
16017 // Floating-point multiply-add without intermediate rounding.
16018 bool HasFMA =
16019 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16020 (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
16021
16022 // No valid opcode, do not combine.
16023 if (!HasFMAD && !HasFMA)
16024 return SDValue();
16025
16026 const SDNodeFlags Flags = N->getFlags();
16027 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
16028 Options.UnsafeFPMath || HasFMAD);
16029
16030 // If the subtraction is not contractable, do not combine.
16031 if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
16032 return SDValue();
16033
16034 if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
16035 return SDValue();
16036
16037 // Always prefer FMAD to FMA for precision.
16038 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16039 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16040 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
16041
16042 // Is the node an FMUL and contractable either due to global flags or
16043 // SDNodeFlags.
16044 auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
16045 if (!matcher.match(N, ISD::FMUL))
16046 return false;
16047 return AllowFusionGlobally || N->getFlags().hasAllowContract();
16048 };
16049
16050 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16051 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
16052 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
16053 return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
16054 XY.getOperand(1),
16055 matcher.getNode(ISD::FNEG, SL, VT, Z));
16056 }
16057 return SDValue();
16058 };
16059
16060 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16061 // Note: Commutes FSUB operands.
16062 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
16063 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
16064 return matcher.getNode(
16065 PreferredFusedOpcode, SL, VT,
16066 matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
16067 YZ.getOperand(1), X);
16068 }
16069 return SDValue();
16070 };
16071
16072 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
16073 // prefer to fold the multiply with fewer uses.
16074 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
16075 (N0->use_size() > N1->use_size())) {
16076 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
16077 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16078 return V;
16079 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
16080 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16081 return V;
16082 } else {
16083 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
16084 if (SDValue V = tryToFoldXYSubZ(N0, N1))
16085 return V;
16086 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
16087 if (SDValue V = tryToFoldXSubYZ(N0, N1))
16088 return V;
16089 }
16090
16091 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
16092 if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
16093 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
16094 SDValue N00 = N0.getOperand(0).getOperand(0);
16095 SDValue N01 = N0.getOperand(0).getOperand(1);
16096 return matcher.getNode(PreferredFusedOpcode, SL, VT,
16097 matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
16098 matcher.getNode(ISD::FNEG, SL, VT, N1));
16099 }
16100
16101 // Look through FP_EXTEND nodes to do more combining.
16102
16103 // fold (fsub (fpext (fmul x, y)), z)
16104 // -> (fma (fpext x), (fpext y), (fneg z))
16105 if (matcher.match(N0, ISD::FP_EXTEND)) {
16106 SDValue N00 = N0.getOperand(0);
16107 if (isContractableFMUL(N00) &&
16108 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16109 N00.getValueType())) {
16110 return matcher.getNode(
16111 PreferredFusedOpcode, SL, VT,
16112 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16113 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16114 matcher.getNode(ISD::FNEG, SL, VT, N1));
16115 }
16116 }
16117
16118 // fold (fsub x, (fpext (fmul y, z)))
16119 // -> (fma (fneg (fpext y)), (fpext z), x)
16120 // Note: Commutes FSUB operands.
16121 if (matcher.match(N1, ISD::FP_EXTEND)) {
16122 SDValue N10 = N1.getOperand(0);
16123 if (isContractableFMUL(N10) &&
16124 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16125 N10.getValueType())) {
16126 return matcher.getNode(
16127 PreferredFusedOpcode, SL, VT,
16128 matcher.getNode(
16129 ISD::FNEG, SL, VT,
16130 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
16131 matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
16132 }
16133 }
16134
16135 // fold (fsub (fpext (fneg (fmul, x, y))), z)
16136 // -> (fneg (fma (fpext x), (fpext y), z))
16137 // Note: This could be removed with appropriate canonicalization of the
16138 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
16139 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16140 // us from implementing the canonicalization in visitFSUB.
16141 if (matcher.match(N0, ISD::FP_EXTEND)) {
16142 SDValue N00 = N0.getOperand(0);
16143 if (matcher.match(N00, ISD::FNEG)) {
16144 SDValue N000 = N00.getOperand(0);
16145 if (isContractableFMUL(N000) &&
16146 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16147 N00.getValueType())) {
16148 return matcher.getNode(
16149 ISD::FNEG, SL, VT,
16150 matcher.getNode(
16151 PreferredFusedOpcode, SL, VT,
16152 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16153 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16154 N1));
16155 }
16156 }
16157 }
16158
16159 // fold (fsub (fneg (fpext (fmul, x, y))), z)
16160 // -> (fneg (fma (fpext x), (fpext y), z))
16161 // Note: This could be removed with appropriate canonicalization of the
16162 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
16163 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
16164 // us from implementing the canonicalization in visitFSUB.
16165 if (matcher.match(N0, ISD::FNEG)) {
16166 SDValue N00 = N0.getOperand(0);
16167 if (matcher.match(N00, ISD::FP_EXTEND)) {
16168 SDValue N000 = N00.getOperand(0);
16169 if (isContractableFMUL(N000) &&
16170 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16171 N000.getValueType())) {
16172 return matcher.getNode(
16173 ISD::FNEG, SL, VT,
16174 matcher.getNode(
16175 PreferredFusedOpcode, SL, VT,
16176 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
16177 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
16178 N1));
16179 }
16180 }
16181 }
16182
16183 auto isReassociable = [&Options](SDNode *N) {
16184 return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16185 };
16186
16187 auto isContractableAndReassociableFMUL = [&isContractableFMUL,
16188 &isReassociable](SDValue N) {
16189 return isContractableFMUL(N) && isReassociable(N.getNode());
16190 };
16191
16192 auto isFusedOp = [&](SDValue N) {
16193 return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
16194 };
16195
16196 // More folding opportunities when target permits.
16197 if (Aggressive && isReassociable(N)) {
16198 bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
16199 // fold (fsub (fma x, y, (fmul u, v)), z)
16200 // -> (fma x, y (fma u, v, (fneg z)))
16201 if (CanFuse && isFusedOp(N0) &&
16202 isContractableAndReassociableFMUL(N0.getOperand(2)) &&
16203 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
16204 return matcher.getNode(
16205 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16206 matcher.getNode(PreferredFusedOpcode, SL, VT,
16207 N0.getOperand(2).getOperand(0),
16208 N0.getOperand(2).getOperand(1),
16209 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16210 }
16211
16212 // fold (fsub x, (fma y, z, (fmul u, v)))
16213 // -> (fma (fneg y), z, (fma (fneg u), v, x))
16214 if (CanFuse && isFusedOp(N1) &&
16215 isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16216 N1->hasOneUse() && NoSignedZero) {
16217 SDValue N20 = N1.getOperand(2).getOperand(0);
16218 SDValue N21 = N1.getOperand(2).getOperand(1);
16219 return matcher.getNode(
16220 PreferredFusedOpcode, SL, VT,
16221 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16222 N1.getOperand(1),
16223 matcher.getNode(PreferredFusedOpcode, SL, VT,
16224 matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16225 }
16226
16227 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
16228 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
16229 if (isFusedOp(N0) && N0->hasOneUse()) {
16230 SDValue N02 = N0.getOperand(2);
16231 if (matcher.match(N02, ISD::FP_EXTEND)) {
16232 SDValue N020 = N02.getOperand(0);
16233 if (isContractableAndReassociableFMUL(N020) &&
16234 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16235 N020.getValueType())) {
16236 return matcher.getNode(
16237 PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16238 matcher.getNode(
16239 PreferredFusedOpcode, SL, VT,
16240 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16241 matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16242 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16243 }
16244 }
16245 }
16246
16247 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16248 // -> (fma (fpext x), (fpext y),
16249 // (fma (fpext u), (fpext v), (fneg z)))
16250 // FIXME: This turns two single-precision and one double-precision
16251 // operation into two double-precision operations, which might not be
16252 // interesting for all targets, especially GPUs.
16253 if (matcher.match(N0, ISD::FP_EXTEND)) {
16254 SDValue N00 = N0.getOperand(0);
16255 if (isFusedOp(N00)) {
16256 SDValue N002 = N00.getOperand(2);
16257 if (isContractableAndReassociableFMUL(N002) &&
16258 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16259 N00.getValueType())) {
16260 return matcher.getNode(
16261 PreferredFusedOpcode, SL, VT,
16262 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16263 matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16264 matcher.getNode(
16265 PreferredFusedOpcode, SL, VT,
16266 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16267 matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16268 matcher.getNode(ISD::FNEG, SL, VT, N1)));
16269 }
16270 }
16271 }
16272
16273 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16274 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16275 if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16276 N1->hasOneUse()) {
16277 SDValue N120 = N1.getOperand(2).getOperand(0);
16278 if (isContractableAndReassociableFMUL(N120) &&
16279 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16280 N120.getValueType())) {
16281 SDValue N1200 = N120.getOperand(0);
16282 SDValue N1201 = N120.getOperand(1);
16283 return matcher.getNode(
16284 PreferredFusedOpcode, SL, VT,
16285 matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16286 N1.getOperand(1),
16287 matcher.getNode(
16288 PreferredFusedOpcode, SL, VT,
16289 matcher.getNode(ISD::FNEG, SL, VT,
16290 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16291 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16292 }
16293 }
16294
16295 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16296 // -> (fma (fneg (fpext y)), (fpext z),
16297 // (fma (fneg (fpext u)), (fpext v), x))
16298 // FIXME: This turns two single-precision and one double-precision
16299 // operation into two double-precision operations, which might not be
16300 // interesting for all targets, especially GPUs.
16301 if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16302 SDValue CvtSrc = N1.getOperand(0);
16303 SDValue N100 = CvtSrc.getOperand(0);
16304 SDValue N101 = CvtSrc.getOperand(1);
16305 SDValue N102 = CvtSrc.getOperand(2);
16306 if (isContractableAndReassociableFMUL(N102) &&
16307 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16308 CvtSrc.getValueType())) {
16309 SDValue N1020 = N102.getOperand(0);
16310 SDValue N1021 = N102.getOperand(1);
16311 return matcher.getNode(
16312 PreferredFusedOpcode, SL, VT,
16313 matcher.getNode(ISD::FNEG, SL, VT,
16314 matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16315 matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16316 matcher.getNode(
16317 PreferredFusedOpcode, SL, VT,
16318 matcher.getNode(ISD::FNEG, SL, VT,
16319 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16320 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16321 }
16322 }
16323 }
16324
16325 return SDValue();
16326}
16327
16328/// Try to perform FMA combining on a given FMUL node based on the distributive
16329/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16330/// subtraction instead of addition).
16331SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16332 SDValue N0 = N->getOperand(0);
16333 SDValue N1 = N->getOperand(1);
16334 EVT VT = N->getValueType(0);
16335 SDLoc SL(N);
16336
16337 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16338
16339 const TargetOptions &Options = DAG.getTarget().Options;
16340
16341 // The transforms below are incorrect when x == 0 and y == inf, because the
16342 // intermediate multiplication produces a nan.
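// e.g. with x == 0.0 and y == inf: (fmul (fadd x, 1.0), y) == inf, but the
// rewritten (fma x, y, y) == (0.0 * inf) + inf == NaN.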
16343 SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16344 if (!hasNoInfs(Options, FAdd))
16345 return SDValue();
16346
16347 // Floating-point multiply-add without intermediate rounding.
16348 bool HasFMA =
16349 isContractableFMUL(Options, SDValue(N, 0)) &&
16350 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16351 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16352
16353 // Floating-point multiply-add with intermediate rounding. This can result
16354 // in a less precise result due to the changed rounding order.
16355 bool HasFMAD = Options.UnsafeFPMath &&
16356 (LegalOperations && TLI.isFMADLegal(DAG, N));
16357
16358 // No valid opcode, do not combine.
16359 if (!HasFMAD && !HasFMA)
16360 return SDValue();
16361
16362 // Always prefer FMAD to FMA for precision.
16363 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16364 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16365
16366 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16367 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16368 auto FuseFADD = [&](SDValue X, SDValue Y) {
16369 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16370 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16371 if (C->isExactlyValue(+1.0))
16372 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16373 Y);
16374 if (C->isExactlyValue(-1.0))
16375 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16376 DAG.getNode(ISD::FNEG, SL, VT, Y));
16377 }
16378 }
16379 return SDValue();
16380 };
16381
16382 if (SDValue FMA = FuseFADD(N0, N1))
16383 return FMA;
16384 if (SDValue FMA = FuseFADD(N1, N0))
16385 return FMA;
16386
16387 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16388 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16389 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16390 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16391 auto FuseFSUB = [&](SDValue X, SDValue Y) {
16392 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16393 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16394 if (C0->isExactlyValue(+1.0))
16395 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16396 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16397 Y);
16398 if (C0->isExactlyValue(-1.0))
16399 return DAG.getNode(PreferredFusedOpcode, SL, VT,
16400 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16401 DAG.getNode(ISD::FNEG, SL, VT, Y));
16402 }
16403 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16404 if (C1->isExactlyValue(+1.0))
16405 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16406 DAG.getNode(ISD::FNEG, SL, VT, Y));
16407 if (C1->isExactlyValue(-1.0))
16408 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16409 Y);
16410 }
16411 }
16412 return SDValue();
16413 };
16414
16415 if (SDValue FMA = FuseFSUB(N0, N1))
16416 return FMA;
16417 if (SDValue FMA = FuseFSUB(N1, N0))
16418 return FMA;
16419
16420 return SDValue();
16421}
16422
16423SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16424 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16425
16426 // FADD -> FMA combines:
16427 if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16428 if (Fused.getOpcode() != ISD::DELETED_NODE)
16429 AddToWorklist(Fused.getNode());
16430 return Fused;
16431 }
16432 return SDValue();
16433}
16434
16435SDValue DAGCombiner::visitFADD(SDNode *N) {
16436 SDValue N0 = N->getOperand(0);
16437 SDValue N1 = N->getOperand(1);
16438 bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16439 bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16440 EVT VT = N->getValueType(0);
16441 SDLoc DL(N);
16442 const TargetOptions &Options = DAG.getTarget().Options;
16443 SDNodeFlags Flags = N->getFlags();
16444 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16445
16446 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16447 return R;
16448
16449 // fold (fadd c1, c2) -> c1 + c2
16450 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16451 return C;
16452
16453 // canonicalize constant to RHS
16454 if (N0CFP && !N1CFP)
16455 return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16456
16457 // fold vector ops
16458 if (VT.isVector())
16459 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16460 return FoldedVOp;
16461
16462 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16463 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16464 if (N1C && N1C->isZero())
16465 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
16466 return N0;
16467
16468 if (SDValue NewSel = foldBinOpIntoSelect(N))
16469 return NewSel;
16470
16471 // fold (fadd A, (fneg B)) -> (fsub A, B)
16472 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16473 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16474 N1, DAG, LegalOperations, ForCodeSize))
16475 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16476
16477 // fold (fadd (fneg A), B) -> (fsub B, A)
16478 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16479 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16480 N0, DAG, LegalOperations, ForCodeSize))
16481 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16482
16483 auto isFMulNegTwo = [](SDValue FMul) {
16484 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16485 return false;
16486 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16487 return C && C->isExactlyValue(-2.0);
16488 };
16489
16490 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16491 if (isFMulNegTwo(N0)) {
16492 SDValue B = N0.getOperand(0);
16493 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16494 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16495 }
16496 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16497 if (isFMulNegTwo(N1)) {
16498 SDValue B = N1.getOperand(0);
16499 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16500 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16501 }
16502
16503 // No FP constant should be created after legalization as the Instruction
16504 // Selection pass has a hard time dealing with FP constants.
16505 bool AllowNewConst = (Level < AfterLegalizeDAG);
16506
16507 // If nnan is enabled, fold lots of things.
16508 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16509 // If allowed, fold (fadd (fneg x), x) -> 0.0
16510 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16511 return DAG.getConstantFP(0.0, DL, VT);
16512
16513 // If allowed, fold (fadd x, (fneg x)) -> 0.0
16514 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16515 return DAG.getConstantFP(0.0, DL, VT);
16516 }
16517
16518 // If 'unsafe math' or reassoc and nsz, fold lots of things.
16519 // TODO: break out portions of the transformations below for which Unsafe is
16520 // considered and which do not require both nsz and reassoc
16521 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16522 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16523 AllowNewConst) {
16524 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16525 if (N1CFP && N0.getOpcode() == ISD::FADD &&
16526 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16527 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16528 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16529 }
16530
16531 // We can fold chains of FADD's of the same value into multiplications.
16532 // This transform is not safe in general because we are reducing the number
16533 // of rounding steps.
16534 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16535 if (N0.getOpcode() == ISD::FMUL) {
16536 SDNode *CFP00 =
16537 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16538 SDNode *CFP01 =
16539 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16540
16541 // (fadd (fmul x, c), x) -> (fmul x, c+1)
16542 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16543 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16544 DAG.getConstantFP(1.0, DL, VT));
16545 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16546 }
16547
16548 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16549 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16550 N1.getOperand(0) == N1.getOperand(1) &&
16551 N0.getOperand(0) == N1.getOperand(0)) {
16552 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16553 DAG.getConstantFP(2.0, DL, VT));
16554 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16555 }
16556 }
16557
16558 if (N1.getOpcode() == ISD::FMUL) {
16559 SDNode *CFP10 =
16560 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16561 SDNode *CFP11 =
16562 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16563
16564 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16565 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16566 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16567 DAG.getConstantFP(1.0, DL, VT));
16568 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16569 }
16570
16571 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16572 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16573 N0.getOperand(0) == N0.getOperand(1) &&
16574 N1.getOperand(0) == N0.getOperand(0)) {
16575 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16576 DAG.getConstantFP(2.0, DL, VT));
16577 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16578 }
16579 }
16580
16581 if (N0.getOpcode() == ISD::FADD) {
16582 SDNode *CFP00 =
16583 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16584 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16585 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16586 (N0.getOperand(0) == N1)) {
16587 return DAG.getNode(ISD::FMUL, DL, VT, N1,
16588 DAG.getConstantFP(3.0, DL, VT));
16589 }
16590 }
16591
16592 if (N1.getOpcode() == ISD::FADD) {
16593 SDNode *CFP10 =
16594 DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16595 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16596 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16597 N1.getOperand(0) == N0) {
16598 return DAG.getNode(ISD::FMUL, DL, VT, N0,
16599 DAG.getConstantFP(3.0, DL, VT));
16600 }
16601 }
16602
16603 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16604 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16605 N0.getOperand(0) == N0.getOperand(1) &&
16606 N1.getOperand(0) == N1.getOperand(1) &&
16607 N0.getOperand(0) == N1.getOperand(0)) {
16608 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16609 DAG.getConstantFP(4.0, DL, VT));
16610 }
16611 }
16612
16613 // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16614 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16615 VT, N0, N1, Flags))
16616 return SD;
16617 } // enable-unsafe-fp-math
16618
16619 // FADD -> FMA combines:
16620 if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16621 if (Fused.getOpcode() != ISD::DELETED_NODE)
16622 AddToWorklist(Fused.getNode());
16623 return Fused;
16624 }
16625 return SDValue();
16626}
16627
16628SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16629 SDValue Chain = N->getOperand(0);
16630 SDValue N0 = N->getOperand(1);
16631 SDValue N1 = N->getOperand(2);
16632 EVT VT = N->getValueType(0);
16633 EVT ChainVT = N->getValueType(1);
16634 SDLoc DL(N);
16635 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16636
16637 // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16638 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16639 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16640 N1, DAG, LegalOperations, ForCodeSize)) {
16641 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16642 {Chain, N0, NegN1});
16643 }
16644
16645 // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16646 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16647 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16648 N0, DAG, LegalOperations, ForCodeSize)) {
16649 return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16650 {Chain, N1, NegN0});
16651 }
16652 return SDValue();
16653}
16654
16655SDValue DAGCombiner::visitFSUB(SDNode *N) {
16656 SDValue N0 = N->getOperand(0);
16657 SDValue N1 = N->getOperand(1);
16658 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16659 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16660 EVT VT = N->getValueType(0);
16661 SDLoc DL(N);
16662 const TargetOptions &Options = DAG.getTarget().Options;
16663 const SDNodeFlags Flags = N->getFlags();
16664 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16665
16666 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16667 return R;
16668
16669 // fold (fsub c1, c2) -> c1-c2
16670 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16671 return C;
16672
16673 // fold vector ops
16674 if (VT.isVector())
16675 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16676 return FoldedVOp;
16677
16678 if (SDValue NewSel = foldBinOpIntoSelect(N))
16679 return NewSel;
16680
16681 // (fsub A, 0) -> A
16682 if (N1CFP && N1CFP->isZero()) {
16683 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16684 Flags.hasNoSignedZeros()) {
16685 return N0;
16686 }
16687 }
16688
16689 if (N0 == N1) {
16690 // (fsub x, x) -> 0.0
16691 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16692 return DAG.getConstantFP(0.0f, DL, VT);
16693 }
16694
16695 // (fsub -0.0, N1) -> -N1
16696 if (N0CFP && N0CFP->isZero()) {
16697 if (N0CFP->isNegative() ||
16698 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16699 // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16700 // flushed to zero, unless all users treat denorms as zero (DAZ).
16701 // FIXME: This transform will change the sign of a NaN and the behavior
16702 // of a signaling NaN. It is only valid when a NoNaN flag is present.
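// Illustrative example (editorial note, not from the upstream source): if X
// is a denormal and output denormals are flushed to zero, FSUB(-0.0, X)
// yields a signed zero, while FNEG(X) merely flips the sign bit and still
// yields a denormal, so the two results are not bitwise equivalent.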
16703 DenormalMode DenormMode = DAG.getDenormalMode(VT);
16704 if (DenormMode == DenormalMode::getIEEE()) {
16705 if (SDValue NegN1 =
16706 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16707 return NegN1;
16708 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16709 return DAG.getNode(ISD::FNEG, DL, VT, N1);
16710 }
16711 }
16712 }
16713
16714 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16715 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16716 N1.getOpcode() == ISD::FADD) {
16717 // X - (X + Y) -> -Y
16718 if (N0 == N1->getOperand(0))
16719 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16720 // X - (Y + X) -> -Y
16721 if (N0 == N1->getOperand(1))
16722 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16723 }
16724
16725 // fold (fsub A, (fneg B)) -> (fadd A, B)
16726 if (SDValue NegN1 =
16727 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16728 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16729
16730 // FSUB -> FMA combines:
16731 if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16732 AddToWorklist(Fused.getNode());
16733 return Fused;
16734 }
16735
16736 return SDValue();
16737}
16738
16739// Transform IEEE Floats:
16740// (fmul C, (uitofp Pow2))
16741// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16742// (fdiv C, (uitofp Pow2))
16743// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16744//
16745 // The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
16746 // so there is no need for more than an add/sub.
16747//
16748// This is valid under the following circumstances:
16749// 1) We are dealing with IEEE floats
16750// 2) C is normal
16751// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
16752 // TODO: Much of this could also be used for generating `ldexp` on targets
16753 // that prefer it.
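// Worked example (illustrative): for f32, C = 3.0f has bit pattern
// 0x40400000 and Pow2 = 4 gives Log2(Pow2) = 2; adding (2 << 23) =
// 0x01000000 to those bits yields 0x41400000, which is exactly
// 12.0f == 3.0f * 4.0f. The fdiv case subtracts instead of adding.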
16754SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16755 EVT VT = N->getValueType(0);
16756 SDValue ConstOp, Pow2Op;
16757
16758 std::optional<int> Mantissa;
16759 auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16760 if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16761 return false;
16762
16763 ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16764 Pow2Op = N->getOperand(1 - ConstOpIdx);
16765 if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16766 (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16767 !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16768 return false;
16769
16770 Pow2Op = Pow2Op.getOperand(0);
16771
16772 // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16773 // TODO: We could use knownbits to make this bound more precise.
16774 int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16775
16776 auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16777 if (CFP == nullptr)
16778 return false;
16779
16780 const APFloat &APF = CFP->getValueAPF();
16781
16782 // Make sure we have a normal, IEEE-format constant.
16783 if (!APF.isNormal() || !APF.isIEEE())
16784 return false;
16785
16786 // Make sure the float's exponent is within the bounds for which this
16787 // transform produces a bitwise-equal value.
16788 int CurExp = ilogb(APF);
16789 // FMul by pow2 will only increase exponent.
16790 int MinExp =
16791 N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16792 // FDiv by pow2 will only decrease exponent.
16793 int MaxExp =
16794 N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16795 if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16796 MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16797 return false;
16798
16799 // Finally make sure we actually know the mantissa for the float type.
16800 int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16801 if (!Mantissa)
16802 Mantissa = ThisMantissa;
16803
16804 return *Mantissa == ThisMantissa && ThisMantissa > 0;
16805 };
16806
16807 // TODO: We may be able to include undefs.
16808 return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16809 };
16810
16811 if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16812 return SDValue();
16813
16814 if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16815 return SDValue();
16816
16817 // Get log2 after all other checks have taken place. This is because
16818 // BuildLogBase2 may create a new node.
16819 SDLoc DL(N);
16820 // Get Log2 type with same bitwidth as the float type (VT).
16821 EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16822 if (VT.isVector())
16823 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16824 VT.getVectorElementCount());
16825
16826 SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16827 /*InexpensiveOnly*/ true, NewIntVT);
16828 if (!Log2)
16829 return SDValue();
16830
16831 // Perform actual transform.
16832 SDValue MantissaShiftCnt =
16833 DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
16834 // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold
16835 // to `(X << C1) + (C << C1)`, but that isn't always the case because of the
16836 // cast. We could implement that by handling the casts here.
16837 SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16838 SDValue ResAsInt =
16839 DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16840 NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16841 SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16842 return ResAsFP;
16843}
16844
16845SDValue DAGCombiner::visitFMUL(SDNode *N) {
16846 SDValue N0 = N->getOperand(0);
16847 SDValue N1 = N->getOperand(1);
16848 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16849 EVT VT = N->getValueType(0);
16850 SDLoc DL(N);
16851 const TargetOptions &Options = DAG.getTarget().Options;
16852 const SDNodeFlags Flags = N->getFlags();
16853 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16854
16855 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16856 return R;
16857
16858 // fold (fmul c1, c2) -> c1*c2
16859 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16860 return C;
16861
16862 // canonicalize constant to RHS
16863 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16864 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16865 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16866
16867 // fold vector ops
16868 if (VT.isVector())
16869 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16870 return FoldedVOp;
16871
16872 if (SDValue NewSel = foldBinOpIntoSelect(N))
16873 return NewSel;
16874
16875 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16876 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16877 if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16878 N0.getOpcode() == ISD::FMUL) {
16879 SDValue N00 = N0.getOperand(0);
16880 SDValue N01 = N0.getOperand(1);
16881 // Avoid an infinite loop by making sure that N00 is not a constant
16882 // (the inner multiply has not been constant folded yet).
16883 if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16884 !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16885 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16886 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16887 }
16888 }
16889
16890 // Match a special-case: we convert X * 2.0 into fadd.
16891 // fmul (fadd X, X), C -> fmul X, 2.0 * C
16892 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16893 N0.getOperand(0) == N0.getOperand(1)) {
16894 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16895 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16896 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16897 }
16898
16899 // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16900 if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16901 VT, N0, N1, Flags))
16902 return SD;
16903 }
16904
16905 // fold (fmul X, 2.0) -> (fadd X, X)
16906 if (N1CFP && N1CFP->isExactlyValue(+2.0))
16907 return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16908
16909 // fold (fmul X, -1.0) -> (fsub -0.0, X)
16910 if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16911 if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16912 return DAG.getNode(ISD::FSUB, DL, VT,
16913 DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16914 }
16915 }
16916
16917 // -N0 * -N1 --> N0 * N1
16918 TargetLowering::NegatibleCost CostN0 =
16919 TargetLowering::NegatibleCost::Expensive;
16920 TargetLowering::NegatibleCost CostN1 =
16921 TargetLowering::NegatibleCost::Expensive;
16922 SDValue NegN0 =
16923 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16924 if (NegN0) {
16925 HandleSDNode NegN0Handle(NegN0);
16926 SDValue NegN1 =
16927 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16928 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16929 CostN1 == TargetLowering::NegatibleCost::Cheaper))
16930 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16931 }
16932
16933 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16934 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16935 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16936 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16937 TLI.isOperationLegal(ISD::FABS, VT)) {
16938 SDValue Select = N0, X = N1;
16939 if (Select.getOpcode() != ISD::SELECT)
16940 std::swap(Select, X);
16941
16942 SDValue Cond = Select.getOperand(0);
16943 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16944 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16945
16946 if (TrueOpnd && FalseOpnd &&
16947 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16948 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16949 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16950 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16951 switch (CC) {
16952 default: break;
16953 case ISD::SETOLT:
16954 case ISD::SETULT:
16955 case ISD::SETOLE:
16956 case ISD::SETULE:
16957 case ISD::SETLT:
16958 case ISD::SETLE:
16959 std::swap(TrueOpnd, FalseOpnd);
16960 [[fallthrough]];
16961 case ISD::SETOGT:
16962 case ISD::SETUGT:
16963 case ISD::SETOGE:
16964 case ISD::SETUGE:
16965 case ISD::SETGT:
16966 case ISD::SETGE:
16967 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16968 TLI.isOperationLegal(ISD::FNEG, VT))
16969 return DAG.getNode(ISD::FNEG, DL, VT,
16970 DAG.getNode(ISD::FABS, DL, VT, X));
16971 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16972 return DAG.getNode(ISD::FABS, DL, VT, X);
16973
16974 break;
16975 }
16976 }
16977 }
16978
16979 // FMUL -> FMA combines:
16980 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16981 AddToWorklist(Fused.getNode());
16982 return Fused;
16983 }
16984
16985 // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
16986 // able to run.
16987 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16988 return R;
16989
16990 return SDValue();
16991}
16992
16993template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16994 SDValue N0 = N->getOperand(0);
16995 SDValue N1 = N->getOperand(1);
16996 SDValue N2 = N->getOperand(2);
16997 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16998 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16999 EVT VT = N->getValueType(0);
17000 SDLoc DL(N);
17001 const TargetOptions &Options = DAG.getTarget().Options;
17002 // FMA nodes have flags that propagate to the created nodes.
17003 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17004 MatchContextClass matcher(DAG, TLI, N);
17005
17006 bool CanReassociate =
17007 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
17008
17009 // Constant fold FMA.
17010 if (isa<ConstantFPSDNode>(N0) &&
17011 isa<ConstantFPSDNode>(N1) &&
17012 isa<ConstantFPSDNode>(N2)) {
17013 return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
17014 }
17015
17016 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
17017 TargetLowering::NegatibleCost CostN0 =
17018 TargetLowering::NegatibleCost::Expensive;
17019 TargetLowering::NegatibleCost CostN1 =
17020 TargetLowering::NegatibleCost::Expensive;
17021 SDValue NegN0 =
17022 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17023 if (NegN0) {
17024 HandleSDNode NegN0Handle(NegN0);
17025 SDValue NegN1 =
17026 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17027 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17028 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17029 return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
17030 }
17031
17032 // FIXME: use fast math flags instead of Options.UnsafeFPMath
17033 if (Options.UnsafeFPMath) {
17034 if (N0CFP && N0CFP->isZero())
17035 return N2;
17036 if (N1CFP && N1CFP->isZero())
17037 return N2;
17038 }
17039
17040 // FIXME: Support splat of constant.
17041 if (N0CFP && N0CFP->isExactlyValue(1.0))
17042 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
17043 if (N1CFP && N1CFP->isExactlyValue(1.0))
17044 return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
17045
17046 // Canonicalize (fma c, x, y) -> (fma x, c, y)
17047 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17048 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17049 return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
17050
17051 if (CanReassociate) {
17052 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
17053 if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
17054 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17055 DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
17056 return matcher.getNode(
17057 ISD::FMUL, DL, VT, N0,
17058 matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
17059 }
17060
17061 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
17062 if (matcher.match(N0, ISD::FMUL) &&
17063 DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
17064 DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
17065 return matcher.getNode(
17066 ISD::FMA, DL, VT, N0.getOperand(0),
17067 matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
17068 }
17069 }
17070
17071 // (fma x, -1, y) -> (fadd (fneg x), y)
17072 // FIXME: Support splat of constant.
17073 if (N1CFP) {
17074 if (N1CFP->isExactlyValue(1.0))
17075 return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
17076
17077 if (N1CFP->isExactlyValue(-1.0) &&
17078 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
17079 SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
17080 AddToWorklist(RHSNeg.getNode());
17081 return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
17082 }
17083
17084 // fma (fneg x), K, y -> fma x, -K, y
17085 if (matcher.match(N0, ISD::FNEG) &&
17086 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17087 (N1.hasOneUse() &&
17088 !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
17089 return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
17090 matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
17091 }
17092 }
17093
17094 // FIXME: Support splat of constant.
17095 if (CanReassociate) {
17096 // (fma x, c, x) -> (fmul x, (c+1))
17097 if (N1CFP && N0 == N2) {
17098 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17099 matcher.getNode(ISD::FADD, DL, VT, N1,
17100 DAG.getConstantFP(1.0, DL, VT)));
17101 }
17102
17103 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
17104 if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
17105 return matcher.getNode(ISD::FMUL, DL, VT, N0,
17106 matcher.getNode(ISD::FADD, DL, VT, N1,
17107 DAG.getConstantFP(-1.0, DL, VT)));
17108 }
17109 }
17110
17111 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
17112 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
17113 if (!TLI.isFNegFree(VT))
17114 if (SDValue Neg = TLI.getCheaperNegatedExpression(
17115 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
17116 return matcher.getNode(ISD::FNEG, DL, VT, Neg);
17117 return SDValue();
17118}
17119
17120SDValue DAGCombiner::visitFMAD(SDNode *N) {
17121 SDValue N0 = N->getOperand(0);
17122 SDValue N1 = N->getOperand(1);
17123 SDValue N2 = N->getOperand(2);
17124 EVT VT = N->getValueType(0);
17125 SDLoc DL(N);
17126
17127 // Constant fold FMAD.
17128 if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
17129 isa<ConstantFPSDNode>(N2))
17130 return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
17131
17132 return SDValue();
17133}
17134
17135// Combine multiple FDIVs with the same divisor into multiple FMULs by the
17136// reciprocal.
17137// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
17138 // Notice that this is not always beneficial. One reason is that different
17139 // targets may have different costs for FDIV and FMUL, so sometimes the cost
17140 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
17141 // reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
17142SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
17143 // TODO: Limit this transform based on optsize/minsize - it always creates at
17144 // least 1 extra instruction. But the perf win may be substantial enough
17145 // that only minsize should restrict this.
17146 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
17147 const SDNodeFlags Flags = N->getFlags();
17148 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
17149 return SDValue();
17150
17151 // Skip if current node is a reciprocal/fneg-reciprocal.
17152 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
17153 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
17154 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
17155 return SDValue();
17156
17157 // Exit early if the target does not want this transform or if there can't
17158 // possibly be enough uses of the divisor to make the transform worthwhile.
17159 unsigned MinUses = TLI.combineRepeatedFPDivisors();
17160
17161 // For splat vectors, scale the number of uses by the splat factor. If we can
17162 // convert the division into a scalar op, that will likely be much faster.
17163 unsigned NumElts = 1;
17164 EVT VT = N->getValueType(0);
17165 if (VT.isVector() && DAG.isSplatValue(N1))
17166 NumElts = VT.getVectorMinNumElements();
17167
17168 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
17169 return SDValue();
17170
17171 // Find all FDIV users of the same divisor.
17172 // Use a set because duplicates may be present in the user list.
17173 SetVector<SDNode *> Users;
17174 for (auto *U : N1->uses()) {
17175 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
17176 // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
17177 if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
17178 U->getOperand(0) == U->getOperand(1).getOperand(0) &&
17179 U->getFlags().hasAllowReassociation() &&
17180 U->getFlags().hasNoSignedZeros())
17181 continue;
17182
17183 // This division is eligible for optimization only if global unsafe math
17184 // is enabled or if this division allows reciprocal formation.
17185 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
17186 Users.insert(U);
17187 }
17188 }
17189
17190 // Now that we have the actual number of divisor uses, make sure it meets
17191 // the minimum threshold specified by the target.
17192 if ((Users.size() * NumElts) < MinUses)
17193 return SDValue();
17194
17195 SDLoc DL(N);
17196 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
17197 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
17198
17199 // Dividend / Divisor -> Dividend * Reciprocal
17200 for (auto *U : Users) {
17201 SDValue Dividend = U->getOperand(0);
17202 if (Dividend != FPOne) {
17203 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
17204 Reciprocal, Flags);
17205 CombineTo(U, NewNode);
17206 } else if (U != Reciprocal.getNode()) {
17207 // In the absence of fast-math-flags, this user node is always the
17208 // same node as Reciprocal, but with FMF they may be different nodes.
17209 CombineTo(U, Reciprocal);
17210 }
17211 }
17212 return SDValue(N, 0); // N was replaced.
17213}
17214
17215SDValue DAGCombiner::visitFDIV(SDNode *N) {
17216 SDValue N0 = N->getOperand(0);
17217 SDValue N1 = N->getOperand(1);
17218 EVT VT = N->getValueType(0);
17219 SDLoc DL(N);
17220 const TargetOptions &Options = DAG.getTarget().Options;
17221 SDNodeFlags Flags = N->getFlags();
17222 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17223
17224 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17225 return R;
17226
17227 // fold (fdiv c1, c2) -> c1/c2
17228 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17229 return C;
17230
17231 // fold vector ops
17232 if (VT.isVector())
17233 if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17234 return FoldedVOp;
17235
17236 if (SDValue NewSel = foldBinOpIntoSelect(N))
17237 return NewSel;
17238
17239 if (SDValue V = combineRepeatedFPDivisors(N))
17240 return V;
17241
17242 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17243 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
17244 if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17245 // Compute the reciprocal 1.0 / c2.
17246 const APFloat &N1APF = N1CFP->getValueAPF();
17247 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17248 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17249 // Only do the transform if the reciprocal is a legal fp immediate that
17250 // isn't too nasty (eg NaN, denormal, ...).
17251 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17252 (!LegalOperations ||
17253 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17254 // backend)... we should handle this gracefully after Legalize.
17255 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17256 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17257 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17258 return DAG.getNode(ISD::FMUL, DL, VT, N0,
17259 DAG.getConstantFP(Recip, DL, VT));
17260 }
17261
17262 // If this FDIV is part of a reciprocal square root, it may be folded
17263 // into a target-specific square root estimate instruction.
17264 if (N1.getOpcode() == ISD::FSQRT) {
17265 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17266 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17267 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17268 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17269 if (SDValue RV =
17270 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17271 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17272 AddToWorklist(RV.getNode());
17273 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17274 }
17275 } else if (N1.getOpcode() == ISD::FP_ROUND &&
17276 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17277 if (SDValue RV =
17278 buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17279 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17280 AddToWorklist(RV.getNode());
17281 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17282 }
17283 } else if (N1.getOpcode() == ISD::FMUL) {
17284 // Look through an FMUL. Even though this won't remove the FDIV directly,
17285 // it's still worthwhile to get rid of the FSQRT if possible.
17286 SDValue Sqrt, Y;
17287 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17288 Sqrt = N1.getOperand(0);
17289 Y = N1.getOperand(1);
17290 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17291 Sqrt = N1.getOperand(1);
17292 Y = N1.getOperand(0);
17293 }
17294 if (Sqrt.getNode()) {
17295 // If the other multiply operand is known positive, pull it into the
17296 // sqrt. That will eliminate the division if we convert to an estimate.
17297 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17298 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17299 SDValue A;
17300 if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17301 A = Y.getOperand(0);
17302 else if (Y == Sqrt.getOperand(0))
17303 A = Y;
17304 if (A) {
17305 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17306 // X / (A * sqrt(A)) --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17307 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17308 SDValue AAZ =
17309 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17310 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17311 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17312
17313 // Estimate creation failed. Clean up speculatively created nodes.
17314 recursivelyDeleteUnusedNodes(AAZ.getNode());
17315 }
17316 }
17317
17318 // We found a FSQRT, so try to make this fold:
17319 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17320 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17321 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17322 AddToWorklist(Div.getNode());
17323 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17324 }
17325 }
17326 }
17327
17328 // Fold into a reciprocal estimate and multiply instead of a real divide.
17329 if (Options.NoInfsFPMath || Flags.hasNoInfs())
17330 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17331 return RV;
17332 }
17333
17334 // Fold X/Sqrt(X) -> Sqrt(X)
17335 if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17336 (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17337 if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17338 return N1;
17339
17340 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17341 TargetLowering::NegatibleCost CostN0 =
17342 TargetLowering::NegatibleCost::Expensive;
17343 TargetLowering::NegatibleCost CostN1 =
17344 TargetLowering::NegatibleCost::Expensive;
17345 SDValue NegN0 =
17346 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17347 if (NegN0) {
17348 HandleSDNode NegN0Handle(NegN0);
17349 SDValue NegN1 =
17350 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17351 if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17352 CostN1 == TargetLowering::NegatibleCost::Cheaper))
17353 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17354 }
17355
17356 if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17357 return R;
17358
17359 return SDValue();
17360}
17361
17362SDValue DAGCombiner::visitFREM(SDNode *N) {
17363 SDValue N0 = N->getOperand(0);
17364 SDValue N1 = N->getOperand(1);
17365 EVT VT = N->getValueType(0);
17366 SDNodeFlags Flags = N->getFlags();
17367 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17368 SDLoc DL(N);
17369
17370 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17371 return R;
17372
17373 // fold (frem c1, c2) -> fmod(c1,c2)
17374 if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, DL, VT, {N0, N1}))
17375 return C;
17376
17377 if (SDValue NewSel = foldBinOpIntoSelect(N))
17378 return NewSel;
17379
17380 // Lower frem N0, N1 => x - trunc(N0 / N1) * N1, providing N1 is an integer
17381 // power of 2.
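// For example, frem 7.5, 2.0 becomes 7.5 - trunc(7.5 / 2.0) * 2.0
// = 7.5 - 3.0 * 2.0 = 1.5, which matches fmod(7.5, 2.0).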
17382 if (!TLI.isOperationLegal(ISD::FREM, VT) &&
17386 DAG.isKnownToBeAPowerOfTwoFP(N1) &&
17387 (Flags.hasNoSignedZeros() || DAG.cannotBeOrderedNegativeFP(N0))) {
17388 SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
17389 SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
17390 if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT))
17391 return DAG.getNode(ISD::FMA, DL, VT, DAG.getNode(ISD::FNEG, DL, VT, Rnd),
17392 N1, N0);
17393 SDValue Mul = DAG.getNode(ISD::FMUL, DL, VT, Rnd, N1);
17394 return DAG.getNode(ISD::FSUB, DL, VT, N0, Mul);
17395 }
17396
17397 return SDValue();
17398}
17399
17400SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17401 SDNodeFlags Flags = N->getFlags();
17402 const TargetOptions &Options = DAG.getTarget().Options;
17403
17404 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17405 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17406 if (!Flags.hasApproximateFuncs() ||
17407 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17408 return SDValue();
17409
17410 SDValue N0 = N->getOperand(0);
17411 if (TLI.isFsqrtCheap(N0, DAG))
17412 return SDValue();
17413
17414 // FSQRT nodes have flags that propagate to the created nodes.
17415 // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17416 // transform the fdiv, we may produce a sub-optimal estimate sequence
17417 // because the reciprocal calculation may not have to filter out a
17418 // 0.0 input.
17419 return buildSqrtEstimate(N0, Flags);
17420}
17421
17422/// copysign(x, fp_extend(y)) -> copysign(x, y)
17423/// copysign(x, fp_round(y)) -> copysign(x, y)
17424/// Operands to the functions are the type of X and Y respectively.
17425static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17426 // Always fold no-op FP casts.
17427 if (XTy == YTy)
17428 return true;
17429
17430 // Do not optimize out type conversion of f128 type yet.
17431 // For some targets like x86_64, configuration is changed to keep one f128
17432 // value in one SSE register, but instruction selection cannot handle
17433 // FCOPYSIGN on SSE registers yet.
17434 if (YTy == MVT::f128)
17435 return false;
17436
17438}
17439
17440 static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17441 SDValue N1 = N->getOperand(1);
17442 if (N1.getOpcode() != ISD::FP_EXTEND &&
17443 N1.getOpcode() != ISD::FP_ROUND)
17444 return false;
17445 EVT N1VT = N1->getValueType(0);
17446 EVT N1Op0VT = N1->getOperand(0).getValueType();
17447 return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17448}
17449
17450SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17451 SDValue N0 = N->getOperand(0);
17452 SDValue N1 = N->getOperand(1);
17453 EVT VT = N->getValueType(0);
17454
17455 // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17456 if (SDValue C =
17457 DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17458 return C;
17459
17460 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17461 const APFloat &V = N1C->getValueAPF();
17462 // copysign(x, c1) -> fabs(x) iff ispos(c1)
17463 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17464 if (!V.isNegative()) {
17465 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17466 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17467 } else {
17468 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17469 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17470 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17471 }
17472 }
17473
17474 // copysign(fabs(x), y) -> copysign(x, y)
17475 // copysign(fneg(x), y) -> copysign(x, y)
17476 // copysign(copysign(x,z), y) -> copysign(x, y)
17477 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17478 N0.getOpcode() == ISD::FCOPYSIGN)
17479 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17480
17481 // copysign(x, abs(y)) -> abs(x)
17482 if (N1.getOpcode() == ISD::FABS)
17483 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17484
17485 // copysign(x, copysign(y,z)) -> copysign(x, z)
17486 if (N1.getOpcode() == ISD::FCOPYSIGN)
17487 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17488
17489 // copysign(x, fp_extend(y)) -> copysign(x, y)
17490 // copysign(x, fp_round(y)) -> copysign(x, y)
17491 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17492 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17493
17494 return SDValue();
17495}
17496
17497SDValue DAGCombiner::visitFPOW(SDNode *N) {
17498 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17499 if (!ExponentC)
17500 return SDValue();
17501 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17502
17503 // Try to convert x ** (1/3) into cube root.
17504 // TODO: Handle the various flavors of long double.
17505 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17506 // Some range near 1/3 should be fine.
17507 EVT VT = N->getValueType(0);
17508 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17509 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17510 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17511 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
17512 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
17513 // For regular numbers, rounding may cause the results to differ.
17514 // Therefore, we require { nsz ninf nnan afn } for this transform.
17515 // TODO: We could select out the special cases if we don't have nsz/ninf.
17516 SDNodeFlags Flags = N->getFlags();
17517 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17518 !Flags.hasApproximateFuncs())
17519 return SDValue();
17520
17521 // Do not create a cbrt() libcall if the target does not have it, and do not
17522 // turn a pow that has lowering support into a cbrt() libcall.
17523 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17526 return SDValue();
17527
17528 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17529 }
17530
17531 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17532 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17533 // TODO: This could be extended (using a target hook) to handle smaller
17534 // power-of-2 fractional exponents.
17535 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17536 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17537 if (ExponentIs025 || ExponentIs075) {
17538 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17539 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
17540 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17541 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
17542 // For regular numbers, rounding may cause the results to differ.
17543 // Therefore, we require { nsz ninf afn } for this transform.
17544 // TODO: We could select out the special cases if we don't have nsz/ninf.
17545 SDNodeFlags Flags = N->getFlags();
17546
17547 // We only need no signed zeros for the 0.25 case.
17548 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17549 !Flags.hasApproximateFuncs())
17550 return SDValue();
17551
17552 // Don't double the number of libcalls. We are trying to inline fast code.
17553 if (!TLI.isOperationLegalOrCustom(ISD::FSQRT, VT))
17554 return SDValue();
17555
17556 // Assume that libcalls are the smallest code.
17557 // TODO: This restriction should probably be lifted for vectors.
17558 if (ForCodeSize)
17559 return SDValue();
17560
17561 // pow(X, 0.25) --> sqrt(sqrt(X))
17562 SDLoc DL(N);
17563 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17564 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17565 if (ExponentIs025)
17566 return SqrtSqrt;
17567 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17568 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17569 }
17570
17571 return SDValue();
17572}
17573
17574 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17575 const TargetLowering &TLI) {
17576 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17577 // replacing casts with a libcall. We also must be allowed to ignore -0.0
17578 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17579 // conversions would return +0.0.
17580 // FIXME: We should be able to use node-level FMF here.
17581 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17582 EVT VT = N->getValueType(0);
17583 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17584 !DAG.getTarget().Options.NoSignedZerosFPMath)
17585 return SDValue();
17586
17587 // fptosi/fptoui round towards zero, so converting from FP to integer and
17588 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17589 SDValue N0 = N->getOperand(0);
17590 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17591 N0.getOperand(0).getValueType() == VT)
17592 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17593
17594 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17595 N0.getOperand(0).getValueType() == VT)
17596 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17597
17598 return SDValue();
17599}
17600
17601SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17602 SDValue N0 = N->getOperand(0);
17603 EVT VT = N->getValueType(0);
17604 EVT OpVT = N0.getValueType();
17605
17606 // [us]itofp(undef) = 0, because the result value is bounded.
17607 if (N0.isUndef())
17608 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17609
17610 // fold (sint_to_fp c1) -> c1fp
17611 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17612 // ...but only if the target supports immediate floating-point values
17613 (!LegalOperations ||
17614 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17615 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17616
17617 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17618 // but UINT_TO_FP is legal on this target, try to convert.
17619 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17620 hasOperation(ISD::UINT_TO_FP, OpVT)) {
17621 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17622 if (DAG.SignBitIsZero(N0))
17623 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17624 }
17625
17626 // The next optimizations are desirable only if SELECT_CC can be lowered.
17627 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17628 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17629 !VT.isVector() &&
17630 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17631 SDLoc DL(N);
17632 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17633 DAG.getConstantFP(0.0, DL, VT));
17634 }
17635
17636 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17637 // (select (setcc x, y, cc), 1.0, 0.0)
17638 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17639 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17640 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17641 SDLoc DL(N);
17642 return DAG.getSelect(DL, VT, N0.getOperand(0),
17643 DAG.getConstantFP(1.0, DL, VT),
17644 DAG.getConstantFP(0.0, DL, VT));
17645 }
17646
17647 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17648 return FTrunc;
17649
17650 return SDValue();
17651}
17652
17653SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17654 SDValue N0 = N->getOperand(0);
17655 EVT VT = N->getValueType(0);
17656 EVT OpVT = N0.getValueType();
17657
17658 // [us]itofp(undef) = 0, because the result value is bounded.
17659 if (N0.isUndef())
17660 return DAG.getConstantFP(0.0, SDLoc(N), VT);
17661
17662 // fold (uint_to_fp c1) -> c1fp
17663 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17664 // ...but only if the target supports immediate floating-point values
17665 (!LegalOperations ||
17666 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17667 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17668
17669 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17670 // but SINT_TO_FP is legal on this target, try to convert.
17671 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17672 hasOperation(ISD::SINT_TO_FP, OpVT)) {
17673 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17674 if (DAG.SignBitIsZero(N0))
17675 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17676 }
17677
17678 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17679 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17680 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17681 SDLoc DL(N);
17682 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17683 DAG.getConstantFP(0.0, DL, VT));
17684 }
17685
17686 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17687 return FTrunc;
17688
17689 return SDValue();
17690}
17691
17692 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17693 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17694 SDValue N0 = N->getOperand(0);
17695 EVT VT = N->getValueType(0);
17696
17697 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17698 return SDValue();
17699
17700 SDValue Src = N0.getOperand(0);
17701 EVT SrcVT = Src.getValueType();
17702 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17703 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17704
17705 // We can safely assume the conversion won't overflow the output range,
17706 // because (for example) (uint8_t)18293.f is undefined behavior.
17707
17708 // Since we can assume the conversion won't overflow, our decision as to
17709 // whether the input will fit in the float should depend on the minimum
17710 // of the input range and output range.
17711
17712 // This means this is also safe for a signed input and unsigned output, since
17713 // a negative input would lead to undefined behavior.
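// Worked example (illustrative): for i16 -> f32 -> i32 with signed
// conversions, the i16 range needs only 15 bits and f32 carries 24 bits of
// precision, so the round trip is exact and the whole chain folds to a
// sign_extend from i16 to i32.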
17714 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17715 unsigned OutputSize = (int)VT.getScalarSizeInBits();
17716 unsigned ActualSize = std::min(InputSize, OutputSize);
17717 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17718
17719 // We can only fold away the float conversion if the input range can be
17720 // represented exactly in the float range.
17721 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17722 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17723 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17724 : ISD::ZERO_EXTEND;
17725 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17726 }
17727 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17728 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17729 return DAG.getBitcast(VT, Src);
17730 }
17731 return SDValue();
17732}
17733
17734SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17735 SDValue N0 = N->getOperand(0);
17736 EVT VT = N->getValueType(0);
17737
17738 // fold (fp_to_sint undef) -> undef
17739 if (N0.isUndef())
17740 return DAG.getUNDEF(VT);
17741
17742 // fold (fp_to_sint c1fp) -> c1
17743 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17744 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17745
17746 return FoldIntToFPToInt(N, DAG);
17747}
17748
17749SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17750 SDValue N0 = N->getOperand(0);
17751 EVT VT = N->getValueType(0);
17752
17753 // fold (fp_to_uint undef) -> undef
17754 if (N0.isUndef())
17755 return DAG.getUNDEF(VT);
17756
17757 // fold (fp_to_uint c1fp) -> c1
17758 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17759 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17760
17761 return FoldIntToFPToInt(N, DAG);
17762}
17763
17764SDValue DAGCombiner::visitXRINT(SDNode *N) {
17765 SDValue N0 = N->getOperand(0);
17766 EVT VT = N->getValueType(0);
17767
17768 // fold (lrint|llrint undef) -> undef
17769 if (N0.isUndef())
17770 return DAG.getUNDEF(VT);
17771
17772 // fold (lrint|llrint c1fp) -> c1
17773 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17774 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17775
17776 return SDValue();
17777}
17778
17779SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17780 SDValue N0 = N->getOperand(0);
17781 SDValue N1 = N->getOperand(1);
17782 EVT VT = N->getValueType(0);
17783
17784 // fold (fp_round c1fp) -> c1fp
17785 if (SDValue C =
17786 DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17787 return C;
17788
17789 // fold (fp_round (fp_extend x)) -> x
17790 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17791 return N0.getOperand(0);
17792
17793 // fold (fp_round (fp_round x)) -> (fp_round x)
17794 if (N0.getOpcode() == ISD::FP_ROUND) {
17795 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17796 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17797
17798 // Avoid folding legal fp_rounds into non-legal ones.
17799 if (!hasOperation(ISD::FP_ROUND, VT))
17800 return SDValue();
17801
17802 // Skip this folding if it results in an fp_round from f80 to f16.
17803 //
17804 // f80 to f16 always generates an expensive (and as yet, unimplemented)
17805 // libcall to __truncxfhf2 instead of selecting native f16 conversion
17806 // instructions from f32 or f64. Moreover, the first (value-preserving)
17807 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
17808 // x86.
17809 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17810 return SDValue();
17811
17812 // If the first fp_round isn't a value preserving truncation, it might
17813 // introduce a tie in the second fp_round that wouldn't occur in the
17814 // single-step fp_round we want to fold to.
17815 // In other words, double rounding isn't the same as rounding.
17816 // Also, this is a value preserving truncation iff both fp_round's are.
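// Illustrative counterexample (editorial): take x = 1 + 2^-11 + 2^-30 in
// f64. Rounding x to f32 gives exactly 1 + 2^-11; rounding that tie to f16
// (ties-to-even) gives 1.0, whereas rounding x directly to f16 gives
// 1 + 2^-10 instead.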
17817 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17818 SDLoc DL(N);
17819 return DAG.getNode(
17820 ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17821 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17822 }
17823 }
17824
17825 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
17826 // Note: From a legality perspective, this is a two step transform. First,
17827 // we duplicate the fp_round to the arguments of the copysign, then we
17828 // eliminate the fp_round on Y. The second step requires an additional
17829 // predicate to match the implementation above.
17830 if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17831 CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17832 N0.getValueType())) {
17833 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17834 N0.getOperand(0), N1);
17835 AddToWorklist(Tmp.getNode());
17836 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17837 Tmp, N0.getOperand(1));
17838 }
17839
17840 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17841 return NewVSel;
17842
17843 return SDValue();
17844}
17845
17846SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17847 SDValue N0 = N->getOperand(0);
17848 EVT VT = N->getValueType(0);
17849
17850 if (VT.isVector())
17851 if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17852 return FoldedVOp;
17853
17854 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
17855 if (N->hasOneUse() &&
17856 N->use_begin()->getOpcode() == ISD::FP_ROUND)
17857 return SDValue();
17858
17859 // fold (fp_extend c1fp) -> c1fp
17860 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17861 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17862
17863 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17864 if (N0.getOpcode() == ISD::FP16_TO_FP &&
17865 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17866 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17867
17868 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17869 // value of X.
17870 if (N0.getOpcode() == ISD::FP_ROUND
17871 && N0.getConstantOperandVal(1) == 1) {
17872 SDValue In = N0.getOperand(0);
17873 if (In.getValueType() == VT) return In;
17874 if (VT.bitsLT(In.getValueType()))
17875 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17876 In, N0.getOperand(1));
17877 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17878 }
17879
17880 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17881 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17882 TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17883 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17884 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17885 LN0->getChain(),
17886 LN0->getBasePtr(), N0.getValueType(),
17887 LN0->getMemOperand());
17888 CombineTo(N, ExtLoad);
17889 CombineTo(
17890 N0.getNode(),
17891 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17892 DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17893 ExtLoad.getValue(1));
17894 return SDValue(N, 0); // Return N so it doesn't get rechecked!
17895 }
17896
17897 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17898 return NewVSel;
17899
17900 return SDValue();
17901}
17902
17903SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17904 SDValue N0 = N->getOperand(0);
17905 EVT VT = N->getValueType(0);
17906
17907 // fold (fceil c1) -> fceil(c1)
17908 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17909 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17910
17911 return SDValue();
17912}
17913
17914SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17915 SDValue N0 = N->getOperand(0);
17916 EVT VT = N->getValueType(0);
17917
17918 // fold (ftrunc c1) -> ftrunc(c1)
17919 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17920 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17921
17922 // fold ftrunc (known rounded int x) -> x
17923 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
17924 // likely to be generated to extract integer from a rounded floating value.
17925 switch (N0.getOpcode()) {
17926 default: break;
17927 case ISD::FRINT:
17928 case ISD::FTRUNC:
17929 case ISD::FNEARBYINT:
17930 case ISD::FROUNDEVEN:
17931 case ISD::FFLOOR:
17932 case ISD::FCEIL:
17933 return N0;
17934 }
17935
17936 return SDValue();
17937}
17938
17939SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17940 SDValue N0 = N->getOperand(0);
17941
17942 // fold (ffrexp c1) -> ffrexp(c1)
17943 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17944 return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17945 return SDValue();
17946}
17947
17948SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17949 SDValue N0 = N->getOperand(0);
17950 EVT VT = N->getValueType(0);
17951
17952 // fold (ffloor c1) -> ffloor(c1)
17953 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17954 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17955
17956 return SDValue();
17957}
17958
17959SDValue DAGCombiner::visitFNEG(SDNode *N) {
17960 SDValue N0 = N->getOperand(0);
17961 EVT VT = N->getValueType(0);
17962 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17963
17964 // Constant fold FNEG.
17965 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17966 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17967
17968 if (SDValue NegN0 =
17969 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17970 return NegN0;
17971
17972 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17973 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17974 // know it was called from a context with a nsz flag if the input fsub does
17975 // not.
17976 if (N0.getOpcode() == ISD::FSUB &&
17977 (DAG.getTarget().Options.NoSignedZerosFPMath ||
17978 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17979 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17980 N0.getOperand(0));
17981 }
17982
17983 if (SDValue Cast = foldSignChangeInBitcast(N))
17984 return Cast;
17985
17986 return SDValue();
17987}
17988
17989SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17990 SDValue N0 = N->getOperand(0);
17991 SDValue N1 = N->getOperand(1);
17992 EVT VT = N->getValueType(0);
17993 const SDNodeFlags Flags = N->getFlags();
17994 unsigned Opc = N->getOpcode();
17995 bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17996 bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17997 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17998
17999 // Constant fold.
18000 if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
18001 return C;
18002
18003 // Canonicalize to constant on RHS.
18004 if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
18005 !DAG.isConstantFPBuildVectorOrConstantFP(N1))
18006 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
18007
18008 if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
18009 const APFloat &AF = N1CFP->getValueAPF();
18010
18011 // minnum(X, nan) -> X
18012 // maxnum(X, nan) -> X
18013 // minimum(X, nan) -> nan
18014 // maximum(X, nan) -> nan
18015 if (AF.isNaN())
18016 return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
18017
18018 // In the following folds, inf can be replaced with the largest finite
18019 // float, if the ninf flag is set.
18020 if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
18021 // minnum(X, -inf) -> -inf
18022 // maxnum(X, +inf) -> +inf
18023 // minimum(X, -inf) -> -inf if nnan
18024 // maximum(X, +inf) -> +inf if nnan
18025 if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
18026 return N->getOperand(1);
18027
18028 // minnum(X, +inf) -> X if nnan
18029 // maxnum(X, -inf) -> X if nnan
18030 // minimum(X, +inf) -> X
18031 // maximum(X, -inf) -> X
18032 if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
18033 return N->getOperand(0);
18034 }
18035 }
18036
18037 if (SDValue SD = reassociateReduction(
18038 PropagatesNaN
18039 ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
18040 : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
18041 Opc, SDLoc(N), VT, N0, N1, Flags))
18042 return SD;
18043
18044 return SDValue();
18045}
18046
18047SDValue DAGCombiner::visitFABS(SDNode *N) {
18048 SDValue N0 = N->getOperand(0);
18049 EVT VT = N->getValueType(0);
18050
18051 // fold (fabs c1) -> fabs(c1)
18052 if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
18053 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
18054
18055 // fold (fabs (fabs x)) -> (fabs x)
18056 if (N0.getOpcode() == ISD::FABS)
18057 return N->getOperand(0);
18058
18059 // fold (fabs (fneg x)) -> (fabs x)
18060 // fold (fabs (fcopysign x, y)) -> (fabs x)
18061 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
18062 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
18063
18064 if (SDValue Cast = foldSignChangeInBitcast(N))
18065 return Cast;
18066
18067 return SDValue();
18068}
18069
18070SDValue DAGCombiner::visitBRCOND(SDNode *N) {
18071 SDValue Chain = N->getOperand(0);
18072 SDValue N1 = N->getOperand(1);
18073 SDValue N2 = N->getOperand(2);
18074
18075 // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
18076 // nondeterministic jumps).
18077 if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
18078 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18079 N1->getOperand(0), N2);
18080 }
18081
18082 // Variant of the previous fold where there is a SETCC in between:
18083 // BRCOND(SETCC(FREEZE(X), CONST, Cond))
18084 // =>
18085 // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
18086 // =>
18087 // BRCOND(SETCC(X, CONST, Cond))
18088 // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
18089 // isn't equivalent to true or false.
18090 // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
18091 // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
18092 if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
18093 SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
18094 ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
18095 ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
18096 ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
18097 bool Updated = false;
18098
18099 // Is 'X Cond C' always true or false?
18100 auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
18101 bool False = (Cond == ISD::SETULT && C->isZero()) ||
18102 (Cond == ISD::SETLT && C->isMinSignedValue()) ||
18103 (Cond == ISD::SETUGT && C->isAllOnes()) ||
18104 (Cond == ISD::SETGT && C->isMaxSignedValue());
18105 bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
18106 (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
18107 (Cond == ISD::SETUGE && C->isZero()) ||
18108 (Cond == ISD::SETGE && C->isMinSignedValue());
18109 return True || False;
18110 };
18111
18112 if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
18113 if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
18114 S0 = S0->getOperand(0);
18115 Updated = true;
18116 }
18117 }
18118 if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
18119 if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
18120 S1 = S1->getOperand(0);
18121 Updated = true;
18122 }
18123 }
18124
18125 if (Updated)
18126 return DAG.getNode(
18127 ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
18128 DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
18129 }
18130
18131 // If N is a constant we could fold this into a fallthrough or unconditional
18132 // branch. However that doesn't happen very often in normal code, because
18133 // Instcombine/SimplifyCFG should have handled the available opportunities.
18134 // If we did this folding here, it would be necessary to update the
18135 // MachineBasicBlock CFG, which is awkward.
18136
18137 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
18138 // on the target.
18139 if (N1.getOpcode() == ISD::SETCC &&
18140 TLI.isOperationLegalOrCustom(ISD::BR_CC,
18141 N1.getOperand(0).getValueType())) {
18142 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18143 Chain, N1.getOperand(2),
18144 N1.getOperand(0), N1.getOperand(1), N2);
18145 }
18146
18147 if (N1.hasOneUse()) {
18148 // rebuildSetCC calls visitXor which may change the Chain when there is a
18149 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
18150 HandleSDNode ChainHandle(Chain);
18151 if (SDValue NewN1 = rebuildSetCC(N1))
18152 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
18153 ChainHandle.getValue(), NewN1, N2);
18154 }
18155
18156 return SDValue();
18157}
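// A rough sketch of the brcond(setcc) -> br_cc rewrite above (assuming the
// target reports BR_CC as legal or custom for the operand type):
//   t1: i1 = setcc t2, t3, setlt
//   brcond t1, bb
// becomes
//   br_cc setlt, t2, t3, bb
// letting instruction selection emit a single compare-and-branch.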
18158
18159SDValue DAGCombiner::rebuildSetCC(SDValue N) {
18160 if (N.getOpcode() == ISD::SRL ||
18161 (N.getOpcode() == ISD::TRUNCATE &&
18162 (N.getOperand(0).hasOneUse() &&
18163 N.getOperand(0).getOpcode() == ISD::SRL))) {
18164 // Look past the truncate.
18165 if (N.getOpcode() == ISD::TRUNCATE)
18166 N = N.getOperand(0);
18167
18168 // Match this pattern so that we can generate simpler code:
18169 //
18170 // %a = ...
18171 // %b = and i32 %a, 2
18172 // %c = srl i32 %b, 1
18173 // brcond i32 %c ...
18174 //
18175 // into
18176 //
18177 // %a = ...
18178 // %b = and i32 %a, 2
18179 // %c = setcc eq %b, 0
18180 // brcond %c ...
18181 //
18182 // This applies only when the AND constant value has one bit set and the
18183 // SRL constant is equal to the log2 of the AND constant. The back-end is
18184 // smart enough to convert the result into a TEST/JMP sequence.
18185 SDValue Op0 = N.getOperand(0);
18186 SDValue Op1 = N.getOperand(1);
18187
18188 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
18189 SDValue AndOp1 = Op0.getOperand(1);
18190
18191 if (AndOp1.getOpcode() == ISD::Constant) {
18192 const APInt &AndConst = AndOp1->getAsAPIntVal();
18193
18194 if (AndConst.isPowerOf2() &&
18195 Op1->getAsAPIntVal() == AndConst.logBase2()) {
18196 SDLoc DL(N);
18197 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
18198 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
18199 ISD::SETNE);
18200 }
18201 }
18202 }
18203 }
18204
18205 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
18206 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
18207 if (N.getOpcode() == ISD::XOR) {
18208 // Because we may call this on a speculatively constructed
18209 // SimplifiedSetCC Node, we need to simplify this node first.
18210 // Ideally this should be folded into SimplifySetCC and not
18211 // here. For now, grab a handle to N so we don't lose it from
18212 // replacements internal to the visit.
18213 HandleSDNode XORHandle(N);
18214 while (N.getOpcode() == ISD::XOR) {
18215 SDValue Tmp = visitXOR(N.getNode());
18216 // No simplification done.
18217 if (!Tmp.getNode())
18218 break;
18219 // Returning N is a form of in-visit replacement that may invalidate
18220 // N. Grab the value from the Handle.
18221 if (Tmp.getNode() == N.getNode())
18222 N = XORHandle.getValue();
18223 else // Node simplified. Try simplifying again.
18224 N = Tmp;
18225 }
18226
18227 if (N.getOpcode() != ISD::XOR)
18228 return N;
18229
18230 SDValue Op0 = N->getOperand(0);
18231 SDValue Op1 = N->getOperand(1);
18232
18233 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18234 bool Equal = false;
18235 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18236 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18237 Op0.getValueType() == MVT::i1) {
18238 N = Op0;
18239 Op0 = N->getOperand(0);
18240 Op1 = N->getOperand(1);
18241 Equal = true;
18242 }
18243
18244 EVT SetCCVT = N.getValueType();
18245 if (LegalTypes)
18246 SetCCVT = getSetCCResultType(SetCCVT);
18247 // Replace the uses of XOR with SETCC
18248 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18249 Equal ? ISD::SETEQ : ISD::SETNE);
18250 }
18251 }
18252
18253 return SDValue();
18254}
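// Rough illustration of the XOR rewrites above (assumed i1 example):
//   brcond (xor x, y)           -> brcond (setcc x, y, ne)
//   brcond (xor (xor x, y), -1) -> brcond (setcc x, y, eq)
// i.e. a SETCC is rebuilt so that visitBRCOND can later form a BR_CC.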
18255
18256// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18257//
18258SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18259 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18260 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18261
18262 // If N is a constant we could fold this into a fallthrough or unconditional
18263 // branch. However that doesn't happen very often in normal code, because
18264 // Instcombine/SimplifyCFG should have handled the available opportunities.
18265 // If we did this folding here, it would be necessary to update the
18266 // MachineBasicBlock CFG, which is awkward.
18267
18268 // Use SimplifySetCC to simplify SETCC's.
18269 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18270 CondLHS, CondRHS, CC->get(), SDLoc(N),
18271 false);
18272 if (Simp.getNode()) AddToWorklist(Simp.getNode());
18273
18274 // fold to a simpler setcc
18275 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18276 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18277 N->getOperand(0), Simp.getOperand(2),
18278 Simp.getOperand(0), Simp.getOperand(1),
18279 N->getOperand(4));
18280
18281 return SDValue();
18282}
18283
18284static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18285 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18286 const TargetLowering &TLI) {
18287 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18288 if (LD->isIndexed())
18289 return false;
18290 EVT VT = LD->getMemoryVT();
18291 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18292 return false;
18293 Ptr = LD->getBasePtr();
18294 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18295 if (ST->isIndexed())
18296 return false;
18297 EVT VT = ST->getMemoryVT();
18298 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18299 return false;
18300 Ptr = ST->getBasePtr();
18301 IsLoad = false;
18302 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18303 if (LD->isIndexed())
18304 return false;
18305 EVT VT = LD->getMemoryVT();
18306 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18307 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18308 return false;
18309 Ptr = LD->getBasePtr();
18310 IsMasked = true;
18311 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18312 if (ST->isIndexed())
18313 return false;
18314 EVT VT = ST->getMemoryVT();
18315 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18316 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18317 return false;
18318 Ptr = ST->getBasePtr();
18319 IsLoad = false;
18320 IsMasked = true;
18321 } else {
18322 return false;
18323 }
18324 return true;
18325}
18326
18327/// Try turning a load/store into a pre-indexed load/store when the base
18328/// pointer is an add or subtract and it has other uses besides the load/store.
18329/// After the transformation, the new indexed load/store has effectively folded
18330/// the add/subtract in and all of its other uses are redirected to the
18331/// new load/store.
18332bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18333 if (Level < AfterLegalizeDAG)
18334 return false;
18335
18336 bool IsLoad = true;
18337 bool IsMasked = false;
18338 SDValue Ptr;
18339 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18340 Ptr, TLI))
18341 return false;
18342
18343 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18344 // out. There is no reason to make this a preinc/predec.
18345 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18346 Ptr->hasOneUse())
18347 return false;
18348
18349 // Ask the target to do addressing mode selection.
18350 SDValue BasePtr;
18351 SDValue Offset;
18352 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18353 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18354 return false;
18355
18356 // Backends without true r+i pre-indexed forms may need to pass a
18357 // constant base with a variable offset so that constant coercion
18358 // will work with the patterns in canonical form.
18359 bool Swapped = false;
18360 if (isa<ConstantSDNode>(BasePtr)) {
18361 std::swap(BasePtr, Offset);
18362 Swapped = true;
18363 }
18364
18365 // Don't create an indexed load / store with zero offset.
18366 if (isNullConstant(Offset))
18367 return false;
18368
18369 // Try turning it into a pre-indexed load / store except when:
18370 // 1) The new base ptr is a frame index.
18371 // 2) If N is a store and the new base ptr is either the same as or is a
18372 // predecessor of the value being stored.
18373 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18374 // that would create a cycle.
18375 // 4) All uses are load / store ops that use it as old base ptr.
18376
18377 // Check #1. Preinc'ing a frame index would require copying the stack pointer
18378 // (plus the implicit offset) to a register to preinc anyway.
18379 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18380 return false;
18381
18382 // Check #2.
18383 if (!IsLoad) {
18384 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18385 : cast<StoreSDNode>(N)->getValue();
18386
18387 // Would require a copy.
18388 if (Val == BasePtr)
18389 return false;
18390
18391 // Would create a cycle.
18392 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18393 return false;
18394 }
18395
18396 // Caches for hasPredecessorHelper.
18397 SmallPtrSet<const SDNode *, 32> Visited;
18398 SmallVector<const SDNode *, 16> Worklist;
18399 Worklist.push_back(N);
18400
18401 // If the offset is a constant, there may be other adds of constants that
18402 // can be folded with this one. We should do this to avoid having to keep
18403 // a copy of the original base pointer.
18404 SmallVector<SDNode *, 16> OtherUses;
18405 constexpr unsigned int MaxSteps = 8192;
18406 if (isa<ConstantSDNode>(Offset))
18407 for (SDNode::use_iterator UI = BasePtr->use_begin(),
18408 UE = BasePtr->use_end();
18409 UI != UE; ++UI) {
18410 SDUse &Use = UI.getUse();
18411 // Skip the use that is Ptr and uses of other results from BasePtr's
18412 // node (important for nodes that return multiple results).
18413 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18414 continue;
18415
18416 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18417 MaxSteps))
18418 continue;
18419
18420 if (Use.getUser()->getOpcode() != ISD::ADD &&
18421 Use.getUser()->getOpcode() != ISD::SUB) {
18422 OtherUses.clear();
18423 break;
18424 }
18425
18426 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18427 if (!isa<ConstantSDNode>(Op1)) {
18428 OtherUses.clear();
18429 break;
18430 }
18431
18432 // FIXME: In some cases, we can be smarter about this.
18433 if (Op1.getValueType() != Offset.getValueType()) {
18434 OtherUses.clear();
18435 break;
18436 }
18437
18438 OtherUses.push_back(Use.getUser());
18439 }
18440
18441 if (Swapped)
18442 std::swap(BasePtr, Offset);
18443
18444 // Now check for #3 and #4.
18445 bool RealUse = false;
18446
18447 for (SDNode *Use : Ptr->uses()) {
18448 if (Use == N)
18449 continue;
18450 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18451 return false;
18452
18453 // If Ptr may be folded in addressing mode of other use, then it's
18454 // not profitable to do this transformation.
18455 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18456 RealUse = true;
18457 }
18458
18459 if (!RealUse)
18460 return false;
18461
18462 SDValue Result;
18463 if (!IsMasked) {
18464 if (IsLoad)
18465 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18466 else
18467 Result =
18468 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18469 } else {
18470 if (IsLoad)
18471 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18472 Offset, AM);
18473 else
18474 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18475 Offset, AM);
18476 }
18477 ++PreIndexedNodes;
18478 ++NodesCombined;
18479 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18480 Result.dump(&DAG); dbgs() << '\n');
18481 WorklistRemover DeadNodes(*this);
18482 if (IsLoad) {
18483 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18484 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18485 } else {
18486 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18487 }
18488
18489 // Finally, since the node is now dead, remove it from the graph.
18490 deleteAndRecombine(N);
18491
18492 if (Swapped)
18493 std::swap(BasePtr, Offset);
18494
18495 // Replace other uses of BasePtr that can be updated to use Ptr
18496 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18497 unsigned OffsetIdx = 1;
18498 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18499 OffsetIdx = 0;
18500 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18501 BasePtr.getNode() && "Expected BasePtr operand");
18502
18503 // We need to replace ptr0 in the following expression:
18504 // x0 * offset0 + y0 * ptr0 = t0
18505 // knowing that
18506 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18507 //
18508 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18509 // indexed load/store and the expression that needs to be re-written.
18510 //
18511 // Therefore, we have:
18512 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
18513
18514 auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18515 const APInt &Offset0 = CN->getAPIntValue();
18516 const APInt &Offset1 = Offset->getAsAPIntVal();
18517 int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18518 int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18519 int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18520 int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18521
18522 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18523
18524 APInt CNV = Offset0;
18525 if (X0 < 0) CNV = -CNV;
18526 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18527 else CNV = CNV - Offset1;
18528
18529 SDLoc DL(OtherUses[i]);
18530
18531 // We can now generate the new expression.
18532 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18533 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18534
18535 SDValue NewUse = DAG.getNode(Opcode,
18536 DL,
18537 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18538 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18539 deleteAndRecombine(OtherUses[i]);
18540 }
18541
18542 // Replace the uses of Ptr with uses of the updated base value.
18543 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18544 deleteAndRecombine(Ptr.getNode());
18545 AddToWorklist(Result.getNode());
18546
18547 return true;
18548}
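// Rough illustration (assuming an AArch64-like target with pre-indexed
// addressing). Before the combine:
//   t1: i64 = add t0, Constant:i64<8>   ; has users besides the load
//   t2: i32,ch = load t1
// After the combine the add is folded into the memory operation, roughly
//   ldr w0, [x1, #8]!                   ; load from t0+8 and write back t0+8
// and the other users of t1 are redirected to the written-back pointer.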
18549
18550 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18551 SDValue &BasePtr, SDValue &Offset,
18552 ISD::MemIndexedMode &AM,
18553 SelectionDAG &DAG,
18554 const TargetLowering &TLI) {
18555 if (PtrUse == N ||
18556 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18557 return false;
18558
18559 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18560 return false;
18561
18562 // Don't create an indexed load / store with zero offset.
18563 if (isNullConstant(Offset))
18564 return false;
18565
18566 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18567 return false;
18568
18569 SmallPtrSet<const SDNode *, 32> Visited;
18570 for (SDNode *Use : BasePtr->uses()) {
18571 if (Use == Ptr.getNode())
18572 continue;
18573
18574 // Do not combine if there is a later user which could perform the indexing instead.
18575 if (isa<MemSDNode>(Use)) {
18576 bool IsLoad = true;
18577 bool IsMasked = false;
18578 SDValue OtherPtr;
18579 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18580 IsMasked, OtherPtr, TLI)) {
18581 SmallVector<const SDNode *, 2> Worklist;
18582 Worklist.push_back(Use);
18583 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18584 return false;
18585 }
18586 }
18587
18588 // If all the uses are load / store addresses, then don't do the
18589 // transformation.
18590 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18591 for (SDNode *UseUse : Use->uses())
18592 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18593 return false;
18594 }
18595 }
18596 return true;
18597}
18598
18599 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18600 bool &IsMasked, SDValue &Ptr,
18601 SDValue &BasePtr, SDValue &Offset,
18602 ISD::MemIndexedMode &AM,
18603 SelectionDAG &DAG,
18604 const TargetLowering &TLI) {
18606 IsMasked, Ptr, TLI) ||
18607 Ptr->hasOneUse())
18608 return nullptr;
18609
18610 // Try turning it into a post-indexed load / store except when
18611 // 1) All uses are load / store ops that use it as base ptr (and
18612 // it may be folded as addressing mode).
18613 // 2) Op must be independent of N, i.e. Op is neither a predecessor
18614 // nor a successor of N. Otherwise, if Op is folded that would
18615 // create a cycle.
18616 for (SDNode *Op : Ptr->uses()) {
18617 // Check for #1.
18618 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18619 continue;
18620
18621 // Check for #2.
18622 SmallPtrSet<const SDNode *, 32> Visited;
18623 SmallVector<const SDNode *, 8> Worklist;
18624 constexpr unsigned int MaxSteps = 8192;
18625 // Ptr is predecessor to both N and Op.
18626 Visited.insert(Ptr.getNode());
18627 Worklist.push_back(N);
18628 Worklist.push_back(Op);
18629 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18630 !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18631 return Op;
18632 }
18633 return nullptr;
18634}
18635
18636 /// Try to combine a load/store with an add/sub of the base pointer node into
18637 /// a post-indexed load/store. The transformation folds the add/subtract into
18638 /// the new indexed load/store, and all of its uses are redirected to the
18639 /// new load/store.
18640bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18641 if (Level < AfterLegalizeDAG)
18642 return false;
18643
18644 bool IsLoad = true;
18645 bool IsMasked = false;
18646 SDValue Ptr;
18647 SDValue BasePtr;
18648 SDValue Offset;
18649 ISD::MemIndexedMode AM = ISD::UNINDEXED;
18650 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18651 Offset, AM, DAG, TLI);
18652 if (!Op)
18653 return false;
18654
18655 SDValue Result;
18656 if (!IsMasked)
18657 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18658 Offset, AM)
18659 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18660 BasePtr, Offset, AM);
18661 else
18662 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18663 BasePtr, Offset, AM)
18664 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18665 BasePtr, Offset, AM);
18666 ++PostIndexedNodes;
18667 ++NodesCombined;
18668 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18669 Result.dump(&DAG); dbgs() << '\n');
18670 WorklistRemover DeadNodes(*this);
18671 if (IsLoad) {
18672 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18673 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18674 } else {
18675 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18676 }
18677
18678 // Finally, since the node is now dead, remove it from the graph.
18679 deleteAndRecombine(N);
18680
18681 // Replace the uses of Use with uses of the updated base value.
18682 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18683 Result.getValue(IsLoad ? 1 : 0));
18684 deleteAndRecombine(Op);
18685 return true;
18686}
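// Rough illustration (assuming a target with post-indexed addressing).
// Before the combine:
//   t1: i32,ch = load t0
//   t2: i64    = add t0, Constant:i64<4>   ; independent of the loaded value
// After the combine, roughly
//   ldr w0, [x1], #4                       ; load from t0, then write back t0+4
// with uses of t2 redirected to the pointer result of the indexed load.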
18687
18688/// Return the base-pointer arithmetic from an indexed \p LD.
18689SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18690 ISD::MemIndexedMode AM = LD->getAddressingMode();
18691 assert(AM != ISD::UNINDEXED);
18692 SDValue BP = LD->getOperand(1);
18693 SDValue Inc = LD->getOperand(2);
18694
18695 // Some backends use TargetConstants for load offsets, but don't expect
18696 // TargetConstants in general ADD nodes. We can convert these constants into
18697 // regular Constants (if the constant is not opaque).
18698 assert((Inc.getOpcode() != ISD::TargetConstant ||
18699 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18700 "Cannot split out indexing using opaque target constants");
18701 if (Inc.getOpcode() == ISD::TargetConstant) {
18702 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18703 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18704 ConstInc->getValueType(0));
18705 }
18706
18707 unsigned Opc =
18708 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18709 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18710}
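// Rough illustration (assumed example): for a pre-incremented load whose
// writeback result is still needed, the pointer arithmetic is rebuilt as
//   BP' = ISD::ADD BP, Inc        (ISD::SUB for the *_DEC modes)
// so that the indexed load itself can be removed when its loaded value is dead.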
18711
18712 static inline ElementCount numVectorEltsOrZero(EVT T) {
18713 return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18714}
18715
18716bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18717 EVT STType = Val.getValueType();
18718 EVT STMemType = ST->getMemoryVT();
18719 if (STType == STMemType)
18720 return true;
18721 if (isTypeLegal(STMemType))
18722 return false; // fail.
18723 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18724 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18725 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18726 return true;
18727 }
18728 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18729 STType.isInteger() && STMemType.isInteger()) {
18730 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18731 return true;
18732 }
18733 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18734 Val = DAG.getBitcast(STMemType, Val);
18735 return true;
18736 }
18737 return false; // fail.
18738}
18739
18740bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18741 EVT LDMemType = LD->getMemoryVT();
18742 EVT LDType = LD->getValueType(0);
18743 assert(Val.getValueType() == LDMemType &&
18744 "Attempting to extend value of non-matching type");
18745 if (LDType == LDMemType)
18746 return true;
18747 if (LDMemType.isInteger() && LDType.isInteger()) {
18748 switch (LD->getExtensionType()) {
18749 case ISD::NON_EXTLOAD:
18750 Val = DAG.getBitcast(LDType, Val);
18751 return true;
18752 case ISD::EXTLOAD:
18753 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18754 return true;
18755 case ISD::SEXTLOAD:
18756 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18757 return true;
18758 case ISD::ZEXTLOAD:
18759 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18760 return true;
18761 }
18762 }
18763 return false;
18764}
18765
18766StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18767 int64_t &Offset) {
18768 SDValue Chain = LD->getOperand(0);
18769
18770 // Look through CALLSEQ_START.
18771 if (Chain.getOpcode() == ISD::CALLSEQ_START)
18772 Chain = Chain->getOperand(0);
18773
18774 StoreSDNode *ST = nullptr;
18775 SmallVector<SDValue, 8> Aliases;
18776 if (Chain.getOpcode() == ISD::TokenFactor) {
18777 // Look for unique store within the TokenFactor.
18778 for (SDValue Op : Chain->ops()) {
18779 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18780 if (!Store)
18781 continue;
18782 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18783 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18784 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18785 continue;
18786 // Make sure the store is not aliased with any nodes in TokenFactor.
18787 GatherAllAliases(Store, Chain, Aliases);
18788 if (Aliases.empty() ||
18789 (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18790 ST = Store;
18791 break;
18792 }
18793 } else {
18794 StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18795 if (Store) {
18796 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18797 BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18798 if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18799 ST = Store;
18800 }
18801 }
18802
18803 return ST;
18804}
18805
18806SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18807 if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18808 return SDValue();
18809 SDValue Chain = LD->getOperand(0);
18810 int64_t Offset;
18811
18812 StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18813 // TODO: Relax this restriction for unordered atomics (see D66309)
18814 if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18815 return SDValue();
18816
18817 EVT LDType = LD->getValueType(0);
18818 EVT LDMemType = LD->getMemoryVT();
18819 EVT STMemType = ST->getMemoryVT();
18820 EVT STType = ST->getValue().getValueType();
18821
18822 // There are two cases to consider here:
18823 // 1. The store is fixed width and the load is scalable. In this case we
18824 // don't know at compile time if the store completely envelops the load
18825 // so we abandon the optimisation.
18826 // 2. The store is scalable and the load is fixed width. We could
18827 // potentially support a limited number of cases here, but there has been
18828 // no cost-benefit analysis to prove it's worth it.
18829 bool LdStScalable = LDMemType.isScalableVT();
18830 if (LdStScalable != STMemType.isScalableVT())
18831 return SDValue();
18832
18833 // If we are dealing with scalable vectors on a big endian platform the
18834 // calculation of offsets below becomes trickier, since we do not know at
18835 // compile time the absolute size of the vector. Until we've done more
18836 // analysis on big-endian platforms it seems better to bail out for now.
18837 if (LdStScalable && DAG.getDataLayout().isBigEndian())
18838 return SDValue();
18839
18840 // Normalize for Endianness. After this Offset=0 will denote that the least
18841 // significant bit in the loaded value maps to the least significant bit in
18842 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
18843 // n:th least significant byte of the stored value.
18844 int64_t OrigOffset = Offset;
18845 if (DAG.getDataLayout().isBigEndian())
18846 Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18847 (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18848 8 -
18849 Offset;
18850
18851 // Check that the stored value covers all bits that are loaded.
18852 bool STCoversLD;
18853
18854 TypeSize LdMemSize = LDMemType.getSizeInBits();
18855 TypeSize StMemSize = STMemType.getSizeInBits();
18856 if (LdStScalable)
18857 STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18858 else
18859 STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18860 StMemSize.getFixedValue());
18861
18862 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18863 if (LD->isIndexed()) {
18864 // Cannot handle opaque target constants and we must respect the user's
18865 // request not to split indexes from loads.
18866 if (!canSplitIdx(LD))
18867 return SDValue();
18868 SDValue Idx = SplitIndexingFromLoad(LD);
18869 SDValue Ops[] = {Val, Idx, Chain};
18870 return CombineTo(LD, Ops, 3);
18871 }
18872 return CombineTo(LD, Val, Chain);
18873 };
18874
18875 if (!STCoversLD)
18876 return SDValue();
18877
18878 // Memory as copy space (potentially masked).
18879 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18880 // Simple case: Direct non-truncating forwarding
18881 if (LDType.getSizeInBits() == LdMemSize)
18882 return ReplaceLd(LD, ST->getValue(), Chain);
18883 // Can we model the truncate and extension with an and mask?
18884 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18885 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18886 // Mask to size of LDMemType
18887 auto Mask =
18888 DAG.getConstant(APInt::getLowBitsSet(LDType.getFixedSizeInBits(),
18889 StMemSize.getFixedValue()),
18890 SDLoc(ST), STType);
18891 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18892 return ReplaceLd(LD, Val, Chain);
18893 }
18894 }
18895
18896 // Handle some cases for big-endian that would be Offset 0 and handled for
18897 // little-endian.
18898 SDValue Val = ST->getValue();
18899 if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18900 if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18901 !LDType.isVector() && isTypeLegal(STType) &&
18902 TLI.isOperationLegal(ISD::SRL, STType)) {
18903 Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18904 DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18905 Offset = 0;
18906 }
18907 }
18908
18909 // TODO: Deal with nonzero offset.
18910 if (LD->getBasePtr().isUndef() || Offset != 0)
18911 return SDValue();
18912 // Model necessary truncations / extensions.
18913 // Truncate Value To Stored Memory Size.
18914 do {
18915 if (!getTruncatedStoreValue(ST, Val))
18916 continue;
18917 if (!isTypeLegal(LDMemType))
18918 continue;
18919 if (STMemType != LDMemType) {
18920 // TODO: Support vectors? This requires extract_subvector/bitcast.
18921 if (!STMemType.isVector() && !LDMemType.isVector() &&
18922 STMemType.isInteger() && LDMemType.isInteger())
18923 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18924 else
18925 continue;
18926 }
18927 if (!extendLoadedValueToExtension(LD, Val))
18928 continue;
18929 return ReplaceLd(LD, Val, Chain);
18930 } while (false);
18931
18932 // On failure, cleanup dead nodes we may have created.
18933 if (Val->use_empty())
18934 deleteAndRecombine(Val.getNode());
18935 return SDValue();
18936}
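// Rough illustration of store-to-load forwarding (assumed example):
//   ch1 = store t0, [ptr]
//   t1: i32,ch = load [ptr], ch1      ; same address, store covers the load
// is rewritten so uses of t1 read t0 directly and the load's chain output is
// replaced by ch1; truncation/extension and endian offsets are modelled as
// described above when the value and memory types differ.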
18937
18938SDValue DAGCombiner::visitLOAD(SDNode *N) {
18939 LoadSDNode *LD = cast<LoadSDNode>(N);
18940 SDValue Chain = LD->getChain();
18941 SDValue Ptr = LD->getBasePtr();
18942
18943 // If load is not volatile and there are no uses of the loaded value (and
18944 // the updated indexed value in case of indexed loads), change uses of the
18945 // chain value into uses of the chain input (i.e. delete the dead load).
18946 // TODO: Allow this for unordered atomics (see D66309)
18947 if (LD->isSimple()) {
18948 if (N->getValueType(1) == MVT::Other) {
18949 // Unindexed loads.
18950 if (!N->hasAnyUseOfValue(0)) {
18951 // It's not safe to use the two value CombineTo variant here. e.g.
18952 // v1, chain2 = load chain1, loc
18953 // v2, chain3 = load chain2, loc
18954 // v3 = add v2, c
18955 // Now we replace use of chain2 with chain1. This makes the second load
18956 // isomorphic to the one we are deleting, and thus makes this load live.
18957 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18958 dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18959 dbgs() << "\n");
18960 WorklistRemover DeadNodes(*this);
18961 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18962 AddUsersToWorklist(Chain.getNode());
18963 if (N->use_empty())
18964 deleteAndRecombine(N);
18965
18966 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18967 }
18968 } else {
18969 // Indexed loads.
18970 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18971
18972 // If this load has an opaque TargetConstant offset, then we cannot split
18973 // the indexing into an add/sub directly (that TargetConstant may not be
18974 // valid for a different type of node, and we cannot convert an opaque
18975 // target constant into a regular constant).
18976 bool CanSplitIdx = canSplitIdx(LD);
18977
18978 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18979 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18980 SDValue Index;
18981 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18982 Index = SplitIndexingFromLoad(LD);
18983 // Try to fold the base pointer arithmetic into subsequent loads and
18984 // stores.
18985 AddUsersToWorklist(N);
18986 } else
18987 Index = DAG.getUNDEF(N->getValueType(1));
18988 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18989 dbgs() << "\nWith: "; Undef.dump(&DAG);
18990 dbgs() << " and 2 other values\n");
18991 WorklistRemover DeadNodes(*this);
18992 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18993 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18994 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18995 deleteAndRecombine(N);
18996 return SDValue(N, 0); // Return N so it doesn't get rechecked!
18997 }
18998 }
18999 }
19000
19001 // If this load is directly stored, replace the load value with the stored
19002 // value.
19003 if (auto V = ForwardStoreValueToDirectLoad(LD))
19004 return V;
19005
19006 // Try to infer better alignment information than the load already has.
19007 if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
19008 !LD->isAtomic()) {
19009 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
19010 if (*Alignment > LD->getAlign() &&
19011 isAligned(*Alignment, LD->getSrcValueOffset())) {
19012 SDValue NewLoad = DAG.getExtLoad(
19013 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
19014 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
19015 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19016 // NewLoad will always be N as we are only refining the alignment
19017 assert(NewLoad.getNode() == N);
19018 (void)NewLoad;
19019 }
19020 }
19021 }
19022
19023 if (LD->isUnindexed()) {
19024 // Walk up chain skipping non-aliasing memory nodes.
19025 SDValue BetterChain = FindBetterChain(LD, Chain);
19026
19027 // If there is a better chain.
19028 if (Chain != BetterChain) {
19029 SDValue ReplLoad;
19030
19031 // Replace the chain to avoid the dependency.
19032 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
19033 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
19034 BetterChain, Ptr, LD->getMemOperand());
19035 } else {
19036 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
19037 LD->getValueType(0),
19038 BetterChain, Ptr, LD->getMemoryVT(),
19039 LD->getMemOperand());
19040 }
19041
19042 // Create token factor to keep old chain connected.
19043 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
19044 MVT::Other, Chain, ReplLoad.getValue(1));
19045
19046 // Replace uses with load result and token factor
19047 return CombineTo(N, ReplLoad.getValue(0), Token);
19048 }
19049 }
19050
19051 // Try transforming N to an indexed load.
19052 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19053 return SDValue(N, 0);
19054
19055 // Try to slice up N to more direct loads if the slices are mapped to
19056 // different register banks or pairing can take place.
19057 if (SliceUpLoad(N))
19058 return SDValue(N, 0);
19059
19060 return SDValue();
19061}
19062
19063namespace {
19064
19065/// Helper structure used to slice a load in smaller loads.
19066/// Basically a slice is obtained from the following sequence:
19067/// Origin = load Ty1, Base
19068/// Shift = srl Ty1 Origin, CstTy Amount
19069/// Inst = trunc Shift to Ty2
19070///
19071/// Then, it will be rewritten into:
19072/// Slice = load SliceTy, Base + SliceOffset
19073/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
19074///
19075/// SliceTy is deduced from the number of bits that are actually used to
19076/// build Inst.
19077struct LoadedSlice {
19078 /// Helper structure used to compute the cost of a slice.
19079 struct Cost {
19080 /// Are we optimizing for code size.
19081 bool ForCodeSize = false;
19082
19083 /// Various cost.
19084 unsigned Loads = 0;
19085 unsigned Truncates = 0;
19086 unsigned CrossRegisterBanksCopies = 0;
19087 unsigned ZExts = 0;
19088 unsigned Shift = 0;
19089
19090 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
19091
19092 /// Get the cost of one isolated slice.
19093 Cost(const LoadedSlice &LS, bool ForCodeSize)
19094 : ForCodeSize(ForCodeSize), Loads(1) {
19095 EVT TruncType = LS.Inst->getValueType(0);
19096 EVT LoadedType = LS.getLoadedType();
19097 if (TruncType != LoadedType &&
19098 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
19099 ZExts = 1;
19100 }
19101
19102 /// Account for slicing gain in the current cost.
19103 /// Slicing provides a few gains, like removing a shift or a
19104 /// truncate. This method allows growing the cost of the original
19105 /// load with the gain from this slice.
19106 void addSliceGain(const LoadedSlice &LS) {
19107 // Each slice saves a truncate.
19108 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
19109 if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
19110 ++Truncates;
19111 // If there is a shift amount, this slice gets rid of it.
19112 if (LS.Shift)
19113 ++Shift;
19114 // If this slice can merge a cross register bank copy, account for it.
19115 if (LS.canMergeExpensiveCrossRegisterBankCopy())
19116 ++CrossRegisterBanksCopies;
19117 }
19118
19119 Cost &operator+=(const Cost &RHS) {
19120 Loads += RHS.Loads;
19121 Truncates += RHS.Truncates;
19122 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
19123 ZExts += RHS.ZExts;
19124 Shift += RHS.Shift;
19125 return *this;
19126 }
19127
19128 bool operator==(const Cost &RHS) const {
19129 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
19130 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
19131 ZExts == RHS.ZExts && Shift == RHS.Shift;
19132 }
19133
19134 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
19135
19136 bool operator<(const Cost &RHS) const {
19137 // Assume cross register banks copies are as expensive as loads.
19138 // FIXME: Do we want some more target hooks?
19139 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
19140 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
19141 // Unless we are optimizing for code size, consider the
19142 // expensive operation first.
19143 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
19144 return ExpensiveOpsLHS < ExpensiveOpsRHS;
19145 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
19146 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
19147 }
19148
19149 bool operator>(const Cost &RHS) const { return RHS < *this; }
19150
19151 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
19152
19153 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
19154 };
19155
19156 // The last instruction that represents the slice. This should be a
19157 // truncate instruction.
19158 SDNode *Inst;
19159
19160 // The original load instruction.
19161 LoadSDNode *Origin;
19162
19163 // The right shift amount in bits from the original load.
19164 unsigned Shift;
19165
19166 // The DAG from which Origin came.
19167 // This is used to get some contextual information about legal types, etc.
19168 SelectionDAG *DAG;
19169
19170 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
19171 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
19172 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
19173
19174 /// Get the bits used in a chunk of bits \p BitWidth large.
19175 /// \return Result is \p BitWidth and has used bits set to 1 and
19176 /// not used bits set to 0.
19177 APInt getUsedBits() const {
19178 // Reproduce the trunc(lshr) sequence:
19179 // - Start from the truncated value.
19180 // - Zero extend to the desired bit width.
19181 // - Shift left.
19182 assert(Origin && "No original load to compare against.");
19183 unsigned BitWidth = Origin->getValueSizeInBits(0);
19184 assert(Inst && "This slice is not bound to an instruction");
19185 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
19186 "Extracted slice is bigger than the whole type!");
19187 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
19188 UsedBits.setAllBits();
19189 UsedBits = UsedBits.zext(BitWidth);
19190 UsedBits <<= Shift;
19191 return UsedBits;
19192 }
19193
19194 /// Get the size of the slice to be loaded in bytes.
19195 unsigned getLoadedSize() const {
19196 unsigned SliceSize = getUsedBits().popcount();
19197 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
19198 return SliceSize / 8;
19199 }
19200
19201 /// Get the type that will be loaded for this slice.
19202 /// Note: This may not be the final type for the slice.
19203 EVT getLoadedType() const {
19204 assert(DAG && "Missing context");
19205 LLVMContext &Ctxt = *DAG->getContext();
19206 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
19207 }
19208
19209 /// Get the alignment of the load used for this slice.
19210 Align getAlign() const {
19211 Align Alignment = Origin->getAlign();
19212 uint64_t Offset = getOffsetFromBase();
19213 if (Offset != 0)
19214 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
19215 return Alignment;
19216 }
19217
19218 /// Check if this slice can be rewritten with legal operations.
19219 bool isLegal() const {
19220 // An invalid slice is not legal.
19221 if (!Origin || !Inst || !DAG)
19222 return false;
19223
19224 // Offsets are for indexed loads only; we do not handle that.
19225 if (!Origin->getOffset().isUndef())
19226 return false;
19227
19228 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19229
19230 // Check that the type is legal.
19231 EVT SliceType = getLoadedType();
19232 if (!TLI.isTypeLegal(SliceType))
19233 return false;
19234
19235 // Check that the load is legal for this type.
19236 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19237 return false;
19238
19239 // Check that the offset can be computed.
19240 // 1. Check its type.
19241 EVT PtrType = Origin->getBasePtr().getValueType();
19242 if (PtrType == MVT::Untyped || PtrType.isExtended())
19243 return false;
19244
19245 // 2. Check that it fits in the immediate.
19246 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19247 return false;
19248
19249 // 3. Check that the computation is legal.
19250 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19251 return false;
19252
19253 // Check that the zext is legal if it needs one.
19254 EVT TruncateType = Inst->getValueType(0);
19255 if (TruncateType != SliceType &&
19256 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19257 return false;
19258
19259 return true;
19260 }
19261
19262 /// Get the offset in bytes of this slice in the original chunk of
19263 /// bits.
19264 /// \pre DAG != nullptr.
19265 uint64_t getOffsetFromBase() const {
19266 assert(DAG && "Missing context.");
19267 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19268 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19269 uint64_t Offset = Shift / 8;
19270 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19271 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19272 "The size of the original loaded type is not a multiple of a"
19273 " byte.");
19274 // If Offset is bigger than TySizeInBytes, it means we are loading all
19275 // zeros. This should have been optimized before in the process.
19276 assert(TySizeInBytes > Offset &&
19277 "Invalid shift amount for given loaded size");
19278 if (IsBigEndian)
19279 Offset = TySizeInBytes - Offset - getLoadedSize();
19280 return Offset;
19281 }
19282
19283 /// Generate the sequence of instructions to load the slice
19284 /// represented by this object and redirect the uses of this slice to
19285 /// this new sequence of instructions.
19286 /// \pre this->Inst && this->Origin are valid Instructions and this
19287 /// object passed the legal check: LoadedSlice::isLegal returned true.
19288 /// \return The last instruction of the sequence used to load the slice.
19289 SDValue loadSlice() const {
19290 assert(Inst && Origin && "Unable to replace a non-existing slice.");
19291 const SDValue &OldBaseAddr = Origin->getBasePtr();
19292 SDValue BaseAddr = OldBaseAddr;
19293 // Get the offset in that chunk of bytes w.r.t. the endianness.
19294 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19295 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19296 if (Offset) {
19297 // BaseAddr = BaseAddr + Offset.
19298 EVT ArithType = BaseAddr.getValueType();
19299 SDLoc DL(Origin);
19300 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19301 DAG->getConstant(Offset, DL, ArithType));
19302 }
19303
19304 // Create the type of the loaded slice according to its size.
19305 EVT SliceType = getLoadedType();
19306
19307 // Create the load for the slice.
19308 SDValue LastInst =
19309 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19310 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19311 Origin->getMemOperand()->getFlags());
19312 // If the final type is not the same as the loaded type, this means that
19313 // we have to pad with zero. Create a zero extend for that.
19314 EVT FinalType = Inst->getValueType(0);
19315 if (SliceType != FinalType)
19316 LastInst =
19317 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19318 return LastInst;
19319 }
19320
19321 /// Check if this slice can be merged with an expensive cross register
19322 /// bank copy. E.g.,
19323 /// i = load i32
19324 /// f = bitcast i32 i to float
19325 bool canMergeExpensiveCrossRegisterBankCopy() const {
19326 if (!Inst || !Inst->hasOneUse())
19327 return false;
19328 SDNode *Use = *Inst->use_begin();
19329 if (Use->getOpcode() != ISD::BITCAST)
19330 return false;
19331 assert(DAG && "Missing context");
19332 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19333 EVT ResVT = Use->getValueType(0);
19334 const TargetRegisterClass *ResRC =
19335 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19336 const TargetRegisterClass *ArgRC =
19337 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19338 Use->getOperand(0)->isDivergent());
19339 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19340 return false;
19341
19342 // At this point, we know that we perform a cross-register-bank copy.
19343 // Check if it is expensive.
19344 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19345 // Assume bitcasts are cheap, unless both register classes do not
19346 // explicitly share a common sub class.
19347 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19348 return false;
19349
19350 // Check if it will be merged with the load.
19351 // 1. Check the alignment / fast memory access constraint.
19352 unsigned IsFast = 0;
19353 if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19354 Origin->getAddressSpace(), getAlign(),
19355 Origin->getMemOperand()->getFlags(), &IsFast) ||
19356 !IsFast)
19357 return false;
19358
19359 // 2. Check that the load is a legal operation for that type.
19360 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19361 return false;
19362
19363 // 3. Check that we do not have a zext in the way.
19364 if (Inst->getValueType(0) != getLoadedType())
19365 return false;
19366
19367 return true;
19368 }
19369};
19370
19371} // end anonymous namespace
19372
19373/// Check that all bits set in \p UsedBits form a dense region, i.e.,
19374/// \p UsedBits looks like 0..0 1..1 0..0.
19375static bool areUsedBitsDense(const APInt &UsedBits) {
19376 // If all the bits are one, this is dense!
19377 if (UsedBits.isAllOnes())
19378 return true;
19379
19380 // Get rid of the unused bits on the right.
19381 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19382 // Get rid of the unused bits on the left.
19383 if (NarrowedUsedBits.countl_zero())
19384 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19385 // Check that the chunk of bits is completely used.
19386 return NarrowedUsedBits.isAllOnes();
19387}
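// Example (assumed 32-bit masks): 0x00FFFF00 is dense (a single contiguous
// run of ones), while 0x00FF00FF is not (two separate runs).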
19388
19389/// Check whether or not \p First and \p Second are next to each other
19390/// in memory. This means that there is no hole between the bits loaded
19391/// by \p First and the bits loaded by \p Second.
19392static bool areSlicesNextToEachOther(const LoadedSlice &First,
19393 const LoadedSlice &Second) {
19394 assert(First.Origin == Second.Origin && First.Origin &&
19395 "Unable to match different memory origins.");
19396 APInt UsedBits = First.getUsedBits();
19397 assert((UsedBits & Second.getUsedBits()) == 0 &&
19398 "Slices are not supposed to overlap.");
19399 UsedBits |= Second.getUsedBits();
19400 return areUsedBitsDense(UsedBits);
19401}
19402
19403/// Adjust the \p GlobalLSCost according to the target
19404 /// pairing capabilities and the layout of the slices.
19405/// \pre \p GlobalLSCost should account for at least as many loads as
19406/// there is in the slices in \p LoadedSlices.
19407 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19408 LoadedSlice::Cost &GlobalLSCost) {
19409 unsigned NumberOfSlices = LoadedSlices.size();
19410 // If there are fewer than 2 elements, no pairing is possible.
19411 if (NumberOfSlices < 2)
19412 return;
19413
19414 // Sort the slices so that elements that are likely to be next to each
19415 // other in memory are next to each other in the list.
19416 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19417 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19418 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19419 });
19420 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
19421 // First (resp. Second) is the first (resp. second) potential candidate
19422 // to be placed in a paired load.
19423 const LoadedSlice *First = nullptr;
19424 const LoadedSlice *Second = nullptr;
19425 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19426 // Set the beginning of the pair.
19427 First = Second) {
19428 Second = &LoadedSlices[CurrSlice];
19429
19430 // If First is NULL, it means we start a new pair.
19431 // Get to the next slice.
19432 if (!First)
19433 continue;
19434
19435 EVT LoadedType = First->getLoadedType();
19436
19437 // If the types of the slices are different, we cannot pair them.
19438 if (LoadedType != Second->getLoadedType())
19439 continue;
19440
19441 // Check if the target supplies paired loads for this type.
19442 Align RequiredAlignment;
19443 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19444 // move to the next pair, this type is hopeless.
19445 Second = nullptr;
19446 continue;
19447 }
19448 // Check if we meet the alignment requirement.
19449 if (First->getAlign() < RequiredAlignment)
19450 continue;
19451
19452 // Check that both loads are next to each other in memory.
19453 if (!areSlicesNextToEachOther(*First, *Second))
19454 continue;
19455
19456 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19457 --GlobalLSCost.Loads;
19458 // Move to the next pair.
19459 Second = nullptr;
19460 }
19461}
19462
19463/// Check the profitability of all involved LoadedSlice.
19464 /// Currently, it is considered profitable if there are exactly two
19465/// involved slices (1) which are (2) next to each other in memory, and
19466/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19467///
19468/// Note: The order of the elements in \p LoadedSlices may be modified, but not
19469/// the elements themselves.
19470///
19471 /// FIXME: When the cost model is mature enough, we can relax
19472/// constraints (1) and (2).
19473 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19474 const APInt &UsedBits, bool ForCodeSize) {
19475 unsigned NumberOfSlices = LoadedSlices.size();
19476 if (StressLoadSlicing)
19477 return NumberOfSlices > 1;
19478
19479 // Check (1).
19480 if (NumberOfSlices != 2)
19481 return false;
19482
19483 // Check (2).
19484 if (!areUsedBitsDense(UsedBits))
19485 return false;
19486
19487 // Check (3).
19488 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19489 // The original code has one big load.
19490 OrigCost.Loads = 1;
19491 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19492 const LoadedSlice &LS = LoadedSlices[CurrSlice];
19493 // Accumulate the cost of all the slices.
19494 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19495 GlobalSlicingCost += SliceCost;
19496
19497 // Account as cost in the original configuration the gain obtained
19498 // with the current slices.
19499 OrigCost.addSliceGain(LS);
19500 }
19501
19502 // If the target supports paired load, adjust the cost accordingly.
19503 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19504 return OrigCost > GlobalSlicingCost;
19505}
19506
19507/// If the given load, \p LI, is used only by trunc or trunc(lshr)
19508/// operations, split it in the various pieces being extracted.
19509///
19510/// This sort of thing is introduced by SROA.
19511/// This slicing takes care not to insert overlapping loads.
19512/// \pre LI is a simple load (i.e., not an atomic or volatile load).
19513bool DAGCombiner::SliceUpLoad(SDNode *N) {
19514 if (Level < AfterLegalizeDAG)
19515 return false;
19516
19517 LoadSDNode *LD = cast<LoadSDNode>(N);
19518 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19519 !LD->getValueType(0).isInteger())
19520 return false;
19521
19522 // The algorithm to split up a load of a scalable vector into individual
19523 // elements currently requires knowing the length of the loaded type,
19524 // so will need adjusting to work on scalable vectors.
19525 if (LD->getValueType(0).isScalableVector())
19526 return false;
19527
19528 // Keep track of already used bits to detect overlapping values.
19529 // In that case, we will just abort the transformation.
19530 APInt UsedBits(LD->getValueSizeInBits(0), 0);
19531
19532 SmallVector<LoadedSlice, 4> LoadedSlices;
19533
19534 // Check if this load is used as several smaller chunks of bits.
19535 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19536 // of computation for each trunc.
19537 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19538 UI != UIEnd; ++UI) {
19539 // Skip the uses of the chain.
19540 if (UI.getUse().getResNo() != 0)
19541 continue;
19542
19543 SDNode *User = *UI;
19544 unsigned Shift = 0;
19545
19546 // Check if this is a trunc(lshr).
19547 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19548 isa<ConstantSDNode>(User->getOperand(1))) {
19549 Shift = User->getConstantOperandVal(1);
19550 User = *User->use_begin();
19551 }
19552
19553 // At this point, User is a Truncate, iff we encountered trunc or
19554 // trunc(lshr).
19555 if (User->getOpcode() != ISD::TRUNCATE)
19556 return false;
19557
19558 // The width of the type must be a power of 2 and at least 8 bits.
19559 // Otherwise the load cannot be represented in LLVM IR.
19560 // Moreover, if we shifted with a non-8-bits multiple, the slice
19561 // will be across several bytes. We do not support that.
19562 unsigned Width = User->getValueSizeInBits(0);
19563 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19564 return false;
19565
19566 // Build the slice for this chain of computations.
19567 LoadedSlice LS(User, LD, Shift, &DAG);
19568 APInt CurrentUsedBits = LS.getUsedBits();
19569
19570 // Check if this slice overlaps with another.
19571 if ((CurrentUsedBits & UsedBits) != 0)
19572 return false;
19573 // Update the bits used globally.
19574 UsedBits |= CurrentUsedBits;
19575
19576 // Check if the new slice would be legal.
19577 if (!LS.isLegal())
19578 return false;
19579
19580 // Record the slice.
19581 LoadedSlices.push_back(LS);
19582 }
19583
19584 // Abort slicing if it does not seem to be profitable.
19585 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19586 return false;
19587
19588 ++SlicedLoads;
19589
19590 // Rewrite each chain to use an independent load.
19591 // By construction, each chain can be represented by a unique load.
19592
19593 // Prepare the argument for the new token factor for all the slices.
19594 SmallVector<SDValue, 8> ArgChains;
19595 for (const LoadedSlice &LS : LoadedSlices) {
19596 SDValue SliceInst = LS.loadSlice();
19597 CombineTo(LS.Inst, SliceInst, true);
19598 if (SliceInst.getOpcode() != ISD::LOAD)
19599 SliceInst = SliceInst.getOperand(0);
19600 assert(SliceInst->getOpcode() == ISD::LOAD &&
19601 "It takes more than a zext to get to the loaded slice!!");
19602 ArgChains.push_back(SliceInst.getValue(1));
19603 }
19604
19605 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19606 ArgChains);
19607 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19608 AddToWorklist(Chain.getNode());
19609 return true;
19610}
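// Rough illustration of load slicing (assumed example): a 32-bit load used
// only as two 16-bit pieces,
//   %w  = load i32, ptr %p
//   %lo = trunc i32 %w to i16
//   %hi = trunc i32 (lshr i32 %w, 16) to i16
// may be rewritten as two independent i16 loads at %p and %p+2 (offsets
// depend on endianness), subject to the legality and cost checks above.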
19611
19612 /// Check to see if V is (and load (ptr), imm), where the load has
19613 /// specific bytes cleared out. If so, return the byte size being masked out
19614/// and the shift amount.
19615static std::pair<unsigned, unsigned>
19616 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19617 std::pair<unsigned, unsigned> Result(0, 0);
19618
19619 // Check for the structure we're looking for.
19620 if (V->getOpcode() != ISD::AND ||
19621 !isa<ConstantSDNode>(V->getOperand(1)) ||
19622 !ISD::isNormalLoad(V->getOperand(0).getNode()))
19623 return Result;
19624
19625 // Check the chain and pointer.
19626 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19627 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
19628
19629 // This only handles simple types.
19630 if (V.getValueType() != MVT::i16 &&
19631 V.getValueType() != MVT::i32 &&
19632 V.getValueType() != MVT::i64)
19633 return Result;
19634
19635 // Check the constant mask. Invert it so that the bits being masked out are
19636 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
19637 // follow the sign bit for uniformity.
19638 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19639 unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19640 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
19641 unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19642 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
19643 if (NotMaskLZ == 64) return Result; // All zero mask.
19644
19645 // See if we have a contiguous run of bits. If so, NotMask has the form 0*1+0*.
19646 if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19647 return Result;
19648
19649 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19650 if (V.getValueType() != MVT::i64 && NotMaskLZ)
19651 NotMaskLZ -= 64-V.getValueSizeInBits();
19652
19653 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19654 switch (MaskedBytes) {
19655 case 1:
19656 case 2:
19657 case 4: break;
19658 default: return Result; // All one mask, or 5-byte mask.
19659 }
19660
19661 // Verify that the first bit starts at a multiple of mask so that the access
19662 // is aligned the same as the access width.
19663 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19664
19665 // For narrowing to be valid, the load must be the memory operation
19666 // immediately preceding the store.
19667 if (LD == Chain.getNode())
19668 ; // ok.
19669 else if (Chain->getOpcode() == ISD::TokenFactor &&
19670 SDValue(LD, 1).hasOneUse()) {
19671 // LD has only 1 chain use, so there are no indirect dependencies.
19672 if (!LD->isOperandOf(Chain.getNode()))
19673 return Result;
19674 } else
19675 return Result; // Fail.
19676
19677 Result.first = MaskedBytes;
19678 Result.second = NotMaskTZ/8;
19679 return Result;
19680}
19681
19682/// Check to see if IVal is something that provides a value as specified by
19683/// MaskInfo. If so, replace the specified store with a narrower store of
19684/// truncated IVal.
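/// As a sketch of the rewrite (all checks are performed below): with
/// MaskInfo = {1, 1} on a little-endian target, an i32 store whose value only
/// changes byte 1 can be replaced by an i8-sized store of (srl IVal, 8) at
/// ptr + 1, leaving the other three bytes of memory untouched and making the
/// wide load dead.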
19685static SDValue
19686ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19687 SDValue IVal, StoreSDNode *St,
19688 DAGCombiner *DC) {
19689 unsigned NumBytes = MaskInfo.first;
19690 unsigned ByteShift = MaskInfo.second;
19691 SelectionDAG &DAG = DC->getDAG();
19692
19693 // Check to see if IVal is all zeros in the part being masked in by the 'or'
19694 // that uses this. If not, this is not a replacement.
19695 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19696 ByteShift*8, (ByteShift+NumBytes)*8);
19697 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19698
19699 // Check that it is legal on the target to do this. It is legal if the new
19700 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19701 // legalization. If the source type is legal, but the store type isn't, see
19702 // if we can use a truncating store.
19703 MVT VT = MVT::getIntegerVT(NumBytes * 8);
19704 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19705 bool UseTruncStore;
19706 if (DC->isTypeLegal(VT))
19707 UseTruncStore = false;
19708 else if (TLI.isTypeLegal(IVal.getValueType()) &&
19709 TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19710 UseTruncStore = true;
19711 else
19712 return SDValue();
19713
19714 // Can't do this for indexed stores.
19715 if (St->isIndexed())
19716 return SDValue();
19717
19718 // Check that the target doesn't think this is a bad idea.
19719 if (St->getMemOperand() &&
19720 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19721 *St->getMemOperand()))
19722 return SDValue();
19723
19724 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
19725 // shifted by ByteShift and truncated down to NumBytes.
19726 if (ByteShift) {
19727 SDLoc DL(IVal);
19728 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19729 DAG.getConstant(ByteShift*8, DL,
19730 DC->getShiftAmountTy(IVal.getValueType())));
19731 }
19732
19733 // Figure out the offset for the store and the alignment of the access.
19734 unsigned StOffset;
19735 if (DAG.getDataLayout().isLittleEndian())
19736 StOffset = ByteShift;
19737 else
19738 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19739
19740 SDValue Ptr = St->getBasePtr();
19741 if (StOffset) {
19742 SDLoc DL(IVal);
19743 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19744 }
19745
19746 ++OpsNarrowed;
19747 if (UseTruncStore)
19748 return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19749 St->getPointerInfo().getWithOffset(StOffset),
19750 VT, St->getOriginalAlign());
19751
19752 // Truncate down to the new size.
19753 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19754
19755 return DAG
19756 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19757 St->getPointerInfo().getWithOffset(StOffset),
19758 St->getOriginalAlign());
19759}
19760
19761/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19762/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19763/// narrowing the load and store if it would end up being a win for performance
19764/// or code size.
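/// A sketch of the narrowing performed below (assuming i32, little endian):
///   x = load p; x = or x, 0xFF00; store x, p
/// only touches byte 1, so it can become
///   y = load i8 (p+1); y = or y, 0xFF; store y, (p+1)
/// provided the narrow access is legal, fast, and profitable on the target.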
19765SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19766 StoreSDNode *ST = cast<StoreSDNode>(N);
19767 if (!ST->isSimple())
19768 return SDValue();
19769
19770 SDValue Chain = ST->getChain();
19771 SDValue Value = ST->getValue();
19772 SDValue Ptr = ST->getBasePtr();
19773 EVT VT = Value.getValueType();
19774
19775 if (ST->isTruncatingStore() || VT.isVector())
19776 return SDValue();
19777
19778 unsigned Opc = Value.getOpcode();
19779
19780 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19781 !Value.hasOneUse())
19782 return SDValue();
19783
19784 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19785 // is a byte mask indicating a consecutive number of bytes, check to see if
19786 // Y is known to provide just those bytes. If so, we try to replace the
19787 // load + replace + store sequence with a single (narrower) store, which makes
19788 // the load dead.
19789 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19790 std::pair<unsigned, unsigned> MaskedLoad;
19791 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19792 if (MaskedLoad.first)
19793 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19794 Value.getOperand(1), ST, this))
19795 return NewST;
19796
19797 // Or is commutative, so try swapping X and Y.
19798 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19799 if (MaskedLoad.first)
19800 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19801 Value.getOperand(0), ST, this))
19802 return NewST;
19803 }
19804
19805 if (!EnableReduceLoadOpStoreWidth)
19806 return SDValue();
19807
19808 if (Value.getOperand(1).getOpcode() != ISD::Constant)
19809 return SDValue();
19810
19811 SDValue N0 = Value.getOperand(0);
19812 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19813 Chain == SDValue(N0.getNode(), 1)) {
19814 LoadSDNode *LD = cast<LoadSDNode>(N0);
19815 if (LD->getBasePtr() != Ptr ||
19816 LD->getPointerInfo().getAddrSpace() !=
19817 ST->getPointerInfo().getAddrSpace())
19818 return SDValue();
19819
19820 // Find the type to narrow the load / op / store to.
19821 SDValue N1 = Value.getOperand(1);
19822 unsigned BitWidth = N1.getValueSizeInBits();
19823 APInt Imm = N1->getAsAPIntVal();
19824 if (Opc == ISD::AND)
19825 Imm ^= APInt::getAllOnes(BitWidth);
19826 if (Imm == 0 || Imm.isAllOnes())
19827 return SDValue();
19828 unsigned ShAmt = Imm.countr_zero();
19829 unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19830 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19831 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19832 // The narrowing should be profitable, the load/store operation should be
19833 // legal (or custom) and the store size should be equal to the NewVT width.
19834 while (NewBW < BitWidth &&
19835 (NewVT.getStoreSizeInBits() != NewBW ||
19836 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19837 !TLI.isNarrowingProfitable(VT, NewVT))) {
19838 NewBW = NextPowerOf2(NewBW);
19839 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19840 }
19841 if (NewBW >= BitWidth)
19842 return SDValue();
19843
19844 // If the lsb that changed does not start at a NewBW-bit boundary, start at
19845 // the previous boundary.
19846 if (ShAmt % NewBW)
19847 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19848 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19849 std::min(BitWidth, ShAmt + NewBW));
19850 if ((Imm & Mask) == Imm) {
19851 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19852 if (Opc == ISD::AND)
19853 NewImm ^= APInt::getAllOnes(NewBW);
19854 uint64_t PtrOff = ShAmt / 8;
19855 // For big-endian targets, we need to adjust the offset to the pointer so
19856 // that we load the correct bytes.
19857 if (DAG.getDataLayout().isBigEndian())
19858 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19859
19860 unsigned IsFast = 0;
19861 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19862 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19863 LD->getAddressSpace(), NewAlign,
19864 LD->getMemOperand()->getFlags(), &IsFast) ||
19865 !IsFast)
19866 return SDValue();
19867
19868 SDValue NewPtr =
19869 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19870 SDValue NewLD =
19871 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19872 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19873 LD->getMemOperand()->getFlags(), LD->getAAInfo());
19874 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19875 DAG.getConstant(NewImm, SDLoc(Value),
19876 NewVT));
19877 SDValue NewST =
19878 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19879 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19880
19881 AddToWorklist(NewPtr.getNode());
19882 AddToWorklist(NewLD.getNode());
19883 AddToWorklist(NewVal.getNode());
19884 WorklistRemover DeadNodes(*this);
19885 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19886 ++OpsNarrowed;
19887 return NewST;
19888 }
19889 }
19890
19891 return SDValue();
19892}
19893
19894/// For a given floating point load / store pair, if the load value isn't used
19895/// by any other operations, then consider transforming the pair to integer
19896/// load / store operations if the target deems the transformation profitable.
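/// Roughly, a bit-copying pair such as
///   (store (f64 (load p)), q)
/// may become
///   (store (i64 (load p)), q)
/// when i64 loads/stores are legal, allowed for these memory operands, and
/// considered desirable by the target, avoiding an FP register round-trip.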
19897SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19898 StoreSDNode *ST = cast<StoreSDNode>(N);
19899 SDValue Value = ST->getValue();
19900 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19901 Value.hasOneUse()) {
19902 LoadSDNode *LD = cast<LoadSDNode>(Value);
19903 EVT VT = LD->getMemoryVT();
19904 if (!VT.isFloatingPoint() ||
19905 VT != ST->getMemoryVT() ||
19906 LD->isNonTemporal() ||
19907 ST->isNonTemporal() ||
19908 LD->getPointerInfo().getAddrSpace() != 0 ||
19909 ST->getPointerInfo().getAddrSpace() != 0)
19910 return SDValue();
19911
19912 TypeSize VTSize = VT.getSizeInBits();
19913
19914 // We don't know the size of scalable types at compile time so we cannot
19915 // create an integer of the equivalent size.
19916 if (VTSize.isScalable())
19917 return SDValue();
19918
19919 unsigned FastLD = 0, FastST = 0;
19920 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19921 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19922 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19923 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19924 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19925 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19926 *LD->getMemOperand(), &FastLD) ||
19927 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19928 *ST->getMemOperand(), &FastST) ||
19929 !FastLD || !FastST)
19930 return SDValue();
19931
19932 SDValue NewLD =
19933 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19934 LD->getPointerInfo(), LD->getAlign());
19935
19936 SDValue NewST =
19937 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19938 ST->getPointerInfo(), ST->getAlign());
19939
19940 AddToWorklist(NewLD.getNode());
19941 AddToWorklist(NewST.getNode());
19942 WorklistRemover DeadNodes(*this);
19943 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19944 ++LdStFP2Int;
19945 return NewST;
19946 }
19947
19948 return SDValue();
19949}
19950
19951// This is a helper function for visitMUL to check the profitability
19952// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19953// MulNode is the original multiply, AddNode is (add x, c1),
19954// and ConstNode is c2.
19955//
19956// If the (add x, c1) has multiple uses, we could increase
19957// the number of adds if we make this transformation.
19958// It would only be worth doing this if we can remove a
19959// multiply in the process. Check for that here.
19960// To illustrate:
19961// (A + c1) * c3
19962// (A + c2) * c3
19963// We're checking for cases where we have common "c3 * A" expressions.
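// A worked example of the idea (not taken from the code below): with
//   (A + 1) * 5 and (A + 2) * 5,
// rewriting them as (A * 5) + 5 and (A * 5) + 10 lets both expressions share
// the single multiply A * 5, so duplicating the add is worth it.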
19964bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19965 SDValue ConstNode) {
19966 APInt Val;
19967
19968 // If the add only has one use, and the target thinks the folding is
19969 // profitable or does not lead to worse code, this would be OK to do.
19970 if (AddNode->hasOneUse() &&
19971 TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19972 return true;
19973
19974 // Walk all the users of the constant with which we're multiplying.
19975 for (SDNode *Use : ConstNode->uses()) {
19976 if (Use == MulNode) // This use is the one we're on right now. Skip it.
19977 continue;
19978
19979 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19980 SDNode *OtherOp;
19981 SDNode *MulVar = AddNode.getOperand(0).getNode();
19982
19983 // OtherOp is what we're multiplying against the constant.
19984 if (Use->getOperand(0) == ConstNode)
19985 OtherOp = Use->getOperand(1).getNode();
19986 else
19987 OtherOp = Use->getOperand(0).getNode();
19988
19989 // Check to see if multiply is with the same operand of our "add".
19990 //
19991 // ConstNode = CONST
19992 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
19993 // ...
19994 // AddNode = (A + c1) <-- MulVar is A.
19995 // = AddNode * ConstNode <-- current visiting instruction.
19996 //
19997 // If we make this transformation, we will have a common
19998 // multiply (ConstNode * A) that we can save.
19999 if (OtherOp == MulVar)
20000 return true;
20001
20002 // Now check to see if a future expansion will give us a common
20003 // multiply.
20004 //
20005 // ConstNode = CONST
20006 // AddNode = (A + c1)
20007 // ... = AddNode * ConstNode <-- current visiting instruction.
20008 // ...
20009 // OtherOp = (A + c2)
20010 // Use = OtherOp * ConstNode <-- visiting Use.
20011 //
20012 // If we make this transformation, we will have a common
20013 // multiply (CONST * A) after we also apply the same transformation
20014 // to the other multiply (Use = OtherOp * ConstNode).
20015 if (OtherOp->getOpcode() == ISD::ADD &&
20016 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
20017 OtherOp->getOperand(0).getNode() == MulVar)
20018 return true;
20019 }
20020 }
20021
20022 // Didn't find a case where this would be profitable.
20023 return false;
20024}
20025
20026SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
20027 unsigned NumStores) {
20028 SmallVector<SDValue, 8> Chains;
20029 SmallPtrSet<const SDNode *, 8> Visited;
20030 SDLoc StoreDL(StoreNodes[0].MemNode);
20031
20032 for (unsigned i = 0; i < NumStores; ++i) {
20033 Visited.insert(StoreNodes[i].MemNode);
20034 }
20035
20036 // Don't include nodes that are children or repeated nodes.
20037 for (unsigned i = 0; i < NumStores; ++i) {
20038 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
20039 Chains.push_back(StoreNodes[i].MemNode->getChain());
20040 }
20041
20042 assert(!Chains.empty() && "Chain should have generated a chain");
20043 return DAG.getTokenFactor(StoreDL, Chains);
20044}
20045
20046bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
20047 const Value *UnderlyingObj = nullptr;
20048 for (const auto &MemOp : StoreNodes) {
20049 const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
20050 // A pseudo value like a stack frame has its own frame index and size; we
20051 // should not use the first store's frame index for the other frames.
20052 if (MMO->getPseudoValue())
20053 return false;
20054
20055 if (!MMO->getValue())
20056 return false;
20057
20058 const Value *Obj = getUnderlyingObject(MMO->getValue());
20059
20060 if (UnderlyingObj && UnderlyingObj != Obj)
20061 return false;
20062
20063 if (!UnderlyingObj)
20064 UnderlyingObj = Obj;
20065 }
20066
20067 return true;
20068}
20069
20070bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20071 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
20072 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
20073 // Make sure we have something to merge.
20074 if (NumStores < 2)
20075 return false;
20076
20077 assert((!UseTrunc || !UseVector) &&
20078 "This optimization cannot emit a vector truncating store");
20079
20080 // The latest Node in the DAG.
20081 SDLoc DL(StoreNodes[0].MemNode);
20082
20083 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
20084 unsigned SizeInBits = NumStores * ElementSizeBits;
20085 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20086
20087 std::optional<MachineMemOperand::Flags> Flags;
20088 AAMDNodes AAInfo;
20089 for (unsigned I = 0; I != NumStores; ++I) {
20090 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20091 if (!Flags) {
20092 Flags = St->getMemOperand()->getFlags();
20093 AAInfo = St->getAAInfo();
20094 continue;
20095 }
20096 // Skip merging if there's an inconsistent flag.
20097 if (Flags != St->getMemOperand()->getFlags())
20098 return false;
20099 // Concatenate AA metadata.
20100 AAInfo = AAInfo.concat(St->getAAInfo());
20101 }
20102
20103 EVT StoreTy;
20104 if (UseVector) {
20105 unsigned Elts = NumStores * NumMemElts;
20106 // Get the type for the merged vector store.
20107 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20108 } else
20109 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
20110
20111 SDValue StoredVal;
20112 if (UseVector) {
20113 if (IsConstantSrc) {
20114 SmallVector<SDValue, 8> BuildVector;
20115 for (unsigned I = 0; I != NumStores; ++I) {
20116 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
20117 SDValue Val = St->getValue();
20118 // If constant is of the wrong type, convert it now. This comes up
20119 // when one of our stores was truncating.
20120 if (MemVT != Val.getValueType()) {
20121 Val = peekThroughBitcasts(Val);
20122 // Deal with constants of wrong size.
20123 if (ElementSizeBits != Val.getValueSizeInBits()) {
20124 auto *C = dyn_cast<ConstantSDNode>(Val);
20125 if (!C)
20126 // Not clear how to truncate FP values.
20127 // TODO: Handle truncation of build_vector constants
20128 return false;
20129
20130 EVT IntMemVT =
20131 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
20132 Val = DAG.getConstant(C->getAPIntValue()
20133 .zextOrTrunc(Val.getValueSizeInBits())
20134 .zextOrTrunc(ElementSizeBits),
20135 SDLoc(C), IntMemVT);
20136 }
20137 // Make sure the correctly-sized value also has the correct (memory) type.
20138 Val = DAG.getBitcast(MemVT, Val);
20139 }
20140 BuildVector.push_back(Val);
20141 }
20142 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20143 : ISD::BUILD_VECTOR,
20144 DL, StoreTy, BuildVector);
20145 } else {
20146 SmallVector<SDValue, 8> Ops;
20147 for (unsigned i = 0; i < NumStores; ++i) {
20148 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20149 SDValue Val = peekThroughBitcasts(St->getValue());
20150 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
20151 // type MemVT. If the underlying value is not the correct
20152 // type, but it is an extraction of an appropriate vector we
20153 // can recast Val to be of the correct type. This may require
20154 // converting between EXTRACT_VECTOR_ELT and
20155 // EXTRACT_SUBVECTOR.
20156 if ((MemVT != Val.getValueType()) &&
20157 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
20158 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
20159 EVT MemVTScalarTy = MemVT.getScalarType();
20160 // We may need to add a bitcast here to get types to line up.
20161 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
20162 Val = DAG.getBitcast(MemVT, Val);
20163 } else if (MemVT.isVector() &&
20164 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
20165 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
20166 } else {
20167 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
20168 : ISD::EXTRACT_VECTOR_ELT;
20169 SDValue Vec = Val.getOperand(0);
20170 SDValue Idx = Val.getOperand(1);
20171 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
20172 }
20173 }
20174 Ops.push_back(Val);
20175 }
20176
20177 // Build the extracted vector elements back into a vector.
20178 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
20179 : ISD::BUILD_VECTOR,
20180 DL, StoreTy, Ops);
20181 }
20182 } else {
20183 // We should always use a vector store when merging extracted vector
20184 // elements, so this path implies a store of constants.
20185 assert(IsConstantSrc && "Merged vector elements should use vector store");
20186
20187 APInt StoreInt(SizeInBits, 0);
20188
20189 // Construct a single integer constant which is made of the smaller
20190 // constant inputs.
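 // For example (a sketch): merging two i16 constant stores of 0x1122 (lower
 // address) and 0x3344 packs them into the i32 constant 0x33441122 on a
 // little-endian target, so the single wide store writes exactly the bytes
 // the two narrow stores would have written.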
20191 bool IsLE = DAG.getDataLayout().isLittleEndian();
20192 for (unsigned i = 0; i < NumStores; ++i) {
20193 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
20194 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
20195
20196 SDValue Val = St->getValue();
20197 Val = peekThroughBitcasts(Val);
20198 StoreInt <<= ElementSizeBits;
20199 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
20200 StoreInt |= C->getAPIntValue()
20201 .zextOrTrunc(ElementSizeBits)
20202 .zextOrTrunc(SizeInBits);
20203 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
20204 StoreInt |= C->getValueAPF()
20205 .bitcastToAPInt()
20206 .zextOrTrunc(ElementSizeBits)
20207 .zextOrTrunc(SizeInBits);
20208 // If fp truncation is necessary give up for now.
20209 if (MemVT.getSizeInBits() != ElementSizeBits)
20210 return false;
20211 } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
20212 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
20213 // Not yet handled
20214 return false;
20215 } else {
20216 llvm_unreachable("Invalid constant element type");
20217 }
20218 }
20219
20220 // Create the new Load and Store operations.
20221 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
20222 }
20223
20224 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20225 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
20226 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20227
20228 // Make sure we use a truncating store if that is necessary for legality.
20229 // When generating the new widened store, if the first store's pointer info
20230 // cannot be reused, discard the pointer info except for the address space,
20231 // because the widened store can no longer be represented by the original
20232 // pointer info, which describes the narrower memory object.
20233 SDValue NewStore;
20234 if (!UseTrunc) {
20235 NewStore = DAG.getStore(
20236 NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20237 CanReusePtrInfo
20238 ? FirstInChain->getPointerInfo()
20239 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20240 FirstInChain->getAlign(), *Flags, AAInfo);
20241 } else { // Must be realized as a trunc store
20242 EVT LegalizedStoredValTy =
20243 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20244 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20245 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20246 SDValue ExtendedStoreVal =
20247 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20248 LegalizedStoredValTy);
20249 NewStore = DAG.getTruncStore(
20250 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20251 CanReusePtrInfo
20252 ? FirstInChain->getPointerInfo()
20253 : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20254 StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20255 AAInfo);
20256 }
20257
20258 // Replace all merged stores with the new store.
20259 for (unsigned i = 0; i < NumStores; ++i)
20260 CombineTo(StoreNodes[i].MemNode, NewStore);
20261
20262 AddToWorklist(NewChain.getNode());
20263 return true;
20264}
20265
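// In short (a summary, not an exhaustive specification): collect other simple,
// non-indexed stores reachable from St's chain neighbourhood whose values come
// from the same kind of source (constant, load, or vector extract) and whose
// addresses differ from St's base pointer only by a constant offset; record
// each match and its offset in StoreNodes for later sorting and merging.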
20266void DAGCombiner::getStoreMergeCandidates(
20267 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20268 SDNode *&RootNode) {
20269 // This holds the base pointer, index, and the offset in bytes from the base
20270 // pointer. We must have a base and an offset. Do not handle stores to undef
20271 // base pointers.
20272 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20273 if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20274 return;
20275
20276 SDValue Val = peekThroughBitcasts(St->getValue());
20277 StoreSource StoreSrc = getStoreSource(Val);
20278 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20279
20280 // Match on loadbaseptr if relevant.
20281 EVT MemVT = St->getMemoryVT();
20282 BaseIndexOffset LBasePtr;
20283 EVT LoadVT;
20284 if (StoreSrc == StoreSource::Load) {
20285 auto *Ld = cast<LoadSDNode>(Val);
20286 LBasePtr = BaseIndexOffset::match(Ld, DAG);
20287 LoadVT = Ld->getMemoryVT();
20288 // Load and store should be the same type.
20289 if (MemVT != LoadVT)
20290 return;
20291 // Loads must only have one use.
20292 if (!Ld->hasNUsesOfValue(1, 0))
20293 return;
20294 // The memory operands must not be volatile/indexed/atomic.
20295 // TODO: May be able to relax for unordered atomics (see D66309)
20296 if (!Ld->isSimple() || Ld->isIndexed())
20297 return;
20298 }
20299 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20300 int64_t &Offset) -> bool {
20301 // The memory operands must not be volatile/indexed/atomic.
20302 // TODO: May be able to relax for unordered atomics (see D66309)
20303 if (!Other->isSimple() || Other->isIndexed())
20304 return false;
20305 // Don't mix temporal stores with non-temporal stores.
20306 if (St->isNonTemporal() != Other->isNonTemporal())
20307 return false;
20308 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20309 return false;
20310 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20311 // Allow merging constants of different types as integers.
20312 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20313 : Other->getMemoryVT() != MemVT;
20314 switch (StoreSrc) {
20315 case StoreSource::Load: {
20316 if (NoTypeMatch)
20317 return false;
20318 // The Load's Base Ptr must also match.
20319 auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20320 if (!OtherLd)
20321 return false;
20322 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20323 if (LoadVT != OtherLd->getMemoryVT())
20324 return false;
20325 // Loads must only have one use.
20326 if (!OtherLd->hasNUsesOfValue(1, 0))
20327 return false;
20328 // The memory operands must not be volatile/indexed/atomic.
20329 // TODO: May be able to relax for unordered atomics (see D66309)
20330 if (!OtherLd->isSimple() || OtherLd->isIndexed())
20331 return false;
20332 // Don't mix temporal loads with non-temporal loads.
20333 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20334 return false;
20335 if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20336 *OtherLd))
20337 return false;
20338 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20339 return false;
20340 break;
20341 }
20342 case StoreSource::Constant:
20343 if (NoTypeMatch)
20344 return false;
20345 if (getStoreSource(OtherBC) != StoreSource::Constant)
20346 return false;
20347 break;
20348 case StoreSource::Extract:
20349 // Do not merge truncated stores here.
20350 if (Other->isTruncatingStore())
20351 return false;
20352 if (!MemVT.bitsEq(OtherBC.getValueType()))
20353 return false;
20354 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20355 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20356 return false;
20357 break;
20358 default:
20359 llvm_unreachable("Unhandled store source for merging");
20360 }
20361 Ptr = BaseIndexOffset::match(Other, DAG);
20362 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20363 };
20364
20365 // Check if the pair of StoreNode and RootNode has already bailed out of the
20366 // dependence check too many times (i.e. is over the limit).
20367 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20368 SDNode *RootNode) -> bool {
20369 auto RootCount = StoreRootCountMap.find(StoreNode);
20370 return RootCount != StoreRootCountMap.end() &&
20371 RootCount->second.first == RootNode &&
20372 RootCount->second.second > StoreMergeDependenceLimit;
20373 };
20374
20375 auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20376 // This must be a chain use.
20377 if (UseIter.getOperandNo() != 0)
20378 return;
20379 if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20380 BaseIndexOffset Ptr;
20381 int64_t PtrDiff;
20382 if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20383 !OverLimitInDependenceCheck(OtherStore, RootNode))
20384 StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20385 }
20386 };
20387
20388 // We are looking for a root node which is an ancestor to all mergeable
20389 // stores. We search up through a load, to our root and then down
20390 // through all children. For instance we will find Store{1,2,3} if
20391 // St is Store1, Store2, or Store3 where the root is not a load,
20392 // which is always true for non-volatile ops. TODO: Expand
20393 // the search to find all valid candidates through multiple layers of loads.
20394 //
20395 // Root
20396 // |-------|-------|
20397 // Load Load Store3
20398 // | |
20399 // Store1 Store2
20400 //
20401 // FIXME: We should be able to climb and
20402 // descend TokenFactors to find candidates as well.
20403
20404 RootNode = St->getChain().getNode();
20405
20406 unsigned NumNodesExplored = 0;
20407 const unsigned MaxSearchNodes = 1024;
20408 if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20409 RootNode = Ldn->getChain().getNode();
20410 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20411 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20412 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20413 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20414 TryToAddCandidate(I2);
20415 }
20416 // Check stores that depend on the root (e.g. Store 3 in the chart above).
20417 if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20418 TryToAddCandidate(I);
20419 }
20420 }
20421 } else {
20422 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20423 I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20424 TryToAddCandidate(I);
20425 }
20426}
20427
20428// We need to check that merging these stores does not cause a loop in the
20429// DAG. Any store candidate may depend on another candidate indirectly through
20430// its operands. Check in parallel by searching up from operands of candidates.
20431bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20432 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20433 SDNode *RootNode) {
20434 // FIXME: We should be able to truncate a full search of
20435 // predecessors by doing a BFS and keeping tabs on the originating
20436 // stores from which worklist nodes come, in a similar way to
20437 // TokenFactor simplification.
20438
20439 SmallPtrSet<const SDNode *, 32> Visited;
20440 SmallVector<const SDNode *, 8> Worklist;
20441
20442 // RootNode is a predecessor to all candidates so we need not search
20443 // past it. Add RootNode (peeking through TokenFactors). Do not count
20444 // these towards size check.
20445
20446 Worklist.push_back(RootNode);
20447 while (!Worklist.empty()) {
20448 auto N = Worklist.pop_back_val();
20449 if (!Visited.insert(N).second)
20450 continue; // Already present in Visited.
20451 if (N->getOpcode() == ISD::TokenFactor) {
20452 for (SDValue Op : N->ops())
20453 Worklist.push_back(Op.getNode());
20454 }
20455 }
20456
20457 // Don't count pruning nodes towards max.
20458 unsigned int Max = 1024 + Visited.size();
20459 // Search Ops of store candidates.
20460 for (unsigned i = 0; i < NumStores; ++i) {
20461 SDNode *N = StoreNodes[i].MemNode;
20462 // Of the 4 Store Operands:
20463 // * Chain (Op 0) -> We have already considered these
20464 // in candidate selection, but only by following the
20465 // chain dependencies. We could still have a chain
20466 // dependency to a load, that has a non-chain dep to
20467 // another load, that depends on a store, etc. So it is
20468 // possible to have dependencies that consist of a mix
20469 // of chain and non-chain deps, and we need to include
20470 // chain operands in the analysis here.
20471 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20472 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20473 // but aren't necessarily from the same base node, so
20474 // cycles possible (e.g. via indexed store).
20475 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20476 // non-indexed stores). Not constant on all targets (e.g. ARM)
20477 // and so can participate in a cycle.
20478 for (unsigned j = 0; j < N->getNumOperands(); ++j)
20479 Worklist.push_back(N->getOperand(j).getNode());
20480 }
20481 // Search through DAG. We can stop early if we find a store node.
20482 for (unsigned i = 0; i < NumStores; ++i)
20483 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20484 Max)) {
20485 // If the search bails out, record the StoreNode and RootNode in the
20486 // StoreRootCountMap. If we have seen the pair many times over a limit,
20487 // we won't add the StoreNode into the StoreNodes set again.
20488 if (Visited.size() >= Max) {
20489 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20490 if (RootCount.first == RootNode)
20491 RootCount.second++;
20492 else
20493 RootCount = {RootNode, 1};
20494 }
20495 return false;
20496 }
20497 return true;
20498}
20499
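// From the sorted candidate list, return the length of the first run of stores
// at consecutive offsets, dropping leading candidates that cannot start such a
// run. For example, with 4-byte elements, offsets {0, 4, 8, 20} give a run of
// 3; offsets {0, 8, 12} drop the first entry and give a run of 2.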
20500unsigned
20501DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20502 int64_t ElementSizeBytes) const {
20503 while (true) {
20504 // Find a store past the width of the first store.
20505 size_t StartIdx = 0;
20506 while ((StartIdx + 1 < StoreNodes.size()) &&
20507 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20508 StoreNodes[StartIdx + 1].OffsetFromBase)
20509 ++StartIdx;
20510
20511 // Bail if we don't have enough candidates to merge.
20512 if (StartIdx + 1 >= StoreNodes.size())
20513 return 0;
20514
20515 // Trim stores that overlapped with the first store.
20516 if (StartIdx)
20517 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20518
20519 // Scan the memory operations on the chain and find the first
20520 // non-consecutive store memory address.
20521 unsigned NumConsecutiveStores = 1;
20522 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20523 // Check that the addresses are consecutive starting from the second
20524 // element in the list of stores.
20525 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20526 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20527 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20528 break;
20529 NumConsecutiveStores = i + 1;
20530 }
20531 if (NumConsecutiveStores > 1)
20532 return NumConsecutiveStores;
20533
20534 // There are no consecutive stores at the start of the list.
20535 // Remove the first store and try again.
20536 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20537 }
20538}
20539
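// For a run of consecutive constant stores, pick the longest prefix for which
// a single wide integer (or vector) store is legal and fast, then emit it.
// For example, four adjacent i8 stores of 1, 2, 3, 4 can typically become one
// i32 store of 0x04030201 on a little-endian target.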
20540bool DAGCombiner::tryStoreMergeOfConstants(
20541 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20542 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20543 LLVMContext &Context = *DAG.getContext();
20544 const DataLayout &DL = DAG.getDataLayout();
20545 int64_t ElementSizeBytes = MemVT.getStoreSize();
20546 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20547 bool MadeChange = false;
20548
20549 // Store the constants into memory as one consecutive store.
20550 while (NumConsecutiveStores >= 2) {
20551 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20552 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20553 Align FirstStoreAlign = FirstInChain->getAlign();
20554 unsigned LastLegalType = 1;
20555 unsigned LastLegalVectorType = 1;
20556 bool LastIntegerTrunc = false;
20557 bool NonZero = false;
20558 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20559 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20560 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20561 SDValue StoredVal = ST->getValue();
20562 bool IsElementZero = false;
20563 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20564 IsElementZero = C->isZero();
20565 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20566 IsElementZero = C->getConstantFPValue()->isNullValue();
20567 else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20568 IsElementZero = true;
20569 if (IsElementZero) {
20570 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20571 FirstZeroAfterNonZero = i;
20572 }
20573 NonZero |= !IsElementZero;
20574
20575 // Find a legal type for the constant store.
20576 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20577 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20578 unsigned IsFast = 0;
20579
20580 // Break early when size is too large to be legal.
20581 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20582 break;
20583
20584 if (TLI.isTypeLegal(StoreTy) &&
20585 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20586 DAG.getMachineFunction()) &&
20587 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20588 *FirstInChain->getMemOperand(), &IsFast) &&
20589 IsFast) {
20590 LastIntegerTrunc = false;
20591 LastLegalType = i + 1;
20592 // Or check whether a truncstore is legal.
20593 } else if (TLI.getTypeAction(Context, StoreTy) ==
20594 TargetLowering::TypePromoteInteger) {
20595 EVT LegalizedStoredValTy =
20596 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20597 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20598 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20599 DAG.getMachineFunction()) &&
20600 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20601 *FirstInChain->getMemOperand(), &IsFast) &&
20602 IsFast) {
20603 LastIntegerTrunc = true;
20604 LastLegalType = i + 1;
20605 }
20606 }
20607
20608 // We only use vectors if the target allows it and the function is not
20609 // marked with the noimplicitfloat attribute.
20610 if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20611 AllowVectors) {
20612 // Find a legal type for the vector store.
20613 unsigned Elts = (i + 1) * NumMemElts;
20614 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20615 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20616 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20617 TLI.allowsMemoryAccess(Context, DL, Ty,
20618 *FirstInChain->getMemOperand(), &IsFast) &&
20619 IsFast)
20620 LastLegalVectorType = i + 1;
20621 }
20622 }
20623
20624 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20625 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20626 bool UseTrunc = LastIntegerTrunc && !UseVector;
20627
20628 // Check if we found a legal integer type that creates a meaningful
20629 // merge.
20630 if (NumElem < 2) {
20631 // We know that candidate stores are in order and of correct
20632 // shape. While there is no mergeable sequence from the
20633 // beginning one may start later in the sequence. The only
20634 // reason a merge of size N could have failed where another of
20635 // the same size would not have, is if the alignment has
20636 // improved or we've dropped a non-zero value. Drop as many
20637 // candidates as we can here.
20638 unsigned NumSkip = 1;
20639 while ((NumSkip < NumConsecutiveStores) &&
20640 (NumSkip < FirstZeroAfterNonZero) &&
20641 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20642 NumSkip++;
20643
20644 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20645 NumConsecutiveStores -= NumSkip;
20646 continue;
20647 }
20648
20649 // Check that we can merge these candidates without causing a cycle.
20650 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20651 RootNode)) {
20652 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20653 NumConsecutiveStores -= NumElem;
20654 continue;
20655 }
20656
20657 MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20658 /*IsConstantSrc*/ true,
20659 UseVector, UseTrunc);
20660
20661 // Remove merged stores for next iteration.
20662 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20663 NumConsecutiveStores -= NumElem;
20664 }
20665 return MadeChange;
20666}
20667
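// The same idea for stores of extracted vector elements or subvectors: e.g.
// four scalar stores of extractelement <4 x float> %v, i for i = 0..3 can
// become a single <4 x float> store of a vector rebuilt from those elements,
// when such a store is legal and fast for the target.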
20668bool DAGCombiner::tryStoreMergeOfExtracts(
20669 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20670 EVT MemVT, SDNode *RootNode) {
20671 LLVMContext &Context = *DAG.getContext();
20672 const DataLayout &DL = DAG.getDataLayout();
20673 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20674 bool MadeChange = false;
20675
20676 // Loop on Consecutive Stores on success.
20677 while (NumConsecutiveStores >= 2) {
20678 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20679 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20680 Align FirstStoreAlign = FirstInChain->getAlign();
20681 unsigned NumStoresToMerge = 1;
20682 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20683 // Find a legal type for the vector store.
20684 unsigned Elts = (i + 1) * NumMemElts;
20685 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20686 unsigned IsFast = 0;
20687
20688 // Break early when size is too large to be legal.
20689 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20690 break;
20691
20692 if (TLI.isTypeLegal(Ty) &&
20693 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20694 TLI.allowsMemoryAccess(Context, DL, Ty,
20695 *FirstInChain->getMemOperand(), &IsFast) &&
20696 IsFast)
20697 NumStoresToMerge = i + 1;
20698 }
20699
20700 // Check if we found a legal integer type creating a meaningful
20701 // merge.
20702 if (NumStoresToMerge < 2) {
20703 // We know that candidate stores are in order and of correct
20704 // shape. While there is no mergeable sequence from the
20705 // beginning one may start later in the sequence. The only
20706 // reason a merge of size N could have failed where another of
20707 // the same size would not have, is if the alignment has
20708 // improved. Drop as many candidates as we can here.
20709 unsigned NumSkip = 1;
20710 while ((NumSkip < NumConsecutiveStores) &&
20711 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20712 NumSkip++;
20713
20714 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20715 NumConsecutiveStores -= NumSkip;
20716 continue;
20717 }
20718
20719 // Check that we can merge these candidates without causing a cycle.
20720 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20721 RootNode)) {
20722 StoreNodes.erase(StoreNodes.begin(),
20723 StoreNodes.begin() + NumStoresToMerge);
20724 NumConsecutiveStores -= NumStoresToMerge;
20725 continue;
20726 }
20727
20728 MadeChange |= mergeStoresOfConstantsOrVecElts(
20729 StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20730 /*UseVector*/ true, /*UseTrunc*/ false);
20731
20732 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20733 NumConsecutiveStores -= NumStoresToMerge;
20734 }
20735 return MadeChange;
20736}
20737
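// Handle the memcpy-like pattern where each stored value is itself a one-use,
// simple load from consecutive addresses off a common base, e.g.
//   a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; a[3] = b[3];
// which can become one wide load feeding one wide store, using a vector or
// integer type as the target allows (and a rotate for a reversed two-element
// pair).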
20738bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20739 unsigned NumConsecutiveStores, EVT MemVT,
20740 SDNode *RootNode, bool AllowVectors,
20741 bool IsNonTemporalStore,
20742 bool IsNonTemporalLoad) {
20743 LLVMContext &Context = *DAG.getContext();
20744 const DataLayout &DL = DAG.getDataLayout();
20745 int64_t ElementSizeBytes = MemVT.getStoreSize();
20746 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20747 bool MadeChange = false;
20748
20749 // Look for load nodes which are used by the stored values.
20750 SmallVector<MemOpLink, 8> LoadNodes;
20751
20752 // Find acceptable loads. Loads need to have the same chain (token factor),
20753 // must not be zext, volatile, indexed, and they must be consecutive.
20754 BaseIndexOffset LdBasePtr;
20755
20756 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20757 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20758 SDValue Val = peekThroughBitcasts(St->getValue());
20759 LoadSDNode *Ld = cast<LoadSDNode>(Val);
20760
20761 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20762 // If this is not the first ptr that we check.
20763 int64_t LdOffset = 0;
20764 if (LdBasePtr.getBase().getNode()) {
20765 // The base ptr must be the same.
20766 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20767 break;
20768 } else {
20769 // Check that all other base pointers are the same as this one.
20770 LdBasePtr = LdPtr;
20771 }
20772
20773 // We found a potential memory operand to merge.
20774 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20775 }
20776
20777 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20778 Align RequiredAlignment;
20779 bool NeedRotate = false;
20780 if (LoadNodes.size() == 2) {
20781 // If we have load/store pair instructions and we only have two values,
20782 // don't bother merging.
20783 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20784 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20785 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20786 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20787 break;
20788 }
20789 // If the loads are reversed, see if we can rotate the halves into place.
20790 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20791 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20792 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20793 if (Offset0 - Offset1 == ElementSizeBytes &&
20794 (hasOperation(ISD::ROTL, PairVT) ||
20795 hasOperation(ISD::ROTR, PairVT))) {
20796 std::swap(LoadNodes[0], LoadNodes[1]);
20797 NeedRotate = true;
20798 }
20799 }
20800 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20801 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20802 Align FirstStoreAlign = FirstInChain->getAlign();
20803 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20804
20805 // Scan the memory operations on the chain and find the first
20806 // non-consecutive load memory address. These variables hold the index in
20807 // the store node array.
20808
20809 unsigned LastConsecutiveLoad = 1;
20810
20811 // This variable refers to the size and not index in the array.
20812 unsigned LastLegalVectorType = 1;
20813 unsigned LastLegalIntegerType = 1;
20814 bool isDereferenceable = true;
20815 bool DoIntegerTruncate = false;
20816 int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20817 SDValue LoadChain = FirstLoad->getChain();
20818 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20819 // All loads must share the same chain.
20820 if (LoadNodes[i].MemNode->getChain() != LoadChain)
20821 break;
20822
20823 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20824 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20825 break;
20826 LastConsecutiveLoad = i;
20827
20828 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20829 isDereferenceable = false;
20830
20831 // Find a legal type for the vector store.
20832 unsigned Elts = (i + 1) * NumMemElts;
20833 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20834
20835 // Break early when size is too large to be legal.
20836 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20837 break;
20838
20839 unsigned IsFastSt = 0;
20840 unsigned IsFastLd = 0;
20841 // Don't try vector types if we need a rotate. We may still fail the
20842 // legality checks for the integer type, but we can't handle the rotate
20843 // case with vectors.
20844 // FIXME: We could use a shuffle in place of the rotate.
20845 if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20846 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20847 DAG.getMachineFunction()) &&
20848 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20849 *FirstInChain->getMemOperand(), &IsFastSt) &&
20850 IsFastSt &&
20851 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20852 *FirstLoad->getMemOperand(), &IsFastLd) &&
20853 IsFastLd) {
20854 LastLegalVectorType = i + 1;
20855 }
20856
20857 // Find a legal type for the integer store.
20858 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20859 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20860 if (TLI.isTypeLegal(StoreTy) &&
20861 TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20862 DAG.getMachineFunction()) &&
20863 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20864 *FirstInChain->getMemOperand(), &IsFastSt) &&
20865 IsFastSt &&
20866 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20867 *FirstLoad->getMemOperand(), &IsFastLd) &&
20868 IsFastLd) {
20869 LastLegalIntegerType = i + 1;
20870 DoIntegerTruncate = false;
20871 // Or check whether a truncstore and extload is legal.
20872 } else if (TLI.getTypeAction(Context, StoreTy) ==
20873 TargetLowering::TypePromoteInteger) {
20874 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20875 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20876 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20877 DAG.getMachineFunction()) &&
20878 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20879 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20880 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20881 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20882 *FirstInChain->getMemOperand(), &IsFastSt) &&
20883 IsFastSt &&
20884 TLI.allowsMemoryAccess(Context, DL, StoreTy,
20885 *FirstLoad->getMemOperand(), &IsFastLd) &&
20886 IsFastLd) {
20887 LastLegalIntegerType = i + 1;
20888 DoIntegerTruncate = true;
20889 }
20890 }
20891 }
20892
20893 // Only use vector types if the vector type is larger than the integer
20894 // type. If they are the same, use integers.
20895 bool UseVectorTy =
20896 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20897 unsigned LastLegalType =
20898 std::max(LastLegalVectorType, LastLegalIntegerType);
20899
20900 // We add +1 here because the LastXXX variables refer to location while
20901 // the NumElem refers to array/index size.
20902 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20903 NumElem = std::min(LastLegalType, NumElem);
20904 Align FirstLoadAlign = FirstLoad->getAlign();
20905
20906 if (NumElem < 2) {
20907 // We know that candidate stores are in order and of correct
20908 // shape. While there is no mergeable sequence from the
20909 // beginning one may start later in the sequence. The only
20910 // reason a merge of size N could have failed where another of
20911 // the same size would not have is if the alignment or either
20912 // the load or store has improved. Drop as many candidates as we
20913 // can here.
20914 unsigned NumSkip = 1;
20915 while ((NumSkip < LoadNodes.size()) &&
20916 (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20917 (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20918 NumSkip++;
20919 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20920 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20921 NumConsecutiveStores -= NumSkip;
20922 continue;
20923 }
20924
20925 // Check that we can merge these candidates without causing a cycle.
20926 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20927 RootNode)) {
20928 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20929 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20930 NumConsecutiveStores -= NumElem;
20931 continue;
20932 }
20933
20934 // Find if it is better to use vectors or integers to load and store
20935 // to memory.
20936 EVT JointMemOpVT;
20937 if (UseVectorTy) {
20938 // Find a legal type for the vector store.
20939 unsigned Elts = NumElem * NumMemElts;
20940 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20941 } else {
20942 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20943 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20944 }
20945
20946 SDLoc LoadDL(LoadNodes[0].MemNode);
20947 SDLoc StoreDL(StoreNodes[0].MemNode);
20948
20949 // The merged loads are required to have the same incoming chain, so
20950 // using the first's chain is acceptable.
20951
20952 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20953 bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20954 AddToWorklist(NewStoreChain.getNode());
20955
20956 MachineMemOperand::Flags LdMMOFlags =
20957 isDereferenceable ? MachineMemOperand::MODereferenceable
20958 : MachineMemOperand::MONone;
20959 if (IsNonTemporalLoad)
20960 LdMMOFlags |= MachineMemOperand::MONonTemporal;
20961
20962 LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20963
20964 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20965 ? MachineMemOperand::MONonTemporal
20966 : MachineMemOperand::MONone;
20967
20968 StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20969
20970 SDValue NewLoad, NewStore;
20971 if (UseVectorTy || !DoIntegerTruncate) {
20972 NewLoad = DAG.getLoad(
20973 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20974 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20975 SDValue StoreOp = NewLoad;
20976 if (NeedRotate) {
20977 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20978 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20979 "Unexpected type for rotate-able load pair");
20980 SDValue RotAmt =
20981 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20982 // Target can convert to the identical ROTR if it does not have ROTL.
20983 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20984 }
20985 NewStore = DAG.getStore(
20986 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20987 CanReusePtrInfo ? FirstInChain->getPointerInfo()
20988 : MachinePointerInfo(FirstStoreAS),
20989 FirstStoreAlign, StMMOFlags);
20990 } else { // This must be the truncstore/extload case
20991 EVT ExtendedTy =
20992 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20993 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20994 FirstLoad->getChain(), FirstLoad->getBasePtr(),
20995 FirstLoad->getPointerInfo(), JointMemOpVT,
20996 FirstLoadAlign, LdMMOFlags);
20997 NewStore = DAG.getTruncStore(
20998 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20999 CanReusePtrInfo ? FirstInChain->getPointerInfo()
21000 : MachinePointerInfo(FirstStoreAS),
21001 JointMemOpVT, FirstInChain->getAlign(),
21002 FirstInChain->getMemOperand()->getFlags());
21003 }
21004
21005 // Transfer chain users from old loads to the new load.
21006 for (unsigned i = 0; i < NumElem; ++i) {
21007 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
21008 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
21009 SDValue(NewLoad.getNode(), 1));
21010 }
21011
21012 // Replace all stores with the new store. Recursively remove corresponding
21013 // values if they are no longer used.
21014 for (unsigned i = 0; i < NumElem; ++i) {
21015 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
21016 CombineTo(StoreNodes[i].MemNode, NewStore);
21017 if (Val->use_empty())
21018 recursivelyDeleteUnusedNodes(Val.getNode());
21019 }
21020
21021 MadeChange = true;
21022 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
21023 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
21024 NumConsecutiveStores -= NumElem;
21025 }
21026 return MadeChange;
21027}
21028
21029bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21030 if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
21031 return false;
21032
21033 // TODO: Extend this function to merge stores of scalable vectors.
21034 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
21035 // store since we know <vscale x 16 x i8> is exactly twice as large as
21036 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
21037 EVT MemVT = St->getMemoryVT();
21038 if (MemVT.isScalableVT())
21039 return false;
21040 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
21041 return false;
21042
21043 // This function cannot currently deal with non-byte-sized memory sizes.
21044 int64_t ElementSizeBytes = MemVT.getStoreSize();
21045 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
21046 return false;
21047
21048 // Do not bother looking at stored values that are not constants, loads, or
21049 // extracted vector elements.
21050 SDValue StoredVal = peekThroughBitcasts(St->getValue());
21051 const StoreSource StoreSrc = getStoreSource(StoredVal);
21052 if (StoreSrc == StoreSource::Unknown)
21053 return false;
21054
21055 SmallVector<MemOpLink, 8> StoreNodes;
21056 SDNode *RootNode;
21057 // Find potential store merge candidates by searching through chain sub-DAG
21058 getStoreMergeCandidates(St, StoreNodes, RootNode);
21059
21060 // Check if there is anything to merge.
21061 if (StoreNodes.size() < 2)
21062 return false;
21063
21064 // Sort the memory operands according to their distance from the
21065 // base pointer.
21066 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
21067 return LHS.OffsetFromBase < RHS.OffsetFromBase;
21068 });
21069
21070 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
21071 Attribute::NoImplicitFloat);
21072 bool IsNonTemporalStore = St->isNonTemporal();
21073 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
21074 cast<LoadSDNode>(StoredVal)->isNonTemporal();
21075
21076 // Store merging attempts to merge the lowest-addressed stores first. This
21077 // generally works out if it is successful, as the remaining stores are
21078 // checked after the first collection of stores is merged. However, in the
21079 // case that a non-mergeable store is found first, e.g., {p[-2],
21080 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
21081 // mergeable cases. To prevent this, we prune such stores from the
21082 // front of StoreNodes here.
21083 bool MadeChange = false;
21084 while (StoreNodes.size() > 1) {
21085 unsigned NumConsecutiveStores =
21086 getConsecutiveStores(StoreNodes, ElementSizeBytes);
21087 // There are no more stores in the list to examine.
21088 if (NumConsecutiveStores == 0)
21089 return MadeChange;
21090
21091 // We have at least 2 consecutive stores. Try to merge them.
21092 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
21093 switch (StoreSrc) {
21094 case StoreSource::Constant:
21095 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
21096 MemVT, RootNode, AllowVectors);
21097 break;
21098
21099 case StoreSource::Extract:
21100 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
21101 MemVT, RootNode);
21102 break;
21103
21104 case StoreSource::Load:
21105 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
21106 MemVT, RootNode, AllowVectors,
21107 IsNonTemporalStore, IsNonTemporalLoad);
21108 break;
21109
21110 default:
21111 llvm_unreachable("Unhandled store source type");
21112 }
21113 }
21114 return MadeChange;
21115}
21116
21117SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
21118 SDLoc SL(ST);
21119 SDValue ReplStore;
21120
21121 // Replace the chain to avoid dependency.
21122 if (ST->isTruncatingStore()) {
21123 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
21124 ST->getBasePtr(), ST->getMemoryVT(),
21125 ST->getMemOperand());
21126 } else {
21127 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
21128 ST->getMemOperand());
21129 }
21130
21131 // Create token to keep both nodes around.
21132 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
21133 MVT::Other, ST->getChain(), ReplStore);
21134
21135 // Make sure the new and old chains are cleaned up.
21136 AddToWorklist(Token.getNode());
21137
21138 // Don't add users to work list.
21139 return CombineTo(ST, Token, false);
21140}
21141
21142SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
21143 SDValue Value = ST->getValue();
21144 if (Value.getOpcode() == ISD::TargetConstantFP)
21145 return SDValue();
21146
21147 if (!ISD::isNormalStore(ST))
21148 return SDValue();
21149
21150 SDLoc DL(ST);
21151
21152 SDValue Chain = ST->getChain();
21153 SDValue Ptr = ST->getBasePtr();
21154
21155 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
21156
21157 // NOTE: If the original store is volatile, this transform must not increase
21158 // the number of stores. For example, on x86-32 an f64 can be stored in one
21159 // processor operation but an i64 (which is not legal) requires two. So the
21160 // transform should not be done in this case.
21161
21162 SDValue Tmp;
21163 switch (CFP->getSimpleValueType(0).SimpleTy) {
21164 default:
21165 llvm_unreachable("Unknown FP type");
21166 case MVT::f16: // We don't do this for these yet.
21167 case MVT::bf16:
21168 case MVT::f80:
21169 case MVT::f128:
21170 case MVT::ppcf128:
21171 return SDValue();
21172 case MVT::f32:
21173 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
21174 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
21175 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
21176 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
21177 MVT::i32);
21178 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
21179 }
21180
21181 return SDValue();
21182 case MVT::f64:
21183 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
21184 ST->isSimple()) ||
21185 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
21186 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
21187 getZExtValue(), SDLoc(CFP), MVT::i64);
21188 return DAG.getStore(Chain, DL, Tmp,
21189 Ptr, ST->getMemOperand());
21190 }
21191
21192 if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
21193 !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
21194 // Many FP stores are not made apparent until after legalize, e.g. for
21195 // argument passing. Since this is so common, custom legalize the
21196 // 64-bit integer store into two 32-bit stores.
21197      uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
21198      SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
21199 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
21200 if (DAG.getDataLayout().isBigEndian())
21201 std::swap(Lo, Hi);
21202
21203 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21204 AAMDNodes AAInfo = ST->getAAInfo();
21205
21206 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21207 ST->getOriginalAlign(), MMOFlags, AAInfo);
21208      Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
21209      SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
21210 ST->getPointerInfo().getWithOffset(4),
21211 ST->getOriginalAlign(), MMOFlags, AAInfo);
21212 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
21213 St0, St1);
21214 }
21215
21216 return SDValue();
21217 }
21218}
21219
21220// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
21221//
21222// If a store of a load with an element inserted into it has no other
21223// uses in between the chain, then we can consider the vector store
21224// dead and replace it with just the single scalar element store.
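// Illustrative example (added commentary, not from the original source; %p and
// %x are placeholders):
//   t0 = load <4 x i32> from %p
//   t1 = insert_vector_elt t0, %x, 2
//   store t1 to %p
// can become a single scalar store of %x to %p + 8, assuming the chain shows
// no other use of the loaded vector between the load and the store.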
21225SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
21226 SDLoc DL(ST);
21227 SDValue Value = ST->getValue();
21228 SDValue Ptr = ST->getBasePtr();
21229 SDValue Chain = ST->getChain();
21230 if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21231 return SDValue();
21232
21233 SDValue Elt = Value.getOperand(1);
21234 SDValue Idx = Value.getOperand(2);
21235
21236 // If the element isn't byte sized or is implicitly truncated then we can't
21237 // compute an offset.
21238 EVT EltVT = Elt.getValueType();
21239 if (!EltVT.isByteSized() ||
21240 EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21241 return SDValue();
21242
21243 auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21244 if (!Ld || Ld->getBasePtr() != Ptr ||
21245 ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21246 !ISD::isNormalStore(ST) ||
21247 Ld->getAddressSpace() != ST->getAddressSpace() ||
21248      !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21249    return SDValue();
21250
21251 unsigned IsFast;
21252 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21253 Elt.getValueType(), ST->getAddressSpace(),
21254 ST->getAlign(), ST->getMemOperand()->getFlags(),
21255 &IsFast) ||
21256 !IsFast)
21257 return SDValue();
21258
21259 MachinePointerInfo PointerInfo(ST->getAddressSpace());
21260
21261 // If the offset is a known constant then try to recover the pointer
21262 // info
21263 SDValue NewPtr;
21264 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21265 unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21266 NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21267 PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21268 } else {
21269 NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21270 }
21271
21272 return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21273 ST->getMemOperand()->getFlags());
21274}
21275
21276SDValue DAGCombiner::visitATOMIC_STORE(SDNode *N) {
21277 AtomicSDNode *ST = cast<AtomicSDNode>(N);
21278 SDValue Val = ST->getVal();
21279 EVT VT = Val.getValueType();
21280 EVT MemVT = ST->getMemoryVT();
21281
21282 if (MemVT.bitsLT(VT)) { // Is truncating store
21283 APInt TruncDemandedBits = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
21284 MemVT.getScalarSizeInBits());
21285 // See if we can simplify the operation with SimplifyDemandedBits, which
21286 // only works if the value has a single use.
21287 if (SimplifyDemandedBits(Val, TruncDemandedBits))
21288 return SDValue(N, 0);
21289 }
21290
21291 return SDValue();
21292}
21293
21294SDValue DAGCombiner::visitSTORE(SDNode *N) {
21295 StoreSDNode *ST = cast<StoreSDNode>(N);
21296 SDValue Chain = ST->getChain();
21297 SDValue Value = ST->getValue();
21298 SDValue Ptr = ST->getBasePtr();
21299
21300 // If this is a store of a bit convert, store the input value if the
21301 // resultant store does not need a higher alignment than the original.
21302 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21303 ST->isUnindexed()) {
21304 EVT SVT = Value.getOperand(0).getValueType();
21305 // If the store is volatile, we only want to change the store type if the
21306 // resulting store is legal. Otherwise we might increase the number of
21307 // memory accesses. We don't care if the original type was legal or not
21308 // as we assume software couldn't rely on the number of accesses of an
21309 // illegal type.
21310 // TODO: May be able to relax for unordered atomics (see D66309)
21311 if (((!LegalOperations && ST->isSimple()) ||
21312 TLI.isOperationLegal(ISD::STORE, SVT)) &&
21313 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21314 DAG, *ST->getMemOperand())) {
21315 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21316 ST->getMemOperand());
21317 }
21318 }
21319
21320 // Turn 'store undef, Ptr' -> nothing.
21321 if (Value.isUndef() && ST->isUnindexed())
21322 return Chain;
21323
21324 // Try to infer better alignment information than the store already has.
21325 if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21326 !ST->isAtomic()) {
21327 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21328 if (*Alignment > ST->getAlign() &&
21329 isAligned(*Alignment, ST->getSrcValueOffset())) {
21330 SDValue NewStore =
21331 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21332 ST->getMemoryVT(), *Alignment,
21333 ST->getMemOperand()->getFlags(), ST->getAAInfo());
21334 // NewStore will always be N as we are only refining the alignment
21335 assert(NewStore.getNode() == N);
21336 (void)NewStore;
21337 }
21338 }
21339 }
21340
21341  // Try transforming a pair of floating point load / store ops to integer
21342 // load / store ops.
21343 if (SDValue NewST = TransformFPLoadStorePair(N))
21344 return NewST;
21345
21346 // Try transforming several stores into STORE (BSWAP).
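  // For instance (illustrative, added commentary): consecutive i8 truncating
  // stores of the bytes of an i32 value in byte-reversed order can become a
  // single i32 store of (bswap x) when the target supports it.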
21347 if (SDValue Store = mergeTruncStores(ST))
21348 return Store;
21349
21350 if (ST->isUnindexed()) {
21351 // Walk up chain skipping non-aliasing memory nodes, on this store and any
21352 // adjacent stores.
21353 if (findBetterNeighborChains(ST)) {
21354 // replaceStoreChain uses CombineTo, which handled all of the worklist
21355 // manipulation. Return the original node to not do anything else.
21356 return SDValue(ST, 0);
21357 }
21358 Chain = ST->getChain();
21359 }
21360
21361 // FIXME: is there such a thing as a truncating indexed store?
21362 if (ST->isTruncatingStore() && ST->isUnindexed() &&
21363 Value.getValueType().isInteger() &&
21364 (!isa<ConstantSDNode>(Value) ||
21365 !cast<ConstantSDNode>(Value)->isOpaque())) {
21366    // Convert a truncating store of an extension into a standard store.
21367 if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21368 Value.getOpcode() == ISD::SIGN_EXTEND ||
21369 Value.getOpcode() == ISD::ANY_EXTEND) &&
21370 Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21371 TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21372 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21373 ST->getMemOperand());
21374
21375 APInt TruncDemandedBits =
21376 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21377 ST->getMemoryVT().getScalarSizeInBits());
21378
21379 // See if we can simplify the operation with SimplifyDemandedBits, which
21380 // only works if the value has a single use.
21381 AddToWorklist(Value.getNode());
21382 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
21383 // Re-visit the store if anything changed and the store hasn't been merged
21384      // with another node (N is deleted). SimplifyDemandedBits will add Value's
21385 // node back to the worklist if necessary, but we also need to re-visit
21386 // the Store node itself.
21387 if (N->getOpcode() != ISD::DELETED_NODE)
21388 AddToWorklist(N);
21389 return SDValue(N, 0);
21390 }
21391
21392 // Otherwise, see if we can simplify the input to this truncstore with
21393 // knowledge that only the low bits are being used. For example:
21394 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
21395 if (SDValue Shorter =
21396 TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21397 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21398 ST->getMemOperand());
21399
21400 // If we're storing a truncated constant, see if we can simplify it.
21401 // TODO: Move this to targetShrinkDemandedConstant?
21402 if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21403 if (!Cst->isOpaque()) {
21404 const APInt &CValue = Cst->getAPIntValue();
21405 APInt NewVal = CValue & TruncDemandedBits;
21406 if (NewVal != CValue) {
21407 SDValue Shorter =
21408 DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21409 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21410 ST->getMemoryVT(), ST->getMemOperand());
21411 }
21412 }
21413 }
21414
21415 // If this is a load followed by a store to the same location, then the store
21416 // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21417 // TODO: Add big-endian truncate support with test coverage.
21418 // TODO: Can relax for unordered atomics (see D66309)
21419 SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21420                          ? peekThroughTruncates(Value)
21421                          : Value;
21422 if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21423 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21424 ST->isUnindexed() && ST->isSimple() &&
21425 Ld->getAddressSpace() == ST->getAddressSpace() &&
21426 // There can't be any side effects between the load and store, such as
21427 // a call or store.
21428        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21429      // The store is dead, remove it.
21430 return Chain;
21431 }
21432 }
21433
21434 // Try scalarizing vector stores of loads where we only change one element
21435 if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21436 return NewST;
21437
21438 // TODO: Can relax for unordered atomics (see D66309)
21439 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21440 if (ST->isUnindexed() && ST->isSimple() &&
21441 ST1->isUnindexed() && ST1->isSimple()) {
21442 if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21443 ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21444 ST->getAddressSpace() == ST1->getAddressSpace()) {
21445 // If this is a store followed by a store with the same value to the
21446 // same location, then the store is dead/noop.
21447 return Chain;
21448 }
21449
21450 if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21451 !ST1->getBasePtr().isUndef() &&
21452 ST->getAddressSpace() == ST1->getAddressSpace()) {
21453        // If one of the two stores has a scalable vector type and the
21454        // other a larger fixed-size type, we cannot allow removal of the
21455        // scalable store, because we do not know its final size until
21456        // runtime.
21457 if (ST->getMemoryVT().isScalableVector() ||
21458 ST1->getMemoryVT().isScalableVector()) {
21459 if (ST1->getBasePtr() == Ptr &&
21460 TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21461 ST->getMemoryVT().getStoreSize())) {
21462 CombineTo(ST1, ST1->getChain());
21463 return SDValue(N, 0);
21464 }
21465 } else {
21466 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21467 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
21468          // If the preceding store writes to a subset of the current store's
21469          // location and no other node is chained to that store, we can
21470          // effectively drop the store. Do not remove stores to undef as they
21471 // may be used as data sinks.
21472 if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21473 ChainBase,
21474 ST1->getMemoryVT().getFixedSizeInBits())) {
21475 CombineTo(ST1, ST1->getChain());
21476 return SDValue(N, 0);
21477 }
21478 }
21479 }
21480 }
21481 }
21482
21483 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21484 // truncating store. We can do this even if this is already a truncstore.
21485 if ((Value.getOpcode() == ISD::FP_ROUND ||
21486 Value.getOpcode() == ISD::TRUNCATE) &&
21487 Value->hasOneUse() && ST->isUnindexed() &&
21488 TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21489 ST->getMemoryVT(), LegalOperations)) {
21490 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21491 Ptr, ST->getMemoryVT(), ST->getMemOperand());
21492 }
21493
21494 // Always perform this optimization before types are legal. If the target
21495 // prefers, also try this after legalization to catch stores that were created
21496 // by intrinsics or other nodes.
21497 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21498 while (true) {
21499 // There can be multiple store sequences on the same chain.
21500 // Keep trying to merge store sequences until we are unable to do so
21501 // or until we merge the last store on the chain.
21502 bool Changed = mergeConsecutiveStores(ST);
21503 if (!Changed) break;
21504 // Return N as merge only uses CombineTo and no worklist clean
21505 // up is necessary.
21506 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21507 return SDValue(N, 0);
21508 }
21509 }
21510
21511 // Try transforming N to an indexed store.
21512 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21513 return SDValue(N, 0);
21514
21515 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21516 //
21517 // Make sure to do this only after attempting to merge stores in order to
21518 // avoid changing the types of some subset of stores due to visit order,
21519 // preventing their merging.
21520 if (isa<ConstantFPSDNode>(ST->getValue())) {
21521 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21522 return NewSt;
21523 }
21524
21525 if (SDValue NewSt = splitMergedValStore(ST))
21526 return NewSt;
21527
21528 return ReduceLoadOpStoreWidth(N);
21529}
21530
21531SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21532 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21533 if (!LifetimeEnd->hasOffset())
21534 return SDValue();
21535
21536 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21537 LifetimeEnd->getOffset(), false);
21538
21539 // We walk up the chains to find stores.
21540 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21541 while (!Chains.empty()) {
21542 SDValue Chain = Chains.pop_back_val();
21543 if (!Chain.hasOneUse())
21544 continue;
21545 switch (Chain.getOpcode()) {
21546 case ISD::TokenFactor:
21547 for (unsigned Nops = Chain.getNumOperands(); Nops;)
21548 Chains.push_back(Chain.getOperand(--Nops));
21549 break;
21550    case ISD::LIFETIME_START:
21551    case ISD::LIFETIME_END:
21552 // We can forward past any lifetime start/end that can be proven not to
21553 // alias the node.
21554 if (!mayAlias(Chain.getNode(), N))
21555 Chains.push_back(Chain.getOperand(0));
21556 break;
21557 case ISD::STORE: {
21558 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
21559 // TODO: Can relax for unordered atomics (see D66309)
21560 if (!ST->isSimple() || ST->isIndexed())
21561 continue;
21562 const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21563 // The bounds of a scalable store are not known until runtime, so this
21564 // store cannot be elided.
21565 if (StoreSize.isScalable())
21566 continue;
21567 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21568 // If we store purely within object bounds just before its lifetime ends,
21569 // we can remove the store.
21570 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21571 StoreSize.getFixedValue() * 8)) {
21572 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21573 dbgs() << "\nwithin LIFETIME_END of : ";
21574 LifetimeEndBase.dump(); dbgs() << "\n");
21575 CombineTo(ST, ST->getChain());
21576 return SDValue(N, 0);
21577 }
21578 }
21579 }
21580 }
21581 return SDValue();
21582}
21583
21584/// For the instruction sequence of store below, F and I values
21585/// are bundled together as an i64 value before being stored into memory.
21586/// Sometimes it is more efficient to generate separate stores for F and I,
21587/// which can remove the bitwise instructions or sink them to colder places.
21588///
21589/// (store (or (zext (bitcast F to i32) to i64),
21590/// (shl (zext I to i64), 32)), addr) -->
21591/// (store F, addr) and (store I, addr+4)
21592///
21593/// Similarly, splitting for other merged stores can also be beneficial, like:
21594/// For pair of {i32, i32}, i64 store --> two i32 stores.
21595/// For pair of {i32, i16}, i64 store --> two i32 stores.
21596/// For pair of {i16, i16}, i32 store --> two i16 stores.
21597/// For pair of {i16, i8}, i32 store --> two i16 stores.
21598/// For pair of {i8, i8}, i16 store --> two i8 stores.
21599///
21600/// We allow each target to determine specifically which kind of splitting is
21601/// supported.
21602///
21603/// The store patterns are commonly seen from the simple code snippet below
21604/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
21605/// void goo(const std::pair<int, float> &);
21606/// hoo() {
21607/// ...
21608/// goo(std::make_pair(tmp, ftmp));
21609/// ...
21610/// }
21611///
21612SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21613 if (OptLevel == CodeGenOptLevel::None)
21614 return SDValue();
21615
21616 // Can't change the number of memory accesses for a volatile store or break
21617 // atomicity for an atomic one.
21618 if (!ST->isSimple())
21619 return SDValue();
21620
21621 SDValue Val = ST->getValue();
21622 SDLoc DL(ST);
21623
21624 // Match OR operand.
21625 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21626 return SDValue();
21627
21628 // Match SHL operand and get Lower and Higher parts of Val.
21629 SDValue Op1 = Val.getOperand(0);
21630 SDValue Op2 = Val.getOperand(1);
21631 SDValue Lo, Hi;
21632 if (Op1.getOpcode() != ISD::SHL) {
21633 std::swap(Op1, Op2);
21634 if (Op1.getOpcode() != ISD::SHL)
21635 return SDValue();
21636 }
21637 Lo = Op2;
21638 Hi = Op1.getOperand(0);
21639 if (!Op1.hasOneUse())
21640 return SDValue();
21641
21642 // Match shift amount to HalfValBitSize.
21643 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21644 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21645 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21646 return SDValue();
21647
21648  // Lo and Hi are zero-extended from an integer whose size is less than or
21649  // equal to HalfValBitSize (e.g. i32 zero-extended to i64).
21650 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21651 !Lo.getOperand(0).getValueType().isScalarInteger() ||
21652 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21653 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21654 !Hi.getOperand(0).getValueType().isScalarInteger() ||
21655 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21656 return SDValue();
21657
21658 // Use the EVT of low and high parts before bitcast as the input
21659 // of target query.
21660 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21661 ? Lo.getOperand(0).getValueType()
21662 : Lo.getValueType();
21663 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21664 ? Hi.getOperand(0).getValueType()
21665 : Hi.getValueType();
21666 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21667 return SDValue();
21668
21669 // Start to split store.
21670 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21671 AAMDNodes AAInfo = ST->getAAInfo();
21672
21673 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21674 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21675 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21676 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21677
21678 SDValue Chain = ST->getChain();
21679 SDValue Ptr = ST->getBasePtr();
21680 // Lower value store.
21681 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21682 ST->getOriginalAlign(), MMOFlags, AAInfo);
21683 Ptr =
21684 DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21685 // Higher value store.
21686 SDValue St1 = DAG.getStore(
21687 St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21688 ST->getOriginalAlign(), MMOFlags, AAInfo);
21689 return St1;
21690}
21691
21692// Merge an insertion into an existing shuffle:
21693// (insert_vector_elt (vector_shuffle X, Y, Mask),
21694//                    (extract_vector_elt X, N), InsIndex)
21695// --> (vector_shuffle X, Y, NewMask)
21696// and variations where shuffle operands may be CONCAT_VECTORS.
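// Illustrative example (added commentary, not from the original source):
//   V = vector_shuffle X, Y, <0, 5, 2, 7>
//   insert_vector_elt V, (extract_vector_elt X, 3), 1
// can instead update the existing mask in place:
//   vector_shuffle X, Y, <0, 3, 2, 7>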
21697static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21698                                SmallVectorImpl<int> &NewMask, SDValue Elt,
21699 unsigned InsIndex) {
21700 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21701 !isa<ConstantSDNode>(Elt.getOperand(1)))
21702 return false;
21703
21704 // Vec's operand 0 is using indices from 0 to N-1 and
21705 // operand 1 from N to 2N - 1, where N is the number of
21706 // elements in the vectors.
21707 SDValue InsertVal0 = Elt.getOperand(0);
21708 int ElementOffset = -1;
21709
21710 // We explore the inputs of the shuffle in order to see if we find the
21711 // source of the extract_vector_elt. If so, we can use it to modify the
21712 // shuffle rather than perform an insert_vector_elt.
21713  SmallVector<std::pair<int, SDValue>> ArgWorkList;
21714  ArgWorkList.emplace_back(Mask.size(), Y);
21715 ArgWorkList.emplace_back(0, X);
21716
21717 while (!ArgWorkList.empty()) {
21718 int ArgOffset;
21719 SDValue ArgVal;
21720 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21721
21722 if (ArgVal == InsertVal0) {
21723 ElementOffset = ArgOffset;
21724 break;
21725 }
21726
21727 // Peek through concat_vector.
21728 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21729 int CurrentArgOffset =
21730 ArgOffset + ArgVal.getValueType().getVectorNumElements();
21731 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21732 for (SDValue Op : reverse(ArgVal->ops())) {
21733 CurrentArgOffset -= Step;
21734 ArgWorkList.emplace_back(CurrentArgOffset, Op);
21735 }
21736
21737 // Make sure we went through all the elements and did not screw up index
21738 // computation.
21739 assert(CurrentArgOffset == ArgOffset);
21740 }
21741 }
21742
21743 // If we failed to find a match, see if we can replace an UNDEF shuffle
21744 // operand.
21745 if (ElementOffset == -1) {
21746 if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21747 return false;
21748 ElementOffset = Mask.size();
21749 Y = InsertVal0;
21750 }
21751
21752 NewMask.assign(Mask.begin(), Mask.end());
21753 NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21754 assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21755 "NewMask[InsIndex] is out of bound");
21756 return true;
21757}
21758
21759// Merge an insertion into an existing shuffle:
21760// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21761// InsIndex)
21762// --> (vector_shuffle X, Y) and variations where shuffle operands may be
21763// CONCAT_VECTORS.
21764SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21765 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21766         "Expected insert_vector_elt");
21767 SDValue InsertVal = N->getOperand(1);
21768 SDValue Vec = N->getOperand(0);
21769
21770 auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21771 if (!SVN || !Vec.hasOneUse())
21772 return SDValue();
21773
21774 ArrayRef<int> Mask = SVN->getMask();
21775 SDValue X = Vec.getOperand(0);
21776 SDValue Y = Vec.getOperand(1);
21777
21778 SmallVector<int, 16> NewMask(Mask);
21779 if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21780 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21781 Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21782 if (LegalShuffle)
21783 return LegalShuffle;
21784 }
21785
21786 return SDValue();
21787}
21788
21789// Convert a disguised subvector insertion into a shuffle:
21790// insert_vector_elt V, (bitcast X from vector type), IdxC -->
21791// bitcast(shuffle (bitcast V), (extended X), Mask)
21792// Note: We do not use an insert_subvector node because that requires a
21793// legal subvector type.
21794SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21795 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
21796         "Expected insert_vector_elt");
21797 SDValue InsertVal = N->getOperand(1);
21798
21799 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21800 !InsertVal.getOperand(0).getValueType().isVector())
21801 return SDValue();
21802
21803 SDValue SubVec = InsertVal.getOperand(0);
21804 SDValue DestVec = N->getOperand(0);
21805 EVT SubVecVT = SubVec.getValueType();
21806 EVT VT = DestVec.getValueType();
21807 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
21808  // If the source only has a single vector element, the cost of creating and
21809  // shuffling in a padded vector is likely to exceed the cost of an insert_vector_elt.
21810 if (NumSrcElts == 1)
21811 return SDValue();
21812 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21813 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21814
21815 // Step 1: Create a shuffle mask that implements this insert operation. The
21816 // vector that we are inserting into will be operand 0 of the shuffle, so
21817 // those elements are just 'i'. The inserted subvector is in the first
21818 // positions of operand 1 of the shuffle. Example:
21819 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21820 SmallVector<int, 16> Mask(NumMaskVals);
21821 for (unsigned i = 0; i != NumMaskVals; ++i) {
21822 if (i / NumSrcElts == InsIndex)
21823 Mask[i] = (i % NumSrcElts) + NumMaskVals;
21824 else
21825 Mask[i] = i;
21826 }
21827
21828 // Bail out if the target can not handle the shuffle we want to create.
21829 EVT SubVecEltVT = SubVecVT.getVectorElementType();
21830 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21831 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21832 return SDValue();
21833
21834 // Step 2: Create a wide vector from the inserted source vector by appending
21835 // undefined elements. This is the same size as our destination vector.
21836 SDLoc DL(N);
21837 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21838 ConcatOps[0] = SubVec;
21839 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21840
21841 // Step 3: Shuffle in the padded subvector.
21842 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21843 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21844 AddToWorklist(PaddedSubV.getNode());
21845 AddToWorklist(DestVecBC.getNode());
21846 AddToWorklist(Shuf.getNode());
21847 return DAG.getBitcast(VT, Shuf);
21848}
21849
21850// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
21851// possible and the new load will be fast. We use more loads but fewer shuffles
21852// and inserts.
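// Illustrative example (added commentary, not from the original source), with
// v4i32 and InsIndex == 0 (%p is a placeholder):
//   v = load <4 x i32> from %p
//   s = load i32 from %p - 4
//   insert_vector_elt (vector_shuffle v, undef, <u,0,1,2>), s, 0
// is equivalent to a single load <4 x i32> from %p - 4 when the two loads are
// consecutive and the resulting (possibly unaligned) access is fast.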
21853SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21854 EVT VT = N->getValueType(0);
21855
21856  // InsIndex is expected to be the first or last lane.
21857 if (!VT.isFixedLengthVector() ||
21858 (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21859 return SDValue();
21860
21861 // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21862 // depending on the InsIndex.
21863 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21864 SDValue Scalar = N->getOperand(1);
21865 if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21866 return InsIndex == P.index() || P.value() < 0 ||
21867 (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21868 (InsIndex == VT.getVectorNumElements() - 1 &&
21869 P.value() == (int)P.index() + 1);
21870 }))
21871 return SDValue();
21872
21873 // We optionally skip over an extend so long as both loads are extended in the
21874 // same way from the same type.
21875 unsigned Extend = 0;
21876 if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21877 Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21878 Scalar.getOpcode() == ISD::ANY_EXTEND) {
21879 Extend = Scalar.getOpcode();
21880 Scalar = Scalar.getOperand(0);
21881 }
21882
21883 auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21884 if (!ScalarLoad)
21885 return SDValue();
21886
21887 SDValue Vec = Shuffle->getOperand(0);
21888 if (Extend) {
21889 if (Vec.getOpcode() != Extend)
21890 return SDValue();
21891 Vec = Vec.getOperand(0);
21892 }
21893 auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21894 if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21895 return SDValue();
21896
21897 int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21898 if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21899 !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21900 ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21901 ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21902 return SDValue();
21903
21904  // Check that the offset between the pointers is such that the two loads
21905  // form a single contiguous load.
21906 if (InsIndex == 0) {
21907 if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21908 -1))
21909 return SDValue();
21910 } else {
21911    if (!DAG.areNonVolatileConsecutiveLoads(
21912            VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21913 return SDValue();
21914 }
21915
21916 // And that the new unaligned load will be fast.
21917 unsigned IsFast = 0;
21918 Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21919 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21920 Vec.getValueType(), VecLoad->getAddressSpace(),
21921 NewAlign, VecLoad->getMemOperand()->getFlags(),
21922 &IsFast) ||
21923 !IsFast)
21924 return SDValue();
21925
21926 // Calculate the new Ptr and create the new load.
21927 SDLoc DL(N);
21928 SDValue Ptr = ScalarLoad->getBasePtr();
21929 if (InsIndex != 0)
21930 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21931 DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21932 MachinePointerInfo PtrInfo =
21933 InsIndex == 0 ? ScalarLoad->getPointerInfo()
21934 : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21935
21936 SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21937 ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21938 DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21939 DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21940 return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21941}
21942
21943SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21944 SDValue InVec = N->getOperand(0);
21945 SDValue InVal = N->getOperand(1);
21946 SDValue EltNo = N->getOperand(2);
21947 SDLoc DL(N);
21948
21949 EVT VT = InVec.getValueType();
21950 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21951
21952 // Insert into out-of-bounds element is undefined.
21953 if (IndexC && VT.isFixedLengthVector() &&
21954 IndexC->getZExtValue() >= VT.getVectorNumElements())
21955 return DAG.getUNDEF(VT);
21956
21957 // Remove redundant insertions:
21958 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21959 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21960 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21961 return InVec;
21962
21963 if (!IndexC) {
21964 // If this is variable insert to undef vector, it might be better to splat:
21965 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21966 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21967 return DAG.getSplat(VT, DL, InVal);
21968 return SDValue();
21969 }
21970
21971 if (VT.isScalableVector())
21972 return SDValue();
21973
21974 unsigned NumElts = VT.getVectorNumElements();
21975
21976 // We must know which element is being inserted for folds below here.
21977 unsigned Elt = IndexC->getZExtValue();
21978
21979 // Handle <1 x ???> vector insertion special cases.
21980 if (NumElts == 1) {
21981 // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21982 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21983 InVal.getOperand(0).getValueType() == VT &&
21984 isNullConstant(InVal.getOperand(1)))
21985 return InVal.getOperand(0);
21986 }
21987
21988 // Canonicalize insert_vector_elt dag nodes.
21989 // Example:
21990 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21991 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21992 //
21993 // Do this only if the child insert_vector node has one use; also
21994 // do this only if indices are both constants and Idx1 < Idx0.
21995 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21996 && isa<ConstantSDNode>(InVec.getOperand(2))) {
21997 unsigned OtherElt = InVec.getConstantOperandVal(2);
21998 if (Elt < OtherElt) {
21999 // Swap nodes.
22000 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
22001 InVec.getOperand(0), InVal, EltNo);
22002 AddToWorklist(NewOp.getNode());
22003 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
22004 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
22005 }
22006 }
22007
22008 if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
22009 return Shuf;
22010
22011 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
22012 return Shuf;
22013
22014 if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
22015 return Shuf;
22016
22017 // Attempt to convert an insert_vector_elt chain into a legal build_vector.
22018 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
22019 // vXi1 vector - we don't need to recurse.
22020 if (NumElts == 1)
22021 return DAG.getBuildVector(VT, DL, {InVal});
22022
22023 // If we haven't already collected the element, insert into the op list.
22024 EVT MaxEltVT = InVal.getValueType();
22025 auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
22026 unsigned Idx) {
22027 if (!Ops[Idx]) {
22028 Ops[Idx] = Elt;
22029 if (VT.isInteger()) {
22030 EVT EltVT = Elt.getValueType();
22031 MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
22032 }
22033 }
22034 };
22035
22036 // Ensure all the operands are the same value type, fill any missing
22037 // operands with UNDEF and create the BUILD_VECTOR.
22038 auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
22039 assert(Ops.size() == NumElts && "Unexpected vector size");
22040 for (SDValue &Op : Ops) {
22041 if (Op)
22042 Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
22043 else
22044 Op = DAG.getUNDEF(MaxEltVT);
22045 }
22046 return DAG.getBuildVector(VT, DL, Ops);
22047 };
22048
22049 SmallVector<SDValue, 8> Ops(NumElts, SDValue());
22050 Ops[Elt] = InVal;
22051
22052 // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
22053 for (SDValue CurVec = InVec; CurVec;) {
22054 // UNDEF - build new BUILD_VECTOR from already inserted operands.
22055 if (CurVec.isUndef())
22056 return CanonicalizeBuildVector(Ops);
22057
22058 // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
22059 if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
22060 for (unsigned I = 0; I != NumElts; ++I)
22061 AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
22062 return CanonicalizeBuildVector(Ops);
22063 }
22064
22065 // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
22066 if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
22067 AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
22068 return CanonicalizeBuildVector(Ops);
22069 }
22070
22071 // INSERT_VECTOR_ELT - insert operand and continue up the chain.
22072 if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
22073 if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
22074 if (CurIdx->getAPIntValue().ult(NumElts)) {
22075 unsigned Idx = CurIdx->getZExtValue();
22076 AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
22077
22078 // Found entire BUILD_VECTOR.
22079 if (all_of(Ops, [](SDValue Op) { return !!Op; }))
22080 return CanonicalizeBuildVector(Ops);
22081
22082 CurVec = CurVec->getOperand(0);
22083 continue;
22084 }
22085
22086 // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
22087 // update the shuffle mask (and second operand if we started with unary
22088 // shuffle) and create a new legal shuffle.
22089 if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
22090 auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
22091 SDValue LHS = SVN->getOperand(0);
22092 SDValue RHS = SVN->getOperand(1);
22093        SmallVector<int, 16> Mask(SVN->getMask());
22094        bool Merged = true;
22095 for (auto I : enumerate(Ops)) {
22096 SDValue &Op = I.value();
22097 if (Op) {
22098 SmallVector<int, 16> NewMask;
22099 if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
22100 Merged = false;
22101 break;
22102 }
22103 Mask = std::move(NewMask);
22104 }
22105 }
22106 if (Merged)
22107 if (SDValue NewShuffle =
22108 TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
22109 return NewShuffle;
22110 }
22111
22112 // If all insertions are zero value, try to convert to AND mask.
22113 // TODO: Do this for -1 with OR mask?
22114 if (!LegalOperations && llvm::isNullConstant(InVal) &&
22115 all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
22116 count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
22117 SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
22118 SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
22119        SmallVector<SDValue, 8> Mask(NumElts);
22120        for (unsigned I = 0; I != NumElts; ++I)
22121 Mask[I] = Ops[I] ? Zero : AllOnes;
22122 return DAG.getNode(ISD::AND, DL, VT, CurVec,
22123 DAG.getBuildVector(VT, DL, Mask));
22124 }
22125
22126 // Failed to find a match in the chain - bail.
22127 break;
22128 }
22129
22130 // See if we can fill in the missing constant elements as zeros.
22131 // TODO: Should we do this for any constant?
22132 APInt DemandedZeroElts = APInt::getZero(NumElts);
22133 for (unsigned I = 0; I != NumElts; ++I)
22134 if (!Ops[I])
22135 DemandedZeroElts.setBit(I);
22136
22137 if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
22138 SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
22139 : DAG.getConstantFP(0, DL, MaxEltVT);
22140 for (unsigned I = 0; I != NumElts; ++I)
22141 if (!Ops[I])
22142 Ops[I] = Zero;
22143
22144 return CanonicalizeBuildVector(Ops);
22145 }
22146 }
22147
22148 return SDValue();
22149}
22150
22151SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
22152 SDValue EltNo,
22153 LoadSDNode *OriginalLoad) {
22154 assert(OriginalLoad->isSimple());
22155
22156 EVT ResultVT = EVE->getValueType(0);
22157 EVT VecEltVT = InVecVT.getVectorElementType();
22158
22159 // If the vector element type is not a multiple of a byte then we are unable
22160 // to correctly compute an address to load only the extracted element as a
22161 // scalar.
22162 if (!VecEltVT.isByteSized())
22163 return SDValue();
22164
22165 ISD::LoadExtType ExtTy =
22166 ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
22167 if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
22168 !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
22169 return SDValue();
22170
22171 Align Alignment = OriginalLoad->getAlign();
22172  MachinePointerInfo MPI;
22173  SDLoc DL(EVE);
22174 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
22175 int Elt = ConstEltNo->getZExtValue();
22176 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
22177 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
22178 Alignment = commonAlignment(Alignment, PtrOff);
22179 } else {
22180 // Discard the pointer info except the address space because the memory
22181 // operand can't represent this new access since the offset is variable.
22182 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
22183 Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
22184 }
22185
22186 unsigned IsFast = 0;
22187 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
22188 OriginalLoad->getAddressSpace(), Alignment,
22189 OriginalLoad->getMemOperand()->getFlags(),
22190 &IsFast) ||
22191 !IsFast)
22192 return SDValue();
22193
22194 SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
22195 InVecVT, EltNo);
22196
22197 // We are replacing a vector load with a scalar load. The new load must have
22198 // identical memory op ordering to the original.
22199 SDValue Load;
22200 if (ResultVT.bitsGT(VecEltVT)) {
22201 // If the result type of vextract is wider than the load, then issue an
22202 // extending load instead.
22203 ISD::LoadExtType ExtType =
22204 TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
22205 : ISD::EXTLOAD;
22206 Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
22207 NewPtr, MPI, VecEltVT, Alignment,
22208 OriginalLoad->getMemOperand()->getFlags(),
22209 OriginalLoad->getAAInfo());
22210 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22211 } else {
22212 // The result type is narrower or the same width as the vector element
22213 Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
22214 Alignment, OriginalLoad->getMemOperand()->getFlags(),
22215 OriginalLoad->getAAInfo());
22216 DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
22217 if (ResultVT.bitsLT(VecEltVT))
22218 Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
22219 else
22220 Load = DAG.getBitcast(ResultVT, Load);
22221 }
22222 ++OpsNarrowed;
22223 return Load;
22224}
22225
22226/// Transform a vector binary operation into a scalar binary operation by moving
22227/// the math/logic after an extract element of a vector.
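/// For example (illustrative, added commentary):
///   extract_vector_elt (add X, <1, 2, 3, 4>), 2
/// can become
///   add (extract_vector_elt X, 2), 3
/// because the extract from the constant vector constant-folds away.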
22228static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
22229                                       const SDLoc &DL, bool LegalOperations) {
22230 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22231 SDValue Vec = ExtElt->getOperand(0);
22232 SDValue Index = ExtElt->getOperand(1);
22233 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22234 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
22235 Vec->getNumValues() != 1)
22236 return SDValue();
22237
22238 // Targets may want to avoid this to prevent an expensive register transfer.
22239 if (!TLI.shouldScalarizeBinop(Vec))
22240 return SDValue();
22241
22242 // Extracting an element of a vector constant is constant-folded, so this
22243 // transform is just replacing a vector op with a scalar op while moving the
22244 // extract.
22245 SDValue Op0 = Vec.getOperand(0);
22246 SDValue Op1 = Vec.getOperand(1);
22247 APInt SplatVal;
22248 if (isAnyConstantBuildVector(Op0, true) ||
22249 ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22250 isAnyConstantBuildVector(Op1, true) ||
22251 ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22252 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22253 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22254 EVT VT = ExtElt->getValueType(0);
22255 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22256 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22257 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22258 }
22259
22260 return SDValue();
22261}
22262
22263 // Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
22264 // recursively analyse all of its users and try to model them as
22265// bit sequence extractions. If all of them agree on the new, narrower element
22266// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that
22267// new element type, do so now.
22268// This is mainly useful to recover from legalization that scalarized
22269// the vector as wide elements; we then try to rebuild it with narrower elements.
22270//
22271// Some more nodes could be modelled if that helps cover interesting patterns.
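// Illustrative example (added commentary, not from the original source),
// little-endian, assuming the profitability checks pass (%v is a placeholder):
//   t = extract_vector_elt <2 x i64> %v, 1
//   a = truncate t to i32
//   b = truncate (srl t, 32) to i32
// can be rebuilt on %v' = bitcast %v to <4 x i32> as
//   a = extract_vector_elt %v', 2
//   b = extract_vector_elt %v', 3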
22272bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22273 SDNode *N) {
22274 // We perform this optimization post type-legalization because
22275 // the type-legalizer often scalarizes integer-promoted vectors.
22276  // Performing this optimization earlier may cause legalization cycles.
22277 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22278 return false;
22279
22280 // TODO: Add support for big-endian.
22281 if (DAG.getDataLayout().isBigEndian())
22282 return false;
22283
22284 SDValue VecOp = N->getOperand(0);
22285 EVT VecVT = VecOp.getValueType();
22286 assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22287
22288 // We must start with a constant extraction index.
22289 auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22290 if (!IndexC)
22291 return false;
22292
22293 assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22294         "Original ISD::EXTRACT_VECTOR_ELT is undefined?");
22295
22296 // TODO: deal with the case of implicit anyext of the extraction.
22297 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22298 EVT ScalarVT = N->getValueType(0);
22299 if (VecVT.getScalarType() != ScalarVT)
22300 return false;
22301
22302 // TODO: deal with the cases other than everything being integer-typed.
22303 if (!ScalarVT.isScalarInteger())
22304 return false;
22305
22306 struct Entry {
22307    SDNode *Producer;
22308
22309 // Which bits of VecOp does it contain?
22310 unsigned BitPos;
22311 int NumBits;
22312 // NOTE: the actual width of \p Producer may be wider than NumBits!
22313
22314 Entry(Entry &&) = default;
22315 Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22316 : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22317
22318 Entry() = delete;
22319 Entry(const Entry &) = delete;
22320 Entry &operator=(const Entry &) = delete;
22321 Entry &operator=(Entry &&) = delete;
22322 };
22323 SmallVector<Entry, 32> Worklist;
22324  SmallVector<Entry, 32> Leafs;
22325
22326 // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22327 Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22328 /*NumBits=*/VecEltBitWidth);
22329
22330 while (!Worklist.empty()) {
22331 Entry E = Worklist.pop_back_val();
22332 // Does the node not even use any of the VecOp bits?
22333 if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22334 E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
22335      return false; // Let's allow the other combines to clean this up first.
22336 // Did we fail to model any of the users of the Producer?
22337 bool ProducerIsLeaf = false;
22338 // Look at each user of this Producer.
22339 for (SDNode *User : E.Producer->uses()) {
22340 switch (User->getOpcode()) {
22341 // TODO: support ISD::BITCAST
22342 // TODO: support ISD::ANY_EXTEND
22343 // TODO: support ISD::ZERO_EXTEND
22344 // TODO: support ISD::SIGN_EXTEND
22345 case ISD::TRUNCATE:
22346 // Truncation simply means we keep position, but extract less bits.
22347 Worklist.emplace_back(User, E.BitPos,
22348 /*NumBits=*/User->getValueSizeInBits(0));
22349 break;
22350 // TODO: support ISD::SRA
22351 // TODO: support ISD::SHL
22352 case ISD::SRL:
22353 // We should be shifting the Producer by a constant amount.
22354 if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22355 User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22356 // Logical right-shift means that we start extraction later,
22357 // but stop it at the same position we did previously.
22358 unsigned ShAmt = ShAmtC->getZExtValue();
22359 Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22360 break;
22361 }
22362 [[fallthrough]];
22363 default:
22364 // We can not model this user of the Producer.
22365 // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22366 ProducerIsLeaf = true;
22367 // Profitability check: all users that we can not model
22368 // must be ISD::BUILD_VECTOR's.
22369 if (User->getOpcode() != ISD::BUILD_VECTOR)
22370 return false;
22371 break;
22372 }
22373 }
22374 if (ProducerIsLeaf)
22375 Leafs.emplace_back(std::move(E));
22376 }
22377
22378 unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22379
22380  // If we are still at the same element granularity, give up.
22381 if (NewVecEltBitWidth == VecEltBitWidth)
22382 return false;
22383
22384 // The vector width must be a multiple of the new element width.
22385 if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22386 return false;
22387
22388 // All leafs must agree on the new element width.
22389  // All leafs must not expect any "padding" bits on top of that width.
22390  // All leafs must start extraction from a multiple of that width.
22391 if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22392 return (unsigned)E.NumBits == NewVecEltBitWidth &&
22393 E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22394 E.BitPos % NewVecEltBitWidth == 0;
22395 }))
22396 return false;
22397
22398 EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22399 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22400 VecVT.getSizeInBits() / NewVecEltBitWidth);
22401
22402 if (LegalTypes &&
22403 !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22404 return false;
22405
22406 if (LegalOperations &&
22407 !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22408        TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22409    return false;
22410
22411 SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22412 for (const Entry &E : Leafs) {
22413 SDLoc DL(E.Producer);
22414 unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22415 assert(NewIndex < NewVecVT.getVectorNumElements() &&
22416 "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22417 SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22418 DAG.getVectorIdxConstant(NewIndex, DL));
22419 CombineTo(E.Producer, V);
22420 }
22421
22422 return true;
22423}
22424
22425SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22426 SDValue VecOp = N->getOperand(0);
22427 SDValue Index = N->getOperand(1);
22428 EVT ScalarVT = N->getValueType(0);
22429 EVT VecVT = VecOp.getValueType();
22430 if (VecOp.isUndef())
22431 return DAG.getUNDEF(ScalarVT);
22432
22433 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22434 //
22435 // This only really matters if the index is non-constant since other combines
22436 // on the constant elements already work.
22437 SDLoc DL(N);
22438 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22439 Index == VecOp.getOperand(2)) {
22440 SDValue Elt = VecOp.getOperand(1);
22441 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22442 }
22443
22444 // (vextract (scalar_to_vector val, 0) -> val
22445 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22446 // Only 0'th element of SCALAR_TO_VECTOR is defined.
22447 if (DAG.isKnownNeverZero(Index))
22448 return DAG.getUNDEF(ScalarVT);
22449
22450 // Check if the result type doesn't match the inserted element type.
22451 // The inserted element and extracted element may have mismatched bitwidth.
22452    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
22453 SDValue InOp = VecOp.getOperand(0);
22454 if (InOp.getValueType() != ScalarVT) {
22455 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22456 if (InOp.getValueType().bitsGT(ScalarVT))
22457 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22458 return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22459 }
22460 return InOp;
22461 }
22462
22463 // extract_vector_elt of out-of-bounds element -> UNDEF
22464 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22465 if (IndexC && VecVT.isFixedLengthVector() &&
22466 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22467 return DAG.getUNDEF(ScalarVT);
22468
22469 // extract_vector_elt (build_vector x, y), 1 -> y
22470 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22471 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22472 TLI.isTypeLegal(VecVT)) {
22473 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22474 VecVT.isFixedLengthVector()) &&
22475 "BUILD_VECTOR used for scalable vectors");
22476 unsigned IndexVal =
22477 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22478 SDValue Elt = VecOp.getOperand(IndexVal);
22479 EVT InEltVT = Elt.getValueType();
22480
22481 if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22482 isNullConstant(Elt)) {
22483 // Sometimes build_vector's scalar input types do not match result type.
22484 if (ScalarVT == InEltVT)
22485 return Elt;
22486
22487 // TODO: It may be useful to truncate if free if the build_vector
22488 // implicitly converts.
22489 }
22490 }
22491
22492 if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
22493 return BO;
22494
22495 if (VecVT.isScalableVector())
22496 return SDValue();
22497
22498 // All the code from this point onwards assumes fixed width vectors, but it's
22499 // possible that some of the combinations could be made to work for scalable
22500 // vectors too.
22501 unsigned NumElts = VecVT.getVectorNumElements();
22502 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22503
22504 // See if the extracted element is constant, in which case fold it if it's
22505 // a legal fp immediate.
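// Editorial example (illustrative only): extracting lane 1 of a v2f64 whose
// lane 1 is known via computeKnownBits to hold the bit pattern of 1.0
// becomes ConstantFP<1.0> when 1.0 is a legal FP immediate on the target.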
22506 if (IndexC && ScalarVT.isFloatingPoint()) {
22507 APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22508 KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22509 if (KnownElt.isConstant()) {
22510 APFloat CstFP =
22511 APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22512 if (TLI.isFPImmLegal(CstFP, ScalarVT))
22513 return DAG.getConstantFP(CstFP, DL, ScalarVT);
22514 }
22515 }
22516
22517 // TODO: These transforms should not require the 'hasOneUse' restriction, but
22518 // there are regressions on multiple targets without it. We can end up with a
22519 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22520 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22521 VecOp.hasOneUse()) {
22522 // The vector index of the LSBs of the source depends on the endianness.
22523 bool IsLE = DAG.getDataLayout().isLittleEndian();
22524 unsigned ExtractIndex = IndexC->getZExtValue();
22525 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22526 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22527 SDValue BCSrc = VecOp.getOperand(0);
22528 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22529 return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22530
22531 if (LegalTypes && BCSrc.getValueType().isInteger() &&
22532 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22533 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22534 // trunc i64 X to i32
22535 SDValue X = BCSrc.getOperand(0);
22536 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22537 "Extract element and scalar to vector can't change element type "
22538 "from FP to integer.");
22539 unsigned XBitWidth = X.getValueSizeInBits();
22540 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22541
22542 // An extract element return value type can be wider than its vector
22543 // operand element type. In that case, the high bits are undefined, so
22544 // it's possible that we may need to extend rather than truncate.
22545 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22546 assert(XBitWidth % VecEltBitWidth == 0 &&
22547 "Scalar bitwidth must be a multiple of vector element bitwidth");
22548 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22549 }
22550 }
22551 }
22552
22553 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22554 // We only perform this optimization before the op legalization phase because
22555 // we may introduce new vector instructions which are not backed by TD
22556 // patterns; for example, on AVX, extracting elements from a wide vector
22557 // without using extract_subvector. However, if we can find an underlying
22558 // scalar value, then we can always use that.
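// Editorial illustration (not from the upstream source): given
//   t0: v4i32 = vector_shuffle<3,u,u,u> t1, t2
//   t3: i32 = extract_vector_elt t0, Constant:i64<0>
// mask element 0 selects element 3 of the first shuffle operand, so this
// becomes
//   t3: i32 = extract_vector_elt t1, Constant:i64<3>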
22559 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22560 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22561 // Find the new index to extract from.
22562 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22563
22564 // Extracting an undef index is undef.
22565 if (OrigElt == -1)
22566 return DAG.getUNDEF(ScalarVT);
22567
22568 // Select the right vector half to extract from.
22569 SDValue SVInVec;
22570 if (OrigElt < (int)NumElts) {
22571 SVInVec = VecOp.getOperand(0);
22572 } else {
22573 SVInVec = VecOp.getOperand(1);
22574 OrigElt -= NumElts;
22575 }
22576
22577 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22578 SDValue InOp = SVInVec.getOperand(OrigElt);
22579 if (InOp.getValueType() != ScalarVT) {
22580 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22581 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22582 }
22583
22584 return InOp;
22585 }
22586
22587 // FIXME: We should handle recursing on other vector shuffles and
22588 // scalar_to_vector here as well.
22589
22590 if (!LegalOperations ||
22591 // FIXME: Should really be just isOperationLegalOrCustom.
22592 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22593 TLI.isOperationCustom(ISD::EXTRACT_VECTOR_ELT, VecVT))
22594 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22595 DAG.getVectorIdxConstant(OrigElt, DL));
22596 }
22597 }
22598
22599 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22600 // simplify it based on the (valid) extraction indices.
22601 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22602 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22603 Use->getOperand(0) == VecOp &&
22604 isa<ConstantSDNode>(Use->getOperand(1));
22605 })) {
22606 APInt DemandedElts = APInt::getZero(NumElts);
22607 for (SDNode *Use : VecOp->uses()) {
22608 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22609 if (CstElt->getAPIntValue().ult(NumElts))
22610 DemandedElts.setBit(CstElt->getZExtValue());
22611 }
22612 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22613 // We simplified the vector operand of this extract element. If this
22614 // extract is not dead, visit it again so it is folded properly.
22615 if (N->getOpcode() != ISD::DELETED_NODE)
22616 AddToWorklist(N);
22617 return SDValue(N, 0);
22618 }
22619 APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22620 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22621 // We simplified the vector operand of this extract element. If this
22622 // extract is not dead, visit it again so it is folded properly.
22623 if (N->getOpcode() != ISD::DELETED_NODE)
22624 AddToWorklist(N);
22625 return SDValue(N, 0);
22626 }
22627 }
22628
22629 if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22630 return SDValue(N, 0);
22631
22632 // Everything under here is trying to match an extract of a loaded value.
22633 // If the result of the load has to be truncated, then it's not necessarily
22634 // profitable.
22635 bool BCNumEltsChanged = false;
22636 EVT ExtVT = VecVT.getVectorElementType();
22637 EVT LVT = ExtVT;
22638 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22639 return SDValue();
22640
22641 if (VecOp.getOpcode() == ISD::BITCAST) {
22642 // Don't duplicate a load with other uses.
22643 if (!VecOp.hasOneUse())
22644 return SDValue();
22645
22646 EVT BCVT = VecOp.getOperand(0).getValueType();
22647 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22648 return SDValue();
22649 if (NumElts != BCVT.getVectorNumElements())
22650 BCNumEltsChanged = true;
22651 VecOp = VecOp.getOperand(0);
22652 ExtVT = BCVT.getVectorElementType();
22653 }
22654
22655 // extract (vector load $addr), i --> load $addr + i * size
22656 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22657 ISD::isNormalLoad(VecOp.getNode()) &&
22658 !Index->hasPredecessor(VecOp.getNode())) {
22659 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22660 if (VecLoad && VecLoad->isSimple())
22661 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22662 }
22663
22664 // Perform only after legalization to ensure build_vector / vector_shuffle
22665 // optimizations have already been done.
22666 if (!LegalOperations || !IndexC)
22667 return SDValue();
22668
22669 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22670 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22671 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22672 int Elt = IndexC->getZExtValue();
22673 LoadSDNode *LN0 = nullptr;
22674 if (ISD::isNormalLoad(VecOp.getNode())) {
22675 LN0 = cast<LoadSDNode>(VecOp);
22676 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22677 VecOp.getOperand(0).getValueType() == ExtVT &&
22678 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22679 // Don't duplicate a load with other uses.
22680 if (!VecOp.hasOneUse())
22681 return SDValue();
22682
22683 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22684 }
22685 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22686 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22687 // =>
22688 // (load $addr+1*size)
22689
22690 // Don't duplicate a load with other uses.
22691 if (!VecOp.hasOneUse())
22692 return SDValue();
22693
22694 // If the bit convert changed the number of elements, it is unsafe
22695 // to examine the mask.
22696 if (BCNumEltsChanged)
22697 return SDValue();
22698
22699 // Select the input vector, guarding against out of range extract vector.
22700 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22701 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22702
22703 if (VecOp.getOpcode() == ISD::BITCAST) {
22704 // Don't duplicate a load with other uses.
22705 if (!VecOp.hasOneUse())
22706 return SDValue();
22707
22708 VecOp = VecOp.getOperand(0);
22709 }
22710 if (ISD::isNormalLoad(VecOp.getNode())) {
22711 LN0 = cast<LoadSDNode>(VecOp);
22712 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22713 Index = DAG.getConstant(Elt, DL, Index.getValueType());
22714 }
22715 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22716 VecVT.getVectorElementType() == ScalarVT &&
22717 (!LegalTypes ||
22718 TLI.isTypeLegal(
22719 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22720 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22721 // -> extract_vector_elt a, 0
22722 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22723 // -> extract_vector_elt a, 1
22724 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22725 // -> extract_vector_elt b, 0
22726 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22727 // -> extract_vector_elt b, 1
22728 EVT ConcatVT = VecOp.getOperand(0).getValueType();
22729 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22730 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, DL,
22731 Index.getValueType());
22732
22733 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22734 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
22735 ConcatVT.getVectorElementType(),
22736 ConcatOp, NewIdx);
22737 return DAG.getNode(ISD::BITCAST, DL, ScalarVT, Elt);
22738 }
22739
22740 // Make sure we found a non-volatile load and the extractelement is
22741 // the only use.
22742 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22743 return SDValue();
22744
22745 // If Idx was -1 above, Elt is going to be -1, so just return undef.
22746 if (Elt == -1)
22747 return DAG.getUNDEF(LVT);
22748
22749 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22750}
22751
22752// Simplify (build_vec (ext )) to (bitcast (build_vec ))
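// Editorial illustration (assumes little-endian and a legal v8i16 type):
//   (v4i32 build_vector (zext i16:a), (zext i16:b), (zext i16:c), (zext i16:d))
// can be rebuilt as
//   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))
// which later combines can often turn into a single shuffle.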
22753SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22754 // We perform this optimization post type-legalization because
22755 // the type-legalizer often scalarizes integer-promoted vectors.
22756 // Performing this optimization before may create bit-casts which
22757 // will be type-legalized to complex code sequences.
22758 // We perform this optimization only before the operation legalizer because we
22759 // may introduce illegal operations.
22760 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22761 return SDValue();
22762
22763 unsigned NumInScalars = N->getNumOperands();
22764 SDLoc DL(N);
22765 EVT VT = N->getValueType(0);
22766
22767 // Check to see if this is a BUILD_VECTOR of a bunch of values
22768 // which come from any_extend or zero_extend nodes. If so, we can create
22769 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22770 // optimizations. We do not handle sign-extend because we can't fill the sign
22771 // using shuffles.
22772 EVT SourceType = MVT::Other;
22773 bool AllAnyExt = true;
22774
22775 for (unsigned i = 0; i != NumInScalars; ++i) {
22776 SDValue In = N->getOperand(i);
22777 // Ignore undef inputs.
22778 if (In.isUndef()) continue;
22779
22780 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
22781 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22782
22783 // Abort if the element is not an extension.
22784 if (!ZeroExt && !AnyExt) {
22785 SourceType = MVT::Other;
22786 break;
22787 }
22788
22789 // The input is a ZeroExt or AnyExt. Check the original type.
22790 EVT InTy = In.getOperand(0).getValueType();
22791
22792 // Check that all of the widened source types are the same.
22793 if (SourceType == MVT::Other)
22794 // First time.
22795 SourceType = InTy;
22796 else if (InTy != SourceType) {
22797 // Multiple incoming types. Abort.
22798 SourceType = MVT::Other;
22799 break;
22800 }
22801
22802 // Check if all of the extends are ANY_EXTENDs.
22803 AllAnyExt &= AnyExt;
22804 }
22805
22806 // In order to have valid types, all of the inputs must be extended from the
22807 // same source type and all of the inputs must be any or zero extend.
22808 // Scalar sizes must be a power of two.
22809 EVT OutScalarTy = VT.getScalarType();
22810 bool ValidTypes =
22811 SourceType != MVT::Other &&
22812 llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22813 llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22814
22815 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22816 // turn into a single shuffle instruction.
22817 if (!ValidTypes)
22818 return SDValue();
22819
22820 // If we already have a splat buildvector, then don't fold it if it means
22821 // introducing zeros.
22822 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22823 return SDValue();
22824
22825 bool isLE = DAG.getDataLayout().isLittleEndian();
22826 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22827 assert(ElemRatio > 1 && "Invalid element size ratio");
22828 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22829 DAG.getConstant(0, DL, SourceType);
22830
22831 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22832 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22833
22834 // Populate the new build_vector
22835 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22836 SDValue Cast = N->getOperand(i);
22837 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22838 Cast.getOpcode() == ISD::ZERO_EXTEND ||
22839 Cast.isUndef()) && "Invalid cast opcode");
22840 SDValue In;
22841 if (Cast.isUndef())
22842 In = DAG.getUNDEF(SourceType);
22843 else
22844 In = Cast->getOperand(0);
22845 unsigned Index = isLE ? (i * ElemRatio) :
22846 (i * ElemRatio + (ElemRatio - 1));
22847
22848 assert(Index < Ops.size() && "Invalid index");
22849 Ops[Index] = In;
22850 }
22851
22852 // The type of the new BUILD_VECTOR node.
22853 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22854 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22855 "Invalid vector size");
22856 // Check if the new vector type is legal.
22857 if (!isTypeLegal(VecVT) ||
22858 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22859 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22860 return SDValue();
22861
22862 // Make the new BUILD_VECTOR.
22863 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22864
22865 // The new BUILD_VECTOR node has the potential to be further optimized.
22866 AddToWorklist(BV.getNode());
22867 // Bitcast to the desired type.
22868 return DAG.getBitcast(VT, BV);
22869}
22870
22871// Simplify (build_vec (trunc $1)
22872// (trunc (srl $1 half-width))
22873 // (trunc (srl $1 (2 * half-width)))
22874// to (bitcast $1)
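// Editorial sketch of why this holds on little-endian (not upstream code):
// truncating successive right-shifts of a value reproduces its in-memory
// chunks, so the build_vector is just the original bits reinterpreted:
//   uint64_t X = ...;
//   uint16_t Parts[4];
//   for (int I = 0; I != 4; ++I)
//     Parts[I] = uint16_t(X >> (16 * I)); // same bytes as memcpy(Parts, &X, 8)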
22875SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22876 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22877
22878 EVT VT = N->getValueType(0);
22879
22880 // Don't run this before LegalizeTypes if VT is legal.
22881 // Targets may have other preferences.
22882 if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22883 return SDValue();
22884
22885 // Only for little endian
22886 if (!DAG.getDataLayout().isLittleEndian())
22887 return SDValue();
22888
22889 SDLoc DL(N);
22890 EVT OutScalarTy = VT.getScalarType();
22891 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22892
22893 // Only for power of two types to be sure that bitcast works well
22894 if (!isPowerOf2_64(ScalarTypeBitsize))
22895 return SDValue();
22896
22897 unsigned NumInScalars = N->getNumOperands();
22898
22899 // Look through bitcasts
22900 auto PeekThroughBitcast = [](SDValue Op) {
22901 if (Op.getOpcode() == ISD::BITCAST)
22902 return Op.getOperand(0);
22903 return Op;
22904 };
22905
22906 // The source value from which all the parts are extracted.
22907 SDValue Src;
22908 for (unsigned i = 0; i != NumInScalars; ++i) {
22909 SDValue In = PeekThroughBitcast(N->getOperand(i));
22910 // Ignore undef inputs.
22911 if (In.isUndef()) continue;
22912
22913 if (In.getOpcode() != ISD::TRUNCATE)
22914 return SDValue();
22915
22916 In = PeekThroughBitcast(In.getOperand(0));
22917
22918 if (In.getOpcode() != ISD::SRL) {
22919 // For now, only handle build_vec without shuffling; handle shifts here in
22920 // the future.
22921 if (i != 0)
22922 return SDValue();
22923
22924 Src = In;
22925 } else {
22926 // In is SRL
22927 SDValue part = PeekThroughBitcast(In.getOperand(0));
22928
22929 if (!Src) {
22930 Src = part;
22931 } else if (Src != part) {
22932 // Vector parts do not stem from the same variable
22933 return SDValue();
22934 }
22935
22936 SDValue ShiftAmtVal = In.getOperand(1);
22937 if (!isa<ConstantSDNode>(ShiftAmtVal))
22938 return SDValue();
22939
22940 uint64_t ShiftAmt = In.getConstantOperandVal(1);
22941
22942 // The extracted value is not extracted at the right position
22943 if (ShiftAmt != i * ScalarTypeBitsize)
22944 return SDValue();
22945 }
22946 }
22947
22948 // Only cast if the size is the same
22949 if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22950 return SDValue();
22951
22952 return DAG.getBitcast(VT, Src);
22953}
22954
22955SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22956 ArrayRef<int> VectorMask,
22957 SDValue VecIn1, SDValue VecIn2,
22958 unsigned LeftIdx, bool DidSplitVec) {
22959 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22960
22961 EVT VT = N->getValueType(0);
22962 EVT InVT1 = VecIn1.getValueType();
22963 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22964
22965 unsigned NumElems = VT.getVectorNumElements();
22966 unsigned ShuffleNumElems = NumElems;
22967
22968 // If we artificially split a vector in two already, then the offsets in the
22969 // operands will all be based off of VecIn1, even those in VecIn2.
22970 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22971
22972 uint64_t VTSize = VT.getFixedSizeInBits();
22973 uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22974 uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22975
22976 assert(InVT2Size <= InVT1Size &&
22977 "Inputs must be sorted to be in non-increasing vector size order.");
22978
22979 // We can't generate a shuffle node with mismatched input and output types.
22980 // Try to make the types match the type of the output.
22981 if (InVT1 != VT || InVT2 != VT) {
22982 if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22983 // If the output vector length is a multiple of both input lengths,
22984 // we can concatenate them and pad the rest with undefs.
22985 unsigned NumConcats = VTSize / InVT1Size;
22986 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22987 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22988 ConcatOps[0] = VecIn1;
22989 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22990 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22991 VecIn2 = SDValue();
22992 } else if (InVT1Size == VTSize * 2) {
22993 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22994 return SDValue();
22995
22996 if (!VecIn2.getNode()) {
22997 // If we only have one input vector, and it's twice the size of the
22998 // output, split it in two.
22999 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
23000 DAG.getVectorIdxConstant(NumElems, DL));
23001 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
23002 // Since we now have shorter input vectors, adjust the offset of the
23003 // second vector's start.
23004 Vec2Offset = NumElems;
23005 } else {
23006 assert(InVT2Size <= InVT1Size &&
23007 "Second input is not going to be larger than the first one.");
23008
23009 // VecIn1 is wider than the output, and we have another, possibly
23010 // smaller input. Pad the smaller input with undefs, shuffle at the
23011 // input vector width, and extract the output.
23012 // The shuffle type is different than VT, so check legality again.
23013 if (LegalOperations &&
23014 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23015 return SDValue();
23016
23017 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
23018 // lower it back into a BUILD_VECTOR. So if the inserted type is
23019 // illegal, don't even try.
23020 if (InVT1 != InVT2) {
23021 if (!TLI.isTypeLegal(InVT2))
23022 return SDValue();
23023 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23024 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23025 }
23026 ShuffleNumElems = NumElems * 2;
23027 }
23028 } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
23029 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
23030 ConcatOps[0] = VecIn2;
23031 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23032 } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
23033 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
23034 !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
23035 return SDValue();
23036 // If the dest vector has fewer than two elements, then using shuffle and
23037 // extract from larger regs will cost even more.
23038 if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
23039 return SDValue();
23040 assert(InVT2Size <= InVT1Size &&
23041 "Second input is not going to be larger than the first one.");
23042
23043 // VecIn1 is wider than the output, and we have another, possibly
23044 // smaller input. Pad the smaller input with undefs, shuffle at the
23045 // input vector width, and extract the output.
23046 // The shuffle type is different than VT, so check legality again.
23047 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
23048 return SDValue();
23049
23050 if (InVT1 != InVT2) {
23051 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
23052 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
23053 }
23054 ShuffleNumElems = InVT1Size / VTSize * NumElems;
23055 } else {
23056 // TODO: Support cases where the length mismatch isn't exactly by a
23057 // factor of 2.
23058 // TODO: Move this check upwards, so that if we have bad type
23059 // mismatches, we don't create any DAG nodes.
23060 return SDValue();
23061 }
23062 }
23063
23064 // Initialize mask to undef.
23065 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
23066
23067 // Only need to run up to the number of elements actually used, not the
23068 // total number of elements in the shuffle - if we are shuffling a wider
23069 // vector, the high lanes should be set to undef.
23070 for (unsigned i = 0; i != NumElems; ++i) {
23071 if (VectorMask[i] <= 0)
23072 continue;
23073
23074 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
23075 if (VectorMask[i] == (int)LeftIdx) {
23076 Mask[i] = ExtIndex;
23077 } else if (VectorMask[i] == (int)LeftIdx + 1) {
23078 Mask[i] = Vec2Offset + ExtIndex;
23079 }
23080 }
23081
23082 // The types of the input vectors may have changed above.
23083 InVT1 = VecIn1.getValueType();
23084
23085 // If we already have a VecIn2, it should have the same type as VecIn1.
23086 // If we don't, get an undef/zero vector of the appropriate type.
23087 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
23088 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
23089
23090 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
23091 if (ShuffleNumElems > NumElems)
23092 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
23093
23094 return Shuffle;
23095}
23096
23097 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
23098 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
23099
23100 // First, determine where the build vector is not undef.
23101 // TODO: We could extend this to handle zero elements as well as undefs.
23102 int NumBVOps = BV->getNumOperands();
23103 int ZextElt = -1;
23104 for (int i = 0; i != NumBVOps; ++i) {
23105 SDValue Op = BV->getOperand(i);
23106 if (Op.isUndef())
23107 continue;
23108 if (ZextElt == -1)
23109 ZextElt = i;
23110 else
23111 return SDValue();
23112 }
23113 // Bail out if there's no non-undef element.
23114 if (ZextElt == -1)
23115 return SDValue();
23116
23117 // The build vector contains some number of undef elements and exactly
23118 // one other element. That other element must be a zero-extended scalar
23119 // extracted from a vector at a constant index to turn this into a shuffle.
23120 // Also, require that the build vector does not implicitly truncate/extend
23121 // its elements.
23122 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
23123 EVT VT = BV->getValueType(0);
23124 SDValue Zext = BV->getOperand(ZextElt);
23125 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
23126 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23127 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
23128 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
23129 return SDValue();
23130
23131 // The zero-extend must be a multiple of the source size, and we must be
23132 // building a vector of the same size as the source of the extract element.
23133 SDValue Extract = Zext.getOperand(0);
23134 unsigned DestSize = Zext.getValueSizeInBits();
23135 unsigned SrcSize = Extract.getValueSizeInBits();
23136 if (DestSize % SrcSize != 0 ||
23137 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
23138 return SDValue();
23139
23140 // Create a shuffle mask that will combine the extracted element with zeros
23141 // and undefs.
23142 int ZextRatio = DestSize / SrcSize;
23143 int NumMaskElts = NumBVOps * ZextRatio;
23144 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
23145 for (int i = 0; i != NumMaskElts; ++i) {
23146 if (i / ZextRatio == ZextElt) {
23147 // The low bits of the (potentially translated) extracted element map to
23148 // the source vector. The high bits map to zero. We will use a zero vector
23149 // as the 2nd source operand of the shuffle, so use the 1st element of
23150 // that vector (mask value is number-of-elements) for the high bits.
23151 int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
23152 ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
23153 : NumMaskElts;
23154 }
23155
23156 // Undef elements of the build vector remain undef because we initialize
23157 // the shuffle mask with -1.
23158 }
23159
23160 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
23161 // bitcast (shuffle V, ZeroVec, VectorMask)
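// Editorial example (illustrative, little-endian): for
//   t0: i16 = extract_vector_elt v4i16:V, Constant:i64<2>
//   t1: v2i32 = BUILD_VECTOR (i32 zero_extend t0), undef
// ZextRatio is 2, so the v4i16 mask is <2,4,u,u>: element 2 of V, then a
// zero taken from ZeroVec, and the result is bitcast back to v2i32.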
23162 SDLoc DL(BV);
23163 EVT VecVT = Extract.getOperand(0).getValueType();
23164 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
23165 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23166 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
23167 ZeroVec, ShufMask, DAG);
23168 if (!Shuf)
23169 return SDValue();
23170 return DAG.getBitcast(VT, Shuf);
23171}
23172
23173// FIXME: promote to STLExtras.
23174template <typename R, typename T>
23175static auto getFirstIndexOf(R &&Range, const T &Val) {
23176 auto I = find(Range, Val);
23177 if (I == Range.end())
23178 return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
23179 return std::distance(Range.begin(), I);
23180}
23181
23182// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
23183// operations. If the types of the vectors we're extracting from allow it,
23184// turn this into a vector_shuffle node.
23185SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
23186 SDLoc DL(N);
23187 EVT VT = N->getValueType(0);
23188
23189 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
23190 if (!isTypeLegal(VT))
23191 return SDValue();
23192
23192
23193 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
23194 return V;
23195
23196 // May only combine to shuffle after legalize if shuffle is legal.
23197 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
23198 return SDValue();
23199
23200 bool UsesZeroVector = false;
23201 unsigned NumElems = N->getNumOperands();
23202
23203 // Record, for each element of the newly built vector, which input vector
23204 // that element comes from. -1 stands for undef, 0 for the zero vector,
23205 // and positive values for the input vectors.
23206 // VectorMask maps each element to its vector number, and VecIn maps vector
23207 // numbers to their initial SDValues.
23208
23209 SmallVector<int, 8> VectorMask(NumElems, -1);
23210 SmallVector<SDValue, 8> VecIn;
23211 VecIn.push_back(SDValue());
23212
23213 for (unsigned i = 0; i != NumElems; ++i) {
23214 SDValue Op = N->getOperand(i);
23215
23216 if (Op.isUndef())
23217 continue;
23218
23219 // See if we can use a blend with a zero vector.
23220 // TODO: Should we generalize this to a blend with an arbitrary constant
23221 // vector?
23222 if (isNullConstant(Op) || isNullFPConstant(Op)) {
23223 UsesZeroVector = true;
23224 VectorMask[i] = 0;
23225 continue;
23226 }
23227
23228 // Not an undef or zero. If the input is something other than an
23229 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
23230 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
23231 !isa<ConstantSDNode>(Op.getOperand(1)))
23232 return SDValue();
23233 SDValue ExtractedFromVec = Op.getOperand(0);
23234
23235 if (ExtractedFromVec.getValueType().isScalableVector())
23236 return SDValue();
23237
23238 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23239 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23240 return SDValue();
23241
23242 // All inputs must have the same element type as the output.
23243 if (VT.getVectorElementType() !=
23244 ExtractedFromVec.getValueType().getVectorElementType())
23245 return SDValue();
23246
23247 // Have we seen this input vector before?
23248 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23249 // a map back from SDValues to numbers isn't worth it.
23250 int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23251 if (Idx == -1) { // A new source vector?
23252 Idx = VecIn.size();
23253 VecIn.push_back(ExtractedFromVec);
23254 }
23255
23256 VectorMask[i] = Idx;
23257 }
23258
23259 // If we didn't find at least one input vector, bail out.
23260 if (VecIn.size() < 2)
23261 return SDValue();
23262
23263 // If all the operands of the BUILD_VECTOR extract from the same
23264 // vector, then split the vector efficiently based on the maximum
23265 // vector access index and adjust the VectorMask and
23266 // VecIn accordingly.
23267 bool DidSplitVec = false;
23268 if (VecIn.size() == 2) {
23269 unsigned MaxIndex = 0;
23270 unsigned NearestPow2 = 0;
23271 SDValue Vec = VecIn.back();
23272 EVT InVT = Vec.getValueType();
23273 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23274
23275 for (unsigned i = 0; i < NumElems; i++) {
23276 if (VectorMask[i] <= 0)
23277 continue;
23278 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23279 IndexVec[i] = Index;
23280 MaxIndex = std::max(MaxIndex, Index);
23281 }
23282
23283 NearestPow2 = PowerOf2Ceil(MaxIndex);
23284 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23285 NumElems * 2 < NearestPow2) {
23286 unsigned SplitSize = NearestPow2 / 2;
23287 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23288 InVT.getVectorElementType(), SplitSize);
23289 if (TLI.isTypeLegal(SplitVT) &&
23290 SplitSize + SplitVT.getVectorNumElements() <=
23291 InVT.getVectorNumElements()) {
23292 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23293 DAG.getVectorIdxConstant(SplitSize, DL));
23294 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23295 DAG.getVectorIdxConstant(0, DL));
23296 VecIn.pop_back();
23297 VecIn.push_back(VecIn1);
23298 VecIn.push_back(VecIn2);
23299 DidSplitVec = true;
23300
23301 for (unsigned i = 0; i < NumElems; i++) {
23302 if (VectorMask[i] <= 0)
23303 continue;
23304 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23305 }
23306 }
23307 }
23308 }
23309
23310 // Sort input vectors by decreasing vector element count,
23311 // while preserving the relative order of equally-sized vectors.
23312 // Note that we keep the first "implicit zero vector as-is.
23313 SmallVector<SDValue, 8> SortedVecIn(VecIn);
23314 llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23315 [](const SDValue &a, const SDValue &b) {
23316 return a.getValueType().getVectorNumElements() >
23317 b.getValueType().getVectorNumElements();
23318 });
23319
23320 // We now also need to rebuild the VectorMask, because it referenced element
23321 // order in VecIn, and we just sorted them.
23322 for (int &SourceVectorIndex : VectorMask) {
23323 if (SourceVectorIndex <= 0)
23324 continue;
23325 unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23326 assert(Idx > 0 && Idx < SortedVecIn.size() &&
23327 VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23328 SourceVectorIndex = Idx;
23329 }
23330
23331 VecIn = std::move(SortedVecIn);
23332
23333 // TODO: Should this fire if some of the input vectors have an illegal type
23334 // (like it does now), or should we let legalization run its course first?
23335
23336 // Shuffle phase:
23337 // Take pairs of vectors, and shuffle them so that the result has elements
23338 // from these vectors in the correct places.
23339 // For example, given:
23340 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23341 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23342 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23343 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23344 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23345 // We will generate:
23346 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23347 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23348 SmallVector<SDValue, 4> Shuffles;
23349 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23350 unsigned LeftIdx = 2 * In + 1;
23351 SDValue VecLeft = VecIn[LeftIdx];
23352 SDValue VecRight =
23353 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23354
23355 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23356 VecRight, LeftIdx, DidSplitVec))
23357 Shuffles.push_back(Shuffle);
23358 else
23359 return SDValue();
23360 }
23361
23362 // If we need the zero vector as an "ingredient" in the blend tree, add it
23363 // to the list of shuffles.
23364 if (UsesZeroVector)
23365 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23366 : DAG.getConstantFP(0.0, DL, VT));
23367
23368 // If we only have one shuffle, we're done.
23369 if (Shuffles.size() == 1)
23370 return Shuffles[0];
23371
23372 // Update the vector mask to point to the post-shuffle vectors.
23373 for (int &Vec : VectorMask)
23374 if (Vec == 0)
23375 Vec = Shuffles.size() - 1;
23376 else
23377 Vec = (Vec - 1) / 2;
23378
23379 // More than one shuffle. Generate a binary tree of blends, e.g. if from
23380 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23381 // generate:
23382 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23383 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23384 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23385 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23386 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23387 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23388 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23389
23390 // Make sure the initial size of the shuffle list is even.
23391 if (Shuffles.size() % 2)
23392 Shuffles.push_back(DAG.getUNDEF(VT));
23393
23394 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23395 if (CurSize % 2) {
23396 Shuffles[CurSize] = DAG.getUNDEF(VT);
23397 CurSize++;
23398 }
23399 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23400 int Left = 2 * In;
23401 int Right = 2 * In + 1;
23402 SmallVector<int, 8> Mask(NumElems, -1);
23403 SDValue L = Shuffles[Left];
23404 ArrayRef<int> LMask;
23405 bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23406 L.use_empty() && L.getOperand(1).isUndef() &&
23407 L.getOperand(0).getValueType() == L.getValueType();
23408 if (IsLeftShuffle) {
23409 LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23410 L = L.getOperand(0);
23411 }
23412 SDValue R = Shuffles[Right];
23413 ArrayRef<int> RMask;
23414 bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23415 R.use_empty() && R.getOperand(1).isUndef() &&
23416 R.getOperand(0).getValueType() == R.getValueType();
23417 if (IsRightShuffle) {
23418 RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23419 R = R.getOperand(0);
23420 }
23421 for (unsigned I = 0; I != NumElems; ++I) {
23422 if (VectorMask[I] == Left) {
23423 Mask[I] = I;
23424 if (IsLeftShuffle)
23425 Mask[I] = LMask[I];
23426 VectorMask[I] = In;
23427 } else if (VectorMask[I] == Right) {
23428 Mask[I] = I + NumElems;
23429 if (IsRightShuffle)
23430 Mask[I] = RMask[I] + NumElems;
23431 VectorMask[I] = In;
23432 }
23433 }
23434
23435 Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23436 }
23437 }
23438 return Shuffles[0];
23439}
23440
23441 // Try to turn a build vector of zero extends of extract vector elts into
23442 // a vector zero extend and possibly an extract subvector.
23443// TODO: Support sign extend?
23444// TODO: Allow undef elements?
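// Editorial illustration (not from the upstream source):
//   (v2i32 build_vector (zext (extract_vector_elt v8i16:x, 4)),
//                       (zext (extract_vector_elt v8i16:x, 5)))
// -> (v2i32 zero_extend (v2i16 extract_subvector v8i16:x, 4))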
23445SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23446 if (LegalOperations)
23447 return SDValue();
23448
23449 EVT VT = N->getValueType(0);
23450
23451 bool FoundZeroExtend = false;
23452 SDValue Op0 = N->getOperand(0);
23453 auto checkElem = [&](SDValue Op) -> int64_t {
23454 unsigned Opc = Op.getOpcode();
23455 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23456 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23457 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23458 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23459 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23460 return C->getZExtValue();
23461 return -1;
23462 };
23463
23464 // Make sure the first element matches
23465 // (zext (extract_vector_elt X, C))
23466 // Offset must be a constant multiple of the
23467 // known-minimum vector length of the result type.
23468 int64_t Offset = checkElem(Op0);
23469 if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23470 return SDValue();
23471
23472 unsigned NumElems = N->getNumOperands();
23473 SDValue In = Op0.getOperand(0).getOperand(0);
23474 EVT InSVT = In.getValueType().getScalarType();
23475 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23476
23477 // Don't create an illegal input type after type legalization.
23478 if (LegalTypes && !TLI.isTypeLegal(InVT))
23479 return SDValue();
23480
23481 // Ensure all the elements come from the same vector and are adjacent.
23482 for (unsigned i = 1; i != NumElems; ++i) {
23483 if ((Offset + i) != checkElem(N->getOperand(i)))
23484 return SDValue();
23485 }
23486
23487 SDLoc DL(N);
23488 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23489 Op0.getOperand(0).getOperand(1));
23490 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23491 VT, In);
23492}
23493
23494 // If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
23495 // and all other elements are constant zeros, granularize the BUILD_VECTOR's
23496 // element width, absorbing the ZERO_EXTEND and turning it into a constant zero op.
23497 // This pattern can appear during legalization.
23498//
23499// NOTE: This can be generalized to allow more than a single
23500 // non-constant-zero op, UNDEFs, and to be KnownBits-based.
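// Editorial illustration (assumes little-endian and legal i32/v4i32):
//   (v2i64 build_vector (i64 zero_extend i32:x), (i64 0))
// is rebuilt with Factor == 2 as
//   (v2i64 bitcast (v4i32 build_vector (i32 trunc ...), 0, 0, 0))
// where the trunc recovers x, leaving zeros in every other 32-bit chunk.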
23501SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23502 // Don't run this after legalization. Targets may have other preferences.
23503 if (Level >= AfterLegalizeDAG)
23504 return SDValue();
23505
23506 // FIXME: support big-endian.
23507 if (DAG.getDataLayout().isBigEndian())
23508 return SDValue();
23509
23510 EVT VT = N->getValueType(0);
23511 EVT OpVT = N->getOperand(0).getValueType();
23512 assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23513
23514 EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23515
23516 if (!TLI.isTypeLegal(OpIntVT) ||
23517 (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23518 return SDValue();
23519
23520 unsigned EltBitwidth = VT.getScalarSizeInBits();
23521 // NOTE: the actual width of operands may be wider than that!
23522
23523 // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23524 // active bits they all have? We'll want to truncate them all to that width.
23525 unsigned ActiveBits = 0;
23526 APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23527 for (auto I : enumerate(N->ops())) {
23528 SDValue Op = I.value();
23529 // FIXME: support UNDEF elements?
23530 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23531 unsigned OpActiveBits =
23532 Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23533 if (OpActiveBits == 0) {
23534 KnownZeroOps.setBit(I.index());
23535 continue;
23536 }
23537 // Profitability check: don't allow non-zero constant operands.
23538 return SDValue();
23539 }
23540 // Profitability check: there must only be a single non-zero operand,
23541 // and it must be the first operand of the BUILD_VECTOR.
23542 if (I.index() != 0)
23543 return SDValue();
23544 // The operand must be a zero-extension itself.
23545 // FIXME: this could be generalized to known leading zeros check.
23546 if (Op.getOpcode() != ISD::ZERO_EXTEND)
23547 return SDValue();
23548 unsigned CurrActiveBits =
23549 Op.getOperand(0).getValueSizeInBits().getFixedValue();
23550 assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23551 ActiveBits = CurrActiveBits;
23552 // We want to at least halve the element size.
23553 if (2 * ActiveBits > EltBitwidth)
23554 return SDValue();
23555 }
23556
23557 // This BUILD_VECTOR must have at least one non-constant-zero operand.
23558 if (ActiveBits == 0)
23559 return SDValue();
23560
23561 // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
23562 // into how many chunks can we split our element width?
23563 EVT NewScalarIntVT, NewIntVT;
23564 std::optional<unsigned> Factor;
23565 // We can split the element into at least two chunks, but not into more
23566 // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
23567 // such that the element width is a multiple of it
23568 // and the resulting types/operations on that chunk width are legal.
23569 assert(2 * ActiveBits <= EltBitwidth &&
23570 "We know that half or less bits of the element are active.");
23571 for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23572 if (EltBitwidth % Scale != 0)
23573 continue;
23574 unsigned ChunkBitwidth = EltBitwidth / Scale;
23575 assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23576 NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23577 NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23578 Scale * N->getNumOperands());
23579 if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23580 (LegalOperations &&
23581 !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23582 TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23583 continue;
23584 Factor = Scale;
23585 break;
23586 }
23587 if (!Factor)
23588 return SDValue();
23589
23590 SDLoc DL(N);
23591 SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23592
23593 // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23594 SmallVector<SDValue, 16> NewOps;
23595 NewOps.reserve(NewIntVT.getVectorNumElements());
23596 for (auto I : enumerate(N->ops())) {
23597 SDValue Op = I.value();
23598 assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23599 unsigned SrcOpIdx = I.index();
23600 if (KnownZeroOps[SrcOpIdx]) {
23601 NewOps.append(*Factor, ZeroOp);
23602 continue;
23603 }
23604 Op = DAG.getBitcast(OpIntVT, Op);
23605 Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23606 NewOps.emplace_back(Op);
23607 NewOps.append(*Factor - 1, ZeroOp);
23608 }
23609 assert(NewOps.size() == NewIntVT.getVectorNumElements());
23610 SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23611 NewBV = DAG.getBitcast(VT, NewBV);
23612 return NewBV;
23613}
23614
23615SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23616 EVT VT = N->getValueType(0);
23617
23618 // A vector built entirely of undefs is undef.
23619 if (ISD::allOperandsUndef(N))
23620 return DAG.getUNDEF(VT);
23621
23622 // If this is a splat of a bitcast from another vector, change to a
23623 // concat_vector.
23624 // For example:
23625 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23626 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23627 //
23628 // If X is a build_vector itself, the concat can become a larger build_vector.
23629 // TODO: Maybe this is useful for non-splat too?
23630 if (!LegalOperations) {
23631 SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue();
23632 // Only change build_vector to a concat_vector if the splat value type is
23633 // same as the vector element type.
23634 if (Splat && Splat.getValueType() == VT.getVectorElementType()) {
23635 Splat = peekThroughBitcasts(Splat);
23636 EVT SrcVT = Splat.getValueType();
23637 if (SrcVT.isVector()) {
23638 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23639 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23640 SrcVT.getVectorElementType(), NumElts);
23641 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23642 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23643 SDValue Concat =
23644 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), NewVT, Ops);
23645 return DAG.getBitcast(VT, Concat);
23646 }
23647 }
23648 }
23649 }
23650
23651 // Check if we can express BUILD VECTOR via subvector extract.
23652 if (!LegalTypes && (N->getNumOperands() > 1)) {
23653 SDValue Op0 = N->getOperand(0);
23654 auto checkElem = [&](SDValue Op) -> uint64_t {
23655 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23656 (Op0.getOperand(0) == Op.getOperand(0)))
23657 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23658 return CNode->getZExtValue();
23659 return -1;
23660 };
23661
23662 int Offset = checkElem(Op0);
23663 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23664 if (Offset + i != checkElem(N->getOperand(i))) {
23665 Offset = -1;
23666 break;
23667 }
23668 }
23669
23670 if ((Offset == 0) &&
23671 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23672 return Op0.getOperand(0);
23673 if ((Offset != -1) &&
23674 ((Offset % N->getValueType(0).getVectorNumElements()) ==
23675 0)) // IDX must be multiple of output size.
23676 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23677 Op0.getOperand(0), Op0.getOperand(1));
23678 }
23679
23680 if (SDValue V = convertBuildVecZextToZext(N))
23681 return V;
23682
23683 if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23684 return V;
23685
23686 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23687 return V;
23688
23689 if (SDValue V = reduceBuildVecTruncToBitCast(N))
23690 return V;
23691
23692 if (SDValue V = reduceBuildVecToShuffle(N))
23693 return V;
23694
23695 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23696 // Do this late as some of the above may replace the splat.
23697 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23698 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23699 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23700 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23701 }
23702
23703 return SDValue();
23704}
23705
23706 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23707 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23708 EVT OpVT = N->getOperand(0).getValueType();
23709
23710 // If the operands are legal vectors, leave them alone.
23711 if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23712 return SDValue();
23713
23714 SDLoc DL(N);
23715 EVT VT = N->getValueType(0);
23716 SmallVector<SDValue, 8> Ops;
23717 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23718
23719 // Keep track of what we encounter.
23720 bool AnyInteger = false;
23721 bool AnyFP = false;
23722 for (const SDValue &Op : N->ops()) {
23723 if (ISD::BITCAST == Op.getOpcode() &&
23724 !Op.getOperand(0).getValueType().isVector())
23725 Ops.push_back(Op.getOperand(0));
23726 else if (ISD::UNDEF == Op.getOpcode())
23727 Ops.push_back(DAG.getNode(ISD::UNDEF, DL, SVT));
23728 else
23729 return SDValue();
23730
23731 // Note whether we encounter an integer or floating point scalar.
23732 // If it's neither, bail out, it could be something weird like x86mmx.
23733 EVT LastOpVT = Ops.back().getValueType();
23734 if (LastOpVT.isFloatingPoint())
23735 AnyFP = true;
23736 else if (LastOpVT.isInteger())
23737 AnyInteger = true;
23738 else
23739 return SDValue();
23740 }
23741
23742 // If any of the operands is a floating point scalar bitcast to a vector,
23743 // use floating point types throughout, and bitcast everything.
23744 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23745 if (AnyFP) {
23746 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23747 if (AnyInteger) {
23748 for (SDValue &Op : Ops) {
23749 if (Op.getValueType() == SVT)
23750 continue;
23751 if (Op.isUndef())
23752 Op = DAG.getNode(ISD::UNDEF, DL, SVT);
23753 else
23754 Op = DAG.getBitcast(SVT, Op);
23755 }
23756 }
23757 }
23758
23759 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23760 VT.getSizeInBits() / SVT.getSizeInBits());
23761 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23762}
23763
23764// Attempt to merge nested concat_vectors/undefs.
23765// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23766// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23767 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23768 SelectionDAG &DAG) {
23769 EVT VT = N->getValueType(0);
23770
23771 // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23772 EVT SubVT;
23773 SDValue FirstConcat;
23774 for (const SDValue &Op : N->ops()) {
23775 if (Op.isUndef())
23776 continue;
23777 if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23778 return SDValue();
23779 if (!FirstConcat) {
23780 SubVT = Op.getOperand(0).getValueType();
23781 if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23782 return SDValue();
23783 FirstConcat = Op;
23784 continue;
23785 }
23786 if (SubVT != Op.getOperand(0).getValueType())
23787 return SDValue();
23788 }
23789 assert(FirstConcat && "Concat of all-undefs found");
23790
23791 SmallVector<SDValue> ConcatOps;
23792 for (const SDValue &Op : N->ops()) {
23793 if (Op.isUndef()) {
23794 ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23795 continue;
23796 }
23797 ConcatOps.append(Op->op_begin(), Op->op_end());
23798 }
23799 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23800}
23801
23802// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23803// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23804// most two distinct vectors the same size as the result, attempt to turn this
23805// into a legal shuffle.
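// Editorial illustration (not from the upstream source):
//   (v4i32 concat_vectors (extract_subvector v4i32:a, 2),
//                         (extract_subvector v4i32:b, 0))
// -> (v4i32 vector_shuffle<2,3,4,5> a, b)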
23806 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23807 EVT VT = N->getValueType(0);
23808 EVT OpVT = N->getOperand(0).getValueType();
23809
23810 // We currently can't generate an appropriate shuffle for a scalable vector.
23811 if (VT.isScalableVector())
23812 return SDValue();
23813
23814 int NumElts = VT.getVectorNumElements();
23815 int NumOpElts = OpVT.getVectorNumElements();
23816
23817 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23818 SmallVector<int, 8> Mask;
23819
23820 for (SDValue Op : N->ops()) {
23821 Op = peekThroughBitcasts(Op);
23822
23823 // UNDEF nodes convert to UNDEF shuffle mask values.
23824 if (Op.isUndef()) {
23825 Mask.append((unsigned)NumOpElts, -1);
23826 continue;
23827 }
23828
23829 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23830 return SDValue();
23831
23832 // What vector are we extracting the subvector from and at what index?
23833 SDValue ExtVec = Op.getOperand(0);
23834 int ExtIdx = Op.getConstantOperandVal(1);
23835
23836 // We want the EVT of the original extraction to correctly scale the
23837 // extraction index.
23838 EVT ExtVT = ExtVec.getValueType();
23839 ExtVec = peekThroughBitcasts(ExtVec);
23840
23841 // UNDEF nodes convert to UNDEF shuffle mask values.
23842 if (ExtVec.isUndef()) {
23843 Mask.append((unsigned)NumOpElts, -1);
23844 continue;
23845 }
23846
23847 // Ensure that we are extracting a subvector from a vector the same
23848 // size as the result.
23849 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23850 return SDValue();
23851
23852 // Scale the subvector index to account for any bitcast.
23853 int NumExtElts = ExtVT.getVectorNumElements();
23854 if (0 == (NumExtElts % NumElts))
23855 ExtIdx /= (NumExtElts / NumElts);
23856 else if (0 == (NumElts % NumExtElts))
23857 ExtIdx *= (NumElts / NumExtElts);
23858 else
23859 return SDValue();
23860
23861 // At most we can reference 2 inputs in the final shuffle.
23862 if (SV0.isUndef() || SV0 == ExtVec) {
23863 SV0 = ExtVec;
23864 for (int i = 0; i != NumOpElts; ++i)
23865 Mask.push_back(i + ExtIdx);
23866 } else if (SV1.isUndef() || SV1 == ExtVec) {
23867 SV1 = ExtVec;
23868 for (int i = 0; i != NumOpElts; ++i)
23869 Mask.push_back(i + ExtIdx + NumElts);
23870 } else {
23871 return SDValue();
23872 }
23873 }
23874
23875 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23876 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23877 DAG.getBitcast(VT, SV1), Mask, DAG);
23878}
23879
23880 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23881 unsigned CastOpcode = N->getOperand(0).getOpcode();
23882 switch (CastOpcode) {
23883 case ISD::SINT_TO_FP:
23884 case ISD::UINT_TO_FP:
23885 case ISD::FP_TO_SINT:
23886 case ISD::FP_TO_UINT:
23887 // TODO: Allow more opcodes?
23888 // case ISD::BITCAST:
23889 // case ISD::TRUNCATE:
23890 // case ISD::ZERO_EXTEND:
23891 // case ISD::SIGN_EXTEND:
23892 // case ISD::FP_EXTEND:
23893 break;
23894 default:
23895 return SDValue();
23896 }
23897
23898 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23899 if (!SrcVT.isVector())
23900 return SDValue();
23901
23902 // All operands of the concat must be the same kind of cast from the same
23903 // source type.
23904 SmallVector<SDValue, 4> SrcOps;
23905 for (SDValue Op : N->ops()) {
23906 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23907 Op.getOperand(0).getValueType() != SrcVT)
23908 return SDValue();
23909 SrcOps.push_back(Op.getOperand(0));
23910 }
23911
23912 // The wider cast must be supported by the target. This is unusual because
23913 // the operation support type parameter depends on the opcode. In addition,
23914 // check the other type in the cast to make sure this is really legal.
23915 EVT VT = N->getValueType(0);
23916 EVT SrcEltVT = SrcVT.getVectorElementType();
23917 ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23918 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23919 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23920 switch (CastOpcode) {
23921 case ISD::SINT_TO_FP:
23922 case ISD::UINT_TO_FP:
23923 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23924 !TLI.isTypeLegal(VT))
23925 return SDValue();
23926 break;
23927 case ISD::FP_TO_SINT:
23928 case ISD::FP_TO_UINT:
23929 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23930 !TLI.isTypeLegal(ConcatSrcVT))
23931 return SDValue();
23932 break;
23933 default:
23934 llvm_unreachable("Unexpected cast opcode");
23935 }
23936
23937 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23938 SDLoc DL(N);
23939 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23940 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23941}
23942
23943// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23944// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23945// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
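// Editorial illustration (not from the upstream source):
//   (v8i32 concat_vectors (vector_shuffle<1,0,3,2> v4i32:a, undef), v4i32:a)
// -> (v8i32 vector_shuffle<1,0,3,2,0,1,2,3>
//          (v8i32 concat_vectors v4i32:a, undef), undef)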
23946 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23947 SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23948 bool LegalOperations) {
23949 EVT VT = N->getValueType(0);
23950 EVT OpVT = N->getOperand(0).getValueType();
23951 if (VT.isScalableVector())
23952 return SDValue();
23953
23954 // For now, only allow simple 2-operand concatenations.
23955 if (N->getNumOperands() != 2)
23956 return SDValue();
23957
23958 // Don't create illegal types/shuffles when not allowed to.
23959 if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23960 (LegalOperations &&
23961 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23962 return SDValue();
23963
23964 // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23965 // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23966 // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23967 // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23968 // (4) and for now, the SHUFFLE_VECTOR must be unary.
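// For example (illustrative, with A : v4i32):
//   concat_vectors (shuffle A, undef, <3,2,1,0>), A
//     --> shuffle (concat_vectors A, undef), undef, <3,2,1,0, 0,1,2,3>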
23969 ShuffleVectorSDNode *SVN = nullptr;
23970 for (SDValue Op : N->ops()) {
23971 if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23972 CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23973 all_of(N->ops(), [CurSVN](SDValue Op) {
23974 // FIXME: can we allow UNDEF operands?
23975 return !Op.isUndef() &&
23976 (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23977 })) {
23978 SVN = CurSVN;
23979 break;
23980 }
23981 }
23982 if (!SVN)
23983 return SDValue();
23984
23985 // We are going to pad the shuffle operands, so any index that was picking
23986 // from the second operand must be adjusted.
23987 SmallVector<int, 16> AdjustedMask;
23988 AdjustedMask.reserve(SVN->getMask().size());
23989 assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23990 append_range(AdjustedMask, SVN->getMask());
23991
23992 // Identity masks for the operands of the (padded) shuffle.
23993 SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23994 MutableArrayRef<int> FirstShufOpIdentityMask =
23995 MutableArrayRef<int>(IdentityMask)
23996 .take_front(OpVT.getVectorNumElements());
23997 MutableArrayRef<int> SecondShufOpIdentityMask =
23998 MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23999 std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
24000 std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
24001 OpVT.getVectorNumElements());
24002
24003 // New combined shuffle mask.
24004 SmallVector<int, 32> Mask;
24005 Mask.reserve(VT.getVectorNumElements());
24006 for (SDValue Op : N->ops()) {
24007 assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
24008 if (Op.getNode() == SVN) {
24009 append_range(Mask, AdjustedMask);
24010 continue;
24011 }
24012 if (Op == SVN->getOperand(0)) {
24013 append_range(Mask, FirstShufOpIdentityMask);
24014 continue;
24015 }
24016 if (Op == SVN->getOperand(1)) {
24017 append_range(Mask, SecondShufOpIdentityMask);
24018 continue;
24019 }
24020 llvm_unreachable("Unexpected operand!");
24021 }
24022
24023 // Don't create illegal shuffle masks.
24024 if (!TLI.isShuffleMaskLegal(Mask, VT))
24025 return SDValue();
24026
24027 // Pad the shuffle operands with UNDEF.
24028 SDLoc dl(N);
24029 std::array<SDValue, 2> ShufOps;
24030 for (auto I : zip(SVN->ops(), ShufOps)) {
24031 SDValue ShufOp = std::get<0>(I);
24032 SDValue &NewShufOp = std::get<1>(I);
24033 if (ShufOp.isUndef())
24034 NewShufOp = DAG.getUNDEF(VT);
24035 else {
24036 SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
24037 DAG.getUNDEF(OpVT));
24038 ShufOpParts[0] = ShufOp;
24039 NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
24040 }
24041 }
24042 // Finally, create the new wide shuffle.
24043 return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
24044}
24045
24046SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
24047 // If we only have one input vector, we don't need to do any concatenation.
24048 if (N->getNumOperands() == 1)
24049 return N->getOperand(0);
24050
24051 // Check if all of the operands are undefs.
24052 EVT VT = N->getValueType(0);
24053 if (ISD::allOperandsUndef(N))
24054 return DAG.getUNDEF(VT);
24055
24056 // Optimize concat_vectors where all but the first of the vectors are undef.
24057 if (all_of(drop_begin(N->ops()),
24058 [](const SDValue &Op) { return Op.isUndef(); })) {
24059 SDValue In = N->getOperand(0);
24060 assert(In.getValueType().isVector() && "Must concat vectors");
24061
24062 // If the input is a concat_vectors, just make a larger concat by padding
24063 // with smaller undefs.
24064 //
24065 // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
24066 // here could cause an infinite loop. That legalizing happens when LegalDAG
24067 // is true and input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
24068 // scalable.
24069 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
24070 !(LegalDAG && In.getValueType().isScalableVector())) {
24071 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
24072 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
24073 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
24074 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24075 }
24076
24077 SDValue Scalar = peekThroughOneUseBitcasts(In);
24078
24079 // concat_vectors(scalar_to_vector(scalar), undef) ->
24080 // scalar_to_vector(scalar)
24081 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
24082 Scalar.hasOneUse()) {
24083 EVT SVT = Scalar.getValueType().getVectorElementType();
24084 if (SVT == Scalar.getOperand(0).getValueType())
24085 Scalar = Scalar.getOperand(0);
24086 }
24087
24088 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
24089 if (!Scalar.getValueType().isVector() && In.hasOneUse()) {
24090 // If the bitcast type isn't legal, it might be a trunc of a legal type;
24091 // look through the trunc so we can still do the transform:
24092 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
24093 if (Scalar->getOpcode() == ISD::TRUNCATE &&
24094 !TLI.isTypeLegal(Scalar.getValueType()) &&
24095 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
24096 Scalar = Scalar->getOperand(0);
24097
24098 EVT SclTy = Scalar.getValueType();
24099
24100 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
24101 return SDValue();
24102
24103 // Bail out if the vector size is not a multiple of the scalar size.
24104 if (VT.getSizeInBits() % SclTy.getSizeInBits())
24105 return SDValue();
24106
24107 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
24108 if (VNTNumElms < 2)
24109 return SDValue();
24110
24111 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
24112 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
24113 return SDValue();
24114
24115 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
24116 return DAG.getBitcast(VT, Res);
24117 }
24118 }
24119
24120 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
24121 // We have already tested above for an UNDEF only concatenation.
24122 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
24123 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
24124 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
24125 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
24126 };
24127 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
24128 SmallVector<SDValue, 8> Opnds;
24129 EVT SVT = VT.getScalarType();
24130
24131 EVT MinVT = SVT;
24132 if (!SVT.isFloatingPoint()) {
24133 // If the BUILD_VECTORs are built from integers, they may have different
24134 // operand types. Get the smallest type and truncate all operands to it.
24135 bool FoundMinVT = false;
24136 for (const SDValue &Op : N->ops())
24137 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24138 EVT OpSVT = Op.getOperand(0).getValueType();
24139 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
24140 FoundMinVT = true;
24141 }
24142 assert(FoundMinVT && "Concat vector type mismatch");
24143 }
24144
24145 for (const SDValue &Op : N->ops()) {
24146 EVT OpVT = Op.getValueType();
24147 unsigned NumElts = OpVT.getVectorNumElements();
24148
24149 if (ISD::UNDEF == Op.getOpcode())
24150 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
24151
24152 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
24153 if (SVT.isFloatingPoint()) {
24154 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
24155 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
24156 } else {
24157 for (unsigned i = 0; i != NumElts; ++i)
24158 Opnds.push_back(
24159 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
24160 }
24161 }
24162 }
24163
24164 assert(VT.getVectorNumElements() == Opnds.size() &&
24165 "Concat vector type mismatch");
24166 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
24167 }
24168
24169 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
24170 // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
24171 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
24172 return V;
24173
24174 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
24175 // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
24176 if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
24177 return V;
24178
24179 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
24180 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
24181 return V;
24182 }
24183
24184 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
24185 return V;
24186
24187 if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
24188 N, DAG, TLI, LegalTypes, LegalOperations))
24189 return V;
24190
24191 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
24192 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
24193 // operands and look for CONCAT operations that place the incoming vectors
24194 // at the exact same location.
24195 //
24196 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
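// For example (illustrative): if X is v8i32 and the result type is v8i32,
//   concat_vectors (extract_subvector X, 0), (extract_subvector X, 4) --> X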
24197 SDValue SingleSource = SDValue();
24198 unsigned PartNumElem =
24199 N->getOperand(0).getValueType().getVectorMinNumElements();
24200
24201 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
24202 SDValue Op = N->getOperand(i);
24203
24204 if (Op.isUndef())
24205 continue;
24206
24207 // Check if this is the identity extract:
24208 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
24209 return SDValue();
24210
24211 // Find the single incoming vector for the extract_subvector.
24212 if (SingleSource.getNode()) {
24213 if (Op.getOperand(0) != SingleSource)
24214 return SDValue();
24215 } else {
24216 SingleSource = Op.getOperand(0);
24217
24218 // Check the source type is the same as the type of the result.
24219 // If not, this concat may extend the vector, so we can not
24220 // optimize it away.
24221 if (SingleSource.getValueType() != N->getValueType(0))
24222 return SDValue();
24223 }
24224
24225 // Check that we are reading from the identity index.
24226 unsigned IdentityIndex = i * PartNumElem;
24227 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
24228 return SDValue();
24229 }
24230
24231 if (SingleSource.getNode())
24232 return SingleSource;
24233
24234 return SDValue();
24235}
24236
24237// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
24238// if the subvector can be sourced for free.
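// For example (illustrative): with SubVT = v4i32 and a constant Index of 4,
// (v8i32 concat_vectors A:v4i32, B:v4i32) yields B, and
// (v8i32 insert_subvector X, Y:v4i32, Index) yields Y.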
24239static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24240 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24241 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24242 return V.getOperand(1);
24243 }
24244 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24245 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24246 V.getOperand(0).getValueType() == SubVT &&
24247 (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24248 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24249 return V.getOperand(SubIdx);
24250 }
24251 return SDValue();
24252}
24253
24254static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24255 SelectionDAG &DAG,
24256 bool LegalOperations) {
24257 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24258 SDValue BinOp = Extract->getOperand(0);
24259 unsigned BinOpcode = BinOp.getOpcode();
24260 if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24261 return SDValue();
24262
24263 EVT VecVT = BinOp.getValueType();
24264 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24265 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24266 return SDValue();
24267
24268 SDValue Index = Extract->getOperand(1);
24269 EVT SubVT = Extract->getValueType(0);
24270 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24271 return SDValue();
24272
24273 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24274 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24275
24276 // TODO: We could handle the case where only 1 operand is being inserted by
24277 // creating an extract of the other operand, but that requires checking
24278 // number of uses and/or costs.
24279 if (!Sub0 || !Sub1)
24280 return SDValue();
24281
24282 // We are inserting both operands of the wide binop only to extract back
24283 // to the narrow vector size. Eliminate all of the insert/extract:
24284 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24285 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24286 BinOp->getFlags());
24287}
24288
24289/// If we are extracting a subvector produced by a wide binary operator try
24290/// to use a narrow binary operator and/or avoid concatenation and extraction.
24291static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24292 bool LegalOperations) {
24293 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24294 // some of these bailouts with other transforms.
24295
24296 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24297 return V;
24298
24299 // The extract index must be a constant, so we can map it to a concat operand.
24300 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24301 if (!ExtractIndexC)
24302 return SDValue();
24303
24304 // We are looking for an optionally bitcasted wide vector binary operator
24305 // feeding an extract subvector.
24306 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24307 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24308 unsigned BOpcode = BinOp.getOpcode();
24309 if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24310 return SDValue();
24311
24312 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24313 // reduced to the unary fneg when it is visited, and we probably want to deal
24314 // with fneg in a target-specific way.
24315 if (BOpcode == ISD::FSUB) {
24316 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24317 if (C && C->getValueAPF().isNegZero())
24318 return SDValue();
24319 }
24320
24321 // The binop must be a vector type, so we can extract some fraction of it.
24322 EVT WideBVT = BinOp.getValueType();
24323 // The optimisations below currently assume we are dealing with fixed length
24324 // vectors. It is possible to add support for scalable vectors, but at the
24325 // moment we've done no analysis to prove whether they are profitable or not.
24326 if (!WideBVT.isFixedLengthVector())
24327 return SDValue();
24328
24329 EVT VT = Extract->getValueType(0);
24330 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24331 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24332 "Extract index is not a multiple of the vector length.");
24333
24334 // Bail out if this is not a proper multiple width extraction.
24335 unsigned WideWidth = WideBVT.getSizeInBits();
24336 unsigned NarrowWidth = VT.getSizeInBits();
24337 if (WideWidth % NarrowWidth != 0)
24338 return SDValue();
24339
24340 // Bail out if we are extracting a fraction of a single operation. This can
24341 // occur because we potentially looked through a bitcast of the binop.
24342 unsigned NarrowingRatio = WideWidth / NarrowWidth;
24343 unsigned WideNumElts = WideBVT.getVectorNumElements();
24344 if (WideNumElts % NarrowingRatio != 0)
24345 return SDValue();
24346
24347 // Bail out if the target does not support a narrower version of the binop.
24348 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24349 WideNumElts / NarrowingRatio);
24350 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24351 LegalOperations))
24352 return SDValue();
24353
24354 // If extraction is cheap, we don't need to look at the binop operands
24355 // for concat ops. The narrow binop alone makes this transform profitable.
24356 // We can't just reuse the original extract index operand because we may have
24357 // bitcasted.
24358 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24359 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24360 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24361 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24362 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24363 SDLoc DL(Extract);
24364 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24365 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24366 BinOp.getOperand(0), NewExtIndex);
24367 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24368 BinOp.getOperand(1), NewExtIndex);
24369 SDValue NarrowBinOp =
24370 DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24371 return DAG.getBitcast(VT, NarrowBinOp);
24372 }
24373
24374 // Only handle the case where we are doubling and then halving. A larger ratio
24375 // may require more than two narrow binops to replace the wide binop.
24376 if (NarrowingRatio != 2)
24377 return SDValue();
24378
24379 // TODO: The motivating case for this transform is an x86 AVX1 target. That
24380 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24381 // flavors, but no other 256-bit integer support. This could be extended to
24382 // handle any binop, but that may require fixing/adding other folds to avoid
24383 // codegen regressions.
24384 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24385 return SDValue();
24386
24387 // We need at least one concatenation operation of a binop operand to make
24388 // this transform worthwhile. The concat must double the input vector sizes.
24389 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24390 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24391 return V.getOperand(ConcatOpNum);
24392 return SDValue();
24393 };
24394 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24395 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24396
24397 if (SubVecL || SubVecR) {
24398 // If a binop operand was not the result of a concat, we must extract a
24399 // half-sized operand for our new narrow binop:
24400 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24401 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24402 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24403 SDLoc DL(Extract);
24404 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24405 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24406 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24407 BinOp.getOperand(0), IndexC);
24408
24409 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24410 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24411 BinOp.getOperand(1), IndexC);
24412
24413 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24414 return DAG.getBitcast(VT, NarrowBinOp);
24415 }
24416
24417 return SDValue();
24418}
24419
24420/// If we are extracting a subvector from a wide vector load, convert to a
24421/// narrow load to eliminate the extraction:
24422/// (extract_subvector (load wide vector)) --> (load narrow vector)
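/// For example (illustrative): extracting v2i32 at index 2 from a simple v8i32
/// load becomes a v2i32 load from the original base address plus 8 bytes.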
24423static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24424 // TODO: Add support for big-endian. The offset calculation must be adjusted.
24425 if (DAG.getDataLayout().isBigEndian())
24426 return SDValue();
24427
24428 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24429 if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24430 return SDValue();
24431
24432 // Allow targets to opt-out.
24433 EVT VT = Extract->getValueType(0);
24434
24435 // We can only create byte sized loads.
24436 if (!VT.isByteSized())
24437 return SDValue();
24438
24439 unsigned Index = Extract->getConstantOperandVal(1);
24440 unsigned NumElts = VT.getVectorMinNumElements();
24441 // A fixed length vector being extracted from a scalable vector
24442 // may not be any *smaller* than the scalable one.
24443 if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24444 return SDValue();
24445
24446 // The definition of EXTRACT_SUBVECTOR states that the index must be a
24447 // multiple of the minimum number of elements in the result type.
24448 assert(Index % NumElts == 0 && "The extract subvector index is not a "
24449 "multiple of the result's element count");
24450
24451 // It's fine to use TypeSize here as we know the offset will not be negative.
24452 TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24453
24454 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24455 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24456 return SDValue();
24457
24458 // The narrow load will be offset from the base address of the old load if
24459 // we are extracting from something besides index 0 (little-endian).
24460 SDLoc DL(Extract);
24461
24462 // TODO: Use "BaseIndexOffset" to make this more effective.
24463 SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24464
24465 LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
24466 MachineFunction &MF = DAG.getMachineFunction();
24467 MachineMemOperand *MMO;
24468 if (Offset.isScalable()) {
24469 MachinePointerInfo MPI =
24470 MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24471 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24472 } else
24473 MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24474 StoreSize);
24475
24476 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24477 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24478 return NewLd;
24479}
24480
24481/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24482/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24483/// EXTRACT_SUBVECTOR(Op?, ?),
24484/// Mask'))
24485/// iff it is legal and profitable to do so. Notably, the trimmed mask
24486/// (containing only the elements that are extracted)
24487/// must reference at most two subvectors.
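/// For example (illustrative):
///   (v4i32 extract_subvector (v8i32 vector_shuffle Op0, Op1,
///                                   <0,9,1,8, u,u,u,u>), 0)
///     --> (v4i32 vector_shuffle (extract_subvector Op0, 0),
///                               (extract_subvector Op1, 0), <0,5,1,4>)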
24488static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24489 SelectionDAG &DAG,
24490 const TargetLowering &TLI,
24491 bool LegalOperations) {
24492 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24493 "Must only be called on EXTRACT_SUBVECTOR's");
24494
24495 SDValue N0 = N->getOperand(0);
24496
24497 // Only deal with non-scalable vectors.
24498 EVT NarrowVT = N->getValueType(0);
24499 EVT WideVT = N0.getValueType();
24500 if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24501 return SDValue();
24502
24503 // The operand must be a shufflevector.
24504 auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24505 if (!WideShuffleVector)
24506 return SDValue();
24507
24508 // The old shuffle needs to go away.
24509 if (!WideShuffleVector->hasOneUse())
24510 return SDValue();
24511
24512 // And the narrow shufflevector that we'll form must be legal.
24513 if (LegalOperations &&
24514 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24515 return SDValue();
24516
24517 uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24518 int NumEltsExtracted = NarrowVT.getVectorNumElements();
24519 assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24520 "Extract index is not a multiple of the output vector length.");
24521
24522 int WideNumElts = WideVT.getVectorNumElements();
24523
24524 SmallVector<int, 16> NewMask;
24525 NewMask.reserve(NumEltsExtracted);
24526 SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24527 DemandedSubvectors;
24528
24529 // Try to decode the wide mask into narrow mask from at most two subvectors.
24530 for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24531 NumEltsExtracted)) {
24532 assert((M >= -1) && (M < (2 * WideNumElts)) &&
24533 "Out-of-bounds shuffle mask?");
24534
24535 if (M < 0) {
24536 // Does not depend on operands, does not require adjustment.
24537 NewMask.emplace_back(M);
24538 continue;
24539 }
24540
24541 // From which operand of the shuffle does this shuffle mask element pick?
24542 int WideShufOpIdx = M / WideNumElts;
24543 // Which element of that operand is picked?
24544 int OpEltIdx = M % WideNumElts;
24545
24546 assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24547 "Shuffle mask vector decomposition failure.");
24548
24549 // And which NumEltsExtracted-sized subvector of that operand is that?
24550 int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24551 // And which element within that subvector of that operand is that?
24552 int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24553
24554 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24555 "Shuffle mask subvector decomposition failure.");
24556
24557 assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24558 WideShufOpIdx * WideNumElts) == M &&
24559 "Shuffle mask full decomposition failure.");
24560
24561 SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24562
24563 if (Op.isUndef()) {
24564 // Picking from an undef operand. Let's adjust mask instead.
24565 NewMask.emplace_back(-1);
24566 continue;
24567 }
24568
24569 const std::pair<SDValue, int> DemandedSubvector =
24570 std::make_pair(Op, OpSubvecIdx);
24571
24572 if (DemandedSubvectors.insert(DemandedSubvector)) {
24573 if (DemandedSubvectors.size() > 2)
24574 return SDValue(); // We can't handle more than two subvectors.
24575 // How many elements into the WideVT does this subvector start?
24576 int Index = NumEltsExtracted * OpSubvecIdx;
24577 // Bail out if the extraction isn't going to be cheap.
24578 if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24579 return SDValue();
24580 }
24581
24582 // Ok, but from which operand of the new shuffle will this element pick?
24583 int NewOpIdx =
24584 getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24585 assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24586
24587 int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24588 NewMask.emplace_back(AdjM);
24589 }
24590 assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24591 assert(DemandedSubvectors.size() <= 2 &&
24592 "Should have ended up demanding at most two subvectors.");
24593
24594 // Did we discover that the shuffle does not actually depend on operands?
24595 if (DemandedSubvectors.empty())
24596 return DAG.getUNDEF(NarrowVT);
24597
24598 // Profitability check: only deal with extractions from the first subvector
24599 // unless the mask becomes an identity mask.
24600 if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24601 any_of(NewMask, [](int M) { return M < 0; }))
24602 for (auto &DemandedSubvector : DemandedSubvectors)
24603 if (DemandedSubvector.second != 0)
24604 return SDValue();
24605
24606 // We still perform the exact same EXTRACT_SUBVECTOR, just on different
24607 // operand[s]/index[es], so there is no point in checking for its legality.
24608
24609 // Do not turn a legal shuffle into an illegal one.
24610 if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24611 !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24612 return SDValue();
24613
24614 SDLoc DL(N);
24615
24616 SmallVector<SDValue, 2> NewOps;
24617 for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24618 &DemandedSubvector : DemandedSubvectors) {
24619 // How many elements into the WideVT does this subvector start?
24620 int Index = NumEltsExtracted * DemandedSubvector.second;
24621 SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24622 NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24623 DemandedSubvector.first, IndexC));
24624 }
24625 assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24626 "Should end up with either one or two ops");
24627
24628 // If we ended up with only one operand, pad with an undef.
24629 if (NewOps.size() == 1)
24630 NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24631
24632 return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24633}
24634
24635SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24636 EVT NVT = N->getValueType(0);
24637 SDValue V = N->getOperand(0);
24638 uint64_t ExtIdx = N->getConstantOperandVal(1);
24639 SDLoc DL(N);
24640
24641 // Extract from UNDEF is UNDEF.
24642 if (V.isUndef())
24643 return DAG.getUNDEF(NVT);
24644
24645 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24646 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24647 return NarrowLoad;
24648
24649 // Combine an extract of an extract into a single extract_subvector.
24650 // ext (ext X, C), 0 --> ext X, C
24651 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24652 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24653 V.getConstantOperandVal(1)) &&
24654 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24655 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(0),
24656 V.getOperand(1));
24657 }
24658 }
24659
24660 // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
24661 if (V.getOpcode() == ISD::SPLAT_VECTOR)
24662 if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24663 if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24664 return DAG.getSplatVector(NVT, DL, V.getOperand(0));
24665
24666 // extract_subvector(insert_subvector(x,y,c1),c2)
24667 // --> extract_subvector(y,c2-c1)
24668 // iff we're just extracting from the inserted subvector.
24669 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24670 SDValue InsSub = V.getOperand(1);
24671 EVT InsSubVT = InsSub.getValueType();
24672 unsigned NumInsElts = InsSubVT.getVectorMinNumElements();
24673 unsigned InsIdx = V.getConstantOperandVal(2);
24674 unsigned NumSubElts = NVT.getVectorMinNumElements();
24675 if (InsIdx <= ExtIdx && (ExtIdx + NumSubElts) <= (InsIdx + NumInsElts) &&
24676 TLI.isExtractSubvectorCheap(NVT, InsSubVT, ExtIdx - InsIdx) &&
24677 InsSubVT.isFixedLengthVector() && NVT.isFixedLengthVector() &&
24678 V.getValueType().isFixedLengthVector())
24679 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, InsSub,
24680 DAG.getVectorIdxConstant(ExtIdx - InsIdx, DL));
24681 }
24682
24683 // Try to move vector bitcast after extract_subv by scaling extraction index:
24684 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
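// For example (illustrative):
//   (v2i32 extract_subvector (v4i32 bitcast X:v8i16), 2)
//     --> (v2i32 bitcast (v4i16 extract_subvector X, 4))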
24685 if (V.getOpcode() == ISD::BITCAST &&
24686 V.getOperand(0).getValueType().isVector() &&
24687 (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24688 SDValue SrcOp = V.getOperand(0);
24689 EVT SrcVT = SrcOp.getValueType();
24690 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24691 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24692 if ((SrcNumElts % DestNumElts) == 0) {
24693 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24694 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24695 EVT NewExtVT =
24696 EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NewExtEC);
24697 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24698 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24699 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24700 V.getOperand(0), NewIndex);
24701 return DAG.getBitcast(NVT, NewExtract);
24702 }
24703 }
24704 if ((DestNumElts % SrcNumElts) == 0) {
24705 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24706 if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24707 ElementCount NewExtEC =
24708 NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24709 EVT ScalarVT = SrcVT.getScalarType();
24710 if ((ExtIdx % DestSrcRatio) == 0) {
24711 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24712 EVT NewExtVT =
24713 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24714 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24715 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24716 SDValue NewExtract =
24717 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24718 V.getOperand(0), NewIndex);
24719 return DAG.getBitcast(NVT, NewExtract);
24720 }
24721 if (NewExtEC.isScalar() &&
24722 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24723 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24724 SDValue NewExtract =
24725 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24726 V.getOperand(0), NewIndex);
24727 return DAG.getBitcast(NVT, NewExtract);
24728 }
24729 }
24730 }
24731 }
24732 }
24733
24734 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24735 unsigned ExtNumElts = NVT.getVectorMinNumElements();
24736 EVT ConcatSrcVT = V.getOperand(0).getValueType();
24737 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24738 "Concat and extract subvector do not change element type");
24739 assert((ExtIdx % ExtNumElts) == 0 &&
24740 "Extract index is not a multiple of the input vector length.");
24741
24742 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24743 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24744
24745 // If the concatenated source types match this extract, it's a direct
24746 // simplification:
24747 // extract_subvec (concat V1, V2, ...), i --> Vi
24748 if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24749 return V.getOperand(ConcatOpIdx);
24750
24751 // If the concatenated source vectors are a multiple length of this extract,
24752 // then extract a fraction of one of those source vectors directly from a
24753 // concat operand. Example:
24754 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24755 // v2i8 extract_subvec v8i8 Y, 6
24756 if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24757 ConcatSrcNumElts % ExtNumElts == 0) {
24758 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24759 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24760 "Trying to extract from >1 concat operand?");
24761 assert(NewExtIdx % ExtNumElts == 0 &&
24762 "Extract index is not a multiple of the input vector length.");
24763 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24764 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24765 V.getOperand(ConcatOpIdx), NewIndexC);
24766 }
24767 }
24768
24769 if (SDValue V =
24770 foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24771 return V;
24772
24773 V = peekThroughBitcasts(V);
24774
24775 // If the input is a build vector, try to make a smaller build vector.
24776 if (V.getOpcode() == ISD::BUILD_VECTOR) {
24777 EVT InVT = V.getValueType();
24778 unsigned ExtractSize = NVT.getSizeInBits();
24779 unsigned EltSize = InVT.getScalarSizeInBits();
24780 // Only do this if we won't split any elements.
24781 if (ExtractSize % EltSize == 0) {
24782 unsigned NumElems = ExtractSize / EltSize;
24783 EVT EltVT = InVT.getVectorElementType();
24784 EVT ExtractVT =
24785 NumElems == 1 ? EltVT
24786 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24787 if ((Level < AfterLegalizeDAG ||
24788 (NumElems == 1 ||
24789 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24790 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24791 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24792
24793 if (NumElems == 1) {
24794 SDValue Src = V->getOperand(IdxVal);
24795 if (EltVT != Src.getValueType())
24796 Src = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Src);
24797 return DAG.getBitcast(NVT, Src);
24798 }
24799
24800 // Extract the pieces from the original build_vector.
24801 SDValue BuildVec =
24802 DAG.getBuildVector(ExtractVT, DL, V->ops().slice(IdxVal, NumElems));
24803 return DAG.getBitcast(NVT, BuildVec);
24804 }
24805 }
24806 }
24807
24808 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
24809 // Handle only simple case where vector being inserted and vector
24810 // being extracted are of same size.
24811 EVT SmallVT = V.getOperand(1).getValueType();
24812 if (!NVT.bitsEq(SmallVT))
24813 return SDValue();
24814
24815 // Combine:
24816 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24817 // Into:
24818 // indices are equal or bit offsets are equal => V1
24819 // otherwise => (extract_subvec V1, ExtIdx)
24820 uint64_t InsIdx = V.getConstantOperandVal(2);
24821 if (InsIdx * SmallVT.getScalarSizeInBits() ==
24822 ExtIdx * NVT.getScalarSizeInBits()) {
24823 if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24824 return SDValue();
24825
24826 return DAG.getBitcast(NVT, V.getOperand(1));
24827 }
24828 return DAG.getNode(
24829 ISD::EXTRACT_SUBVECTOR, DL, NVT,
24830 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24831 N->getOperand(1));
24832 }
24833
24834 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24835 return NarrowBOp;
24836
24837 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24838 return SDValue(N, 0);
24839
24840 return SDValue();
24841}
24842
24843/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24844/// followed by concatenation. Narrow vector ops may have better performance
24845/// than wide ops, and this can unlock further narrowing of other vector ops.
24846/// Targets can invert this transform later if it is not profitable.
24847static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24848 SelectionDAG &DAG) {
24849 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24850 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24851 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24852 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24853 return SDValue();
24854
24855 // Split the wide shuffle mask into halves. Any mask element that is accessing
24856 // operand 1 is offset down to account for narrowing of the vectors.
24857 ArrayRef<int> Mask = Shuf->getMask();
24858 EVT VT = Shuf->getValueType(0);
24859 unsigned NumElts = VT.getVectorNumElements();
24860 unsigned HalfNumElts = NumElts / 2;
24861 SmallVector<int, 16> Mask0(HalfNumElts, -1);
24862 SmallVector<int, 16> Mask1(HalfNumElts, -1);
24863 for (unsigned i = 0; i != NumElts; ++i) {
24864 if (Mask[i] == -1)
24865 continue;
24866 // If we reference the upper (undef) subvector then the element is undef.
24867 if ((Mask[i] % NumElts) >= HalfNumElts)
24868 continue;
24869 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24870 if (i < HalfNumElts)
24871 Mask0[i] = M;
24872 else
24873 Mask1[i - HalfNumElts] = M;
24874 }
24875
24876 // Ask the target if this is a valid transform.
24877 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24878 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24879 HalfNumElts);
24880 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24881 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24882 return SDValue();
24883
24884 // shuffle (concat X, undef), (concat Y, undef), Mask -->
24885 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
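// For example (illustrative): for a v4i32 shuffle with Mask = <0,4,1,5>, where
// X and Y are the v2i32 low halves of the concats, this produces Mask0 = <0,2>
// and Mask1 = <1,3>, i.e. concat (shuffle X, Y, <0,2>), (shuffle X, Y, <1,3>).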
24886 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24887 SDLoc DL(Shuf);
24888 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24889 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24890 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24891}
24892
24893// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
24894// or turn a shuffle of a single concat into simpler shuffle then concat.
24895static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24896 EVT VT = N->getValueType(0);
24897 unsigned NumElts = VT.getVectorNumElements();
24898
24899 SDValue N0 = N->getOperand(0);
24900 SDValue N1 = N->getOperand(1);
24901 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24902 ArrayRef<int> Mask = SVN->getMask();
24903
24904 SmallVector<SDValue, 4> Ops;
24905 EVT ConcatVT = N0.getOperand(0).getValueType();
24906 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24907 unsigned NumConcats = NumElts / NumElemsPerConcat;
24908
24909 auto IsUndefMaskElt = [](int i) { return i == -1; };
24910
24911 // Special case: shuffle(concat(A,B)) can be more efficiently represented
24912 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24913 // half vector elements.
24914 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24915 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24916 IsUndefMaskElt)) {
24917 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24918 N0.getOperand(1),
24919 Mask.slice(0, NumElemsPerConcat));
24920 N1 = DAG.getUNDEF(ConcatVT);
24921 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24922 }
24923
24924 // Look at every vector that's inserted. We're looking for exact
24925 // subvector-sized copies from a concatenated vector
24926 for (unsigned I = 0; I != NumConcats; ++I) {
24927 unsigned Begin = I * NumElemsPerConcat;
24928 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24929
24930 // Make sure we're dealing with a copy.
24931 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24932 Ops.push_back(DAG.getUNDEF(ConcatVT));
24933 continue;
24934 }
24935
24936 int OpIdx = -1;
24937 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24938 if (IsUndefMaskElt(SubMask[i]))
24939 continue;
24940 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24941 return SDValue();
24942 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24943 if (0 <= OpIdx && EltOpIdx != OpIdx)
24944 return SDValue();
24945 OpIdx = EltOpIdx;
24946 }
24947 assert(0 <= OpIdx && "Unknown concat_vectors op");
24948
24949 if (OpIdx < (int)N0.getNumOperands())
24950 Ops.push_back(N0.getOperand(OpIdx));
24951 else
24952 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24953 }
24954
24955 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24956}
24957
24958// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24959// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24960//
24961// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24962// a simplification in some sense, but it isn't appropriate in general: some
24963// BUILD_VECTORs are substantially cheaper than others. The general case
24964// of a BUILD_VECTOR requires inserting each element individually (or
24965// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24966// all constants is a single constant pool load. A BUILD_VECTOR where each
24967// element is identical is a splat. A BUILD_VECTOR where most of the operands
24968// are undef lowers to a small number of element insertions.
24969//
24970// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24971// We don't fold shuffles where one side is a non-zero constant, and we don't
24972// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24973// non-constant operands. This seems to work out reasonably well in practice.
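// For example (illustrative, assuming the heuristics above allow the fold):
//   shuffle (build_vector A,B,C,D), (build_vector E,F,G,H), <0,4,1,5>
//     --> build_vector A,E,B,F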
24974static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24975 SelectionDAG &DAG,
24976 const TargetLowering &TLI) {
24977 EVT VT = SVN->getValueType(0);
24978 unsigned NumElts = VT.getVectorNumElements();
24979 SDValue N0 = SVN->getOperand(0);
24980 SDValue N1 = SVN->getOperand(1);
24981
24982 if (!N0->hasOneUse())
24983 return SDValue();
24984
24985 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
24986 // discussed above.
24987 if (!N1.isUndef()) {
24988 if (!N1->hasOneUse())
24989 return SDValue();
24990
24991 bool N0AnyConst = isAnyConstantBuildVector(N0);
24992 bool N1AnyConst = isAnyConstantBuildVector(N1);
24993 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24994 return SDValue();
24995 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24996 return SDValue();
24997 }
24998
24999 // If both inputs are splats of the same value then we can safely merge this
25000 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
25001 bool IsSplat = false;
25002 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
25003 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
25004 if (BV0 && BV1)
25005 if (SDValue Splat0 = BV0->getSplatValue())
25006 IsSplat = (Splat0 == BV1->getSplatValue());
25007
25008 SmallVector<SDValue, 16> Ops;
25009 SmallSet<SDValue, 16> DuplicateOps;
25010 for (int M : SVN->getMask()) {
25011 SDValue Op = DAG.getUNDEF(VT.getScalarType());
25012 if (M >= 0) {
25013 int Idx = M < (int)NumElts ? M : M - NumElts;
25014 SDValue &S = (M < (int)NumElts ? N0 : N1);
25015 if (S.getOpcode() == ISD::BUILD_VECTOR) {
25016 Op = S.getOperand(Idx);
25017 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
25018 SDValue Op0 = S.getOperand(0);
25019 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
25020 } else {
25021 // Operand can't be combined - bail out.
25022 return SDValue();
25023 }
25024 }
25025
25026 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
25027 // generating a splat; semantically, this is fine, but it's likely to
25028 // generate low-quality code if the target can't reconstruct an appropriate
25029 // shuffle.
25030 if (!Op.isUndef() && !isIntOrFPConstant(Op))
25031 if (!IsSplat && !DuplicateOps.insert(Op).second)
25032 return SDValue();
25033
25034 Ops.push_back(Op);
25035 }
25036
25037 // BUILD_VECTOR requires all inputs to be of the same type, find the
25038 // maximum type and extend them all.
25039 EVT SVT = VT.getScalarType();
25040 if (SVT.isInteger())
25041 for (SDValue &Op : Ops)
25042 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
25043 if (SVT != VT.getScalarType())
25044 for (SDValue &Op : Ops)
25045 Op = Op.isUndef() ? DAG.getUNDEF(SVT)
25046 : (TLI.isZExtFree(Op.getValueType(), SVT)
25047 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
25048 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
25049 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
25050}
25051
25052// Match shuffles that can be converted to *_vector_extend_in_reg.
25053// This is often generated during legalization.
25054// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
25055// and returns the EVT to which the extension should be performed.
25056// NOTE: this assumes that the src is the first operand of the shuffle.
25057static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
25058 unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
25059 SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
25060 bool LegalOperations) {
25061 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25062
25063 // TODO Add support for big-endian when we have a test case.
25064 if (!VT.isInteger() || IsBigEndian)
25065 return std::nullopt;
25066
25067 unsigned NumElts = VT.getVectorNumElements();
25068 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25069
25070 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
25071 // power-of-2 extensions as they are the most likely.
25072 // FIXME: should try Scale == NumElts case too,
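// For example (illustrative): for v8i16 this tries Scale = 2 (giving v4i32) and
// Scale = 4 (giving v2i64); Scale == NumElts is not attempted (see the FIXME
// above).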
25073 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
25074 // The vector width must be a multiple of Scale.
25075 if (NumElts % Scale != 0)
25076 continue;
25077
25078 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
25079 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
25080
25081 if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
25082 (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
25083 continue;
25084
25085 if (Match(Scale))
25086 return OutVT;
25087 }
25088
25089 return std::nullopt;
25090}
25091
25092// Match shuffles that can be converted to any_vector_extend_in_reg.
25093// This is often generated during legalization.
25094// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
25095static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
25096 SelectionDAG &DAG,
25097 const TargetLowering &TLI,
25098 bool LegalOperations) {
25099 EVT VT = SVN->getValueType(0);
25100 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25101
25102 // TODO Add support for big-endian when we have a test case.
25103 if (!VT.isInteger() || IsBigEndian)
25104 return SDValue();
25105
25106 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
25107 auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
25108 Mask = SVN->getMask()](unsigned Scale) {
25109 for (unsigned i = 0; i != NumElts; ++i) {
25110 if (Mask[i] < 0)
25111 continue;
25112 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
25113 continue;
25114 return false;
25115 }
25116 return true;
25117 };
25118
25119 unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
25120 SDValue N0 = SVN->getOperand(0);
25121 // Never create an illegal type. Only create unsupported operations if we
25122 // are pre-legalization.
25123 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25124 Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
25125 if (!OutVT)
25126 return SDValue();
25127 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
25128}
25129
25130// Match shuffles that can be converted to zero_extend_vector_inreg.
25131// This is often generated during legalization.
25132// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
25133static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
25134 SelectionDAG &DAG,
25135 const TargetLowering &TLI,
25136 bool LegalOperations) {
25137 bool LegalTypes = true;
25138 EVT VT = SVN->getValueType(0);
25139 assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
25140 unsigned NumElts = VT.getVectorNumElements();
25141 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25142
25143 // TODO: add support for big-endian when we have a test case.
25144 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25145 if (!VT.isInteger() || IsBigEndian)
25146 return SDValue();
25147
25148 SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
25149 auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
25150 for (int &Indice : Mask) {
25151 if (Indice < 0)
25152 continue;
25153 int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
25154 int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
25155 Fn(Indice, OpIdx, OpEltIdx);
25156 }
25157 };
25158
25159 // Which elements of which operand does this shuffle demand?
25160 std::array<APInt, 2> OpsDemandedElts;
25161 for (APInt &OpDemandedElts : OpsDemandedElts)
25162 OpDemandedElts = APInt::getZero(NumElts);
25163 ForEachDecomposedIndice(
25164 [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
25165 OpsDemandedElts[OpIdx].setBit(OpEltIdx);
25166 });
25167
25168 // Element-wise(!), which of these demanded elements are known to be zero?
25169 std::array<APInt, 2> OpsKnownZeroElts;
25170 for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
25171 std::get<2>(I) =
25172 DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
25173
25174 // Manifest zeroable element knowledge in the shuffle mask.
25175 // NOTE: we don't have 'zeroable' sentinel value in generic DAG,
25176 // this is a local invention, but it won't leak into DAG.
25177 // FIXME: should we not manifest them, but just check when matching?
25178 bool HadZeroableElts = false;
25179 ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
25180 int &Indice, int OpIdx, int OpEltIdx) {
25181 if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
25182 Indice = -2; // Zeroable element.
25183 HadZeroableElts = true;
25184 }
25185 });
25186
25187 // Don't proceed unless we've refined at least one zeroable mask index.
25188 // If we didn't, then we are still trying to match the same shuffle mask
25189 // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
25190 // and evidently failed. Proceeding will lead to endless combine loops.
25191 if (!HadZeroableElts)
25192 return SDValue();
25193
25194 // The shuffle may be more fine-grained than we want. Widen elements first.
25195 // FIXME: should we do this before manifesting zeroable shuffle mask indices?
25196 SmallVector<int, 16> ScaledMask;
25197 getShuffleMaskWithWidestElts(Mask, ScaledMask);
25198 assert(Mask.size() >= ScaledMask.size() &&
25199 Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
25200 int Prescale = Mask.size() / ScaledMask.size();
25201
25202 NumElts = ScaledMask.size();
25203 EltSizeInBits *= Prescale;
25204
25205 EVT PrescaledVT = EVT::getVectorVT(
25206 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
25207 NumElts);
25208
25209 if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
25210 return SDValue();
25211
25212 // For example,
25213 // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
25214 // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
25215 auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
25216 assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
25217 "Unexpected mask scaling factor.");
25218 ArrayRef<int> Mask = ScaledMask;
25219 for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
25220 SrcElt != NumSrcElts; ++SrcElt) {
25221 // Analyze the shuffle mask in Scale-sized chunks.
25222 ArrayRef<int> MaskChunk = Mask.take_front(Scale);
25223 assert(MaskChunk.size() == Scale && "Unexpected mask size.");
25224 Mask = Mask.drop_front(MaskChunk.size());
25225 // The first indice in this chunk must be SrcElt, but not zero!
25226 // FIXME: undef should be fine, but that results in more-defined result.
25227 if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
25228 return false;
25229 // The rest of the indices in this chunk must be zeros.
25230 // FIXME: undef should be fine, but that results in more-defined result.
25231 if (!all_of(MaskChunk.drop_front(1),
25232 [](int Indice) { return Indice == -2; }))
25233 return false;
25234 }
25235 assert(Mask.empty() && "Did not process the whole mask?");
25236 return true;
25237 };
25238
25239 unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
25240 for (bool Commuted : {false, true}) {
25241 SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
25242 if (Commuted)
25243 ShuffleVectorSDNode::commuteMask(ScaledMask);
25244 std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
25245 Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
25246 LegalOperations);
25247 if (OutVT)
25248 return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
25249 DAG.getBitcast(PrescaledVT, Op)));
25250 }
25251 return SDValue();
25252}
25253
25254// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25255// each source element of a large type into the lowest elements of a smaller
25256// destination type. This is often generated during legalization.
25257// If the source node itself was a '*_extend_vector_inreg' node then we should
25258// then be able to remove it.
25259static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25260 SelectionDAG &DAG) {
25261 EVT VT = SVN->getValueType(0);
25262 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25263
25264 // TODO Add support for big-endian when we have a test case.
25265 if (!VT.isInteger() || IsBigEndian)
25266 return SDValue();
25267
25268 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25269
25270 unsigned Opcode = N0.getOpcode();
25271 if (!ISD::isExtVecInRegOpcode(Opcode))
25272 return SDValue();
25273
25274 SDValue N00 = N0.getOperand(0);
25275 ArrayRef<int> Mask = SVN->getMask();
25276 unsigned NumElts = VT.getVectorNumElements();
25277 unsigned EltSizeInBits = VT.getScalarSizeInBits();
25278 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25279 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25280
25281 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25282 return SDValue();
25283 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25284
25285 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25286 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25287 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25288 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25289 for (unsigned i = 0; i != NumElts; ++i) {
25290 if (Mask[i] < 0)
25291 continue;
25292 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25293 continue;
25294 return false;
25295 }
25296 return true;
25297 };
25298
25299 // At the moment we just handle the case where we've truncated back to the
25300 // same size as before the extension.
25301 // TODO: handle more extension/truncation cases as cases arise.
25302 if (EltSizeInBits != ExtSrcSizeInBits)
25303 return SDValue();
25304
25305 // We can remove *extend_vector_inreg only if the truncation happens at
25306 // the same scale as the extension.
25307 if (isTruncate(ExtScale))
25308 return DAG.getBitcast(VT, N00);
25309
25310 return SDValue();
25311}
25312
25313// Combine shuffles of splat-shuffles of the form:
25314// shuffle (shuffle V, undef, splat-mask), undef, M
25315// If splat-mask contains undef elements, we need to be careful about
25316// introducing undef's in the folded mask which are not the result of composing
25317// the masks of the shuffles.
25318static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25319 SelectionDAG &DAG) {
25320 EVT VT = Shuf->getValueType(0);
25321 unsigned NumElts = VT.getVectorNumElements();
25322
25323 if (!Shuf->getOperand(1).isUndef())
25324 return SDValue();
25325
25326 // See if this unary non-splat shuffle actually *is* a splat shuffle,
25327 // in disguise, with all demanded elements being identical.
25328 // FIXME: this can be done per-operand.
25329 if (!Shuf->isSplat()) {
25330 APInt DemandedElts(NumElts, 0);
25331 for (int Idx : Shuf->getMask()) {
25332 if (Idx < 0)
25333 continue; // Ignore sentinel indices.
25334 assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
25335 DemandedElts.setBit(Idx);
25336 }
25337 assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25338 APInt UndefElts;
25339 if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25340 // Even if all demanded elements are splat, some of them could be undef.
25341 // Which lowest demanded element is *not* known-undef?
25342 std::optional<unsigned> MinNonUndefIdx;
25343 for (int Idx : Shuf->getMask()) {
25344 if (Idx < 0 || UndefElts[Idx])
25345 continue; // Ignore sentinel indices, and undef elements.
25346 MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25347 }
25348 if (!MinNonUndefIdx)
25349 return DAG.getUNDEF(VT); // All undef - result is undef.
25350 assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25351 SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25352 Shuf->getMask().end());
25353 for (int &Idx : SplatMask) {
25354 if (Idx < 0)
25355 continue; // Passthrough sentinel indices.
25356 // Otherwise, just pick the lowest demanded non-undef element.
25357 // Or sentinel undef, if we know we'd pick a known-undef element.
25358 Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25359 }
25360 assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25361 return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25362 Shuf->getOperand(1), SplatMask);
25363 }
25364 }
25365
25366 // If the inner operand is a known splat with no undefs, just return that directly.
25367 // TODO: Create DemandedElts mask from Shuf's mask.
25368 // TODO: Allow undef elements and merge with the shuffle code below.
25369 if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25370 return Shuf->getOperand(0);
25371
25372 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25373 if (!Splat || !Splat->isSplat())
25374 return SDValue();
25375
25376 ArrayRef<int> ShufMask = Shuf->getMask();
25377 ArrayRef<int> SplatMask = Splat->getMask();
25378 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25379
25380 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25381 // every undef mask element in the splat-shuffle has a corresponding undef
25382 // element in the user-shuffle's mask or if the composition of mask elements
25383 // would result in undef.
25384 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25385 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25386 // In this case it is not legal to simplify to the splat-shuffle because we
25387 // may be exposing to the users of the shuffle an undef element at index 1
25388 // which was not there before the combine.
25389 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25390 // In this case the composition of masks yields SplatMask, so it's ok to
25391 // simplify to the splat-shuffle.
25392 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25393 // In this case the composed mask includes all undef elements of SplatMask
25394 // and in addition sets element zero to undef. It is safe to simplify to
25395 // the splat-shuffle.
25396 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25397 ArrayRef<int> SplatMask) {
25398 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25399 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25400 SplatMask[UserMask[i]] != -1)
25401 return false;
25402 return true;
25403 };
25404 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25405 return Shuf->getOperand(0);
25406
25407 // Create a new shuffle with a mask that is composed of the two shuffles'
25408 // masks.
25409 SmallVector<int, 32> NewMask;
25410 for (int Idx : ShufMask)
25411 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25412
25413 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25414 Splat->getOperand(0), Splat->getOperand(1),
25415 NewMask);
25416}
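// Illustrative example (not part of the original source): if the inner operand
// splats element 2 through the mask <2,u,2,u> and the outer mask is <0,0,2,2>,
// only lanes 0 and 2 are demanded and both hold the splatted value, so the
// rewrite above replaces the outer mask with <0,0,0,0>, i.e. a plain splat
// shuffle of the same operand.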
25417
25418// Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25419// the mask can be treated as a larger type.
25420static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25421 SelectionDAG &DAG,
25422 const TargetLowering &TLI,
25423 bool LegalOperations) {
25424 SDValue Op0 = SVN->getOperand(0);
25425 SDValue Op1 = SVN->getOperand(1);
25426 EVT VT = SVN->getValueType(0);
25427 if (Op0.getOpcode() != ISD::BITCAST)
25428 return SDValue();
25429 EVT InVT = Op0.getOperand(0).getValueType();
25430 if (!InVT.isVector() ||
25431 (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25432 Op1.getOperand(0).getValueType() != InVT)))
25433 return SDValue();
25434 if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25435 (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25436 return SDValue();
25437
25438 int VTLanes = VT.getVectorNumElements();
25439 int InLanes = InVT.getVectorNumElements();
25440 if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25441 (LegalOperations &&
25442 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT)))
25443 return SDValue();
25444 int Factor = VTLanes / InLanes;
25445
25446 // Check that each group of lanes in the mask is either undef or forms a valid
25447 // mask for the wider lane type.
25448 ArrayRef<int> Mask = SVN->getMask();
25449 SmallVector<int> NewMask;
25450 if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25451 return SDValue();
25452
25453 if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25454 return SDValue();
25455
25456 // Create the new shuffle with the new mask and bitcast it back to the
25457 // original type.
25458 SDLoc DL(SVN);
25459 Op0 = Op0.getOperand(0);
25460 Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25461 SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25462 return DAG.getBitcast(VT, NewShuf);
25463}
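// Illustrative example (not part of the original source): a v8i16 shuffle of
// (bitcast v4i32 X) and (bitcast v4i32 Y) with mask <0,1,10,11,4,5,14,15>
// widens by Factor = 2 to the v4i32 mask <0,5,2,7>, so the node can be rebuilt
// as bitcast (shuffle X, Y, <0,5,2,7>) when that mask is legal for v4i32.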
25464
25465/// Combine shuffle of shuffle of the form:
25466/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
25467static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25468 SelectionDAG &DAG) {
25469 if (!OuterShuf->getOperand(1).isUndef())
25470 return SDValue();
25471 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25472 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25473 return SDValue();
25474
25475 ArrayRef<int> OuterMask = OuterShuf->getMask();
25476 ArrayRef<int> InnerMask = InnerShuf->getMask();
25477 unsigned NumElts = OuterMask.size();
25478 assert(NumElts == InnerMask.size() && "Mask length mismatch");
25479 SmallVector<int, 32> CombinedMask(NumElts, -1);
25480 int SplatIndex = -1;
25481 for (unsigned i = 0; i != NumElts; ++i) {
25482 // Undef lanes remain undef.
25483 int OuterMaskElt = OuterMask[i];
25484 if (OuterMaskElt == -1)
25485 continue;
25486
25487 // Peek through the shuffle masks to get the underlying source element.
25488 int InnerMaskElt = InnerMask[OuterMaskElt];
25489 if (InnerMaskElt == -1)
25490 continue;
25491
25492 // Initialize the splatted element.
25493 if (SplatIndex == -1)
25494 SplatIndex = InnerMaskElt;
25495
25496 // Non-matching index - this is not a splat.
25497 if (SplatIndex != InnerMaskElt)
25498 return SDValue();
25499
25500 CombinedMask[i] = InnerMaskElt;
25501 }
25502 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25503 getSplatIndex(CombinedMask) != -1) &&
25504 "Expected a splat mask");
25505
25506 // TODO: The transform may be a win even if the mask is not legal.
25507 EVT VT = OuterShuf->getValueType(0);
25508 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25509 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25510 return SDValue();
25511
25512 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25513 InnerShuf->getOperand(1), CombinedMask);
25514}
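// Illustrative example (not part of the original source):
//   shuf (shuf X, undef, <3,u,3,u>), undef, <2,0,2,0>
// resolves every defined lane through InnerMask[OuterMaskElt] to element 3 of
// X, so the pair becomes a single splat shuffle of X with mask <3,3,3,3> when
// the target reports that mask as legal.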
25515
25516/// If the shuffle mask is taking exactly one element from the first vector
25517/// operand and passing through all other elements from the second vector
25518/// operand, return the index of the mask element that is choosing an element
25519/// from the first operand. Otherwise, return -1.
25520static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25521 int MaskSize = Mask.size();
25522 int EltFromOp0 = -1;
25523 // TODO: This does not match if there are undef elements in the shuffle mask.
25524 // Should we ignore undefs in the shuffle mask instead? The trade-off is
25525 // removing an instruction (a shuffle), but losing the knowledge that some
25526 // vector lanes are not needed.
25527 for (int i = 0; i != MaskSize; ++i) {
25528 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25529 // We're looking for a shuffle of exactly one element from operand 0.
25530 if (EltFromOp0 != -1)
25531 return -1;
25532 EltFromOp0 = i;
25533 } else if (Mask[i] != i + MaskSize) {
25534 // Nothing from operand 1 can change lanes.
25535 return -1;
25536 }
25537 }
25538 return EltFromOp0;
25539}
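// Illustrative example (not part of the original source): for a 4-element
// mask, <4,5,1,7> returns 2 (only lane 2 reads operand 0; the other lanes pass
// through the matching element of operand 1), while <4,5,1,6> returns -1
// because lane 3 would move an element of operand 1 across lanes.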
25540
25541/// If a shuffle inserts exactly one element from a source vector operand into
25542/// another vector operand and we can access the specified element as a scalar,
25543/// then we can eliminate the shuffle.
25544static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25545 SelectionDAG &DAG) {
25546 // First, check if we are taking one element of a vector and shuffling that
25547 // element into another vector.
25548 ArrayRef<int> Mask = Shuf->getMask();
25549 SmallVector<int, 16> CommutedMask(Mask);
25550 SDValue Op0 = Shuf->getOperand(0);
25551 SDValue Op1 = Shuf->getOperand(1);
25552 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25553 if (ShufOp0Index == -1) {
25554 // Commute mask and check again.
25555 ShuffleVectorSDNode::commuteMask(CommutedMask);
25556 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25557 if (ShufOp0Index == -1)
25558 return SDValue();
25559 // Commute operands to match the commuted shuffle mask.
25560 std::swap(Op0, Op1);
25561 Mask = CommutedMask;
25562 }
25563
25564 // The shuffle inserts exactly one element from operand 0 into operand 1.
25565 // Now see if we can access that element as a scalar via a real insert element
25566 // instruction.
25567 // TODO: We can try harder to locate the element as a scalar. Examples: it
25568 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25569 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25570 "Shuffle mask value must be from operand 0");
25571 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25572 return SDValue();
25573
25574 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25575 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25576 return SDValue();
25577
25578 // There's an existing insertelement with constant insertion index, so we
25579 // don't need to check the legality/profitability of a replacement operation
25580 // that differs at most in the constant value. The target should be able to
25581 // lower any of those in a similar way. If not, legalization will expand this
25582 // to a scalar-to-vector plus shuffle.
25583 //
25584 // Note that the shuffle may move the scalar from the position that the insert
25585 // element used. Therefore, our new insert element occurs at the shuffle's
25586 // mask index value, not the insert's index value.
25587 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25588 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25589 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25590 Op1, Op0.getOperand(1), NewInsIndex);
25591}
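// Illustrative example (not part of the original source):
//   shuffle (insertelt v1, x, 1), v2, <4,1,6,7>
// takes only element 1 of operand 0 (exactly the freshly inserted scalar x)
// and passes the remaining lanes of v2 through, so it is rewritten as
//   insertelt v2, x, 1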
25592
25593/// If we have a unary shuffle of a shuffle, see if it can be folded away
25594/// completely. This has the potential to lose undef knowledge because the first
25595/// shuffle may not have an undef mask element where the second one does. So
25596/// only call this after doing simplifications based on demanded elements.
25597static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25598 // shuf (shuf0 X, Y, Mask0), undef, Mask
25599 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25600 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25601 return SDValue();
25602
25603 ArrayRef<int> Mask = Shuf->getMask();
25604 ArrayRef<int> Mask0 = Shuf0->getMask();
25605 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25606 // Ignore undef elements.
25607 if (Mask[i] == -1)
25608 continue;
25609 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25610
25611 // Is the element of the shuffle operand chosen by this shuffle the same as
25612 // the element chosen by the shuffle operand itself?
25613 if (Mask0[Mask[i]] != Mask0[i])
25614 return SDValue();
25615 }
25616 // Every element of this shuffle is identical to the result of the previous
25617 // shuffle, so we can replace this value.
25618 return Shuf->getOperand(0);
25619}
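// Illustrative example (not part of the original source): with Mask0 = <0,0,2,2>
// and an outer Mask = <1,0,3,2>, each lane selects an element of the inner
// shuffle that holds the same source element as its own lane
// (Mask0[Mask[i]] == Mask0[i] for all i), so the outer shuffle is dropped and
// the inner shuffle is returned unchanged.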
25620
25621SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25622 EVT VT = N->getValueType(0);
25623 unsigned NumElts = VT.getVectorNumElements();
25624
25625 SDValue N0 = N->getOperand(0);
25626 SDValue N1 = N->getOperand(1);
25627
25628 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25629
25630 // Canonicalize shuffle undef, undef -> undef
25631 if (N0.isUndef() && N1.isUndef())
25632 return DAG.getUNDEF(VT);
25633
25634 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25635
25636 // Canonicalize shuffle v, v -> v, undef
25637 if (N0 == N1)
25638 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25639 createUnaryMask(SVN->getMask(), NumElts));
25640
25641 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
25642 if (N0.isUndef())
25643 return DAG.getCommutedVectorShuffle(*SVN);
25644
25645 // Remove references to rhs if it is undef
25646 if (N1.isUndef()) {
25647 bool Changed = false;
25648 SmallVector<int, 8> NewMask;
25649 for (unsigned i = 0; i != NumElts; ++i) {
25650 int Idx = SVN->getMaskElt(i);
25651 if (Idx >= (int)NumElts) {
25652 Idx = -1;
25653 Changed = true;
25654 }
25655 NewMask.push_back(Idx);
25656 }
25657 if (Changed)
25658 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25659 }
25660
25661 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25662 return InsElt;
25663
25664 // A shuffle of a single vector that is a splatted value can always be folded.
25665 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25666 return V;
25667
25668 if (SDValue V = formSplatFromShuffles(SVN, DAG))
25669 return V;
25670
25671 // If it is a splat, check if the argument vector is another splat or a
25672 // build_vector.
25673 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25674 int SplatIndex = SVN->getSplatIndex();
25675 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25676 TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25677 // splat (vector_bo L, R), Index -->
25678 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25679 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25680 SDLoc DL(N);
25681 EVT EltVT = VT.getScalarType();
25682 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25683 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25684 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25685 SDValue NewBO =
25686 DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25687 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25688 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25689 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25690 }
25691
25692 // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25693 // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25694 if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25695 N0.hasOneUse()) {
25696 if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25697 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25698
25699 if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25700 if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25701 if (Idx->getAPIntValue() == SplatIndex)
25702 return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25703
25704 // Look through a bitcast if LE and splatting lane 0, through to a
25705 // scalar_to_vector or a build_vector.
25706 if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25707 SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25708 (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25709 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25710 EVT N00VT = N0.getOperand(0).getValueType();
25711 if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25712 VT.isInteger() && N00VT.isInteger()) {
25713 EVT InVT =
25714 TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25715 SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25716 SDLoc(N), InVT);
25717 return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25718 }
25719 }
25720 }
25721
25722 // If this is a bit convert that changes the element type of the vector but
25723 // not the number of vector elements, look through it. Be careful not to
25724 // look through conversions that change things like v4f32 to v2f64.
25725 SDNode *V = N0.getNode();
25726 if (V->getOpcode() == ISD::BITCAST) {
25727 SDValue ConvInput = V->getOperand(0);
25728 if (ConvInput.getValueType().isVector() &&
25729 ConvInput.getValueType().getVectorNumElements() == NumElts)
25730 V = ConvInput.getNode();
25731 }
25732
25733 if (V->getOpcode() == ISD::BUILD_VECTOR) {
25734 assert(V->getNumOperands() == NumElts &&
25735 "BUILD_VECTOR has wrong number of operands");
25736 SDValue Base;
25737 bool AllSame = true;
25738 for (unsigned i = 0; i != NumElts; ++i) {
25739 if (!V->getOperand(i).isUndef()) {
25740 Base = V->getOperand(i);
25741 break;
25742 }
25743 }
25744 // Splat of <u, u, u, u>, return <u, u, u, u>
25745 if (!Base.getNode())
25746 return N0;
25747 for (unsigned i = 0; i != NumElts; ++i) {
25748 if (V->getOperand(i) != Base) {
25749 AllSame = false;
25750 break;
25751 }
25752 }
25753 // Splat of <x, x, x, x>, return <x, x, x, x>
25754 if (AllSame)
25755 return N0;
25756
25757 // Canonicalize any other splat as a build_vector.
25758 SDValue Splatted = V->getOperand(SplatIndex);
25759 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25760 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25761
25762 // We may have jumped through bitcasts, so the type of the
25763 // BUILD_VECTOR may not match the type of the shuffle.
25764 if (V->getValueType(0) != VT)
25765 NewBV = DAG.getBitcast(VT, NewBV);
25766 return NewBV;
25767 }
25768 }
25769
25770 // Simplify source operands based on shuffle mask.
25771 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25772 return SDValue(N, 0);
25773
25774 // This is intentionally placed after demanded elements simplification because
25775 // it could eliminate knowledge of undef elements created by this shuffle.
25776 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25777 return ShufOp;
25778
25779 // Match shuffles that can be converted to any_vector_extend_in_reg.
25780 if (SDValue V =
25781 combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25782 return V;
25783
25784 // Combine "truncate_vector_in_reg" style shuffles.
25785 if (SDValue V = combineTruncationShuffle(SVN, DAG))
25786 return V;
25787
25788 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25789 Level < AfterLegalizeVectorOps &&
25790 (N1.isUndef() ||
25791 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25792 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25793 if (SDValue V = partitionShuffleOfConcats(N, DAG))
25794 return V;
25795 }
25796
25797 // A shuffle of a concat of the same narrow vector can be reduced to use
25798 // only low-half elements of a concat with undef:
25799 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
25800 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25801 N0.getNumOperands() == 2 &&
25802 N0.getOperand(0) == N0.getOperand(1)) {
25803 int HalfNumElts = (int)NumElts / 2;
25804 SmallVector<int, 8> NewMask;
25805 for (unsigned i = 0; i != NumElts; ++i) {
25806 int Idx = SVN->getMaskElt(i);
25807 if (Idx >= HalfNumElts) {
25808 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25809 Idx -= HalfNumElts;
25810 }
25811 NewMask.push_back(Idx);
25812 }
25813 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25814 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25815 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25816 N0.getOperand(0), UndefVec);
25817 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25818 }
25819 }
25820
25821 // See if we can replace a shuffle with an insert_subvector.
25822 // e.g. v2i32 into v8i32:
25823 // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25824 // --> insert_subvector(lhs,rhs1,4).
25825 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25826 (N0.getOpcode() == ISD::CONCAT_VECTORS || N1.getOpcode() == ISD::CONCAT_VECTORS)) {
25827 auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25828 // Ensure RHS subvectors are legal.
25829 assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25830 EVT SubVT = RHS.getOperand(0).getValueType();
25831 int NumSubVecs = RHS.getNumOperands();
25832 int NumSubElts = SubVT.getVectorNumElements();
25833 assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25834 if (!TLI.isTypeLegal(SubVT))
25835 return SDValue();
25836
25837 // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25838 if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25839 return SDValue();
25840
25841 // Search [NumSubElts] spans for RHS sequence.
25842 // TODO: Can we avoid nested loops to increase performance?
25843 SmallVector<int> InsertionMask(NumElts);
25844 for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25845 for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25846 // Reset mask to identity.
25847 std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25848
25849 // Add subvector insertion.
25850 std::iota(InsertionMask.begin() + SubIdx,
25851 InsertionMask.begin() + SubIdx + NumSubElts,
25852 NumElts + (SubVec * NumSubElts));
25853
25854 // See if the shuffle mask matches the reference insertion mask.
25855 bool MatchingShuffle = true;
25856 for (int i = 0; i != (int)NumElts; ++i) {
25857 int ExpectIdx = InsertionMask[i];
25858 int ActualIdx = Mask[i];
25859 if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25860 MatchingShuffle = false;
25861 break;
25862 }
25863 }
25864
25865 if (MatchingShuffle)
25866 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25867 RHS.getOperand(SubVec),
25868 DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25869 }
25870 }
25871 return SDValue();
25872 };
25873 ArrayRef<int> Mask = SVN->getMask();
25874 if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25875 if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25876 return InsertN1;
25877 if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25878 SmallVector<int> CommuteMask(Mask);
25879 ShuffleVectorSDNode::commuteMask(CommuteMask);
25880 if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25881 return InsertN0;
25882 }
25883 }
25884
25885 // If we're not performing a select/blend shuffle, see if we can convert the
25886 // shuffle into an AND node, with all the out-of-lane elements known to be zero.
25887 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25888 bool IsInLaneMask = true;
25889 ArrayRef<int> Mask = SVN->getMask();
25890 SmallVector<int, 16> ClearMask(NumElts, -1);
25891 APInt DemandedLHS = APInt::getZero(NumElts);
25892 APInt DemandedRHS = APInt::getZero(NumElts);
25893 for (int I = 0; I != (int)NumElts; ++I) {
25894 int M = Mask[I];
25895 if (M < 0)
25896 continue;
25897 ClearMask[I] = M == I ? I : (I + NumElts);
25898 IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25899 if (M != I) {
25900 APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25901 Demanded.setBit(M % NumElts);
25902 }
25903 }
25904 // TODO: Should we try to mask with N1 as well?
25905 if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25906 (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25907 (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25908 SDLoc DL(N);
25909 EVT IntVT = VT.changeVectorElementTypeToInteger();
25910 EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
25911 // Transform the type to a legal type so that the buildvector constant
25912 // elements are not illegal. Make sure that the result is larger than the
25913 // original type, in case the value is split into two (e.g. i64->i32).
25914 if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25915 IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25916 if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25917 SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25918 SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25919 SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25920 for (int I = 0; I != (int)NumElts; ++I)
25921 if (0 <= Mask[I])
25922 AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25923
25924 // See if a clear mask is legal instead of going via
25925 // XformToShuffleWithZero which loses UNDEF mask elements.
25926 if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25927 return DAG.getBitcast(
25928 VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25929 DAG.getConstant(0, DL, IntVT), ClearMask));
25930
25931 if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25932 return DAG.getBitcast(
25933 VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25934 DAG.getBuildVector(IntVT, DL, AndMask)));
25935 }
25936 }
25937 }
25938
25939 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25940 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25941 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25942 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25943 return Res;
25944
25945 // If this shuffle only has a single input that is a bitcasted shuffle,
25946 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25947 // back to their original types.
25948 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25949 N1.isUndef() && Level < AfterLegalizeVectorOps &&
25950 TLI.isTypeLegal(VT)) {
25951
25952 SDValue BC0 = peekThroughOneUseBitcasts(N0);
25953 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25954 EVT SVT = VT.getScalarType();
25955 EVT InnerVT = BC0->getValueType(0);
25956 EVT InnerSVT = InnerVT.getScalarType();
25957
25958 // Determine which shuffle works with the smaller scalar type.
25959 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25960 EVT ScaleSVT = ScaleVT.getScalarType();
25961
25962 if (TLI.isTypeLegal(ScaleVT) &&
25963 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25964 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25965 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25966 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25967
25968 // Scale the shuffle masks to the smaller scalar type.
25969 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25970 SmallVector<int, 8> InnerMask;
25971 SmallVector<int, 8> OuterMask;
25972 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25973 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25974
25975 // Merge the shuffle masks.
25976 SmallVector<int, 8> NewMask;
25977 for (int M : OuterMask)
25978 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25979
25980 // Test for shuffle mask legality over both commutations.
25981 SDValue SV0 = BC0->getOperand(0);
25982 SDValue SV1 = BC0->getOperand(1);
25983 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25984 if (!LegalMask) {
25985 std::swap(SV0, SV1);
25986 ShuffleVectorSDNode::commuteMask(NewMask);
25987 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25988 }
25989
25990 if (LegalMask) {
25991 SV0 = DAG.getBitcast(ScaleVT, SV0);
25992 SV1 = DAG.getBitcast(ScaleVT, SV1);
25993 return DAG.getBitcast(
25994 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25995 }
25996 }
25997 }
25998 }
25999
26000 // Match shuffles of bitcasts, so long as the mask can be treated as the
26001 // larger type.
26002 if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
26003 return V;
26004
26005 // Compute the combined shuffle mask for a shuffle with SV0 as the first
26006 // operand, and SV1 as the second operand.
26007 // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
26008 // Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
26009 auto MergeInnerShuffle =
26010 [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
26011 ShuffleVectorSDNode *OtherSVN, SDValue N1,
26012 const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
26013 SmallVectorImpl<int> &Mask) -> bool {
26014 // Don't try to fold splats; they're likely to simplify somehow, or they
26015 // might be free.
26016 if (OtherSVN->isSplat())
26017 return false;
26018
26019 SV0 = SV1 = SDValue();
26020 Mask.clear();
26021
26022 for (unsigned i = 0; i != NumElts; ++i) {
26023 int Idx = SVN->getMaskElt(i);
26024 if (Idx < 0) {
26025 // Propagate Undef.
26026 Mask.push_back(Idx);
26027 continue;
26028 }
26029
26030 if (Commute)
26031 Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
26032
26033 SDValue CurrentVec;
26034 if (Idx < (int)NumElts) {
26035 // This shuffle index refers to the inner shuffle N0. Lookup the inner
26036 // shuffle mask to identify which vector is actually referenced.
26037 Idx = OtherSVN->getMaskElt(Idx);
26038 if (Idx < 0) {
26039 // Propagate Undef.
26040 Mask.push_back(Idx);
26041 continue;
26042 }
26043 CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
26044 : OtherSVN->getOperand(1);
26045 } else {
26046 // This shuffle index references an element within N1.
26047 CurrentVec = N1;
26048 }
26049
26050 // Simple case where 'CurrentVec' is UNDEF.
26051 if (CurrentVec.isUndef()) {
26052 Mask.push_back(-1);
26053 continue;
26054 }
26055
26056 // Canonicalize the shuffle index. We don't know yet if CurrentVec
26057 // will be the first or second operand of the combined shuffle.
26058 Idx = Idx % NumElts;
26059 if (!SV0.getNode() || SV0 == CurrentVec) {
26060 // Ok. CurrentVec is the left hand side.
26061 // Update the mask accordingly.
26062 SV0 = CurrentVec;
26063 Mask.push_back(Idx);
26064 continue;
26065 }
26066 if (!SV1.getNode() || SV1 == CurrentVec) {
26067 // Ok. CurrentVec is the right hand side.
26068 // Update the mask accordingly.
26069 SV1 = CurrentVec;
26070 Mask.push_back(Idx + NumElts);
26071 continue;
26072 }
26073
26074 // Last chance - see if the vector is another shuffle and if it
26075 // uses one of the existing candidate shuffle ops.
26076 if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
26077 int InnerIdx = CurrentSVN->getMaskElt(Idx);
26078 if (InnerIdx < 0) {
26079 Mask.push_back(-1);
26080 continue;
26081 }
26082 SDValue InnerVec = (InnerIdx < (int)NumElts)
26083 ? CurrentSVN->getOperand(0)
26084 : CurrentSVN->getOperand(1);
26085 if (InnerVec.isUndef()) {
26086 Mask.push_back(-1);
26087 continue;
26088 }
26089 InnerIdx %= NumElts;
26090 if (InnerVec == SV0) {
26091 Mask.push_back(InnerIdx);
26092 continue;
26093 }
26094 if (InnerVec == SV1) {
26095 Mask.push_back(InnerIdx + NumElts);
26096 continue;
26097 }
26098 }
26099
26100 // Bail out if we cannot convert the shuffle pair into a single shuffle.
26101 return false;
26102 }
26103
26104 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26105 return true;
26106
26107 // Avoid introducing shuffles with illegal mask.
26108 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26109 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26110 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26111 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
26112 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
26113 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
26114 if (TLI.isShuffleMaskLegal(Mask, VT))
26115 return true;
26116
26117 std::swap(SV0, SV1);
26118 ShuffleVectorSDNode::commuteMask(Mask);
26119 return TLI.isShuffleMaskLegal(Mask, VT);
26120 };
26121
26122 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
26123 // Canonicalize shuffles according to rules:
26124 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
26125 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
26126 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
26127 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26128 N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
26129 // The incoming shuffle must be of the same type as the result of the
26130 // current shuffle.
26131 assert(N1->getOperand(0).getValueType() == VT &&
26132 "Shuffle types don't match");
26133
26134 SDValue SV0 = N1->getOperand(0);
26135 SDValue SV1 = N1->getOperand(1);
26136 bool HasSameOp0 = N0 == SV0;
26137 bool IsSV1Undef = SV1.isUndef();
26138 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
26139 // Commute the operands of this shuffle so merging below will trigger.
26140 return DAG.getCommutedVectorShuffle(*SVN);
26141 }
26142
26143 // Canonicalize splat shuffles to the RHS to improve merging below.
26144 // shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
26145 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
26146 N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
26147 cast<ShuffleVectorSDNode>(N0)->isSplat() &&
26148 !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
26149 return DAG.getCommutedVectorShuffle(*SVN);
26150 }
26151
26152 // Try to fold according to rules:
26153 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
26154 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
26155 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
26156 // Don't try to fold shuffles with illegal type.
26157 // Only fold if this shuffle is the only user of the other shuffle.
26158 // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
26159 for (int i = 0; i != 2; ++i) {
26160 if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
26161 N->isOnlyUserOf(N->getOperand(i).getNode())) {
26162 // The incoming shuffle must be of the same type as the result of the
26163 // current shuffle.
26164 auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
26165 assert(OtherSV->getOperand(0).getValueType() == VT &&
26166 "Shuffle types don't match");
26167
26168 SDValue SV0, SV1;
26169 SmallVector<int, 4> Mask;
26170 if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
26171 SV0, SV1, Mask)) {
26172 // Check if all indices in Mask are Undef. In case, propagate Undef.
26173 if (llvm::all_of(Mask, [](int M) { return M < 0; }))
26174 return DAG.getUNDEF(VT);
26175
26176 return DAG.getVectorShuffle(VT, SDLoc(N),
26177 SV0 ? SV0 : DAG.getUNDEF(VT),
26178 SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
26179 }
26180 }
26181 }
26182
26183 // Merge shuffles through binops if we are able to merge it with at least
26184 // one other shuffle.
26185 // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
26186 // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
26187 unsigned SrcOpcode = N0.getOpcode();
26188 if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
26189 (N1.isUndef() ||
26190 (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
26191 // Get binop source ops, or just pass on the undef.
26192 SDValue Op00 = N0.getOperand(0);
26193 SDValue Op01 = N0.getOperand(1);
26194 SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
26195 SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
26196 // TODO: We might be able to relax the VT check but we don't currently
26197 // have any isBinOp() that has different result/ops VTs so play safe until
26198 // we have test coverage.
26199 if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
26200 Op01.getValueType() == VT && Op11.getValueType() == VT &&
26201 (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
26202 Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
26203 Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
26204 Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
26205 auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
26206 SmallVectorImpl<int> &Mask, bool LeftOp,
26207 bool Commute) {
26208 SDValue InnerN = Commute ? N1 : N0;
26209 SDValue Op0 = LeftOp ? Op00 : Op01;
26210 SDValue Op1 = LeftOp ? Op10 : Op11;
26211 if (Commute)
26212 std::swap(Op0, Op1);
26213 // Only accept the merged shuffle if we don't introduce undef elements,
26214 // or the inner shuffle already contained undef elements.
26215 auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
26216 return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
26217 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
26218 Mask) &&
26219 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
26220 llvm::none_of(Mask, [](int M) { return M < 0; }));
26221 };
26222
26223 // Ensure we don't increase the number of shuffles - we must merge a
26224 // shuffle from at least one of the LHS and RHS ops.
26225 bool MergedLeft = false;
26226 SDValue LeftSV0, LeftSV1;
26227 SmallVector<int, 4> LeftMask;
26228 if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
26229 CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
26230 MergedLeft = true;
26231 } else {
26232 LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26233 LeftSV0 = Op00, LeftSV1 = Op10;
26234 }
26235
26236 bool MergedRight = false;
26237 SDValue RightSV0, RightSV1;
26238 SmallVector<int, 4> RightMask;
26239 if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
26240 CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
26241 MergedRight = true;
26242 } else {
26243 RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
26244 RightSV0 = Op01, RightSV1 = Op11;
26245 }
26246
26247 if (MergedLeft || MergedRight) {
26248 SDLoc DL(N);
26249 SDValue LHS = DAG.getVectorShuffle(
26250 VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
26251 LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
26252 SDValue RHS = DAG.getVectorShuffle(
26253 VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26254 RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26255 return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26256 }
26257 }
26258 }
26259 }
26260
26261 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26262 return V;
26263
26264 // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26265 // Perform this really late, because it could eliminate knowledge
26266 // of undef elements created by this shuffle.
26267 if (Level < AfterLegalizeTypes)
26268 if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26269 LegalOperations))
26270 return V;
26271
26272 return SDValue();
26273}
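// Illustrative example for the MergeInnerShuffle helper above (not part of the
// original source):
//   shuffle (shuffle A, B, <0,5,2,7>), B, <0,1,4,5>
// resolves to SV0 = A, SV1 = B with the combined mask <0,5,4,5>, replacing two
// shuffles with one when the target accepts that mask.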
26274
26275SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26276 EVT VT = N->getValueType(0);
26277 if (!VT.isFixedLengthVector())
26278 return SDValue();
26279
26280 // Try to convert a scalar binop with an extracted vector element to a vector
26281 // binop. This is intended to reduce potentially expensive register moves.
26282 // TODO: Check if both operands are extracted.
26283 // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26284 // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26285 SDValue Scalar = N->getOperand(0);
26286 unsigned Opcode = Scalar.getOpcode();
26287 EVT VecEltVT = VT.getScalarType();
26288 if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26289 TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26290 Scalar.getOperand(0).getValueType() == VecEltVT &&
26291 Scalar.getOperand(1).getValueType() == VecEltVT &&
26292 Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26293 Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26294 DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26295 // Match an extract element and get a shuffle mask equivalent.
26296 SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26297
26298 for (int i : {0, 1}) {
26299 // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26300 // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26301 SDValue EE = Scalar.getOperand(i);
26302 auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26303 if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26304 EE.getOperand(0).getValueType() == VT &&
26305 isa<ConstantSDNode>(EE.getOperand(1))) {
26306 // Mask = {ExtractIndex, undef, undef....}
26307 ShufMask[0] = EE.getConstantOperandVal(1);
26308 // Make sure the shuffle is legal if we are crossing lanes.
26309 if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26310 SDLoc DL(N);
26311 SDValue V[] = {EE.getOperand(0),
26312 DAG.getConstant(C->getAPIntValue(), DL, VT)};
26313 SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26314 return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26315 ShufMask);
26316 }
26317 }
26318 }
26319 }
26320
26321 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26322 // with a VECTOR_SHUFFLE and possible truncate.
26323 if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26324 !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26325 return SDValue();
26326
26327 // If we have an implicit truncate, truncate here if it is legal.
26328 if (VecEltVT != Scalar.getValueType() &&
26329 Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26330 SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26331 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26332 }
26333
26334 auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26335 if (!ExtIndexC)
26336 return SDValue();
26337
26338 SDValue SrcVec = Scalar.getOperand(0);
26339 EVT SrcVT = SrcVec.getValueType();
26340 unsigned SrcNumElts = SrcVT.getVectorNumElements();
26341 unsigned VTNumElts = VT.getVectorNumElements();
26342 if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26343 // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26344 SmallVector<int, 8> Mask(SrcNumElts, -1);
26345 Mask[0] = ExtIndexC->getZExtValue();
26346 SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26347 SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26348 if (!LegalShuffle)
26349 return SDValue();
26350
26351 // If the initial vector is the same size, the shuffle is the result.
26352 if (VT == SrcVT)
26353 return LegalShuffle;
26354
26355 // If not, shorten the shuffled vector.
26356 if (VTNumElts != SrcNumElts) {
26357 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26358 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26359 SrcVT.getVectorElementType(), VTNumElts);
26360 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26361 ZeroIdx);
26362 }
26363 }
26364
26365 return SDValue();
26366}
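// Illustrative example (not part of the original source):
//   (v4i32 scalar_to_vector (extractelt (v4i32 V), 2))
// becomes shuffle V, undef, <2,-1,-1,-1> via buildLegalVectorShuffle, keeping
// the element move entirely in the vector domain; if the result type were
// narrower (e.g. v2i32), the shuffle would additionally be wrapped in an
// extract_subvector at index 0.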
26367
26368SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26369 EVT VT = N->getValueType(0);
26370 SDValue N0 = N->getOperand(0);
26371 SDValue N1 = N->getOperand(1);
26372 SDValue N2 = N->getOperand(2);
26373 uint64_t InsIdx = N->getConstantOperandVal(2);
26374
26375 // If inserting an UNDEF, just return the original vector.
26376 if (N1.isUndef())
26377 return N0;
26378
26379 // If this is an insert of an extracted vector into an undef vector, we can
26380 // just use the input to the extract if the types match, and can simplify
26381 // in some cases even if they don't.
26382 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26383 N1.getOperand(1) == N2) {
26384 EVT SrcVT = N1.getOperand(0).getValueType();
26385 if (SrcVT == VT)
26386 return N1.getOperand(0);
26387 // TODO: To remove the zero check, need to adjust the offset to
26388 // a multiple of the new src type.
26389 if (isNullConstant(N2) &&
26390 VT.isScalableVector() == SrcVT.isScalableVector()) {
26391 if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26392 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26393 VT, N0, N1.getOperand(0), N2);
26394 else
26395 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26396 VT, N1.getOperand(0), N2);
26397 }
26398 }
26399
26400 // Handle case where we've ended up inserting back into the source vector
26401 // we extracted the subvector from.
26402 // insert_subvector(N0, extract_subvector(N0, N2), N2) --> N0
26403 if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0) == N0 &&
26404 N1.getOperand(1) == N2)
26405 return N0;
26406
26407 // Simplify scalar inserts into an undef vector:
26408 // insert_subvector undef, (splat X), N2 -> splat X
26409 if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26410 if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26411 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26412
26413 // If we are inserting a bitcast value into an undef, with the same
26414 // number of elements, just use the bitcast input of the extract.
26415 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26416 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26417 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26418 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26419 N1.getOperand(0).getOperand(1) == N2 &&
26420 N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26421 VT.getVectorElementCount() &&
26422 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26423 VT.getSizeInBits()) {
26424 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26425 }
26426
26427 // If both N0 and N1 are bitcast values on which insert_subvector
26428 // would make sense, pull the bitcast through.
26429 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26430 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26431 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26432 SDValue CN0 = N0.getOperand(0);
26433 SDValue CN1 = N1.getOperand(0);
26434 EVT CN0VT = CN0.getValueType();
26435 EVT CN1VT = CN1.getValueType();
26436 if (CN0VT.isVector() && CN1VT.isVector() &&
26437 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26438 CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26439 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26440 CN0.getValueType(), CN0, CN1, N2);
26441 return DAG.getBitcast(VT, NewINSERT);
26442 }
26443 }
26444
26445 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26446 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26447 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26448 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26449 N0.getOperand(1).getValueType() == N1.getValueType() &&
26450 N0.getOperand(2) == N2)
26451 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26452 N1, N2);
26453
26454 // Eliminate an intermediate insert into an undef vector:
26455 // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26456 // insert_subvector undef, X, 0
26457 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26458 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26459 isNullConstant(N2))
26460 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26461 N1.getOperand(1), N2);
26462
26463 // Push subvector bitcasts to the output, adjusting the index as we go.
26464 // insert_subvector(bitcast(v), bitcast(s), c1)
26465 // -> bitcast(insert_subvector(v, s, c2))
26466 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26467 N1.getOpcode() == ISD::BITCAST) {
26468 SDValue N0Src = peekThroughBitcasts(N0);
26469 SDValue N1Src = peekThroughBitcasts(N1);
26470 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26471 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26472 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26473 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26474 EVT NewVT;
26475 SDLoc DL(N);
26476 SDValue NewIdx;
26477 LLVMContext &Ctx = *DAG.getContext();
26478 ElementCount NumElts = VT.getVectorElementCount();
26479 unsigned EltSizeInBits = VT.getScalarSizeInBits();
26480 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26481 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26482 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26483 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26484 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26485 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26486 if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26487 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26488 NumElts.divideCoefficientBy(Scale));
26489 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26490 }
26491 }
26492 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26493 SDValue Res = DAG.getBitcast(NewVT, N0Src);
26494 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26495 return DAG.getBitcast(VT, Res);
26496 }
26497 }
26498 }
26499
26500 // Canonicalize insert_subvector dag nodes.
26501 // Example:
26502 // (insert_subvector (insert_subvector A, Idx0), Idx1)
26503 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26504 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26505 N1.getValueType() == N0.getOperand(1).getValueType()) {
26506 unsigned OtherIdx = N0.getConstantOperandVal(2);
26507 if (InsIdx < OtherIdx) {
26508 // Swap nodes.
26509 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26510 N0.getOperand(0), N1, N2);
26511 AddToWorklist(NewOp.getNode());
26512 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26513 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26514 }
26515 }
26516
26517 // If the input vector is a concatenation, and the insert replaces
26518 // one of the pieces, we can optimize into a single concat_vectors.
26519 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26520 N0.getOperand(0).getValueType() == N1.getValueType() &&
26521 N0.getOperand(0).getValueType().isScalableVector() ==
26522 N1.getValueType().isScalableVector()) {
26523 unsigned Factor = N1.getValueType().getVectorMinNumElements();
26524 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26525 Ops[InsIdx / Factor] = N1;
26526 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26527 }
26528
26529 // Simplify source operands based on insertion.
26530 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26531 return SDValue(N, 0);
26532
26533 return SDValue();
26534}
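// Illustrative example for the bitcast push-through above (not part of the
// original source):
//   insert_subvector (v8i32 bitcast (v16i16 A)), (v2i32 bitcast (v4i16 B)), 4
// rescales the index by Scale = 2 (i32 lanes to i16 lanes) and becomes
//   bitcast (insert_subvector A, B, 8)
// provided the target supports INSERT_SUBVECTOR on v16i16.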
26535
26536SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26537 SDValue N0 = N->getOperand(0);
26538
26539 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26540 if (N0->getOpcode() == ISD::FP16_TO_FP)
26541 return N0->getOperand(0);
26542
26543 return SDValue();
26544}
26545
26546SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26547 auto Op = N->getOpcode();
26548 assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26549 "opcode should be FP16_TO_FP or BF16_TO_FP.");
26550 SDValue N0 = N->getOperand(0);
26551
26552 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26553 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26554 if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26555 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26556 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26557 return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26558 }
26559 }
26560
26561 return SDValue();
26562}
26563
26564SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26565 SDValue N0 = N->getOperand(0);
26566
26567 // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26568 if (N0->getOpcode() == ISD::BF16_TO_FP)
26569 return N0->getOperand(0);
26570
26571 return SDValue();
26572}
26573
26574SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26575 // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26576 return visitFP16_TO_FP(N);
26577}
26578
26579SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26580 SDValue N0 = N->getOperand(0);
26581 EVT VT = N0.getValueType();
26582 unsigned Opcode = N->getOpcode();
26583
26584 // VECREDUCE over 1-element vector is just an extract.
26585 if (VT.getVectorElementCount().isScalar()) {
26586 SDLoc dl(N);
26587 SDValue Res =
26588 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26589 DAG.getVectorIdxConstant(0, dl));
26590 if (Res.getValueType() != N->getValueType(0))
26591 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26592 return Res;
26593 }
26594
26595 // On a boolean vector an and/or reduction is the same as a umin/umax
26596 // reduction. Convert them if the latter is legal while the former isn't.
26597 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26598 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26599 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26600 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26601 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26602 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26603 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26604 }
26605
26606 // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26607 // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26608 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26609 TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26610 SDValue Vec = N0.getOperand(0);
26611 SDValue Subvec = N0.getOperand(1);
26612 if ((Opcode == ISD::VECREDUCE_OR &&
26613 (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26614 (Opcode == ISD::VECREDUCE_AND &&
26615 (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26616 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26617 }
26618
26619 return SDValue();
26620}
26621
26622SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26623 SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26624
26625 // FSUB -> FMA combines:
26626 if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26627 AddToWorklist(Fused.getNode());
26628 return Fused;
26629 }
26630 return SDValue();
26631}
26632
26633SDValue DAGCombiner::visitVPOp(SDNode *N) {
26634
26635 if (N->getOpcode() == ISD::VP_GATHER)
26636 if (SDValue SD = visitVPGATHER(N))
26637 return SD;
26638
26639 if (N->getOpcode() == ISD::VP_SCATTER)
26640 if (SDValue SD = visitVPSCATTER(N))
26641 return SD;
26642
26643 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26644 if (SDValue SD = visitVP_STRIDED_LOAD(N))
26645 return SD;
26646
26647 if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26648 if (SDValue SD = visitVP_STRIDED_STORE(N))
26649 return SD;
26650
26651 // VP operations in which all vector elements are disabled - either by
26652 // determining that the mask is all false or that the EVL is 0 - can be
26653 // eliminated.
26654 bool AreAllEltsDisabled = false;
26655 if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26656 AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26657 if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26658 AreAllEltsDisabled |=
26659 ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26660
26661 // This is the only generic VP combine we support for now.
26662 if (!AreAllEltsDisabled) {
26663 switch (N->getOpcode()) {
26664 case ISD::VP_FADD:
26665 return visitVP_FADD(N);
26666 case ISD::VP_FSUB:
26667 return visitVP_FSUB(N);
26668 case ISD::VP_FMA:
26669 return visitFMA<VPMatchContext>(N);
26670 case ISD::VP_SELECT:
26671 return visitVP_SELECT(N);
26672 }
26673 return SDValue();
26674 }
26675
26676 // Binary operations can be replaced by UNDEF.
26677 if (ISD::isVPBinaryOp(N->getOpcode()))
26678 return DAG.getUNDEF(N->getValueType(0));
26679
26680 // VP Memory operations can be replaced by either the chain (stores) or the
26681 // chain + undef (loads).
26682 if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26683 if (MemSD->writeMem())
26684 return MemSD->getChain();
26685 return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26686 }
26687
26688 // Reduction operations return the start operand when no elements are active.
26689 if (ISD::isVPReduction(N->getOpcode()))
26690 return N->getOperand(0);
26691
26692 return SDValue();
26693}
26694
26695SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26696 SDValue Chain = N->getOperand(0);
26697 SDValue Ptr = N->getOperand(1);
26698 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26699
26700 // Check if the memory where the FP state is written is used only in a single
26701 // load operation.
26702 LoadSDNode *LdNode = nullptr;
26703 for (auto *U : Ptr->uses()) {
26704 if (U == N)
26705 continue;
26706 if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26707 if (LdNode && LdNode != Ld)
26708 return SDValue();
26709 LdNode = Ld;
26710 continue;
26711 }
26712 return SDValue();
26713 }
26714 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26715 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26716 !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26717 return SDValue();
26718
26719 // Check if the loaded value is used only in a store operation.
26720 StoreSDNode *StNode = nullptr;
26721 for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26722 SDUse &U = I.getUse();
26723 if (U.getResNo() == 0) {
26724 if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26725 if (StNode)
26726 return SDValue();
26727 StNode = St;
26728 } else {
26729 return SDValue();
26730 }
26731 }
26732 }
26733 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26734 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26735 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26736 return SDValue();
26737
26738 // Create new node GET_FPENV_MEM, which uses the store address to write FP
26739 // environment.
26740 SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26741 StNode->getMemOperand());
26742 CombineTo(StNode, Res, false);
26743 return Res;
26744}
26745
26746SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26747 SDValue Chain = N->getOperand(0);
26748 SDValue Ptr = N->getOperand(1);
26749 EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26750
26751 // Check if the address of the FP state is otherwise used only in a single store operation.
26752 StoreSDNode *StNode = nullptr;
26753 for (auto *U : Ptr->uses()) {
26754 if (U == N)
26755 continue;
26756 if (auto *St = dyn_cast<StoreSDNode>(U)) {
26757 if (StNode && StNode != St)
26758 return SDValue();
26759 StNode = St;
26760 continue;
26761 }
26762 return SDValue();
26763 }
26764 if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26765 !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26766 !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26767 return SDValue();
26768
26769 // Check if the stored value is loaded from some location and the loaded
26770 // value is used only in the store operation.
26771 SDValue StValue = StNode->getValue();
26772 auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26773 if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26774 !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26775 !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26776 return SDValue();
26777
26778 // Create new node SET_FPENV_MEM, which uses the load address to read FP
26779 // environment.
26780 SDValue Res =
26781 DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26782 LdNode->getMemOperand());
26783 return Res;
26784}
26785
26786/// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
26787/// with the destination vector and a zero vector.
26788/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26789/// vector_shuffle V, Zero, <0, 4, 2, 4>
26790SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26791 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26792
26793 EVT VT = N->getValueType(0);
26794 SDValue LHS = N->getOperand(0);
26795 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26796 SDLoc DL(N);
26797
26798 // Make sure we're not running after operation legalization where it
26799 // may have custom lowered the vector shuffles.
26800 if (LegalOperations)
26801 return SDValue();
26802
26803 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26804 return SDValue();
26805
26806 EVT RVT = RHS.getValueType();
26807 unsigned NumElts = RHS.getNumOperands();
26808
26809 // Attempt to create a valid clear mask, splitting the mask into
26810 // sub elements and checking to see if each is
26811 // all zeros or all ones - suitable for shuffle masking.
26812 auto BuildClearMask = [&](int Split) {
26813 int NumSubElts = NumElts * Split;
26814 int NumSubBits = RVT.getScalarSizeInBits() / Split;
26815
26816 SmallVector<int, 8> Indices;
26817 for (int i = 0; i != NumSubElts; ++i) {
26818 int EltIdx = i / Split;
26819 int SubIdx = i % Split;
26820 SDValue Elt = RHS.getOperand(EltIdx);
26821 // X & undef --> 0 (not undef). So this lane must be converted to choose
26822 // from the zero constant vector (same as if the element had all 0-bits).
26823 if (Elt.isUndef()) {
26824 Indices.push_back(i + NumSubElts);
26825 continue;
26826 }
26827
26828 APInt Bits;
26829 if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26830 Bits = Cst->getAPIntValue();
26831 else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26832 Bits = CstFP->getValueAPF().bitcastToAPInt();
26833 else
26834 return SDValue();
26835
26836 // Extract the sub element from the constant bit mask.
26837 if (DAG.getDataLayout().isBigEndian())
26838 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26839 else
26840 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26841
26842 if (Bits.isAllOnes())
26843 Indices.push_back(i);
26844 else if (Bits == 0)
26845 Indices.push_back(i + NumSubElts);
26846 else
26847 return SDValue();
26848 }
26849
26850 // Let's see if the target supports this vector_shuffle.
26851 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26852 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26853 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26854 return SDValue();
26855
26856 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26857 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26858 DAG.getBitcast(ClearVT, LHS),
26859 Zero, Indices));
26860 };
26861
26862 // Determine maximum split level (byte level masking).
26863 int MaxSplit = 1;
26864 if (RVT.getScalarSizeInBits() % 8 == 0)
26865 MaxSplit = RVT.getScalarSizeInBits() / 8;
26866
26867 for (int Split = 1; Split <= MaxSplit; ++Split)
26868 if (RVT.getScalarSizeInBits() % Split == 0)
26869 if (SDValue S = BuildClearMask(Split))
26870 return S;
26871
26872 return SDValue();
26873}
26874
26875/// If a vector binop is performed on splat values, it may be profitable to
26876/// extract, scalarize, and insert/splat.
26877 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26878 const SDLoc &DL) {
26879 SDValue N0 = N->getOperand(0);
26880 SDValue N1 = N->getOperand(1);
26881 unsigned Opcode = N->getOpcode();
26882 EVT VT = N->getValueType(0);
26883 EVT EltVT = VT.getVectorElementType();
26884 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26885
26886 // TODO: Remove/replace the extract cost check? If the elements are available
26887 // as scalars, then there may be no extract cost. Should we ask if
26888 // inserting a scalar back into a vector is cheap instead?
26889 int Index0, Index1;
26890 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26891 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
26892 // Extracting an element from a splat_vector should be free.
26893 // TODO: use DAG.isSplatValue instead?
26894 bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26895 N1.getOpcode() == ISD::SPLAT_VECTOR;
26896 if (!Src0 || !Src1 || Index0 != Index1 ||
26897 Src0.getValueType().getVectorElementType() != EltVT ||
26898 Src1.getValueType().getVectorElementType() != EltVT ||
26899 !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26900 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26901 return SDValue();
26902
26903 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26904 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26905 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26906 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26907
26908 // If all lanes but 1 are undefined, no need to splat the scalar result.
26909 // TODO: Keep track of undefs and use that info in the general case.
26910 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26911 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26912 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26913 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26914 // build_vec ..undef, (bo X, Y), undef...
26915 SmallVector<SDValue, 8> Ops(N0->getNumOperands(), DAG.getUNDEF(EltVT));
26916 Ops[Index0] = ScalarBO;
26917 return DAG.getBuildVector(VT, DL, Ops);
26918 }
26919
26920 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26921 return DAG.getSplat(VT, DL, ScalarBO);
26922}
26923
26924/// Visit a vector cast operation, like FP_EXTEND.
26925SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26926 EVT VT = N->getValueType(0);
26927 assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26928 EVT EltVT = VT.getVectorElementType();
26929 unsigned Opcode = N->getOpcode();
26930
26931 SDValue N0 = N->getOperand(0);
26932 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26933
26934 // TODO: promote operation might be also good here?
26935 int Index0;
26936 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26937 if (Src0 &&
26938 (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26939 TLI.isExtractVecEltCheap(VT, Index0)) &&
26940 TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26941 TLI.preferScalarizeSplat(N)) {
26942 EVT SrcVT = N0.getValueType();
26943 EVT SrcEltVT = SrcVT.getVectorElementType();
26944 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26945 SDValue Elt =
26946 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26947 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26948 if (VT.isScalableVector())
26949 return DAG.getSplatVector(VT, DL, ScalarBO);
26950 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26951 return DAG.getBuildVector(VT, DL, Ops);
26952 }
26953
26954 return SDValue();
26955}
26956
26957/// Visit a binary vector operation, like ADD.
26958SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26959 EVT VT = N->getValueType(0);
26960 assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26961
26962 SDValue LHS = N->getOperand(0);
26963 SDValue RHS = N->getOperand(1);
26964 unsigned Opcode = N->getOpcode();
26965 SDNodeFlags Flags = N->getFlags();
26966
26967 // Move unary shuffles with identical masks after a vector binop:
26968 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
26969 // --> shuffle (VBinOp A, B), Undef, Mask
26970 // This does not require type legality checks because we are creating the
26971 // same types of operations that are in the original sequence. We do have to
26972 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
26973 // though. This code is adapted from the identical transform in instcombine.
26974 if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26975 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26976 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26977 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26978 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26979 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26980 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26981 RHS.getOperand(0), Flags);
26982 SDValue UndefV = LHS.getOperand(1);
26983 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26984 }
26985
26986 // Try to sink a splat shuffle after a binop with a uniform constant.
26987 // This is limited to cases where neither the shuffle nor the constant have
26988 // undefined elements because that could be poison-unsafe or inhibit
26989 // demanded elements analysis. It is further limited to not change a splat
26990 // of an inserted scalar because that may be optimized better by
26991 // load-folding or other target-specific behaviors.
26992 if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26993 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26994 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26995 // binop (splat X), (splat C) --> splat (binop X, C)
26996 SDValue X = Shuf0->getOperand(0);
26997 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26998 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26999 Shuf0->getMask());
27000 }
27001 if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
27002 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
27003 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
27004 // binop (splat C), (splat X) --> splat (binop C, X)
27005 SDValue X = Shuf1->getOperand(0);
27006 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
27007 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
27008 Shuf1->getMask());
27009 }
27010 }
27011
27012 // The following pattern is likely to emerge with vector reduction ops. Moving
27013 // the binary operation ahead of insertion may allow using a narrower vector
27014 // instruction that has better performance than the wide version of the op:
27015 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
27016 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
27017 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
27018 LHS.getOperand(2) == RHS.getOperand(2) &&
27019 (LHS.hasOneUse() || RHS.hasOneUse())) {
27020 SDValue X = LHS.getOperand(1);
27021 SDValue Y = RHS.getOperand(1);
27022 SDValue Z = LHS.getOperand(2);
27023 EVT NarrowVT = X.getValueType();
27024 if (NarrowVT == Y.getValueType() &&
27025 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
27026 LegalOperations)) {
27027 // (binop undef, undef) may not return undef, so compute that result.
27028 SDValue VecC =
27029 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
27030 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
27031 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
27032 }
27033 }
27034
27035 // Make sure all but the first op are undef or constant.
27036 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
27037 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
27038 all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
27039 return Op.isUndef() ||
27040 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
27041 });
27042 };
27043
27044 // The following pattern is likely to emerge with vector reduction ops. Moving
27045 // the binary operation ahead of the concat may allow using a narrower vector
27046 // instruction that has better performance than the wide version of the op:
27047 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
27048 // concat (VBinOp X, Y), VecC
27049 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
27050 (LHS.hasOneUse() || RHS.hasOneUse())) {
27051 EVT NarrowVT = LHS.getOperand(0).getValueType();
27052 if (NarrowVT == RHS.getOperand(0).getValueType() &&
27053 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
27054 unsigned NumOperands = LHS.getNumOperands();
27055 SmallVector<SDValue, 4> ConcatOps;
27056 for (unsigned i = 0; i != NumOperands; ++i) {
27057 // For operands 1 and up this constant folds, since those operands are undef or constant.
27058 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
27059 RHS.getOperand(i)));
27060 }
27061
27062 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
27063 }
27064 }
27065
27066 if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
27067 return V;
27068
27069 return SDValue();
27070}
27071
27072SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
27073 SDValue N2) {
27074 assert(N0.getOpcode() == ISD::SETCC &&
27075 "First argument must be a SetCC node!");
27076
27077 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
27078 cast<CondCodeSDNode>(N0.getOperand(2))->get());
27079
27080 // If we got a simplified select_cc node back from SimplifySelectCC, then
27081 // break it down into a new SETCC node, and a new SELECT node, and then return
27082 // the SELECT node, since we were called with a SELECT node.
27083 if (SCC.getNode()) {
27084 // Check to see if we got a select_cc back (to turn into setcc/select).
27085 // Otherwise, just return whatever node we got back, like fabs.
27086 if (SCC.getOpcode() == ISD::SELECT_CC) {
27087 const SDNodeFlags Flags = N0->getFlags();
27088 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
27089 N0.getValueType(),
27090 SCC.getOperand(0), SCC.getOperand(1),
27091 SCC.getOperand(4), Flags);
27092 AddToWorklist(SETCC.getNode());
27093 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
27094 SCC.getOperand(2), SCC.getOperand(3));
27095 SelectNode->setFlags(Flags);
27096 return SelectNode;
27097 }
27098
27099 return SCC;
27100 }
27101 return SDValue();
27102}
27103
27104/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
27105/// being selected between, see if we can simplify the select. Callers of this
27106/// should assume that TheSelect is deleted if this returns true. As such, they
27107/// should return the appropriate thing (e.g. the node) back to the top-level of
27108/// the DAG combiner loop to avoid it being looked at.
27109bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
27110 SDValue RHS) {
27111 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27112 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
27113 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
27114 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
27115 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
27116 SDValue Sqrt = RHS;
27117 ISD::CondCode CC = ISD::SETCC_INVALID;
27118 SDValue CmpLHS;
27119 const ConstantFPSDNode *Zero = nullptr;
27120
27121 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
27122 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
27123 CmpLHS = TheSelect->getOperand(0);
27124 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
27125 } else {
27126 // SELECT or VSELECT
27127 SDValue Cmp = TheSelect->getOperand(0);
27128 if (Cmp.getOpcode() == ISD::SETCC) {
27129 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
27130 CmpLHS = Cmp.getOperand(0);
27131 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
27132 }
27133 }
27134 if (Zero && Zero->isZero() &&
27135 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
27136 CC == ISD::SETULT || CC == ISD::SETLT)) {
27137 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
27138 CombineTo(TheSelect, Sqrt);
27139 return true;
27140 }
27141 }
27142 }
27143 // Cannot simplify select with vector condition
27144 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
27145
27146 // If this is a select from two identical things, try to pull the operation
27147 // through the select.
27148 if (LHS.getOpcode() != RHS.getOpcode() ||
27149 !LHS.hasOneUse() || !RHS.hasOneUse())
27150 return false;
27151
27152 // If this is a load and the token chain is identical, replace the select
27153 // of two loads with a load through a select of the address to load from.
27154 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
27155 // constants have been dropped into the constant pool.
27156 if (LHS.getOpcode() == ISD::LOAD) {
27157 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
27158 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
27159
27160 // Token chains must be identical.
27161 if (LHS.getOperand(0) != RHS.getOperand(0) ||
27162 // Do not let this transformation reduce the number of volatile loads.
27163 // Be conservative for atomics for the moment
27164 // TODO: This does appear to be legal for unordered atomics (see D66309)
27165 !LLD->isSimple() || !RLD->isSimple() ||
27166 // FIXME: If either is a pre/post inc/dec load,
27167 // we'd need to split out the address adjustment.
27168 LLD->isIndexed() || RLD->isIndexed() ||
27169 // If this is an EXTLOAD, the VT's must match.
27170 LLD->getMemoryVT() != RLD->getMemoryVT() ||
27171 // If this is an EXTLOAD, the kind of extension must match.
27172 (LLD->getExtensionType() != RLD->getExtensionType() &&
27173 // The only exception is if one of the extensions is anyext.
27174 LLD->getExtensionType() != ISD::EXTLOAD &&
27175 RLD->getExtensionType() != ISD::EXTLOAD) ||
27176 // FIXME: this discards src value information. This is
27177 // over-conservative. It would be beneficial to be able to remember
27178 // both potential memory locations. Since we are discarding
27179 // src value info, don't do the transformation if the memory
27180 // locations are not in the default address space.
27181 LLD->getPointerInfo().getAddrSpace() != 0 ||
27182 RLD->getPointerInfo().getAddrSpace() != 0 ||
27183 // We can't produce a CMOV of a TargetFrameIndex since we won't
27184 // generate the address generation required.
27185 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27186 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
27187 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
27188 LLD->getBasePtr().getValueType()))
27189 return false;
27190
27191 // The loads must not depend on one another.
27192 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
27193 return false;
27194
27195 // Check that the select condition doesn't reach either load. If so,
27196 // folding this will induce a cycle into the DAG. If not, this is safe to
27197 // xform, so create a select of the addresses.
27198
27199 SmallPtrSet<const SDNode *, 32> Visited;
27200 SmallVector<const SDNode *, 16> Worklist;
27201
27202 // Always fail if LLD and RLD are not independent. TheSelect is a
27203 // predecessor to all Nodes in question so we need not search past it.
27204
27205 Visited.insert(TheSelect);
27206 Worklist.push_back(LLD);
27207 Worklist.push_back(RLD);
27208
27209 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
27210 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
27211 return false;
27212
27213 SDValue Addr;
27214 if (TheSelect->getOpcode() == ISD::SELECT) {
27215 // We cannot do this optimization if any pair of {RLD, LLD} is a
27216 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
27217 // Loads, we only need to check if CondNode is a successor to one of the
27218 // loads. We can further avoid this if there's no use of their chain
27219 // value.
27220 SDNode *CondNode = TheSelect->getOperand(0).getNode();
27221 Worklist.push_back(CondNode);
27222
27223 if ((LLD->hasAnyUseOfValue(1) &&
27224 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27225 (RLD->hasAnyUseOfValue(1) &&
27226 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27227 return false;
27228
27229 Addr = DAG.getSelect(SDLoc(TheSelect),
27230 LLD->getBasePtr().getValueType(),
27231 TheSelect->getOperand(0), LLD->getBasePtr(),
27232 RLD->getBasePtr());
27233 } else { // Otherwise SELECT_CC
27234 // We cannot do this optimization if any pair of {RLD, LLD} is a
27235 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
27236 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
27237 // one of the loads. We can further avoid this if there's no use of their
27238 // chain value.
27239
27240 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
27241 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
27242 Worklist.push_back(CondLHS);
27243 Worklist.push_back(CondRHS);
27244
27245 if ((LLD->hasAnyUseOfValue(1) &&
27246 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
27247 (RLD->hasAnyUseOfValue(1) &&
27248 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
27249 return false;
27250
27251 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
27252 LLD->getBasePtr().getValueType(),
27253 TheSelect->getOperand(0),
27254 TheSelect->getOperand(1),
27255 LLD->getBasePtr(), RLD->getBasePtr(),
27256 TheSelect->getOperand(4));
27257 }
27258
27259 SDValue Load;
27260 // It is safe to replace the two loads if they have different alignments,
27261 // but the new load must be the minimum (most restrictive) alignment of the
27262 // inputs.
27263 Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27264 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27265 if (!RLD->isInvariant())
27266 MMOFlags &= ~MachineMemOperand::MOInvariant;
27267 if (!RLD->isDereferenceable())
27268 MMOFlags &= ~MachineMemOperand::MODereferenceable;
27269 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27270 // FIXME: Discards pointer and AA info.
27271 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27272 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27273 MMOFlags);
27274 } else {
27275 // FIXME: Discards pointer and AA info.
27276 Load = DAG.getExtLoad(
27277 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27278 : LLD->getExtensionType(),
27279 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27280 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27281 }
27282
27283 // Users of the select now use the result of the load.
27284 CombineTo(TheSelect, Load);
27285
27286 // Users of the old loads now use the new load's chain. We know the
27287 // old-load value is dead now.
27288 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27289 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27290 return true;
27291 }
27292
27293 return false;
27294}
27295
27296/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27297/// bitwise 'and'.
27298SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27299 SDValue N1, SDValue N2, SDValue N3,
27300 ISD::CondCode CC) {
27301 // If this is a select where the false operand is zero and the compare is a
27302 // check of the sign bit, see if we can perform the "gzip trick":
27303 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27304 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
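// The arithmetic right shift by size(X)-1 yields all-ones when X is negative
// and all-zeros otherwise, so the AND produces either A or 0 without a select.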
27305 EVT XType = N0.getValueType();
27306 EVT AType = N2.getValueType();
27307 if (!isNullConstant(N3) || !XType.bitsGE(AType))
27308 return SDValue();
27309
27310 // If the comparison is testing for a positive value, we have to invert
27311 // the sign bit mask, so only do that transform if the target has a bitwise
27312 // 'and not' instruction (the invert is free).
27313 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27314 // (X > -1) ? A : 0
27315 // (X > 0) ? X : 0 <-- This is canonical signed max.
27316 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27317 return SDValue();
27318 } else if (CC == ISD::SETLT) {
27319 // (X < 0) ? A : 0
27320 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
27321 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27322 return SDValue();
27323 } else {
27324 return SDValue();
27325 }
27326
27327 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27328 // constant.
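// If A == (1 << k), only bit k of the result matters, so a logical shift that
// moves the sign bit of X directly down to bit k (ShCt = size(X)-1-k) is
// enough; the full all-ones mask is not needed.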
27329 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27330 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27331 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27332 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27333 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27334 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27335 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27336 AddToWorklist(Shift.getNode());
27337
27338 if (XType.bitsGT(AType)) {
27339 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27340 AddToWorklist(Shift.getNode());
27341 }
27342
27343 if (CC == ISD::SETGT)
27344 Shift = DAG.getNOT(DL, Shift, AType);
27345
27346 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27347 }
27348 }
27349
27350 unsigned ShCt = XType.getSizeInBits() - 1;
27351 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27352 return SDValue();
27353
27354 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27355 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27356 AddToWorklist(Shift.getNode());
27357
27358 if (XType.bitsGT(AType)) {
27359 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27360 AddToWorklist(Shift.getNode());
27361 }
27362
27363 if (CC == ISD::SETGT)
27364 Shift = DAG.getNOT(DL, Shift, AType);
27365
27366 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27367}
27368
27369// Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27370SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27371 SDValue N0 = N->getOperand(0);
27372 SDValue N1 = N->getOperand(1);
27373 SDValue N2 = N->getOperand(2);
27374 SDLoc DL(N);
27375
27376 unsigned BinOpc = N1.getOpcode();
27377 if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27378 (N1.getResNo() != N2.getResNo()))
27379 return SDValue();
27380
27381 // The use checks are intentionally on SDNode because we may be dealing
27382 // with opcodes that produce more than one SDValue.
27383 // TODO: Do we really need to check N0 (the condition operand of the select)?
27384 // But removing that clause could cause an infinite loop...
27385 if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27386 return SDValue();
27387
27388 // Binops may include opcodes that return multiple values, so all values
27389 // must be created/propagated from the newly created binops below.
27390 SDVTList OpVTs = N1->getVTList();
27391
27392 // Fold select(cond, binop(x, y), binop(z, y))
27393 // --> binop(select(cond, x, z), y)
27394 if (N1.getOperand(1) == N2.getOperand(1)) {
27395 SDValue N10 = N1.getOperand(0);
27396 SDValue N20 = N2.getOperand(0);
27397 SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27398 SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27399 NewBinOp->setFlags(N1->getFlags());
27400 NewBinOp->intersectFlagsWith(N2->getFlags());
27401 return SDValue(NewBinOp.getNode(), N1.getResNo());
27402 }
27403
27404 // Fold select(cond, binop(x, y), binop(x, z))
27405 // --> binop(x, select(cond, y, z))
27406 if (N1.getOperand(0) == N2.getOperand(0)) {
27407 SDValue N11 = N1.getOperand(1);
27408 SDValue N21 = N2.getOperand(1);
27409 // Second op VT might be different (e.g. shift amount type)
27410 if (N11.getValueType() == N21.getValueType()) {
27411 SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27412 SDValue NewBinOp =
27413 DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27414 NewBinOp->setFlags(N1->getFlags());
27415 NewBinOp->intersectFlagsWith(N2->getFlags());
27416 return SDValue(NewBinOp.getNode(), N1.getResNo());
27417 }
27418 }
27419
27420 // TODO: Handle isCommutativeBinOp patterns as well?
27421 return SDValue();
27422}
27423
27424// Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27425SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27426 SDValue N0 = N->getOperand(0);
27427 EVT VT = N->getValueType(0);
27428 bool IsFabs = N->getOpcode() == ISD::FABS;
27429 bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27430
27431 if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27432 return SDValue();
27433
27434 SDValue Int = N0.getOperand(0);
27435 EVT IntVT = Int.getValueType();
27436
27437 // The operand to cast should be integer.
27438 if (!IntVT.isInteger() || IntVT.isVector())
27439 return SDValue();
27440
27441 // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27442 // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27443 APInt SignMask;
27444 if (N0.getValueType().isVector()) {
27445 // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27446 // 0x7f...) per element and splat it.
27447 SignMask = APInt::getSignMask(VT.getScalarSizeInBits());
27448 if (IsFabs)
27449 SignMask = ~SignMask;
27450 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27451 } else {
27452 // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27453 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27454 if (IsFabs)
27455 SignMask = ~SignMask;
27456 }
27457 SDLoc DL(N0);
27458 Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27459 DAG.getConstant(SignMask, DL, IntVT));
27460 AddToWorklist(Int.getNode());
27461 return DAG.getBitcast(VT, Int);
27462}
27463
27464 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27465/// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27466/// in it. This may be a win when the constant is not otherwise available
27467/// because it replaces two constant pool loads with one.
27468SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27469 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27470 ISD::CondCode CC) {
27471 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27472 return SDValue();
27473
27474 // If we are before legalize types, we want the other legalization to happen
27475 // first (for example, to avoid messing with soft float).
27476 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27477 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27478 EVT VT = N2.getValueType();
27479 if (!TV || !FV || !TLI.isTypeLegal(VT))
27480 return SDValue();
27481
27482 // If a constant can be materialized without loads, this does not make sense.
27483 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27484 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27485 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27486 return SDValue();
27487
27488 // If both constants have multiple uses, then we won't need to do an extra
27489 // load. The values are likely around in registers for other users.
27490 if (!TV->hasOneUse() && !FV->hasOneUse())
27491 return SDValue();
27492
27493 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27494 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27495 Type *FPTy = Elts[0]->getType();
27496 const DataLayout &TD = DAG.getDataLayout();
27497
27498 // Create a ConstantArray of the two constants.
27499 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27500 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27501 TD.getPrefTypeAlign(FPTy));
27502 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27503
27504 // Get offsets to the 0 and 1 elements of the array, so we can select between
27505 // them.
27506 SDValue Zero = DAG.getIntPtrConstant(0, DL);
27507 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27508 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27509 SDValue Cond =
27510 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27511 AddToWorklist(Cond.getNode());
27512 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27513 AddToWorklist(CstOffset.getNode());
27514 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27515 AddToWorklist(CPIdx.getNode());
27516 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27517 MachinePointerInfo::getConstantPool(
27518 DAG.getMachineFunction()), Alignment);
27519}
27520
27521/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27522/// where 'cond' is the comparison specified by CC.
27523SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27524 SDValue N2, SDValue N3, ISD::CondCode CC,
27525 bool NotExtCompare) {
27526 // (x ? y : y) -> y.
27527 if (N2 == N3) return N2;
27528
27529 EVT CmpOpVT = N0.getValueType();
27530 EVT CmpResVT = getSetCCResultType(CmpOpVT);
27531 EVT VT = N2.getValueType();
27532 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27533 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27534 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27535
27536 // Determine if the condition we're dealing with is constant.
27537 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27538 AddToWorklist(SCC.getNode());
27539 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27540 // fold select_cc true, x, y -> x
27541 // fold select_cc false, x, y -> y
27542 return !(SCCC->isZero()) ? N2 : N3;
27543 }
27544 }
27545
27546 if (SDValue V =
27547 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27548 return V;
27549
27550 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27551 return V;
27552
27553 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
27554 // where y has a single bit set.
27555 // A plaintext description would be: we can turn the SELECT_CC into an AND
27556 // when the condition can be materialized as an all-ones register. Any
27557 // single bit-test can be materialized as an all-ones register with
27558 // shift-left and shift-right-arith.
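// For example, to test bit 3 of an i32 value: shl by 28 moves bit 3 into the
// sign bit, and sra by 31 then yields all-ones if the bit was set, else zero.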
27559 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27560 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27561 SDValue AndLHS = N0->getOperand(0);
27562 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27563 if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27564 // Shift the tested bit over the sign bit.
27565 const APInt &AndMask = ConstAndRHS->getAPIntValue();
27566 if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27567 unsigned ShCt = AndMask.getBitWidth() - 1;
27568 SDValue ShlAmt =
27569 DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27570 getShiftAmountTy(AndLHS.getValueType()));
27571 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27572
27573 // Now arithmetic right shift it all the way over, so the result is
27574 // either all-ones, or zero.
27575 SDValue ShrAmt =
27576 DAG.getConstant(ShCt, SDLoc(Shl),
27577 getShiftAmountTy(Shl.getValueType()));
27578 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27579
27580 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27581 }
27582 }
27583 }
27584
27585 // fold select C, 16, 0 -> shl C, 4
27586 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27587 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27588
27589 if ((Fold || Swap) &&
27590 TLI.getBooleanContents(CmpOpVT) ==
27591 TargetLowering::ZeroOrOneBooleanContent &&
27592 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27593
27594 if (Swap) {
27595 CC = ISD::getSetCCInverse(CC, CmpOpVT);
27596 std::swap(N2C, N3C);
27597 }
27598
27599 // If the caller doesn't want us to simplify this into a zext of a compare,
27600 // don't do it.
27601 if (NotExtCompare && N2C->isOne())
27602 return SDValue();
27603
27604 SDValue Temp, SCC;
27605 // zext (setcc n0, n1)
27606 if (LegalTypes) {
27607 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27608 Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27609 } else {
27610 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27611 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27612 }
27613
27614 AddToWorklist(SCC.getNode());
27615 AddToWorklist(Temp.getNode());
27616
27617 if (N2C->isOne())
27618 return Temp;
27619
27620 unsigned ShCt = N2C->getAPIntValue().logBase2();
27621 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27622 return SDValue();
27623
27624 // shl setcc result by log2 n2c
27625 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27626 DAG.getConstant(ShCt, SDLoc(Temp),
27627 getShiftAmountTy(Temp.getValueType())));
27628 }
27629
27630 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27631 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27632 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27633 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27634 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27635 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27636 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27637 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
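// These hold because the non-ZERO_UNDEF forms of CTLZ/CTTZ are defined to
// return the operand bit width for a zero input, which is exactly the value
// the select supplies on the X == 0 path.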
27638 if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27639 SDValue ValueOnZero = N2;
27640 SDValue Count = N3;
27641 // If the condition is NE instead of E, swap the operands.
27642 if (CC == ISD::SETNE)
27643 std::swap(ValueOnZero, Count);
27644 // Check if the value on zero is a constant equal to the bits in the type.
27645 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27646 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27647 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27648 // legal, combine to just cttz.
27649 if ((Count.getOpcode() == ISD::CTTZ ||
27650 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27651 N0 == Count.getOperand(0) &&
27652 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27653 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27654 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27655 // legal, combine to just ctlz.
27656 if ((Count.getOpcode() == ISD::CTLZ ||
27657 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27658 N0 == Count.getOperand(0) &&
27659 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27660 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27661 }
27662 }
27663 }
27664
27665 // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27666 // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27667 if (!NotExtCompare && N1C && N2C && N3C &&
27668 N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27669 ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27670 (N1C->isZero() && CC == ISD::SETLT)) &&
27671 !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27672 SDValue ASR = DAG.getNode(
27673 ISD::SRA, DL, CmpOpVT, N0,
27674 DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27675 return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27676 DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27677 }
27678
27679 if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27680 return S;
27681 if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27682 return S;
27683
27684 return SDValue();
27685}
27686
27687/// This is a stub for TargetLowering::SimplifySetCC.
27688SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27689 ISD::CondCode Cond, const SDLoc &DL,
27690 bool foldBooleans) {
27691 TargetLowering::DAGCombinerInfo
27692 DagCombineInfo(DAG, Level, false, this);
27693 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27694}
27695
27696/// Given an ISD::SDIV node expressing a divide by constant, return
27697/// a DAG expression to select that will generate the same value by multiplying
27698/// by a magic number.
27699/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27700SDValue DAGCombiner::BuildSDIV(SDNode *N) {
27701 // when optimising for minimum size, we don't want to expand a div to a mul
27702 // and a shift.
27703 if (DAG.getMachineFunction().getFunction().hasMinSize())
27704 return SDValue();
27705
27706 SmallVector<SDNode *, 8> Built;
27707 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27708 for (SDNode *N : Built)
27709 AddToWorklist(N);
27710 return S;
27711 }
27712
27713 return SDValue();
27714}
27715
27716/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27717/// DAG expression that will generate the same value by right shifting.
27718SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27719 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27720 if (!C)
27721 return SDValue();
27722
27723 // Avoid division by zero.
27724 if (C->isZero())
27725 return SDValue();
27726
27727 SmallVector<SDNode *, 8> Built;
27728 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27729 for (SDNode *N : Built)
27730 AddToWorklist(N);
27731 return S;
27732 }
27733
27734 return SDValue();
27735}
27736
27737/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27738/// expression that will generate the same value by multiplying by a magic
27739/// number.
27740/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27741SDValue DAGCombiner::BuildUDIV(SDNode *N) {
27742 // when optimising for minimum size, we don't want to expand a div to a mul
27743 // and a shift.
27744 if (DAG.getMachineFunction().getFunction().hasMinSize())
27745 return SDValue();
27746
27747 SmallVector<SDNode *, 8> Built;
27748 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27749 for (SDNode *N : Built)
27750 AddToWorklist(N);
27751 return S;
27752 }
27753
27754 return SDValue();
27755}
27756
27757/// Given an ISD::SREM node expressing a remainder by constant power of 2,
27758/// return a DAG expression that will generate the same value.
27759SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27760 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27761 if (!C)
27762 return SDValue();
27763
27764 // Avoid division by zero.
27765 if (C->isZero())
27766 return SDValue();
27767
27768 SmallVector<SDNode *, 8> Built;
27769 if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27770 for (SDNode *N : Built)
27771 AddToWorklist(N);
27772 return S;
27773 }
27774
27775 return SDValue();
27776}
27777
27778// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27779//
27780// Returns the node that represents `Log2(Op)`. This may create a new node. If
27781 // we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27782//
27783// All nodes will be created at `DL` and the output will be of type `VT`.
27784//
27785// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
27786// `AssumeNonZero` if this function should simply assume (not require proving
27787// `Op` is non-zero).
27788 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27789 SDValue Op, unsigned Depth,
27790 bool AssumeNonZero) {
27791 assert(VT.isInteger() && "Only integer types are supported!");
27792
27793 auto PeekThroughCastsAndTrunc = [](SDValue V) {
27794 while (true) {
27795 switch (V.getOpcode()) {
27796 case ISD::TRUNCATE:
27797 case ISD::ZERO_EXTEND:
27798 V = V.getOperand(0);
27799 break;
27800 default:
27801 return V;
27802 }
27803 }
27804 };
27805
27806 if (VT.isScalableVector())
27807 return SDValue();
27808
27809 Op = PeekThroughCastsAndTrunc(Op);
27810
27811 // Helper for determining whether a value is a power-2 constant scalar or a
27812 // vector of such elements.
27813 SmallVector<APInt> Pow2Constants;
27814 auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27815 if (C->isZero() || C->isOpaque())
27816 return false;
27817 // TODO: We may also be able to support negative powers of 2 here.
27818 if (C->getAPIntValue().isPowerOf2()) {
27819 Pow2Constants.emplace_back(C->getAPIntValue());
27820 return true;
27821 }
27822 return false;
27823 };
27824
27825 if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27826 if (!VT.isVector())
27827 return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27828 // We need to create a build vector
27829 SmallVector<SDValue> Log2Ops;
27830 for (const APInt &Pow2 : Pow2Constants)
27831 Log2Ops.emplace_back(
27832 DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27833 return DAG.getBuildVector(VT, DL, Log2Ops);
27834 }
27835
27836 if (Depth >= DAG.MaxRecursionDepth)
27837 return SDValue();
27838
27839 auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27840 ToCast = PeekThroughCastsAndTrunc(ToCast);
27841 EVT CurVT = ToCast.getValueType();
27842 if (NewVT == CurVT)
27843 return ToCast;
27844
27845 if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27846 return DAG.getBitcast(NewVT, ToCast);
27847
27848 return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27849 };
27850
27851 // log2(X << Y) -> log2(X) + Y
27852 if (Op.getOpcode() == ISD::SHL) {
27853 // 1 << Y and X nuw/nsw << Y are all non-zero.
27854 if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27855 Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27856 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27857 Depth + 1, AssumeNonZero))
27858 return DAG.getNode(ISD::ADD, DL, VT, LogX,
27859 CastToVT(VT, Op.getOperand(1)));
27860 }
27861
27862 // c ? X : Y -> c ? Log2(X) : Log2(Y)
27863 if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27864 Op.hasOneUse()) {
27865 if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27866 Depth + 1, AssumeNonZero))
27867 if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27868 Depth + 1, AssumeNonZero))
27869 return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27870 }
27871
27872 // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27873 // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27874 if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27875 Op.hasOneUse()) {
27876 // Use AssumeNonZero as false here. Otherwise we can hit the case where
27877 // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27878 if (SDValue LogX =
27879 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27880 /*AssumeNonZero*/ false))
27881 if (SDValue LogY =
27882 takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27883 /*AssumeNonZero*/ false))
27884 return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27885 }
27886
27887 return SDValue();
27888}
27889
27890/// Determines the LogBase2 value for a non-null input value using the
27891/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
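/// For example, for an i32 value V = 16: ctlz(V) = 27, so LogBase2(V) = 31 - 27 = 4.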
27892SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27893 bool KnownNonZero, bool InexpensiveOnly,
27894 std::optional<EVT> OutVT) {
27895 EVT VT = OutVT ? *OutVT : V.getValueType();
27896 SDValue InexpensiveLogBase2 =
27897 takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27898 if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27899 return InexpensiveLogBase2;
27900
27901 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27902 SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27903 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27904 return LogBase2;
27905}
27906
27907/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27908/// For the reciprocal, we need to find the zero of the function:
27909/// F(X) = 1/X - A [which has a zero at X = 1/A]
27910/// =>
27911/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27912/// does not require additional intermediate precision]
27913/// For the last iteration, put numerator N into it to gain more precision:
27914/// Result = N X_i + X_i (N - N A X_i)
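///
/// The update rule is Newton's method applied to F: since F'(X) = -1/X^2,
/// X_{i+1} = X_i - F(X_i)/F'(X_i) = X_i + X_i (1 - A X_i).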
27915SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27916 SDNodeFlags Flags) {
27917 if (LegalDAG)
27918 return SDValue();
27919
27920 // TODO: Handle extended types?
27921 EVT VT = Op.getValueType();
27922 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27923 VT.getScalarType() != MVT::f64)
27924 return SDValue();
27925
27926 // If estimates are explicitly disabled for this function, we're done.
27927 MachineFunction &MF = DAG.getMachineFunction();
27928 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27929 if (Enabled == TLI.ReciprocalEstimate::Disabled)
27930 return SDValue();
27931
27932 // Estimates may be explicitly enabled for this type with a custom number of
27933 // refinement steps.
27934 int Iterations = TLI.getDivRefinementSteps(VT, MF);
27935 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27936 AddToWorklist(Est.getNode());
27937
27938 SDLoc DL(Op);
27939 if (Iterations) {
27940 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27941
27942 // Newton iterations: Est = Est + Est (N - Arg * Est)
27943 // If this is the last iteration, also multiply by the numerator.
27944 for (int i = 0; i < Iterations; ++i) {
27945 SDValue MulEst = Est;
27946
27947 if (i == Iterations - 1) {
27948 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27949 AddToWorklist(MulEst.getNode());
27950 }
27951
27952 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27953 AddToWorklist(NewEst.getNode());
27954
27955 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27956 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27957 AddToWorklist(NewEst.getNode());
27958
27959 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27960 AddToWorklist(NewEst.getNode());
27961
27962 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27963 AddToWorklist(Est.getNode());
27964 }
27965 } else {
27966 // If no iterations are available, multiply with N.
27967 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27968 AddToWorklist(Est.getNode());
27969 }
27970
27971 return Est;
27972 }
27973
27974 return SDValue();
27975}
27976
27977/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27978/// For the reciprocal sqrt, we need to find the zero of the function:
27979/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27980/// =>
27981/// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27982/// As a result, we precompute A/2 prior to the iteration loop.
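///
/// This step is Newton's method applied to F: since F'(X) = -2/X^3,
/// X_{i+1} = X_i - F(X_i)/F'(X_i) = X_i (3 - A X_i^2) / 2 = X_i (1.5 - (A/2) X_i^2).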
27983SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27984 unsigned Iterations,
27985 SDNodeFlags Flags, bool Reciprocal) {
27986 EVT VT = Arg.getValueType();
27987 SDLoc DL(Arg);
27988 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27989
27990 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27991 // this entire sequence requires only one FP constant.
27992 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27993 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27994
27995 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27996 for (unsigned i = 0; i < Iterations; ++i) {
27997 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27998 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27999 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
28000 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
28001 }
28002
28003 // If non-reciprocal square root is requested, multiply the result by Arg.
28004 if (!Reciprocal)
28005 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
28006
28007 return Est;
28008}
28009
28010/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
28011/// For the reciprocal sqrt, we need to find the zero of the function:
28012/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
28013/// =>
28014/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
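///
/// This is the same Newton step as X_i (1.5 - 0.5 A X_i^2), refactored so that
/// only the two constants -0.5 and -3.0 are needed.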
28015SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
28016 unsigned Iterations,
28017 SDNodeFlags Flags, bool Reciprocal) {
28018 EVT VT = Arg.getValueType();
28019 SDLoc DL(Arg);
28020 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
28021 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
28022
28023 // This routine must enter the loop below to work correctly
28024 // when (Reciprocal == false).
28025 assert(Iterations > 0);
28026
28027 // Newton iterations for reciprocal square root:
28028 // E = (E * -0.5) * ((A * E) * E + -3.0)
28029 for (unsigned i = 0; i < Iterations; ++i) {
28030 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
28031 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
28032 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
28033
28034 // When calculating a square root at the last iteration build:
28035 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
28036 // (notice a common subexpression)
28037 SDValue LHS;
28038 if (Reciprocal || (i + 1) < Iterations) {
28039 // RSQRT: LHS = (E * -0.5)
28040 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
28041 } else {
28042 // SQRT: LHS = (A * E) * -0.5
28043 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
28044 }
28045
28046 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
28047 }
28048
28049 return Est;
28050}
28051
28052/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
28053/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
28054/// Op can be zero.
28055SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
28056 bool Reciprocal) {
28057 if (LegalDAG)
28058 return SDValue();
28059
28060 // TODO: Handle extended types?
28061 EVT VT = Op.getValueType();
28062 if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
28063 VT.getScalarType() != MVT::f64)
28064 return SDValue();
28065
28066 // If estimates are explicitly disabled for this function, we're done.
28067 MachineFunction &MF = DAG.getMachineFunction();
28068 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
28069 if (Enabled == TLI.ReciprocalEstimate::Disabled)
28070 return SDValue();
28071
28072 // Estimates may be explicitly enabled for this type with a custom number of
28073 // refinement steps.
28074 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
28075
28076 bool UseOneConstNR = false;
28077 if (SDValue Est =
28078 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
28079 Reciprocal)) {
28080 AddToWorklist(Est.getNode());
28081
28082 if (Iterations > 0)
28083 Est = UseOneConstNR
28084 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
28085 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
28086 if (!Reciprocal) {
28087 SDLoc DL(Op);
28088 // Try the target specific test first.
28089 SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
28090
28091 // The estimate is now completely wrong if the input was exactly 0.0 or
28092 // possibly a denormal. Force the answer to 0.0 or value provided by
28093 // target for those cases.
28094 Est = DAG.getNode(
28095 Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
28096 Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
28097 }
28098 return Est;
28099 }
28100
28101 return SDValue();
28102}
28103
28104SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28105 return buildSqrtEstimateImpl(Op, Flags, true);
28106}
28107
28108SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
28109 return buildSqrtEstimateImpl(Op, Flags, false);
28110}
28111
28112/// Return true if there is any possibility that the two addresses overlap.
28113bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
28114
28115 struct MemUseCharacteristics {
28116 bool IsVolatile;
28117 bool IsAtomic;
28118 SDValue BasePtr;
28119 int64_t Offset;
28120 LocationSize NumBytes;
28121 MachineMemOperand *MMO;
28122 };
28123
28124 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
28125 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
28126 int64_t Offset = 0;
28127 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
28128 Offset = (LSN->getAddressingMode() == ISD::PRE_INC) ? C->getSExtValue()
28129 : (LSN->getAddressingMode() == ISD::PRE_DEC)
28130 ? -1 * C->getSExtValue()
28131 : 0;
28132 TypeSize Size = LSN->getMemoryVT().getStoreSize();
28133 return {LSN->isVolatile(), LSN->isAtomic(),
28134 LSN->getBasePtr(), Offset /*base offset*/,
28135 LocationSize::precise(Size), LSN->getMemOperand()};
28136 }
28137 if (const auto *LN = cast<LifetimeSDNode>(N))
28138 return {false /*isVolatile*/,
28139 /*isAtomic*/ false,
28140 LN->getOperand(1),
28141 (LN->hasOffset()) ? LN->getOffset() : 0,
28142 (LN->hasOffset()) ? LocationSize::precise(LN->getSize())
28143 : LocationSize::beforeOrAfterPointer(),
28144 (MachineMemOperand *)nullptr};
28145 // Default.
28146 return {false /*isvolatile*/,
28147 /*isAtomic*/ false,
28148 SDValue(),
28149 (int64_t)0 /*offset*/,
28150 LocationSize::beforeOrAfterPointer() /*size*/,
28151 (MachineMemOperand *)nullptr};
28152 };
28153
28154 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
28155 MUC1 = getCharacteristics(Op1);
28156
28157 // If they are to the same address, then they must be aliases.
28158 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
28159 MUC0.Offset == MUC1.Offset)
28160 return true;
28161
28162 // If they are both volatile then they cannot be reordered.
28163 if (MUC0.IsVolatile && MUC1.IsVolatile)
28164 return true;
28165
28166 // Be conservative about atomics for the moment
28167 // TODO: This is way overconservative for unordered atomics (see D66309)
28168 if (MUC0.IsAtomic && MUC1.IsAtomic)
28169 return true;
28170
28171 if (MUC0.MMO && MUC1.MMO) {
28172 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28173 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28174 return false;
28175 }
28176
28177 // If NumBytes is scalable and offset is not 0, conservatively return may
28178 // alias
28179 if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
28180 MUC0.Offset != 0) ||
28181 (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
28182 MUC1.Offset != 0))
28183 return true;
28184 // Try to prove that there is aliasing, or that there is no aliasing. Either
28185 // way, we can return now. If nothing can be proved, proceed with more tests.
28186 bool IsAlias;
28187 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
28188 DAG, IsAlias))
28189 return IsAlias;
28190
28191 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
28192 // either are not known.
28193 if (!MUC0.MMO || !MUC1.MMO)
28194 return true;
28195
28196 // If one operation reads from invariant memory, and the other may store, they
28197 // cannot alias. These should really be checking the equivalent of mayWrite,
28198 // but it only matters for memory nodes other than load/store.
28199 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
28200 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
28201 return false;
28202
28203 // If we know required SrcValue1 and SrcValue2 have relatively large
28204 // alignment compared to the size and offset of the access, we may be able
28205 // to prove they do not alias. This check is conservative for now to catch
28206 // cases created by splitting vector types, it only works when the offsets are
28207 // multiples of the size of the data.
28208 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
28209 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
28210 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
28211 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
28212 LocationSize Size0 = MUC0.NumBytes;
28213 LocationSize Size1 = MUC1.NumBytes;
28214
28215 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
28216 Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
28217 !Size1.isScalable() && Size0 == Size1 &&
28218 OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
28219 SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
28220 SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
28221 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
28222 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
28223
28224 // There is no overlap between these relatively aligned accesses of
28225 // similar size. Return no alias.
28226 if ((OffAlign0 + static_cast<int64_t>(
28227 Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
28228 (OffAlign1 + static_cast<int64_t>(
28229 Size1.getValue().getKnownMinValue())) <= OffAlign0)
28230 return false;
28231 }
28232
28233 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
28234 ? CombinerGlobalAA
28235 : DAG.getSubtarget().useAA();
28236#ifndef NDEBUG
28237 if (CombinerAAOnlyFunc.getNumOccurrences() &&
28238 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
28239 UseAA = false;
28240#endif
28241
28242 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
28243 Size0.hasValue() && Size1.hasValue() &&
28244 // Can't represent a scalable size + fixed offset in LocationSize
28245 (!Size0.isScalable() || SrcValOffset0 == 0) &&
28246 (!Size1.isScalable() || SrcValOffset1 == 0)) {
28247 // Use alias analysis information.
28248 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
28249 int64_t Overlap0 =
28250 Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
28251 int64_t Overlap1 =
28252 Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
28253 LocationSize Loc0 =
28254 Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
28255 LocationSize Loc1 =
28256 Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
28257 if (AA->isNoAlias(
28258 MemoryLocation(MUC0.MMO->getValue(), Loc0,
28259 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
28260 MemoryLocation(MUC1.MMO->getValue(), Loc1,
28261 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
28262 return false;
28263 }
28264
28265 // Otherwise we have to assume they alias.
28266 return true;
28267}
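// --- Illustrative sketch (editor's addition, not part of DAGCombiner.cpp). ---
// The alignment-based disjointness check above, restated over plain integers.
// It assumes the same preconditions mayAlias() guards on: both accesses share
// one base alignment that is strictly larger than their equal, fixed size, and
// both offsets are multiples of that size. The function name and standalone
// form are assumptions for illustration only.
#include <cassert>
#include <cstdint>

static bool provablyDisjoint(int64_t Off0, int64_t Off1, int64_t Size,
                             int64_t Alignment) {
  assert(Size > 0 && Alignment > Size && "mirrors the guards in mayAlias()");
  assert(Off0 % Size == 0 && Off1 % Size == 0);
  // Position of each access within its alignment window.
  int64_t OffAlign0 = Off0 % Alignment;
  int64_t OffAlign1 = Off1 % Alignment;
  // Disjoint if one access ends (within the window) before the other begins.
  return OffAlign0 + Size <= OffAlign1 || OffAlign1 + Size <= OffAlign0;
}
// Example: provablyDisjoint(0, 8, 8, 16) is true, while
// provablyDisjoint(0, 16, 8, 16) is false (both land at window offset 0).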
28268
28269/// Walk up chain skipping non-aliasing memory nodes,
28270/// looking for aliasing nodes and adding them to the Aliases vector.
28271void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
28272 SmallVectorImpl<SDValue> &Aliases) {
28273 SmallVector<SDValue, 8> Chains; // List of chains to visit.
28274 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
28275
28276 // Get alias information for node.
28277 // TODO: relax aliasing for unordered atomics (see D66309)
28278 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
28279
28280 // Starting off.
28281 Chains.push_back(OriginalChain);
28282 unsigned Depth = 0;
28283
28284 // Attempt to improve chain by a single step
28285 auto ImproveChain = [&](SDValue &C) -> bool {
28286 switch (C.getOpcode()) {
28287 case ISD::EntryToken:
28288 // No need to mark EntryToken.
28289 C = SDValue();
28290 return true;
28291 case ISD::LOAD:
28292 case ISD::STORE: {
28293 // Get alias information for C.
28294 // TODO: Relax aliasing for unordered atomics (see D66309)
28295 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28296 cast<LSBaseSDNode>(C.getNode())->isSimple();
28297 if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28298 // Look further up the chain.
28299 C = C.getOperand(0);
28300 return true;
28301 }
28302 // Alias, so stop here.
28303 return false;
28304 }
28305
28306 case ISD::CopyFromReg:
28307 // Always forward past CopyFromReg.
28308 C = C.getOperand(0);
28309 return true;
28310
28311 case ISD::LIFETIME_START:
28312 case ISD::LIFETIME_END: {
28313 // We can forward past any lifetime start/end that can be proven not to
28314 // alias the memory access.
28315 if (!mayAlias(N, C.getNode())) {
28316 // Look further up the chain.
28317 C = C.getOperand(0);
28318 return true;
28319 }
28320 return false;
28321 }
28322 default:
28323 return false;
28324 }
28325 };
28326
28327 // Look at each chain and determine if it is an alias. If so, add it to the
28328 // aliases list. If not, then continue up the chain looking for the next
28329 // candidate.
28330 while (!Chains.empty()) {
28331 SDValue Chain = Chains.pop_back_val();
28332
28333 // Don't bother if we've seen Chain before.
28334 if (!Visited.insert(Chain.getNode()).second)
28335 continue;
28336
28337 // For TokenFactor nodes, look at each operand and only continue up the
28338 // chain until we reach the depth limit.
28339 //
28340 // FIXME: The depth check could be made to return the last non-aliasing
28341 // chain we found before we hit a tokenfactor rather than the original
28342 // chain.
28343 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28344 Aliases.clear();
28345 Aliases.push_back(OriginalChain);
28346 return;
28347 }
28348
28349 if (Chain.getOpcode() == ISD::TokenFactor) {
28350 // We have to check each of the operands of the token factor for "small"
28351 // token factors, so we queue them up. Adding the operands to the queue
28352 // (stack) in reverse order maintains the original order and increases the
28353 // likelihood that getNode will find a matching token factor (CSE.)
28354 if (Chain.getNumOperands() > 16) {
28355 Aliases.push_back(Chain);
28356 continue;
28357 }
28358 for (unsigned n = Chain.getNumOperands(); n;)
28359 Chains.push_back(Chain.getOperand(--n));
28360 ++Depth;
28361 continue;
28362 }
28363 // Everything else
28364 if (ImproveChain(Chain)) {
28365 // Updated Chain Found, Consider new chain if one exists.
28366 if (Chain.getNode())
28367 Chains.push_back(Chain);
28368 ++Depth;
28369 continue;
28370 }
28371 // No Improved Chain Possible, treat as Alias.
28372 Aliases.push_back(Chain);
28373 }
28374}
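// --- Illustrative sketch (editor's addition, not part of DAGCombiner.cpp). ---
// The worklist pattern used by GatherAllAliases, reduced to a toy graph so the
// shape is easier to see: a visited set prevents revisits, merge-point
// ("token factor") predecessors are pushed in reverse so they are processed in
// their original order, and a depth limit conservatively falls back to the
// starting node. ToyNode, gatherStops, and MaxDepth are assumptions made up
// for this sketch; they are not LLVM types or APIs.
#include <cstddef>
#include <unordered_set>
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Preds; // Predecessor chain nodes.
  bool IsMergePoint = false;    // Plays the role of a TokenFactor.
};

static std::vector<ToyNode *> gatherStops(ToyNode *Start, unsigned MaxDepth) {
  std::vector<ToyNode *> Stops;
  std::vector<ToyNode *> Worklist{Start};
  std::unordered_set<ToyNode *> Visited;
  unsigned Depth = 0;
  while (!Worklist.empty()) {
    ToyNode *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // Already seen this chain.
    if (Depth > MaxDepth)
      return {Start}; // Give up: depend on the original node only.
    if (N->IsMergePoint) {
      // Push predecessors in reverse so they pop in original order.
      for (size_t I = N->Preds.size(); I;)
        Worklist.push_back(N->Preds[--I]);
      ++Depth;
      continue;
    }
    if (!N->Preds.empty()) {
      // "Improvable" chain: look one step further up.
      Worklist.push_back(N->Preds.front());
      ++Depth;
      continue;
    }
    Stops.push_back(N); // Nothing above: this node is a stopping point.
  }
  return Stops;
}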
28375
28376/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
28377/// (aliasing node.)
28378SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28379 if (OptLevel == CodeGenOptLevel::None)
28380 return OldChain;
28381
28382 // Ops for replacing token factor.
28383 SmallVector<SDValue, 8> Aliases;
28384
28385 // Accumulate all the aliases to this node.
28386 GatherAllAliases(N, OldChain, Aliases);
28387
28388 // If no operands then chain to entry token.
28389 if (Aliases.empty())
28390 return DAG.getEntryNode();
28391
28392 // If a single operand then chain to it. We don't need to revisit it.
28393 if (Aliases.size() == 1)
28394 return Aliases[0];
28395
28396 // Construct a custom tailored token factor.
28397 return DAG.getTokenFactor(SDLoc(N), Aliases);
28398}
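// --- Illustrative sketch (editor's addition, not part of DAGCombiner.cpp). ---
// The three-way decision at the end of FindBetterChain, restated over strings:
// no gathered dependencies means the chain can go straight to the entry token,
// a single dependency is reused as-is (no merge node needed), and several
// dependencies are merged under one token-factor-like node. combineDeps and the
// string encoding are purely illustrative assumptions.
#include <cstddef>
#include <string>
#include <vector>

static std::string combineDeps(const std::vector<std::string> &Deps) {
  if (Deps.empty())
    return "entry";      // No constraints: chain to the entry token.
  if (Deps.size() == 1)
    return Deps.front(); // Single constraint: no merge node needed.
  std::string TF = "TokenFactor(";
  for (size_t I = 0; I < Deps.size(); ++I)
    TF += (I ? ", " : "") + Deps[I];
  return TF + ")";
}
// Example: combineDeps({}) == "entry", combineDeps({"ld"}) == "ld",
// combineDeps({"ld", "st"}) == "TokenFactor(ld, st)".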
28399
28400// This function tries to collect a bunch of potentially interesting
28401// nodes to improve the chains of, all at once. This might seem
28402// redundant, as this function gets called when visiting every store
28403// node, so why not let the work be done on each store as it's visited?
28404//
28405// I believe this is mainly important because mergeConsecutiveStores
28406// is unable to deal with merging stores of different sizes, so unless
28407// we improve the chains of all the potential candidates up-front
28408// before running mergeConsecutiveStores, it might only see some of
28409// the nodes that will eventually be candidates, and then not be able
28410// to go from a partially-merged state to the desired final
28411// fully-merged state.
28412
28413bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28414 SmallVector<StoreSDNode *, 8> ChainedStores;
28415 StoreSDNode *STChain = St;
28416 // Intervals records which offsets from BaseIndex have been covered. In
28417 // the common case, every store writes to the address immediately before the
28418 // previous one and is thus merged with the previous interval at insertion time.
28419
28420 using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28421 IntervalMapHalfOpenInfo<int64_t>>;
28422 IMap::Allocator A;
28423 IMap Intervals(A);
28424
28425 // This holds the base pointer, index, and the offset in bytes from the base
28426 // pointer.
28427 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28428
28429 // We must have a base and an offset.
28430 if (!BasePtr.getBase().getNode())
28431 return false;
28432
28433 // Do not handle stores to undef base pointers.
28434 if (BasePtr.getBase().isUndef())
28435 return false;
28436
28437 // Do not handle stores to opaque types
28438 if (St->getMemoryVT().isZeroSized())
28439 return false;
28440
28441 // BaseIndexOffset assumes that offsets are fixed-size, which
28442 // is not valid for scalable vectors where the offsets are
28443 // scaled by `vscale`, so bail out early.
28444 if (St->getMemoryVT().isScalableVT())
28445 return false;
28446
28447 // Add ST's interval.
28448 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28449 std::monostate{});
28450
28451 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28452 if (Chain->getMemoryVT().isScalableVector())
28453 return false;
28454
28455 // If the chain has more than one use, then we can't reorder the mem ops.
28456 if (!SDValue(Chain, 0)->hasOneUse())
28457 break;
28458 // TODO: Relax for unordered atomics (see D66309)
28459 if (!Chain->isSimple() || Chain->isIndexed())
28460 break;
28461
28462 // Find the base pointer and offset for this memory node.
28463 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28464 // Check that the base pointer is the same as the original one.
28465 int64_t Offset;
28466 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28467 break;
28468 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28469 // Make sure we don't overlap with other intervals by checking the ones to
28470 // the left or right before inserting.
28471 auto I = Intervals.find(Offset);
28472 // If there's a next interval, we should end before it.
28473 if (I != Intervals.end() && I.start() < (Offset + Length))
28474 break;
28475 // If there's a previous interval, we should start after it.
28476 if (I != Intervals.begin() && (--I).stop() <= Offset)
28477 break;
28478 Intervals.insert(Offset, Offset + Length, std::monostate{});
28479
28480 ChainedStores.push_back(Chain);
28481 STChain = Chain;
28482 }
28483
28484 // If we didn't find a chained store, exit.
28485 if (ChainedStores.empty())
28486 return false;
28487
28488 // Improve all chained stores (St and ChainedStores members) starting from
28489 // where the store chain ended and return single TokenFactor.
28490 SDValue NewChain = STChain->getChain();
28491 SmallVector<SDValue, 8> TFOps;
28492 for (unsigned I = ChainedStores.size(); I;) {
28493 StoreSDNode *S = ChainedStores[--I];
28494 SDValue BetterChain = FindBetterChain(S, NewChain);
28495 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28496 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28497 TFOps.push_back(SDValue(S, 0));
28498 ChainedStores[I] = S;
28499 }
28500
28501 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28502 SDValue BetterChain = FindBetterChain(St, NewChain);
28503 SDValue NewST;
28504 if (St->isTruncatingStore())
28505 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28506 St->getBasePtr(), St->getMemoryVT(),
28507 St->getMemOperand());
28508 else
28509 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28510 St->getBasePtr(), St->getMemOperand());
28511
28512 TFOps.push_back(NewST);
28513
28514 // If we improved every element of TFOps, then we've lost the dependence on
28515 // NewChain to successors of St and we need to add it back to TFOps. Do so at
28516 // the beginning to keep relative order consistent with FindBetterChains.
28517 auto hasImprovedChain = [&](SDValue ST) -> bool {
28518 return ST->getOperand(0) != NewChain;
28519 };
28520 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28521 if (AddNewChain)
28522 TFOps.insert(TFOps.begin(), NewChain);
28523
28524 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28525 CombineTo(St, TF);
28526
28527 // Add TF and its operands to the worklist.
28528 AddToWorklist(TF.getNode());
28529 for (const SDValue &Op : TF->ops())
28530 AddToWorklist(Op.getNode());
28531 AddToWorklist(STChain);
28532 return true;
28533}
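// --- Illustrative sketch (editor's addition, not part of DAGCombiner.cpp). ---
// The interval bookkeeping in parallelizeChainedStores uses llvm::IntervalMap;
// here the same "does [Start, End) overlap anything already recorded" test is
// restated with std::map, which is a simplification chosen for this sketch,
// not the data structure the pass uses. Intervals are half-open, so touching
// ranges do not conflict.
#include <cstdint>
#include <iterator>
#include <map>

class IntervalSet {
  std::map<int64_t, int64_t> Ivals; // Start -> End, half-open [Start, End).

public:
  // Record [Start, End) only if it overlaps no existing interval.
  bool tryInsert(int64_t Start, int64_t End) {
    // The first interval starting at or after Start must begin at or after End.
    auto Next = Ivals.lower_bound(Start);
    if (Next != Ivals.end() && Next->first < End)
      return false;
    // The interval before Start (if any) must end at or before Start.
    if (Next != Ivals.begin() && std::prev(Next)->second > Start)
      return false;
    Ivals.emplace(Start, End);
    return true;
  }
};
// Example: tryInsert(0, 4) and tryInsert(4, 8) both succeed (they only touch),
// after which tryInsert(2, 6) fails because it overlaps both.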
28534
28535bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28536 if (OptLevel == CodeGenOptLevel::None)
28537 return false;
28538
28539 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28540
28541 // We must have a base and an offset.
28542 if (!BasePtr.getBase().getNode())
28543 return false;
28544
28545 // Do not handle stores to undef base pointers.
28546 if (BasePtr.getBase().isUndef())
28547 return false;
28548
28549 // Directly improve a chain of disjoint stores starting at St.
28550 if (parallelizeChainedStores(St))
28551 return true;
28552
28553 // Improve St's chain.
28554 SDValue BetterChain = FindBetterChain(St, St->getChain());
28555 if (St->getChain() != BetterChain) {
28556 replaceStoreChain(St, BetterChain);
28557 return true;
28558 }
28559 return false;
28560}
28561
28562/// This is the entry point for the file.
28563 void SelectionDAG::Combine(CombineLevel Level, AAResults *AA,
28564 CodeGenOptLevel OptLevel) {
28565 /// This is the main entry point to this class.
28566 DAGCombiner(*this, AA, OptLevel).Run(Level);
28567}
static bool mayAlias(MachineInstr &MIa, SmallVectorImpl< MachineInstr * > &MemInsns, AliasAnalysis *AA)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static const LLT S1
amdgpu AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques=false)
static cl::opt< bool > EnableShrinkLoadReplaceStoreWithStore("combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store"))
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
static cl::opt< unsigned > TokenFactorInlineLimit("combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048), cl::desc("Limit the number of operands to inline for Token Factors"))
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc, bool NonNegZExt=false)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, CombineLevel Level)
Fold (sext (select c, load x, load y)) -> (select c, sextload x, sextload y) (zext (select c,...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtendOfConstant(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG)
static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
If a vector binop is performed on splat values, it may be profitable to extract, scalarize,...
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL)
Helper function for visitOR to extract the needed side of a rotate idiom from a shl/srl/mul/udiv.
static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, bool &IsLoad, bool &IsMasked, SDValue &Ptr, const TargetLowering &TLI)
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG)
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL)
Given the operands of an add/sub operation, see if the 2nd operand is a masked 0/1 whose source opera...
static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef< int > Mask, SmallVectorImpl< int > &NewMask, SDValue Elt, unsigned InsIndex)
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf)
If we have a unary shuffle of a shuffle, see if it can be folded away completely.
static bool canSplitIdx(LoadSDNode *LD)
static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo.
static cl::opt< bool > StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, cl::desc("Bypass the profitability model of load slicing"), cl::init(false))
Hidden option to stress test load slicing, i.e., when this option is enabled, load slicing bypasses m...
static cl::opt< bool > UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA"))
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices.
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, SDValue N0, SDValue N1, SDNode *N)
static SDValue foldExtendVectorInregToExtendOfSubvector(SDNode *N, const SDLoc &DL, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalOperations)
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode)
Check if N satisfies: N is used once.
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, SDValue RightHand, SelectionDAG &DAG)
Given a tree of logic operations with shape like (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) try to match an...
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG, bool LegalOperations)
If we are extracting a subvector produced by a wide binary operator try to use a narrow binary operat...
static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op, unsigned Depth, bool AssumeNonZero)
static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, const SDLoc &DL, SelectionDAG &DAG)
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0....
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG)
If we are extracting a subvector from a wide vector load, convert to a narrow load to eliminate the e...
static ElementCount numVectorEltsOrZero(EVT T)
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, bool ShouldCommuteOperands)
This inverts a canonicalization in IR that replaces a variable select arm with an identity constant.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG)
Given an extending node with a pop-count operand, if the target does not support a pop-count in the n...
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG)
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
If a shuffle inserts exactly one element from a source vector operand into another vector operand and...
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG, const SDLoc &DL)
For targets that support usubsat, match a bit-hack form of that operation that ends in 'and' and conv...
static cl::opt< bool > CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis"))
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy)
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, SelectionDAG &DAG)
Combine shuffle of shuffle of the form: shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X...
static bool isDivisorPowerOfTwo(SDValue Divisor)
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)), try to produce VECTOR_SHUFFLE(EXTRACT_SUBVEC...
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static bool hasNoInfs(const TargetOptions &Options, SDValue N)
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static std::optional< EVT > canCombineShuffleToExtendVectorInreg(unsigned Opcode, EVT VT, std::function< bool(unsigned)> Match, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static SDValue foldAddSubOfSignBit(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold a 'not' shifted sign-bit with add/sub with constant operand into a shift and add with a d...
static SDValue stripTruncAndExt(SDValue Value)
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, const SDLoc &DL, bool LegalOperations)
Transform a vector binary operation into a scalar binary operation by moving the math/logic after an ...
static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, SDValue X, SDValue Carry0, SDValue Carry1, SDNode *N)
If we are facing some sort of diamond carry propagation pattern try to break it up to generate someth...
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles followed by concatenatio...
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG)
static auto getFirstIndexOf(R &&Range, const T &Val)
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out.
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef< int > Mask)
If the shuffle mask is taking exactly one element from the first vector operand and passing through a...
static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, const TargetLowering &TLI)
static cl::opt< bool > EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable merging multiple stores " "into a wider store"))
static bool isContractableFMUL(const TargetOptions &Options, SDValue N)
static cl::opt< bool > MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), cl::desc("DAG combiner may split indexing from loads"))
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, SDValue &Mask)
static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static bool isBSwapHWordPair(SDValue N, MutableArrayRef< SDNode * > Parts)
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x,...
static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, bool isFMAXNUMFMINNUM_IEEE, bool isFMAXNUMFMINNUM)
static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
static SDNode * getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, bool &IsMasked, SDValue &Ptr, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS, const TargetLowering &TLI)
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, const TargetLowering &TLI, bool Force)
Flips a boolean if it is cheaper to compute.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, KnownBits &Known)
static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, bool LegalOperations, SDNode *N, SDValue N0, ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc)
static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT)
static SDValue combineConcatVectorOfShuffleAndItsOperands(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, bool LegalOperations)
bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, SelectionDAG &DAG)
static cl::opt< bool > EnableVectorFCopySignExtendRound("combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), cl::desc("Enable merging extends and rounds into FCOPYSIGN on vector types"))
static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG)
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
static SDValue combineShiftToMULH(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI)
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, SelectionDAG &DAG)
static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, SelectionDAG &DAG)
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, SelectionDAG &DAG, bool IsRotate)
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N)
OR combines for which the commuted variant will be tried as well.
static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalOperations)
static cl::opt< bool > EnableReduceLoadOpStoreWidth("combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence"))
static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, SDValue &BasePtr, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations)
static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG)
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG)
Try to replace shift/logic that tests if a bit is clear with mask + setcc.
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, SelectionDAG &DAG, SDNode *N, SDValue N0, SDValue N1, EVT VT, EVT ShiftAmountTy)
static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1)
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG)
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineConcatVectorOfConcatVectors(SDNode *N, SelectionDAG &DAG)
static SDValue tryToFoldExtOfAtomicLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, SDValue N0, ISD::LoadExtType ExtLoadType)
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V, bool ForceCarryReconstruction=false)
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
If a (v)select has a condition value that is a sign-bit test, try to smear the condition operand sign...
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG)
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations)
static cl::opt< unsigned > StoreMergeDependenceLimit("combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10), cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check"))
static cl::opt< std::string > CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, cl::desc("Only use DAG-combiner alias analysis in this" " function"))
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, SelectionDAG &DAG)
Given a bitwise logic operation N with a matching bitwise logic operand, fold a pattern where 2 of th...
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, unsigned &BW, bool &Unsigned, SelectionDAG &DAG)
static std::optional< SDByteProvider > calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional< uint64_t > VectorIndex, unsigned StartingIndex=0)
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file provides an implementation of debug counters.
#define DEBUG_COUNTER(VARNAME, COUNTERNAME, DESC)
Definition: DebugCounter.h:182
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
static bool isUndef(ArrayRef< int > Mask)
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Value * simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, const SimplifyQuery &Q, unsigned MaxRecurse)
Check for common or similar folds of integer division or integer remainder.
This file implements a coalescing interval map for small objects.
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file provides utility analysis objects describing memory locations.
This file contains the declarations for metadata subclasses.
#define T1
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
static constexpr int Concat[]
Value * RHS
Value * LHS
bool isNegative() const
Definition: APFloat.h:1295
bool isNormal() const
Definition: APFloat.h:1299
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: APFloat.h:1278
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
bool isLargest() const
Definition: APFloat.h:1311
bool isIEEE() const
Definition: APFloat.h:1313
bool isInfinity() const
Definition: APFloat.h:1292
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt umul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1941
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
Definition: APInt.cpp:1728
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:613
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
Definition: APInt.h:444
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1636
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition: APInt.h:1227
int32_t exactLogBase2() const
Definition: APInt.h:1725
APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1905
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned countLeadingZeros() const
Definition: APInt.h:1556
unsigned logBase2() const
Definition: APInt.h:1703
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition: APInt.h:488
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition: APInt.cpp:1930
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition: APInt.h:1128
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition: APInt.cpp:453
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
This is an SDNode representing atomic operations.
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
static bool computeAliasing(const SDNode *Op0, const LocationSize NumBytes0, const SDNode *Op1, const LocationSize NumBytes1, const SelectionDAG &DAG, bool &IsAlias)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Represents known origin of an individual byte in combine pattern.
Definition: ByteProvider.h:30
static ByteProvider getConstantZero()
Definition: ByteProvider.h:73
static ByteProvider getSrc(std::optional< ISelOp > Val, int64_t ByteOffset, int64_t VectorOffset)
Definition: ByteProvider.h:66
Combiner implementation.
Definition: Combiner.h:34
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
const APFloat & getValueAPF() const
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
bool isNegative() const
Return true if the value is negative.
bool isZero() const
Return true if the value is positive or negative zero.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
const ConstantInt * getConstantIntValue() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
static bool shouldExecute(unsigned CounterName)
Definition: DebugCounter.h:72
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:308
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:340
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
Helper struct to store a base, index and offset that forms an address.
Definition: LoadStoreOpt.h:38
This class is used to form a handle around another node that is persistent and is updated across invo...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
static LocationSize precise(uint64_t Value)
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
bool isScalable() const
TypeSize getValue() const
Machine Value Type.
SimpleValueType SimpleTy
static auto all_valuetypes()
SimpleValueType Iteration.
static MVT getIntegerVT(unsigned BitWidth)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Function & getFunction()
Return the LLVM function that this machine code represents.
A description of a memory reference used in the backend.
const PseudoSourceValue * getPseudoValue() const
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
const SDValue & getIndex() const
const SDValue & getScale() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
bool isNonTemporal() const
bool isInvariant() const
bool isDereferenceable() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Representation for a specific memory location.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator end() const
Definition: ArrayRef.h:357
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
size_t use_size() const
Return the number of uses of this node.
void intersectFlagsWith(const SDNodeFlags Flags)
Clear any flags in this node that aren't also set in Flags.
TypeSize getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isOperandOf(const SDNode *N) const
Return true if this node is an operand of N.
const APInt & getConstantOperandAPInt(unsigned Num) const
Helper method returns the APInt of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
bool reachesChainWithoutSideEffects(SDValue Dest, unsigned Depth=2) const
Return true if this operand (which must be a chain) reaches the specified operand without crossing an...
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
virtual bool disableGenericCombines(CodeGenOptLevel OptLevel) const
Help to insert SDNodeFlags automatically in transforming.
Definition: SelectionDAG.h:361
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
bool willNotOverflowAdd(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the addition of 2 nodes can never overflow.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
Definition: SelectionDAG.h:954
SDValue getSplatSourceVector(SDValue V, int &SplatIndex)
If V is a splatted value, return the source vector and its splat index.
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
Definition: SelectionDAG.h:551
SDValue getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
bool isKnownNeverSNaN(SDValue Op, unsigned Depth=0) const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl)
Constant fold a setcc to true or false.
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm, bool ConstantFold=true)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
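A sketch of the kind of fold this enables (DL, VT, A, B are placeholders): when two values share no set bits, no addition carry can occur, so their sum equals their bitwise OR.

    // add(A, B) --> or(A, B) when no bit position can carry.
    if (DAG.haveNoCommonBitsSet(A, B))
      return DAG.getNode(ISD::OR, DL, VT, A, B);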
bool cannotBeOrderedNegativeFP(SDValue Op) const
Test whether the given float value is known not to be an ordered negative value.
SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getAssertAlign(const SDLoc &DL, SDValue V, Align A)
Return an AssertAlignSDNode.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
bool willNotOverflowSub(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the sub of 2 nodes can never overflow.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
bool shouldOptForSize() const
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
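Illustrative equivalence (DL, VT, Val are placeholders): the helper simply builds the XOR-with-all-ones form.

    SDValue NotVal = DAG.getNOT(DL, Val, VT);
    // ... which is the same node as:
    SDValue NotVal2 =
        DAG.getNode(ISD::XOR, DL, VT, Val, DAG.getAllOnesConstant(DL, VT));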
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:448
SDValue getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
APInt computeVectorKnownZeroElements(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
For each demanded element of a vector, see if it is known to be zero.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
void salvageDebugInfo(SDNode &N)
To be invoked on an SDNode that is slated to be erased.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
void DeleteNode(SDNode *N)
Remove the specified node from the system.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
SDValue simplifySelect(SDValue Cond, SDValue TVal, SDValue FVal)
Try to simplify a select/vselect into 1 of its operands or a constant.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N) const
Test whether the given value is a constant FP or similar node.
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool LegalizeOp(SDNode *N, SmallSetVector< SDNode *, 16 > &UpdatedNodes)
Transforms a SelectionDAG node and any operands to it into a node that is compatible with the target ...
bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops)
Check if a node exists without modifying its flags.
void Combine(CombineLevel Level, AAResults *AA, CodeGenOptLevel OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together,...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns the sum of the base pointer and offset.
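A common pattern when splitting a memory access in half (BasePtr, DL and HalfBytes are placeholders); the helper folds the pointer arithmetic into a single node:

    // Pointer to the second half, HalfBytes past the original base.
    SDValue HiPtr =
        DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(HalfBytes), DL);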
bool willNotOverflowMul(bool IsSigned, SDValue N0, SDValue N1) const
Determine if the result of the mul of 2 nodes can never overflow.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly=false, unsigned Depth=0) const
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth=0) const
Test if the given value is known to have exactly one bit set.
bool isKnownNeverZero(SDValue Op, unsigned Depth=0) const
Test whether the given SDValue is known to contain non-zero value(s).
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, MachineMemOperand *MMO)
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
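A rough sketch (operands hypothetical): this collapses an explicit setcc + select pair into one call; reusing the compared values as the select arms yields a signed minimum.

    // select_cc(LHS, RHS, LHS, RHS, setlt) == smin(LHS, RHS)
    SDValue Min = DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, ISD::SETLT);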
SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges=nullptr, bool IsExpanding=false)
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
const TargetLibraryInfo & getLibInfo() const
Definition: SelectionDAG.h:479
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
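One way this is typically used (Op, VT, ExtVT are placeholders): proving that an in-register sign extension is redundant because the value already carries enough sign bits.

    unsigned VTBits = VT.getScalarSizeInBits();
    unsigned EVTBits = ExtVT.getScalarSizeInBits();
    // sign_extend_inreg is a no-op if the top (VTBits - EVTBits + 1) bits
    // are already copies of the sign bit.
    if (DAG.ComputeNumSignBits(Op) >= VTBits - EVTBits + 1)
      return Op;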
bool MaskedVectorIsZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Return true if 'Op' is known to be zero in DemandedElts.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
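A sketch (DL, VT, OpVT hypothetical): because targets differ in how they encode 'true' (0/1 vs. 0/-1), boolean constants should be built this way rather than with a literal 1.

    // 'true' in whatever form getBooleanContents() dictates for OpVT.
    SDValue TrueVal = DAG.getBoolConstant(true, DL, VT, OpVT);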
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
bool canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, bool PoisonOnly=false, bool ConsiderFlags=true, unsigned Depth=0) const
Return true if Op can create undef or poison from non-undef & non-poison operands.
OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const
Determine if the result of the unsigned addition of 2 nodes can overflow.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const
Check if the provided node is safe to speculatively execute given its current arguments.
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
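A typical guard (N0 and the APInt MaskValue are placeholders): if the bits an AND would clear are already known to be zero, the AND can be dropped.

    // (and N0, Mask) --> N0 when the bits outside Mask are already zero.
    if (DAG.MaskedValueIsZero(N0, ~MaskValue))
      return N0;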
bool isKnownToBeAPowerOfTwoFP(SDValue Val, unsigned Depth=0) const
Test if the given fp value is known to be an integer power-of-2, either positive or negative.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, SDNodeFlags Flags)
Try to simplify a floating-point binary operation into 1 of its operands or a constant.
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
Definition: SelectionDAG.h:560
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
bool isUndef(unsigned Opcode, ArrayRef< SDValue > Ops)
Return true if the result of this operation is always undefined.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
DenormalMode getDenormalMode(EVT VT) const
Return the current function's default denormal handling kind for the given floating point type.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
Definition: SelectionDAG.h:878
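A sketch (VT assumed to be a vector type, DL hypothetical): getSplat picks BUILD_VECTOR or SPLAT_VECTOR as appropriate, so it works for both fixed-width and scalable vectors.

    // Vector with every lane equal to 1.
    SDValue Ones =
        DAG.getSplat(VT, DL, DAG.getConstant(1, DL, VT.getScalarType()));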
static unsigned getOpcode_EXTEND(unsigned Opcode)
Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode.
Definition: SelectionDAG.h:908
bool isADDLike(SDValue Op, bool NoWrap=false) const
Return true if the specified operand is an ISD::OR or ISD::XOR node that can be treated as an ISD::AD...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue simplifyShift(SDValue X, SDValue Y)
Try to simplify a shift into 1 of its operands or a constant.
void transferDbgValues(SDValue From, SDValue To, unsigned OffsetInBits=0, unsigned SizeInBits=0, bool InvalidateDbg=true)
Transfer debug values from one node to another, while optionally generating fragment expressions for ...
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
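A sketch (DL, Cond hypothetical): unlike getNOT, which flips every bit, this flips only the boolean sense, respecting the target's BooleanContent, so it is the appropriate way to invert a setcc result.

    // Invert a condition without assuming booleans are all-ones or 0/1.
    SDValue NotCond = DAG.getLogicalNOT(DL, Cond, Cond.getValueType());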
SDValue getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
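A sketch (SVN, N0, N1, VT, DL are placeholders): swapping shuffle inputs requires rewriting the mask with commuteMask, since element indices >= NumElts refer to the second operand.

    SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
    ShuffleVectorSDNode::commuteMask(Mask);
    // Same shuffle with the two source vectors swapped.
    SDValue Swapped = DAG.getVectorShuffle(VT, DL, N1, N0, Mask);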
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
void push_back(bool Val)
void reserve(unsigned N)
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:366
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:717
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool has(LibFunc F) const
Tests whether a library function is available.
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT) const
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const
virtual bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
virtual bool enableAggressiveFMAFusion(EVT VT) const
Return true if target always benefits from combining into FMA for a given value type.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, bool LegalOnly) const
virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const
Use bitwise logic to make pairs of compares more efficient.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy, LLT SrcTy) const
Return true if an fpext operation input to an Opcode operation is free (for instance,...
virtual bool hasBitTest(SDValue X, SDValue Y) const
Return true if the target has a bit-test instruction: (X & (1 << Y)) ==/!= 0 This knowledge can be us...
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
virtual bool areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, const MemSDNode &NodeY) const
Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
virtual bool isCommutativeBinOp(unsigned Opcode) const
Returns true if the opcode is a commutative binary operation.
virtual bool isFPImmLegal(const APFloat &, EVT, bool ForCodeSize=false) const
Returns true if the target can instruction select the specified FP immediate natively.
virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst, SDValue IntPow2) const
virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, EVT VT) const
Returns true if we should normalize select(N0&N1, X, Y) => select(N0, select(N1, X,...
virtual bool preferScalarizeSplat(SDNode *N) const
bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked load is legal on this target.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const
Return true if it is profitable to convert a select of FP constants into a constant pool load whose a...
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
virtual bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const
Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes.
virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to fold a pair of shifts into a mask.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const
Return true if creating a shift of the type by the given amount is not profitable.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, const APInt &AndMask) const
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const
Return true if pulling a binary operation into a select with an identity constant is profitable.
virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
virtual bool preferIncOfAddToSubOfNot(EVT VT) const
These two forms are equivalent: 'sub y, (xor x, -1)' and 'add (add x, 1), y'. The variant with two add's is IR...
virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const
virtual bool isFNegFree(EVT VT) const
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
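A typical guard in a combine (VT, A, B hypothetical; LegalOperations stands in for the combiner's post-legalization flag): only introduce an opcode the target can actually select once operation legalization has run.

    if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::UMIN, VT))
      return DAG.getNode(ISD::UMIN, DL, VT, A, B);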
virtual bool mergeStoresAfterLegalization(EVT MemVT) const
Allow store merging for the specified type after legalization in addition to before legalization.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getGatherAllAliasesMaxDepth() const
virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal or custom on this target.
bool isAtomicLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified atomic load with extension is legal on this target.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const
There are two ways to clear extreme bits (either low or high): Mask: x & (-1 << y) (the instcombine c...
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const
Return true if it is reasonable to merge stores to MemVT size.
virtual bool preferABDSToABSWithNSW(EVT VT) const
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
AndOrSETCCFoldKind
Enum of different potentially desirable ways to fold (and/or (setcc ...), (setcc ....
virtual bool shouldScalarizeBinop(SDValue VecOp) const
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x,...
bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed masked store is legal on this target.
virtual bool isVectorClearMaskLegal(ArrayRef< int >, EVT) const
Similar to isShuffleMaskLegal.
bool hasTargetDAGCombine(ISD::NodeType NT) const
If true, the target has custom DAG combine transformations that it can perform for the specified node...
virtual bool shouldSplatInsEltVarIndex(EVT) const
Return true if inserting a scalar into a variable element of an undef vector is more efficiently hand...
NegatibleCost
Enum that specifies when a float negation is beneficial.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(EVT VT, unsigned ShiftOpc, bool MayTransformRotate, const APInt &ShiftOrRotateAmt, const std::optional< APInt > &AndMask) const
virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const
Returns true if MI can be combined with another instruction to form TargetOpcode::G_FMAD.
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool generateFMAsInMachineCombiner(EVT VT, CodeGenOptLevel OptLevel) const
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool hasPairedLoad(EVT, Align &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
virtual bool convertSelectOfConstantsToMath(EVT VT) const
Return true if a select of constants (select Cond, C1, C2) should be transformed into simple math ops...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
SDValue getCheaperOrNeutralNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, const NegatibleCost CostThreshold=NegatibleCost::Neutral, unsigned Depth=0) const
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
virtual bool IsDesirableToPromoteOp(SDValue, EVT &) const
This method queries the target whether it is beneficial for the dag combiner to promote the specified node.
virtual bool isTypeDesirableForOp(unsigned, EVT VT) const
Return true if the target has native support for the specified value type and it is 'desirable' to us...
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
bool isConstFalseVal(SDValue N) const
Return true if N is a constant or constant vector equal to the false value from getBooleanContents().
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
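A rough call pattern (N is a udiv-by-constant node; LegalOperations stands in for the after-legalization flag): the helper returns the multiply/shift expansion and records the nodes it created.

    SmallVector<SDNode *, 8> Built;
    if (SDValue Expanded = TLI.BuildUDIV(N, DAG, LegalOperations, Built))
      return Expanded;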
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool getPostIndexedAddressParts(SDNode *, SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if this node ...
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isConstTrueVal(SDValue N) const
Return true if N is a constant or constant vector equal to the true value from getBooleanContents().
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const
Get a pointer to vector element Idx located in memory for a vector of type VecVT starting at a base a...
virtual bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const
Return true if it is profitable to move this shift by a constant amount through its operand,...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const
virtual bool getPreIndexedAddressParts(SDNode *, SDValue &, SDValue &, ISD::MemIndexedMode &, SelectionDAG &) const
Returns true by value, base pointer and offset pointer and addressing mode by reference if the node's...
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, SmallVectorImpl< SDNode * > &Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const
Targets may override this function to provide custom SREM lowering for power-of-2 denominators.
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoSignedZerosFPMath
NoSignedZerosFPMath - This flag is enabled when the -enable-no-signed-zeros-fp-math is specified on t...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
const fltSemantics & getFltSemantics() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
User * getUser() const
Returns the User that contains this Use.
Definition: Use.h:72
Value * getOperand(unsigned i) const
Definition: User.h:169
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent an VP_SCATTER node.
const SDValue & getValue() const
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
bool use_empty() const
Definition: Value.h:344
iterator_range< use_iterator > uses()
Definition: Value.h:376
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition: TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:229
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:251
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2178
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2183
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2188
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2193
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1377
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:560
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition: ISDOpcodes.h:368
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:270
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:488
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:986
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition: ISDOpcodes.h:374
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:544
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1362
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
Definition: ISDOpcodes.h:1366
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1376
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:663
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1359
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:723
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1363
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:759
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition: ISDOpcodes.h:647
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:350
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1378
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition: ISDOpcodes.h:68
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:209
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:574
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:985
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ TargetConstantFP
Definition: ISDOpcodes.h:159
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:857
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1336
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition: ISDOpcodes.h:360
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:332
@ TargetFrameIndex
Definition: ISDOpcodes.h:166
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:675
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1311
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ HANDLENODE
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:1198
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1379
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
Definition: ISDOpcodes.h:923
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ GET_FPENV_MEM
Gets the current floating-point environment.
Definition: ISDOpcodes.h:1014
@ CARRY_FALSE
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:261
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:658
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1360
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:280
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
Definition: ISDOpcodes.h:945
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:856
@ VECREDUCE_FMINIMUM
Definition: ISDOpcodes.h:1367
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:341
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1147
@ SET_FPENV_MEM
Sets the current floating point environment.
Definition: ISDOpcodes.h:1019
@ SADDO_CARRY
Carry-using overflow-aware nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:314
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isIndexTypeSigned(MemIndexType IndexType)
Definition: ISDOpcodes.h:1496
bool isExtVecInRegOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1606
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool matchUnaryPredicate(SDValue Op, std::function< bool(ConstantSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantSDNode predicate.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool matchUnaryFpPredicate(SDValue Op, std::function< bool(ConstantFPSDNode *)> Match, bool AllowUndefs=false)
Hook for matching ConstantFPSDNode predicate.
bool isFPEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with floati...
Definition: ISDOpcodes.h:1581
bool isExtOpcode(unsigned Opcode)
Definition: ISDOpcodes.h:1601
bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
bool isVPBinaryOp(unsigned Opcode)
Whether this is a vector-predicated binary operation opcode.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
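A sketch (CC and VT hypothetical): inverting a condition code and swapping operands are the usual building blocks when canonicalizing compares.

    ISD::CondCode InvCC = ISD::getSetCCInverse(CC, VT);
    // ... and the code for (Y op X) given the code for (X op Y):
    ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);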
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1422
std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1492
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1492
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1563
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Return true if N is a BUILD_VECTOR or SPLAT_VECTOR of a single constant integer; on success the splat value is returned in SplatValue.
bool matchBinaryPredicate(SDValue LHS, SDValue RHS, std::function< bool(ConstantSDNode *, ConstantSDNode *)> Match, bool AllowUndefs=false, bool AllowTypeMismatch=false)
Attempt to match a binary predicate against a pair of scalar/splat constants or every element of a pa...
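A hedged sketch of the binary hook; C1 and C2 are assumed SDValues holding scalar or splat/vector constants:
  // True only if every paired element of C1/C2 satisfies the predicate.
  auto UGE = [](ConstantSDNode *A, ConstantSDNode *B) {
    return A->getAPIntValue().uge(B->getAPIntValue());
  };
  bool EveryElementUGE = ISD::matchBinaryPredicate(C1, C2, UGE);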
bool isVPReduction(unsigned Opcode)
Whether this is a vector-predicated reduction opcode.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1479
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, EVT Type)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
Definition: ISDOpcodes.h:1575
@ VecLoad
Definition: NVPTX.h:93
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
specificval_ty m_Specific(const Value *V)
Match only the specific value V.
Definition: PatternMatch.h:875
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< cst_pred_ty< is_all_ones >, ValTy, Instruction::Xor, true > m_Not(const ValTy &V)
Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
BinaryOpc_match< LHS, RHS, false > m_Sra(const LHS &L, const RHS &R)
BinaryOpc_match< LHS, RHS, false > m_Srl(const LHS &L, const RHS &R)
UnaryOpc_match< Opnd > m_AnyExt(const Opnd &Op)
Or< Preds... > m_AnyOf(Preds &&...preds)
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
NUses_match< 1, Value_match > m_OneUse()
ConstantInt_match m_ConstInt()
Match any integer constant or a splat of an integer constant.
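A minimal sketch of these SDPatternMatch helpers, assuming the sd_match overload that omits the SelectionDAG argument and an SDNode *N from the surrounding combine:
  using namespace llvm::SDPatternMatch;
  // True iff N is (srl (and X, C1), C2), the AND has a single use, and
  // C1/C2 are integer constants or constant splats.
  bool Matches =
      sd_match(N, m_Srl(m_OneUse(m_And(m_Value(), m_ConstInt())), m_ConstInt()));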
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
int ilogb(const IEEEFloat &Arg)
Definition: APFloat.cpp:4504
constexpr double e
Definition: MathExtras.h:31
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:853
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:239
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1527
SDValue getBitwiseNotOperand(SDValue V, SDValue Mask, bool AllowUndefs)
If V is a bitwise not, returns the inverted operand.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:360
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:899
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:280
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1509
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool isMinSignedConstant(SDValue V)
Returns true if V is a constant min signed integer value.
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:372
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
unsigned M1(unsigned Val)
Definition: VE.h:376
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:324
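Illustrative values for the MathExtras helpers listed here (a worked sketch, not from this file):
  // Log2_32(32) == 5              Log2_32_Ceil(33) == 6
  // isPowerOf2_64(64) == true     PowerOf2Ceil(33) == 64
  // countr_zero(40u) == 3         countl_zero(uint32_t(1)) == 31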
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1477
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
detail::ValueMatchesPoly< M > HasValue(M Matcher)
Definition: Error.h:221
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SDValue peekThroughTruncates(SDValue V)
Return the non-truncated source operand of V if it exists.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
SDValue peekThroughOneUseBitcasts(SDValue V)
Return the non-bitcasted and one-use source operand of V if it exists.
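A small sketch combining the peek-through helpers with the build-vector predicates above (N is an assumed SDNode):
  // Look through any bitcasts before asking whether the source is all zeros.
  SDValue Src = peekThroughBitcasts(N->getOperand(0));
  bool SrcAllZeros = ISD::isBuildVectorAllZeros(Src.getNode());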
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool isOneOrOneSplat(SDValue V, bool AllowUndefs=false)
Return true if the value is a constant 1 integer or a splatted vector of a constant 1 integer (with n...
@ Other
Any other memory.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
CombineLevel
Definition: DAGCombine.h:15
@ AfterLegalizeDAG
Definition: DAGCombine.h:19
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ AfterLegalizeTypes
Definition: DAGCombine.h:17
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ FMul
Product of floats.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
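A hedged sketch of this splat-constant query in a typical identity fold, assumed to live inside a visit function that returns SDValue:
  // Fold (shl X, 0) --> X when the shift amount is a zero constant or zero splat.
  if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)))
    if (C->isZero())
      return N->getOperand(0);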
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1921
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
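For example, a sketch (not from this file) of widening a length-4 mask by a factor of 2:
  SmallVector<int, 8> Wide;
  if (widenShuffleMaskElts(2, {0, 1, 6, 7}, Wide)) {
    // Wide is now {0, 3}: each aligned pair of narrow lanes maps to one wide lane.
  }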
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
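A sketch of these range helpers applied to an SDNode's operand list (N and its op_values() range are assumed):
  bool AllUndef = all_of(N->op_values(),
                         [](SDValue Op) { return Op.isUndef(); });
  auto NumConstOps = count_if(N->op_values(),
                              [](SDValue Op) { return isa<ConstantSDNode>(Op); });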
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
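Illustrative values for the alignment helpers above:
  // commonAlignment(Align(16), 8) == Align(8)   (an offset of 8 only preserves 8-byte alignment)
  // isAligned(Align(8), 24)       == true       (24 is a multiple of 8)
  // Log2(Align(16))               == 4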
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
Definition: STLExtras.h:2039
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
bool isNeutralConstant(unsigned Opc, SDNodeFlags Flags, SDValue V, unsigned OperandNo)
Returns true if V is a neutral element of Opc with Flags.
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:359
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
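These constant predicates typically guard identity folds; an illustrative comment-only sketch (N1 denotes an assumed second operand):
  // (or  X, 0)  --> X   when isNullConstant(N1)
  // (and X, -1) --> X   when isAllOnesConstant(N1)
  // (mul X, 1)  --> X   when isOneConstant(N1) or isOneOrOneSplat(N1)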
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:360
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
static ExponentType semanticsMinExponent(const fltSemantics &)
Definition: APFloat.cpp:300
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static ExponentType semanticsMaxExponent(const fltSemantics &)
Definition: APFloat.cpp:296
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:292
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:246
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getIEEE()
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:120
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:233
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-two number of elements.
Definition: ValueTypes.h:455
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isScalableVT() const
Return true if the type is a scalable type.
Definition: ValueTypes.h:183
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:282
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:246
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:173
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isZeroSized() const
Test if the given EVT has zero size; this will fail if called on a scalable type.
Definition: ValueTypes.h:131
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
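A short sketch of the EVT helpers above (Ctx is an assumed LLVMContext):
  EVT V4i32 = EVT::getVectorVT(Ctx, MVT::i32, 4);
  EVT Elt   = V4i32.getScalarType();                          // i32
  EVT I64   = EVT::getIntegerVT(Ctx, 64);
  bool Narrower = Elt.bitsLT(I64);                            // true: 32 bits < 64 bits
  bool SameSize = V4i32.bitsEq(EVT::getIntegerVT(Ctx, 128));  // true: both are 128 bits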
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:104
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:238
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition: KnownBits.h:292
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:83
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
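A hedged sketch of the KnownBits queries above; DAG, Amt, and a power-of-two OpSizeInBits are assumed to come from the surrounding combine:
  KnownBits Known = DAG.computeKnownBits(Amt);
  // Every possible value of Amt fits below OpSizeInBits.
  bool AmtInRange = Known.countMaxActiveBits() <= Log2_32(OpSizeInBits);
  // The low two bits are known zero, so Amt is a multiple of 4.
  bool MultipleOf4 = Known.countMinTrailingZeros() >= 2;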
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoUnsignedWrap() const
bool hasDisjoint() const
bool hasNoSignedWrap() const
bool hasNonNeg() const
bool hasAllowReassociation() const
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Clients of various APIs that cause global effects on the DAG can optionally implement this interface.
Definition: SelectionDAG.h:307
virtual void NodeDeleted(SDNode *N, SDNode *E)
The node N that was deleted and, if E is not null, an equivalent node E that replaced it.
virtual void NodeInserted(SDNode *N)
The node N that was inserted.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...