llvm.org GIT mirror llvm / 5d538f7
Revert r235560; this commit was causing several failed assertions in Debug builds using MSVC's STL. The iterator is being used outside of its valid range. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235597 91177308-0d34-0410-b5e6-96231b3b80d8 Aaron Ballman 5 years ago
14 changed file(s) with 922 addition(s) and 1333 deletion(s). Raw diff Collapse all Expand all
19271927
19281928 // Avoid emitting unnecessary branches to the next block.
19291929 if (MBB != NextBlock(SwitchBB))
1930 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrRange,
1930 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
19311931 DAG.getBasicBlock(MBB));
19321932
19331933 DAG.setRoot(BrRange);
21002100 return VReg;
21012101 }
21022102
2103 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2104 #ifndef NDEBUG
2105 for (const CaseCluster &CC : Clusters)
2106 assert(CC.Low == CC.High && "Input clusters must be single-case");
2107 #endif
2108
2109 std::sort(Clusters.begin(), Clusters.end(),
2110 [](const CaseCluster &a, const CaseCluster &b) {
2111 return a.Low->getValue().slt(b.Low->getValue());
2103 /// handleSmallSwitchRange - Emit a series of specific tests (suitable for
2104 /// small case ranges).
/// Returns false, emitting nothing, when the range holds more than three
/// cases; every other path through this function returns true.
2105 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
2106 CaseRecVector& WorkList,
2107 const Value* SV,
2108 MachineBasicBlock *Default,
2109 MachineBasicBlock *SwitchBB) {
2110 // Size is the number of Cases represented by this range.
2111 size_t Size = CR.Range.second - CR.Range.first;
2112 if (Size > 3)
2113 return false;
2114
2115 // Get the MachineFunction which holds the current MBB. This is used when
2116 // inserting any additional MBBs necessary to represent the switch.
2117 MachineFunction *CurMF = FuncInfo.MF;
2118
2119 // Figure out which block is immediately after the current one.
// NextMBB stays null when CR.CaseBB is the last block of the function.
2120 MachineBasicBlock *NextMBB = nullptr;
2121 MachineFunction::iterator BBI = CR.CaseBB;
2122 if (++BBI != FuncInfo.MF->end())
2123 NextMBB = BBI;
2124
2125 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2126 // If any two of the cases have the same destination, and if one value
2127 // is the same as the other, but has one bit unset that the other has set,
2128 // use bit manipulation to do two compares at once. For example:
2129 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
2130 // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
2131 // TODO: Handle cases where CR.CaseBB != SwitchBB.
2132 if (Size == 2 && CR.CaseBB == SwitchBB) {
2133 Case &Small = *CR.Range.first;
2134 Case &Big = *(CR.Range.second-1);
2135
2136 if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
2137 const APInt& SmallValue = Small.Low->getValue();
2138 const APInt& BigValue = Big.Low->getValue();
2139
2140 // Check that there is only one bit different.
2141 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
2142 (SmallValue | BigValue) == BigValue) {
2143 // Isolate the common bit.
2144 APInt CommonBit = BigValue & ~SmallValue;
2145 assert((SmallValue | CommonBit) == BigValue &&
2146 CommonBit.countPopulation() == 1 && "Not a common bit?");
2147
2148 SDValue CondLHS = getValue(SV);
2149 EVT VT = CondLHS.getValueType();
2150 SDLoc DL = getCurSDLoc();
2151
2152 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
2153 DAG.getConstant(CommonBit, VT));
2154 SDValue Cond = DAG.getSetCC(DL, MVT::i1,
2155 Or, DAG.getConstant(BigValue, VT),
2156 ISD::SETEQ);
2157
2158 // Update successor info.
2159 // Both Small and Big will jump to Small.BB, so we sum up the weights.
2160 addSuccessorWithWeight(SwitchBB, Small.BB,
2161 Small.ExtraWeight + Big.ExtraWeight);
2162 addSuccessorWithWeight(SwitchBB, Default,
2163 // The default destination is the first successor in IR.
2164 BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
2165
2166 // Insert the true branch.
2167 SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
2168 getControlRoot(), Cond,
2169 DAG.getBasicBlock(Small.BB));
2170
2171 // Insert the false branch.
2172 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
2173 DAG.getBasicBlock(Default));
2174
2175 DAG.setRoot(BrCond);
2176 return true;
2177 }
2178 }
2179 }
2180
2181 // Order cases by weight so the most likely case will be checked first.
// This reordering (and the weight total) is only computed when BPI is set.
2182 uint32_t UnhandledWeights = 0;
2183 if (BPI) {
2184 for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
2185 uint32_t IWeight = I->ExtraWeight;
2186 UnhandledWeights += IWeight;
2187 for (CaseItr J = CR.Range.first; J < I; ++J) {
2188 uint32_t JWeight = J->ExtraWeight;
2189 if (IWeight > JWeight)
2190 std::swap(*I, *J);
2191 }
2192 }
2193 }
2194 // Rearrange the case blocks so that the last one falls through if possible.
2195 Case &BackCase = *(CR.Range.second-1);
2196 if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) {
2197 // The last case block won't fall through into 'NextMBB' if we emit the
2198 // branches in this order. See if rearranging a case value would help.
2199 // We start at the bottom as it's the case with the least weight.
// NOTE(review): the loop's end sentinel E is the address one element before
// CR.Range.first; it is only compared against, never dereferenced.
2200 for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
2201 if (I->BB == NextMBB) {
2202 std::swap(*I, BackCase);
2203 break;
2204 }
2205 }
2206
2207 // Create a CaseBlock record representing a conditional branch to
2208 // the Case's target mbb if the value being switched on SV is equal
2209 // to C.
2210 MachineBasicBlock *CurBlock = CR.CaseBB;
2211 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2212 MachineBasicBlock *FallThrough;
2213 if (I != E-1) {
2214 FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
2215 CurMF->insert(BBI, FallThrough);
2216
2217 // Put SV in a virtual register to make it available from the new blocks.
2218 ExportFromCurrentBlock(SV);
2219 } else {
2220 // If the last case doesn't match, go to the default block.
2221 FallThrough = Default;
2222 }
2223
2224 const Value *RHS, *LHS, *MHS;
2225 ISD::CondCode CC;
2226 if (I->High == I->Low) {
2227 // This is just a small case range containing exactly 1 case.
2228 CC = ISD::SETEQ;
2229 LHS = SV; RHS = I->High; MHS = nullptr;
2230 } else {
// A real range: SV is passed as the middle operand, between Low and High.
2231 CC = ISD::SETLE;
2232 LHS = I->Low; MHS = SV; RHS = I->High;
2233 }
2234
2235 // The false weight should be sum of all un-handled cases.
2236 UnhandledWeights -= I->ExtraWeight;
2237 CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
2238 /* me */ CurBlock,
2239 /* trueweight */ I->ExtraWeight,
2240 /* falseweight */ UnhandledWeights);
2241
2242 // If emitting the first comparison, just call visitSwitchCase to emit the
2243 // code into the current block. Otherwise, push the CaseBlock onto the
2244 // vector to be later processed by SDISel, and insert the node's MBB
2245 // before the next MBB.
2246 if (CurBlock == SwitchBB)
2247 visitSwitchCase(CB, SwitchBB);
2248 else
2249 SwitchCases.push_back(CB);
2250
2251 CurBlock = FallThrough;
2252 }
2253
2254 return true;
2255 }
2256
/// Returns true when the target lowering reports either ISD::BR_JT or
/// ISD::BRIND as legal or custom for MVT::Other.
2257 static inline bool areJTsAllowed(const TargetLowering &TLI) {
2258 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
2259 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
2260 }
2261
/// Returns Last - First + 1. Both operands are first sign-extended to a width
/// one bit larger than the wider of the two, and the result has that width.
// NOTE(review): the extra bit presumably keeps the subtraction from
// overflowing for extreme signed values - confirm.
2262 static APInt ComputeRange(const APInt &First, const APInt &Last) {
2263 uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
2264 APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
2265 return (LastExt - FirstExt + 1ULL);
2266 }
2267
2268 /// handleJTSwitchCase - Emit a jump table for the current switch case range.
/// Returns false, emitting nothing, when jump tables are not allowed for the
/// target, when the range has fewer entries than
/// TLI.getMinimumJumpTableEntries(), or when the density check below fails.
/// Otherwise records the jump table in JTCases and returns true.
/// WorkList is not referenced in this function body.
2269 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
2270 CaseRecVector &WorkList,
2271 const Value *SV,
2272 MachineBasicBlock *Default,
2273 MachineBasicBlock *SwitchBB) {
2274 Case& FrontCase = *CR.Range.first;
2275 Case& BackCase = *(CR.Range.second-1);
2276
2277 const APInt &First = FrontCase.Low->getValue();
2278 const APInt &Last = BackCase.High->getValue();
2279
// TSize accumulates the size() of every case in the range.
2280 APInt TSize(First.getBitWidth(), 0);
2281 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2282 TSize += I->size();
2283
2284 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2285 if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
2286 return false;
2287
2288 APInt Range = ComputeRange(First, Last);
2289 // The density is TSize / Range. Require at least 40%.
2290 // It should not be possible for IntTSize to saturate for sane code, but make
2291 // sure we handle Range saturation correctly.
2292 uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
2293 uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
2294 if (IntTSize * 10 < IntRange * 4)
2295 return false;
2296
2297 DEBUG(dbgs() << "Lowering jump table\n"
2298 << "First entry: " << First << ". Last entry: " << Last << '\n'
2299 << "Range: " << Range << ". Size: " << TSize << ".\n\n");
2300
2301 // Get the MachineFunction which holds the current MBB. This is used when
2302 // inserting any additional MBBs necessary to represent the switch.
2303 MachineFunction *CurMF = FuncInfo.MF;
2304
2305 // Figure out which block is immediately after the current one.
2306 MachineFunction::iterator BBI = CR.CaseBB;
2307 ++BBI;
2308
2309 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2310
2311 // Create a new basic block to hold the code for loading the address
2312 // of the jump table, and jumping to it. Update successor information;
2313 // we will either branch to the default case for the switch, or the jump
2314 // table.
2315 MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2316 CurMF->insert(BBI, JumpTableBB);
2317
2318 addSuccessorWithWeight(CR.CaseBB, Default);
2319 addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
2320
2321 // Build a vector of destination BBs, corresponding to each target
2322 // of the jump table. If the value of the jump table slot corresponds to
2323 // a case statement, push the case's BB onto the vector, otherwise, push
2324 // the default BB.
2325 std::vector DestBBs;
2326 APInt TEI = First;
// TEI advances on every iteration; I only advances once TEI reaches the
// current case's High value.
2327 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
2328 const APInt &Low = I->Low->getValue();
2329 const APInt &High = I->High->getValue();
2330
2331 if (Low.sle(TEI) && TEI.sle(High)) {
2332 DestBBs.push_back(I->BB);
2333 if (TEI==High)
2334 ++I;
2335 } else {
2336 DestBBs.push_back(Default);
2337 }
2338 }
2339
2340 // Calculate weight for each unique destination in CR.
2341 DenseMap DestWeights;
2342 if (FuncInfo.BPI) {
2343 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2344 DestWeights[I->BB] += I->ExtraWeight;
2345 }
2346
2347 // Update successor info. Add one edge to each unique successor.
// Destinations with no entry in DestWeights get weight 0.
2348 BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
2349 for (MachineBasicBlock *DestBB : DestBBs) {
2350 if (!SuccsHandled[DestBB->getNumber()]) {
2351 SuccsHandled[DestBB->getNumber()] = true;
2352 auto I = DestWeights.find(DestBB);
2353 addSuccessorWithWeight(JumpTableBB, DestBB,
2354 I != DestWeights.end() ? I->second : 0);
2355 }
2356 }
2357
2358 // Create a jump table index for this jump table.
2359 unsigned JTEncoding = TLI.getJumpTableEncoding();
2360 unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
2361 ->createJumpTableIndex(DestBBs);
2362
2363 // Set the jump table information so that we can codegen it as a second
2364 // MachineBasicBlock.
2365 JumpTable JT(-1U, JTI, JumpTableBB, Default);
2366 JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
// The header is emitted right away only when this range starts in SwitchBB.
2367 if (CR.CaseBB == SwitchBB)
2368 visitJumpTableHeader(JT, JTH, SwitchBB);
2369
2370 JTCases.push_back(JumpTableBlock(JTH, JT));
2371 return true;
2372 }
2373
2374 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
2375 /// 2 subtrees.
/// Chooses a pivot inside CR.Range, delegates the actual split to
/// splitSwitchCase, and always returns true.
2376 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
2377 CaseRecVector& WorkList,
2378 const Value* SV,
2379 MachineBasicBlock* SwitchBB) {
2380 Case& FrontCase = *CR.Range.first;
2381 Case& BackCase = *(CR.Range.second-1);
2382
2383 // Size is the number of Cases represented by this range.
2384 unsigned Size = CR.Range.second - CR.Range.first;
2385
2386 const APInt &First = FrontCase.Low->getValue();
2387 const APInt &Last = BackCase.High->getValue();
// FMetric holds the best metric seen so far; Pivot starts at the midpoint.
2388 double FMetric = 0;
2389 CaseItr Pivot = CR.Range.first + Size/2;
2390
2391 // Select optimal pivot, maximizing sum density of LHS and RHS. This will
2392 // (heuristically) allow us to emit jump tables later.
2393 APInt TSize(First.getBitWidth(), 0);
2394 for (CaseItr I = CR.Range.first, E = CR.Range.second;
2395 I!=E; ++I)
2396 TSize += I->size();
2397
2398 APInt LSize = FrontCase.size();
2399 APInt RSize = TSize-LSize;
2400 DEBUG(dbgs() << "Selecting best pivot: \n"
2401 << "First: " << First << ", Last: " << Last <<'\n'
2402 << "LSize: " << LSize << ", RSize: " << RSize << '\n');
2403 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// I and J walk adjacent case pairs; each J is a candidate pivot.
2404 for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
2405 J!=E; ++I, ++J) {
2406 const APInt &LEnd = I->High->getValue();
2407 const APInt &RBegin = J->Low->getValue();
2408 APInt Range = ComputeRange(LEnd, RBegin);
2409 assert((Range - 2ULL).isNonNegative() &&
2410 "Invalid case distance");
2411 // Use volatile double here to avoid excess precision issues on some hosts,
2412 // e.g. that use 80-bit X87 registers.
2413 // Only consider the density of sub-ranges that actually have sufficient
2414 // entries to be lowered as a jump table.
2415 volatile double LDensity =
2416 LSize.ult(TLI.getMinimumJumpTableEntries())
2417 ? 0.0
2418 : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble();
2419 volatile double RDensity =
2420 RSize.ult(TLI.getMinimumJumpTableEntries())
2421 ? 0.0
2422 : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble();
2423 volatile double Metric = Range.logBase2() * (LDensity + RDensity);
2424 // Should always split in some non-trivial place
2425 DEBUG(dbgs() <<"=>Step\n"
2426 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
2427 << "LDensity: " << LDensity
2428 << ", RDensity: " << RDensity << '\n'
2429 << "Metric: " << Metric << '\n');
2430 if (FMetric < Metric) {
2431 Pivot = J;
2432 FMetric = Metric;
2433 DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
2434 }
2435
// Move case J from the right-hand total to the left-hand total.
2436 LSize += J->size();
2437 RSize -= J->size();
2438 }
2439
// Fall back to the midpoint when no candidate scored above zero or when jump
// tables are not allowed for this target.
2440 if (FMetric == 0 || !areJTsAllowed(TLI))
2441 Pivot = CR.Range.first + Size/2;
2442 splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB);
2443 return true;
2444 }
2445
/// Split CR.Range at Pivot into a left range [first, Pivot) and a right range
/// [Pivot, second), then emit (or queue) a SETLT comparison of SV against
/// Pivot->Low that selects between them. A sub-range that cannot be branched
/// to directly gets a freshly created block and a new WorkList entry.
2446 void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
2447 CaseRecVector &WorkList,
2448 const Value *SV,
2449 MachineBasicBlock *SwitchBB) {
2450 // Get the MachineFunction which holds the current MBB. This is used when
2451 // inserting any additional MBBs necessary to represent the switch.
2452 MachineFunction *CurMF = FuncInfo.MF;
2453
2454 // Figure out which block is immediately after the current one.
2455 MachineFunction::iterator BBI = CR.CaseBB;
2456 ++BBI;
2457
2458 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2459
2460 CaseRange LHSR(CR.Range.first, Pivot);
2461 CaseRange RHSR(Pivot, CR.Range.second);
2462 const ConstantInt *C = Pivot->Low;
2463 MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
2464
2465 // We know that we branch to the LHS if the Value being switched on is
2466 // less than the Pivot value, C. We use this to optimize our binary
2467 // tree a bit, by recognizing that if SV is greater than or equal to the
2468 // LHS's Case Value, and that Case Value is exactly one less than the
2469 // Pivot's Value, then we can branch directly to the LHS's Target,
2470 // rather than creating a leaf node for it.
2471 if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
2472 C->getValue() == (CR.GE->getValue() + 1LL)) {
2473 TrueBB = LHSR.first->BB;
2474 } else {
2475 TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2476 CurMF->insert(BBI, TrueBB);
2477 WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
2478
2479 // Put SV in a virtual register to make it available from the new blocks.
2480 ExportFromCurrentBlock(SV);
2481 }
2482
2483 // Similar to the optimization above, if the Value being switched on is
2484 // known to be less than the Constant CR.LT, and the current Case Value
2485 // is CR.LT - 1, then we can branch directly to the target block for
2486 // the current Case Value, rather than emitting a RHS leaf node for it.
2487 if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
2488 RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) {
2489 FalseBB = RHSR.first->BB;
2490 } else {
2491 FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2492 CurMF->insert(BBI, FalseBB);
2493 WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
2494
2495 // Put SV in a virtual register to make it available from the new blocks.
2496 ExportFromCurrentBlock(SV);
2497 }
2498
2499 // Create a CaseBlock record representing a conditional branch to
2500 // the LHS node if the value being switched on SV is less than C.
2501 // Otherwise, branch to the RHS node.
2502 CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);
2503
// Emit immediately when this range starts in SwitchBB; otherwise queue the
// CaseBlock in SwitchCases.
2504 if (CR.CaseBB == SwitchBB)
2505 visitSwitchCase(CB, SwitchBB);
2506 else
2507 SwitchCases.push_back(CB);
2508 }
2509
2510 /// handleBitTestsSwitchCase - if the current case range has few destinations
2511 /// and its span is less than the machine word bit width (taken here from the
2512 /// pointer type), encode the case range into a series of masks and emit bit
/// tests with these masks.
/// Returns false, emitting nothing, when SHL is not legal for the pointer
/// type, when there are more than three unique destinations, or when the
/// span / comparison-count thresholds below are not met. Otherwise queues a
/// BitTestBlock in BitTestCases and returns true.
/// WorkList is not referenced in this function body.
2513 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
2514 CaseRecVector& WorkList,
2515 const Value* SV,
2516 MachineBasicBlock* Default,
2517 MachineBasicBlock* SwitchBB) {
2518 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2519 EVT PTy = TLI.getPointerTy();
2520 unsigned IntPtrBits = PTy.getSizeInBits();
2521
2522 Case& FrontCase = *CR.Range.first;
2523 Case& BackCase = *(CR.Range.second-1);
2524
2525 // Get the MachineFunction which holds the current MBB. This is used when
2526 // inserting any additional MBBs necessary to represent the switch.
2527 MachineFunction *CurMF = FuncInfo.MF;
2528
2529 // If target does not have legal shift left, do not emit bit tests at all.
2530 if (!TLI.isOperationLegal(ISD::SHL, PTy))
2531 return false;
2532
2533 size_t numCmps = 0;
2534 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2535 // Single case counts one, case range - two.
2536 numCmps += (I->Low == I->High ? 1 : 2);
2537 }
2538
2539 // Count unique destinations
2540 SmallSet Dests;
2541 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2542 Dests.insert(I->BB);
2543 if (Dests.size() > 3)
2544 // Don't bother with the code below if there are too many unique destinations
2545 return false;
2546 }
2547 DEBUG(dbgs() << "Total number of unique destinations: "
2548 << Dests.size() << '\n'
2549 << "Total number of comparisons: " << numCmps << '\n');
2550
2551 // Compute span of values.
2552 const APInt& minValue = FrontCase.Low->getValue();
2553 const APInt& maxValue = BackCase.High->getValue();
2554 APInt cmpRange = maxValue - minValue;
2555
2556 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2557 << "Low bound: " << minValue << '\n'
2558 << "High bound: " << maxValue << '\n');
2559
// Give up when the span does not fit in IntPtrBits bits, or when there are
// too few comparisons for the number of destinations (1 dest: >= 3,
// 2 dests: >= 5, 3 dests: >= 6).
2560 if (cmpRange.uge(IntPtrBits) ||
2561 (!(Dests.size() == 1 && numCmps >= 3) &&
2562 !(Dests.size() == 2 && numCmps >= 5) &&
2563 !(Dests.size() >= 3 && numCmps >= 6)))
2564 return false;
2565
2566 DEBUG(dbgs() << "Emitting bit tests\n");
2567 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2568
2569 // Optimize the case where all the case values fit in a
2570 // word without having to subtract minValue. In this case,
2571 // we can optimize away the subtraction.
2572 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
2573 cmpRange = maxValue;
2574 } else {
2575 lowBound = minValue;
2576 }
2577
2578 CaseBitsVector CasesBits;
2579 unsigned i, count = 0;
2580
2581 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2582 MachineBasicBlock* Dest = I->BB;
// Linear search for an existing CaseBits entry with this destination;
// i == count afterwards means none was found.
2583 for (i = 0; i < count; ++i)
2584 if (Dest == CasesBits[i].BB)
2585 break;
2586
2587 if (i == count) {
2588 assert((count < 3) && "Too much destinations to test!");
2589 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
2590 count++;
2591 }
2592
2593 const APInt& lowValue = I->Low->getValue();
2594 const APInt& highValue = I->High->getValue();
2595
2596 uint64_t lo = (lowValue - lowBound).getZExtValue();
2597 uint64_t hi = (highValue - lowBound).getZExtValue();
2598 CasesBits[i].ExtraWeight += I->ExtraWeight;
2599
// Set one mask bit per value covered by this case, counting the bits set.
2600 for (uint64_t j = lo; j <= hi; j++) {
2601 CasesBits[i].Mask |= 1ULL << j;
2602 CasesBits[i].Bits++;
2603 }
2604
2605 }
2606 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2607
2608 BitTestInfo BTC;
2609
2610 // Figure out which block is immediately after the current one.
2611 MachineFunction::iterator BBI = CR.CaseBB;
2612 ++BBI;
2613
2614 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2615
2616 DEBUG(dbgs() << "Cases:\n");
2617 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2618 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2619 << ", Bits: " << CasesBits[i].Bits
2620 << ", BB: " << CasesBits[i].BB << '\n');
2621
2622 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2623 CurMF->insert(BBI, CaseBB);
2624 BTC.push_back(BitTestCase(CasesBits[i].Mask,
2625 CaseBB,
2626 CasesBits[i].BB, CasesBits[i].ExtraWeight));
2627
2628 // Put SV in a virtual register to make it available from the new blocks.
2629 ExportFromCurrentBlock(SV);
2630 }
2631
2632 BitTestBlock BTB(lowBound, cmpRange, SV,
2633 -1U, MVT::Other, (CR.CaseBB == SwitchBB),
2634 CR.CaseBB, Default, std::move(BTC));
2635
// The header is emitted right away only when this range starts in SwitchBB.
2636 if (CR.CaseBB == SwitchBB)
2637 visitBitTestHeader(BTB, SwitchBB);
2638
2639 BitTestCases.push_back(std::move(BTB));
2640
2641 return true;
2642 }
2643
2644 void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) {
2645 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2646
2647 // Extract cases from the switch and sort them.
2648 typedef std::pair CasePair;
2649 std::vector Sorted;
2650 Sorted.reserve(SI->getNumCases());
2651 for (auto I : SI->cases())
2652 Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex()));
2653 std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) {
2654 return a.first->getValue().slt(b.first->getValue());
21122655 });
21132656
2114 // Merge adjacent clusters with the same destination.
2115 const unsigned N = Clusters.size();
2116 unsigned DstIndex = 0;
2117 for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2118 CaseCluster &CC = Clusters[SrcIndex];
2119 const ConstantInt *CaseVal = CC.Low;
2120 MachineBasicBlock *Succ = CC.MBB;
2121
2122 if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2123 (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2657 // Merge adjacent cases with the same destination, build Cases vector.
2658 assert(Cases.empty() && "Cases should be empty before Clusterify;");
2659 Cases.reserve(SI->getNumCases());
2660 MachineBasicBlock *PreviousSucc = nullptr;
2661 for (CasePair &CP : Sorted) {
2662 const ConstantInt *CaseVal = CP.first;
2663 unsigned SuccIndex = CP.second;
2664 MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)];
2665 uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0;
2666
2667 if (PreviousSucc == Succ &&
2668 (CaseVal->getValue() - Cases.back().High->getValue()) == 1) {
21242669 // If this case has the same successor and is a neighbour, merge it into
21252670 // the previous cluster.
2126 Clusters[DstIndex - 1].High = CaseVal;
2127 Clusters[DstIndex - 1].Weight += CC.Weight;
2671 Cases.back().High = CaseVal;
2672 Cases.back().ExtraWeight += Weight;
21282673 } else {
2129 std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2130 sizeof(Clusters[SrcIndex]));
2131 }
2132 }
2133 Clusters.resize(DstIndex);
2674 Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight));
2675 }
2676
2677 PreviousSucc = Succ;
2678 }
2679
2680 DEBUG({
2681 size_t numCmps = 0;
2682 for (auto &I : Cases)
2683 // A range counts double, since it requires two compares.
2684 numCmps += I.Low != I.High ? 2 : 1;
2685
2686 dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2687 << ". Total compares: " << numCmps << '\n';
2688 });
21342689 }
21352690
21362691 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
21442699 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
21452700 if (BitTestCases[i].Parent == First)
21462701 BitTestCases[i].Parent = Last;
2702 }
2703
/// Lower a switch instruction. The cases are clustered, an unreachable default
/// destination is replaced by the most popular case destination, and the
/// remaining case ranges are processed from a worklist. Each range is tried
/// as bit tests, then as a short sequence of specific tests, then as a jump
/// table, and is finally split as a binary tree.
2704 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2705 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2706
2707 // Create a vector of Cases, sorted so that we can efficiently create a binary
2708 // search tree from them.
2709 CaseVector Cases;
2710 Clusterify(Cases, &SI);
2711
2712 // Get the default destination MBB.
2713 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2714
2715 if (isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
2716 !Cases.empty()) {
2717 // Replace an unreachable default destination with the most popular case
2718 // destination.
2719 DenseMap Popularity;
2720 unsigned MaxPop = 0;
2721 const BasicBlock *MaxBB = nullptr;
// Strict '>' means the first destination to reach the highest count wins ties.
2722 for (auto I : SI.cases()) {
2723 const BasicBlock *BB = I.getCaseSuccessor();
2724 if (++Popularity[BB] > MaxPop) {
2725 MaxPop = Popularity[BB];
2726 MaxBB = BB;
2727 }
2728 }
2729
2730 // Set new default.
2731 assert(MaxPop > 0);
2732 assert(MaxBB);
2733 Default = FuncInfo.MBBMap[MaxBB];
2734
2735 // Remove cases that were pointing to the destination that is now the default.
2736 Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
2737 [&](const Case &C) { return C.BB == Default; }),
2738 Cases.end());
2739 }
2740
2741 // If there is only the default destination, go there directly.
2742 if (Cases.empty()) {
2743 // Update machine-CFG edges.
2744 SwitchMBB->addSuccessor(Default);
2745
2746 // If this is not a fall-through branch, emit the branch.
2747 if (Default != NextBlock(SwitchMBB)) {
2748 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
2749 getControlRoot(), DAG.getBasicBlock(Default)));
2750 }
2751 return;
2752 }
2753
2754 // Get the Value to be switched on.
2755 const Value *SV = SI.getCondition();
2756
2757 // Push the initial CaseRec onto the worklist
2758 CaseRecVector WorkList;
2759 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr,
2760 CaseRange(Cases.begin(),Cases.end())));
2761
2762 while (!WorkList.empty()) {
2763 // Grab a record representing a case range to process off the worklist
2764 CaseRec CR = WorkList.back();
2765 WorkList.pop_back();
2766
2767 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2768 continue;
2769
2770 // If the range has few cases (three or fewer) emit a series of specific
2771 // tests.
2772 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2773 continue;
2774
2775 // If the switch has more than N blocks, and is at least 40% dense, and the
2776 // target supports indirect branches, then emit a jump table rather than
2777 // lowering the switch to a binary tree of conditional branches.
2778 // N defaults to 4 and is controlled via TLI.getMinimumJumpTableEntries().
2779 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2780 continue;
2781
2782 // Emit binary tree. We need to pick a pivot, and push left and right ranges
2783 // onto the worklist. Leaves are handled via handleSmallSwitchRange() call.
2784 handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
2785 }
21472786 }
21482787
21492788 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
71797818 HasTailCall = true;
71807819 }
71817820
/// Returns true when Clusters[First..Last] is dense enough for a jump table,
/// i.e. when NumCases * 100 >= Range * MinJumpTableDensity. Range is the
/// (clamped) distance from Clusters[First].Low to Clusters[Last].High plus
/// one, and NumCases is derived from the TotalCases array as
/// TotalCases[Last] minus TotalCases[First - 1] (or minus 0 when First is 0).
7182 bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
7183 unsigned *TotalCases, unsigned First,
7184 unsigned Last) {
7185 assert(Last >= First);
7186 assert(TotalCases[Last] >= TotalCases[First]);
7187
7188 APInt LowCase = Clusters[First].Low->getValue();
7189 APInt HighCase = Clusters[Last].High->getValue();
7190 assert(LowCase.getBitWidth() == HighCase.getBitWidth());
7191
7192 // FIXME: A range of consecutive cases has 100% density, but only requires one
7193 // comparison to lower. We should discriminate against such consecutive ranges
7194 // in jump tables.
7195
// NOTE(review): the clamp to (UINT64_MAX - 1) / 100 presumably keeps the
// multiplications in the return statement from overflowing - confirm.
7196 uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
7197 uint64_t Range = Diff + 1;
7198
7199 uint64_t NumCases =
7200 TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
7201
7202 assert(NumCases < UINT64_MAX / 100);
7203 assert(Range >= NumCases);
7204
7205 return NumCases * 100 >= Range * MinJumpTableDensity;
7206 }
7207
/// Returns true when the target lowering reports either ISD::BR_JT or
/// ISD::BRIND as legal or custom for MVT::Other.
7208 static inline bool areJTsAllowed(const TargetLowering &TLI) {
7209 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
7210 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
7211 }
7212
/// Try to build a jump table covering Clusters[First..Last].
/// Returns false, without creating a block or registering a table, when
/// isSuitableForBitTests accepts the range. Otherwise creates the jump table
/// block, adds its successors, appends an entry to JTCases, stores a
/// jump-table cluster describing it in JTCluster, and returns true.
7213 bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
7214 unsigned First, unsigned Last,
7215 const SwitchInst *SI,
7216 MachineBasicBlock *DefaultMBB,
7217 CaseCluster &JTCluster) {
7218 assert(First <= Last);
7219
7220 uint64_t Weight = 0;
7221 unsigned NumCmps = 0;
7222 std::vector Table;
7223 DenseMap JTWeights;
7224 for (unsigned I = First; I <= Last; ++I) {
7225 assert(Clusters[I].Kind == CC_Range);
7226 Weight += Clusters[I].Weight;
7227 APInt Low = Clusters[I].Low->getValue();
7228 APInt High = Clusters[I].High->getValue();
// A single-value cluster counts as one comparison, a range as two.
7229 NumCmps += (Low == High) ? 1 : 2;
7230 if (I != First) {
7231 // Fill the gap between this and the previous cluster.
7232 APInt PreviousHigh = Clusters[I - 1].High->getValue();
7233 assert(PreviousHigh.slt(Low));
7234 uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
7235 for (uint64_t J = 0; J < Gap; J++)
7236 Table.push_back(DefaultMBB);
7237 }
// One table entry per value in [Low, High].
7238 for (APInt X = Low; X.sle(High); ++X)
7239 Table.push_back(Clusters[I].MBB);
7240 JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
7241 }
7242
7243 unsigned NumDests = JTWeights.size();
7244 if (isSuitableForBitTests(NumDests, NumCmps,
7245 Clusters[First].Low->getValue(),
7246 Clusters[Last].High->getValue())) {
7247 // Clusters[First..Last] should be lowered as bit tests instead.
7248 return false;
7249 }
7250
7251 // Create the MBB that will load from and jump through the table.
7252 // Note: We create it here, but it's not inserted into the function yet.
7253 MachineFunction *CurMF = FuncInfo.MF;
7254 MachineBasicBlock *JumpTableMBB =
7255 CurMF->CreateMachineBasicBlock(SI->getParent());
7256
7257 // Add successors. Note: use table order for determinism.
7258 SmallPtrSet Done;
7259 for (MachineBasicBlock *Succ : Table) {
7260 if (Done.count(Succ))
7261 continue;
7262 addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
7263 Done.insert(Succ);
7264 }
7265
7266 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7267 unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
7268 ->createJumpTableIndex(Table);
7269
7270 // Set up the jump table info.
7271 JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
7272 JumpTableHeader JTH(Clusters[First].Low->getValue(),
7273 Clusters[Last].High->getValue(), SI->getCondition(),
7274 nullptr, false);
7275 JTCases.push_back(JumpTableBlock(JTH, JT));
7276
// The new cluster refers to the table by its index in JTCases.
7277 JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
7278 JTCases.size() - 1, Weight);
7279 return true;
7280 }
7281
7282 void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
7283 const SwitchInst *SI,
7284 MachineBasicBlock *DefaultMBB) {
7285 #ifndef NDEBUG
7286 // Clusters must be non-empty, sorted, and only contain Range clusters.
7287 assert(!Clusters.empty());
7288 for (CaseCluster &C : Clusters)
7289 assert(C.Kind == CC_Range);
7290 for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
7291 assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
7292 #endif
7293
7294 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7295 if (!areJTsAllowed(TLI))
7296 return;
7297
7298 const int64_t N = Clusters.size();
7299 const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
7300
7301 // Split Clusters into minimum number of dense partitions. The algorithm uses
7302 // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
7303 // for the Case Statement'" (1994), but builds the MinPartitions array in
7304 // reverse order to make it easier to reconstruct the partitions in ascending
7305 // order. In the choice between two optimal partitionings, it picks the one
7306 // which yields more jump tables.
7307
7308 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
7309 SmallVector MinPartitions(N);
7310 // LastElement[i] is the last element of the partition starting at i.
7311 SmallVector LastElement(N);
7312 // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
7313 SmallVector NumTables(N);
7314 // TotalCases[i]: Total nbr of cases in Clusters[0..i].
7315 SmallVector TotalCases(N);
7316
7317 for (unsigned i = 0; i < N; ++i) {
7318 APInt Hi = Clusters[i].High->getValue();
7319 APInt Lo = Clusters[i].Low->getValue();
7320 TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
7321 if (i != 0)
7322 TotalCases[i] += TotalCases[i - 1];
7323 }
7324
7325 // Base case: There is only one way to partition Clusters[N-1].
7326 MinPartitions[N - 1] = 1;
7327 LastElement[N - 1] = N - 1;
7328 assert(MinJumpTableSize > 1);
7329 NumTables[N - 1] = 0;
7330
7331 // Note: loop indexes are signed to avoid underflow.
7332 for (int64_t i = N - 2; i >= 0; i--) {
7333 // Find optimal partitioning of Clusters[i..N-1].
7334 // Baseline: Put Clusters[i] into a partition on its own.
7335 MinPartitions[i] = MinPartitions[i + 1] + 1;
7336 LastElement[i] = i;
7337 NumTables[i] = NumTables[i + 1];
7338
7339 // Search for a solution that results in fewer partitions.
7340 for (int64_t j = N - 1; j > i; j--) {
7341 // Try building a partition from Clusters[i..j].
7342 if (isDense(Clusters, &TotalCases[0], i, j)) {
7343 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
7344 bool IsTable = j - i + 1 >= MinJumpTableSize;
7345 unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
7346
7347 // If this j leads to fewer partitions, or same number of partitions
7348 // with more lookup tables, it is a better partitioning.
7349 if (NumPartitions < MinPartitions[i] ||
7350 (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
7351 MinPartitions[i] = NumPartitions;
7352 LastElement[i] = j;
7353 NumTables[i] = Tables;
7354 }
7355 }
7356 }
7357 }
7358
7359 // Iterate over the partitions, replacing some with jump tables in-place.
7360 unsigned DstIndex = 0;
7361 for (unsigned First = 0, Last; First < N; First = Last + 1) {
7362 Last = LastElement[First];
7363 assert(Last >= First);
7364 assert(DstIndex <= First);
7365 unsigned NumClusters = Last - First + 1;
7366
7367 CaseCluster JTCluster;
7368 if (NumClusters >= MinJumpTableSize &&
7369 buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
7370 Clusters[DstIndex++] = JTCluster;
7371 } else {
7372 for (unsigned I = First; I <= Last; ++I)
7373 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
7374 }
7375 }
7376 Clusters.resize(DstIndex);
7377 }
7378
7379 bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
7380 // FIXME: Using the pointer type doesn't seem ideal.
7381 uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
7382 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
7383 return Range <= BW;
7384 }
7385
7386 bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
7387 unsigned NumCmps,
7388 const APInt &Low,
7389 const APInt &High) {
7390 // FIXME: I don't think NumCmps is the correct metric: a single case and a
7391 // range of cases both require only one branch to lower. Just looking at the
7392 // number of clusters and destinations should be enough to decide whether to
7393 // build bit tests.
7394
7395 // To lower a range with bit tests, the range must fit the bitwidth of a
7396 // machine word.
7397 if (!rangeFitsInWord(Low, High))
7398 return false;
7399
7400 // Decide whether it's profitable to lower this range with bit tests. Each
7401 // destination requires a bit test and branch, and there is an overall range
7402 // check branch. For a small number of clusters, separate comparisons might be
7403 // cheaper, and for many destinations, splitting the range might be better.
7404 return (NumDests == 1 && NumCmps >= 3) ||
7405 (NumDests == 2 && NumCmps >= 5) ||
7406 (NumDests == 3 && NumCmps >= 6);
7407 }
7408
7409 bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
7410 unsigned First, unsigned Last,
7411 const SwitchInst *SI,
7412 CaseCluster &BTCluster) {
7413 assert(First <= Last);
7414 if (First == Last)
7415 return false;
7416
7417 BitVector Dests(FuncInfo.MF->getNumBlockIDs());
7418 unsigned NumCmps = 0;
7419 for (int64_t I = First; I <= Last; ++I) {
7420 assert(Clusters[I].Kind == CC_Range);
7421 Dests.set(Clusters[I].MBB->getNumber());
7422 NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
7423 }
7424 unsigned NumDests = Dests.count();
7425
7426 APInt Low = Clusters[First].Low->getValue();
7427 APInt High = Clusters[Last].High->getValue();
7428 assert(Low.slt(High));
7429
7430 if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
7431 return false;
7432
7433 APInt LowBound;
7434 APInt CmpRange;
7435
7436 const int BitWidth =
7437 DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
7438 assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!");
7439
7440 if (Low.isNonNegative() && High.slt(BitWidth)) {
7441 // Optimize the case where all the case values fit in a
7442 // word without having to subtract minValue. In this case,
7443 // we can optimize away the subtraction.
7444 LowBound = APInt::getNullValue(Low.getBitWidth());
7445 CmpRange = High;
7446 } else {
7447 LowBound = Low;
7448 CmpRange = High - Low;
7449 }
7450
7451 CaseBitsVector CBV;
7452 uint64_t TotalWeight = 0;
7453 for (unsigned i = First; i <= Last; ++i) {
7454 // Find the CaseBits for this destination.
7455 unsigned j;
7456 for (j = 0; j < CBV.size(); ++j)
7457 if (CBV[j].BB == Clusters[i].MBB)
7458 break;
7459 if (j == CBV.size())
7460 CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
7461 CaseBits *CB = &CBV[j];
7462
7463 // Update Mask, Bits and ExtraWeight.
7464 uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
7465 uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
7466 for (uint64_t j = Lo; j <= Hi; ++j) {
7467 CB->Mask |= 1ULL << j;
7468 CB->Bits++;
7469 }
7470 CB->ExtraWeight += Clusters[i].Weight;
7471 TotalWeight += Clusters[i].Weight;
7472 }
7473
7474 BitTestInfo BTI;
7475 std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
7476 // FIXME: Sort by weight.
7477 return a.Bits > b.Bits;
7478 });
7479
7480 for (auto &CB : CBV) {
7481 MachineBasicBlock *BitTestBB =
7482 FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
7483 BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
7484 }
7485 BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(),
7486 -1U, MVT::Other, false, nullptr,
7487 nullptr, std::move(BTI)));
7488
7489 BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
7490 BitTestCases.size() - 1, TotalWeight);
7491 return true;
7492 }
7493
7494 void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
7495 const SwitchInst *SI) {
7496 // Partition Clusters into as few subsets as possible, where each subset has a
7497 // range that fits in a machine word and has <= 3 unique destinations.
7498
7499 #ifndef NDEBUG
7500 // Clusters must be sorted and contain Range or JumpTable clusters.
7501 assert(!Clusters.empty());
7502 assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
7503 for (const CaseCluster &C : Clusters)
7504 assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
7505 for (unsigned i = 1; i < Clusters.size(); ++i)
7506 assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
7507 #endif
7508
7509 // If target does not have legal shift left, do not emit bit tests at all.
7510 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7511 EVT PTy = TLI.getPointerTy();
7512 if (!TLI.isOperationLegal(ISD::SHL, PTy))
7513 return;
7514
7515 int BitWidth = PTy.getSizeInBits();
7516 const int64_t N = Clusters.size();
7517
7518 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
7519 SmallVector MinPartitions(N);
7520 // LastElement[i] is the last element of the partition starting at i.
7521 SmallVector LastElement(N);
7522
7523 // FIXME: This might not be the best algorithm for finding bit test clusters.
7524
7525 // Base case: There is only one way to partition Clusters[N-1].
7526 MinPartitions[N - 1] = 1;
7527 LastElement[N - 1] = N - 1;
7528
7529 // Note: loop indexes are signed to avoid underflow.
7530 for (int64_t i = N - 2; i >= 0; --i) {
7531 // Find optimal partitioning of Clusters[i..N-1].
7532 // Baseline: Put Clusters[i] into a partition on its own.
7533 MinPartitions[i] = MinPartitions[i + 1] + 1;
7534 LastElement[i] = i;
7535
7536 // Search for a solution that results in fewer partitions.
7537 // Note: the search is limited by BitWidth, reducing time complexity.
7538 for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
7539 // Try building a partition from Clusters[i..j].
7540
7541 // Check the range.
7542 if (!rangeFitsInWord(Clusters[i].Low->getValue(),
7543 Clusters[j].High->getValue()))
7544 continue;
7545
7546 // Check nbr of destinations and cluster types.
7547 // FIXME: This works, but doesn't seem very efficient.
7548 bool RangesOnly = true;
7549 BitVector Dests(FuncInfo.MF->getNumBlockIDs());
7550 for (int64_t k = i; k <= j; k++) {
7551 if (Clusters[k].Kind != CC_Range) {
7552 RangesOnly = false;
7553 break;
7554 }
7555 Dests.set(Clusters[k].MBB->getNumber());
7556 }
7557 if (!RangesOnly || Dests.count() > 3)
7558 break;
7559
7560 // Check if it's a better partition.
7561 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
7562 if (NumPartitions < MinPartitions[i]) {
7563 // Found a better partition.
7564 MinPartitions[i] = NumPartitions;
7565 LastElement[i] = j;
7566 }
7567 }
7568 }
7569
7570 // Iterate over the partitions, replacing with bit-test clusters in-place.
7571 unsigned DstIndex = 0;
7572 for (unsigned First = 0, Last; First < N; First = Last + 1) {
7573 Last = LastElement[First];
7574 assert(First <= Last);
7575 assert(DstIndex <= First);
7576
7577 CaseCluster BitTestCluster;
7578 if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
7579 Clusters[DstIndex++] = BitTestCluster;
7580 } else {
7581 for (unsigned I = First; I <= Last; ++I)
7582 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
7583 }
7584 }
7585 Clusters.resize(DstIndex);
7586 }
7587
7588 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
7589 MachineBasicBlock *SwitchMBB,
7590 MachineBasicBlock *DefaultMBB) {
7591 MachineFunction *CurMF = FuncInfo.MF;
7592 MachineBasicBlock *NextMBB = nullptr;
7593 MachineFunction::iterator BBI = W.MBB;
7594 if (++BBI != FuncInfo.MF->end())
7595 NextMBB = BBI;
7596
7597 unsigned Size = W.LastCluster - W.FirstCluster + 1;
7598
7599 BranchProbabilityInfo *BPI = FuncInfo.BPI;
7600
7601 if (Size == 2 && W.MBB == SwitchMBB) {
7602 // If any two of the cases has the same destination, and if one value
7603 // is the same as the other, but has one bit unset that the other has set,
7604 // use bit manipulation to do two compares at once. For example:
7605 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
7606 // TODO: This could be extended to merge any 2 cases in switches with 3
7607 // cases.
7608 // TODO: Handle cases where W.CaseBB != SwitchBB.
7609 CaseCluster &Small = *W.FirstCluster;
7610 CaseCluster &Big = *W.LastCluster;
7611
7612 if (Small.Low == Small.High && Big.Low == Big.High &&
7613 Small.MBB == Big.MBB) {
7614 const APInt &SmallValue = Small.Low->getValue();
7615 const APInt &BigValue = Big.Low->getValue();
7616
7617 // Check that there is only one bit different.
7618 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
7619 (SmallValue | BigValue) == BigValue) {
7620 // Isolate the common bit.
7621 APInt CommonBit = BigValue & ~SmallValue;
7622 assert((SmallValue | CommonBit) == BigValue &&
7623 CommonBit.countPopulation() == 1 && "Not a common bit?");
7624
7625 SDValue CondLHS = getValue(Cond);
7626 EVT VT = CondLHS.getValueType();
7627 SDLoc DL = getCurSDLoc();
7628
7629 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
7630 DAG.getConstant(CommonBit, VT));
7631 SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
7632 DAG.getConstant(BigValue, VT), ISD::SETEQ);
7633
7634 // Update successor info.
7635 // Both Small and Big will jump to Small.BB, so we sum up the weights.
7636 addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
7637 addSuccessorWithWeight(
7638 SwitchMBB, DefaultMBB,
7639 // The default destination is the first successor in IR.
7640 BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
7641 : 0);
7642
7643 // Insert the true branch.
7644 SDValue BrCond =
7645 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
7646 DAG.getBasicBlock(Small.MBB));
7647 // Insert the false branch.
7648 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
7649 DAG.getBasicBlock(DefaultMBB));
7650
7651 DAG.setRoot(BrCond);
7652 return;
7653 }
7654 }
7655 }
7656
7657 if (TM.getOptLevel() != CodeGenOpt::None) {
7658 // Order cases by weight so the most likely case will be checked first.
7659 std::sort(W.FirstCluster, W.LastCluster + 1,
7660 [](const CaseCluster &a, const CaseCluster &b) {
7661 return a.Weight > b.Weight;
7662 });
7663
7664 // Rearrange the case blocks so that the last one falls through if possible.
7665 // Start at the bottom as that's the case with the lowest weight.
7666 // FIXME: Take branch probability into account.
7667 for (CaseClusterIt I = W.LastCluster - 1; I >= W.FirstCluster; --I) {
7668 if (I->Kind == CC_Range && I->MBB == NextMBB) {
7669 std::swap(*I, *W.LastCluster);
7670 break;
7671 }
7672 }
7673 }
7674
7675 // Compute total weight.
7676 uint32_t UnhandledWeights = 0;
7677 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
7678 UnhandledWeights += I->Weight;
7679
7680 MachineBasicBlock *CurMBB = W.MBB;
7681 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
7682 MachineBasicBlock *Fallthrough;
7683 if (I == W.LastCluster) {
7684 // For the last cluster, fall through to the default destination.
7685 Fallthrough = DefaultMBB;
7686 } else {
7687 Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
7688 CurMF->insert(BBI, Fallthrough);
7689 // Put Cond in a virtual register to make it available from the new blocks.
7690 ExportFromCurrentBlock(Cond);
7691 }
7692
7693 switch (I->Kind) {
7694 case CC_JumpTable: {
7695 // FIXME: Optimize away range check based on pivot comparisons.
7696 JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
7697 JumpTable *JT = &JTCases[I->JTCasesIndex].second;
7698
7699 // The jump block hasn't been inserted yet; insert it here.
7700 MachineBasicBlock *JumpMBB = JT->MBB;
7701 CurMF->insert(BBI, JumpMBB);
7702 addSuccessorWithWeight(CurMBB, Fallthrough);
7703 addSuccessorWithWeight(CurMBB, JumpMBB);
7704
7705 // The jump table header will be inserted in our current block, do the
7706 // range check, and fall through to our fallthrough block.
7707 JTH->HeaderBB = CurMBB;
7708 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
7709
7710 // If we're in the right place, emit the jump table header right now.
7711 if (CurMBB == SwitchMBB) {
7712 visitJumpTableHeader(*JT, *JTH, SwitchMBB);
7713 JTH->Emitted = true;
7714 }
7715 break;
7716 }
7717 case CC_BitTests: {
7718 // FIXME: Optimize away range check based on pivot comparisons.
7719 BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
7720
7721 // The bit test blocks haven't been inserted yet; insert them here.
7722 for (BitTestCase &BTC : BTB->Cases)
7723 CurMF->insert(BBI, BTC.ThisBB);
7724
7725 // Fill in fields of the BitTestBlock.
7726 BTB->Parent = CurMBB;
7727 BTB->Default = Fallthrough;
7728
7729 // If we're in the right place, emit the bit test header header right now.
7730 if (CurMBB ==SwitchMBB) {
7731 visitBitTestHeader(*BTB, SwitchMBB);
7732 BTB->Emitted = true;
7733 }
7734 break;
7735 }
7736 case CC_Range: {
7737 const Value *RHS, *LHS, *MHS;
7738 ISD::CondCode CC;
7739 if (I->Low == I->High) {
7740 // Check Cond == I->Low.
7741 CC = ISD::SETEQ;
7742 LHS = Cond;
7743 RHS=I->Low;
7744 MHS = nullptr;
7745 } else {
7746 // Check I->Low <= Cond <= I->High.
7747 CC = ISD::SETLE;
7748 LHS = I->Low;
7749 MHS = Cond;
7750 RHS = I->High;
7751 }
7752
7753 // The false weight is the sum of all unhandled cases.
7754 UnhandledWeights -= I->Weight;
7755 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
7756 UnhandledWeights);
7757
7758 if (CurMBB == SwitchMBB)
7759 visitSwitchCase(CB, SwitchMBB);
7760 else
7761 SwitchCases.push_back(CB);
7762
7763 break;
7764 }
7765 }
7766 CurMBB = Fallthrough;
7767 }
7768 }
7769
7770 void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
7771 const SwitchWorkListItem &W,
7772 Value *Cond,
7773 MachineBasicBlock *SwitchMBB) {
7774 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
7775 "Clusters not sorted?");
7776
7777 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
7778 assert(NumClusters >= 2 && "Too small to split!");
7779
7780 // FIXME: When we have profile info, we might want to balance the tree based
7781 // on weights instead of node count.
7782
7783 CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2;
7784 CaseClusterIt FirstLeft = W.FirstCluster;
7785 CaseClusterIt LastLeft = PivotCluster - 1;
7786 CaseClusterIt FirstRight = PivotCluster;
7787 CaseClusterIt LastRight = W.LastCluster;
7788 const ConstantInt *Pivot = PivotCluster->Low;
7789
7790 // New blocks will be inserted immediately after the current one.
7791 MachineFunction::iterator BBI = W.MBB;
7792 ++BBI;
7793
7794 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
7795 // we can branch to its destination directly if it's squeezed exactly in
7796 // between the known lower bound and Pivot - 1.
7797 MachineBasicBlock *LeftMBB;
7798 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
7799 FirstLeft->Low == W.GE &&
7800 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
7801 LeftMBB = FirstLeft->MBB;
7802 } else {
7803 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
7804 FuncInfo.MF->insert(BBI, LeftMBB);
7805 WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
7806 // Put Cond in a virtual register to make it available from the new blocks.
7807 ExportFromCurrentBlock(Cond);
7808 }
7809
7810 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
7811 // single cluster, RHS.Low == Pivot, and we can branch to its destination
7812 // directly if RHS.High equals the current upper bound.
7813 MachineBasicBlock *RightMBB;
7814 if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
7815 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
7816 RightMBB = FirstRight->MBB;
7817 } else {
7818 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
7819 FuncInfo.MF->insert(BBI, RightMBB);
7820 WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
7821 // Put Cond in a virtual register to make it available from the new blocks.
7822 ExportFromCurrentBlock(Cond);
7823 }
7824
7825 // Create the CaseBlock record that will be used to lower the branch.
7826 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB);
7827
7828 if (W.MBB == SwitchMBB)
7829 visitSwitchCase(CB, SwitchMBB);
7830 else
7831 SwitchCases.push_back(CB);
7832 }
7833
7834 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
7835 // Extract cases from the switch.
7836 BranchProbabilityInfo *BPI = FuncInfo.BPI;
7837 CaseClusterVector Clusters;
7838 Clusters.reserve(SI.getNumCases());
7839 for (auto I : SI.cases()) {
7840 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
7841 const ConstantInt *CaseVal = I.getCaseValue();
7842 uint32_t Weight = 0; // FIXME: Use 1 instead?
7843 if (BPI)
7844 Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex());
7845 Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
7846 }
7847
7848 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
7849
7850 if (TM.getOptLevel() != CodeGenOpt::None) {
7851 // Cluster adjacent cases with the same destination.
7852 sortAndRangeify(Clusters);
7853
7854 // Replace an unreachable default with the most popular destination.
7855 // FIXME: Exploit unreachable default more aggressively.
7856 bool UnreachableDefault =
7857 isa(SI.getDefaultDest()->getFirstNonPHIOrDbg());
7858 if (UnreachableDefault && !Clusters.empty()) {
7859 DenseMap Popularity;
7860 unsigned MaxPop = 0;
7861 const BasicBlock *MaxBB = nullptr;
7862 for (auto I : SI.cases()) {
7863 const BasicBlock *BB = I.getCaseSuccessor();
7864 if (++Popularity[BB] > MaxPop) {
7865 MaxPop = Popularity[BB];
7866 MaxBB = BB;
7867 }
7868 }
7869 // Set new default.
7870 assert(MaxPop > 0 && MaxBB);
7871 DefaultMBB = FuncInfo.MBBMap[MaxBB];
7872
7873 // Remove cases that were pointing to the destination that is now the
7874 // default.
7875 CaseClusterVector New;
7876 New.reserve(Clusters.size());
7877 for (CaseCluster &CC : Clusters) {
7878 if (CC.MBB != DefaultMBB)
7879 New.push_back(CC);
7880 }
7881 Clusters = std::move(New);
7882 }
7883 }
7884
7885 // If there is only the default destination, jump there directly.
7886 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
7887 if (Clusters.empty()) {
7888 SwitchMBB->addSuccessor(DefaultMBB);
7889 if (DefaultMBB != NextBlock(SwitchMBB)) {
7890 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
7891 getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
7892 }
7893 return;
7894 }
7895
7896 if (TM.getOptLevel() != CodeGenOpt::None) {
7897 findJumpTables(Clusters, &SI, DefaultMBB);
7898 findBitTestClusters(Clusters, &SI);
7899 }
7900
7901
7902 DEBUG({
7903 dbgs() << "Case clusters: ";
7904 for (const CaseCluster &C : Clusters) {
7905 if (C.Kind == CC_JumpTable) dbgs() << "JT:";
7906 if (C.Kind == CC_BitTests) dbgs() << "BT:";
7907
7908 C.Low->getValue().print(dbgs(), true);
7909 if (C.Low != C.High) {
7910 dbgs() << '-';
7911 C.High->getValue().print(dbgs(), true);
7912 }
7913 dbgs() << ' ';
7914 }
7915 dbgs() << '\n';
7916 });
7917
7918 assert(!Clusters.empty());
7919 SwitchWorkList WorkList;
7920 CaseClusterIt First = Clusters.begin();
7921 CaseClusterIt Last = Clusters.end() - 1;
7922 WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
7923
7924 while (!WorkList.empty()) {
7925 SwitchWorkListItem W = WorkList.back();
7926 WorkList.pop_back();
7927 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
7928
7929 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
7930 // For optimized builds, lower large range as a balanced binary tree.
7931 splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
7932 continue;
7933 }
7934
7935 lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
7936 }
7937 }
133133 /// SDNodes we create.
134134 unsigned SDNodeOrder;
135135
136 enum CaseClusterKind {
137 /// A cluster of adjacent case labels with the same destination, or just one
138 /// case.
139 CC_Range,
140 /// A cluster of cases suitable for jump table lowering.
141 CC_JumpTable,
142 /// A cluster of cases suitable for bit test lowering.
143 CC_BitTests
144 };
145
146 /// A cluster of case labels.
147 struct CaseCluster {
148 CaseClusterKind Kind;
149 const ConstantInt *Low, *High;
150 union {
151 MachineBasicBlock *MBB;
152 unsigned JTCasesIndex;
153 unsigned BTCasesIndex;
154 };
155 uint64_t Weight;
156
157 static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
158 MachineBasicBlock *MBB, uint32_t Weight) {
159 CaseCluster C;
160 C.Kind = CC_Range;
161 C.Low = Low;
162 C.High = High;
163 C.MBB = MBB;
164 C.Weight = Weight;
165 return C;
136 /// Case - A struct to record the Value for a switch case, and the
137 /// case's target basic block.
138 struct Case {
139 const ConstantInt *Low;
140 const ConstantInt *High;
141 MachineBasicBlock* BB;
142 uint32_t ExtraWeight;
143
144 Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
145 Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
146 uint32_t extraweight) : Low(low), High(high), BB(bb),
147 ExtraWeight(extraweight) { }
148
149 APInt size() const {
150 const APInt &rHigh = High->getValue();
151 const APInt &rLow = Low->getValue();
152 return (rHigh - rLow + 1ULL);
166153 }
167
168 static CaseCluster jumpTable(const ConstantInt *Low,
169 const ConstantInt *High, unsigned JTCasesIndex,
170 uint32_t Weight) {
171 CaseCluster C;
172 C.Kind = CC_JumpTable;
173 C.Low = Low;
174 C.High = High;
175 C.JTCasesIndex = JTCasesIndex;
176 C.Weight = Weight;
177 return C;
178 }
179
180 static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
181 unsigned BTCasesIndex, uint32_t Weight) {
182 CaseCluster C;
183 C.Kind = CC_BitTests;
184 C.Low = Low;
185 C.High = High;
186 C.BTCasesIndex = BTCasesIndex;
187 C.Weight = Weight;
188 return C;
189 }
190 };
191
192 typedef std::vector CaseClusterVector;
193 typedef CaseClusterVector::iterator CaseClusterIt;
154 };
194155
195156 struct CaseBits {
196157 uint64_t Mask;
201162 CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
202163 uint32_t Weight):
203164 Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
204
205 CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
206 };
207
208 typedef std::vector CaseBitsVector;
209
210 /// Sort Clusters and merge adjacent cases.
211 void sortAndRangeify(CaseClusterVector &Clusters);
165 };
166
167 typedef std::vector CaseVector;
168 typedef std::vector CaseBitsVector;
169 typedef CaseVector::iterator CaseItr;
170 typedef std::pair CaseRange;
171
172 /// CaseRec - A struct with ctor used in lowering switches to a binary tree
173 /// of conditional branches.
174 struct CaseRec {
175 CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
176 CaseRange r) :
177 CaseBB(bb), LT(lt), GE(ge), Range(r) {}
178
179 /// CaseBB - The MBB in which to emit the compare and branch
180 MachineBasicBlock *CaseBB;
181 /// LT, GE - If nonzero, we know the current case value must be less-than or
182 /// greater-than-or-equal-to these Constants.
183 const ConstantInt *LT;
184 const ConstantInt *GE;
185 /// Range - A pair of iterators representing the range of case values to be
186 /// processed at this point in the binary search tree.
187 CaseRange Range;
188 };
189
190 typedef std::vector CaseRecVector;
191
192 struct CaseBitsCmp {
193 bool operator()(const CaseBits &C1, const CaseBits &C2) {
194 return C1.Bits > C2.Bits;
195 }
196 };
197
198 /// Populate Cases with the cases in SI, clustering adjacent cases with the
199 /// same destination together.
200 void Clusterify(CaseVector &Cases, const SwitchInst *SI);
212201
213202 /// CaseBlock - This structure is used to communicate between
214203 /// SelectionDAGBuilder and SDISel for the code generation of additional basic
298287 BitTestInfo Cases;
299288 };
300289
301 /// Minimum jump table density, in percent.
302 enum { MinJumpTableDensity = 40 };
303
304 /// Check whether a range of clusters is dense enough for a jump table.
305 bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
306 unsigned First, unsigned Last);
307
308 /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
309 /// decides it's not a good idea.
310 bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
311 unsigned Last, const SwitchInst *SI,
312 MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
313
314 /// Find clusters of cases suitable for jump table lowering.
315 void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
316 MachineBasicBlock *DefaultMBB);
317
318 /// Check whether the range [Low,High] fits in a machine word.
319 bool rangeFitsInWord(const APInt &Low, const APInt &High);
320
321 /// Check whether these clusters are suitable for lowering with bit tests based
322 /// on the number of destinations, comparison metric, and range.
323 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
324 const APInt &Low, const APInt &High);
325
326 /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
327 /// decides it's not a good idea.
328 bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
329 const SwitchInst *SI, CaseCluster &BTCluster);
330
331 /// Find clusters of cases suitable for bit test lowering.
332 void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
333
334 struct SwitchWorkListItem {
335 MachineBasicBlock *MBB;
336 CaseClusterIt FirstCluster;
337 CaseClusterIt LastCluster;
338 const ConstantInt *GE;
339 const ConstantInt *LT;
340 };
341 typedef SmallVector SwitchWorkList;
342
343 /// Emit comparison and split W into two subtrees.
344 void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
345 Value *Cond, MachineBasicBlock *SwitchMBB);
346
347 /// Lower W.
348 void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
349 MachineBasicBlock *SwitchMBB,
350 MachineBasicBlock *DefaultMBB);
351
352
353290 /// A class which encapsulates all of the information needed to generate a
354291 /// stack protector check and signals to isel via its state being initialized
355292 /// that a stack protector needs to be generated.
731668 void visitSwitch(const SwitchInst &I);
732669 void visitIndirectBr(const IndirectBrInst &I);
733670 void visitUnreachable(const UnreachableInst &I);
671
672 // Helpers for visitSwitch
673 bool handleSmallSwitchRange(CaseRec& CR,
674 CaseRecVector& WorkList,
675 const Value* SV,
676 MachineBasicBlock* Default,
677 MachineBasicBlock *SwitchBB);
678 bool handleJTSwitchCase(CaseRec& CR,
679 CaseRecVector& WorkList,
680 const Value* SV,
681 MachineBasicBlock* Default,
682 MachineBasicBlock *SwitchBB);
683 bool handleBTSplitSwitchCase(CaseRec& CR,
684 CaseRecVector& WorkList,
685 const Value* SV,
686 MachineBasicBlock *SwitchBB);
687 void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
688 const Value *SV, MachineBasicBlock *SwitchBB);
689 bool handleBitTestsSwitchCase(CaseRec& CR,
690 CaseRecVector& WorkList,
691 const Value* SV,
692 MachineBasicBlock* Default,
693 MachineBasicBlock *SwitchBB);
734694
735695 uint32_t getEdgeWeight(const MachineBasicBlock *Src,
736696 const MachineBasicBlock *Dst) const;
14121412 << FuncInfo->PHINodesToUpdate[i].first
14131413 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
14141414
1415 const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
1416 SDB->JTCases.empty() &&
1417 SDB->BitTestCases.empty();
1418
14151419 // Next, now that we know what the last MBB the LLVM BB expanded is, update
14161420 // PHI nodes in successors.
1417 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1418 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1419 assert(PHI->isPHI() &&
1420 "This is not a machine PHI node that we are updating!");
1421 if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
1422 continue;
1423 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
1421 if (MustUpdatePHINodes) {
1422 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1423 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1424 assert(PHI->isPHI() &&
1425 "This is not a machine PHI node that we are updating!");
1426 if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
1427 continue;
1428 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
1429 }
14241430 }
14251431
14261432 // Handle stack protector.
14641470 // Clear the Per-BB State.
14651471 SDB->SPDescriptor.resetPerBBState();
14661472 }
1473
1474 // If we updated PHI Nodes, return early.
1475 if (MustUpdatePHINodes)
1476 return;
14671477
14681478 for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
14691479 // Lower header first, if it wasn't already lowered
15771587 }
15781588 }
15791589 SDB->JTCases.clear();
1590
1591 // If the switch block involved a branch to one of the actual successors, we
1592 // need to update PHI nodes in that block.
1593 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1594 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1595 assert(PHI->isPHI() &&
1596 "This is not a machine PHI node that we are updating!");
1597 if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
1598 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
1599 }
15801600
15811601 // If we generated any switch lowering information, build and codegen any
15821602 // additional DAGs necessary.
150150 if (Changed && !ReturnMBB.hasAddressTaken()) {
151151 // We now might be able to merge this blr-only block into its
152152 // by-layout predecessor.
153 if (ReturnMBB.pred_size() == 1) {
153 if (ReturnMBB.pred_size() == 1 &&
154 (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
155 // Move the blr into the preceding block.
154156 MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
155 if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
156 // Move the blr into the preceding block.
157 PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
158 PrevMBB.removeSuccessor(&ReturnMBB);
159 }
157 PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
158 PrevMBB.removeSuccessor(&ReturnMBB);
160159 }
161160
162161 if (ReturnMBB.pred_empty())
33
44 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
55 ; CHECK-LABEL: t1:
6 ; CHECK: cmp r2, #7
7 ; CHECK: cmpne r2, #1
6 ; CHECK: cmp r2, #1
7 ; CHECK: cmpne r2, #7
88 switch i32 %c, label %cond_next [
99 i32 1, label %cond_true
1010 i32 7, label %cond_true
193193 %18 = load i32, i32* %mb_type, align 4
194194 switch i32 %18, label %for.inc503 [
195195 i32 9, label %if.then475
196 i32 11, label %if.then475
196 i32 10, label %if.then475
197197 i32 13, label %if.then475
198198 i32 14, label %if.then475
199199 ]
1616 ; CHECK: BB#0: derived from LLVM BB %entry
1717 ; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
1818 ; CHECK: BB#4: derived from LLVM BB %entry
19 ; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
19 ; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
2020 ; CHECK: BB#5: derived from LLVM BB %entry
21 ; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
21 ; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
2222
2323 sw.bb:
2424 br label %return
None ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
0 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
11 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
22 target triple = "powerpc64-unknown-linux-gnu"
33
4444 ; CHECK: blr
4545 }
4646
47
48 @.str0 = private unnamed_addr constant [2 x i8] c"a\00"
49 @.str1 = private unnamed_addr constant [2 x i8] c"b\00"
50 @.str2 = private unnamed_addr constant [2 x i8] c"c\00"
51 @.str3 = private unnamed_addr constant [2 x i8] c"d\00"
52 @.str4 = private unnamed_addr constant [2 x i8] c"e\00"
53 define i8* @dont_assert(i32 %x) {
54 ; LLVM would assert due to moving an early return into the jump table block and
55 ; removing one of its predecessors despite that block ending with an indirect
56 ; branch.
57 entry:
58 switch i32 %x, label %sw.epilog [
59 i32 1, label %return
60 i32 2, label %sw.bb1
61 i32 3, label %sw.bb2
62 i32 4, label %sw.bb3
63 i32 255, label %sw.bb4
64 ]
65 sw.bb1: br label %return
66 sw.bb2: br label %return
67 sw.bb3: br label %return
68 sw.bb4: br label %return
69 sw.epilog: br label %return
70 return:
71 %retval.0 = phi i8* [ null, %sw.epilog ],
72 [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
73 [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
74 [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
75 [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
76 [ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
77 ret i8* %retval.0
78 }
79
8047 attributes #0 = { nounwind }
None ; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
1 ; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
0 ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
1 ; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
22
33 ; Test correct code generation for medium and large code model
44 ; for loading the address of a jump table from the TOC.
11 ; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
22 ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
33 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
4
5 ; Run jump table test separately since jump tables aren't generated at -O0.
6 ; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
7 ; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
8 ; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
9 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
104
115 ; FIXME: When asm-parse is available, could make this an assembly test.
126
9791 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
9892 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
9993
100 @ti = common global i32 0, align 4
101
102 define signext i32 @test_tentative() nounwind {
103 entry:
104 %0 = load i32, i32* @ti, align 4
105 %inc = add nsw i32 %0, 1
106 store i32 %inc, i32* @ti, align 4
107 ret i32 %0
108 }
109
110 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
111 ; accessing tentatively declared variable ti.
112 ;
113 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
114 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
115 ;
116 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
117 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
118
119 define i8* @test_fnaddr() nounwind {
120 entry:
121 %func = alloca i32 (i32)*, align 8
122 store i32 (i32)* @foo, i32 (i32)** %func, align 8
123 %0 = load i32 (i32)*, i32 (i32)** %func, align 8
124 %1 = bitcast i32 (i32)* %0 to i8*
125 ret i8* %1
126 }
127
128 declare signext i32 @foo(i32 signext)
129
130 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
131 ; accessing function address foo.
132 ;
133 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
134 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
135 ;
136 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
137 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
138
139
14094 define signext i32 @test_jump_table(i32 signext %i) nounwind {
14195 entry:
14296 %i.addr = alloca i32, align 4
184138 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
185139 ; accessing a jump table address.
186140 ;
187 ; MEDIUM-JT: Relocations [
188 ; MEDIUM-JT: Section ({{.*}}) .rela.text {
189 ; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
190 ; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
141 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
142 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
191143 ;
192 ; LARGE-JT: Relocations [
193 ; LARGE-JT: Section ({{.*}}) .rela.text {
194 ; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
195 ; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
144 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
145 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
146
147 @ti = common global i32 0, align 4
148
149 define signext i32 @test_tentative() nounwind {
150 entry:
151 %0 = load i32, i32* @ti, align 4
152 %inc = add nsw i32 %0, 1
153 store i32 %inc, i32* @ti, align 4
154 ret i32 %0
155 }
156
157 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
158 ; accessing tentatively declared variable ti.
159 ;
160 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
161 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
162 ;
163 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
164 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
165
166 define i8* @test_fnaddr() nounwind {
167 entry:
168 %func = alloca i32 (i32)*, align 8
169 store i32 (i32)* @foo, i32 (i32)** %func, align 8
170 %0 = load i32 (i32)*, i32 (i32)** %func, align 8
171 %1 = bitcast i32 (i32)* %0 to i8*
172 ret i8* %1
173 }
174
175 declare signext i32 @foo(i32 signext)
176
177 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
178 ; accessing function address foo.
179 ;
180 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
181 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
182 ;
183 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
184 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
5454 ]
5555
5656 bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
57 call void @_Z3bari( i32 0 )
5857 br label %bb1
5958
6059 bb1: ; preds = %bb, %entry
61 call void @_Z3bari( i32 1 )
6260 br label %bb2
6361
6462 bb2: ; preds = %bb1, %entry
65 call void @_Z3bari( i32 2 )
63 call void @_Z3bari( i32 1 )
6664 br label %bb11
6765
6866 bb3: ; preds = %entry
139139
140140 ; The balanced binary switch here would start with a comparison against 39, but
141141 ; it is currently starting with 29 because of the density-sum heuristic.
142 ; CHECK: cmpl $39
142 ; CHECK: cmpl $29
143143 ; CHECK: jg
144144 ; CHECK: cmpl $10
145 ; CHECK: je
145 ; CHECK: jne
146 ; CHECK: cmpl $49
147 ; CHECK: jg
148 ; CHECK: cmpl $30
149 ; CHECK: jne
146150 ; CHECK: cmpl $20
147151 ; CHECK: jne
148 ; CHECK: cmpl $40
149 ; CHECK: je
150152 ; CHECK: cmpl $50
151153 ; CHECK: jne
152 ; CHECK: cmpl $30
154 ; CHECK: cmpl $40
153155 ; CHECK: jne
154156 ; CHECK: cmpl $60
155157 ; CHECK: jne
+0
-306
test/CodeGen/X86/switch.ll less more
None ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
1 ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
2
3 declare void @g(i32)
4
5 define void @basic(i32 %x) {
6 entry:
7 switch i32 %x, label %return [
8 i32 3, label %bb0
9 i32 1, label %bb1
10 i32 4, label %bb1
11 i32 5, label %bb0
12 ]
13 bb0: tail call void @g(i32 0) br label %return
14 bb1: tail call void @g(i32 1) br label %return
15 return: ret void
16
17 ; Should be lowered as straight compares in -O0 mode.
18 ; NOOPT-LABEL: basic
19 ; NOOPT: subl $3, %eax
20 ; NOOPT: je
21 ; NOOPT: subl $1, %eax
22 ; NOOPT: je
23 ; NOOPT: subl $4, %eax
24 ; NOOPT: je
25 ; NOOPT: subl $5, %eax
26 ; NOOPT: je
27
28 ; Jump table otherwise.
29 ; CHECK-LABEL: basic
30 ; CHECK: decl
31 ; CHECK: cmpl $4
32 ; CHECK: ja
33 ; CHECK: jmpq *.LJTI
34 }
35
36
37 define void @simple_ranges(i32 %x) {
38 entry:
39 switch i32 %x, label %return [
40 i32 0, label %bb0
41 i32 1, label %bb0
42 i32 2, label %bb0
43 i32 3, label %bb0
44 i32 100, label %bb1
45 i32 101, label %bb1
46 i32 102, label %bb1
47 i32 103, label %bb1
48 ]
49 bb0: tail call void @g(i32 0) br label %return
50 bb1: tail call void @g(i32 1) br label %return
51 return: ret void
52
53 ; Should be lowered to two range checks.
54 ; CHECK-LABEL: simple_ranges
55 ; CHECK: leal -100
56 ; CHECK: cmpl $4
57 ; CHECK: jae
58 ; CHECK: cmpl $3
59 ; CHECK: ja
60 }
61
62
63 define void @jt_is_better(i32 %x) {
64 entry:
65 switch i32 %x, label %return [
66 i32 0, label %bb0
67 i32 2, label %bb0
68 i32 4, label %bb0
69 i32 1, label %bb1
70 i32 3, label %bb1
71 i32 5, label %bb1
72
73 i32 6, label %bb2
74 i32 7, label %bb3
75 i32 8, label %bb4
76 ]
77 bb0: tail call void @g(i32 0) br label %return
78 bb1: tail call void @g(i32 1) br label %return
79 bb2: tail call void @g(i32 2) br label %return
80 bb3: tail call void @g(i32 3) br label %return
81 bb4: tail call void @g(i32 4) br label %return
82 return: ret void
83
84 ; Cases 0-5 could be lowered with two bit tests,
85 ; but with 6-8, the whole switch is suitable for a jump table.
86 ; CHECK-LABEL: jt_is_better
87 ; CHECK: cmpl $8
88 ; CHECK: jbe
89 ; CHECK: jmpq *.LJTI
90 }
91
92
93 define void @bt_is_better(i32 %x) {
94 entry:
95 switch i32 %x, label %return [
96 i32 0, label %bb0
97 i32 3, label %bb0
98 i32 6, label %bb0
99 i32 1, label %bb1
100 i32 4, label %bb1
101 i32 7, label %bb1
102 i32 2, label %bb2
103 i32 5, label %bb2
104 i32 8, label %bb2
105
106 ]
107 bb0: tail call void @g(i32 0) br label %return
108 bb1: tail call void @g(i32 1) br label %return
109 bb2: tail call void @g(i32 2) br label %return
110 return: ret void
111
112 ; This could be lowered as a jump table, but bit tests is more efficient.
113 ; CHECK-LABEL: bt_is_better
114 ; 73 = 2^0 + 2^3 + 2^6
115 ; CHECK: movl $73
116 ; CHECK: btl
117 ; 146 = 2^1 + 2^4 + 2^7
118 ; CHECK: movl $146
119 ; CHECK: btl
120 ; 292 = 2^2 + 2^5 + 2^8
121 ; CHECK: movl $292
122 ; CHECK: btl
123 }
124
125
126 define void @optimal_pivot1(i32 %x) {
127 entry:
128 switch i32 %x, label %return [
129 i32 100, label %bb0
130 i32 200, label %bb1
131 i32 300, label %bb0
132 i32 400, label %bb1
133 i32 500, label %bb0
134 i32 600, label %bb1
135
136 ]
137 bb0: tail call void @g(i32 0) br label %return
138 bb1: tail call void @g(i32 1) br label %return
139 return: ret void
140
141 ; Should pivot around 400 for two subtrees of equal size.
142 ; CHECK-LABEL: optimal_pivot1
143 ; CHECK-NOT: cmpl
144 ; CHECK: cmpl $399
145 }
146
147
148 define void @optimal_pivot2(i32 %x) {
149 entry:
150 switch i32 %x, label %return [
151 i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3
152 i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3
153 i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3
154 i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3
155
156 ]
157 bb0: tail call void @g(i32 0) br label %return
158 bb1: tail call void @g(i32 1) br label %return
159 bb2: tail call void @g(i32 2) br label %return
160 bb3: tail call void @g(i32 3) br label %return
161 return: ret void
162
163 ; Should pivot around 300 for two subtrees with two jump tables each.
164 ; CHECK-LABEL: optimal_pivot2
165 ; CHECK-NOT: cmpl
166 ; CHECK: cmpl $299
167 ; CHECK: jmpq *.LJTI
168 ; CHECK: jmpq *.LJTI
169 ; CHECK: jmpq *.LJTI
170 ; CHECK: jmpq *.LJTI
171 }
172
173
174 define void @optimal_jump_table1(i32 %x) {
175 entry:
176 switch i32 %x, label %return [
177 i32 0, label %bb0
178 i32 5, label %bb1
179 i32 6, label %bb2
180 i32 12, label %bb3
181 i32 13, label %bb4
182 i32 15, label %bb5
183 ]
184 bb0: tail call void @g(i32 0) br label %return
185 bb1: tail call void @g(i32 1) br label %return
186 bb2: tail call void @g(i32 2) br label %return
187 bb3: tail call void @g(i32 3) br label %return
188 bb4: tail call void @g(i32 4) br label %return
189 bb5: tail call void @g(i32 5) br label %return
190 return: ret void
191
192 ; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
193 ; Expecting a jump table from 5 to 15.
194 ; CHECK-LABEL: optimal_jump_table1
195 ; CHECK: leal -5
196 ; CHECK: cmpl $10
197 ; CHECK: jmpq *.LJTI
198 }
199
200
201 define void @optimal_jump_table2(i32 %x) {
202 entry:
203 switch i32 %x, label %return [
204 i32 0, label %bb0
205 i32 1, label %bb1
206 i32 2, label %bb2
207 i32 9, label %bb3
208 i32 14, label %bb4
209 i32 15, label %bb5
210 ]
211 bb0: tail call void @g(i32 0) br label %return
212 bb1: tail call void @g(i32 1) br label %return
213 bb2: tail call void @g(i32 2) br label %return
214 bb3: tail call void @g(i32 3) br label %return
215 bb4: tail call void @g(i32 4) br label %return
216 bb5: tail call void @g(i32 5) br label %return
217 return: ret void
218
219 ; Partitioning the cases to the minimum number of dense sets is not good enough.
220 ; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
221 ; should be preferred. Expecting a table from 0-9.
222 ; CHECK-LABEL: optimal_jump_table2
223 ; CHECK: cmpl $9
224 ; CHECK: jmpq *.LJTI
225 }
226
227
228 define void @optimal_jump_table3(i32 %x) {
229 entry:
230 switch i32 %x, label %return [
231 i32 1, label %bb0
232 i32 2, label %bb1
233 i32 3, label %bb2
234 i32 10, label %bb3
235 i32 13, label %bb0
236 i32 14, label %bb1
237 i32 15, label %bb2
238 i32 20, label %bb3
239 i32 25, label %bb4
240 ]
241 bb0: tail call void @g(i32 0) br label %return
242 bb1: tail call void @g(i32 1) br label %return
243 bb2: tail call void @g(i32 2) br label %return
244 bb3: tail call void @g(i32 3) br label %return
245 bb4: tail call void @g(i32 4) br label %return
246 return: ret void
247
248 ; Splitting to maximize left-right density sum and gap size would split this
249 ; between 3 and 10, and then between 20 and 25. It's better to build a table
250 ; from 1-20.
251 ; CHECK-LABEL: optimal_jump_table3
252 ; CHECK: leal -1
253 ; CHECK: cmpl $19
254 ; CHECK: jmpq *.LJTI
255 }
256
257 %struct.S = type { %struct.S*, i32 }
258 define void @phi_node_trouble(%struct.S* %s) {
259 entry:
260 br label %header
261 header:
262 %ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
263 %bool = icmp eq %struct.S* %ptr, null
264 br i1 %bool, label %exit, label %loop
265 loop:
266 %nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
267 %next = load %struct.S*, %struct.S** %nextptr
268 %xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
269 %x = load i32, i32* %xptr
270 switch i32 %x, label %exit [
271 i32 4, label %header
272 i32 36, label %exit2
273 i32 69, label %exit2
274 i32 25, label %exit2
275 ]
276 exit:
277 ret void
278 exit2:
279 ret void
280
281 ; This will be lowered to a comparison with 4 and then bit tests. Make sure
282 ; that the phi node in %header gets a value from the comparison block.
283 ; CHECK-LABEL: phi_node_trouble
284 ; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
285 ; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
286 ; CHECK: cmpl $4, %[[REG2]]
287 }
288
289
290 define void @default_only(i32 %x) {
291 entry:
292 br label %sw
293 return:
294 ret void
295 sw:
296 switch i32 %x, label %return [
297 ]
298
299 ; Branch directly to the default.
300 ; (In optimized builds the switch is removed earlier.)
301 ; NOOPT-LABEL: default_only
302 ; NOOPT: .[[L:[A-Z0-9_]+]]:
303 ; NOOPT-NEXT: retq
304 ; NOOPT: jmp .[[L]]
305 }
None ;; RUN: llc -verify-machineinstrs \
0 ;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
11 ;; RUN: -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
22 ;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s
33
4 ;; RUN: llc -verify-machineinstrs \
4 ;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
55 ;; RUN: -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
66 ;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s
77
1010
1111 define void @foo(i32* %ptr) nounwind ssp {
1212 %tmp = load i32, i32* %ptr, align 4
13 switch i32 %tmp, label %exit [
14 i32 0, label %bb0
15 i32 1, label %bb1
16 i32 2, label %bb2
17 i32 3, label %bb3
13 switch i32 %tmp, label %default [
14 i32 11, label %bb0
15 i32 10, label %bb1
16 i32 8, label %bb2
17 i32 4, label %bb3
18 i32 2, label %bb4
19 i32 6, label %bb5
20 i32 9, label %bb6
21 i32 15, label %bb7
22 i32 1, label %bb8
23 i32 3, label %bb9
24 i32 5, label %bb10
25 i32 30, label %bb11
26 i32 31, label %bb12
27 i32 13, label %bb13
28 i32 14, label %bb14
29 i32 20, label %bb15
30 i32 19, label %bb16
31 i32 17, label %bb17
32 i32 18, label %bb18
33 i32 21, label %bb19
34 i32 22, label %bb20
35 i32 16, label %bb21
36 i32 24, label %bb22
37 i32 25, label %bb23
38 i32 26, label %bb24
39 i32 27, label %bb25
40 i32 28, label %bb26
41 i32 23, label %bb27
42 i32 12, label %bb28
1843 ]
44
45 default:
46 br label %exit
1947 bb0:
20 store i32 0, i32* %ptr, align 4
2148 br label %exit
2249 bb1:
23 store i32 1, i32* %ptr, align 4
2450 br label %exit
2551 bb2:
26 store i32 2, i32* %ptr, align 4
2752 br label %exit
2853 bb3:
29 store i32 3, i32* %ptr, align 4
3054 br label %exit
55 bb4:
56 br label %exit
57 bb5:
58 br label %exit
59 bb6:
60 br label %exit
61 bb7:
62 br label %exit
63 bb8:
64 br label %exit
65 bb9:
66 br label %exit
67 bb10:
68 br label %exit
69 bb11:
70 br label %exit
71 bb12:
72 br label %exit
73 bb13:
74 br label %exit
75 bb14:
76 br label %exit
77 bb15:
78 br label %exit
79 bb16:
80 br label %exit
81 bb17:
82 br label %exit
83 bb18:
84 br label %exit
85 bb19:
86 br label %exit
87 bb20:
88 br label %exit
89 bb21:
90 br label %exit
91 bb22:
92 br label %exit
93 bb23:
94 br label %exit
95 bb24:
96 br label %exit
97 bb25:
98 br label %exit
99 bb26:
100 br label %exit
101 bb27:
102 br label %exit
103 bb28:
104 br label %exit
105
106
31107 exit:
108
32109 ret void
33110 }
34111