llvm.org GIT mirror llvm / cc987d9
Switch lowering: extract jump tables and bit tests before building binary tree (PR22262) This is a major rewrite of the SelectionDAG switch lowering. The previous code would lower switches as a binary tree, discovering clusters of cases suitable for lowering by jump tables or bit tests as it went along. To increase the likelihood of finding jump tables, the binary tree pivot was selected to maximize case density on both sides of the pivot. By not selecting the pivot in the middle, the binary trees would not always be balanced, leading to performance problems in the generated code. This patch rewrites the lowering to search for clusters of cases suitable for jump tables or bit tests first, and then builds the binary tree around those clusters. This way, the binary tree will always be balanced. This has the added benefit of decoupling the different aspects of the lowering: tree building and jump table or bit tests finding are now easier to tweak separately. For example, this will enable us to balance the tree based on profile info in the future. The algorithm for finding jump tables is O(n^2), whereas the previous algorithm was O(n log n) for common cases, and quadratic only in the worst-case. This doesn't seem to be a major problem in practice, e.g. compiling a file consisting of a 10k-case switch was only 30% slower, and such large switches should be rare in practice. Compiling e.g. gcc.c showed no compile-time difference. If this does turn out to be a problem, we could limit the search space of the algorithm. This commit also disables all optimizations during switch lowering in -O0. Differential Revision: http://reviews.llvm.org/D8649 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235101 91177308-0d34-0410-b5e6-96231b3b80d8 Hans Wennborg 5 years ago
12 changed file(s) with 1275 addition(s) and 916 deletion(s). Raw diff Collapse all Expand all
19271927
19281928 // Avoid emitting unnecessary branches to the next block.
19291929 if (MBB != NextBlock(SwitchBB))
1930 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
1930 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrRange,
19311931 DAG.getBasicBlock(MBB));
19321932
19331933 DAG.setRoot(BrRange);
21002100 return VReg;
21012101 }
21022102
2103 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
2104 /// small case ranges).
2105 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
2106 CaseRecVector& WorkList,
2107 const Value* SV,
2108 MachineBasicBlock *Default,
2109 MachineBasicBlock *SwitchBB) {
2110 // Size is the number of Cases represented by this range.
2111 size_t Size = CR.Range.second - CR.Range.first;
2112 if (Size > 3)
2113 return false;
2114
2115 // Get the MachineFunction which holds the current MBB. This is used when
2116 // inserting any additional MBBs necessary to represent the switch.
2117 MachineFunction *CurMF = FuncInfo.MF;
2118
2119 // Figure out which block is immediately after the current one.
2120 MachineBasicBlock *NextMBB = nullptr;
2121 MachineFunction::iterator BBI = CR.CaseBB;
2122 if (++BBI != FuncInfo.MF->end())
2123 NextMBB = BBI;
2124
2125 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2126 // If any two of the cases has the same destination, and if one value
2127 // is the same as the other, but has one bit unset that the other has set,
2128 // use bit manipulation to do two compares at once. For example:
2129 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
2130 // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
2131 // TODO: Handle cases where CR.CaseBB != SwitchBB.
2132 if (Size == 2 && CR.CaseBB == SwitchBB) {
2133 Case &Small = *CR.Range.first;
2134 Case &Big = *(CR.Range.second-1);
2135
2136 if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
2137 const APInt& SmallValue = Small.Low->getValue();
2138 const APInt& BigValue = Big.Low->getValue();
2139
2140 // Check that there is only one bit different.
2141 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
2142 (SmallValue | BigValue) == BigValue) {
2143 // Isolate the common bit.
2144 APInt CommonBit = BigValue & ~SmallValue;
2145 assert((SmallValue | CommonBit) == BigValue &&
2146 CommonBit.countPopulation() == 1 && "Not a common bit?");
2147
2148 SDValue CondLHS = getValue(SV);
2149 EVT VT = CondLHS.getValueType();
2150 SDLoc DL = getCurSDLoc();
2151
2152 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
2153 DAG.getConstant(CommonBit, VT));
2154 SDValue Cond = DAG.getSetCC(DL, MVT::i1,
2155 Or, DAG.getConstant(BigValue, VT),
2156 ISD::SETEQ);
2157
2158 // Update successor info.
2159 // Both Small and Big will jump to Small.BB, so we sum up the weights.
2160 addSuccessorWithWeight(SwitchBB, Small.BB,
2161 Small.ExtraWeight + Big.ExtraWeight);
2162 addSuccessorWithWeight(SwitchBB, Default,
2163 // The default destination is the first successor in IR.
2164 BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
2165
2166 // Insert the true branch.
2167 SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
2168 getControlRoot(), Cond,
2169 DAG.getBasicBlock(Small.BB));
2170
2171 // Insert the false branch.
2172 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
2173 DAG.getBasicBlock(Default));
2174
2175 DAG.setRoot(BrCond);
2176 return true;
2177 }
2178 }
2179 }
2180
2181 // Order cases by weight so the most likely case will be checked first.
2182 uint32_t UnhandledWeights = 0;
2183 if (BPI) {
2184 for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
2185 uint32_t IWeight = I->ExtraWeight;
2186 UnhandledWeights += IWeight;
2187 for (CaseItr J = CR.Range.first; J < I; ++J) {
2188 uint32_t JWeight = J->ExtraWeight;
2189 if (IWeight > JWeight)
2190 std::swap(*I, *J);
2191 }
2192 }
2193 }
2194 // Rearrange the case blocks so that the last one falls through if possible.
2195 Case &BackCase = *(CR.Range.second-1);
2196 if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) {
2197 // The last case block won't fall through into 'NextMBB' if we emit the
2198 // branches in this order. See if rearranging a case value would help.
2199 // We start at the bottom as it's the case with the least weight.
2200 for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I)
2201 if (I->BB == NextMBB) {
2202 std::swap(*I, BackCase);
2203 break;
2204 }
2205 }
2206
2207 // Create a CaseBlock record representing a conditional branch to
2208 // the Case's target mbb if the value being switched on SV is equal
2209 // to C.
2210 MachineBasicBlock *CurBlock = CR.CaseBB;
2211 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2212 MachineBasicBlock *FallThrough;
2213 if (I != E-1) {
2214 FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
2215 CurMF->insert(BBI, FallThrough);
2216
2217 // Put SV in a virtual register to make it available from the new blocks.
2218 ExportFromCurrentBlock(SV);
2219 } else {
2220 // If the last case doesn't match, go to the default block.
2221 FallThrough = Default;
2222 }
2223
2224 const Value *RHS, *LHS, *MHS;
2225 ISD::CondCode CC;
2226 if (I->High == I->Low) {
2227 // This is just small small case range :) containing exactly 1 case
2228 CC = ISD::SETEQ;
2229 LHS = SV; RHS = I->High; MHS = nullptr;
2230 } else {
2231 CC = ISD::SETLE;
2232 LHS = I->Low; MHS = SV; RHS = I->High;
2233 }
2234
2235 // The false weight should be sum of all un-handled cases.
2236 UnhandledWeights -= I->ExtraWeight;
2237 CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
2238 /* me */ CurBlock,
2239 /* trueweight */ I->ExtraWeight,
2240 /* falseweight */ UnhandledWeights);
2241
2242 // If emitting the first comparison, just call visitSwitchCase to emit the
2243 // code into the current block. Otherwise, push the CaseBlock onto the
2244 // vector to be later processed by SDISel, and insert the node's MBB
2245 // before the next MBB.
2246 if (CurBlock == SwitchBB)
2247 visitSwitchCase(CB, SwitchBB);
2248 else
2249 SwitchCases.push_back(CB);
2250
2251 CurBlock = FallThrough;
2252 }
2253
2254 return true;
2255 }
2256
2257 static inline bool areJTsAllowed(const TargetLowering &TLI) {
2258 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
2259 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
2260 }
2261
2262 static APInt ComputeRange(const APInt &First, const APInt &Last) {
2263 uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
2264 APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
2265 return (LastExt - FirstExt + 1ULL);
2266 }
2267
2268 /// handleJTSwitchCase - Emit jumptable for current switch case range
2269 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
2270 CaseRecVector &WorkList,
2271 const Value *SV,
2272 MachineBasicBlock *Default,
2273 MachineBasicBlock *SwitchBB) {
2274 Case& FrontCase = *CR.Range.first;
2275 Case& BackCase = *(CR.Range.second-1);
2276
2277 const APInt &First = FrontCase.Low->getValue();
2278 const APInt &Last = BackCase.High->getValue();
2279
2280 APInt TSize(First.getBitWidth(), 0);
2281 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2282 TSize += I->size();
2283
2284 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2285 if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
2286 return false;
2287
2288 APInt Range = ComputeRange(First, Last);
2289 // The density is TSize / Range. Require at least 40%.
2290 // It should not be possible for IntTSize to saturate for sane code, but make
2291 // sure we handle Range saturation correctly.
2292 uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
2293 uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
2294 if (IntTSize * 10 < IntRange * 4)
2295 return false;
2296
2297 DEBUG(dbgs() << "Lowering jump table\n"
2298 << "First entry: " << First << ". Last entry: " << Last << '\n'
2299 << "Range: " << Range << ". Size: " << TSize << ".\n\n");
2300
2301 // Get the MachineFunction which holds the current MBB. This is used when
2302 // inserting any additional MBBs necessary to represent the switch.
2303 MachineFunction *CurMF = FuncInfo.MF;
2304
2305 // Figure out which block is immediately after the current one.
2306 MachineFunction::iterator BBI = CR.CaseBB;
2307 ++BBI;
2308
2309 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2310
2311 // Create a new basic block to hold the code for loading the address
2312 // of the jump table, and jumping to it. Update successor information;
2313 // we will either branch to the default case for the switch, or the jump
2314 // table.
2315 MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2316 CurMF->insert(BBI, JumpTableBB);
2317
2318 addSuccessorWithWeight(CR.CaseBB, Default);
2319 addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
2320
2321 // Build a vector of destination BBs, corresponding to each target
2322 // of the jump table. If the value of the jump table slot corresponds to
2323 // a case statement, push the case's BB onto the vector, otherwise, push
2324 // the default BB.
2325 std::vector DestBBs;
2326 APInt TEI = First;
2327 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
2328 const APInt &Low = I->Low->getValue();
2329 const APInt &High = I->High->getValue();
2330
2331 if (Low.sle(TEI) && TEI.sle(High)) {
2332 DestBBs.push_back(I->BB);
2333 if (TEI==High)
2334 ++I;
2335 } else {
2336 DestBBs.push_back(Default);
2337 }
2338 }
2339
2340 // Calculate weight for each unique destination in CR.
2341 DenseMap DestWeights;
2342 if (FuncInfo.BPI) {
2343 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2344 DestWeights[I->BB] += I->ExtraWeight;
2345 }
2346
2347 // Update successor info. Add one edge to each unique successor.
2348 BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
2349 for (MachineBasicBlock *DestBB : DestBBs) {
2350 if (!SuccsHandled[DestBB->getNumber()]) {
2351 SuccsHandled[DestBB->getNumber()] = true;
2352 auto I = DestWeights.find(DestBB);
2353 addSuccessorWithWeight(JumpTableBB, DestBB,
2354 I != DestWeights.end() ? I->second : 0);
2355 }
2356 }
2357
2358 // Create a jump table index for this jump table.
2359 unsigned JTEncoding = TLI.getJumpTableEncoding();
2360 unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
2361 ->createJumpTableIndex(DestBBs);
2362
2363 // Set the jump table information so that we can codegen it as a second
2364 // MachineBasicBlock
2365 JumpTable JT(-1U, JTI, JumpTableBB, Default);
2366 JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
2367 if (CR.CaseBB == SwitchBB)
2368 visitJumpTableHeader(JT, JTH, SwitchBB);
2369
2370 JTCases.push_back(JumpTableBlock(JTH, JT));
2371 return true;
2372 }
2373
2374 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
2375 /// 2 subtrees.
2376 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
2377 CaseRecVector& WorkList,
2378 const Value* SV,
2379 MachineBasicBlock* SwitchBB) {
2380 Case& FrontCase = *CR.Range.first;
2381 Case& BackCase = *(CR.Range.second-1);
2382
2383 // Size is the number of Cases represented by this range.
2384 unsigned Size = CR.Range.second - CR.Range.first;
2385
2386 const APInt &First = FrontCase.Low->getValue();
2387 const APInt &Last = BackCase.High->getValue();
2388 double FMetric = 0;
2389 CaseItr Pivot = CR.Range.first + Size/2;
2390
2391 // Select optimal pivot, maximizing sum density of LHS and RHS. This will
2392 // (heuristically) allow us to emit JumpTable's later.
2393 APInt TSize(First.getBitWidth(), 0);
2394 for (CaseItr I = CR.Range.first, E = CR.Range.second;
2395 I!=E; ++I)
2396 TSize += I->size();
2397
2398 APInt LSize = FrontCase.size();
2399 APInt RSize = TSize-LSize;
2400 DEBUG(dbgs() << "Selecting best pivot: \n"
2401 << "First: " << First << ", Last: " << Last <<'\n'
2402 << "LSize: " << LSize << ", RSize: " << RSize << '\n');
2403 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2404 for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
2405 J!=E; ++I, ++J) {
2406 const APInt &LEnd = I->High->getValue();
2407 const APInt &RBegin = J->Low->getValue();
2408 APInt Range = ComputeRange(LEnd, RBegin);
2409 assert((Range - 2ULL).isNonNegative() &&
2410 "Invalid case distance");
2411 // Use volatile double here to avoid excess precision issues on some hosts,
2412 // e.g. that use 80-bit X87 registers.
2413 // Only consider the density of sub-ranges that actually have sufficient
2414 // entries to be lowered as a jump table.
2415 volatile double LDensity =
2416 LSize.ult(TLI.getMinimumJumpTableEntries())
2417 ? 0.0
2418 : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble();
2419 volatile double RDensity =
2420 RSize.ult(TLI.getMinimumJumpTableEntries())
2421 ? 0.0
2422 : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble();
2423 volatile double Metric = Range.logBase2() * (LDensity + RDensity);
2424 // Should always split in some non-trivial place
2425 DEBUG(dbgs() <<"=>Step\n"
2426 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
2427 << "LDensity: " << LDensity
2428 << ", RDensity: " << RDensity << '\n'
2429 << "Metric: " << Metric << '\n');
2430 if (FMetric < Metric) {
2431 Pivot = J;
2432 FMetric = Metric;
2433 DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
2434 }
2435
2436 LSize += J->size();
2437 RSize -= J->size();
2438 }
2439
2440 if (FMetric == 0 || !areJTsAllowed(TLI))
2441 Pivot = CR.Range.first + Size/2;
2442 splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB);
2443 return true;
2444 }
2445
2446 void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot,
2447 CaseRecVector &WorkList,
2448 const Value *SV,
2449 MachineBasicBlock *SwitchBB) {
2450 // Get the MachineFunction which holds the current MBB. This is used when
2451 // inserting any additional MBBs necessary to represent the switch.
2452 MachineFunction *CurMF = FuncInfo.MF;
2453
2454 // Figure out which block is immediately after the current one.
2455 MachineFunction::iterator BBI = CR.CaseBB;
2456 ++BBI;
2457
2458 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2459
2460 CaseRange LHSR(CR.Range.first, Pivot);
2461 CaseRange RHSR(Pivot, CR.Range.second);
2462 const ConstantInt *C = Pivot->Low;
2463 MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
2464
2465 // We know that we branch to the LHS if the Value being switched on is
2466 // less than the Pivot value, C. We use this to optimize our binary
2467 // tree a bit, by recognizing that if SV is greater than or equal to the
2468 // LHS's Case Value, and that Case Value is exactly one less than the
2469 // Pivot's Value, then we can branch directly to the LHS's Target,
2470 // rather than creating a leaf node for it.
2471 if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE &&
2472 C->getValue() == (CR.GE->getValue() + 1LL)) {
2473 TrueBB = LHSR.first->BB;
2474 } else {
2475 TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2476 CurMF->insert(BBI, TrueBB);
2477 WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
2478
2479 // Put SV in a virtual register to make it available from the new blocks.
2480 ExportFromCurrentBlock(SV);
2481 }
2482
2483 // Similar to the optimization above, if the Value being switched on is
2484 // known to be less than the Constant CR.LT, and the current Case Value
2485 // is CR.LT - 1, then we can branch directly to the target block for
2486 // the current Case Value, rather than emitting a RHS leaf node for it.
2487 if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
2488 RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) {
2489 FalseBB = RHSR.first->BB;
2490 } else {
2491 FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2492 CurMF->insert(BBI, FalseBB);
2493 WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
2494
2495 // Put SV in a virtual register to make it available from the new blocks.
2496 ExportFromCurrentBlock(SV);
2497 }
2498
2499 // Create a CaseBlock record representing a conditional branch to
2500 // the LHS node if the value being switched on SV is less than C.
2501 // Otherwise, branch to LHS.
2502 CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);
2503
2504 if (CR.CaseBB == SwitchBB)
2505 visitSwitchCase(CB, SwitchBB);
2506 else
2507 SwitchCases.push_back(CB);
2508 }
2509
2510 /// handleBitTestsSwitchCase - if current case range has few destination and
2511 /// range span less, than machine word bitwidth, encode case range into series
2512 /// of masks and emit bit tests with these masks.
2513 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
2514 CaseRecVector& WorkList,
2515 const Value* SV,
2516 MachineBasicBlock* Default,
2517 MachineBasicBlock* SwitchBB) {
2518 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2519 EVT PTy = TLI.getPointerTy();
2520 unsigned IntPtrBits = PTy.getSizeInBits();
2521
2522 Case& FrontCase = *CR.Range.first;
2523 Case& BackCase = *(CR.Range.second-1);
2524
2525 // Get the MachineFunction which holds the current MBB. This is used when
2526 // inserting any additional MBBs necessary to represent the switch.
2527 MachineFunction *CurMF = FuncInfo.MF;
2528
2529 // If target does not have legal shift left, do not emit bit tests at all.
2530 if (!TLI.isOperationLegal(ISD::SHL, PTy))
2531 return false;
2532
2533 size_t numCmps = 0;
2534 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2535 // Single case counts one, case range - two.
2536 numCmps += (I->Low == I->High ? 1 : 2);
2537 }
2538
2539 // Count unique destinations
2540 SmallSet Dests;
2541 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2542 Dests.insert(I->BB);
2543 if (Dests.size() > 3)
2544 // Don't bother the code below, if there are too much unique destinations
2545 return false;
2546 }
2547 DEBUG(dbgs() << "Total number of unique destinations: "
2548 << Dests.size() << '\n'
2549 << "Total number of comparisons: " << numCmps << '\n');
2550
2551 // Compute span of values.
2552 const APInt& minValue = FrontCase.Low->getValue();
2553 const APInt& maxValue = BackCase.High->getValue();
2554 APInt cmpRange = maxValue - minValue;
2555
2556 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2557 << "Low bound: " << minValue << '\n'
2558 << "High bound: " << maxValue << '\n');
2559
2560 if (cmpRange.uge(IntPtrBits) ||
2561 (!(Dests.size() == 1 && numCmps >= 3) &&
2562 !(Dests.size() == 2 && numCmps >= 5) &&
2563 !(Dests.size() >= 3 && numCmps >= 6)))
2564 return false;
2565
2566 DEBUG(dbgs() << "Emitting bit tests\n");
2567 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2568
2569 // Optimize the case where all the case values fit in a
2570 // word without having to subtract minValue. In this case,
2571 // we can optimize away the subtraction.
2572 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
2573 cmpRange = maxValue;
2574 } else {
2575 lowBound = minValue;
2576 }
2577
2578 CaseBitsVector CasesBits;
2579 unsigned i, count = 0;
2580
2581 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2582 MachineBasicBlock* Dest = I->BB;
2583 for (i = 0; i < count; ++i)
2584 if (Dest == CasesBits[i].BB)
2585 break;
2586
2587 if (i == count) {
2588 assert((count < 3) && "Too much destinations to test!");
2589 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
2590 count++;
2591 }
2592
2593 const APInt& lowValue = I->Low->getValue();
2594 const APInt& highValue = I->High->getValue();
2595
2596 uint64_t lo = (lowValue - lowBound).getZExtValue();
2597 uint64_t hi = (highValue - lowBound).getZExtValue();
2598 CasesBits[i].ExtraWeight += I->ExtraWeight;
2599
2600 for (uint64_t j = lo; j <= hi; j++) {
2601 CasesBits[i].Mask |= 1ULL << j;
2602 CasesBits[i].Bits++;
2603 }
2604
2605 }
2606 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2607
2608 BitTestInfo BTC;
2609
2610 // Figure out which block is immediately after the current one.
2611 MachineFunction::iterator BBI = CR.CaseBB;
2612 ++BBI;
2613
2614 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2615
2616 DEBUG(dbgs() << "Cases:\n");
2617 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2618 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2619 << ", Bits: " << CasesBits[i].Bits
2620 << ", BB: " << CasesBits[i].BB << '\n');
2621
2622 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2623 CurMF->insert(BBI, CaseBB);
2624 BTC.push_back(BitTestCase(CasesBits[i].Mask,
2625 CaseBB,
2626 CasesBits[i].BB, CasesBits[i].ExtraWeight));
2627
2628 // Put SV in a virtual register to make it available from the new blocks.
2629 ExportFromCurrentBlock(SV);
2630 }
2631
2632 BitTestBlock BTB(lowBound, cmpRange, SV,
2633 -1U, MVT::Other, (CR.CaseBB == SwitchBB),
2634 CR.CaseBB, Default, std::move(BTC));
2635
2636 if (CR.CaseBB == SwitchBB)
2637 visitBitTestHeader(BTB, SwitchBB);
2638
2639 BitTestCases.push_back(std::move(BTB));
2640
2641 return true;
2642 }
2643
2644 void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) {
2645 BranchProbabilityInfo *BPI = FuncInfo.BPI;
2646
2647 // Extract cases from the switch and sort them.
2648 typedef std::pair CasePair;
2649 std::vector Sorted;
2650 Sorted.reserve(SI->getNumCases());
2651 for (auto I : SI->cases())
2652 Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex()));
2653 std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) {
2654 return a.first->getValue().slt(b.first->getValue());
2103 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2104 #ifndef NDEBUG
2105 for (const CaseCluster &CC : Clusters)
2106 assert(CC.Low == CC.High && "Input clusters must be single-case");
2107 #endif
2108
2109 std::sort(Clusters.begin(), Clusters.end(),
2110 [](const CaseCluster &a, const CaseCluster &b) {
2111 return a.Low->getValue().slt(b.Low->getValue());
26552112 });
26562113
2657 // Merge adjacent cases with the same destination, build Cases vector.
2658 assert(Cases.empty() && "Cases should be empty before Clusterify;");
2659 Cases.reserve(SI->getNumCases());
2660 MachineBasicBlock *PreviousSucc = nullptr;
2661 for (CasePair &CP : Sorted) {
2662 const ConstantInt *CaseVal = CP.first;
2663 unsigned SuccIndex = CP.second;
2664 MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)];
2665 uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0;
2666
2667 if (PreviousSucc == Succ &&
2668 (CaseVal->getValue() - Cases.back().High->getValue()) == 1) {
2114 // Merge adjacent clusters with the same destination.
2115 const unsigned N = Clusters.size();
2116 unsigned DstIndex = 0;
2117 for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2118 CaseCluster &CC = Clusters[SrcIndex];
2119 const ConstantInt *CaseVal = CC.Low;
2120 MachineBasicBlock *Succ = CC.MBB;
2121
2122 if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2123 (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
26692124 // If this case has the same successor and is a neighbour, merge it into
26702125 // the previous cluster.
2671 Cases.back().High = CaseVal;
2672 Cases.back().ExtraWeight += Weight;
2126 Clusters[DstIndex - 1].High = CaseVal;
2127 Clusters[DstIndex - 1].Weight += CC.Weight;
26732128 } else {
2674 Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight));
2675 }
2676
2677 PreviousSucc = Succ;
2678 }
2679
2680 DEBUG({
2681 size_t numCmps = 0;
2682 for (auto &I : Cases)
2683 // A range counts double, since it requires two compares.
2684 numCmps += I.Low != I.High ? 2 : 1;
2685
2686 dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2687 << ". Total compares: " << numCmps << '\n';
2688 });
2129 std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2130 sizeof(Clusters[SrcIndex]));
2131 }
2132 }
2133 Clusters.resize(DstIndex);
26892134 }
26902135
26912136 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
26992144 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
27002145 if (BitTestCases[i].Parent == First)
27012146 BitTestCases[i].Parent = Last;
2702 }
2703
2704 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2705 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2706
2707 // Create a vector of Cases, sorted so that we can efficiently create a binary
2708 // search tree from them.
2709 CaseVector Cases;
2710 Clusterify(Cases, &SI);
2711
2712 // Get the default destination MBB.
2713 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2714
2715 if (isa(SI.getDefaultDest()->getFirstNonPHIOrDbg()) &&
2716 !Cases.empty()) {
2717 // Replace an unreachable default destination with the most popular case
2718 // destination.
2719 DenseMap Popularity;
2720 unsigned MaxPop = 0;
2721 const BasicBlock *MaxBB = nullptr;
2722 for (auto I : SI.cases()) {
2723 const BasicBlock *BB = I.getCaseSuccessor();
2724 if (++Popularity[BB] > MaxPop) {
2725 MaxPop = Popularity[BB];
2726 MaxBB = BB;
2727 }
2728 }
2729
2730 // Set new default.
2731 assert(MaxPop > 0);
2732 assert(MaxBB);
2733 Default = FuncInfo.MBBMap[MaxBB];
2734
2735 // Remove cases that were pointing to the destination that is now the default.
2736 Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
2737 [&](const Case &C) { return C.BB == Default; }),
2738 Cases.end());
2739 }
2740
2741 // If there is only the default destination, go there directly.
2742 if (Cases.empty()) {
2743 // Update machine-CFG edges.
2744 SwitchMBB->addSuccessor(Default);
2745
2746 // If this is not a fall-through branch, emit the branch.
2747 if (Default != NextBlock(SwitchMBB)) {
2748 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
2749 getControlRoot(), DAG.getBasicBlock(Default)));
2750 }
2751 return;
2752 }
2753
2754 // Get the Value to be switched on.
2755 const Value *SV = SI.getCondition();
2756
2757 // Push the initial CaseRec onto the worklist
2758 CaseRecVector WorkList;
2759 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr,
2760 CaseRange(Cases.begin(),Cases.end())));
2761
2762 while (!WorkList.empty()) {
2763 // Grab a record representing a case range to process off the worklist
2764 CaseRec CR = WorkList.back();
2765 WorkList.pop_back();
2766
2767 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2768 continue;
2769
2770 // If the range has few cases (two or less) emit a series of specific
2771 // tests.
2772 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2773 continue;
2774
2775 // If the switch has more than N blocks, and is at least 40% dense, and the
2776 // target supports indirect branches, then emit a jump table rather than
2777 // lowering the switch to a binary tree of conditional branches.
2778 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
2779 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2780 continue;
2781
2782 // Emit binary tree. We need to pick a pivot, and push left and right ranges
2783 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
2784 handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB);
2785 }
27862147 }
27872148
27882149 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
78167177 HasTailCall = true;
78177178 }
78187179
7180 bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
7181 unsigned *TotalCases, unsigned First,
7182 unsigned Last) {
7183 assert(Last >= First);
7184 assert(TotalCases[Last] >= TotalCases[First]);
7185
7186 APInt LowCase = Clusters[First].Low->getValue();
7187 APInt HighCase = Clusters[Last].High->getValue();
7188 assert(LowCase.getBitWidth() == HighCase.getBitWidth());
7189
7190 // FIXME: A range of consecutive cases has 100% density, but only requires one
7191 // comparison to lower. We should discriminate against such consecutive ranges
7192 // in jump tables.
7193
7194 uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100);
7195 uint64_t Range = Diff + 1;
7196
7197 uint64_t NumCases =
7198 TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
7199
7200 assert(NumCases < UINT64_MAX / 100);
7201 assert(Range >= NumCases);
7202
7203 return NumCases * 100 >= Range * MinJumpTableDensity;
7204 }
7205
7206 static inline bool areJTsAllowed(const TargetLowering &TLI) {
7207 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
7208 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
7209 }
7210
7211 bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
7212 unsigned First, unsigned Last,
7213 const SwitchInst *SI,
7214 MachineBasicBlock *DefaultMBB,
7215 CaseCluster &JTCluster) {
7216 assert(First <= Last);
7217
7218 uint64_t Weight = 0;
7219 unsigned NumCmps = 0;
7220 std::vector Table;
7221 DenseMap JTWeights;
7222 for (unsigned I = First; I <= Last; ++I) {
7223 assert(Clusters[I].Kind == CC_Range);
7224 Weight += Clusters[I].Weight;
7225 APInt Low = Clusters[I].Low->getValue();
7226 APInt High = Clusters[I].High->getValue();
7227 NumCmps += (Low == High) ? 1 : 2;
7228 if (I != First) {
7229 // Fill the gap between this and the previous cluster.
7230 APInt PreviousHigh = Clusters[I - 1].High->getValue();
7231 assert(PreviousHigh.slt(Low));
7232 uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
7233 for (uint64_t J = 0; J < Gap; J++)
7234 Table.push_back(DefaultMBB);
7235 }
7236 for (APInt X = Low; X.sle(High); ++X)
7237 Table.push_back(Clusters[I].MBB);
7238 JTWeights[Clusters[I].MBB] += Clusters[I].Weight;
7239 }
7240
7241 unsigned NumDests = JTWeights.size();
7242 if (isSuitableForBitTests(NumDests, NumCmps,
7243 Clusters[First].Low->getValue(),
7244 Clusters[Last].High->getValue())) {
7245 // Clusters[First..Last] should be lowered as bit tests instead.
7246 return false;
7247 }
7248
7249 // Create the MBB that will load from and jump through the table.
7250 // Note: We create it here, but it's not inserted into the function yet.
7251 MachineFunction *CurMF = FuncInfo.MF;
7252 MachineBasicBlock *JumpTableMBB =
7253 CurMF->CreateMachineBasicBlock(SI->getParent());
7254
7255 // Add successors. Note: use table order for determinism.
7256 SmallPtrSet Done;
7257 for (MachineBasicBlock *Succ : Table) {
7258 if (Done.count(Succ))
7259 continue;
7260 addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]);
7261 Done.insert(Succ);
7262 }
7263
7264 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7265 unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
7266 ->createJumpTableIndex(Table);
7267
7268 // Set up the jump table info.
7269 JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
7270 JumpTableHeader JTH(Clusters[First].Low->getValue(),
7271 Clusters[Last].High->getValue(), SI->getCondition(),
7272 nullptr, false);
7273 JTCases.push_back(JumpTableBlock(JTH, JT));
7274
7275 JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
7276 JTCases.size() - 1, Weight);
7277 return true;
7278 }
7279
7280 void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
7281 const SwitchInst *SI,
7282 MachineBasicBlock *DefaultMBB) {
7283 #ifndef NDEBUG
7284 // Clusters must be non-empty, sorted, and only contain Range clusters.
7285 assert(!Clusters.empty());
7286 for (CaseCluster &C : Clusters)
7287 assert(C.Kind == CC_Range);
7288 for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
7289 assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
7290 #endif
7291
7292 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7293 if (!areJTsAllowed(TLI))
7294 return;
7295
7296 const int64_t N = Clusters.size();
7297 const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
7298
7299 // Split Clusters into minimum number of dense partitions. The algorithm uses
7300 // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
7301 // for the Case Statement'" (1994), but builds the MinPartitions array in
7302 // reverse order to make it easier to reconstruct the partitions in ascending
7303 // order. In the choice between two optimal partitionings, it picks the one
7304 // which yields more jump tables.
7305
7306 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
7307 SmallVector MinPartitions(N);
7308 // LastElement[i] is the last element of the partition starting at i.
7309 SmallVector LastElement(N);
7310 // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
7311 SmallVector NumTables(N);
7312 // TotalCases[i]: Total nbr of cases in Clusters[0..i].
7313 SmallVector TotalCases(N);
7314
7315 for (unsigned i = 0; i < N; ++i) {
7316 APInt Hi = Clusters[i].High->getValue();
7317 APInt Lo = Clusters[i].Low->getValue();
7318 TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
7319 if (i != 0)
7320 TotalCases[i] += TotalCases[i - 1];
7321 }
7322
7323 // Base case: There is only one way to partition Clusters[N-1].
7324 MinPartitions[N - 1] = 1;
7325 LastElement[N - 1] = N - 1;
7326 assert(MinJumpTableSize > 1);
7327 NumTables[N - 1] = 0;
7328
7329 // Note: loop indexes are signed to avoid underflow.
7330 for (int64_t i = N - 2; i >= 0; i--) {
7331 // Find optimal partitioning of Clusters[i..N-1].
7332 // Baseline: Put Clusters[i] into a partition on its own.
7333 MinPartitions[i] = MinPartitions[i + 1] + 1;
7334 LastElement[i] = i;
7335 NumTables[i] = NumTables[i + 1];
7336
7337 // Search for a solution that results in fewer partitions.
7338 for (int64_t j = N - 1; j > i; j--) {
7339 // Try building a partition from Clusters[i..j].
7340 if (isDense(Clusters, &TotalCases[0], i, j)) {
7341 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
7342 bool IsTable = j - i + 1 >= MinJumpTableSize;
7343 unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
7344
7345 // If this j leads to fewer partitions, or same number of partitions
7346 // with more lookup tables, it is a better partitioning.
7347 if (NumPartitions < MinPartitions[i] ||
7348 (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
7349 MinPartitions[i] = NumPartitions;
7350 LastElement[i] = j;
7351 NumTables[i] = Tables;
7352 }
7353 }
7354 }
7355 }
7356
7357 // Iterate over the partitions, replacing some with jump tables in-place.
7358 unsigned DstIndex = 0;
7359 for (unsigned First = 0, Last; First < N; First = Last + 1) {
7360 Last = LastElement[First];
7361 assert(Last >= First);
7362 assert(DstIndex <= First);
7363 unsigned NumClusters = Last - First + 1;
7364
7365 CaseCluster JTCluster;
7366 if (NumClusters >= MinJumpTableSize &&
7367 buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
7368 Clusters[DstIndex++] = JTCluster;
7369 } else {
7370 for (unsigned I = First; I <= Last; ++I)
7371 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
7372 }
7373 }
7374 Clusters.resize(DstIndex);
7375 }
7376
7377 bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) {
7378 // FIXME: Using the pointer type doesn't seem ideal.
7379 uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
7380 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
7381 return Range <= BW;
7382 }
7383
7384 bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests,
7385 unsigned NumCmps,
7386 const APInt &Low,
7387 const APInt &High) {
7388 // FIXME: I don't think NumCmps is the correct metric: a single case and a
7389 // range of cases both require only one branch to lower. Just looking at the
7390 // number of clusters and destinations should be enough to decide whether to
7391 // build bit tests.
7392
7393 // To lower a range with bit tests, the range must fit the bitwidth of a
7394 // machine word.
7395 if (!rangeFitsInWord(Low, High))
7396 return false;
7397
7398 // Decide whether it's profitable to lower this range with bit tests. Each
7399 // destination requires a bit test and branch, and there is an overall range
7400 // check branch. For a small number of clusters, separate comparisons might be
7401 // cheaper, and for many destinations, splitting the range might be better.
7402 return (NumDests == 1 && NumCmps >= 3) ||
7403 (NumDests == 2 && NumCmps >= 5) ||
7404 (NumDests == 3 && NumCmps >= 6);
7405 }
7406
7407 bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
7408 unsigned First, unsigned Last,
7409 const SwitchInst *SI,
7410 CaseCluster &BTCluster) {
7411 assert(First <= Last);
7412 if (First == Last)
7413 return false;
7414
7415 BitVector Dests(FuncInfo.MF->getNumBlockIDs());
7416 unsigned NumCmps = 0;
7417 for (int64_t I = First; I <= Last; ++I) {
7418 assert(Clusters[I].Kind == CC_Range);
7419 Dests.set(Clusters[I].MBB->getNumber());
7420 NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
7421 }
7422 unsigned NumDests = Dests.count();
7423
7424 APInt Low = Clusters[First].Low->getValue();
7425 APInt High = Clusters[Last].High->getValue();
7426 assert(Low.slt(High));
7427
7428 if (!isSuitableForBitTests(NumDests, NumCmps, Low, High))
7429 return false;
7430
7431 APInt LowBound;
7432 APInt CmpRange;
7433
7434 const int BitWidth =
7435 DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits();
7436 assert((High - Low + 1).sle(BitWidth) && "Case range must fit in bit mask!");
7437
7438 if (Low.isNonNegative() && High.slt(BitWidth)) {
7439 // Optimize the case where all the case values fit in a
7440 // word without having to subtract minValue. In this case,
7441 // we can optimize away the subtraction.
7442 LowBound = APInt::getNullValue(Low.getBitWidth());
7443 CmpRange = High;
7444 } else {
7445 LowBound = Low;
7446 CmpRange = High - Low;
7447 }
7448
7449 CaseBitsVector CBV;
7450 uint64_t TotalWeight = 0;
7451 for (unsigned i = First; i <= Last; ++i) {
7452 // Find the CaseBits for this destination.
7453 unsigned j;
7454 for (j = 0; j < CBV.size(); ++j)
7455 if (CBV[j].BB == Clusters[i].MBB)
7456 break;
7457 if (j == CBV.size())
7458 CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0));
7459 CaseBits *CB = &CBV[j];
7460
7461 // Update Mask, Bits and ExtraWeight.
7462 uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
7463 uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
7464 for (uint64_t j = Lo; j <= Hi; ++j) {
7465 CB->Mask |= 1ULL << j;
7466 CB->Bits++;
7467 }
7468 CB->ExtraWeight += Clusters[i].Weight;
7469 TotalWeight += Clusters[i].Weight;
7470 }
7471
7472 BitTestInfo BTI;
7473 std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
7474 // FIXME: Sort by weight.
7475 return a.Bits > b.Bits;
7476 });
7477
7478 for (auto &CB : CBV) {
7479 MachineBasicBlock *BitTestBB =
7480 FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
7481 BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
7482 }
7483 BitTestCases.push_back(BitTestBlock(LowBound, CmpRange, SI->getCondition(),
7484 -1U, MVT::Other, false, nullptr,
7485 nullptr, std::move(BTI)));
7486
7487 BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
7488 BitTestCases.size() - 1, TotalWeight);
7489 return true;
7490 }
7491
7492 void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
7493 const SwitchInst *SI) {
7494 // Partition Clusters into as few subsets as possible, where each subset has a
7495 // range that fits in a machine word and has <= 3 unique destinations.
7496
7497 #ifndef NDEBUG
7498 // Clusters must be sorted and contain Range or JumpTable clusters.
7499 assert(!Clusters.empty());
7500 assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
7501 for (const CaseCluster &C : Clusters)
7502 assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
7503 for (unsigned i = 1; i < Clusters.size(); ++i)
7504 assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
7505 #endif
7506
7507 // If target does not have legal shift left, do not emit bit tests at all.
7508 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7509 EVT PTy = TLI.getPointerTy();
7510 if (!TLI.isOperationLegal(ISD::SHL, PTy))
7511 return;
7512
7513 int BitWidth = PTy.getSizeInBits();
7514 const int64_t N = Clusters.size();
7515
7516 // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
7517 SmallVector MinPartitions(N);
7518 // LastElement[i] is the last element of the partition starting at i.
7519 SmallVector LastElement(N);
7520
7521 // FIXME: This might not be the best algorithm for finding bit test clusters.
7522
7523 // Base case: There is only one way to partition Clusters[N-1].
7524 MinPartitions[N - 1] = 1;
7525 LastElement[N - 1] = N - 1;
7526
7527 // Note: loop indexes are signed to avoid underflow.
7528 for (int64_t i = N - 2; i >= 0; --i) {
7529 // Find optimal partitioning of Clusters[i..N-1].
7530 // Baseline: Put Clusters[i] into a partition on its own.
7531 MinPartitions[i] = MinPartitions[i + 1] + 1;
7532 LastElement[i] = i;
7533
7534 // Search for a solution that results in fewer partitions.
7535 // Note: the search is limited by BitWidth, reducing time complexity.
7536 for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
7537 // Try building a partition from Clusters[i..j].
7538
7539 // Check the range.
7540 if (!rangeFitsInWord(Clusters[i].Low->getValue(),
7541 Clusters[j].High->getValue()))
7542 continue;
7543
7544 // Check nbr of destinations and cluster types.
7545 // FIXME: This works, but doesn't seem very efficient.
7546 bool RangesOnly = true;
7547 BitVector Dests(FuncInfo.MF->getNumBlockIDs());
7548 for (int64_t k = i; k <= j; k++) {
7549 if (Clusters[k].Kind != CC_Range) {
7550 RangesOnly = false;
7551 break;
7552 }
7553 Dests.set(Clusters[k].MBB->getNumber());
7554 }
7555 if (!RangesOnly || Dests.count() > 3)
7556 break;
7557
7558 // Check if it's a better partition.
7559 unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
7560 if (NumPartitions < MinPartitions[i]) {
7561 // Found a better partition.
7562 MinPartitions[i] = NumPartitions;
7563 LastElement[i] = j;
7564 }
7565 }
7566 }
7567
7568 // Iterate over the partitions, replacing with bit-test clusters in-place.
7569 unsigned DstIndex = 0;
7570 for (unsigned First = 0, Last; First < N; First = Last + 1) {
7571 Last = LastElement[First];
7572 assert(First <= Last);
7573 assert(DstIndex <= First);
7574
7575 CaseCluster BitTestCluster;
7576 if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
7577 Clusters[DstIndex++] = BitTestCluster;
7578 } else {
7579 for (unsigned I = First; I <= Last; ++I)
7580 std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
7581 }
7582 }
7583 Clusters.resize(DstIndex);
7584 }
7585
7586 void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
7587 MachineBasicBlock *SwitchMBB,
7588 MachineBasicBlock *DefaultMBB) {
7589 MachineFunction *CurMF = FuncInfo.MF;
7590 MachineBasicBlock *NextMBB = nullptr;
7591 MachineFunction::iterator BBI = W.MBB;
7592 if (++BBI != FuncInfo.MF->end())
7593 NextMBB = BBI;
7594
7595 unsigned Size = W.LastCluster - W.FirstCluster + 1;
7596
7597 BranchProbabilityInfo *BPI = FuncInfo.BPI;
7598
7599 if (Size == 2 && W.MBB == SwitchMBB) {
7600 // If any two of the cases has the same destination, and if one value
7601 // is the same as the other, but has one bit unset that the other has set,
7602 // use bit manipulation to do two compares at once. For example:
7603 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
7604 // TODO: This could be extended to merge any 2 cases in switches with 3
7605 // cases.
7606 // TODO: Handle cases where W.CaseBB != SwitchBB.
7607 CaseCluster &Small = *W.FirstCluster;
7608 CaseCluster &Big = *W.LastCluster;
7609
7610 if (Small.Low == Small.High && Big.Low == Big.High &&
7611 Small.MBB == Big.MBB) {
7612 const APInt &SmallValue = Small.Low->getValue();
7613 const APInt &BigValue = Big.Low->getValue();
7614
7615 // Check that there is only one bit different.
7616 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
7617 (SmallValue | BigValue) == BigValue) {
7618 // Isolate the common bit.
7619 APInt CommonBit = BigValue & ~SmallValue;
7620 assert((SmallValue | CommonBit) == BigValue &&
7621 CommonBit.countPopulation() == 1 && "Not a common bit?");
7622
7623 SDValue CondLHS = getValue(Cond);
7624 EVT VT = CondLHS.getValueType();
7625 SDLoc DL = getCurSDLoc();
7626
7627 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
7628 DAG.getConstant(CommonBit, VT));
7629 SDValue Cond = DAG.getSetCC(DL, MVT::i1, Or,
7630 DAG.getConstant(BigValue, VT), ISD::SETEQ);
7631
7632 // Update successor info.
7633 // Both Small and Big will jump to Small.BB, so we sum up the weights.
7634 addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
7635 addSuccessorWithWeight(
7636 SwitchMBB, DefaultMBB,
7637 // The default destination is the first successor in IR.
7638 BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
7639 : 0);
7640
7641 // Insert the true branch.
7642 SDValue BrCond =
7643 DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
7644 DAG.getBasicBlock(Small.MBB));
7645 // Insert the false branch.
7646 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
7647 DAG.getBasicBlock(DefaultMBB));
7648
7649 DAG.setRoot(BrCond);
7650 return;
7651 }
7652 }
7653 }
7654
7655 if (TM.getOptLevel() != CodeGenOpt::None) {
7656 // Order cases by weight so the most likely case will be checked first.
7657 std::sort(W.FirstCluster, W.LastCluster + 1,
7658 [](const CaseCluster &a, const CaseCluster &b) {
7659 return a.Weight > b.Weight;
7660 });
7661
7662 // Rearrange the case blocks so that the last one falls through if possible.
7663 // Start at the bottom as that's the case with the lowest weight.
7664 // FIXME: Take branch probability into account.
7665 for (CaseClusterIt I = W.LastCluster - 1; I >= W.FirstCluster; --I) {
7666 if (I->Kind == CC_Range && I->MBB == NextMBB) {
7667 std::swap(*I, *W.LastCluster);
7668 break;
7669 }
7670 }
7671 }
7672
7673 // Compute total weight.
7674 uint32_t UnhandledWeights = 0;
7675 for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
7676 UnhandledWeights += I->Weight;
7677
7678 MachineBasicBlock *CurMBB = W.MBB;
7679 for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
7680 MachineBasicBlock *Fallthrough;
7681 if (I == W.LastCluster) {
7682 // For the last cluster, fall through to the default destination.
7683 Fallthrough = DefaultMBB;
7684 } else {
7685 Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
7686 CurMF->insert(BBI, Fallthrough);
7687 // Put Cond in a virtual register to make it available from the new blocks.
7688 ExportFromCurrentBlock(Cond);
7689 }
7690
7691 switch (I->Kind) {
7692 case CC_JumpTable: {
7693 // FIXME: Optimize away range check based on pivot comparisons.
7694 JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
7695 JumpTable *JT = &JTCases[I->JTCasesIndex].second;
7696
7697 // The jump block hasn't been inserted yet; insert it here.
7698 MachineBasicBlock *JumpMBB = JT->MBB;
7699 CurMF->insert(BBI, JumpMBB);
7700 addSuccessorWithWeight(CurMBB, Fallthrough);
7701 addSuccessorWithWeight(CurMBB, JumpMBB);
7702
7703 // The jump table header will be inserted in our current block, do the
7704 // range check, and fall through to our fallthrough block.
7705 JTH->HeaderBB = CurMBB;
7706 JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
7707
7708 // If we're in the right place, emit the jump table header right now.
7709 if (CurMBB == SwitchMBB) {
7710 visitJumpTableHeader(*JT, *JTH, SwitchMBB);
7711 JTH->Emitted = true;
7712 }
7713 break;
7714 }
7715 case CC_BitTests: {
7716 // FIXME: Optimize away range check based on pivot comparisons.
7717 BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
7718
7719 // The bit test blocks haven't been inserted yet; insert them here.
7720 for (BitTestCase &BTC : BTB->Cases)
7721 CurMF->insert(BBI, BTC.ThisBB);
7722
7723 // Fill in fields of the BitTestBlock.
7724 BTB->Parent = CurMBB;
7725 BTB->Default = Fallthrough;
7726
7727 // If we're in the right place, emit the bit test header header right now.
7728 if (CurMBB ==SwitchMBB) {
7729 visitBitTestHeader(*BTB, SwitchMBB);
7730 BTB->Emitted = true;
7731 }
7732 break;
7733 }
7734 case CC_Range: {
7735 const Value *RHS, *LHS, *MHS;
7736 ISD::CondCode CC;
7737 if (I->Low == I->High) {
7738 // Check Cond == I->Low.
7739 CC = ISD::SETEQ;
7740 LHS = Cond;
7741 RHS=I->Low;
7742 MHS = nullptr;
7743 } else {
7744 // Check I->Low <= Cond <= I->High.
7745 CC = ISD::SETLE;
7746 LHS = I->Low;
7747 MHS = Cond;
7748 RHS = I->High;
7749 }
7750
7751 // The false weight is the sum of all unhandled cases.
7752 UnhandledWeights -= I->Weight;
7753 CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight,
7754 UnhandledWeights);
7755
7756 if (CurMBB == SwitchMBB)
7757 visitSwitchCase(CB, SwitchMBB);
7758 else
7759 SwitchCases.push_back(CB);
7760
7761 break;
7762 }
7763 }
7764 CurMBB = Fallthrough;
7765 }
7766 }
7767
7768 void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
7769 const SwitchWorkListItem &W,
7770 Value *Cond,
7771 MachineBasicBlock *SwitchMBB) {
7772 assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
7773 "Clusters not sorted?");
7774
7775 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
7776 assert(NumClusters >= 2 && "Too small to split!");
7777
7778 // FIXME: When we have profile info, we might want to balance the tree based
7779 // on weights instead of node count.
7780
7781 CaseClusterIt PivotCluster = W.FirstCluster + NumClusters / 2;
7782 CaseClusterIt FirstLeft = W.FirstCluster;
7783 CaseClusterIt LastLeft = PivotCluster - 1;
7784 CaseClusterIt FirstRight = PivotCluster;
7785 CaseClusterIt LastRight = W.LastCluster;
7786 const ConstantInt *Pivot = PivotCluster->Low;
7787
7788 // New blocks will be inserted immediately after the current one.
7789 MachineFunction::iterator BBI = W.MBB;
7790 ++BBI;
7791
7792 // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
7793 // we can branch to its destination directly if it's squeezed exactly in
7794 // between the known lower bound and Pivot - 1.
7795 MachineBasicBlock *LeftMBB;
7796 if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
7797 FirstLeft->Low == W.GE &&
7798 (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
7799 LeftMBB = FirstLeft->MBB;
7800 } else {
7801 LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
7802 FuncInfo.MF->insert(BBI, LeftMBB);
7803 WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot});
7804 // Put Cond in a virtual register to make it available from the new blocks.
7805 ExportFromCurrentBlock(Cond);
7806 }
7807
7808 // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
7809 // single cluster, RHS.Low == Pivot, and we can branch to its destination
7810 // directly if RHS.High equals the current upper bound.
7811 MachineBasicBlock *RightMBB;
7812 if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
7813 W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
7814 RightMBB = FirstRight->MBB;
7815 } else {
7816 RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
7817 FuncInfo.MF->insert(BBI, RightMBB);
7818 WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT});
7819 // Put Cond in a virtual register to make it available from the new blocks.
7820 ExportFromCurrentBlock(Cond);
7821 }
7822
7823 // Create the CaseBlock record that will be used to lower the branch.
7824 CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB);
7825
7826 if (W.MBB == SwitchMBB)
7827 visitSwitchCase(CB, SwitchMBB);
7828 else
7829 SwitchCases.push_back(CB);
7830 }
7831
7832 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
7833 // Extract cases from the switch.
7834 BranchProbabilityInfo *BPI = FuncInfo.BPI;
7835 CaseClusterVector Clusters;
7836 Clusters.reserve(SI.getNumCases());
7837 for (auto I : SI.cases()) {
7838 MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
7839 const ConstantInt *CaseVal = I.getCaseValue();
7840 uint32_t Weight = 0; // FIXME: Use 1 instead?
7841 if (BPI)
7842 Weight = BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex());
7843 Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight));
7844 }
7845
7846 MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
7847
7848 if (TM.getOptLevel() != CodeGenOpt::None) {
7849 // Cluster adjacent cases with the same destination.
7850 sortAndRangeify(Clusters);
7851
7852 // Replace an unreachable default with the most popular destination.
7853 // FIXME: Exploit unreachable default more aggressively.
7854 bool UnreachableDefault =
7855 isa(SI.getDefaultDest()->getFirstNonPHIOrDbg());
7856 if (UnreachableDefault && !Clusters.empty()) {
7857 DenseMap Popularity;
7858 unsigned MaxPop = 0;
7859 const BasicBlock *MaxBB = nullptr;
7860 for (auto I : SI.cases()) {
7861 const BasicBlock *BB = I.getCaseSuccessor();
7862 if (++Popularity[BB] > MaxPop) {
7863 MaxPop = Popularity[BB];
7864 MaxBB = BB;
7865 }
7866 }
7867 // Set new default.
7868 assert(MaxPop > 0 && MaxBB);
7869 DefaultMBB = FuncInfo.MBBMap[MaxBB];
7870
7871 // Remove cases that were pointing to the destination that is now the
7872 // default.
7873 CaseClusterVector New;
7874 New.reserve(Clusters.size());
7875 for (CaseCluster &CC : Clusters) {
7876 if (CC.MBB != DefaultMBB)
7877 New.push_back(CC);
7878 }
7879 Clusters = std::move(New);
7880 }
7881 }
7882
7883 // If there is only the default destination, jump there directly.
7884 MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
7885 if (Clusters.empty()) {
7886 SwitchMBB->addSuccessor(DefaultMBB);
7887 if (DefaultMBB != NextBlock(SwitchMBB)) {
7888 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
7889 getControlRoot(), DAG.getBasicBlock(SwitchMBB)));
7890 }
7891 return;
7892 }
7893
7894 if (TM.getOptLevel() != CodeGenOpt::None) {
7895 findJumpTables(Clusters, &SI, DefaultMBB);
7896 findBitTestClusters(Clusters, &SI);
7897 }
7898
7899
7900 DEBUG({
7901 dbgs() << "Case clusters: ";
7902 for (const CaseCluster &C : Clusters) {
7903 if (C.Kind == CC_JumpTable) dbgs() << "JT:";
7904 if (C.Kind == CC_BitTests) dbgs() << "BT:";
7905
7906 C.Low->getValue().print(dbgs(), true);
7907 if (C.Low != C.High) {
7908 dbgs() << '-';
7909 C.High->getValue().print(dbgs(), true);
7910 }
7911 dbgs() << ' ';
7912 }
7913 dbgs() << '\n';
7914 });
7915
7916 assert(!Clusters.empty());
7917 SwitchWorkList WorkList;
7918 CaseClusterIt First = Clusters.begin();
7919 CaseClusterIt Last = Clusters.end() - 1;
7920 WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr});
7921
7922 while (!WorkList.empty()) {
7923 SwitchWorkListItem W = WorkList.back();
7924 WorkList.pop_back();
7925 unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
7926
7927 if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
7928 // For optimized builds, lower large range as a balanced binary tree.
7929 splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
7930 continue;
7931 }
7932
7933 lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
7934 }
7935 }
133133 /// SDNodes we create.
134134 unsigned SDNodeOrder;
135135
136 /// Case - A struct to record the Value for a switch case, and the
137 /// case's target basic block.
138 struct Case {
139 const ConstantInt *Low;
140 const ConstantInt *High;
141 MachineBasicBlock* BB;
142 uint32_t ExtraWeight;
143
144 Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
145 Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
146 uint32_t extraweight) : Low(low), High(high), BB(bb),
147 ExtraWeight(extraweight) { }
148
149 APInt size() const {
150 const APInt &rHigh = High->getValue();
151 const APInt &rLow = Low->getValue();
152 return (rHigh - rLow + 1ULL);
136 enum CaseClusterKind {
137 /// A cluster of adjacent case labels with the same destination, or just one
138 /// case.
139 CC_Range,
140 /// A cluster of cases suitable for jump table lowering.
141 CC_JumpTable,
142 /// A cluster of cases suitable for bit test lowering.
143 CC_BitTests
144 };
145
146 /// A cluster of case labels.
147 struct CaseCluster {
148 CaseClusterKind Kind;
149 const ConstantInt *Low, *High;
150 union {
151 MachineBasicBlock *MBB;
152 unsigned JTCasesIndex;
153 unsigned BTCasesIndex;
154 };
155 uint64_t Weight;
156
157 static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
158 MachineBasicBlock *MBB, uint32_t Weight) {
159 CaseCluster C;
160 C.Kind = CC_Range;
161 C.Low = Low;
162 C.High = High;
163 C.MBB = MBB;
164 C.Weight = Weight;
165 return C;
153166 }
154 };
167
168 static CaseCluster jumpTable(const ConstantInt *Low,
169 const ConstantInt *High, unsigned JTCasesIndex,
170 uint32_t Weight) {
171 CaseCluster C;
172 C.Kind = CC_JumpTable;
173 C.Low = Low;
174 C.High = High;
175 C.JTCasesIndex = JTCasesIndex;
176 C.Weight = Weight;
177 return C;
178 }
179
180 static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
181 unsigned BTCasesIndex, uint32_t Weight) {
182 CaseCluster C;
183 C.Kind = CC_BitTests;
184 C.Low = Low;
185 C.High = High;
186 C.BTCasesIndex = BTCasesIndex;
187 C.Weight = Weight;
188 return C;
189 }
190 };
191
192 typedef std::vector CaseClusterVector;
193 typedef CaseClusterVector::iterator CaseClusterIt;
155194
156195 struct CaseBits {
157196 uint64_t Mask;
162201 CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
163202 uint32_t Weight):
164203 Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
165 };
166
167 typedef std::vector CaseVector;
168 typedef std::vector CaseBitsVector;
169 typedef CaseVector::iterator CaseItr;
170 typedef std::pair CaseRange;
171
172 /// CaseRec - A struct with ctor used in lowering switches to a binary tree
173 /// of conditional branches.
174 struct CaseRec {
175 CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
176 CaseRange r) :
177 CaseBB(bb), LT(lt), GE(ge), Range(r) {}
178
179 /// CaseBB - The MBB in which to emit the compare and branch
180 MachineBasicBlock *CaseBB;
181 /// LT, GE - If nonzero, we know the current case value must be less-than or
182 /// greater-than-or-equal-to these Constants.
183 const ConstantInt *LT;
184 const ConstantInt *GE;
185 /// Range - A pair of iterators representing the range of case values to be
186 /// processed at this point in the binary search tree.
187 CaseRange Range;
188 };
189
190 typedef std::vector CaseRecVector;
191
192 struct CaseBitsCmp {
193 bool operator()(const CaseBits &C1, const CaseBits &C2) {
194 return C1.Bits > C2.Bits;
195 }
196 };
197
198 /// Populate Cases with the cases in SI, clustering adjacent cases with the
199 /// same destination together.
200 void Clusterify(CaseVector &Cases, const SwitchInst *SI);
204
205 CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
206 };
207
208 typedef std::vector CaseBitsVector;
209
210 /// Sort Clusters and merge adjacent cases.
211 void sortAndRangeify(CaseClusterVector &Clusters);
201212
202213 /// CaseBlock - This structure is used to communicate between
203214 /// SelectionDAGBuilder and SDISel for the code generation of additional basic
287298 BitTestInfo Cases;
288299 };
289300
301 /// Minimum jump table density, in percent.
302 enum { MinJumpTableDensity = 40 };
303
304 /// Check whether a range of clusters is dense enough for a jump table.
305 bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
306 unsigned First, unsigned Last);
307
308 /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
309 /// decides it's not a good idea.
310 bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
311 unsigned Last, const SwitchInst *SI,
312 MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
313
314 /// Find clusters of cases suitable for jump table lowering.
315 void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
316 MachineBasicBlock *DefaultMBB);
317
318 /// Check whether the range [Low,High] fits in a machine word.
319 bool rangeFitsInWord(const APInt &Low, const APInt &High);
320
321 /// Check whether these clusters are suitable for lowering with bit tests based
322 /// on the number of destinations, comparison metric, and range.
323 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
324 const APInt &Low, const APInt &High);
325
326 /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
327 /// decides it's not a good idea.
328 bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
329 const SwitchInst *SI, CaseCluster &BTCluster);
330
331 /// Find clusters of cases suitable for bit test lowering.
332 void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
333
334 struct SwitchWorkListItem {
335 MachineBasicBlock *MBB;
336 CaseClusterIt FirstCluster;
337 CaseClusterIt LastCluster;
338 const ConstantInt *GE;
339 const ConstantInt *LT;
340 };
341 typedef SmallVector SwitchWorkList;
342
343 /// Emit comparison and split W into two subtrees.
344 void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
345 Value *Cond, MachineBasicBlock *SwitchMBB);
346
347 /// Lower W.
348 void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
349 MachineBasicBlock *SwitchMBB,
350 MachineBasicBlock *DefaultMBB);
351
352
290353 /// A class which encapsulates all of the information needed to generate a
291354 /// stack protector check and signals to isel via its state being initialized
292355 /// that a stack protector needs to be generated.
668731 void visitSwitch(const SwitchInst &I);
669732 void visitIndirectBr(const IndirectBrInst &I);
670733 void visitUnreachable(const UnreachableInst &I);
671
672 // Helpers for visitSwitch
673 bool handleSmallSwitchRange(CaseRec& CR,
674 CaseRecVector& WorkList,
675 const Value* SV,
676 MachineBasicBlock* Default,
677 MachineBasicBlock *SwitchBB);
678 bool handleJTSwitchCase(CaseRec& CR,
679 CaseRecVector& WorkList,
680 const Value* SV,
681 MachineBasicBlock* Default,
682 MachineBasicBlock *SwitchBB);
683 bool handleBTSplitSwitchCase(CaseRec& CR,
684 CaseRecVector& WorkList,
685 const Value* SV,
686 MachineBasicBlock *SwitchBB);
687 void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
688 const Value *SV, MachineBasicBlock *SwitchBB);
689 bool handleBitTestsSwitchCase(CaseRec& CR,
690 CaseRecVector& WorkList,
691 const Value* SV,
692 MachineBasicBlock* Default,
693 MachineBasicBlock *SwitchBB);
694734
695735 uint32_t getEdgeWeight(const MachineBasicBlock *Src,
696736 const MachineBasicBlock *Dst) const;
14581458 << FuncInfo->PHINodesToUpdate[i].first
14591459 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
14601460
1461 const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
1462 SDB->JTCases.empty() &&
1463 SDB->BitTestCases.empty();
1464
14651461 // Next, now that we know what the last MBB the LLVM BB expanded is, update
14661462 // PHI nodes in successors.
1467 if (MustUpdatePHINodes) {
1468 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1469 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1470 assert(PHI->isPHI() &&
1471 "This is not a machine PHI node that we are updating!");
1472 if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
1473 continue;
1474 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
1475 }
1463 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1464 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1465 assert(PHI->isPHI() &&
1466 "This is not a machine PHI node that we are updating!");
1467 if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
1468 continue;
1469 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
14761470 }
14771471
14781472 // Handle stack protector.
15161510 // Clear the Per-BB State.
15171511 SDB->SPDescriptor.resetPerBBState();
15181512 }
1519
1520 // If we updated PHI Nodes, return early.
1521 if (MustUpdatePHINodes)
1522 return;
15231513
15241514 for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
15251515 // Lower header first, if it wasn't already lowered
16331623 }
16341624 }
16351625 SDB->JTCases.clear();
1636
1637 // If the switch block involved a branch to one of the actual successors, we
1638 // need to update PHI nodes in that block.
1639 for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
1640 MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
1641 assert(PHI->isPHI() &&
1642 "This is not a machine PHI node that we are updating!");
1643 if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
1644 PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
1645 }
16461626
16471627 // If we generated any switch lowering information, build and codegen any
16481628 // additional DAGs necessary.
33
44 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
55 ; CHECK-LABEL: t1:
6 ; CHECK: cmp r2, #1
7 ; CHECK: cmpne r2, #7
6 ; CHECK: cmp r2, #7
7 ; CHECK: cmpne r2, #1
88 switch i32 %c, label %cond_next [
99 i32 1, label %cond_true
1010 i32 7, label %cond_true
193193 %18 = load i32, i32* %mb_type, align 4
194194 switch i32 %18, label %for.inc503 [
195195 i32 9, label %if.then475
196 i32 10, label %if.then475
196 i32 11, label %if.then475
197197 i32 13, label %if.then475
198198 i32 14, label %if.then475
199199 ]
1616 ; CHECK: BB#0: derived from LLVM BB %entry
1717 ; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
1818 ; CHECK: BB#4: derived from LLVM BB %entry
19 ; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
19 ; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
2020 ; CHECK: BB#5: derived from LLVM BB %entry
21 ; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
21 ; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
2222
2323 sw.bb:
2424 br label %return
None ; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
1 ; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
0 ; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
1 ; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
22
33 ; Test correct code generation for medium and large code model
44 ; for loading the address of a jump table from the TOC.
11 ; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM %s
22 ; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
33 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
4
5 ; Run jump table test separately since jump tables aren't generated at -O0.
6 ; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
7 ; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
8 ; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
9 ; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
410
511 ; FIXME: When asm-parse is available, could make this an assembly test.
612
9197 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
9298 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
9399
100 @ti = common global i32 0, align 4
101
102 define signext i32 @test_tentative() nounwind {
103 entry:
104 %0 = load i32, i32* @ti, align 4
105 %inc = add nsw i32 %0, 1
106 store i32 %inc, i32* @ti, align 4
107 ret i32 %0
108 }
109
110 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
111 ; accessing tentatively declared variable ti.
112 ;
113 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
114 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
115 ;
116 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
117 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
118
119 define i8* @test_fnaddr() nounwind {
120 entry:
121 %func = alloca i32 (i32)*, align 8
122 store i32 (i32)* @foo, i32 (i32)** %func, align 8
123 %0 = load i32 (i32)*, i32 (i32)** %func, align 8
124 %1 = bitcast i32 (i32)* %0 to i8*
125 ret i8* %1
126 }
127
128 declare signext i32 @foo(i32 signext)
129
130 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
131 ; accessing function address foo.
132 ;
133 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
134 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
135 ;
136 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
137 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
138
139
94140 define signext i32 @test_jump_table(i32 signext %i) nounwind {
95141 entry:
96142 %i.addr = alloca i32, align 4
138184 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
139185 ; accessing a jump table address.
140186 ;
141 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
142 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
187 ; MEDIUM-JT: Relocations [
188 ; MEDIUM-JT: Section (2) .rela.text {
189 ; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
190 ; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
143191 ;
144 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
145 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
146
147 @ti = common global i32 0, align 4
148
149 define signext i32 @test_tentative() nounwind {
150 entry:
151 %0 = load i32, i32* @ti, align 4
152 %inc = add nsw i32 %0, 1
153 store i32 %inc, i32* @ti, align 4
154 ret i32 %0
155 }
156
157 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
158 ; accessing tentatively declared variable ti.
159 ;
160 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
161 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
162 ;
163 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
164 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
165
166 define i8* @test_fnaddr() nounwind {
167 entry:
168 %func = alloca i32 (i32)*, align 8
169 store i32 (i32)* @foo, i32 (i32)** %func, align 8
170 %0 = load i32 (i32)*, i32 (i32)** %func, align 8
171 %1 = bitcast i32 (i32)* %0 to i8*
172 ret i8* %1
173 }
174
175 declare signext i32 @foo(i32 signext)
176
177 ; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
178 ; accessing function address foo.
179 ;
180 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
181 ; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
182 ;
183 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
184 ; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
192 ; LARGE-JT: Relocations [
193 ; LARGE-JT: Section (2) .rela.text {
194 ; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
195 ; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
5454 ]
5555
5656 bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
57 call void @_Z3bari( i32 0 )
5758 br label %bb1
5859
5960 bb1: ; preds = %bb, %entry
61 call void @_Z3bari( i32 1 )
6062 br label %bb2
6163
6264 bb2: ; preds = %bb1, %entry
63 call void @_Z3bari( i32 1 )
65 call void @_Z3bari( i32 2 )
6466 br label %bb11
6567
6668 bb3: ; preds = %entry
139139
140140 ; The balanced binary switch here would start with a comparison against 39, but
141141 ; it is currently starting with 29 because of the density-sum heuristic.
142 ; CHECK: cmpl $29
142 ; CHECK: cmpl $39
143143 ; CHECK: jg
144144 ; CHECK: cmpl $10
145 ; CHECK: jne
146 ; CHECK: cmpl $49
147 ; CHECK: jg
148 ; CHECK: cmpl $30
149 ; CHECK: jne
145 ; CHECK: je
150146 ; CHECK: cmpl $20
151147 ; CHECK: jne
148 ; CHECK: cmpl $40
149 ; CHECK: je
152150 ; CHECK: cmpl $50
153151 ; CHECK: jne
154 ; CHECK: cmpl $40
152 ; CHECK: cmpl $30
155153 ; CHECK: jne
156154 ; CHECK: cmpl $60
157155 ; CHECK: jne
0 ; RUN: llc -march=x86-64 %s -o - | FileCheck %s
1 ; RUN: llc -march=x86-64 %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
2
3 declare void @g(i32)
4
5 define void @basic(i32 %x) {
6 entry:
7 switch i32 %x, label %return [
8 i32 3, label %bb0
9 i32 1, label %bb1
10 i32 4, label %bb1
11 i32 5, label %bb0
12 ]
13 bb0: tail call void @g(i32 0) br label %return
14 bb1: tail call void @g(i32 1) br label %return
15 return: ret void
16
17 ; Should be lowered as straight compares in -O0 mode.
18 ; NOOPT-LABEL: basic
19 ; NOOPT: subl $3, %eax
20 ; NOOPT: je
21 ; NOOPT: subl $1, %eax
22 ; NOOPT: je
23 ; NOOPT: subl $4, %eax
24 ; NOOPT: je
25 ; NOOPT: subl $5, %eax
26 ; NOOPT: je
27
28 ; Jump table otherwise.
29 ; CHECK-LABEL: basic
30 ; CHECK: decl
31 ; CHECK: cmpl $4
32 ; CHECK: ja
33 ; CHECK: jmpq *.LJTI
34 }
35
36
37 define void @simple_ranges(i32 %x) {
38 entry:
39 switch i32 %x, label %return [
40 i32 0, label %bb0
41 i32 1, label %bb0
42 i32 2, label %bb0
43 i32 3, label %bb0
44 i32 100, label %bb1
45 i32 101, label %bb1
46 i32 102, label %bb1
47 i32 103, label %bb1
48 ]
49 bb0: tail call void @g(i32 0) br label %return
50 bb1: tail call void @g(i32 1) br label %return
51 return: ret void
52
53 ; Should be lowered to two range checks.
54 ; CHECK-LABEL: simple_ranges
55 ; CHECK: leal -100
56 ; CHECK: cmpl $4
57 ; CHECK: jae
58 ; CHECK: cmpl $3
59 ; CHECK: ja
60 }
61
62
63 define void @jt_is_better(i32 %x) {
64 entry:
65 switch i32 %x, label %return [
66 i32 0, label %bb0
67 i32 2, label %bb0
68 i32 4, label %bb0
69 i32 1, label %bb1
70 i32 3, label %bb1
71 i32 5, label %bb1
72
73 i32 6, label %bb2
74 i32 7, label %bb3
75 i32 8, label %bb4
76 ]
77 bb0: tail call void @g(i32 0) br label %return
78 bb1: tail call void @g(i32 1) br label %return
79 bb2: tail call void @g(i32 2) br label %return
80 bb3: tail call void @g(i32 3) br label %return
81 bb4: tail call void @g(i32 4) br label %return
82 return: ret void
83
84 ; Cases 0-5 could be lowered with two bit tests,
85 ; but with 6-8, the whole switch is suitable for a jump table.
86 ; CHECK-LABEL: jt_is_better
87 ; CHECK: cmpl $8
88 ; CHECK: jbe
89 ; CHECK: jmpq *.LJTI
90 }
91
92
93 define void @bt_is_better(i32 %x) {
94 entry:
95 switch i32 %x, label %return [
96 i32 0, label %bb0
97 i32 3, label %bb0
98 i32 6, label %bb0
99 i32 1, label %bb1
100 i32 4, label %bb1
101 i32 7, label %bb1
102 i32 2, label %bb2
103 i32 5, label %bb2
104 i32 8, label %bb2
105
106 ]
107 bb0: tail call void @g(i32 0) br label %return
108 bb1: tail call void @g(i32 1) br label %return
109 bb2: tail call void @g(i32 2) br label %return
110 return: ret void
111
112 ; This could be lowered as a jump table, but bit tests is more efficient.
113 ; CHECK-LABEL: bt_is_better
114 ; 73 = 2^0 + 2^3 + 2^6
115 ; CHECK: movl $73
116 ; CHECK: btl
117 ; 146 = 2^1 + 2^4 + 2^7
118 ; CHECK: movl $146
119 ; CHECK: btl
120 ; 292 = 2^2 + 2^5 + 2^8
121 ; CHECK: movl $292
122 ; CHECK: btl
123 }
124
125
126 define void @optimal_pivot1(i32 %x) {
127 entry:
128 switch i32 %x, label %return [
129 i32 100, label %bb0
130 i32 200, label %bb1
131 i32 300, label %bb0
132 i32 400, label %bb1
133 i32 500, label %bb0
134 i32 600, label %bb1
135
136 ]
137 bb0: tail call void @g(i32 0) br label %return
138 bb1: tail call void @g(i32 1) br label %return
139 return: ret void
140
141 ; Should pivot around 400 for two subtrees of equal size.
142 ; CHECK-LABEL: optimal_pivot1
143 ; CHECK-NOT: cmpl
144 ; CHECK: cmpl $399
145 }
146
147
148 define void @optimal_pivot2(i32 %x) {
149 entry:
150 switch i32 %x, label %return [
151 i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3
152 i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3
153 i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3
154 i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3
155
156 ]
157 bb0: tail call void @g(i32 0) br label %return
158 bb1: tail call void @g(i32 1) br label %return
159 bb2: tail call void @g(i32 2) br label %return
160 bb3: tail call void @g(i32 3) br label %return
161 return: ret void
162
163 ; Should pivot around 300 for two subtrees with two jump tables each.
164 ; CHECK-LABEL: optimal_pivot2
165 ; CHECK-NOT: cmpl
166 ; CHECK: cmpl $299
167 ; CHECK: jmpq *.LJTI
168 ; CHECK: jmpq *.LJTI
169 ; CHECK: jmpq *.LJTI
170 ; CHECK: jmpq *.LJTI
171 }
172
173
174 define void @optimal_jump_table1(i32 %x) {
175 entry:
176 switch i32 %x, label %return [
177 i32 0, label %bb0
178 i32 5, label %bb1
179 i32 6, label %bb2
180 i32 12, label %bb3
181 i32 13, label %bb4
182 i32 15, label %bb5
183 ]
184 bb0: tail call void @g(i32 0) br label %return
185 bb1: tail call void @g(i32 1) br label %return
186 bb2: tail call void @g(i32 2) br label %return
187 bb3: tail call void @g(i32 3) br label %return
188 bb4: tail call void @g(i32 4) br label %return
189 bb5: tail call void @g(i32 5) br label %return
190 return: ret void
191
192 ; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
193 ; Expecting a jump table from 5 to 15.
194 ; CHECK-LABEL: optimal_jump_table1
195 ; CHECK: leal -5
196 ; CHECK: cmpl $10
197 ; CHECK: jmpq *.LJTI
198 }
199
200
201 define void @optimal_jump_table2(i32 %x) {
202 entry:
203 switch i32 %x, label %return [
204 i32 0, label %bb0
205 i32 1, label %bb1
206 i32 2, label %bb2
207 i32 9, label %bb3
208 i32 14, label %bb4
209 i32 15, label %bb5
210 ]
211 bb0: tail call void @g(i32 0) br label %return
212 bb1: tail call void @g(i32 1) br label %return
213 bb2: tail call void @g(i32 2) br label %return
214 bb3: tail call void @g(i32 3) br label %return
215 bb4: tail call void @g(i32 4) br label %return
216 bb5: tail call void @g(i32 5) br label %return
217 return: ret void
218
219 ; Partitioning the cases to the minimum number of dense sets is not good enough.
220 ; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
221 ; should be preferred. Expecting a table from 0-9.
222 ; CHECK-LABEL: optimal_jump_table2
223 ; CHECK: cmpl $9
224 ; CHECK: jmpq *.LJTI
225 }
226
227
228 define void @optimal_jump_table3(i32 %x) {
229 entry:
230 switch i32 %x, label %return [
231 i32 1, label %bb0
232 i32 2, label %bb1
233 i32 3, label %bb2
234 i32 10, label %bb3
235 i32 13, label %bb0
236 i32 14, label %bb1
237 i32 15, label %bb2
238 i32 20, label %bb3
239 i32 25, label %bb4
240 ]
241 bb0: tail call void @g(i32 0) br label %return
242 bb1: tail call void @g(i32 1) br label %return
243 bb2: tail call void @g(i32 2) br label %return
244 bb3: tail call void @g(i32 3) br label %return
245 bb4: tail call void @g(i32 4) br label %return
246 return: ret void
247
248 ; Splitting to maximize left-right density sum and gap size would split this
249 ; between 3 and 10, and then between 20 and 25. It's better to build a table
250 ; from 1-20.
251 ; CHECK-LABEL: optimal_jump_table3
252 ; CHECK: leal -1
253 ; CHECK: cmpl $19
254 ; CHECK: jmpq *.LJTI
255 }
256
257 %struct.S = type { %struct.S*, i32 }
258 define void @phi_node_trouble(%struct.S* %s) {
259 entry:
260 br label %header
261 header:
262 %ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
263 %bool = icmp eq %struct.S* %ptr, null
264 br i1 %bool, label %exit, label %loop
265 loop:
266 %nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
267 %next = load %struct.S*, %struct.S** %nextptr
268 %xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
269 %x = load i32, i32* %xptr
270 switch i32 %x, label %exit [
271 i32 4, label %header
272 i32 36, label %exit2
273 i32 69, label %exit2
274 i32 25, label %exit2
275 ]
276 exit:
277 ret void
278 exit2:
279 ret void
280
281 ; This will be lowered to a comparison with 4 and then bit tests. Make sure
282 ; that the phi node in %header gets a value from the comparison block.
283 ; CHECK-LABEL: phi_node_trouble
284 ; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
285 ; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
286 ; CHECK: cmpl $4, %[[REG2]]
287 }
None ;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
0 ;; RUN: llc -verify-machineinstrs \
11 ;; RUN: -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
22 ;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s
33
4 ;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
4 ;; RUN: llc -verify-machineinstrs \
55 ;; RUN: -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
66 ;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s
77
1010
1111 define void @foo(i32* %ptr) nounwind ssp {
1212 %tmp = load i32, i32* %ptr, align 4
13 switch i32 %tmp, label %default [
14 i32 11, label %bb0
15 i32 10, label %bb1
16 i32 8, label %bb2
17 i32 4, label %bb3
18 i32 2, label %bb4
19 i32 6, label %bb5
20 i32 9, label %bb6
21 i32 15, label %bb7
22 i32 1, label %bb8
23 i32 3, label %bb9
24 i32 5, label %bb10
25 i32 30, label %bb11
26 i32 31, label %bb12
27 i32 13, label %bb13
28 i32 14, label %bb14
29 i32 20, label %bb15
30 i32 19, label %bb16
31 i32 17, label %bb17
32 i32 18, label %bb18
33 i32 21, label %bb19
34 i32 22, label %bb20
35 i32 16, label %bb21
36 i32 24, label %bb22
37 i32 25, label %bb23
38 i32 26, label %bb24
39 i32 27, label %bb25
40 i32 28, label %bb26
41 i32 23, label %bb27
42 i32 12, label %bb28
13 switch i32 %tmp, label %exit [
14 i32 0, label %bb0
15 i32 1, label %bb1
16 i32 2, label %bb2
17 i32 3, label %bb3
4318 ]
44
45 default:
46 br label %exit
4719 bb0:
20 store i32 0, i32* %ptr, align 4
4821 br label %exit
4922 bb1:
23 store i32 1, i32* %ptr, align 4
5024 br label %exit
5125 bb2:
26 store i32 2, i32* %ptr, align 4
5227 br label %exit
5328 bb3:
29 store i32 3, i32* %ptr, align 4
5430 br label %exit
55 bb4:
56 br label %exit
57 bb5:
58 br label %exit
59 bb6:
60 br label %exit
61 bb7:
62 br label %exit
63 bb8:
64 br label %exit
65 bb9:
66 br label %exit
67 bb10:
68 br label %exit
69 bb11:
70 br label %exit
71 bb12:
72 br label %exit
73 bb13:
74 br label %exit
75 bb14:
76 br label %exit
77 bb15:
78 br label %exit
79 bb16:
80 br label %exit
81 bb17:
82 br label %exit
83 bb18:
84 br label %exit
85 bb19:
86 br label %exit
87 bb20:
88 br label %exit
89 bb21:
90 br label %exit
91 bb22:
92 br label %exit
93 bb23:
94 br label %exit
95 bb24:
96 br label %exit
97 bb25:
98 br label %exit
99 bb26:
100 br label %exit
101 bb27:
102 br label %exit
103 bb28:
104 br label %exit
105
106
10731 exit:
108
10932 ret void
11033 }
11134