llvm.org GIT mirror: llvm @ 108f92e

If all uses of an integer IV are extensions of it, then change the type of the IV itself, if possible.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55674 91177308-0d34-0410-b5e6-96231b3b80d8

Devang Patel, 11 years ago
2 changed files with 258 additions and 0 deletions.
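To illustrate the transformation this commit adds (the IR below is a hypothetical sketch, not part of the commit): a loop whose i16 induction variable is only ever consumed through sign extensions can have the IV rewritten directly in the wider type, making the extension instructions dead. Before:

define void @before(i32* %base, i32 %n) {
entry:
  br label %bb

bb:
  %i = phi i16 [ 0, %entry ], [ %i.next, %bb ]
  %i.ext = sext i16 %i to i32
  %p = getelementptr i32* %base, i32 %i.ext
  store i32 0, i32* %p
  %i.next = add i16 %i, 1
  %i.next.ext = sext i16 %i.next to i32
  %cond = icmp slt i32 %i.next.ext, %n
  br i1 %cond, label %bb, label %return

return:
  ret void
}

After OptimizeIVType, the IV lives in i32 and the sexts are gone:

define void @after(i32* %base, i32 %n) {
entry:
  br label %bb

bb:
  %IV = phi i32 [ 0, %entry ], [ %IV.next, %bb ]
  %p = getelementptr i32* %base, i32 %IV
  store i32 0, i32* %p
  %IV.next = add i32 %IV, 1
  %cond = icmp slt i32 %IV.next, %n
  br i1 %cond, label %bb, label %return

return:
  ret void
}

The changes to the LoopStrengthReduce pass follow.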
STATISTIC(NumVariable, "Number of PHIs with variable strides");
STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumIVType, "Number of IV types optimized");

namespace {

  /// OptimizeShadowIV - If the IV is used in an int-to-float cast
  /// inside the loop then try to eliminate the cast operation.
  void OptimizeShadowIV(Loop *L);

  /// OptimizeIVType - If the IV is always sext'ed or zext'ed then
  /// change the type of the IV, if possible.
  void OptimizeIVType(Loop *L);

  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
                         const SCEVHandle *&CondStride);
  }
}

/// suitableExtInstruction - Helper function used by OptimizeIVType.
/// If I is a suitable SEXT or ZEXT instruction then return the type
/// to which I is extended. Otherwise return NULL.
const Type *suitableExtInstruction(Instruction *I, bool isSigned,
                                   const Type *ExtType) {

  const Type *DestType = NULL;
  if (ZExtInst *ZI = dyn_cast<ZExtInst>(I))
    DestType = ZI->getDestTy();
  else if (SExtInst *SI = dyn_cast<SExtInst>(I)) {
    // If the initial value is negative then a sign extension is not
    // suitable for the OptimizeIVType transformation.
    if (isSigned)
      return NULL;
    DestType = SI->getDestTy();
  }

  if (!DestType) return NULL;

  if (!ExtType)
    return DestType;

  // If another use of the IV is extended to some other type then the IV
  // is not suitable for the OptimizeIVType transformation.
  if (ExtType != DestType)
    return NULL;

  return DestType;
}

/// suitableIVIncr - Helper function used by OptimizeIVType. If I is
/// a suitable binary operator, all of whose uses are either SEXT or
/// ZEXT instructions, then return the type to which all those uses
/// are extended. Otherwise return NULL.
const Type *suitableIVIncr(Instruction *I,
                           Instruction *PHI, bool isSigned,
                           const Type *ExtType) {

  BinaryOperator *Incr = dyn_cast<BinaryOperator>(I);
  if (!Incr) return NULL;

  if (Incr->getOpcode() != Instruction::Add)
    return NULL;

  // The increment must add a constant to the PHI.
  ConstantInt *C = NULL;
  if (Incr->getOperand(0) == PHI)
    C = dyn_cast<ConstantInt>(Incr->getOperand(1));
  else if (Incr->getOperand(1) == PHI)
    C = dyn_cast<ConstantInt>(Incr->getOperand(0));

  if (!C) return NULL;

  const Type *RExtType = NULL;
  for (Value::use_iterator IncUI = Incr->use_begin(),
         IncUE = Incr->use_end(); IncUI != IncUE; ++IncUI) {

    Instruction *U2 = dyn_cast<Instruction>(*IncUI);
    if (U2 == PHI)
      continue;
    const Type *DestType = suitableExtInstruction(U2, isSigned, ExtType);
    if (!DestType)
      return NULL;

    if (!RExtType)
      RExtType = DestType;

    if (DestType != RExtType)
      return NULL;
  }

  return RExtType;
}

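For example (hypothetical IR, not from the commit), the uniformity check above rejects a PHI whose uses extend to different widths: once one use establishes the extension type, any use extending to another type makes suitableExtInstruction return NULL, and the whole PHI is skipped:

define i64 @mixed(i32 %n) {
entry:
  br label %bb

bb:
  %i = phi i16 [ 0, %entry ], [ %i.next, %bb ]
  %a = sext i16 %i to i32       ; one use fixes the extension type at i32
  %b = sext i16 %i to i64       ; another use extends to i64: rejected
  %i.next = add i16 %i, 1
  %i.next.ext = sext i16 %i.next to i32
  %cond = icmp slt i32 %i.next.ext, %n
  br i1 %cond, label %bb, label %return

return:
  ret i64 %b
}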
/// getNewPHIIncrement - Create a new increment instruction for NewPHI
/// using type Ty, based on the increment instruction Incr.
/// Helper function used by OptimizeIVType.
BinaryOperator *getNewPHIIncrement(BinaryOperator *Incr, PHINode *PHI,
                                   PHINode *NewPHI, const Type *Ty) {
  ConstantInt *C = NULL;
  if (Incr->getOperand(0) == PHI)
    C = dyn_cast<ConstantInt>(Incr->getOperand(1));
  else if (Incr->getOperand(1) == PHI)
    C = dyn_cast<ConstantInt>(Incr->getOperand(0));

  assert(C && "Unexpected Incr operand!");
  return BinaryOperator::Create(Incr->getOpcode(), NewPHI,
                                ConstantInt::get(Ty, C->getZExtValue()),
                                "IV.next", Incr);
}

/// OptimizeIVType - If the IV is always sext'ed or zext'ed then
/// change the type of the IV, if possible.
void LoopStrengthReduce::OptimizeIVType(Loop *L) {

  // Collect the PHI nodes at the top of the loop header.
  BasicBlock *LPH = L->getLoopPreheader();
  SmallVector<PHINode *, 4> PHIs;
  for (BasicBlock::iterator BI = L->getHeader()->begin(),
         BE = L->getHeader()->end(); BI != BE; ++BI) {
    if (PHINode *PHI = dyn_cast<PHINode>(BI))
      PHIs.push_back(PHI);
    else
      break;
  }

  while (!PHIs.empty()) {
    PHINode *PHI = PHIs.back(); PHIs.pop_back();
    if (PHI->getNumIncomingValues() != 2) continue;

    unsigned Entry = 0, Latch = 1;
    if (PHI->getIncomingBlock(0) != LPH) {
      Entry = 1;
      Latch = 0;
    }

    ConstantInt *CInit = dyn_cast<ConstantInt>(PHI->getIncomingValue(Entry));
    if (!CInit) return;

    bool signedInit = CInit->getValue().isNegative();

    bool TransformPhi = true;
    const Type *ExtType = NULL;
    BinaryOperator *Incr = NULL;
    SmallVector<Instruction *, 4> PHIUses;

    // Collect all IV uses. Each use must be either the IV increment or a
    // suitable extension instruction.
    for (Value::use_iterator UI = PHI->use_begin(),
           UE = PHI->use_end(); UI != UE; ++UI) {
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use) {
        TransformPhi = false;
        break;
      }

      ExtType = suitableIVIncr(Use, PHI, signedInit, ExtType);
      if (ExtType) {
        Incr = cast<BinaryOperator>(Use);
        continue;
      }
      ExtType = suitableExtInstruction(Use, signedInit, ExtType);
      if (ExtType) {
        PHIUses.push_back(Use);
        continue;
      }

      TransformPhi = false;
      break;
    }

    if (!TransformPhi || !Incr || PHIUses.empty())
      continue;

    // Apply the transformation: extend the IV type and eliminate the SEXT
    // or ZEXT instructions.
    NumIVType++;

    PHINode *NewPH = PHINode::Create(ExtType, "IV", PHI);
    ConstantInt *NewCInit = ConstantInt::get(ExtType, CInit->getZExtValue());
    BinaryOperator *NewIncr = getNewPHIIncrement(Incr, PHI, NewPH, ExtType);

    NewPH->addIncoming(NewCInit, PHI->getIncomingBlock(Entry));
    NewPH->addIncoming(NewIncr, PHI->getIncomingBlock(Latch));

    // Replace all SEXT or ZEXT uses with the new IV directly.
    while (!PHIUses.empty()) {
      Instruction *Use = PHIUses.back(); PHIUses.pop_back();
      SE->deleteValueFromRecords(Use);
      Use->replaceAllUsesWith(NewPH);
      Use->eraseFromParent();
    }

    // Replace all uses of the IV increment with the new increment.
    SmallVector<Instruction *, 4> IncrUses;
    for (Value::use_iterator UI2 = Incr->use_begin(),
           UE2 = Incr->use_end(); UI2 != UE2; ++UI2)
      IncrUses.push_back(cast<Instruction>(*UI2));

    while (!IncrUses.empty()) {
      Instruction *Use = IncrUses.back(); IncrUses.pop_back();
      if (Use == PHI) continue;
      SE->deleteValueFromRecords(Use);
      Use->replaceAllUsesWith(NewIncr);
      Use->eraseFromParent();
    }

    // Remove the old PHI and increment instruction. They use each other,
    // so break the cycle first: drop the PHI's use of Incr, then erase
    // Incr (dropping its use of the PHI), then erase the dead PHI.
    SE->deleteValueFromRecords(PHI);
    SE->deleteValueFromRecords(Incr);
    PHI->removeIncomingValue(Latch, false);
    Incr->eraseFromParent();
    PHI->eraseFromParent();
  }
}

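A negative start value also blocks the transformation (again a hypothetical sketch, not from the commit): the wider initial constant is built with getZExtValue(), which would not preserve the value that sign-extended uses observe, so suitableExtInstruction refuses SEXT uses when the initial value is negative:

define i32 @negative_start(i32 %n) {
entry:
  br label %bb

bb:
  ; Start value -8 is negative, so the sext below is rejected and the
  ; i16 IV is left untouched.
  %i = phi i16 [ -8, %entry ], [ %i.next, %bb ]
  %i.ext = sext i16 %i to i32
  %i.next = add i16 %i, 1
  %cond = icmp slt i32 %i.ext, %n
  br i1 %cond, label %bb, label %return

return:
  ret i32 %i.ext
}

The remaining hunks wire OptimizeIVType into the pass's run method: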
// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
// uses in the loop, look to see if we can eliminate some, in favor of using
// common indvars for the different uses.

...

  TD = &getAnalysis<TargetData>();
  UIntPtrTy = TD->getIntPtrType();
  Changed = false;

  OptimizeIVType(L);

  // Find all uses of induction variables in this loop, and categorize
  // them by stride. Start by finding all of the PHI nodes in the header for
The second changed file is a new regression test:

; RUN: llvm-as < %s | opt -loop-reduce | llvm-dis | grep sext | count 1
; ModuleID = ''
%struct.App1Marker = type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }>
%struct.ComponentInstanceRecord = type <{ [1 x i32] }>
%struct.DCPredictors = type { [5 x i16] }
%struct.DecodeTable = type { i16, i16, i16, i16, i8**, i8** }
%struct.ICMDataProcRecord = type <{ i16 (i8**, i32, i32)*, i32 }>
%struct.JPEGBitStream = type { i8*, i32, i32, i32, i32, i32, %struct.App1Marker*, i8*, i32, i16, i16, i32 }
%struct.JPEGGlobals = type { [2048 x i8], %struct.JPEGBitStream, i8*, i32, i32, %struct.ComponentInstanceRecord*, %struct.ComponentInstanceRecord*, i32, %struct.OpaqueQTMLMutex*, %struct.Rect, i32, i32, %struct.SharedGlobals, %struct.DCPredictors, i8, i8, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, %struct.YUVGeneralParams, i16, i16, i32, [5 x i16*], [5 x %struct.DecodeTable*], [5 x %struct.DecodeTable*], [5 x i8], [5 x i8], [4 x [65 x i16]], [4 x %struct.DecodeTable], [4 x %struct.DecodeTable], [4 x i8*], [4 x i8*], i16, i16, i32, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, i8**, [18 x i8], [18 x i8], [18 x i8], [18 x i8], i32, i32, i8**, i8**, i8, i8, i8, i8, i16, i16, %struct.App1Marker*, i8, i8, i8, i8, i32**, i8*, i16*, i8*, i16*, i8, [3 x i8], i32, [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i32], [3 x i16*], [3 x i16*], [3 x i8**], [3 x %struct.DecodeTable*], [3 x %struct.DecodeTable*], [3 x i32], i32, [3 x i16*], i32, i32, i32, [3 x i32], i8, i8, i8, i8, %struct.ICMDataProcRecord*, i32, i32, i8**, i8**, i8**, i8**, i32, i32, i8*, i32, i32, i16*, i16*, i8*, i32, i32, i32, i32, i32, i32, i32, [16 x <2 x i64>], [1280 x i8], i8 }
%struct.OpaqueQTMLMutex = type opaque
%struct.Rect = type { i16, i16, i16, i16 }
%struct.SharedDGlobals = type { %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable, %struct.DecodeTable }
%struct.SharedEGlobals = type { i8**, i8**, i8**, i8** }
%struct.SharedGlobals = type { %struct.SharedEGlobals*, %struct.SharedDGlobals* }
%struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 }
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]

define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind {
entry:
  %tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; [#uses=1]
  %tmp4412 = load i32* %tmp4311, align 16 ; [#uses=2]
  %tmp4613 = icmp sgt i32 %tmp4412, 0 ; [#uses=1]
  br i1 %tmp4613, label %bb, label %bb49

bb: ; preds = %bb28, %entry
  %component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ] ; [#uses=2]
  %tmp12 = sext i16 %component.09 to i32 ; [#uses=2]
  %tmp6 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12 ; [#uses=2]
  %tmp7 = load i16** %tmp6, align 4 ; [#uses=2]
  %tmp235 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12 ; [#uses=1]
  %tmp246 = load i32* %tmp235, align 4 ; [#uses=2]
  %tmp267 = icmp sgt i32 %tmp246, 0 ; [#uses=1]
  br i1 %tmp267, label %bb8, label %bb28

bb8: ; preds = %bb8, %bb
  %indvar = phi i32 [ 0, %bb ], [ %indvar.next2, %bb8 ] ; [#uses=3]
  %theDCTBufferIter.01.rec = shl i32 %indvar, 6 ; [#uses=1]
  %tmp10.rec = add i32 %theDCTBufferIter.01.rec, 64 ; [#uses=1]
  %tmp10 = getelementptr i16* %tmp7, i32 %tmp10.rec ; [#uses=1]
  %i.02 = trunc i32 %indvar to i16 ; [#uses=1]
  %tmp13 = add i16 %i.02, 1 ; [#uses=1]
  %phitmp = sext i16 %tmp13 to i32 ; [#uses=1]
  %tmp26 = icmp slt i32 %phitmp, %tmp246 ; [#uses=1]
  %indvar.next2 = add i32 %indvar, 1 ; [#uses=1]
  br i1 %tmp26, label %bb8, label %bb28

bb28: ; preds = %bb8, %bb
  %theDCTBufferIter.0.lcssa = phi i16* [ %tmp7, %bb ], [ %tmp10, %bb8 ] ; [#uses=1]
  store i16* %theDCTBufferIter.0.lcssa, i16** %tmp6, align 4
  %tmp37 = add i16 %component.09, 1 ; [#uses=2]
  %phitmp15 = sext i16 %tmp37 to i32 ; [#uses=1]
  %tmp46 = icmp slt i32 %phitmp15, %tmp4412 ; [#uses=1]
  br i1 %tmp46, label %bb, label %bb49

bb49: ; preds = %bb28, %entry
  ret i16 0
}