llvm.org GIT mirror llvm / f2f39d6
[PGO]: Implement Func PGO name string compression This is part of the effort/preparation to reduce the size of instr-pgo (object, binary, memory footprint, and raw data). The functionality is currently off by default and not yet used by any clients. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256667 91177308-0d34-0410-b5e6-96231b3b80d8 Xinliang David Li 3 years ago
3 changed file(s) with 196 addition(s) and 4 deletion(s). Raw diff Collapse all Expand all
158158 /// Given a PGO function name, remove the filename prefix and return
159159 /// the original (static) function name.
160160 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName);
161
162 /// Given a vector of strings (function PGO names) \c NameStrs, the
163 /// method generates a combined string \c Result that is ready to be
164 /// serialized. The \c Result string is comprised of three fields:
165 /// The first field is the length of the uncompressed strings, and
166 /// the second field is the length of the zlib-compressed string.
167 /// Both fields are encoded in ULEB128. If \c doCompression is false, the
168 /// third field is the uncompressed strings; otherwise it is the
169 /// compressed string. When the string compression is off, the
170 /// second field will have value zero.
171 int collectPGOFuncNameStrings(const std::vector &NameStrs,
172 bool doCompression, std::string &Result);
173 /// Produce \c Result string with the same format described above. The input
174 /// is a vector of PGO function name variables that are referenced.
175 int collectPGOFuncNameStrings(const std::vector &NameVars,
176 std::string &Result);
177 class InstrProfSymtab;
178 /// \c NameStrings is a string composed of one or more sub-strings encoded in
179 /// the
180 /// format described above. The substrings are separated by 0 or more zero
181 /// bytes.
182 /// This method decodes the string and populates the \c Symtab.
183 int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
161184
162185 const std::error_category &instrprof_category();
163186
234257 /// This interface is used by reader of CoverageMapping test
235258 /// format.
236259 inline std::error_code create(StringRef D, uint64_t BaseAddr);
260 /// \c NameStrings is a string composed of one or more sub-strings
261 /// encoded in the format described above. The substrings are
262 /// separated by 0 or more zero bytes. This method decodes the
263 /// string and populates the \c Symtab.
264 inline std::error_code create(StringRef NameStrings);
237265 /// Create InstrProfSymtab from a set of names iteratable from
238266 /// \p IterRange. This interface is used by IndexedProfReader.
239267 template void create(const NameIterRange &IterRange);
254282 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val));
255283 }
256284 AddrHashMap &getAddrHashMap() { return AddrToMD5Map; }
257 /// Return function's PGO name from the function name's symabol
258 /// address in the object file. If an error occurs, Return
285 /// Return function's PGO name from the function name's symbol
286 /// address in the object file. If an error occurs, return
259287 /// an empty string.
260288 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize);
261289 /// Return function's PGO name from the name's md5 hash value.
266294 std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
267295 Data = D;
268296 Address = BaseAddr;
297 return std::error_code();
298 }
299
300 std::error_code InstrProfSymtab::create(StringRef NameStrings) {
301 if (readPGOFuncNameStrings(NameStrings, *this))
302 return make_error_code(instrprof_error::malformed);
269303 return std::error_code();
270304 }
271305
1111 //
1212 //===----------------------------------------------------------------------===//
1313
14 #include "llvm/ProfileData/InstrProf.h"
1415 #include "llvm/IR/Constants.h"
1516 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalVariable.h"
1618 #include "llvm/IR/Module.h"
17 #include "llvm/IR/GlobalVariable.h"
18 #include "llvm/ProfileData/InstrProf.h"
19 #include "llvm/Support/Compression.h"
1920 #include "llvm/Support/ErrorHandling.h"
21 #include "llvm/Support/LEB128.h"
2022 #include "llvm/Support/ManagedStatic.h"
2123
2224 using namespace llvm;
161163 return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName);
162164 }
163165
166 int collectPGOFuncNameStrings(const std::vector &NameStrs,
167 bool doCompression, std::string &Result) {
168 uint8_t Header[16], *P = Header;
169 std::string UncompressedNameStrings;
170
171 for (auto NameStr : NameStrs) {
172 UncompressedNameStrings += NameStr;
173 UncompressedNameStrings.append(" ");
174 }
175 unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
176 P += EncLen;
177 if (!doCompression) {
178 EncLen = encodeULEB128(0, P);
179 P += EncLen;
180 Result.append(reinterpret_cast(&Header[0]), P - &Header[0]);
181 Result += UncompressedNameStrings;
182 return 0;
183 }
184 SmallVector CompressedNameStrings;
185 zlib::Status Success =
186 zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings,
187 zlib::BestSizeCompression);
188 assert(Success == zlib::StatusOK);
189 if (Success != zlib::StatusOK)
190 return 1;
191 EncLen = encodeULEB128(CompressedNameStrings.size(), P);
192 P += EncLen;
193 Result.append(reinterpret_cast(&Header[0]), P - &Header[0]);
194 Result +=
195 std::string(CompressedNameStrings.data(), CompressedNameStrings.size());
196 return 0;
197 }
198
199 int collectPGOFuncNameStrings(const std::vector &NameVars,
200 std::string &Result) {
201 std::vector NameStrs;
202 for (auto *NameVar : NameVars) {
203 auto *Arr = cast(NameVar->getInitializer());
204 StringRef NameStr =
205 Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
206 NameStrs.push_back(NameStr.str());
207 }
208 return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result);
209 }
210
211 int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
212 const uint8_t *P = reinterpret_cast(NameStrings.data());
213 const uint8_t *EndP = reinterpret_cast(NameStrings.data() +
214 NameStrings.size());
215 while (P < EndP) {
216 uint32_t N;
217 uint64_t UncompressedSize = decodeULEB128(P, &N);
218 P += N;
219 uint64_t CompressedSize = decodeULEB128(P, &N);
220 P += N;
221 bool isCompressed = (CompressedSize != 0);
222 SmallString<128> UncompressedNameStrings;
223 StringRef NameStrings;
224 if (isCompressed) {
225 StringRef CompressedNameStrings(reinterpret_cast(P),
226 CompressedSize);
227 if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings,
228 UncompressedSize) != zlib::StatusOK)
229 return 1;
230 P += CompressedSize;
231 NameStrings = StringRef(UncompressedNameStrings.data(),
232 UncompressedNameStrings.size());
233 } else {
234 NameStrings =
235 StringRef(reinterpret_cast(P), UncompressedSize);
236 P += UncompressedSize;
237 }
238 // Now parse the name strings.
239 size_t NameStart = 0;
240 bool isLast = false;
241 do {
242 size_t NameStop = NameStrings.find(' ', NameStart);
243 if (NameStop == StringRef::npos)
244 return 1;
245 if (NameStop == NameStrings.size() - 1)
246 isLast = true;
247 StringRef Name = NameStrings.substr(NameStart, NameStop - NameStart);
248 Symtab.addFuncName(Name);
249 if (isLast)
250 break;
251 NameStart = NameStop + 1;
252 } while (true);
253
254 while (P < EndP && *P == 0)
255 P++;
256 }
257 Symtab.finalizeSymtab();
258 return 0;
259 }
260
164261 instrprof_error
165262 InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
166263 uint64_t Weight) {
88
99 #include "llvm/ProfileData/InstrProfReader.h"
1010 #include "llvm/ProfileData/InstrProfWriter.h"
11 #include "llvm/Support/Compression.h"
1112 #include "gtest/gtest.h"
1213
1314 #include
582583 ASSERT_EQ(StringRef("bar3"), R);
583584 }
584585
586 TEST_F(InstrProfTest, instr_prof_symtab_compression_test) {
587 std::vector FuncNames1;
588 std::vector FuncNames2;
589 for (int I = 0; I < 10 * 1024; I++) {
590 std::string str;
591 raw_string_ostream OS(str);
592 OS << "func_" << I;
593 FuncNames1.push_back(OS.str());
594 str.clear();
595 OS << "fooooooooooooooo_" << I;
596 FuncNames1.push_back(OS.str());
597 str.clear();
598 OS << "BAR_" << I;
599 FuncNames2.push_back(OS.str());
600 str.clear();
601 OS << "BlahblahBlahblahBar_" << I;
602 FuncNames2.push_back(OS.str());
603 }
604
605 for (int Padding = 0; Padding < 10; Padding++) {
606 for (int DoCompression = 0; DoCompression < 2; DoCompression++) {
607 // Compressing:
608 std::string FuncNameStrings1;
609 collectPGOFuncNameStrings(FuncNames1,
610 (DoCompression != 0 && zlib::isAvailable()),
611 FuncNameStrings1);
612
613 // Compressing:
614 std::string FuncNameStrings2;
615 collectPGOFuncNameStrings(FuncNames2,
616 (DoCompression != 0 && zlib::isAvailable()),
617 FuncNameStrings2);
618
619 // Join with paddings:
620 std::string FuncNameStrings = FuncNameStrings1;
621 for (int P = 0; P < Padding; P++) {
622 FuncNameStrings.push_back('\0');
623 }
624 FuncNameStrings += FuncNameStrings2;
625
626 // Now decompress
627 InstrProfSymtab Symtab;
628 Symtab.create(StringRef(FuncNameStrings));
629
630 // Now check
631 for (int I = 0; I < 10 * 1024; I++) {
632 std::string N[4];
633 N[0] = FuncNames1[2 * I];
634 N[1] = FuncNames1[2 * I + 1];
635 N[2] = FuncNames2[2 * I];
636 N[3] = FuncNames2[2 * I + 1];
637 for (int J = 0; J < 4; J++) {
638 StringRef R = Symtab.getFuncName(IndexedInstrProf::ComputeHash(N[J]));
639 ASSERT_EQ(StringRef(N[J]), R);
640 }
641 }
642 }
643 }
644 }
645
585646 } // end anonymous namespace