llvm.org GIT mirror llvm / 9f482fc
LTO: Reduce memory consumption by creating an in-memory symbol table for InputFiles. NFCI. Introduce symbol table data structures that can be potentially written to disk, have the LTO library build those data structures using temporarily constructed modules and redirect the LTO library implementation to go through those data structures. This allows us to remove the LLVMContext and Modules owned by InputFile. With this change I measured a peak memory consumption decrease from 5.4GB to 2.8GB in a no-op incremental ThinLTO link of Chromium on Linux. The impact on memory consumption is larger in COFF linkers where we are currently forced to materialize all metadata in order to read linker options. Peak memory consumption linking a large piece of Chromium for Windows with full LTO and debug info decreases from >64GB (OOM) to 15GB. Part of PR27551. Differential Revision: https://reviews.llvm.org/D31364 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@299168 91177308-0d34-0410-b5e6-96231b3b80d8 Peter Collingbourne 3 years ago
6 changed file(s) with 664 addition(s) and 296 deletion(s). Raw diff Collapse all Expand all
2323 #include "llvm/IR/ModuleSummaryIndex.h"
2424 #include "llvm/LTO/Config.h"
2525 #include "llvm/Linker/IRMover.h"
26 #include "llvm/Object/ModuleSymbolTable.h"
26 #include "llvm/Object/IRSymtab.h"
2727 #include "llvm/Support/Error.h"
2828 #include "llvm/Support/ToolOutputFile.h"
2929 #include "llvm/Support/thread.h"
7878 struct SymbolResolution;
7979 class ThinBackendProc;
8080
81 /// An input file. This is a wrapper for ModuleSymbolTable that exposes only the
81 /// An input file. This is a symbol table wrapper that only exposes the
8282 /// information that an LTO client should need in order to do symbol resolution.
8383 class InputFile {
84 public:
85 class Symbol;
86
87 private:
8488 // FIXME: Remove LTO class friendship once we have bitcode symbol tables.
8589 friend LTO;
8690 InputFile() = default;
8791
88 // FIXME: Remove the LLVMContext once we have bitcode symbol tables.
89 LLVMContext Ctx;
90 struct InputModule;
91 std::vector Mods;
92 ModuleSymbolTable SymTab;
93
94 std::vector Comdats;
95 DenseMap ComdatMap;
92 std::vector Mods;
93 SmallVector Strtab;
94 std::vector Symbols;
95
96 // [begin, end) for each module
97 std::vector> ModuleSymIndices;
98
99 StringRef SourceFileName, COFFLinkerOpts;
100 std::vector ComdatTable;
96101
97102 public:
98103 ~InputFile();
100105 /// Create an InputFile.
101106 static Expected> create(MemoryBufferRef Object);
102107
103 class symbol_iterator;
104
105 /// This is a wrapper for ArrayRef::iterator that
106 /// exposes only the information that an LTO client should need in order to do
107 /// symbol resolution.
108 ///
109 /// This object is ephemeral; it is only valid as long as an iterator obtained
110 /// from symbols() refers to it.
111 class Symbol {
112 friend symbol_iterator;
108 /// The purpose of this class is to only expose the symbol information that an
109 /// LTO client should need in order to do symbol resolution.
110 class Symbol : irsymtab::Symbol {
113111 friend LTO;
114112
115 ArrayRef::iterator I;
116 const ModuleSymbolTable &SymTab;
117 const InputFile *File;
118 uint32_t Flags;
119 SmallString<64> Name;
120
121 bool shouldSkip() {
122 return !(Flags & object::BasicSymbolRef::SF_Global) ||
123 (Flags & object::BasicSymbolRef::SF_FormatSpecific);
124 }
125
126 void skip() {
127 ArrayRef::iterator E = SymTab.symbols().end();
128 while (I != E) {
129 Flags = SymTab.getSymbolFlags(*I);
130 if (!shouldSkip())
131 break;
132 ++I;
133 }
134 if (I == E)
135 return;
136
137 Name.clear();
138 {
139 raw_svector_ostream OS(Name);
140 SymTab.printSymbolName(OS, *I);
141 }
142 }
143
144 bool isGV() const { return I->is(); }
145 GlobalValue *getGV() const { return I->get(); }
146
147113 public:
148 Symbol(ArrayRef::iterator I,
149 const ModuleSymbolTable &SymTab, const InputFile *File)
150 : I(I), SymTab(SymTab), File(File) {
151 skip();
152 }
153
154 bool isUndefined() const {
155 return Flags & object::BasicSymbolRef::SF_Undefined;
156 }
157 bool isCommon() const { return Flags & object::BasicSymbolRef::SF_Common; }
158 bool isWeak() const { return Flags & object::BasicSymbolRef::SF_Weak; }
159 bool isIndirect() const {
160 return Flags & object::BasicSymbolRef::SF_Indirect;
161 }
162
163 /// For COFF weak externals, returns the name of the symbol that is used
164 /// as a fallback if the weak external remains undefined.
165 std::string getCOFFWeakExternalFallback() const {
166 assert((Flags & object::BasicSymbolRef::SF_Weak) &&
167 (Flags & object::BasicSymbolRef::SF_Indirect) &&
168 "symbol is not a weak external");
169 std::string Name;
170 raw_string_ostream OS(Name);
171 SymTab.printSymbolName(
172 OS,
173 cast(
174 cast(getGV())->getAliasee()->stripPointerCasts()));
175 OS.flush();
176 return Name;
177 }
178
179 /// Returns the mangled name of the global.
180 StringRef getName() const { return Name; }
181
182 GlobalValue::VisibilityTypes getVisibility() const {
183 if (isGV())
184 return getGV()->getVisibility();
185 return GlobalValue::DefaultVisibility;
186 }
187 bool canBeOmittedFromSymbolTable() const {
188 return isGV() && llvm::canBeOmittedFromSymbolTable(getGV());
189 }
190 bool isTLS() const {
191 // FIXME: Expose a thread-local flag for module asm symbols.
192 return isGV() && getGV()->isThreadLocal();
193 }
194
195 // Returns the index of the comdat this symbol is in or -1 if the symbol
196 // is not in a comdat.
197 // FIXME: We have to return Expected because aliases point to an
198 // arbitrary ConstantExpr and that might not actually be a constant. That
199 // means we might not be able to find what an alias is aliased to and
200 // so find its comdat.
201 Expected getComdatIndex() const;
202
203 uint64_t getCommonSize() const {
204 assert(Flags & object::BasicSymbolRef::SF_Common);
205 if (!isGV())
206 return 0;
207 return getGV()->getParent()->getDataLayout().getTypeAllocSize(
208 getGV()->getType()->getElementType());
209 }
210 unsigned getCommonAlignment() const {
211 assert(Flags & object::BasicSymbolRef::SF_Common);
212 if (!isGV())
213 return 0;
214 return getGV()->getAlignment();
215 }
114 Symbol(const irsymtab::Symbol &S) : irsymtab::Symbol(S) {}
115
116 using irsymtab::Symbol::isUndefined;
117 using irsymtab::Symbol::isCommon;
118 using irsymtab::Symbol::isWeak;
119 using irsymtab::Symbol::isIndirect;
120 using irsymtab::Symbol::getName;
121 using irsymtab::Symbol::getVisibility;
122 using irsymtab::Symbol::canBeOmittedFromSymbolTable;
123 using irsymtab::Symbol::isTLS;
124 using irsymtab::Symbol::getComdatIndex;
125 using irsymtab::Symbol::getCommonSize;
126 using irsymtab::Symbol::getCommonAlignment;
127 using irsymtab::Symbol::getCOFFWeakExternalFallback;
216128 };
217129
218 class symbol_iterator {
219 Symbol Sym;
220
221 public:
222 symbol_iterator(ArrayRef::iterator I,
223 const ModuleSymbolTable &SymTab, const InputFile *File)
224 : Sym(I, SymTab, File) {}
225
226 symbol_iterator &operator++() {
227 ++Sym.I;
228 Sym.skip();
229 return *this;
230 }
231
232 symbol_iterator operator++(int) {
233 symbol_iterator I = *this;
234 ++*this;
235 return I;
236 }
237
238 const Symbol &operator*() const { return Sym; }
239 const Symbol *operator->() const { return &Sym; }
240
241 bool operator!=(const symbol_iterator &Other) const {
242 return Sym.I != Other.Sym.I;
243 }
244 };
245
246130 /// A range over the symbols in this InputFile.
247 iterator_range symbols() {
248 return llvm::make_range(
249 symbol_iterator(SymTab.symbols().begin(), SymTab, this),
250 symbol_iterator(SymTab.symbols().end(), SymTab, this));
251 }
131 ArrayRef symbols() const { return Symbols; }
252132
253133 /// Returns linker options specified in the input file.
254 Expected getLinkerOpts();
134 StringRef getCOFFLinkerOpts() const { return COFFLinkerOpts; }
255135
256136 /// Returns the path to the InputFile.
257137 StringRef getName() const;
258138
259139 /// Returns the source file path specified at compile time.
260 StringRef getSourceFileName() const;
140 StringRef getSourceFileName() const { return SourceFileName; }
261141
262142 // Returns a table with all the comdats used by this file.
263 ArrayRef getComdatTable() const { return Comdats; }
143 ArrayRef getComdatTable() const { return ComdatTable; }
264144
265145 private:
266 iterator_range module_symbols(InputModule &IM);
146 ArrayRef module_symbols(unsigned I) const {
147 const auto &Indices = ModuleSymIndices[I];
148 return {Symbols.data() + Indices.first, Symbols.data() + Indices.second};
149 }
267150 };
268151
269152 /// This class wraps an output stream for a native object. Most clients should
451334 // Global mapping from mangled symbol names to resolutions.
452335 StringMap GlobalResolutions;
453336
454 void addSymbolToGlobalRes(SmallPtrSet &Used,
455 const InputFile::Symbol &Sym, SymbolResolution Res,
337 void addSymbolToGlobalRes(const InputFile::Symbol &Sym, SymbolResolution Res,
456338 unsigned Partition);
457339
458340 // These functions take a range of symbol resolutions [ResI, ResE) and consume
459341 // the resolutions used by a single input module by incrementing ResI. After
460342 // these functions return, [ResI, ResE) will refer to the resolution range for
461343 // the remaining modules in the InputFile.
462 Error addModule(InputFile &Input, InputFile::InputModule &IM,
344 Error addModule(InputFile &Input, unsigned ModI,
463345 const SymbolResolution *&ResI, const SymbolResolution *ResE);
464 Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
346 Error addRegularLTO(BitcodeModule BM,
347 ArrayRef Syms,
348 const SymbolResolution *&ResI,
465349 const SymbolResolution *ResE);
466 Error addThinLTO(BitcodeModule BM, Module &M,
467 iterator_range> Syms,
350 Error addThinLTO(BitcodeModule BM, ArrayRef> Syms,
468351 const SymbolResolution *&ResI, const SymbolResolution *ResE);
469352
470353 Error runRegularLTO(AddStreamFn AddStream);
0 //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains data definitions and a reader and builder for a symbol
10 // table for LLVM IR. Its purpose is to allow linkers and other consumers of
11 // bitcode files to efficiently read the symbol table for symbol resolution
12 // purposes without needing to construct a module in memory.
13 //
14 // As with most object files the symbol table has two parts: the symbol table
15 // itself and a string table which is referenced by the symbol table.
16 //
17 // A symbol table corresponds to a single bitcode file, which may consist of
18 // multiple modules, so symbol tables may likewise contain symbols for multiple
19 // modules.
20 //
21 //===----------------------------------------------------------------------===//
22
23 #ifndef LLVM_OBJECT_IRSYMTAB_H
24 #define LLVM_OBJECT_IRSYMTAB_H
25
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/Object/SymbolicFile.h"
29 #include "llvm/Support/Endian.h"
30
31 namespace llvm {
32 namespace irsymtab {
33 namespace storage {
34
35 // The data structures in this namespace define the low-level serialization
36 // format. Clients that just want to read a symbol table should use the
37 // irsymtab::Reader class.
38
39 typedef support::ulittle32_t Word;
40
41 /// A reference to a string in the string table.
/// Only the offset of the first character is stored; no length is recorded
/// alongside it.
42 struct Str {
43 Word Offset;
// The result is built from a bare character pointer, so the end of the
// string is not taken from the size of Strtab -- presumably every string in
// the table is NUL-terminated (TODO confirm against the builder).
44 StringRef get(StringRef Strtab) const {
45 return Strtab.data() + Offset;
46 }
47 };
48
49 /// A reference to a range of objects in the symbol table.
/// Offset is added to the start of the symbol table buffer, i.e. it is a byte
/// offset; Size is handed to ArrayRef as the element count, not a byte count.
50 template struct Range {
51 Word Offset, Size;
// Reinterprets the bytes at Symtab.data() + Offset in place. Nothing in this
// function checks that the resulting range lies inside Symtab.
52 ArrayRef get(StringRef Symtab) const {
53 return {reinterpret_cast(Symtab.data() + Offset), Size};
54 }
55 };
56
57 /// Describes the range of a particular module's symbols within the symbol
58 /// table.
/// Begin and End are indices into the symbol array; Reader::module_symbols()
/// treats them as the half-open range [Begin, End).
59 struct Module {
60 Word Begin, End;
61 };
62
63 /// This is equivalent to an IR comdat.
/// Only the comdat name is stored, as a reference into the string table.
64 struct Comdat {
65 Str Name;
66 };
67
68 /// Contains the information needed by linkers for symbol resolution, as well as
69 /// by the LTO implementation itself.
70 struct Symbol {
71 /// The mangled symbol name.
72 Str Name;
73
74 /// The unmangled symbol name, or the empty string if this is not an IR
75 /// symbol.
76 Str IRName;
77
78 /// The index into Header::Comdats, or -1 if not a comdat member.
79 Word ComdatIndex;
80
/// Packed flag word. Each FlagBits enumerator names a bit position (a shift
/// amount), not a mask; irsymtab::Symbol decodes a flag as
/// (Flags >> FB_x) & 1.
81 Word Flags;
82 enum FlagBits {
// Visibility occupies bits 0-1. Every enumerator after it is a single bit,
// numbered consecutively starting at bit 2.
83 FB_visibility, // 2 bits
84 FB_undefined = FB_visibility + 2,
85 FB_weak,
86 FB_common,
87 FB_indirect,
88 FB_used,
89 FB_tls,
90 FB_may_omit,
91 FB_global,
92 FB_format_specific,
93 FB_unnamed_addr,
94 };
95
96 /// The index into the Uncommon table, or -1 if this symbol does not have an
97 /// Uncommon.
98 Word UncommonIndex;
99 };
100
101 /// This data structure contains rarely used symbol fields and is optionally
102 /// referenced by a Symbol.
103 struct Uncommon {
/// Size and alignment recorded for a symbol; after reading they are exposed
/// through irsymtab::Symbol::getCommonSize() and getCommonAlignment(), which
/// assert that the symbol is common.
104 Word CommonSize, CommonAlign;
105
106 /// COFF-specific: the name of the symbol that a weak external resolves to
107 /// if not defined.
108 Str COFFWeakExternFallbackName;
109 };
110
/// Table of contents for a serialized symbol table. Reader::header() reads it
/// from the very start of the symbol table buffer.
111 struct Header {
// Each Range below locates one array inside the same symbol table buffer.
112 Range Modules;
113 Range Comdats;
114 Range Symbols;
115 Range Uncommons;
116
// Reference into the string table; surfaced by Reader::getSourceFileName().
117 Str SourceFileName;
118
119 /// COFF-specific: linker directives.
120 Str COFFLinkerOpts;
121 };
122
123 }
124
125 /// Fills in Symtab and Strtab with a valid symbol and string table for Mods.
/// Symtab and Strtab are output buffers taken by reference; failure is
/// reported through the returned Error. The resulting pair is the input that
/// irsymtab::Reader is documented to consume.
126 Error build(ArrayRef Mods, SmallVector &Symtab,
127 SmallVector &Strtab);
128
129 /// This represents a symbol that has been read from a storage::Symbol and
130 /// possibly a storage::Uncommon.
// NOTE(review): none of the scalar members below has an initializer, and
// Reader::SymbolRef::read() assigns the three Uncommon-derived fields only
// when the stored UncommonIndex is not -1. For every other symbol those
// fields keep whatever value they held before. The accessors assert on the
// matching flag, but code that reads the fields directly gets no such guard.
131 struct Symbol {
132 // Copied from storage::Symbol.
133 StringRef Name, IRName;
134 int ComdatIndex;
135 uint32_t Flags;
136
137 // Copied from storage::Uncommon.
138 uint32_t CommonSize, CommonAlign;
139 StringRef COFFWeakExternFallbackName;
140
141 /// Returns the mangled symbol name.
142 StringRef getName() const { return Name; }
143
144 /// Returns the unmangled symbol name, or the empty string if this is not an
145 /// IR symbol.
146 StringRef getIRName() const { return IRName; }
147
148 /// Returns the index into the comdat table (see Reader::getComdatTable()), or
149 /// -1 if not a comdat member.
150 int getComdatIndex() const { return ComdatIndex; }
151
// Short alias so the flag accessors can name the FB_* bit positions.
152 using S = storage::Symbol;
// Visibility is the two-bit field at FB_visibility, converted directly to the
// GlobalValue enumeration.
153 GlobalValue::VisibilityTypes getVisibility() const {
154 return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3);
155 }
// Each predicate below tests the single bit at the named FB_* position.
156 bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; }
157 bool isWeak() const { return (Flags >> S::FB_weak) & 1; }
158 bool isCommon() const { return (Flags >> S::FB_common) & 1; }
159 bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; }
160 bool isUsed() const { return (Flags >> S::FB_used) & 1; }
161 bool isTLS() const { return (Flags >> S::FB_tls) & 1; }
162 bool canBeOmittedFromSymbolTable() const {
163 return (Flags >> S::FB_may_omit) & 1;
164 }
165 bool isGlobal() const { return (Flags >> S::FB_global) & 1; }
166 bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; }
167 bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; }
168
// Only meaningful for common symbols; asserts isCommon(). The 32-bit stored
// value is returned as size_t.
169 size_t getCommonSize() const {
170 assert(isCommon());
171 return CommonSize;
172 }
// Only meaningful for common symbols; asserts isCommon().
173 uint32_t getCommonAlignment() const {
174 assert(isCommon());
175 return CommonAlign;
176 }
177
178 /// COFF-specific: for weak externals, returns the name of the symbol that is
179 /// used as a fallback if the weak external remains undefined.
// Asserts that the symbol is both weak and indirect.
180 StringRef getCOFFWeakExternalFallback() const {
181 assert(isWeak() && isIndirect());
182 return COFFWeakExternFallbackName;
183 }
184 };
185
186 /// This class can be used to read a Symtab and Strtab produced by
187 /// irsymtab::build.
/// The reader keeps only StringRef and ArrayRef views, so it does not copy the
/// two buffers; presumably they must stay alive for as long as the reader and
/// anything obtained from it is in use (TODO confirm with callers).
188 class Reader {
189 StringRef Symtab, Strtab;
190
// Views into Symtab, located through the header by the constructor.
191 ArrayRef Modules;
192 ArrayRef Comdats;
193 ArrayRef Symbols;
194 ArrayRef Uncommons;
195
// Resolves a string reference against this reader's string table.
196 StringRef str(storage::Str S) const { return S.get(Strtab); }
// Resolves a range reference against this reader's symbol table buffer.
197 template ArrayRef range(storage::Range R) const {
198 return R.get(Symtab);
199 }
// The header is read in place from offset 0 of the symbol table buffer.
200 const storage::Header &header() const {
201 return *reinterpret_cast(Symtab.data());
202 }
203
204 public:
205 class SymbolRef;
206
// A default-constructed reader has empty buffers and empty arrays.
207 Reader() = default;
// Dereferences header() immediately, so Symtab must already hold a complete
// table. No size or bounds validation is visible in this constructor.
208 Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) {
209 Modules = range(header().Modules);
210 Comdats = range(header().Comdats);
211 Symbols = range(header().Symbols);
212 Uncommons = range(header().Uncommons);
213 }
214
215 typedef iterator_range> symbol_range;
216
217 /// Returns the symbol table for the entire bitcode file.
218 /// The symbols enumerated by this method are ephemeral, but they can be
219 /// copied into an irsymtab::Symbol object.
220 symbol_range symbols() const;
221
222 /// Returns a slice of the symbol table for the I'th module in the file.
223 /// The symbols enumerated by this method are ephemeral, but they can be
224 /// copied into an irsymtab::Symbol object.
225 symbol_range module_symbols(unsigned I) const;
226
227 /// Returns the source file path specified at compile time.
228 StringRef getSourceFileName() const { return str(header().SourceFileName); }
229
230 /// Returns a table with all the comdats used by this file.
// Builds a fresh vector on every call, one resolved name per comdat, in
// table order.
231 std::vector getComdatTable() const {
232 std::vector ComdatTable;
233 ComdatTable.reserve(Comdats.size());
234 for (auto C : Comdats)
235 ComdatTable.push_back(str(C.Name));
236 return ComdatTable;
237 }
238
239 /// COFF-specific: returns linker options specified in the input file.
240 StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
241 };
242
243 /// Ephemeral symbols produced by Reader::symbols() and
244 /// Reader::module_symbols().
/// Acts as a cursor over the stored symbols: the inherited Symbol fields are
/// refilled from the record at SymI each time the cursor moves.
245 class Reader::SymbolRef : public Symbol {
// SymI is the current record and SymE the end of the range being walked.
246 const storage::Symbol *SymI, *SymE;
// Reader used to resolve string references and the Uncommon table.
247 const Reader *R;
248
249 public:
250 SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
251 const Reader *R)
252 : SymI(SymI), SymE(SymE), R(R) {
253 read();
254 }
255
// Decodes the record at SymI into the inherited Symbol fields.
256 void read() {
// At the end position there is no record to decode; the inherited fields are
// left untouched.
257 if (SymI == SymE)
258 return;
259
260 Name = R->str(SymI->Name);
261 IRName = R->str(SymI->IRName);
262 ComdatIndex = SymI->ComdatIndex;
263 Flags = SymI->Flags;
264
// NOTE(review): when the symbol has no Uncommon record (index is -1) the
// three fields assigned below are not reset. They keep the values decoded
// for an earlier symbol, or are unset if no earlier symbol had one.
265 uint32_t UncI = SymI->UncommonIndex;
266 if (UncI != -1u) {
267 const storage::Uncommon &Unc = R->Uncommons[UncI];
268 CommonSize = Unc.CommonSize;
269 CommonAlign = Unc.CommonAlign;
270 COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName);
271 }
272 }
// Advances to the next record and decodes it.
273 void moveNext() {
274 ++SymI;
275 read();
276 }
277
// Equality compares cursor position only, not the decoded field values.
278 bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; }
279 };
280
// Range over every stored symbol. Both cursors carry a pointer to this
// reader, so the range must not be used after the reader is gone.
281 inline Reader::symbol_range Reader::symbols() const {
282 return {SymbolRef(Symbols.begin(), Symbols.end(), this),
283 SymbolRef(Symbols.end(), Symbols.end(), this)};
284 }
285
// Range over the symbols of module I, using the Begin and End indices stored
// for that module as a half-open interval into the symbol array. Both cursors
// carry a pointer to this reader.
286 inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
287 const storage::Module &M = Modules[I];
288 const storage::Symbol *MBegin = Symbols.begin() + M.Begin,
289 *MEnd = Symbols.begin() + M.End;
290 return {SymbolRef(MBegin, MEnd, this), SymbolRef(MEnd, MEnd, this)};
291 }
292
293 }
294
295 }
296
297 #endif
304304 thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported);
305305 }
306306
307 struct InputFile::InputModule {
308 BitcodeModule BM;
309 std::unique_ptr Mod;
310
311 // The range of ModuleSymbolTable entries for this input module.
312 size_t SymBegin, SymEnd;
313 };
314
315307 // Requires a destructor for std::vector<InputModule>.
316308 InputFile::~InputFile() = default;
317309
332324 return make_error("Bitcode file does not contain any modules",
333325 inconvertibleErrorCode());
334326
335 // Create an InputModule for each module in the InputFile, and add it to the
336 // ModuleSymbolTable.
327 File->Mods = *BMsOrErr;
328
329 LLVMContext Ctx;
330 std::vector Mods;
331 std::vector> OwnedMods;
337332 for (auto BM : *BMsOrErr) {
338333 Expected> MOrErr =
339 BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true,
334 BM.getLazyModule(Ctx, /*ShouldLazyLoadMetadata*/ true,
340335 /*IsImporting*/ false);
341336 if (!MOrErr)
342337 return MOrErr.takeError();
343338
344 size_t SymBegin = File->SymTab.symbols().size();
345 File->SymTab.addModule(MOrErr->get());
346 size_t SymEnd = File->SymTab.symbols().size();
347
348 for (const auto &C : (*MOrErr)->getComdatSymbolTable()) {
349 auto P = File->ComdatMap.insert(
350 std::make_pair(&C.second, File->Comdats.size()));
351 assert(P.second);
352 (void)P;
353 File->Comdats.push_back(C.first());
354 }
355
356 File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd});
339 if ((*MOrErr)->getDataLayoutStr().empty())
340 return make_error("input module has no datalayout",
341 inconvertibleErrorCode());
342
343 Mods.push_back(MOrErr->get());
344 OwnedMods.push_back(std::move(*MOrErr));
345 }
346
347 SmallVector Symtab;
348 if (Error E = irsymtab::build(Mods, Symtab, File->Strtab))
349 return std::move(E);
350
351 irsymtab::Reader R({Symtab.data(), Symtab.size()},
352 {File->Strtab.data(), File->Strtab.size()});
353 File->SourceFileName = R.getSourceFileName();
354 File->COFFLinkerOpts = R.getCOFFLinkerOpts();
355 File->ComdatTable = R.getComdatTable();
356
357 for (unsigned I = 0; I != Mods.size(); ++I) {
358 size_t Begin = File->Symbols.size();
359 for (const irsymtab::Reader::SymbolRef &Sym : R.module_symbols(I))
360 // Skip symbols that are irrelevant to LTO. Note that this condition needs
361 // to match the one in Skip() in LTO::addRegularLTO().
362 if (Sym.isGlobal() && !Sym.isFormatSpecific())
363 File->Symbols.push_back(Sym);
364 File->ModuleSymIndices.push_back({Begin, File->Symbols.size()});
357365 }
358366
359367 return std::move(File);
360368 }
361369
362 Expected InputFile::Symbol::getComdatIndex() const {
363 if (!isGV())
364 return -1;
365 const GlobalObject *GO = getGV()->getBaseObject();
366 if (!GO)
367 return make_error("Unable to determine comdat of alias!",
368 inconvertibleErrorCode());
369 if (const Comdat *C = GO->getComdat()) {
370 auto I = File->ComdatMap.find(C);
371 assert(I != File->ComdatMap.end());
372 return I->second;
373 }
374 return -1;
375 }
376
377 Expected InputFile::getLinkerOpts() {
378 std::string LinkerOpts;
379 raw_string_ostream LOS(LinkerOpts);
380 // Extract linker options from module metadata.
381 for (InputModule &Mod : Mods) {
382 std::unique_ptr &M = Mod.Mod;
383 if (auto E = M->materializeMetadata())
384 return std::move(E);
385 if (Metadata *Val = M->getModuleFlag("Linker Options")) {
386 MDNode *LinkerOptions = cast(Val);
387 for (const MDOperand &MDOptions : LinkerOptions->operands())
388 for (const MDOperand &MDOption : cast(MDOptions)->operands())
389 LOS << " " << cast(MDOption)->getString();
390 }
391 }
392
393 // Synthesize export flags for symbols with dllexport storage.
394 const Triple TT(Mods[0].Mod->getTargetTriple());
395 Mangler M;
396 for (const ModuleSymbolTable::Symbol &Sym : SymTab.symbols())
397 if (auto *GV = Sym.dyn_cast())
398 emitLinkerFlagsForGlobalCOFF(LOS, GV, TT, M);
399 LOS.flush();
400 return LinkerOpts;
401 }
402
403370 StringRef InputFile::getName() const {
404 return Mods[0].BM.getModuleIdentifier();
405 }
406
407 StringRef InputFile::getSourceFileName() const {
408 return Mods[0].Mod->getSourceFileName();
409 }
410
411 iterator_range
412 InputFile::module_symbols(InputModule &IM) {
413 return llvm::make_range(
414 symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this),
415 symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this));
371 return Mods[0].getModuleIdentifier();
416372 }
417373
418374 LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
436392 LTO::~LTO() = default;
437393
438394 // Add the given symbol to the GlobalResolutions map, and resolve its partition.
439 void LTO::addSymbolToGlobalRes(SmallPtrSet &Used,
440 const InputFile::Symbol &Sym,
395 void LTO::addSymbolToGlobalRes(const InputFile::Symbol &Sym,
441396 SymbolResolution Res, unsigned Partition) {
442 GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr;
443
444397 auto &GlobalRes = GlobalResolutions[Sym.getName()];
445 if (GV) {
446 GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr();
447 if (Res.Prevailing)
448 GlobalRes.IRName = GV->getName();
449 }
398 GlobalRes.UnnamedAddr &= Sym.isUnnamedAddr();
399 if (Res.Prevailing)
400 GlobalRes.IRName = Sym.getIRName();
401
450402 // Set the partition to external if we know it is used elsewhere, e.g.
451403 // it is visible to a regular object, is referenced from llvm.compiler_used,
452404 // or was already recorded as being referenced from a different partition.
453 if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
405 if (Res.VisibleToRegularObj || Sym.isUsed() ||
454406 (GlobalRes.Partition != GlobalResolution::Unknown &&
455407 GlobalRes.Partition != Partition)) {
456408 GlobalRes.Partition = GlobalResolution::External;
494446 writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
495447
496448 const SymbolResolution *ResI = Res.begin();
497 for (InputFile::InputModule &IM : Input->Mods)
498 if (Error Err = addModule(*Input, IM, ResI, Res.end()))
449 for (unsigned I = 0; I != Input->Mods.size(); ++I)
450 if (Error Err = addModule(*Input, I, ResI, Res.end()))
499451 return Err;
500452
501453 assert(ResI == Res.end());
502454 return Error::success();
503455 }
504456
505 Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM,
457 Error LTO::addModule(InputFile &Input, unsigned ModI,
506458 const SymbolResolution *&ResI,
507459 const SymbolResolution *ResE) {
508 // FIXME: move to backend
509 Module &M = *IM.Mod;
510
511 if (M.getDataLayoutStr().empty())
512 return make_error("input module has no datalayout",
513 inconvertibleErrorCode());
514
515 if (!Conf.OverrideTriple.empty())
516 M.setTargetTriple(Conf.OverrideTriple);
517 else if (M.getTargetTriple().empty())
518 M.setTargetTriple(Conf.DefaultTriple);
519
520 Expected HasThinLTOSummary = IM.BM.hasSummary();
460 Expected HasThinLTOSummary = Input.Mods[ModI].hasSummary();
521461 if (!HasThinLTOSummary)
522462 return HasThinLTOSummary.takeError();
523463
464 auto ModSyms = Input.module_symbols(ModI);
524465 if (*HasThinLTOSummary)
525 return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE);
466 return addThinLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
526467 else
527 return addRegularLTO(IM.BM, ResI, ResE);
468 return addRegularLTO(Input.Mods[ModI], ModSyms, ResI, ResE);
528469 }
529470
530471 // Add a regular LTO object to the link.
531 Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
472 Error LTO::addRegularLTO(BitcodeModule BM,
473 ArrayRef Syms,
474 const SymbolResolution *&ResI,
532475 const SymbolResolution *ResE) {
533476 if (!RegularLTO.CombinedModule) {
534477 RegularLTO.CombinedModule =
549492 ModuleSymbolTable SymTab;
550493 SymTab.addModule(&M);
551494
552 SmallPtrSet Used;
553 collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
554
555495 std::vector Keep;
556496
557497 for (GlobalVariable &GV : M.globals())
563503 if (GlobalObject *GO = GA.getBaseObject())
564504 AliasedGlobals.insert(GO);
565505
566 for (const InputFile::Symbol &Sym :
567 make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab,
568 nullptr),
569 InputFile::symbol_iterator(SymTab.symbols().end(), SymTab,
570 nullptr))) {
506 // In this function we need IR GlobalValues matching the symbols in Syms
507 // (which is not backed by a module), so we need to enumerate them in the same
508 // order. The symbol enumeration order of a ModuleSymbolTable intentionally
509 // matches the order of an irsymtab, but when we read the irsymtab in
510 // InputFile::create we omit some symbols that are irrelevant to LTO. The
511 // Skip() function skips the same symbols from the module as InputFile does
512 // from the symbol table.
513 auto MsymI = SymTab.symbols().begin(), MsymE = SymTab.symbols().end();
514 auto Skip = [&]() {
515 while (MsymI != MsymE) {
516 auto Flags = SymTab.getSymbolFlags(*MsymI);
517 if ((Flags & object::BasicSymbolRef::SF_Global) &&
518 !(Flags & object::BasicSymbolRef::SF_FormatSpecific))
519 return;
520 ++MsymI;
521 }
522 };
523 Skip();
524
525 for (const InputFile::Symbol &Sym : Syms) {
571526 assert(ResI != ResE);
572527 SymbolResolution Res = *ResI++;
573 addSymbolToGlobalRes(Used, Sym, Res, 0);
574
575 if (Sym.isGV()) {
576 GlobalValue *GV = Sym.getGV();
528 addSymbolToGlobalRes(Sym, Res, 0);
529
530 assert(MsymI != MsymE);
531 ModuleSymbolTable::Symbol Msym = *MsymI++;
532 Skip();
533
534 if (GlobalValue *GV = Msym.dyn_cast()) {
577535 if (Res.Prevailing) {
578536 if (Sym.isUndefined())
579537 continue;
611569 if (Sym.isCommon()) {
612570 // FIXME: We should figure out what to do about commons defined by asm.
613571 // For now they aren't reported correctly by ModuleSymbolTable.
614 auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()];
572 auto &CommonRes = RegularLTO.Commons[Sym.getIRName()];
615573 CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
616574 CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment());
617575 CommonRes.Prevailing |= Res.Prevailing;
619577
620578 // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
621579 }
580 assert(MsymI == MsymE);
622581
623582 return RegularLTO.Mover->move(std::move(*MOrErr), Keep,
624583 [](GlobalValue &, IRMover::ValueAdder) {},
626585 }
627586
628587 // Add a ThinLTO object to the link.
629 // FIXME: This function should not need to take as many parameters once we have
630 // a bitcode symbol table.
631 Error LTO::addThinLTO(BitcodeModule BM, Module &M,
632 iterator_range Syms,
588 Error LTO::addThinLTO(BitcodeModule BM,
589 ArrayRef Syms,
633590 const SymbolResolution *&ResI,
634591 const SymbolResolution *ResE) {
635 SmallPtrSet Used;
636 collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
637
638592 Expected> SummaryOrErr = BM.getSummary();
639593 if (!SummaryOrErr)
640594 return SummaryOrErr.takeError();
644598 for (const InputFile::Symbol &Sym : Syms) {
645599 assert(ResI != ResE);
646600 SymbolResolution Res = *ResI++;
647 addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1);
648
649 if (Res.Prevailing && Sym.isGV())
650 ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] =
651 BM.getModuleIdentifier();
601 addSymbolToGlobalRes(Sym, Res, ThinLTO.ModuleMap.size() + 1);
602
603 if (Res.Prevailing) {
604 if (!Sym.getIRName().empty()) {
605 auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
606 Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
607 ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
608 }
609 }
652610 }
653611
654612 if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
77 ELFObjectFile.cpp
88 Error.cpp
99 IRObjectFile.cpp
10 IRSymtab.cpp
1011 MachOObjectFile.cpp
1112 MachOUniversal.cpp
1213 ModuleSummaryIndexObjectFile.cpp
0 //===- IRSymtab.cpp - implementation of IR symbol tables --------*- C++ -*-===//
1 //
2 // The LLVM Compiler Infrastructure
3 //
4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details.
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "llvm/Object/IRSymtab.h"
10 #include "llvm/CodeGen/Analysis.h"
11 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
12 #include "llvm/IR/Module.h"
13 #include "llvm/MC/StringTableBuilder.h"
14 #include "llvm/Object/ModuleSymbolTable.h"
15 #include "llvm/Support/Allocator.h"
16 #include "llvm/Support/StringSaver.h"
17
18 using namespace llvm;
19 using namespace irsymtab;
20
21 namespace {
22
23 /// Stores the temporary state that is required to build an IR symbol table.
24 struct Builder {
// Output buffers owned by the caller; the builder only holds references.
25 SmallVector &Symtab;
26 SmallVector &Strtab;
27 Builder(SmallVector &Symtab, SmallVector &Strtab)
28 : Symtab(Symtab), Strtab(Strtab) {}
29
// String table under construction; setStr() records the offsets it hands out.
30 StringTableBuilder StrtabBuilder{StringTableBuilder::ELF};
31
32 BumpPtrAllocator Alloc;
33 StringSaver Saver{Alloc};
34
35 DenseMap ComdatMap;
// Accumulates the symbols of every module passed to addModule().
36 ModuleSymbolTable Msymtab;
// Filled by collectUsedGlobalVariables() in addModule().
37 SmallPtrSet Used;
38 Mangler Mang;
// NOTE(review): TT is read by addModule() but not assigned in any code visible
// here -- presumably it is set before the first module is added; confirm.
39 Triple TT;
40
// In-memory copies of the tables, presumably later serialized with writeRange().
41 std::vector Comdats;
42 std::vector Mods;
43 std::vector Syms;
44 std::vector Uncommons;
45
// The stream writes into the string declared just above it.
46 std::string COFFLinkerOpts;
47 raw_string_ostream COFFLinkerOptsOS{COFFLinkerOpts};
48
// Adds Value to the string table builder and stores the returned offset in S.
49 void setStr(storage::Str &S, StringRef Value) {
50 S.Offset = StrtabBuilder.add(Value);
51 }
// Appends the raw bytes of Objs to Symtab. R records the byte offset at which
// they start and the number of elements (not bytes) written.
52 template
53 void writeRange(storage::Range &R, const std::vector &Objs) {
54 R.Offset = Symtab.size();
55 R.Size = Objs.size();
56 Symtab.insert(Symtab.end(), reinterpret_cast(Objs.data()),
57 reinterpret_cast(Objs.data() + Objs.size()));
58 }
59
60 Error addModule(Module *M);
61 Error addSymbol(ModuleSymbolTable::Symbol Sym);
62
63 Error build(ArrayRef Mods);
64 };
65
66 Error Builder::addModule(Module *M) {
67 collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false);
68
69 storage::Module Mod;
70 Mod.Begin = Msymtab.symbols().size();
71 Msymtab.addModule(M);
72 Mod.End = Msymtab.symbols().size();
73 Mods.push_back(Mod);
74
75 if (TT.isOSBinFormatCOFF()) {
76 if (auto E = M->materializeMetadata())
77 return E;
78 if (Metadata *Val = M->getModuleFlag("Linker Options")) {
79 MDNode *LinkerOptions = cast(Val);
80 for (const MDOperand &MDOptions : LinkerOptions->operands())
81 for (const MDOperand &MDOption : cast(MDOptions)->operands())
82 COFFLinkerOptsOS << " " << cast(MDOption)->getString();
83 }
84 }
85
86 return Error::success();
87 }
88
89 Error Builder::addSymbol(ModuleSymbolTable::Symbol Msym) {
90 Syms.emplace_back();
91 storage::Symbol &Sym = Syms.back();
92 Sym = {};
93
94 Sym.UncommonIndex = -1;
95 storage::Uncommon *Unc = nullptr;
96 auto Uncommon = [&]() -> storage::Uncommon & {
97 if (Unc)
98 return *Unc;
99 Sym.UncommonIndex = Uncommons.size();
100 Uncommons.emplace_back();
101 Unc = &Uncommons.back();
102 *Unc = {};
103 setStr(Unc->COFFWeakExternFallbackName, "");
104 return *Unc;
105 };
106
107 SmallString<64> Name;
108 {
109 raw_svector_ostream OS(Name);
110 Msymtab.printSymbolName(OS, Msym);
111 }
112 setStr(Sym.Name, Saver.save(StringRef(Name)));
113
114 auto Flags = Msymtab.getSymbolFlags(Msym);
115 if (Flags & object::BasicSymbolRef::SF_Undefined)
116 Sym.Flags |= 1 << storage::Symbol::FB_undefined;
117 if (Flags & object::BasicSymbolRef::SF_Weak)
118 Sym.Flags |= 1 << storage::Symbol::FB_weak;
119 if (Flags & object::BasicSymbolRef::SF_Common)
120 Sym.Flags |= 1 << storage::Symbol::FB_common;
121 if (Flags & object::BasicSymbolRef::SF_Indirect)
122 Sym.Flags |= 1 << storage::Symbol::FB_indirect;
123 if (Flags & object::BasicSymbolRef::SF_Global)
124 Sym.Flags |= 1 << storage::Symbol::FB_global;
125 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
126 Sym.Flags |= 1 << storage::Symbol::FB_format_specific;
127
128 Sym.ComdatIndex = -1;
129 auto *GV = Msym.dyn_cast();
130 if (!GV) {
131 setStr(Sym.IRName, "");
132 return Error::success();
133 }
134
135 setStr(Sym.IRName, GV->getName());
136
137 if (Used.count(GV))
138 Sym.Flags |= 1 << storage::Symbol::FB_used;
139 if (GV->isThreadLocal())
140 Sym.Flags |= 1 << storage::Symbol::FB_tls;
141 if (GV->hasGlobalUnnamedAddr())
142 Sym.Flags |= 1 << storage::Symbol::FB_unnamed_addr;
143 if (canBeOmittedFromSymbolTable(GV))
144 Sym.Flags |= 1 << storage::Symbol::FB_may_omit;
145 Sym.Flags |= unsigned(GV->getVisibility()) << storage::Symbol::FB_visibility;
146
147 if (Flags & object::BasicSymbolRef::SF_Common) {
148 Uncommon().CommonSize = GV->getParent()->getDataLayout().getTypeAllocSize(
149 GV->getType()->getElementType());
150 Uncommon().CommonAlign = GV->getAlignment();
151 }
152
153 const GlobalObject *Base = GV->getBaseObject();
154 if (!Base)
155 return make_error("Unable to determine comdat of alias!",
156 inconvertibleErrorCode());
157 if (const Comdat *C = Base->getComdat()) {
158 auto P = ComdatMap.insert(std::make_pair(C, Comdats.size()));
159 Sym.ComdatIndex = P.first->second;
160
161 if (P.second) {
162 storage::Comdat Comdat;
163 setStr(Comdat.Name, C->getName());
164 Comdats.push_back(Comdat);
165 }
166 }
167
168 if (TT.isOSBinFormatCOFF()) {
169 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS, GV, TT, Mang);
170
171 if ((Flags & object::BasicSymbolRef::SF_Weak) &&
172 (Flags & object::BasicSymbolRef::SF_Indirect)) {
173 std::string FallbackName;
174 raw_string_ostream OS(FallbackName);
175 Msymtab.printSymbolName(
176 OS, cast(
177 cast(GV)->getAliasee()->stripPointerCasts()));
178 OS.flush();
179 setStr(Uncommon().COFFWeakExternFallbackName, Saver.save(FallbackName));
180 }
181 }
182
183 return Error::success();
184 }
185
186 Error Builder::build(ArrayRef IRMods) {
187 storage::Header Hdr;
188
189 assert(!IRMods.empty());
190 setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName());
191 TT = Triple(IRMods[0]->getTargetTriple());
192
193 // This adds the symbols for each module to Msymtab.
194 for (auto *M : IRMods)
195 if (Error Err = addModule(M))
196 return Err;
197
198 for (ModuleSymbolTable::Symbol Msym : Msymtab.symbols())
199 if (Error Err = addSymbol(Msym))
200 return Err;
201
202 COFFLinkerOptsOS.flush();
203 setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts);
204
205 // We are about to fill in the header's range fields, so reserve space for it
206 // and copy it in afterwards.
207 Symtab.resize(sizeof(storage::Header));
208 writeRange(Hdr.Modules, Mods);
209 writeRange(Hdr.Comdats, Comdats);
210 writeRange(Hdr.Symbols, Syms);
211 writeRange(Hdr.Uncommons, Uncommons);
212
213 *reinterpret_cast(Symtab.data()) = Hdr;
214
215 raw_svector_ostream OS(Strtab);
216 StrtabBuilder.finalizeInOrder();
217 StrtabBuilder.write(OS);
218
219 return Error::success();
220 }
221
222 } // anonymous namespace
223
224 Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab,
225 SmallVector &Strtab) {
226 return Builder(Symtab, Strtab).build(Mods);
227 }
464464 EC == object::object_error::bitcode_section_not_found)
465465 *claimed = 0;
466466 else
467 message(LDPL_ERROR,
467 message(LDPL_FATAL,
468468 "LLVM gold plugin has failed to create LTO module: %s",
469469 EI.message().c_str());
470470 });
535535
536536 sym.size = 0;
537537 sym.comdat_key = nullptr;
538 int CI = check(Sym.getComdatIndex());
538 int CI = Sym.getComdatIndex();
539539 if (CI != -1) {
540540 StringRef C = Obj->getComdatTable()[CI];
541541 sym.comdat_key = strdup(C.str().c_str());