llvm.org GIT mirror llvm / 6568fb5
[MS Demangler] Fix several crashes and demangling bugs.

These bugs were found by writing a Python script which spidered the entire Chromium build directory tree demangling every symbol in every object file. At the start, the tool printed:

  Processed 27443 object files.
  2926377/2936108 symbols successfully demangled (99.6686%)
  9731 symbols could not be demangled (0.3314%)
  14589 files crashed while demangling (53.1611%)

After this patch, it prints:

  Processed 27443 object files.
  41295518/41295617 symbols successfully demangled (99.9998%)
  99 symbols could not be demangled (0.0002%)
  0 files crashed while demangling (0.0000%)

The issues fixed in this patch are:

* Ignore empty parameter packs. Previously we would encounter a mangling for an empty parameter pack and add a null node to the AST. Since we don't print these anyway, we now just don't add anything to the AST and ignore it entirely. This fixes some of the crashes.
* Account for "incorrect" string literal demanglings. Apparently an older version of clang would not truncate mangled string literals to 32 bytes of encoded character data. The demangling code however would allocate a 32 byte buffer thinking that it would not encounter more than this, and overrun the buffer. We now demangle up to 128 bytes of data, since the buggy clang would encode up to 32 *characters* of data.
* Extended support for demangling init-fini stubs. If you had something like struct Foo { static vector<string> S; }; this would generate a dynamic atexit initializer *for the variable*. We didn't handle this, but now we print something nice. This is actually an improvement over undname, which will fail to demangle this at all.
* Fixed one case of static this adjustment. We weren't handling several thunk codes so we didn't recognize the mangling. These are now handled.
* Fixed a back-referencing problem. Member pointer templates should have their components considered for back-referencing.

The remaining 99 symbols which can't be demangled are all symbols which are compiler-generated and undname can't demangle either.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@341000 91177308-0d34-0410-b5e6-96231b3b80d8
Zachary Turner 1 year, 18 days ago
6 changed file(s) with 86 addition(s) and 32 deletion(s). Raw diff Collapse all Expand all
349349 VariableSymbolNode *
350350 demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
351351 StringView &MangledName);
352 FunctionSymbolNode *demangleDynamicStructorFunction(StringView &MangledName,
353 bool IsDestructor);
352 FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
353 bool IsDestructor);
354354
355355 NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
356356 bool Memorize);
519519 return VSN;
520520 }
521521
522 FunctionSymbolNode *
523 Demangler::demangleDynamicStructorFunction(StringView &MangledName,
524 bool IsDestructor) {
522 FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
523 bool IsDestructor) {
525524 DynamicStructorIdentifierNode *DSIN =
526525 Arena.alloc<DynamicStructorIdentifierNode>();
527526 DSIN->IsDestructor = IsDestructor;
528 DSIN->Name = demangleFullyQualifiedTypeName(MangledName);
529 QualifiedNameNode *QNN = synthesizeQualifiedName(Arena, DSIN);
530 FunctionSymbolNode *FSN = demangleFunctionEncoding(MangledName);
531 FSN->Name = QNN;
527
528 // What follows is a main symbol name. This may include namespaces or class
529 // back references.
530 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
531
532 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
533 FunctionSymbolNode *FSN = nullptr;
534 Symbol->Name = QN;
535
536 if (Symbol->kind() == NodeKind::VariableSymbol) {
537 DSIN->Variable = static_cast<VariableSymbolNode *>(Symbol);
538 if (!MangledName.consumeFront('@')) {
539 Error = true;
540 return nullptr;
541 }
542
543 FSN = demangleFunctionEncoding(MangledName);
544 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
545 } else {
546 FSN = static_cast<FunctionSymbolNode *>(Symbol);
547 DSIN->Name = Symbol->Name;
548 FSN->Name = synthesizeQualifiedName(Arena, DSIN);
549 }
550
532551 return FSN;
533552 }
534553
568587 case SpecialIntrinsicKind::RttiBaseClassDescriptor:
569588 return demangleRttiBaseClassDescriptorNode(Arena, MangledName);
570589 case SpecialIntrinsicKind::DynamicInitializer:
571 return demangleDynamicStructorFunction(MangledName, false);
590 return demangleInitFiniStub(MangledName, false);
572591 case SpecialIntrinsicKind::DynamicAtexitDestructor:
573 return demangleDynamicStructorFunction(MangledName, true);
592 return demangleInitFiniStub(MangledName, true);
574593 default:
575594 break;
576595 }
836855 // What follows is a main symbol name. This may include namespaces or class
837856 // back references.
838857 QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
858 if (Error)
859 return nullptr;
839860
840861 SymbolNode *Symbol = demangleEncodedSymbol(MangledName, QN);
841862 if (Symbol) {
13241345 goto StringLiteralError;
13251346 }
13261347 } else {
1327 if (StringByteSize > 32)
1328 Result->IsTruncated = true;
1329
1330 constexpr unsigned MaxStringByteLength = 32;
1348 // The max byte length is actually 32, but some compilers mangled strings
1349 // incorrectly, so we have to assume it can go higher.
1350 constexpr unsigned MaxStringByteLength = 32 * 4;
13311351 uint8_t StringBytes[MaxStringByteLength];
13321352
13331353 unsigned BytesDecoded = 0;
13351355 assert(StringByteSize >= 1);
13361356 StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
13371357 }
1358
1359 if (StringByteSize > BytesDecoded)
1360 Result->IsTruncated = true;
13381361
13391362 unsigned CharBytes =
13401363 guessCharByteSize(StringBytes, BytesDecoded, StringByteSize);
15861609 return FuncClass(FC_Private | FC_Virtual);
15871610 case 'F':
15881611 return FuncClass(FC_Private | FC_Virtual);
1612 case 'G':
1613 return FuncClass(FC_Private | FC_StaticThisAdjust);
1614 case 'H':
1615 return FuncClass(FC_Private | FC_StaticThisAdjust | FC_Far);
15891616 case 'I':
15901617 return FuncClass(FC_Protected);
15911618 case 'J':
17591786 Ty = demangleCustomType(MangledName);
17601787 } else {
17611788 Ty = demanglePrimitiveType(MangledName);
1762 assert(Ty && !Error);
17631789 if (!Ty || Error)
17641790 return Ty;
17651791 }
19752001 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
19762002
19772003 if (MangledName.consumeFront("8")) {
1978 Pointer->ClassParent = demangleFullyQualifiedSymbolName(MangledName);
2004 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
19792005 Pointer->Pointee = demangleFunctionType(MangledName, true);
19802006 } else {
19812007 Qualifiers PointeeQuals = Q_None;
19822008 bool IsMember = false;
19832009 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
19842010 assert(IsMember);
1985 Pointer->ClassParent = demangleFullyQualifiedSymbolName(MangledName);
2011 Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
19862012
19872013 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
19882014 Pointer->Pointee->Quals = PointeeQuals;
21202146 size_t Count = 0;
21212147
21222148 while (!Error && !MangledName.startsWith('@')) {
2123 ++Count;
2124 // Template parameter lists don't participate in back-referencing.
2125 *Current = Arena.alloc<NodeList>();
2126
2127 NodeList &TP = **Current;
2128
2129 TemplateParameterReferenceNode *TPRN = nullptr;
21302149 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
21312150 MangledName.consumeFront("$$$V")) {
21322151 // Empty parameter pack.
2133 TP.N = nullptr;
2134 } else if (MangledName.consumeFront("$$Y")) {
2152 continue;
2153 }
2154
2155 ++Count;
2156
2157 // Template parameter lists don't participate in back-referencing.
2158 *Current = Arena.alloc<NodeList>();
2159
2160 NodeList &TP = **Current;
2161
2162 TemplateParameterReferenceNode *TPRN = nullptr;
2163 if (MangledName.consumeFront("$$Y")) {
21352164 // Template alias
21362165 TP.N = demangleFullyQualifiedTypeName(MangledName);
21372166 } else if (MangledName.consumeFront("$$B")) {
222222 else
223223 OS << "`dynamic initializer for ";
224224
225 OS << "'";
226 Name->output(OS, Flags);
227 OS << "''";
225 if (Variable) {
226 OS << "`";
227 Variable->output(OS, Flags);
228 OS << "''";
229 } else {
230 OS << "'";
231 Name->output(OS, Flags);
232 OS << "''";
233 }
228234 }
229235
230236 void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
321321 LocalStaticGuardVariable,
322322 FunctionSymbol,
323323 VariableSymbol,
324 SpecialTableSymbol,
324 SpecialTableSymbol
325325 };
326326
327327 struct Node {
442442
443443 void output(OutputStream &OS, OutputFlags Flags) const override;
444444
445 VariableSymbolNode *Variable = nullptr;
445446 QualifiedNameNode *Name = nullptr;
446447 bool IsDestructor = false;
447448 };
168168
169169 ?AddEmitPasses@EmitAssemblyHelper@?A0x43583946@@AEAA_NAEAVPassManager@legacy@llvm@@W4BackendAction@clang@@AEAVraw_pwrite_stream@5@PEAV85@@Z
170170 ; CHECK: bool __cdecl `anonymous namespace'::EmitAssemblyHelper::AddEmitPasses(class llvm::legacy::PassManager &, enum clang::BackendAction, class llvm::raw_pwrite_stream &, class llvm::raw_pwrite_stream *)
171
172 ??$forward@P8?$DecoderStream@$01@media@@AEXXZ@std@@YA$$QAP8?$DecoderStream@$01@media@@AEXXZAAP812@AEXXZ@Z
173 ; CHECK: void (__thiscall media::DecoderStream<2>::*&& __cdecl std::forward<void (__thiscall media::DecoderStream<2>::*)(void)>(void (__thiscall media::DecoderStream<2>::*&)(void)))(void)
160160 ??_EBase@@UEAAPEAXI@Z
161161 ; CHECK: virtual void * __cdecl Base::`vector deleting dtor'(unsigned int)
162162
163 ??_EBase@@G3AEPAXI@Z
164 ; CHECK: [thunk]: void * __thiscall Base::`vector deleting dtor'`adjustor{4}'(unsigned int)
165
163166 ??_F?$SomeTemplate@H@@QAEXXZ
164167 ; CHECK: void __thiscall SomeTemplate<int>::`default ctor closure'(void)
165168
223226 ??__FFoo@@YAXXZ
224227 ; CHECK: void __cdecl `dynamic atexit destructor for 'Foo''(void)
225228
229 ??__F_decisionToDFA@XPathLexer@@0V?$vector@VDFA@dfa@antlr4@@V?$allocator@VDFA@dfa@antlr4@@@std@@@std@@A@YAXXZ
230 ; CHECK: void __cdecl `dynamic atexit destructor for `static class std::vector<class antlr4::dfa::DFA, class std::allocator<class antlr4::dfa::DFA>> XPathLexer::_decisionToDFA''(void)
231
226232 ??__K_deg@@YAHO@Z
227233 ; CHECK: int __cdecl operator ""_deg(long double)
228234
760760 ; CHECK: const char16_t * {u"012345678901234"}
761761
762762 ??_C@_0CA@KFPHPCC@0?$AA?$AA?$AA1?$AA?$AA?$AA2?$AA?$AA?$AA3?$AA?$AA?$AA4?$AA?$AA?$AA5?$AA?$AA?$AA6?$AA?$AA?$AA?$AA?$AA?$AA?$AA@
763 ; CHECK: const char32_t * {U"0123456"}
763 ; CHECK: const char32_t * {U"0123456"}
764
765 ; There are too many bytes encoded in this string literal (it should encode a max of 32 bytes)
766 ; but some buggy compilers will incorrectly generate this, so we need to be able to demangle
767 ; both the correct and incorrect versions.
768 ??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AAa?$AAk?$AA?$AA?$AA@
769 ; CHECK: const char16_t * {u"lookAheadHardBreak"}
770
771 ??_C@_0CG@HJGBPLNO@l?$AAo?$AAo?$AAk?$AAA?$AAh?$AAe?$AAa?$AAd?$AAH?$AAa?$AAr?$AAd?$AAB?$AAr?$AAe?$AA@
772 ; CHECK: const char16_t * {u"lookAheadHardBre"...}