From d3b10150b683142a7481893ddffe9206e1d29f95 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 28 Feb 2022 10:13:44 -0800 Subject: [PATCH] [demangler] Simplify OutputBuffer initialization Every non-testcase use of OutputBuffer contains code to allocate an initial buffer (using either 128 or 1024 as initial guesses). There's now no need to do that, given recent changes to the buffer extension heuristics -- it allocates a 1k(ish) buffer on first need. Just pass in a buffer (if any) to the constructor. Thus the OutputBuffer's ownership of the buffer starts at its own lifetime start. We can reduce the lifetime of this object in several cases. That new constructor takes a 'size_t *' for the size argument, as all uses with a non-null buffer are passing through a malloc'd buffer from their own caller in this manner. The buffer reset member function is never used, and is deleted. Some adjustment to a couple of uses is needed, due to the lazy buffer creation of this patch. a) the Microsoft demangler can demangle empty strings to nothing, which it then memoizes. We need to avoid the UB of passing nullptr to memcpy. b) a unit test checks insertion of no characters into an empty buffer. We need to avoid UB when converting that to std::string. The original buffer initialization code would return a failure code if that first malloc failed. Existing code either ignored that, called std::terminate with a FIXME, or returned an error code. But that's not foolproof anyway, as a subsequent buffer extension failure ends up calling std::terminate. I am working on addressing that unfortunate failure mode in a manner more consistent with the C++ ABI design. 
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D122604 --- libcxxabi/src/cxa_demangle.cpp | 5 +--- libcxxabi/src/demangle/Utility.h | 25 +++----------------- llvm/include/llvm/Demangle/Utility.h | 25 +++----------------- llvm/lib/Demangle/DLangDemangle.cpp | 3 --- llvm/lib/Demangle/ItaniumDemangle.cpp | 21 ++++------------ llvm/lib/Demangle/MicrosoftDemangle.cpp | 22 ++++------------- llvm/lib/Demangle/MicrosoftDemangleNodes.cpp | 1 - llvm/lib/Demangle/RustDemangle.cpp | 3 --- 8 files changed, 17 insertions(+), 88 deletions(-) diff --git a/libcxxabi/src/cxa_demangle.cpp b/libcxxabi/src/cxa_demangle.cpp index ddab6d33358a..7baac680074c 100644 --- a/libcxxabi/src/cxa_demangle.cpp +++ b/libcxxabi/src/cxa_demangle.cpp @@ -386,15 +386,12 @@ __cxa_demangle(const char *MangledName, char *Buf, size_t *N, int *Status) { int InternalStatus = demangle_success; Demangler Parser(MangledName, MangledName + std::strlen(MangledName)); - OutputBuffer O; - Node *AST = Parser.parse(); if (AST == nullptr) InternalStatus = demangle_invalid_mangled_name; - else if (!initializeOutputBuffer(Buf, N, O, 1024)) - InternalStatus = demangle_memory_alloc_failure; else { + OutputBuffer O(Buf, N); assert(Parser.ForwardTemplateRefs.empty()); AST->print(O); O += '\0'; diff --git a/libcxxabi/src/demangle/Utility.h b/libcxxabi/src/demangle/Utility.h index db19dcac0147..c9b211b5441a 100644 --- a/libcxxabi/src/demangle/Utility.h +++ b/libcxxabi/src/demangle/Utility.h @@ -69,7 +69,9 @@ class OutputBuffer { public: OutputBuffer(char *StartBuf, size_t Size) - : Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {} + : Buffer(StartBuf), BufferCapacity(Size) {} + OutputBuffer(char *StartBuf, size_t *SizePtr) + : OutputBuffer(StartBuf, StartBuf ? 
*SizePtr : 0) {} OutputBuffer() = default; // Non-copyable OutputBuffer(const OutputBuffer &) = delete; @@ -77,12 +79,6 @@ public: operator StringView() const { return StringView(Buffer, CurrentPosition); } - void reset(char *Buffer_, size_t BufferCapacity_) { - CurrentPosition = 0; - Buffer = Buffer_; - BufferCapacity = BufferCapacity_; - } - /// If a ParameterPackExpansion (or similar type) is encountered, the offset /// into the pack that we're currently printing. unsigned CurrentPackIndex = std::numeric_limits::max(); @@ -198,21 +194,6 @@ public: ScopedOverride &operator=(const ScopedOverride &) = delete; }; -inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, - size_t InitSize) { - size_t BufferSize; - if (Buf == nullptr) { - Buf = static_cast(std::malloc(InitSize)); - if (Buf == nullptr) - return false; - BufferSize = InitSize; - } else - BufferSize = *N; - - OB.reset(Buf, BufferSize); - return true; -} - DEMANGLE_NAMESPACE_END #endif diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index ca7e44b948c7..855c56e9df32 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -69,7 +69,9 @@ class OutputBuffer { public: OutputBuffer(char *StartBuf, size_t Size) - : Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {} + : Buffer(StartBuf), BufferCapacity(Size) {} + OutputBuffer(char *StartBuf, size_t *SizePtr) + : OutputBuffer(StartBuf, StartBuf ? *SizePtr : 0) {} OutputBuffer() = default; // Non-copyable OutputBuffer(const OutputBuffer &) = delete; @@ -77,12 +79,6 @@ public: operator StringView() const { return StringView(Buffer, CurrentPosition); } - void reset(char *Buffer_, size_t BufferCapacity_) { - CurrentPosition = 0; - Buffer = Buffer_; - BufferCapacity = BufferCapacity_; - } - /// If a ParameterPackExpansion (or similar type) is encountered, the offset /// into the pack that we're currently printing. 
unsigned CurrentPackIndex = std::numeric_limits::max(); @@ -198,21 +194,6 @@ public: ScopedOverride &operator=(const ScopedOverride &) = delete; }; -inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, - size_t InitSize) { - size_t BufferSize; - if (Buf == nullptr) { - Buf = static_cast(std::malloc(InitSize)); - if (Buf == nullptr) - return false; - BufferSize = InitSize; - } else - BufferSize = *N; - - OB.reset(Buf, BufferSize); - return true; -} - DEMANGLE_NAMESPACE_END #endif diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 7cecd8007087..b747b0f9cc67 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -548,9 +548,6 @@ char *llvm::dlangDemangle(const char *MangledName) { return nullptr; OutputBuffer Demangled; - if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024)) - return nullptr; - if (strcmp(MangledName, "_Dmain") == 0) { Demangled << "D main"; } else { diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp index 1c9209d8f369..9b646ea800aa 100644 --- a/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -375,15 +375,12 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf, int InternalStatus = demangle_success; Demangler Parser(MangledName, MangledName + std::strlen(MangledName)); - OutputBuffer OB; - Node *AST = Parser.parse(); if (AST == nullptr) InternalStatus = demangle_invalid_mangled_name; - else if (!initializeOutputBuffer(Buf, N, OB, 1024)) - InternalStatus = demangle_memory_alloc_failure; else { + OutputBuffer OB(Buf, N); assert(Parser.ForwardTemplateRefs.empty()); AST->print(OB); OB += '\0'; @@ -427,9 +424,7 @@ bool ItaniumPartialDemangler::partialDemangle(const char *MangledName) { } static char *printNode(const Node *RootNode, char *Buf, size_t *N) { - OutputBuffer OB; - if (!initializeOutputBuffer(Buf, N, OB, 128)) - return nullptr; + OutputBuffer 
OB(Buf, N); RootNode->print(OB); OB += '\0'; if (N != nullptr) @@ -472,9 +467,7 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf, return nullptr; const Node *Name = static_cast(RootNode)->getName(); - OutputBuffer OB; - if (!initializeOutputBuffer(Buf, N, OB, 128)) - return nullptr; + OutputBuffer OB(Buf, N); KeepGoingLocalFunction: while (true) { @@ -525,9 +518,7 @@ char *ItaniumPartialDemangler::getFunctionParameters(char *Buf, return nullptr; NodeArray Params = static_cast(RootNode)->getParams(); - OutputBuffer OB; - if (!initializeOutputBuffer(Buf, N, OB, 128)) - return nullptr; + OutputBuffer OB(Buf, N); OB += '('; Params.printWithComma(OB); @@ -543,9 +534,7 @@ char *ItaniumPartialDemangler::getFunctionReturnType( if (!isFunction()) return nullptr; - OutputBuffer OB; - if (!initializeOutputBuffer(Buf, N, OB, 128)) - return nullptr; + OutputBuffer OB(Buf, N); if (const Node *Ret = static_cast(RootNode)->getReturnType()) diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp index b4e98a20f389..c21b0a30105e 100644 --- a/llvm/lib/Demangle/MicrosoftDemangle.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp @@ -246,7 +246,10 @@ demanglePointerCVQualifiers(StringView &MangledName) { StringView Demangler::copyString(StringView Borrowed) { char *Stable = Arena.allocUnalignedBuffer(Borrowed.size()); - std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); + // This is not a micro-optimization, it avoids UB, should Borrowed be a null + // buffer. + if (Borrowed.size()) + std::memcpy(Stable, Borrowed.begin(), Borrowed.size()); return {Stable, Borrowed.size()}; } @@ -970,9 +973,6 @@ void Demangler::memorizeIdentifier(IdentifierNode *Identifier) { // Render this class template name into a string buffer so that we can // memorize it for the purpose of back-referencing. OutputBuffer OB; - if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) - // FIXME: Propagate out-of-memory as an error? 
- std::terminate(); Identifier->output(OB, OF_Default); StringView Owned = copyString(OB); memorizeString(Owned); @@ -1283,11 +1283,6 @@ Demangler::demangleStringLiteral(StringView &MangledName) { EncodedStringLiteralNode *Result = Arena.alloc(); - // Must happen before the first `goto StringLiteralError`. - if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) - // FIXME: Propagate out-of-memory as an error? - std::terminate(); - // Prefix indicating the beginning of a string literal if (!MangledName.consumeFront("@_")) goto StringLiteralError; @@ -1446,9 +1441,6 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) { // Render the parent symbol's name into a buffer. OutputBuffer OB; - if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) - // FIXME: Propagate out-of-memory as an error? - std::terminate(); OB << '`'; Scope->output(OB, OF_Default); OB << '\''; @@ -2311,8 +2303,6 @@ void Demangler::dumpBackReferences() { // Create an output stream so we can render each type. 
OutputBuffer OB; - if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024)) - std::terminate(); for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) { OB.setCurrentPosition(0); @@ -2339,7 +2329,6 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, char *Buf, size_t *N, int *Status, MSDemangleFlags Flags) { Demangler D; - OutputBuffer OB; StringView Name{MangledName}; SymbolNode *AST = D.parse(Name); @@ -2364,9 +2353,8 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled, int InternalStatus = demangle_success; if (D.Error) InternalStatus = demangle_invalid_mangled_name; - else if (!initializeOutputBuffer(Buf, N, OB, 1024)) - InternalStatus = demangle_memory_alloc_failure; else { + OutputBuffer OB(Buf, N); AST->output(OB, OF); OB += '\0'; if (N != nullptr) diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp index 494cdabad41f..975649f28ad2 100644 --- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp +++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp @@ -119,7 +119,6 @@ static void outputCallingConvention(OutputBuffer &OB, CallingConv CC) { std::string Node::toString(OutputFlags Flags) const { OutputBuffer OB; - initializeOutputBuffer(nullptr, nullptr, OB, 1024); this->output(OB, Flags); StringView SV = OB; std::string Owned(SV.begin(), SV.end()); diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp index 32b10db2a968..8c01155127d8 100644 --- a/llvm/lib/Demangle/RustDemangle.cpp +++ b/llvm/lib/Demangle/RustDemangle.cpp @@ -157,9 +157,6 @@ char *llvm::rustDemangle(const char *MangledName) { return nullptr; Demangler D; - if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) - return nullptr; - if (!D.demangle(Mangled)) { std::free(D.Output.getBuffer()); return nullptr;