From 1e1a3f67ee717ebb71c461e51c5c233a13f25edb Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Thu, 21 May 2020 15:26:35 -0700 Subject: [PATCH] [lld-macho] Ensure reads from nlist_64 structs are aligned when necessary My test refactoring in D80217 seems to have caused yaml2obj to emit unaligned nlist_64 structs, causing ASAN'd lld to be unhappy. I don't think this is an issue with yaml2obj though -- llvm-mc also seems to emit unaligned nlist_64s. This diff makes lld able to safely do aligned reads under ASAN builds while hopefully creating no overhead for regular builds on architectures that support unaligned reads. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D80414 --- lld/MachO/InputFiles.cpp | 15 +++++++------- lld/MachO/InputFiles.h | 4 +++- lld/MachO/MachOStructs.h | 36 +++++++++++++++++++++++++++++++++ lld/MachO/SyntheticSections.cpp | 5 +++-- lld/MachO/Writer.cpp | 2 -- 5 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 lld/MachO/MachOStructs.h diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 50ec84f98080..321e1caacc9b 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -45,6 +45,7 @@ #include "Config.h" #include "ExportTrie.h" #include "InputSection.h" +#include "MachOStructs.h" #include "OutputSection.h" #include "SymbolTable.h" #include "Symbols.h" @@ -211,14 +212,14 @@ void InputFile::parseRelocations(const section_64 &sec, } } -void InputFile::parseSymbols(ArrayRef nList, const char *strtab, - bool subsectionsViaSymbols) { +void InputFile::parseSymbols(ArrayRef nList, + const char *strtab, bool subsectionsViaSymbols) { // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols // out-of-sequence. symbols.resize(nList.size()); std::vector altEntrySymIdxs; - auto createDefined = [&](const nlist_64 &sym, InputSection *isec, + auto createDefined = [&](const structs::nlist_64 &sym, InputSection *isec, uint32_t value) -> Symbol * { StringRef name = strtab + sym.n_strx; if (sym.n_type & N_EXT) @@ -230,7 +231,7 @@ void InputFile::parseSymbols(ArrayRef nList, const char *strtab, }; for (size_t i = 0, n = nList.size(); i < n; ++i) { - const nlist_64 &sym = nList[i]; + const structs::nlist_64 &sym = nList[i]; // Undefined symbol if (!sym.n_sect) { @@ -289,7 +290,7 @@ void InputFile::parseSymbols(ArrayRef nList, const char *strtab, } for (size_t idx : altEntrySymIdxs) { - const nlist_64 &sym = nList[idx]; + const structs::nlist_64 &sym = nList[idx]; SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr; InputSection *subsec = findContainingSubsection(subsecMap, &off); @@ -311,8 +312,8 @@ ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { // TODO: Error on missing LC_SYMTAB? if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) { auto *c = reinterpret_cast(cmd); - ArrayRef nList( - reinterpret_cast(buf + c->symoff), c->nsyms); + ArrayRef nList( + reinterpret_cast(buf + c->symoff), c->nsyms); const char *strtab = reinterpret_cast(buf) + c->stroff; bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS; parseSymbols(nList, strtab, subsectionsViaSymbols); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 200ec85ea290..ffe0c2ec77f2 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -9,6 +9,8 @@ #ifndef LLD_MACHO_INPUT_FILES_H #define LLD_MACHO_INPUT_FILES_H +#include "MachOStructs.h" + #include "lld/Common/LLVM.h" #include "llvm/ADT/DenseSet.h" #include "llvm/BinaryFormat/MachO.h" @@ -52,7 +54,7 @@ protected: void parseSections(ArrayRef); - void parseSymbols(ArrayRef nList, const char *strtab, + void parseSymbols(ArrayRef nList, const char *strtab, bool subsectionsViaSymbols); void parseRelocations(const llvm::MachO::section_64 &, SubsectionMap &); diff --git a/lld/MachO/MachOStructs.h b/lld/MachO/MachOStructs.h new file mode 100644 index 000000000000..69b50ec23173 --- /dev/null +++ b/lld/MachO/MachOStructs.h @@ -0,0 +1,36 @@ +//===- MachOStructs.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines structures used in the MachO object file format. Note that +// unlike llvm/BinaryFormat/MachO.h, the structs here are defined in terms of +// endian- and alignment-compatibility wrappers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_MACHO_STRUCTS_H +#define LLD_MACHO_MACHO_STRUCTS_H + +#include "llvm/Support/Endian.h" + +namespace lld { + +namespace structs { + +struct nlist_64 { + llvm::support::ulittle32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + llvm::support::ulittle16_t n_desc; + llvm::support::ulittle64_t n_value; +}; + +} // namespace structs + +} // namespace lld + +#endif diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index 26fa19258d44..af1c18134152 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -10,6 +10,7 @@ #include "Config.h" #include "ExportTrie.h" #include "InputFiles.h" +#include "MachOStructs.h" #include "OutputSegment.h" #include "SymbolTable.h" #include "Symbols.h" @@ -281,7 +282,7 @@ SymtabSection::SymtabSection(StringTableSection &stringTableSection) } size_t SymtabSection::getSize() const { - return symbols.size() * sizeof(nlist_64); + return symbols.size() * sizeof(structs::nlist_64); } void SymtabSection::finalizeContents() { @@ -292,7 +293,7 @@ void SymtabSection::finalizeContents() { } void SymtabSection::writeTo(uint8_t *buf) const { - auto *nList = reinterpret_cast(buf); + auto *nList = reinterpret_cast(buf); for (const SymtabEntry &entry : symbols) { nList->n_strx = entry.strx; // TODO support other symbol types diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index e93cb8064a08..768966103531 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -414,8 +414,6 @@ void Writer::assignAddresses(OutputSegment *seg) { for (auto &p : seg->getSections()) { OutputSection *section = p.second; addr = alignTo(addr, section->align); - // We must align the file offsets too to avoid misaligned writes of - // structs. fileOff = alignTo(fileOff, section->align); section->addr = addr; section->fileOff = fileOff;