[lld-macho] Fold __objc_imageinfo sections

Previously, we treated it as a regular ConcatInputSection. However, ld64
actually parses its contents and uses that to synthesize a single image
info struct, generating one 8-byte section instead of `8 * number of
object files with ObjC code`.

I'm not entirely sure what impact this section has on the runtime, so I
just tried to follow ld64's semantics as closely as possible in this
diff. My main motivation though was to reduce binary size.

No significant perf change on chromium_framework on my 16-core Mac Pro:

             base           diff           difference (95% CI)
  sys_time   1.764 ± 0.062  1.748 ± 0.032  [  -2.4% ..   +0.5%]
  user_time  5.112 ± 0.104  5.106 ± 0.046  [  -0.9% ..   +0.7%]
  wall_time  6.111 ± 0.184  6.085 ± 0.076  [  -1.6% ..   +0.8%]
  samples    30             32

Reviewed By: #lld-macho, thakis

Differential Revision: https://reviews.llvm.org/D130125
This commit is contained in:
Jez Ng 2022-07-23 12:11:46 -04:00
parent 676a03d8a5
commit d23da0ec6c
9 changed files with 296 additions and 8 deletions

View File

@ -586,7 +586,7 @@ static void initializeSectionRenameMap() {
section_names::objcCatList,
section_names::objcNonLazyCatList,
section_names::objcProtoList,
section_names::objcImageInfo};
section_names::objCImageInfo};
for (StringRef s : v)
config->sectionRenameMap[{segment_names::data, s}] = {
segment_names::dataConst, s};
@ -1102,6 +1102,8 @@ static void gatherInputSections() {
}
}
}
if (!file->objCImageInfo.empty())
in.objCImageInfo->addFile(file);
}
assert(inputOrder <= UnspecifiedInputOrder);
}

View File

@ -363,6 +363,9 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sectionHeaders) {
// have the same name without causing duplicate symbol errors. To avoid
// spurious duplicate symbol errors, we do not parse these sections.
// TODO: Evaluate whether the bitcode metadata is needed.
} else if (name == section_names::objCImageInfo &&
segname == segment_names::data) {
objCImageInfo = data;
} else {
if (name == section_names::addrSig)
addrSigSection = sections.back();

View File

@ -120,6 +120,7 @@ public:
std::vector<Symbol *> symbols;
std::vector<Section *> sections;
ArrayRef<uint8_t> objCImageInfo;
// If not empty, this stores the name of the archive containing this file.
// We use this string for creating error messages.

View File

@ -321,7 +321,7 @@ constexpr const char objcCatList[] = "__objc_catlist";
constexpr const char objcClassList[] = "__objc_classlist";
constexpr const char objcClassRefs[] = "__objc_classrefs";
constexpr const char objcConst[] = "__objc_const";
constexpr const char objcImageInfo[] = "__objc_imageinfo";
constexpr const char objCImageInfo[] = "__objc_imageinfo";
constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr const char objcProtoList[] = "__objc_protolist";

View File

@ -1621,6 +1621,86 @@ void WordLiteralSection::writeTo(uint8_t *buf) const {
memcpy(buf + p.second * 4, &p.first, 4);
}
// Creates the synthetic section under the name __DATA,__objc_imageinfo.
// NOTE(review): initializeSectionRenameMap() lists objCImageInfo among the
// sections it maps from __DATA to __DATA_CONST, so the output section may end
// up under __DATA_CONST when that map is populated -- confirm.
ObjCImageInfoSection::ObjCImageInfoSection()
: SyntheticSection(segment_names::data, section_names::objCImageInfo) {}
// Parses the raw __objc_imageinfo contents recorded for `file`.
//
// The image info struct has the following layout:
//   struct {
//     uint32_t version;
//     uint32_t flags;
//   };
//
// Only two pieces of `flags` are extracted: the Swift version (bits 8-15) and
// the "has category class properties" bit (0x40). If the data is shorter than
// 8 bytes, or the version word is non-zero, a warning is emitted and a
// default-constructed ImageInfo (Swift version 0, no category class
// properties) is returned without looking at the flags.
ObjCImageInfoSection::ImageInfo
ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
  ImageInfo info;
  ArrayRef<uint8_t> data = file->objCImageInfo;
  if (data.size() < 8) {
    warn(toString(file) + ": invalid __objc_imageinfo size");
    return info;
  }

  // Address both words by byte offset rather than through a uint32_t pointer:
  // nothing here establishes that the input bytes are 4-byte aligned, and
  // read32le() accepts a plain byte address.
  const uint8_t *buf = data.data();
  if (read32le(buf) != 0) {
    warn(toString(file) + ": invalid __objc_imageinfo version");
    return info;
  }

  uint32_t flags = read32le(buf + 4);
  info.swiftVersion = (flags >> 8) & 0xff;
  info.hasCategoryClassProperties = (flags & 0x40) != 0;
  return info;
}
// Renders the Swift version byte from an image info struct for use in
// diagnostics. Values 1 through 5 have fixed names; every other value is
// printed as "0x" followed by its hexadecimal representation.
static std::string swiftVersionString(uint8_t version) {
  // names[i] is the name reported for version value i + 1.
  static const char *const names[] = {"1.0", "1.1", "2.0", "3.0", "4.0"};
  constexpr size_t numNames = sizeof(names) / sizeof(names[0]);

  if (version >= 1 && version <= numNames)
    return names[version - 1];
  return ("0x" + Twine::utohexstr(version)).str();
}
// Validate each object file's __objc_imageinfo and use them to generate the
// image info for the output binary. Only two pieces of info are relevant:
// 1. The Swift version (should be identical across inputs)
// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
void ObjCImageInfoSection::finalizeContents() {
assert(files.size() != 0); // should have already been checked via isNeeded()
info.hasCategoryClassProperties = true;
const InputFile *firstFile;
for (auto file : files) {
ImageInfo inputInfo = parseImageInfo(file);
info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;
if (inputInfo.swiftVersion != 0) {
if (info.swiftVersion != 0 &&
info.swiftVersion != inputInfo.swiftVersion) {
error("Swift version mismatch: " + toString(firstFile) +
" has version " + swiftVersionString(info.swiftVersion) +
" but " + toString(file) + " has version " +
swiftVersionString(inputInfo.swiftVersion));
} else {
info.swiftVersion = inputInfo.swiftVersion;
firstFile = file;
}
}
}
}
// Writes the merged flags word (little-endian) at byte offset 4 of the
// section. The first four bytes, which hold the version word, are not written
// here. NOTE(review): presumably the output buffer is zero-filled so that the
// version reads as 0 -- confirm.
void ObjCImageInfoSection::writeTo(uint8_t *buf) const {
  const uint32_t categoryBit = info.hasCategoryClassProperties ? 0x40 : 0x0;
  const uint32_t swiftBits = info.swiftVersion << 8;
  write32le(buf + 4, categoryBit | swiftBits);
}
void macho::createSyntheticSymbols() {
auto addHeaderSymbol = [](const char *name) {
symtab->addSynthetic(name, in.header->isec, /*value=*/0,

View File

@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/MathExtras.h"
@ -600,6 +601,27 @@ private:
std::unordered_map<uint32_t, uint64_t> literal4Map;
};
// Synthetic section that folds the __objc_imageinfo data of all registered
// input files into a single 8-byte image info struct in the output.
class ObjCImageInfoSection final : public SyntheticSection {
public:
ObjCImageInfoSection();
// The section is only needed once at least one file has been registered.
bool isNeeded() const override { return !files.empty(); }
// Fixed size: a 32-bit version word followed by a 32-bit flags word.
uint64_t getSize() const override { return 8; }
// Registers an input file whose image info should be merged into the output.
// The file must have non-empty __objc_imageinfo data.
void addFile(const InputFile *file) {
assert(!file->objCImageInfo.empty());
files.push_back(file);
}
// Parses the image info of every registered file and merges the results
// into `info`. Must only be called when isNeeded() is true.
void finalizeContents();
// Writes the merged flags word into the section's output bytes.
void writeTo(uint8_t *buf) const override;
private:
// The fields of an image info struct that are carried through to the output.
struct ImageInfo {
// Swift version byte; 0 means no Swift version was recorded.
uint8_t swiftVersion = 0;
// Set when the 0x40 flag bit is present.
bool hasCategoryClassProperties = false;
} info;
// Extracts an ImageInfo from a file's raw __objc_imageinfo bytes.
static ImageInfo parseImageInfo(const InputFile *);
std::vector<const InputFile *> files; // files with image info
};
struct InStruct {
const uint8_t *bufferStart = nullptr;
MachHeaderSection *header = nullptr;
@ -616,6 +638,7 @@ struct InStruct {
StubsSection *stubs = nullptr;
StubHelperSection *stubHelper = nullptr;
UnwindInfoSection *unwindInfo = nullptr;
ObjCImageInfoSection *objCImageInfo = nullptr;
ConcatInputSection *imageLoaderCache = nullptr;
};

View File

@ -1164,6 +1164,10 @@ template <class LP> void Writer::run() {
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
if (in.objCImageInfo->isNeeded())
in.objCImageInfo->finalizeContents();
// At this point, we should know exactly which output sections are needed,
// courtesy of scanSymbols() and scanRelocations().
createOutputSections<LP>();
@ -1210,6 +1214,7 @@ void macho::createSyntheticSections() {
in.stubs = make<StubsSection>();
in.stubHelper = make<StubHelperSection>();
in.unwindInfo = makeUnwindInfoSection();
in.objCImageInfo = make<ObjCImageInfoSection>();
// This section contains space for just a single word, and will be used by
// dyld to cache an address to the image loader it uses.

View File

@ -37,7 +37,6 @@
# NDATA-DAG: __DATA,__objc_catlist __DATA__objc_catlist
# NDATA-DAG: __DATA,__objc_nlcatlist __DATA__objc_nlcatlist
# NDATA-DAG: __DATA,__objc_protolist __DATA__objc_protolist
# NDATA-DAG: __DATA,__objc_imageinfo __DATA__objc_imageinfo
# NDATA-DAG: __DATA,__nl_symbol_ptr __IMPORT__pointers
# YDATA-DAG: __DATA_CONST,__auth_got __DATA__auth_got
@ -52,7 +51,6 @@
# YDATA-DAG: __DATA_CONST,__objc_catlist __DATA__objc_catlist
# YDATA-DAG: __DATA_CONST,__objc_nlcatlist __DATA__objc_nlcatlist
# YDATA-DAG: __DATA_CONST,__objc_protolist __DATA__objc_protolist
# YDATA-DAG: __DATA_CONST,__objc_imageinfo __DATA__objc_imageinfo
# YDATA-DAG: __DATA_CONST,__nl_symbol_ptr __IMPORT__pointers
## LLD doesn't support defining symbols in synthetic sections, so we test them
@ -133,10 +131,14 @@ __DATA__objc_nlcatlist:
__DATA__objc_protolist:
.space 8
.section __DATA,__objc_imageinfo
.global __DATA__objc_imageinfo
__DATA__objc_imageinfo:
.space 8
## __objc_imageinfo should get moved under __DATA_CONST as well, but symbols
## within __objc_imageinfo get dropped during link, so we cannot test this
## case using the output of `llvm-objdump --syms`. TODO: rewrite test to use
## `llvm-readobj --section-headers`, which will avoid this issue.
# .section __DATA,__objc_imageinfo
# .global __DATA__objc_imageinfo
# __DATA__objc_imageinfo:
# .space 8
.section __IMPORT,__pointers,non_lazy_symbol_pointers
.global __IMPORT__pointers

View File

@ -0,0 +1,172 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
## ld64 ignores the __objc_imageinfo section entirely if there is no actual
## ObjC class + category data in the file. LLD doesn't yet do this check, but
## to make this test work for both linkers, I am inserting an appropriate class
## definition into each test file.
# RUN: cat %t/no-category-cls.s %t/foo-cls.s > %t/no-category-cls-1.s
# RUN: cat %t/with-category-cls.s %t/foo-cls.s > %t/with-category-cls-1.s
# RUN: cat %t/ignored-flags.s %t/foo-cls.s > %t/ignored-flags-1.s
# RUN: cat %t/invalid-version.s %t/foo-cls.s > %t/invalid-version-1.s
# RUN: cat %t/invalid-size.s %t/foo-cls.s > %t/invalid-size-1.s
# RUN: cat %t/swift-version-1.s %t/foo-cls.s > %t/swift-version-1-1.s
# RUN: cat %t/swift-version-2.s %t/foo-cls.s > %t/swift-version-2-1.s
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-category-cls-1.s -o %t/no-category-cls.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/with-category-cls-1.s -o %t/with-category-cls.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/ignored-flags-1.s -o %t/ignored-flags.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid-version-1.s -o %t/invalid-version.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/swift-version-1-1.s -o %t/swift-version-1.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/swift-version-2-1.s -o %t/swift-version-2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/invalid-size-1.s -o %t/invalid-size.o
# RUN: %lld -dylib -lSystem %t/with-category-cls.o -o %t/test-with-cat
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" --syms \
# RUN: %t/test-with-cat | FileCheck %s --check-prefix=HAS-CAT-CLS \
# RUN: --implicit-check-not=_discard_me
# RUN: %lld -dylib -lSystem %t/no-category-cls.o -o %t/test-no-cat
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" --syms \
# RUN: %t/test-no-cat | FileCheck %s --check-prefix=NO-CAT-CLS \
# RUN: --implicit-check-not=_discard_me
# RUN: %lld -dylib -lSystem %t/no-category-cls.o %t/with-category-cls.o -o %t/test1
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test1 \
# RUN: | FileCheck %s --check-prefix=NO-CAT-CLS
# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/ignored-flags.o -o %t/test2
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test2 \
# RUN: | FileCheck %s --check-prefix=HAS-CAT-CLS
# RUN: %lld -dylib -lSystem %t/no-category-cls.o %t/ignored-flags.o -o %t/test3
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test3 \
# RUN: | FileCheck %s --check-prefix=NO-CAT-CLS
# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/with-category-cls.o \
# RUN: %t/invalid-version.o -o %t/test4 2>&1 | FileCheck %s \
# RUN: --check-prefix=IMAGE-VERSION
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test4 \
# RUN: | FileCheck %s --check-prefix=NO-CAT-CLS
# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/no-category-cls.o \
# RUN: %t/invalid-version.o -o %t/test5 2>&1 | FileCheck %s \
# RUN: --check-prefix=IMAGE-VERSION
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test5 \
# RUN: | FileCheck %s --check-prefix=NO-CAT-CLS
# RUN: %no-fatal-warnings-lld -dylib -lSystem %t/with-category-cls.o \
# RUN: %t/invalid-size.o -o %t/test6 2>&1 | FileCheck %s \
# RUN: --check-prefix=INVALID-SIZE
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" %t/test6 \
# RUN: | FileCheck %s --check-prefix=NO-CAT-CLS
# RUN: not %lld -dylib -lSystem %t/swift-version-1.o %t/swift-version-2.o -o \
# RUN: /dev/null 2>&1 | FileCheck %s --check-prefix=SWIFT-MISMATCH-12
# RUN: not %lld -dylib -lSystem %t/swift-version-2.o %t/swift-version-1.o -o \
# RUN: /dev/null 2>&1 | FileCheck %s --check-prefix=SWIFT-MISMATCH-21
## with-category-cls.o does not have a Swift version (it's set to zero) and
## should be compatible with any Swift version.
# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/swift-version-1.o -o %t/swift-v1
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" \
# RUN: %t/swift-v1 | FileCheck %s --check-prefix=SWIFT-V1
# RUN: %lld -dylib -lSystem %t/with-category-cls.o %t/swift-version-2.o -o %t/swift-v2
# RUN: llvm-objdump --macho --section="__DATA_CONST,__objc_imageinfo" \
# RUN: %t/swift-v2 | FileCheck %s --check-prefix=SWIFT-V2
# HAS-CAT-CLS: Contents of (__DATA_CONST,__objc_imageinfo) section
# HAS-CAT-CLS: 00 00 00 40 00 00 00
# HAS-CAT-CLS-EMPTY:
# NO-CAT-CLS: Contents of (__DATA_CONST,__objc_imageinfo) section
# NO-CAT-CLS: 00 00 00 00 00 00 00
# NO-CAT-CLS-EMPTY:
# SWIFT-V1: Contents of (__DATA_CONST,__objc_imageinfo) section
# SWIFT-V1: 00 00 00 40 01 00 00
# SWIFT-V1-EMPTY:
# SWIFT-V2: Contents of (__DATA_CONST,__objc_imageinfo) section
# SWIFT-V2: 00 00 00 40 02 00 00
# SWIFT-V2-EMPTY:
# IMAGE-VERSION: warning: {{.*}}invalid-version.o: invalid __objc_imageinfo version
# INVALID-SIZE: warning: {{.*}}invalid-size.o: invalid __objc_imageinfo size
# SWIFT-MISMATCH-12: error: Swift version mismatch: {{.*}}swift-version-1.o has version 1.0 but {{.*}}swift-version-2.o has version 1.1
# SWIFT-MISMATCH-21: error: Swift version mismatch: {{.*}}swift-version-2.o has version 1.1 but {{.*}}swift-version-1.o has version 1.0
#--- no-category-cls.s
.section __DATA,__objc_imageinfo,regular,no_dead_strip
## ld64 discards any symbols in this section; we follow suit.
_discard_me:
.long 0
.long 0
#--- with-category-cls.s
.section __DATA,__objc_imageinfo,regular,no_dead_strip
_discard_me:
.long 0
.long 0x40 ## "has category class properties" flag
#--- ignored-flags.s
.section __DATA,__objc_imageinfo,regular,no_dead_strip
.long 0
## Only the 0x40 flag is carried through to the output binary.
.long (0x40 | 0x20 | 0x4 | 0x2)
#--- invalid-version.s
.section __DATA,__objc_imageinfo,regular,no_dead_strip
.long 1 ## only 0 is valid; the flag field below will not be parsed.
.long 0x40
#--- invalid-size.s
## Only 4 bytes of image info are emitted here; the linker expects at least 8
## and warns about an invalid __objc_imageinfo size for this input.
.section __DATA,__objc_imageinfo
.long 0
#--- swift-version-1.s
## Image info with version word 0. The three directives after it spell out the
## little-endian flags word byte by byte: the low byte carries the 0x40
## "has category class properties" flag and the second byte carries the Swift
## version (1 here).
.section __DATA,__objc_imageinfo,regular,no_dead_strip
.long 0
.byte 0x40
.byte 0x1 ## Swift version
.short 0
#--- swift-version-2.s
## Same layout as swift-version-1.s, but with Swift version byte 2, so that
## linking the two inputs together produces a Swift version mismatch error.
.section __DATA,__objc_imageinfo,regular,no_dead_strip
.long 0
.byte 0x40
.byte 0x2 ## Swift version
.short 0
#--- foo-cls.s
## ObjC class and category data that gets appended to every test input above,
## so that ld64 does not ignore the input's __objc_imageinfo section.
.section __TEXT,__objc_classname,cstring_literals
L_CAT_NAME:
.asciz "barcat"
.section __DATA,__objc_data
.p2align 3
_OBJC_CLASS_$_FooClass:
.space 40
.section __DATA,__objc_const
.p2align 3
__OBJC_$_CATEGORY_INSTANCE_METHODS_FooClass_$_barcat:
.p2align 3
__OBJC_$_CATEGORY_FooClass_$_barcat:
.quad L_CAT_NAME
.quad _OBJC_CLASS_$_FooClass
.quad __OBJC_$_CATEGORY_INSTANCE_METHODS_FooClass_$_barcat
.quad 0
.quad 0
.quad 0
.quad 0
.long 64
.space 4
## The category is referenced from __objc_catlist.
.section __DATA,__objc_catlist,regular,no_dead_strip
.p2align 3
.quad __OBJC_$_CATEGORY_FooClass_$_barcat