forked from OSchip/llvm-project
[BOLT] Fix instruction encoding validation
Always use non-symbolizing disassembler for instruction encoding validation as symbols will be treated as undefined/zeros be the encoder and causing byte sequence mismatches. Reviewed By: Amir Differential Revision: https://reviews.llvm.org/D136118
This commit is contained in:
parent
5b773dcd2d
commit
bcc4c90954
|
@ -1236,10 +1236,10 @@ public:
|
|||
return Size;
|
||||
}
|
||||
|
||||
/// Verify that assembling instruction \p Inst results in the same sequence of
|
||||
/// bytes as \p Encoding.
|
||||
bool validateEncoding(const MCInst &Instruction,
|
||||
ArrayRef<uint8_t> Encoding) const;
|
||||
/// Validate that disassembling the \p Sequence of bytes into an instruction
|
||||
/// and assembling the instruction again, results in a byte sequence identical
|
||||
/// to the original one.
|
||||
bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;
|
||||
|
||||
/// Return a function execution count threshold for determining whether
|
||||
/// the function is 'hot'. Consider it hot if count is above the average exec
|
||||
|
|
|
@ -2287,19 +2287,25 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
|
|||
return std::make_pair(HotSize, ColdSize);
|
||||
}
|
||||
|
||||
bool BinaryContext::validateEncoding(const MCInst &Inst,
|
||||
ArrayRef<uint8_t> InputEncoding) const {
|
||||
bool BinaryContext::validateInstructionEncoding(
|
||||
ArrayRef<uint8_t> InputSequence) const {
|
||||
MCInst Inst;
|
||||
uint64_t InstSize;
|
||||
DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
|
||||
assert(InstSize == InputSequence.size() &&
|
||||
"Disassembled instruction size does not match the sequence.");
|
||||
|
||||
SmallString<256> Code;
|
||||
SmallVector<MCFixup, 4> Fixups;
|
||||
raw_svector_ostream VecOS(Code);
|
||||
|
||||
MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
|
||||
auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
|
||||
if (InputEncoding != EncodedData) {
|
||||
auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
|
||||
if (InputSequence != OutputSequence) {
|
||||
if (opts::Verbosity > 1) {
|
||||
errs() << "BOLT-WARNING: mismatched encoding detected\n"
|
||||
<< " input: " << InputEncoding << '\n'
|
||||
<< " output: " << EncodedData << '\n';
|
||||
<< " input: " << InputSequence << '\n'
|
||||
<< " output: " << OutputSequence << '\n';
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -1217,7 +1217,7 @@ bool BinaryFunction::disassemble() {
|
|||
// Check integrity of LLVM assembler/disassembler.
|
||||
if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
|
||||
!BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
|
||||
if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
|
||||
if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
|
||||
errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
|
||||
<< "function " << *this << " for instruction :\n";
|
||||
BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
|
||||
|
@ -1233,15 +1233,10 @@ bool BinaryFunction::disassemble() {
|
|||
break;
|
||||
}
|
||||
|
||||
// Disassemble again without the symbolizer and check that the disassembly
|
||||
// matches the assembler output.
|
||||
MCInst TempInst;
|
||||
BC.DisAsm->getInstruction(TempInst, Size, FunctionData.slice(Offset),
|
||||
AbsoluteInstrAddr, nulls());
|
||||
if (!BC.validateEncoding(TempInst, FunctionData.slice(Offset, Size))) {
|
||||
if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
|
||||
errs() << "BOLT-WARNING: internal assembler/disassembler error "
|
||||
"detected for AVX512 instruction:\n";
|
||||
BC.printInstruction(errs(), TempInst, AbsoluteInstrAddr);
|
||||
BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
|
||||
errs() << " in function " << *this << '\n';
|
||||
setIgnored();
|
||||
break;
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
|
||||
# RUN: ld.lld %t.o -o %t.exe -q
|
||||
# RUN: llvm-bolt %t.exe --relocs -o %t.out --check-encoding |& FileCheck %s
|
||||
|
||||
.text
|
||||
.globl _start
|
||||
.type _start, %function
|
||||
_start:
|
||||
.cfi_startproc
|
||||
|
||||
## Check that llvm-bolt uses non-symbolizing disassembler while validating
|
||||
## instruction encodings. If symbol "foo" below is symbolized, the encoded
|
||||
## instruction would have a different sequence of bytes from the input
|
||||
## sequence, as "foo" will not have any address assigned at that point.
|
||||
|
||||
movq foo(%rip), %rax
|
||||
# CHECK-NOT: mismatching LLVM encoding detected
|
||||
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size _start, .-_start
|
||||
|
||||
.globl foo
|
||||
.type foo, %function
|
||||
foo:
|
||||
.cfi_startproc
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size foo, .-foo
|
Loading…
Reference in New Issue