forked from OSchip/llvm-project
[MCDisassembler] Disambiguate Size parameter in tryAddingSymbolicOperand()
MCSymbolizer::tryAddingSymbolicOperand() overloaded the Size parameter to specify either the instruction size or the operand size depending on the architecture. However, for proper symbolic disassembly on X86, we need to know both sizes, as an instruction can have two operands, and the instruction size cannot be reliably calculated based on the operand offset and its size. Hence, split Size into OpSize and InstSize. For X86, the new interface allows to fix a couple of issues: * Correctly adjust the value of PC-relative operands. * Set operand size to zero when the operand is specified implicitly. Differential Revision: https://reviews.llvm.org/D126101
This commit is contained in:
parent
e51a6b7374
commit
bed9efed71
|
@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef;
|
||||||
* one operand with symbolic information. To determine the symbolic operand
|
* one operand with symbolic information. To determine the symbolic operand
|
||||||
* information for each operand, the bytes for the specific operand in the
|
* information for each operand, the bytes for the specific operand in the
|
||||||
* instruction are specified by the Offset parameter and its byte widith is the
|
* instruction are specified by the Offset parameter and its byte widith is the
|
||||||
* size parameter. For instructions sets with fixed widths and one symbolic
|
* OpSize parameter. For instructions sets with fixed widths and one symbolic
|
||||||
* operand per instruction, the Offset parameter will be zero and Size parameter
|
* operand per instruction, the Offset parameter will be zero and InstSize
|
||||||
* will be the instruction width. The information is returned in TagBuf and is
|
* parameter will be the instruction width. The information is returned in
|
||||||
* Triple specific with its specific information defined by the value of
|
* TagBuf and is Triple specific with its specific information defined by the
|
||||||
* TagType for that Triple. If symbolic information is returned the function
|
* value of TagType for that Triple. If symbolic information is returned the
|
||||||
* returns 1, otherwise it returns 0.
|
* function * returns 1, otherwise it returns 0.
|
||||||
*/
|
*/
|
||||||
typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
|
typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset,
|
||||||
uint64_t Offset, uint64_t Size,
|
uint64_t OpSize, uint64_t InstSize,
|
||||||
int TagType, void *TagBuf);
|
int TagType, void *TagBuf);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -181,10 +181,9 @@ protected:
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Helpers around MCSymbolizer
|
// Helpers around MCSymbolizer
|
||||||
bool tryAddingSymbolicOperand(MCInst &Inst,
|
bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
|
||||||
int64_t Value,
|
bool IsBranch, uint64_t Offset, uint64_t OpSize,
|
||||||
uint64_t Address, bool IsBranch,
|
uint64_t InstSize) const;
|
||||||
uint64_t Offset, uint64_t InstSize) const;
|
|
||||||
|
|
||||||
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
|
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,8 @@ public:
|
||||||
|
|
||||||
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
|
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
|
||||||
int64_t Value, uint64_t Address, bool IsBranch,
|
int64_t Value, uint64_t Address, bool IsBranch,
|
||||||
uint64_t Offset, uint64_t InstSize) override;
|
uint64_t Offset, uint64_t OpSize,
|
||||||
|
uint64_t InstSize) override;
|
||||||
void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
|
void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
|
||||||
int64_t Value,
|
int64_t Value,
|
||||||
uint64_t Address) override;
|
uint64_t Address) override;
|
||||||
|
|
|
@ -63,12 +63,13 @@ public:
|
||||||
/// \param Address - Load address of the instruction.
|
/// \param Address - Load address of the instruction.
|
||||||
/// \param IsBranch - Is the instruction a branch?
|
/// \param IsBranch - Is the instruction a branch?
|
||||||
/// \param Offset - Byte offset of the operand inside the inst.
|
/// \param Offset - Byte offset of the operand inside the inst.
|
||||||
|
/// \param OpSize - Size of the operand in bytes.
|
||||||
/// \param InstSize - Size of the instruction in bytes.
|
/// \param InstSize - Size of the instruction in bytes.
|
||||||
/// \return Whether a symbolic operand was added.
|
/// \return Whether a symbolic operand was added.
|
||||||
virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
|
virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
|
||||||
int64_t Value, uint64_t Address,
|
int64_t Value, uint64_t Address,
|
||||||
bool IsBranch, uint64_t Offset,
|
bool IsBranch, uint64_t Offset,
|
||||||
uint64_t InstSize) = 0;
|
uint64_t OpSize, uint64_t InstSize) = 0;
|
||||||
|
|
||||||
/// Try to add a comment on the PC-relative load.
|
/// Try to add a comment on the PC-relative load.
|
||||||
/// For instance, in Mach-O, this is used to add annotations to instructions
|
/// For instance, in Mach-O, this is used to add annotations to instructions
|
||||||
|
|
|
@ -22,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
|
||||||
|
|
||||||
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
|
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
|
||||||
uint64_t Address, bool IsBranch,
|
uint64_t Address, bool IsBranch,
|
||||||
uint64_t Offset,
|
uint64_t Offset, uint64_t OpSize,
|
||||||
uint64_t InstSize) const {
|
uint64_t InstSize) const {
|
||||||
if (Symbolizer)
|
if (Symbolizer)
|
||||||
return Symbolizer->tryAddingSymbolicOperand(
|
return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
|
||||||
Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
|
Address, IsBranch, Offset,
|
||||||
|
OpSize, InstSize);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,19 +31,15 @@ class Triple;
|
||||||
// is found an MCExpr is created with that, else an MCExpr with Value is
|
// is found an MCExpr is created with that, else an MCExpr with Value is
|
||||||
// created. This function returns true if it adds an operand to the MCInst and
|
// created. This function returns true if it adds an operand to the MCInst and
|
||||||
// false otherwise.
|
// false otherwise.
|
||||||
bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
|
bool MCExternalSymbolizer::tryAddingSymbolicOperand(
|
||||||
raw_ostream &cStream,
|
MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
|
||||||
int64_t Value,
|
bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
|
||||||
uint64_t Address,
|
|
||||||
bool IsBranch,
|
|
||||||
uint64_t Offset,
|
|
||||||
uint64_t InstSize) {
|
|
||||||
struct LLVMOpInfo1 SymbolicOp;
|
struct LLVMOpInfo1 SymbolicOp;
|
||||||
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
|
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
|
||||||
SymbolicOp.Value = Value;
|
SymbolicOp.Value = Value;
|
||||||
|
|
||||||
if (!GetOpInfo ||
|
if (!GetOpInfo ||
|
||||||
!GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
|
!GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
|
||||||
// Clear SymbolicOp.Value from above and also all other fields.
|
// Clear SymbolicOp.Value from above and also all other fields.
|
||||||
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
|
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
|
||||||
|
|
||||||
|
@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
|
||||||
// that always makes sense to guess. But in the case of an immediate it is
|
// that always makes sense to guess. But in the case of an immediate it is
|
||||||
// a bit more questionable if it is an address of a symbol or some other
|
// a bit more questionable if it is an address of a symbol or some other
|
||||||
// reference. So if the immediate Value comes from a width of 1 byte,
|
// reference. So if the immediate Value comes from a width of 1 byte,
|
||||||
// InstSize, we will not guess it is an address of a symbol. Because in
|
// OpSize, we will not guess it is an address of a symbol. Because in
|
||||||
// object files assembled starting at address 0 this usually leads to
|
// object files assembled starting at address 0 this usually leads to
|
||||||
// incorrect symbolication.
|
// incorrect symbolication.
|
||||||
if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
|
if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
uint64_t ReferenceType;
|
uint64_t ReferenceType;
|
||||||
|
|
|
@ -749,7 +749,7 @@ static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
|
||||||
ImmVal |= ~((1LL << 19) - 1);
|
ImmVal |= ~((1LL << 19) - 1);
|
||||||
|
|
||||||
if (!Decoder->tryAddingSymbolicOperand(
|
if (!Decoder->tryAddingSymbolicOperand(
|
||||||
Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4))
|
Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(ImmVal));
|
Inst.addOperand(MCOperand::createImm(ImmVal));
|
||||||
return Success;
|
return Success;
|
||||||
}
|
}
|
||||||
|
@ -1030,7 +1030,7 @@ DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
|
||||||
}
|
}
|
||||||
|
|
||||||
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
|
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
|
||||||
if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
|
if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(offset));
|
Inst.addOperand(MCOperand::createImm(offset));
|
||||||
return Success;
|
return Success;
|
||||||
}
|
}
|
||||||
|
@ -1640,7 +1640,7 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
|
||||||
imm |= ~((1LL << 21) - 1);
|
imm |= ~((1LL << 21) - 1);
|
||||||
|
|
||||||
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
|
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
|
||||||
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
|
if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(imm));
|
Inst.addOperand(MCOperand::createImm(imm));
|
||||||
|
|
||||||
return Success;
|
return Success;
|
||||||
|
@ -1675,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
|
||||||
DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
|
DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
|
if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(ImmVal));
|
Inst.addOperand(MCOperand::createImm(ImmVal));
|
||||||
Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
|
Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
|
||||||
return Success;
|
return Success;
|
||||||
|
@ -1690,7 +1690,7 @@ static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
|
||||||
if (imm & (1 << (26 - 1)))
|
if (imm & (1 << (26 - 1)))
|
||||||
imm |= ~((1LL << 26) - 1);
|
imm |= ~((1LL << 26) - 1);
|
||||||
|
|
||||||
if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
|
if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(imm));
|
Inst.addOperand(MCOperand::createImm(imm));
|
||||||
|
|
||||||
return Success;
|
return Success;
|
||||||
|
@ -1742,7 +1742,7 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
|
||||||
else
|
else
|
||||||
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
|
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
|
||||||
Inst.addOperand(MCOperand::createImm(bit));
|
Inst.addOperand(MCOperand::createImm(bit));
|
||||||
if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
|
if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
|
||||||
Inst.addOperand(MCOperand::createImm(dst));
|
Inst.addOperand(MCOperand::createImm(dst));
|
||||||
|
|
||||||
return Success;
|
return Success;
|
||||||
|
|
|
@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
|
||||||
/// an operand to the MCInst and Fail otherwise.
|
/// an operand to the MCInst and Fail otherwise.
|
||||||
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
|
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
|
||||||
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
|
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
|
||||||
bool IsBranch, uint64_t Offset, uint64_t InstSize) {
|
bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
|
||||||
if (!SymbolLookUp)
|
if (!SymbolLookUp)
|
||||||
return false;
|
return false;
|
||||||
// FIXME: This method shares a lot of code with
|
// FIXME: This method shares a lot of code with
|
||||||
|
@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
|
||||||
SymbolicOp.Value = Value;
|
SymbolicOp.Value = Value;
|
||||||
uint64_t ReferenceType;
|
uint64_t ReferenceType;
|
||||||
const char *ReferenceName;
|
const char *ReferenceName;
|
||||||
if (!GetOpInfo ||
|
if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
|
||||||
!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
|
1, &SymbolicOp)) {
|
||||||
if (IsBranch) {
|
if (IsBranch) {
|
||||||
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
|
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
|
||||||
const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
|
const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
|
||||||
|
|
|
@ -29,7 +29,8 @@ public:
|
||||||
|
|
||||||
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
|
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
|
||||||
int64_t Value, uint64_t Address, bool IsBranch,
|
int64_t Value, uint64_t Address, bool IsBranch,
|
||||||
uint64_t Offset, uint64_t InstSize) override;
|
uint64_t Offset, uint64_t OpSize,
|
||||||
|
uint64_t InstSize) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace llvm
|
} // namespace llvm
|
||||||
|
|
|
@ -82,7 +82,7 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
|
||||||
APInt SignedOffset(18, Imm * 4, true);
|
APInt SignedOffset(18, Imm * 4, true);
|
||||||
int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
|
int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
|
||||||
|
|
||||||
if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
|
if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
|
||||||
return MCDisassembler::Success;
|
return MCDisassembler::Success;
|
||||||
return addOperand(Inst, MCOperand::createImm(Imm));
|
return addOperand(Inst, MCOperand::createImm(Imm));
|
||||||
}
|
}
|
||||||
|
@ -1918,10 +1918,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Try to find symbol name for specified label
|
// Try to find symbol name for specified label
|
||||||
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
|
bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
|
||||||
raw_ostream &/*cStream*/, int64_t Value,
|
MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
|
||||||
uint64_t /*Address*/, bool IsBranch,
|
uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
|
||||||
uint64_t /*Offset*/, uint64_t /*InstSize*/) {
|
uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
|
||||||
|
|
||||||
if (!IsBranch) {
|
if (!IsBranch) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -217,8 +217,8 @@ public:
|
||||||
: MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
|
: MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
|
||||||
|
|
||||||
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
|
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
|
||||||
int64_t Value, uint64_t Address,
|
int64_t Value, uint64_t Address, bool IsBranch,
|
||||||
bool IsBranch, uint64_t Offset,
|
uint64_t Offset, uint64_t OpSize,
|
||||||
uint64_t InstSize) override;
|
uint64_t InstSize) override;
|
||||||
|
|
||||||
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
|
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
|
||||||
|
|
|
@ -181,7 +181,7 @@ static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
static const uint64_t AtLeast = 2;
|
static const uint64_t AtLeast = 2;
|
||||||
return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
|
return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
|
||||||
Inst, Value, Address, true, 0, AtLeast));
|
Inst, Value, Address, true, 0, AtLeast, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
|
static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
|
||||||
|
|
|
@ -835,7 +835,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
// FIXME: Does it make sense for value to be negative?
|
// FIXME: Does it make sense for value to be negative?
|
||||||
return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
|
return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
|
||||||
isBranch, /* Offset */ 0, InstSize);
|
isBranch, /*Offset=*/0, /*OpSize=*/0,
|
||||||
|
InstSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
|
/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
|
||||||
|
|
|
@ -768,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
|
||||||
Bits = 15;
|
Bits = 15;
|
||||||
uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
|
uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
|
||||||
uint32_t Extended = FullValue + Address;
|
uint32_t Extended = FullValue + Address;
|
||||||
if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
|
if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0,
|
||||||
|
4))
|
||||||
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
|
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
|
||||||
return MCDisassembler::Success;
|
return MCDisassembler::Success;
|
||||||
}
|
}
|
||||||
|
|
|
@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch,
|
||||||
uint64_t Width, MCInst &MI,
|
uint64_t Width, MCInst &MI,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
|
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
|
||||||
Width);
|
Width, /*InstSize=*/0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
|
static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
|
||||||
|
|
|
@ -509,7 +509,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
|
||||||
uint64_t Width, MCInst &MI,
|
uint64_t Width, MCInst &MI,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
|
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
|
||||||
Width);
|
Width, /*InstSize=*/4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
|
static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
|
||||||
|
|
|
@ -75,7 +75,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
|
||||||
uint64_t Width, MCInst &MI,
|
uint64_t Width, MCInst &MI,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
|
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
|
||||||
Width);
|
Width, /*InstSize=*/0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
|
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
|
||||||
|
|
|
@ -1874,8 +1874,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
|
||||||
uint64_t pcrel = 0;
|
uint64_t pcrel = 0;
|
||||||
if (type == TYPE_REL) {
|
if (type == TYPE_REL) {
|
||||||
isBranch = true;
|
isBranch = true;
|
||||||
pcrel = insn.startLocation +
|
pcrel = insn.startLocation + insn.length;
|
||||||
insn.immediateOffset + insn.immediateSize;
|
|
||||||
switch (operand.encoding) {
|
switch (operand.encoding) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -1950,9 +1949,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel,
|
if (!Dis->tryAddingSymbolicOperand(
|
||||||
insn.startLocation, isBranch,
|
mcInst, immediate + pcrel, insn.startLocation, isBranch,
|
||||||
insn.immediateOffset, insn.immediateSize))
|
insn.immediateOffset, insn.immediateSize, insn.length))
|
||||||
mcInst.addOperand(MCOperand::createImm(immediate));
|
mcInst.addOperand(MCOperand::createImm(immediate));
|
||||||
|
|
||||||
if (type == TYPE_MOFFS) {
|
if (type == TYPE_MOFFS) {
|
||||||
|
@ -2089,8 +2088,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (insn.mode == MODE_64BIT){
|
if (insn.mode == MODE_64BIT){
|
||||||
pcrel = insn.startLocation +
|
pcrel = insn.startLocation + insn.length;
|
||||||
insn.displacementOffset + insn.displacementSize;
|
|
||||||
Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
|
Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
|
||||||
insn.startLocation +
|
insn.startLocation +
|
||||||
insn.displacementOffset);
|
insn.displacementOffset);
|
||||||
|
@ -2153,9 +2151,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
|
||||||
mcInst.addOperand(baseReg);
|
mcInst.addOperand(baseReg);
|
||||||
mcInst.addOperand(scaleAmount);
|
mcInst.addOperand(scaleAmount);
|
||||||
mcInst.addOperand(indexReg);
|
mcInst.addOperand(indexReg);
|
||||||
|
|
||||||
|
const uint8_t dispSize =
|
||||||
|
(insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
|
||||||
|
|
||||||
if (!Dis->tryAddingSymbolicOperand(
|
if (!Dis->tryAddingSymbolicOperand(
|
||||||
mcInst, insn.displacement + pcrel, insn.startLocation, false,
|
mcInst, insn.displacement + pcrel, insn.startLocation, false,
|
||||||
insn.displacementOffset, insn.displacementSize))
|
insn.displacementOffset, dispSize, insn.length))
|
||||||
mcInst.addOperand(displacement);
|
mcInst.addOperand(displacement);
|
||||||
mcInst.addOperand(segmentReg);
|
mcInst.addOperand(segmentReg);
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -2608,7 +2608,8 @@ struct DisassembleInfo {
|
||||||
// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
|
// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
|
||||||
// information is returned then this function returns 1 else it returns 0.
|
// information is returned then this function returns 1 else it returns 0.
|
||||||
static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
uint64_t Size, int TagType, void *TagBuf) {
|
uint64_t OpSize, uint64_t InstSize, int TagType,
|
||||||
|
void *TagBuf) {
|
||||||
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
|
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
|
||||||
struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
|
struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
|
||||||
uint64_t value = op_info->Value;
|
uint64_t value = op_info->Value;
|
||||||
|
@ -2625,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
|
|
||||||
unsigned int Arch = info->O->getArch();
|
unsigned int Arch = info->O->getArch();
|
||||||
if (Arch == Triple::x86) {
|
if (Arch == Triple::x86) {
|
||||||
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
|
if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
|
||||||
return 0;
|
return 0;
|
||||||
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
||||||
// TODO:
|
// TODO:
|
||||||
|
@ -2705,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (Arch == Triple::x86_64) {
|
if (Arch == Triple::x86_64) {
|
||||||
if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
|
if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
|
||||||
return 0;
|
return 0;
|
||||||
// For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external
|
// For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external
|
||||||
// relocation entries of a linked image (if any) for an entry that matches
|
// relocation entries of a linked image (if any) for an entry that matches
|
||||||
|
@ -2737,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
// adds the Pc. But for x86_64 external relocation entries the Value
|
// adds the Pc. But for x86_64 external relocation entries the Value
|
||||||
// is the offset from the external symbol.
|
// is the offset from the external symbol.
|
||||||
if (info->O->getAnyRelocationPCRel(RE))
|
if (info->O->getAnyRelocationPCRel(RE))
|
||||||
op_info->Value -= Pc + Offset + Size;
|
op_info->Value -= Pc + InstSize;
|
||||||
const char *name =
|
const char *name =
|
||||||
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
|
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
|
||||||
op_info->AddSymbol.Present = 1;
|
op_info->AddSymbol.Present = 1;
|
||||||
|
@ -2775,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
// adds the Pc. But for x86_64 external relocation entries the Value
|
// adds the Pc. But for x86_64 external relocation entries the Value
|
||||||
// is the offset from the external symbol.
|
// is the offset from the external symbol.
|
||||||
if (info->O->getAnyRelocationPCRel(RE))
|
if (info->O->getAnyRelocationPCRel(RE))
|
||||||
op_info->Value -= Pc + Offset + Size;
|
op_info->Value -= Pc + InstSize;
|
||||||
const char *name =
|
const char *name =
|
||||||
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
|
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
|
||||||
unsigned Type = info->O->getAnyRelocationType(RE);
|
unsigned Type = info->O->getAnyRelocationType(RE);
|
||||||
|
@ -2803,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (Arch == Triple::arm) {
|
if (Arch == Triple::arm) {
|
||||||
if (Offset != 0 || (Size != 4 && Size != 2))
|
if (Offset != 0 || (InstSize != 4 && InstSize != 2))
|
||||||
return 0;
|
return 0;
|
||||||
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
||||||
// TODO:
|
// TODO:
|
||||||
|
@ -2940,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (Arch == Triple::aarch64) {
|
if (Arch == Triple::aarch64) {
|
||||||
if (Offset != 0 || Size != 4)
|
if (Offset != 0 || InstSize != 4)
|
||||||
return 0;
|
return 0;
|
||||||
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
|
||||||
// TODO:
|
// TODO:
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
set(LLVM_LINK_COMPONENTS
|
||||||
|
MC
|
||||||
|
MCDisassembler
|
||||||
|
Target
|
||||||
|
X86Desc
|
||||||
|
X86Disassembler
|
||||||
|
X86Info
|
||||||
|
)
|
||||||
|
|
||||||
|
add_llvm_unittest(X86MCTests
|
||||||
|
X86MCDisassemblerTest.cpp
|
||||||
|
)
|
|
@ -0,0 +1,145 @@
|
||||||
|
//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/MC/MCAsmInfo.h"
|
||||||
|
#include "llvm/MC/MCContext.h"
|
||||||
|
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
||||||
|
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
|
||||||
|
#include "llvm/MC/MCInst.h"
|
||||||
|
#include "llvm/MC/MCRegisterInfo.h"
|
||||||
|
#include "llvm/MC/MCSubtargetInfo.h"
|
||||||
|
#include "llvm/MC/MCTargetOptions.h"
|
||||||
|
#include "llvm/MC/TargetRegistry.h"
|
||||||
|
#include "llvm/Support/TargetSelect.h"
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
struct Context {
|
||||||
|
const char *TripleName = "x86_64-unknown-elf";
|
||||||
|
std::unique_ptr<MCRegisterInfo> MRI;
|
||||||
|
std::unique_ptr<MCAsmInfo> MAI;
|
||||||
|
std::unique_ptr<MCContext> Ctx;
|
||||||
|
std::unique_ptr<MCSubtargetInfo> STI;
|
||||||
|
std::unique_ptr<MCDisassembler> DisAsm;
|
||||||
|
|
||||||
|
Context() {
|
||||||
|
LLVMInitializeX86TargetInfo();
|
||||||
|
LLVMInitializeX86TargetMC();
|
||||||
|
LLVMInitializeX86Disassembler();
|
||||||
|
|
||||||
|
// If we didn't build x86, do not run the test.
|
||||||
|
std::string Error;
|
||||||
|
const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
|
||||||
|
if (!TheTarget)
|
||||||
|
return;
|
||||||
|
|
||||||
|
MRI.reset(TheTarget->createMCRegInfo(TripleName));
|
||||||
|
MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
|
||||||
|
STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
|
||||||
|
Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
|
||||||
|
STI.get());
|
||||||
|
|
||||||
|
DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
|
||||||
|
}
|
||||||
|
|
||||||
|
operator MCContext &() { return *Ctx; };
|
||||||
|
};
|
||||||
|
|
||||||
|
Context &getContext() {
|
||||||
|
static Context Ctxt;
|
||||||
|
return Ctxt;
|
||||||
|
}
|
||||||
|
|
||||||
|
class X86MCSymbolizerTest : public MCSymbolizer {
|
||||||
|
public:
|
||||||
|
X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
|
||||||
|
~X86MCSymbolizerTest() {}
|
||||||
|
|
||||||
|
struct OpInfo {
|
||||||
|
int64_t Value = 0;
|
||||||
|
uint64_t Offset = 0;
|
||||||
|
uint64_t Size;
|
||||||
|
};
|
||||||
|
std::vector<OpInfo> Operands;
|
||||||
|
uint64_t InstructionSize = 0;
|
||||||
|
|
||||||
|
void reset() {
|
||||||
|
Operands.clear();
|
||||||
|
InstructionSize = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
|
||||||
|
int64_t Value, uint64_t Address, bool IsBranch,
|
||||||
|
uint64_t Offset, uint64_t OpSize,
|
||||||
|
uint64_t InstSize) override {
|
||||||
|
Operands.push_back({Value, Offset, OpSize});
|
||||||
|
InstructionSize = InstSize;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
|
||||||
|
uint64_t Address) override {}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST(X86Disassembler, X86MCSymbolizerTest) {
|
||||||
|
X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
|
||||||
|
getContext().DisAsm->setSymbolizer(
|
||||||
|
std::unique_ptr<MCSymbolizer>(TestSymbolizer));
|
||||||
|
|
||||||
|
MCDisassembler::DecodeStatus Status;
|
||||||
|
MCInst Inst;
|
||||||
|
uint64_t InstSize;
|
||||||
|
|
||||||
|
auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
|
||||||
|
TestSymbolizer->reset();
|
||||||
|
Status =
|
||||||
|
getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
|
||||||
|
ASSERT_TRUE(Status == MCDisassembler::Success);
|
||||||
|
EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset,
|
||||||
|
uint64_t Size) {
|
||||||
|
ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
|
||||||
|
EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
|
||||||
|
EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
|
||||||
|
EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size);
|
||||||
|
};
|
||||||
|
|
||||||
|
// movq $0x80000, 0x80000
|
||||||
|
checkBytes(
|
||||||
|
{0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
|
||||||
|
checkOperand(0, 0x80000, 4, 4);
|
||||||
|
checkOperand(1, 0x80000, 8, 4);
|
||||||
|
|
||||||
|
// movq $0x2a, 0x123(%rax,%r14,8)
|
||||||
|
checkBytes(
|
||||||
|
{0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00});
|
||||||
|
checkOperand(0, 291, 4, 4);
|
||||||
|
checkOperand(1, 42, 8, 4);
|
||||||
|
|
||||||
|
// movq $0xffffffffffffefe8, -0x1(%rip)
|
||||||
|
// Test that the value of the rip-relative operand is set correctly.
|
||||||
|
// The instruction address is 0 and the size is 12 bytes.
|
||||||
|
checkBytes(
|
||||||
|
{0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
|
||||||
|
checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4);
|
||||||
|
checkOperand(1, 0xffffffffffffefe8, 7, 4);
|
||||||
|
|
||||||
|
// movq $0xfffffffffffffef5, (%r12)
|
||||||
|
// Test that the displacement operand has a size of 0, since it is not
|
||||||
|
// explicitly specified in the instruction.
|
||||||
|
checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
|
||||||
|
checkOperand(0, 0, 4, 0);
|
||||||
|
checkOperand(1, 0xfffffffffffffef5, 4, 4);
|
||||||
|
}
|
Loading…
Reference in New Issue