[LLDB][RISCV][NFC] Rewrite instruction in algebraic datatype

The old approach (a dedicated ExecXXX function for each instruction) is not flexible and would result in duplicated code once support for RVC (the compressed instruction extension) is added.

According to the spec, every compressed instruction can be decoded to a non-compressed one. So we can lower compressed instructions to instructions we already had, which requires a decoupling between the decoder and executor.

This patch:
- uses llvm::Optional and its combinators as much as possible.
- uses template constraints on common instructions.
- makes instructions strongly typed (instead of passing uint32_t everywhere, which is error-prone and burdens the developer when lowering RVC) with the help of an algebraic data type (std::variant).

Note:
(NFC) because this is more of a refactoring in preparation for RVC.

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D135015
This commit is contained in:
Emmmer 2022-09-28 23:04:08 +08:00
parent 11897708c0
commit d0dcbb9b02
4 changed files with 1152 additions and 1146 deletions

View File

@ -9,6 +9,8 @@
#ifndef LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_EMULATEINSTRUCTIONRISCV_H
#define LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_EMULATEINSTRUCTIONRISCV_H
#include "RISCVInstructions.h"
#include "lldb/Core/EmulateInstruction.h"
#include "lldb/Interpreter/OptionValue.h"
#include "lldb/Utility/Log.h"
@ -17,22 +19,6 @@
namespace lldb_private {
constexpr uint32_t DecodeRD(uint32_t inst) { return (inst & 0xF80) >> 7; }
constexpr uint32_t DecodeRS1(uint32_t inst) { return (inst & 0xF8000) >> 15; }
constexpr uint32_t DecodeRS2(uint32_t inst) { return (inst & 0x1F00000) >> 20; }
class EmulateInstructionRISCV;
struct InstrPattern {
const char *name;
/// Bit mask to check the type of a instruction (B-Type, I-Type, J-Type, etc.)
uint32_t type_mask;
/// Characteristic value after bitwise-and with type_mask.
uint32_t eigen;
bool (*exec)(EmulateInstructionRISCV &emulator, uint32_t inst,
bool ignore_cond);
};
class EmulateInstructionRISCV : public EmulateInstruction {
public:
static llvm::StringRef GetPluginNameStatic() { return "riscv"; }
@ -79,31 +65,36 @@ public:
llvm::Optional<RegisterInfo> GetRegisterInfo(lldb::RegisterKind reg_kind,
uint32_t reg_num) override;
lldb::addr_t ReadPC(bool &success);
llvm::Optional<lldb::addr_t> ReadPC();
bool WritePC(lldb::addr_t pc);
const InstrPattern *Decode(uint32_t inst);
bool DecodeAndExecute(uint32_t inst, bool ignore_cond);
llvm::Optional<DecodeResult> ReadInstructionAt(lldb::addr_t addr);
llvm::Optional<DecodeResult> Decode(uint32_t inst);
bool Execute(DecodeResult inst, bool ignore_cond);
template <typename T>
static std::enable_if_t<std::is_integral_v<T>, T>
ReadMem(EmulateInstructionRISCV &emulator, uint64_t addr, bool *success) {
std::enable_if_t<std::is_integral_v<T>, llvm::Optional<T>>
ReadMem(uint64_t addr) {
EmulateInstructionRISCV::Context ctx;
ctx.type = EmulateInstruction::eContextRegisterLoad;
ctx.SetNoArgs();
return T(emulator.ReadMemoryUnsigned(ctx, addr, sizeof(T), T(), success));
bool success = false;
T result = ReadMemoryUnsigned(ctx, addr, sizeof(T), T(), &success);
if (!success)
return {}; // aka return false
return result;
}
template <typename T>
static bool WriteMem(EmulateInstructionRISCV &emulator, uint64_t addr,
RegisterValue value) {
template <typename T> bool WriteMem(uint64_t addr, uint64_t value) {
EmulateInstructionRISCV::Context ctx;
ctx.type = EmulateInstruction::eContextRegisterStore;
ctx.SetNoArgs();
return emulator.WriteMemoryUnsigned(ctx, addr, value.GetAsUInt64(),
sizeof(T));
return WriteMemoryUnsigned(ctx, addr, value, sizeof(T));
}
private:
/// Last decoded instruction from m_opcode
DecodeResult m_decoded;
};
} // namespace lldb_private

View File

@ -0,0 +1,220 @@
//===-- RISCVInstructions.h -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_RISCVINSTRUCTION_H
#define LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_RISCVINSTRUCTION_H
#include <cstdint>
#include <variant>
// NOTE(review): EmulateInstructionRISCV.h itself includes this header, so the
// two headers include each other. The declarations in this file only name
// EmulateInstructionRISCV by reference, for which the forward declaration
// further down is sufficient. A translation unit that includes this header
// first would reach the emulator class before DecodeResult is declared.
// Consider dropping this include -- confirm nothing else relies on it.
#include "EmulateInstructionRISCV.h"
#include "llvm/ADT/Optional.h"
namespace lldb_private {
class EmulateInstructionRISCV;
/// Strongly-typed wrapper around a register index that an instruction writes
/// to (used as the `rd` member of the instruction structs in this file).
struct Rd {
/// Index of the register.
uint32_t rd;
/// Writes \p value to this register through \p emulator. Only declared here;
/// the definition lives elsewhere.
/// \return presumably true when the write succeeded -- confirm against the
/// definition.
bool Write(EmulateInstructionRISCV &emulator, uint64_t value);
};
/// Strongly-typed wrapper around a register index that an instruction reads
/// from (used as the `rs1`/`rs2` members of the instruction structs in this
/// file). All readers are only declared here and return an empty Optional
/// rather than a value when they cannot produce one.
struct Rs {
/// Index of the register.
uint32_t rs;
/// Reads the register as an unsigned 64-bit value.
llvm::Optional<uint64_t> Read(EmulateInstructionRISCV &emulator);
/// Reads the register as a signed 32-bit value.
/// NOTE(review): presumably the low 32 bits reinterpreted as signed --
/// confirm against the definition.
llvm::Optional<int32_t> ReadI32(EmulateInstructionRISCV &emulator);
/// Reads the register as a signed 64-bit value.
llvm::Optional<int64_t> ReadI64(EmulateInstructionRISCV &emulator);
/// Reads the register as an unsigned 32-bit value.
/// NOTE(review): presumably the low 32 bits -- confirm against the
/// definition.
llvm::Optional<uint32_t> ReadU32(EmulateInstructionRISCV &emulator);
};
/// Defines a struct called NAME holding a written register (rd), one source
/// register (rs1) and a 32-bit immediate, in that order. Keep the member
/// order stable if instances are brace-initialized positionally.
/// The expansion has no trailing semicolon; the use site supplies it.
#define I_TYPE_INST(NAME) \
struct NAME { \
Rd rd; \
Rs rs1; \
uint32_t imm; \
}
/// Defines a struct called NAME holding two source registers (rs1, rs2) and a
/// 32-bit immediate, in that order; there is no written register. Keep the
/// member order stable if instances are brace-initialized positionally.
/// The expansion has no trailing semicolon; the use site supplies it.
#define S_TYPE_INST(NAME) \
struct NAME { \
Rs rs1; \
Rs rs2; \
uint32_t imm; \
}
/// Defines a struct called NAME holding a written register (rd) and a 32-bit
/// immediate, in that order. Keep the member order stable if instances are
/// brace-initialized positionally.
/// The expansion has no trailing semicolon; the use site supplies it.
#define U_TYPE_INST(NAME) \
struct NAME { \
Rd rd; \
uint32_t imm; \
}
/// J-type structs have the same members as U-type structs in this code (a
/// written register plus an immediate), so the U-type macro is reused.
#define J_TYPE_INST(NAME) U_TYPE_INST(NAME)
/// Defines a struct called NAME holding a written register (rd) and two
/// source registers (rs1, rs2), in that order. Keep the member order stable
/// if instances are brace-initialized positionally.
/// The expansion has no trailing semicolon; the use site supplies it.
#define R_TYPE_INST(NAME) \
struct NAME { \
Rd rd; \
Rs rs1; \
Rs rs2; \
}
/// Defines a struct called NAME holding a written register (rd), one source
/// register (rs1) and a 32-bit `shamt` value, in that order.
/// NOTE(review): `shamt` is presumably the shift amount taken from the
/// instruction encoding -- confirm against the decoder.
/// The expansion has no trailing semicolon; the use site supplies it.
#define R_SHAMT_TYPE_INST(NAME) \
struct NAME { \
Rd rd; \
Rs rs1; \
uint32_t shamt; \
}
/// Defines a struct called NAME holding a written register (rd) and a single
/// source register (rs1), in that order. In this file it is used for LR_W
/// and LR_D.
/// The expansion has no trailing semicolon; the use site supplies it.
#define R_RS1_TYPE_INST(NAME) \
struct NAME { \
Rd rd; \
Rs rs1; \
}
// RV32I instructions (The base integer ISA)
/// Instruction struct with two source registers, a 32-bit immediate and a
/// funct3 value; it has no written register.
/// NOTE(review): presumably this single struct represents every conditional
/// branch, with funct3 selecting the comparison -- confirm against the
/// decoder and executor.
struct B {
Rs rs1;
Rs rs2;
uint32_t imm;
uint32_t funct3;
};
// Each line below expands, through one of the *_TYPE_INST macros defined
// earlier in this file, into one struct definition named after the
// instruction. The macro chosen fixes which operand members the struct has.
// Every struct declared here is also listed as an alternative of RISCVInst.
U_TYPE_INST(LUI);
U_TYPE_INST(AUIPC);
J_TYPE_INST(JAL);
I_TYPE_INST(JALR);
I_TYPE_INST(LB);
I_TYPE_INST(LH);
I_TYPE_INST(LW);
I_TYPE_INST(LBU);
I_TYPE_INST(LHU);
S_TYPE_INST(SB);
S_TYPE_INST(SH);
S_TYPE_INST(SW);
I_TYPE_INST(ADDI);
I_TYPE_INST(SLTI);
I_TYPE_INST(SLTIU);
I_TYPE_INST(XORI);
I_TYPE_INST(ORI);
I_TYPE_INST(ANDI);
R_TYPE_INST(ADD);
R_TYPE_INST(SUB);
R_TYPE_INST(SLL);
R_TYPE_INST(SLT);
R_TYPE_INST(SLTU);
R_TYPE_INST(XOR);
R_TYPE_INST(SRL);
R_TYPE_INST(SRA);
R_TYPE_INST(OR);
R_TYPE_INST(AND);
// RV64I instructions (the base integer ISA)
I_TYPE_INST(LWU);
I_TYPE_INST(LD);
S_TYPE_INST(SD);
R_SHAMT_TYPE_INST(SLLI);
R_SHAMT_TYPE_INST(SRLI);
R_SHAMT_TYPE_INST(SRAI);
I_TYPE_INST(ADDIW);
R_SHAMT_TYPE_INST(SLLIW);
R_SHAMT_TYPE_INST(SRLIW);
R_SHAMT_TYPE_INST(SRAIW);
R_TYPE_INST(ADDW);
R_TYPE_INST(SUBW);
R_TYPE_INST(SLLW);
R_TYPE_INST(SRLW);
R_TYPE_INST(SRAW);
// RV32M instructions (the standard integer multiplication and division
// extension)
R_TYPE_INST(MUL);
R_TYPE_INST(MULH);
R_TYPE_INST(MULHSU);
R_TYPE_INST(MULHU);
R_TYPE_INST(DIV);
R_TYPE_INST(DIVU);
R_TYPE_INST(REM);
R_TYPE_INST(REMU);
// RV64M instructions (the standard integer multiplication and division
// extension)
R_TYPE_INST(MULW);
R_TYPE_INST(DIVW);
R_TYPE_INST(DIVUW);
R_TYPE_INST(REMW);
R_TYPE_INST(REMUW);
// RV32A instructions (the standard atomic instruction extension)
R_RS1_TYPE_INST(LR_W);
R_TYPE_INST(SC_W);
R_TYPE_INST(AMOSWAP_W);
R_TYPE_INST(AMOADD_W);
R_TYPE_INST(AMOXOR_W);
R_TYPE_INST(AMOAND_W);
R_TYPE_INST(AMOOR_W);
R_TYPE_INST(AMOMIN_W);
R_TYPE_INST(AMOMAX_W);
R_TYPE_INST(AMOMINU_W);
R_TYPE_INST(AMOMAXU_W);
// RV64A instructions (the standard atomic instruction extension)
R_RS1_TYPE_INST(LR_D);
R_TYPE_INST(SC_D);
R_TYPE_INST(AMOSWAP_D);
R_TYPE_INST(AMOADD_D);
R_TYPE_INST(AMOXOR_D);
R_TYPE_INST(AMOAND_D);
R_TYPE_INST(AMOOR_D);
R_TYPE_INST(AMOMIN_D);
R_TYPE_INST(AMOMAX_D);
R_TYPE_INST(AMOMINU_D);
R_TYPE_INST(AMOMAXU_D);
/// Sum type over every instruction struct declared in this file; a value
/// holds exactly one decoded instruction together with its operands.
/// The position of an alternative determines the value returned by
/// std::variant::index(), so append new instructions rather than reordering
/// if any code depends on the index.
using RISCVInst =
std::variant<LUI, AUIPC, JAL, JALR, B, LB, LH, LW, LBU, LHU, SB, SH, SW,
ADDI, SLTI, SLTIU, XORI, ORI, ANDI, ADD, SUB, SLL, SLT, SLTU,
XOR, SRL, SRA, OR, AND, LWU, LD, SD, SLLI, SRLI, SRAI, ADDIW,
SLLIW, SRLIW, SRAIW, ADDW, SUBW, SLLW, SRLW, SRAW, MUL, MULH,
MULHSU, MULHU, DIV, DIVU, REM, REMU, MULW, DIVW, DIVUW, REMW,
REMUW, LR_W, SC_W, AMOSWAP_W, AMOADD_W, AMOXOR_W, AMOAND_W,
AMOOR_W, AMOMIN_W, AMOMAX_W, AMOMINU_W, AMOMAXU_W, LR_D, SC_D,
AMOSWAP_D, AMOADD_D, AMOXOR_D, AMOAND_D, AMOOR_D, AMOMIN_D,
AMOMAX_D, AMOMINU_D, AMOMAXU_D>;
/// One entry describing how to recognise and decode an instruction: an
/// encoding matches when `(inst & type_mask) == eigen`, per the field
/// comments below.
struct InstrPattern {
/// Human-readable instruction name (the unit tests compare it against
/// strings such as "MUL").
const char *name;
/// Bit mask to check the type of an instruction (B-Type, I-Type, J-Type,
/// etc.)
uint32_t type_mask;
/// Characteristic value after bitwise-and with type_mask.
uint32_t eigen;
/// Builds the strongly-typed instruction from the raw 32-bit encoding.
RISCVInst (*decode)(uint32_t inst);
};
/// Everything the decoder produces for one instruction; passed by value to
/// the executor.
struct DecodeResult {
/// The strongly-typed instruction with its operands.
RISCVInst decoded;
/// The instruction encoding this result was decoded from (the unit tests
/// feed it back into DecodeRD/DecodeRS1/DecodeRS2).
uint32_t inst;
/// NOTE(review): presumably true when the encoding was a compressed (RVC)
/// instruction -- confirm against the decoder.
bool is_rvc;
/// Copy of the pattern that matched the encoding.
InstrPattern pattern;
};
// Register-field extractors for 32-bit encodings. Each one shifts the field
// down to bit 0 and keeps its five bits.

/// Returns bits [11:7] of \p inst.
constexpr uint32_t DecodeRD(uint32_t inst) { return (inst >> 7) & 0x1F; }
/// Returns bits [19:15] of \p inst.
constexpr uint32_t DecodeRS1(uint32_t inst) { return (inst >> 15) & 0x1F; }
/// Returns bits [24:20] of \p inst.
constexpr uint32_t DecodeRS2(uint32_t inst) { return (inst >> 20) & 0x1F; }

// Register-field extractors for 16-bit (RVC) encodings, named
// Decode<format>_<field>. Every helper returns the raw field value.
// NOTE(review): the RISC-V "C" extension uses the 3-bit register fields to
// name x8..x15; these helpers do not add that offset of 8 -- confirm that
// callers account for it before using the result as a register index.

// 5-bit field in bits [11:7].
constexpr uint16_t DecodeCR_RD(uint16_t inst) { return (inst >> 7) & 0x1F; }
constexpr uint16_t DecodeCI_RD(uint16_t inst) { return (inst >> 7) & 0x1F; }
constexpr uint16_t DecodeCR_RS1(uint16_t inst) { return (inst >> 7) & 0x1F; }
constexpr uint16_t DecodeCI_RS1(uint16_t inst) { return (inst >> 7) & 0x1F; }

// 3-bit field in bits [4:2].
constexpr uint16_t DecodeCIW_RD(uint16_t inst) { return (inst >> 2) & 0x7; }
constexpr uint16_t DecodeCL_RD(uint16_t inst) { return (inst >> 2) & 0x7; }
constexpr uint16_t DecodeCS_RS2(uint16_t inst) { return (inst >> 2) & 0x7; }
constexpr uint16_t DecodeCA_RS2(uint16_t inst) { return (inst >> 2) & 0x7; }

// 3-bit field in bits [9:7].
constexpr uint16_t DecodeCA_RD(uint16_t inst) { return (inst >> 7) & 0x7; }
constexpr uint16_t DecodeCB_RD(uint16_t inst) { return (inst >> 7) & 0x7; }
constexpr uint16_t DecodeCL_RS1(uint16_t inst) { return (inst >> 7) & 0x7; }
constexpr uint16_t DecodeCS_RS1(uint16_t inst) { return (inst >> 7) & 0x7; }
constexpr uint16_t DecodeCA_RS1(uint16_t inst) { return (inst >> 7) & 0x7; }
constexpr uint16_t DecodeCB_RS1(uint16_t inst) { return (inst >> 7) & 0x7; }

// 5-bit field in bits [6:2].
constexpr uint16_t DecodeCR_RS2(uint16_t inst) { return (inst >> 2) & 0x1F; }
constexpr uint16_t DecodeCSS_RS2(uint16_t inst) { return (inst >> 2) & 0x1F; }
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_INSTRUCTION_RISCV_RISCVINSTRUCTION_H

View File

@ -75,6 +75,12 @@ struct RISCVEmulatorTester : public EmulateInstructionRISCV, testing::Test {
memcpy(tester->memory + addr, dst, length);
return length;
};
bool DecodeAndExecute(uint32_t inst, bool ignore_cond) {
return Decode(inst)
.transform([&](DecodeResult res) { return Execute(res, ignore_cond); })
.value_or(false);
}
};
TEST_F(RISCVEmulatorTester, testJAL) {
@ -84,13 +90,10 @@ TEST_F(RISCVEmulatorTester, testJAL) {
uint32_t inst = 0b11111110100111111111000011101111;
ASSERT_TRUE(DecodeAndExecute(inst, false));
auto x1 = gpr.gpr[1];
bool success = false;
auto pc = ReadPC(success);
ASSERT_TRUE(success);
auto pc = ReadPC();
ASSERT_TRUE(pc.has_value());
ASSERT_EQ(x1, old_pc + 4);
ASSERT_EQ(pc, old_pc + (-6 * 4));
ASSERT_EQ(*pc, old_pc + (-6 * 4));
}
constexpr uint32_t EncodeIType(uint32_t opcode, uint32_t funct3, uint32_t rd,
@ -98,7 +101,7 @@ constexpr uint32_t EncodeIType(uint32_t opcode, uint32_t funct3, uint32_t rd,
return imm << 20 | rs1 << 15 | funct3 << 12 | rd << 7 | opcode;
}
constexpr uint32_t JALR(uint32_t rd, uint32_t rs1, int32_t offset) {
constexpr uint32_t EncodeJALR(uint32_t rd, uint32_t rs1, int32_t offset) {
return EncodeIType(0b1100111, 0, rd, rs1, uint32_t(offset));
}
@ -108,17 +111,14 @@ TEST_F(RISCVEmulatorTester, testJALR) {
WritePC(old_pc);
gpr.gpr[2] = old_x2;
// jalr x1, x2(-255)
uint32_t inst = JALR(1, 2, -255);
uint32_t inst = EncodeJALR(1, 2, -255);
ASSERT_TRUE(DecodeAndExecute(inst, false));
auto x1 = gpr.gpr[1];
bool success = false;
auto pc = ReadPC(success);
ASSERT_TRUE(success);
auto pc = ReadPC();
ASSERT_TRUE(pc.has_value());
ASSERT_EQ(x1, old_pc + 4);
// JALR always zeros the bottom bit of the target address.
ASSERT_EQ(pc, (old_x2 + (-255)) & (~1));
ASSERT_EQ(*pc, (old_x2 + (-255)) & (~1));
}
constexpr uint32_t EncodeBType(uint32_t opcode, uint32_t funct3, uint32_t rs1,
@ -165,10 +165,9 @@ void testBranch(RISCVEmulatorTester *tester, EncoderB encoder, bool branched,
// b<cmp> x1, x2, (-256)
uint32_t inst = encoder(1, 2, -256);
ASSERT_TRUE(tester->DecodeAndExecute(inst, false));
bool success = false;
auto pc = tester->ReadPC(success);
ASSERT_TRUE(success);
ASSERT_EQ(pc, old_pc + (branched ? (-256) : 0));
auto pc = tester->ReadPC();
ASSERT_TRUE(pc.has_value());
ASSERT_EQ(*pc, old_pc + (branched ? (-256) : 0));
}
#define GEN_BRANCH_TEST(name, rs1, rs2_branched, rs2_continued) \
@ -185,9 +184,9 @@ void CheckRD(RISCVEmulatorTester *tester, uint64_t rd, uint64_t value) {
template <typename T>
void CheckMem(RISCVEmulatorTester *tester, uint64_t addr, uint64_t value) {
bool success = false;
ASSERT_EQ(tester->ReadMem<T>(*tester, addr, &success), value);
ASSERT_TRUE(success);
auto mem = tester->ReadMem<T>(addr);
ASSERT_TRUE(mem.has_value());
ASSERT_EQ(*mem, value);
}
using RS1 = uint64_t;
@ -195,13 +194,13 @@ using RS2 = uint64_t;
using PC = uint64_t;
using RDComputer = std::function<uint64_t(RS1, RS2, PC)>;
void TestInst(RISCVEmulatorTester *tester, uint64_t inst, bool has_rs2,
void TestInst(RISCVEmulatorTester *tester, DecodeResult inst, bool has_rs2,
RDComputer rd_val) {
lldb::addr_t old_pc = 0x114514;
tester->WritePC(old_pc);
uint32_t rd = DecodeRD(inst);
uint32_t rs1 = DecodeRS1(inst);
uint32_t rd = DecodeRD(inst.inst);
uint32_t rs1 = DecodeRS1(inst.inst);
uint32_t rs2 = 0;
uint64_t rs1_val = 0x19;
@ -211,7 +210,7 @@ void TestInst(RISCVEmulatorTester *tester, uint64_t inst, bool has_rs2,
tester->gpr.gpr[rs1] = rs1_val;
if (has_rs2) {
rs2 = DecodeRS2(inst);
rs2 = DecodeRS2(inst.inst);
if (rs2) {
if (rs1 == rs2)
rs2_val = rs1_val;
@ -219,7 +218,7 @@ void TestInst(RISCVEmulatorTester *tester, uint64_t inst, bool has_rs2,
}
}
ASSERT_TRUE(tester->DecodeAndExecute(inst, false));
ASSERT_TRUE(tester->Execute(inst, false));
CheckRD(tester, rd, rd_val(rs1_val, rs2 ? rs2_val : 0, old_pc));
}
@ -239,8 +238,7 @@ void TestAtomic(RISCVEmulatorTester *tester, uint64_t inst, T rs1_val,
tester->gpr.gpr[rs2] = rs2_val;
// Write and check rs1_val in atomic_addr
ASSERT_TRUE(
tester->WriteMem<T>(*tester, atomic_addr, RegisterValue(rs1_val)));
ASSERT_TRUE(tester->WriteMem<T>(atomic_addr, rs1_val));
CheckMem<T>(tester, atomic_addr, rs1_val);
ASSERT_TRUE(tester->DecodeAndExecute(inst, false));
@ -295,10 +293,8 @@ TEST_F(RISCVEmulatorTester, TestDecodeAndExcute) {
// RV32M & RV64M Tests
{0x02f787b3, "MUL", true, [](RS1 rs1, RS2 rs2, PC) { return rs1 * rs2; }},
{0x2F797B3, "MULH", true, [](RS1 rs1, RS2 rs2, PC) { return 0; }},
{0x2F7A7B3, "MULHSU", true,
[](RS1 rs1, RS2 rs2, PC) { return 0; }},
{0x2F7B7B3, "MULHU", true,
[](RS1 rs1, RS2 rs2, PC) { return 0; }},
{0x2F7A7B3, "MULHSU", true, [](RS1 rs1, RS2 rs2, PC) { return 0; }},
{0x2F7B7B3, "MULHU", true, [](RS1 rs1, RS2 rs2, PC) { return 0; }},
{0x02f747b3, "DIV", true, [](RS1 rs1, RS2 rs2, PC) { return rs1 / rs2; }},
{0x02f757b3, "DIVU", true,
[](RS1 rs1, RS2 rs2, PC) { return rs1 / rs2; }},
@ -317,11 +313,11 @@ TEST_F(RISCVEmulatorTester, TestDecodeAndExcute) {
[](RS1 rs1, RS2 rs2, PC) { return rs1 % rs2; }},
};
for (auto i : tests) {
const InstrPattern *pattern = this->Decode(i.inst);
ASSERT_TRUE(pattern != nullptr);
std::string name = pattern->name;
auto decode = this->Decode(i.inst);
ASSERT_TRUE(decode.has_value());
std::string name = decode->pattern.name;
ASSERT_EQ(name, i.name);
TestInst(this, i.inst, i.has_rs2, i.rd_val);
TestInst(this, *decode, i.has_rs2, i.rd_val);
}
}