[VENTUS][RISCV] Remove redundant code and enable the regext insertion pass when -verify-machineinstrs is used.

This commit is contained in:
yanming 2023-07-07 17:05:46 +08:00
parent 9899aee134
commit d37e92610d
11 changed files with 49 additions and 24 deletions

View File

@ -143,7 +143,6 @@ Accordingly, after all the building process, you can change directory to `<llvm-
* Emit a `barrier` instruction for all stores to local/global memory except sGPR spills.
* Stacks for sGPR spilling and per-thread usage are supported by using RISCV::X2 as the warp-level stack and RISCV::X4 as the per-thread-level stack. However, the two stack size calculations are not yet split apart, so a lot of stack slots are wasted.
* VentusRegextInsertion pass may generate incorrect register ordering for next instruction, see FIXME in that pass. To avoid breaking def-use chain, we could keep the extended instruction unmodified by removing `Op.setRegIgnoreDUChain()` from the pass, the elf generation pass should ignore the higher bit(>2^5) of the register encoding automatically.
* ~~Pattern match VV and VX optimization. There is only type information in the DAG pattern matching, so we can't specify whether to match a DAG to a vop.vv or a vop.vx MIR in a tblgen pattern; a fix-up pass should therefore be run after the codegen pass~~.
* OpenCL kernel API `get_enqueued_local_size`: needs to support non-uniform workgroups
* `mem_fence` builtin support

View File

@ -477,10 +477,6 @@ public:
///
void setReg(Register Reg);
/// Change the register this operand corresponds to, but breaks def-use chain
///
void setRegIgnoreDUChain(Register Reg);
void setSubReg(unsigned subReg) {
assert(isReg() && "Wrong MachineOperand mutator");
SubReg_TargetFlags = subReg;

View File

@ -75,17 +75,6 @@ void MachineOperand::setReg(Register Reg) {
SmallContents.RegNo = Reg;
}
void MachineOperand::setRegIgnoreDUChain(Register Reg) {
if (getReg() == Reg)
return; // No change.
// Clear the IsRenamable bit to keep it conservatively correct.
IsRenamable = false;
// Otherwise, just change the register, no problem. :)
SmallContents.RegNo = Reg;
}
void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
const TargetRegisterInfo &TRI) {
assert(Reg.isVirtual());

View File

@ -54,11 +54,6 @@ bool VentusRegextInsertion::runOnMachineFunction(MachineFunction &MF) {
MF.getSubtarget().getRegisterInfo());
bool Modified = false;
// FIXME: As this expansion pass will break def-use chain, it can not pass
// the MachineVerifierPass.
if (getCGPassBuilderOption().VerifyMachineCode)
return Modified;
for (auto &MBB : MF)
Modified |= runOnMachineBasicBlock(MBB);
return Modified;
@ -104,8 +99,7 @@ bool VentusRegextInsertion::insertRegext(MachineBasicBlock &MBB,
if (hasOverflow) {
DebugLoc DL = MI.getDebugLoc();
// Create instruction to expand register basic offset as imm * 32
BuildMI(MBB, &MI, DL, TII->get(RISCV::REGEXT))
.addReg(RISCV::X0)
BuildMI(MBB, &MI, DL, TII->get(RISCV::REGEXT), RISCV::X0)
.addReg(RISCV::X0)
.addImm(Offsets);
}

View File

@ -9,6 +9,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: .cfi_def_cfa_offset 12
; VENTUS-NEXT: addi tp, tp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
@ -19,6 +20,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v0, zero
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z12get_local_idj
@ -26,6 +28,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v0, v0, t1
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vsll.vi v1, v33, 2
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v1, v1, t0

View File

@ -11,11 +11,13 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: addi tp, tp, 24
; VENTUS-NEXT: .cfi_def_cfa_offset 24
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: lw t0, 0(a0)
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v33, t0
; VENTUS-NEXT: lui t1, %hi(foo.b)
; VENTUS-NEXT: addi t2, t1, %lo(foo.b)
@ -41,6 +43,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: lw t1, 16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vadd.vx v2, v0, t1
; VENTUS-NEXT: vlw12.v v2, 0(v2)
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vadd.vv v0, v33, v0
; VENTUS-NEXT: vlw12.v v3, 0(v0)
; VENTUS-NEXT: # kill: def $v4 killed $x5
@ -51,6 +54,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-NEXT: .LBB0_2: # %if.else
; VENTUS-NEXT: vmv.v.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vadd.vv v0, v33, v0
; VENTUS-NEXT: vsw12.v v1, 0(v0)
; VENTUS-NEXT: .LBB0_3: # %if.end
@ -255,41 +259,54 @@ define dso_local i32 @stack_space(ptr addrspace(3) nocapture noundef readnone %a
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 48
; VENTUS-NEXT: .cfi_def_cfa_offset 48
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -48(v32)
; VENTUS-NEXT: li t0, 1
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -44(v32)
; VENTUS-NEXT: li t0, 2
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -40(v32)
; VENTUS-NEXT: li t0, 3
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -36(v32)
; VENTUS-NEXT: li t0, 4
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -32(v32)
; VENTUS-NEXT: li t0, 5
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -28(v32)
; VENTUS-NEXT: li t0, 6
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -24(v32)
; VENTUS-NEXT: li t0, 7
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -20(v32)
; VENTUS-NEXT: li t0, 8
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -16(v32)
; VENTUS-NEXT: li t0, 9
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -12(v32)
; VENTUS-NEXT: li t0, 10
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: li t0, 11
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v1)
; VENTUS-NEXT: vsll.vi v0, v0, 2

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu < %s \
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {

View File

@ -45,18 +45,23 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: addi tp, tp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v1, zero
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v34, v0, zero
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vadd.vv v1, v33, v0
; VENTUS-NEXT: vlw12.v v1, 0(v1)
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: vadd.vv v0, v34, v0
; VENTUS-NEXT: vlw12.v v2, 0(v0)
; VENTUS-NEXT: vadd.vv v1, v2, v1

View File

@ -84,10 +84,13 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 28
; VENTUS-NEXT: .cfi_def_cfa_offset 28
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: .cfi_offset v33.l, 4
; VENTUS-NEXT: .cfi_offset v34.l, 0
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v33, -24(v32)
; VENTUS-NEXT: regext zero, zero, 9
; VENTUS-NEXT: vlw.v v34, -28(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vlw12.v v1, 0(v1)
@ -151,7 +154,9 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vadd.vv v0, v0, v3
; VENTUS-NEXT: vlw12.v v1, 0(v31)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vlw12.v v2, 0(v34)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vlw.v v3, 0(v33)
; VENTUS-NEXT: vadd.vv v0, v0, v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
@ -263,20 +268,24 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: addi tp, tp, 8
; VENTUS-NEXT: .cfi_def_cfa_offset 8
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 0
; VENTUS-NEXT: vlw12.v v0, 0(v0)
; VENTUS-NEXT: vadd.vi v0, v0, 1
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -8(v32)
; VENTUS-NEXT: vlw12.v v0, 0(v1)
; VENTUS-NEXT: vadd.vi v0, v0, 2
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v0, -4(v32)
; VENTUS-NEXT: addi t0, tp, -8
; VENTUS-NEXT: addi t1, tp, -4
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: vmv.v.x v1, t1
; VENTUS-NEXT: call add
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vlw.v v1, -8(v32)
; VENTUS-NEXT: vadd.vv v0, v1, v0
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload

View File

@ -14,14 +14,22 @@ define dso_local i32 @printf(ptr noundef %fmt, ...) {
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi tp, tp, 64
; VENTUS-NEXT: .cfi_def_cfa_offset 64
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: li t0, 0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v7, -60(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v6, -56(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v5, -52(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v4, -48(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v3, -44(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v2, -40(v32)
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: vsw.v v1, -36(v32)
; VENTUS-NEXT: addi t1, tp, -32
; VENTUS-NEXT: sw t1, -32(tp)

View File

@ -130,17 +130,20 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: addi tp, tp, 4
; VENTUS-NEXT: .cfi_def_cfa_offset 4
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vmv.v.x v32, tp
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 4
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: vmv.v.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: regext zero, zero, 1
; VENTUS-NEXT: vadd.vx v33, v0, zero
; VENTUS-NEXT: li t0, 13
; VENTUS-NEXT: li t1, 14
; VENTUS-NEXT: vmv.v.x v1, t1
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 8
; VENTUS-NEXT: .Lpcrel_hi4:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_7)
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi4)
@ -148,6 +151,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: # %bb.1: # %if.else
; VENTUS-NEXT: li t0, 17
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: .Lpcrel_hi5:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_7)
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi5)
@ -156,6 +160,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
; VENTUS-NEXT: li t0, 1
; VENTUS-NEXT: vmv.v.x v0, t0
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: regext zero, zero, 64
; VENTUS-NEXT: .Lpcrel_hi6:
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_6)
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi6)