[VENTUS][RISCV] Remove redundant code and enable regext insertion pass when verify machineinstrs.
This commit is contained in:
parent
9899aee134
commit
d37e92610d
|
@ -143,7 +143,6 @@ Accordingly, after all the building process, you can change directory to `<llvm-
|
|||
|
||||
* Emit `barrier` instruction for all stores to local/global memory except sGPR spill.
|
||||
* Stacks for sGPR spilling and per-thread usage are supported by using RISCV::X2 as the warp-level stack and RISCV::X4 as the per-thread-level stack. However, the two stack size calculations are not yet split out, so a lot of stack slots are wasted.
|
||||
* The VentusRegextInsertion pass may generate incorrect register ordering for the next instruction; see the FIXME in that pass. To avoid breaking the def-use chain, we could keep the extended instruction unmodified by removing `Op.setRegIgnoreDUChain()` from the pass; the ELF generation pass should automatically ignore the higher bits (>2^5) of the register encoding.
|
||||
* ~~Pattern match VV and VX optimization. There is only type information in DAG pattern matching, so we can't specify whether to match a DAG to a vop.vv or vop.vx MIR in a tblgen pattern; a fix-up pass should therefore be run after the codegen pass~~.
|
||||
* OpenCL kernel API - `get_enqueued_local_size` needs to support non-uniform workgroups
|
||||
* `mem_fence` builtin support
|
||||
|
|
|
@ -477,10 +477,6 @@ public:
|
|||
///
|
||||
void setReg(Register Reg);
|
||||
|
||||
/// Change the register this operand corresponds to, but breaks def-use chain
|
||||
///
|
||||
void setRegIgnoreDUChain(Register Reg);
|
||||
|
||||
void setSubReg(unsigned subReg) {
|
||||
assert(isReg() && "Wrong MachineOperand mutator");
|
||||
SubReg_TargetFlags = subReg;
|
||||
|
|
|
@ -75,17 +75,6 @@ void MachineOperand::setReg(Register Reg) {
|
|||
SmallContents.RegNo = Reg;
|
||||
}
|
||||
|
||||
void MachineOperand::setRegIgnoreDUChain(Register Reg) {
|
||||
if (getReg() == Reg)
|
||||
return; // No change.
|
||||
|
||||
// Clear the IsRenamable bit to keep it conservatively correct.
|
||||
IsRenamable = false;
|
||||
|
||||
// Otherwise, just change the register, no problem. :)
|
||||
SmallContents.RegNo = Reg;
|
||||
}
|
||||
|
||||
void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
|
||||
const TargetRegisterInfo &TRI) {
|
||||
assert(Reg.isVirtual());
|
||||
|
|
|
@ -54,11 +54,6 @@ bool VentusRegextInsertion::runOnMachineFunction(MachineFunction &MF) {
|
|||
MF.getSubtarget().getRegisterInfo());
|
||||
bool Modified = false;
|
||||
|
||||
// FIXME: As this expansion pass will break def-use chain, it can not pass
|
||||
// the MachineVerifierPass.
|
||||
if (getCGPassBuilderOption().VerifyMachineCode)
|
||||
return Modified;
|
||||
|
||||
for (auto &MBB : MF)
|
||||
Modified |= runOnMachineBasicBlock(MBB);
|
||||
return Modified;
|
||||
|
@ -104,8 +99,7 @@ bool VentusRegextInsertion::insertRegext(MachineBasicBlock &MBB,
|
|||
if (hasOverflow) {
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
// Create instruction to expand register basic offset as imm * 32
|
||||
BuildMI(MBB, &MI, DL, TII->get(RISCV::REGEXT))
|
||||
.addReg(RISCV::X0)
|
||||
BuildMI(MBB, &MI, DL, TII->get(RISCV::REGEXT), RISCV::X0)
|
||||
.addReg(RISCV::X0)
|
||||
.addImm(Offsets);
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 12
|
||||
; VENTUS-NEXT: addi tp, tp, 4
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -12(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
|
@ -19,6 +20,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: sw t0, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vadd.vx v33, v0, zero
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z12get_local_idj
|
||||
|
@ -26,6 +28,7 @@ define dso_local ventus_kernel void @func(ptr addrspace(1) nocapture noundef ali
|
|||
; VENTUS-NEXT: lw t1, -4(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vadd.vx v0, v0, t1
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: vsll.vi v1, v33, 2
|
||||
; VENTUS-NEXT: lw t0, -8(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vadd.vx v1, v1, t0
|
||||
|
|
|
@ -11,11 +11,13 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: addi tp, tp, 24
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 24
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: lw t0, 0(a0)
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v33, t0
|
||||
; VENTUS-NEXT: lui t1, %hi(foo.b)
|
||||
; VENTUS-NEXT: addi t2, t1, %lo(foo.b)
|
||||
|
@ -41,6 +43,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: lw t1, 16(sp) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: vadd.vx v2, v0, t1
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v2)
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: vadd.vv v0, v33, v0
|
||||
; VENTUS-NEXT: vlw12.v v3, 0(v0)
|
||||
; VENTUS-NEXT: # kill: def $v4 killed $x5
|
||||
|
@ -51,6 +54,7 @@ define ventus_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
|||
; VENTUS-NEXT: .LBB0_2: # %if.else
|
||||
; VENTUS-NEXT: vmv.v.x v1, zero
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: vadd.vv v0, v33, v0
|
||||
; VENTUS-NEXT: vsw12.v v1, 0(v0)
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end
|
||||
|
@ -255,41 +259,54 @@ define dso_local i32 @stack_space(ptr addrspace(3) nocapture noundef readnone %a
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 48
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 48
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -48(v32)
|
||||
; VENTUS-NEXT: li t0, 1
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -44(v32)
|
||||
; VENTUS-NEXT: li t0, 2
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -40(v32)
|
||||
; VENTUS-NEXT: li t0, 3
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -36(v32)
|
||||
; VENTUS-NEXT: li t0, 4
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -32(v32)
|
||||
; VENTUS-NEXT: li t0, 5
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -28(v32)
|
||||
; VENTUS-NEXT: li t0, 6
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -24(v32)
|
||||
; VENTUS-NEXT: li t0, 7
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -20(v32)
|
||||
; VENTUS-NEXT: li t0, 8
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -16(v32)
|
||||
; VENTUS-NEXT: li t0, 9
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -12(v32)
|
||||
; VENTUS-NEXT: li t0, 10
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -8(v32)
|
||||
; VENTUS-NEXT: li t0, 11
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -4(v32)
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu < %s \
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
|
||||
|
|
|
@ -45,18 +45,23 @@ define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrsp
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: addi tp, tp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 8
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vadd.vx v33, v1, zero
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vadd.vx v34, v0, zero
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: vsll.vi v0, v0, 2
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: vadd.vv v1, v33, v0
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: vadd.vv v0, v34, v0
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
|
|
|
@ -84,10 +84,13 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 28
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 28
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 4
|
||||
; VENTUS-NEXT: .cfi_offset v34.l, 0
|
||||
; VENTUS-NEXT: regext zero, zero, 9
|
||||
; VENTUS-NEXT: vlw.v v33, -24(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 9
|
||||
; VENTUS-NEXT: vlw.v v34, -28(v32)
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v1)
|
||||
|
@ -151,7 +154,9 @@ define dso_local i32 @non_kernel_calling_convention(ptr nocapture noundef readon
|
|||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v3
|
||||
; VENTUS-NEXT: vlw12.v v1, 0(v31)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vlw12.v v2, 0(v34)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vlw.v v3, 0(v33)
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v1
|
||||
; VENTUS-NEXT: vadd.vv v0, v0, v2
|
||||
|
@ -263,20 +268,24 @@ define dso_local i32 @test_add(ptr nocapture noundef readonly %a, ptr nocapture
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: addi tp, tp, 8
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 8
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 0
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v0)
|
||||
; VENTUS-NEXT: vadd.vi v0, v0, 1
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -8(v32)
|
||||
; VENTUS-NEXT: vlw12.v v0, 0(v1)
|
||||
; VENTUS-NEXT: vadd.vi v0, v0, 2
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v0, -4(v32)
|
||||
; VENTUS-NEXT: addi t0, tp, -8
|
||||
; VENTUS-NEXT: addi t1, tp, -4
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: vmv.v.x v1, t1
|
||||
; VENTUS-NEXT: call add
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vlw.v v1, -8(v32)
|
||||
; VENTUS-NEXT: vadd.vv v0, v1, v0
|
||||
; VENTUS-NEXT: lw ra, -4(sp) # 4-byte Folded Reload
|
||||
|
|
|
@ -14,14 +14,22 @@ define dso_local i32 @printf(ptr noundef %fmt, ...) {
|
|||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi tp, tp, 64
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 64
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: li t0, 0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v7, -60(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v6, -56(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v5, -52(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v4, -48(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v3, -44(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v2, -40(v32)
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: vsw.v v1, -36(v32)
|
||||
; VENTUS-NEXT: addi t1, tp, -32
|
||||
; VENTUS-NEXT: sw t1, -32(tp)
|
||||
|
|
|
@ -130,17 +130,20 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: addi tp, tp, 4
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 4
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vmv.v.x v32, tp
|
||||
; VENTUS-NEXT: sw ra, -4(sp) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset ra, 4
|
||||
; VENTUS-NEXT: .cfi_offset v33.l, 0
|
||||
; VENTUS-NEXT: vmv.v.x v0, zero
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: regext zero, zero, 1
|
||||
; VENTUS-NEXT: vadd.vx v33, v0, zero
|
||||
; VENTUS-NEXT: li t0, 13
|
||||
; VENTUS-NEXT: li t1, 14
|
||||
; VENTUS-NEXT: vmv.v.x v1, t1
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 8
|
||||
; VENTUS-NEXT: .Lpcrel_hi4:
|
||||
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_7)
|
||||
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi4)
|
||||
|
@ -148,6 +151,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: # %bb.1: # %if.else
|
||||
; VENTUS-NEXT: li t0, 17
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: .Lpcrel_hi5:
|
||||
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_7)
|
||||
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi5)
|
||||
|
@ -156,6 +160,7 @@ define dso_local i32 @branch_in_branch(i32 noundef %dim) local_unnamed_addr {
|
|||
; VENTUS-NEXT: li t0, 1
|
||||
; VENTUS-NEXT: vmv.v.x v0, t0
|
||||
; VENTUS-NEXT: call _Z13get_global_idj
|
||||
; VENTUS-NEXT: regext zero, zero, 64
|
||||
; VENTUS-NEXT: .Lpcrel_hi6:
|
||||
; VENTUS-NEXT: auipc t1, %pcrel_hi(.LBB2_6)
|
||||
; VENTUS-NEXT: setrpc zero, t1, %pcrel_lo(.Lpcrel_hi6)
|
||||
|
|
Loading…
Reference in New Issue