Fix bug in address space mapping

This commit is contained in:
Aries 2023-01-03 10:45:58 +08:00
parent ac76b82de6
commit 0b43b70327
4 changed files with 108 additions and 5 deletions

View File

@ -28,8 +28,8 @@ class RISCVTargetInfo : public TargetInfo {
enum AddrSpace {
Generic = 0,
Global = 1,
Constant = 1,
Local = 3,
Constant = 4,
Private = 5
};

View File

@ -84,13 +84,13 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
namespace RISCVAS {
enum : unsigned {
// The maximum value for flat, generic, local, private, constant and region.
MAX_VENTUS_ADDRESS = 4,
MAX_VENTUS_ADDRESS = 5,
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory
CONSTANT_ADDRESS = 1, ///< Address space for constant memory
LOCAL_ADDRESS = 2, ///< Address space for local memory.
PRIVATE_ADDRESS = 3, ///< Address space for private memory.
LOCAL_ADDRESS = 3, ///< Address space for local memory.
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
// Some places use this if the address space can't be determined.
UNKNOWN_ADDRESS_SPACE = ~0u,

View File

@ -7308,7 +7308,7 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
return L->getAddressingMode() == RISCVAS::PRIVATE_ADDRESS;
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
}
case ISD::CALLSEQ_END:
return true;

View File

@ -0,0 +1,103 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
; Kernel under test: it touches memory in three address spaces -- the
; addrspace(5) alloca %a, the addrspace(3) global @foo.b and the addrspace(1)
; argument %out -- so the autogenerated assertions below pin the instruction
; selected for each of those accesses.
; NOTE(review): addrspace 1/3/5 presumably correspond to global/local/private
; memory on this target -- confirm against the backend's address-space enum.
define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
; VENTUS-LABEL: foo:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi x2, x2, -48
; VENTUS-NEXT: addi x4, x4, -48
; VENTUS-NEXT: .cfi_def_cfa_offset 48
; VENTUS-NEXT: sw x1, 44(x2) # 4-byte Folded Spill
; VENTUS-NEXT: sw x8, 40(x2) # 4-byte Folded Spill
; VENTUS-NEXT: sw x9, 36(x2) # 4-byte Folded Spill
; VENTUS-NEXT: sw x18, 32(x2) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset x1, -4
; VENTUS-NEXT: .cfi_offset x8, -8
; VENTUS-NEXT: .cfi_offset x9, -12
; VENTUS-NEXT: .cfi_offset x18, -16
; VENTUS-NEXT: lw x8, 0(x10)
; VENTUS-NEXT: lui x10, %hi(foo.b)
; VENTUS-NEXT: addi x9, x10, %lo(foo.b)
; VENTUS-NEXT: addi x18, x4, 12
; VENTUS-NEXT: vmv.s.x v0, x18
; VENTUS-NEXT: vmv.s.x v1, x9
; VENTUS-NEXT: vmv.s.x v2, x8
; VENTUS-NEXT: call bar
; VENTUS-NEXT: vmv.s.x v0, x0
; VENTUS-NEXT: call _Z12get_local_idj
; VENTUS-NEXT: vmv.x.s x10, v0
; VENTUS-NEXT: li x11, 4
; VENTUS-NEXT: vmv.s.x v0, x11
; VENTUS-NEXT: vmv.s.x v1, x10
; VENTUS-NEXT: vbltu v0, v1, .LBB0_2
; VENTUS-NEXT: # %bb.1: # %if.then
; VENTUS-NEXT: vmv.s.x v0, x0
; VENTUS-NEXT: slli x10, x10, 2
; VENTUS-NEXT: add x18, x18, x10
; VENTUS-NEXT: add x9, x9, x10
; VENTUS-NEXT: lw x11, 0(x9)
; VENTUS-NEXT: vluxei32.v v1, (x18), v0
; VENTUS-NEXT: vmv.s.x v2, x11
; VENTUS-NEXT: add x10, x8, x10
; VENTUS-NEXT: lw x11, 0(x10)
; VENTUS-NEXT: vmul.vv v1, v2, v1
; VENTUS-NEXT: vmv.s.x v2, x11
; VENTUS-NEXT: vadd.vv v1, v2, v1
; VENTUS-NEXT: vsuxei32.v v1, (x10), v0
; VENTUS-NEXT: j .LBB0_3
; VENTUS-NEXT: .LBB0_2: # %if.else
; VENTUS-NEXT: slli x10, x10, 2
; VENTUS-NEXT: add x10, x8, x10
; VENTUS-NEXT: sw x0, 0(x10)
; VENTUS-NEXT: .LBB0_3: # %if.end
; VENTUS-NEXT: lw x1, 44(x2) # 4-byte Folded Reload
; VENTUS-NEXT: lw x8, 40(x2) # 4-byte Folded Reload
; VENTUS-NEXT: lw x9, 36(x2) # 4-byte Folded Reload
; VENTUS-NEXT: lw x18, 32(x2) # 4-byte Folded Reload
; VENTUS-NEXT: addi x2, x2, 48
; VENTUS-NEXT: addi x4, x4, 48
; VENTUS-NEXT: ret
entry:
; %a is a 5 x i32 scratch array allocated in addrspace(5).
%a = alloca [5 x i32], align 4, addrspace(5)
call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %a)
; Pass all three pointers to the external @bar before any of them is read
; back in this function.
call void @bar(ptr addrspace(5) noundef %a, ptr addrspace(3) noundef @foo.b, ptr addrspace(1) noundef %out)
; %call (the result of @_Z12get_local_idj(0)) is used as the element index;
; if.then is taken only when it is below 5 (unsigned), which is the element
; count of both %a and @foo.b.
%call = call i32 @_Z12get_local_idj(i32 noundef 0)
%cmp = icmp ult i32 %call, 5
br i1 %cmp, label %if.then, label %if.else
if.then: ; preds = %entry
; out[%call] = out[%call] + foo.b[%call] * a[%call], i.e. one load from each
; of the three address spaces followed by a store to addrspace(1).
; In the assertions above the addrspace(5) load is expected as vluxei32.v,
; while the addrspace(3) and addrspace(1) loads are expected as plain lw.
%arrayidx = getelementptr inbounds [5 x i32], ptr addrspace(5) %a, i32 0, i32 %call
%0 = load i32, ptr addrspace(5) %arrayidx, align 4
%arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(3) @foo.b, i32 0, i32 %call
%1 = load i32, ptr addrspace(3) %arrayidx1, align 4
%mul = mul nsw i32 %1, %0
%arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call
%2 = load i32, ptr addrspace(1) %arrayidx2, align 4
%add = add nsw i32 %2, %mul
store i32 %add, ptr addrspace(1) %arrayidx2, align 4
br label %if.end
if.else: ; preds = %entry
; Index not below 5: only write 0 to out[%call]; neither array is read.
%arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call
store i32 0, ptr addrspace(1) %arrayidx3, align 4
br label %if.end
if.end: ; preds = %if.else, %if.then
call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %a)
ret void
}
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
; Function Attrs: convergent
declare dso_local void @bar(ptr addrspace(5) noundef, ptr addrspace(3) noundef, ptr addrspace(1) noundef)
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
declare dso_local i32 @_Z12get_local_idj(i32 noundef)
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)