Fix bug in address space mapping
This commit is contained in:
parent
ac76b82de6
commit
0b43b70327
|
@ -28,8 +28,8 @@ class RISCVTargetInfo : public TargetInfo {
|
|||
enum AddrSpace {
|
||||
Generic = 0,
|
||||
Global = 1,
|
||||
Constant = 1,
|
||||
Local = 3,
|
||||
Constant = 4,
|
||||
Private = 5
|
||||
};
|
||||
|
||||
|
|
|
@ -84,13 +84,13 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
|
|||
namespace RISCVAS {
|
||||
enum : unsigned {
|
||||
// The maximum value for flat, generic, local, private, constant and region.
|
||||
MAX_VENTUS_ADDRESS = 4,
|
||||
MAX_VENTUS_ADDRESS = 5,
|
||||
|
||||
FLAT_ADDRESS = 0, ///< Address space for flat memory.
|
||||
GLOBAL_ADDRESS = 1, ///< Address space for global memory
|
||||
CONSTANT_ADDRESS = 1, ///< Address space for constant memory
|
||||
LOCAL_ADDRESS = 2, ///< Address space for local memory.
|
||||
PRIVATE_ADDRESS = 3, ///< Address space for private memory.
|
||||
LOCAL_ADDRESS = 3, ///< Address space for local memory.
|
||||
PRIVATE_ADDRESS = 5, ///< Address space for private memory.
|
||||
|
||||
// Some places use this if the address space can't be determined.
|
||||
UNKNOWN_ADDRESS_SPACE = ~0u,
|
||||
|
|
|
@ -7308,7 +7308,7 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence(
|
|||
}
|
||||
case ISD::LOAD: {
|
||||
const LoadSDNode *L = cast<LoadSDNode>(N);
|
||||
return L->getAddressingMode() == RISCVAS::PRIVATE_ADDRESS;
|
||||
return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS;
|
||||
}
|
||||
case ISD::CALLSEQ_END:
|
||||
return true;
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck -check-prefix=VENTUS %s
|
||||
|
||||
@foo.b = internal addrspace(3) global [5 x i32] undef, align 4
|
||||
|
||||
define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) {
|
||||
; VENTUS-LABEL: foo:
|
||||
; VENTUS: # %bb.0: # %entry
|
||||
; VENTUS-NEXT: addi x2, x2, -48
|
||||
; VENTUS-NEXT: addi x4, x4, -48
|
||||
; VENTUS-NEXT: .cfi_def_cfa_offset 48
|
||||
; VENTUS-NEXT: sw x1, 44(x2) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw x8, 40(x2) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw x9, 36(x2) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: sw x18, 32(x2) # 4-byte Folded Spill
|
||||
; VENTUS-NEXT: .cfi_offset x1, -4
|
||||
; VENTUS-NEXT: .cfi_offset x8, -8
|
||||
; VENTUS-NEXT: .cfi_offset x9, -12
|
||||
; VENTUS-NEXT: .cfi_offset x18, -16
|
||||
; VENTUS-NEXT: lw x8, 0(x10)
|
||||
; VENTUS-NEXT: lui x10, %hi(foo.b)
|
||||
; VENTUS-NEXT: addi x9, x10, %lo(foo.b)
|
||||
; VENTUS-NEXT: addi x18, x4, 12
|
||||
; VENTUS-NEXT: vmv.s.x v0, x18
|
||||
; VENTUS-NEXT: vmv.s.x v1, x9
|
||||
; VENTUS-NEXT: vmv.s.x v2, x8
|
||||
; VENTUS-NEXT: call bar
|
||||
; VENTUS-NEXT: vmv.s.x v0, x0
|
||||
; VENTUS-NEXT: call _Z12get_local_idj
|
||||
; VENTUS-NEXT: vmv.x.s x10, v0
|
||||
; VENTUS-NEXT: li x11, 4
|
||||
; VENTUS-NEXT: vmv.s.x v0, x11
|
||||
; VENTUS-NEXT: vmv.s.x v1, x10
|
||||
; VENTUS-NEXT: vbltu v0, v1, .LBB0_2
|
||||
; VENTUS-NEXT: # %bb.1: # %if.then
|
||||
; VENTUS-NEXT: vmv.s.x v0, x0
|
||||
; VENTUS-NEXT: slli x10, x10, 2
|
||||
; VENTUS-NEXT: add x18, x18, x10
|
||||
; VENTUS-NEXT: add x9, x9, x10
|
||||
; VENTUS-NEXT: lw x11, 0(x9)
|
||||
; VENTUS-NEXT: vluxei32.v v1, (x18), v0
|
||||
; VENTUS-NEXT: vmv.s.x v2, x11
|
||||
; VENTUS-NEXT: add x10, x8, x10
|
||||
; VENTUS-NEXT: lw x11, 0(x10)
|
||||
; VENTUS-NEXT: vmul.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vmv.s.x v2, x11
|
||||
; VENTUS-NEXT: vadd.vv v1, v2, v1
|
||||
; VENTUS-NEXT: vsuxei32.v v1, (x10), v0
|
||||
; VENTUS-NEXT: j .LBB0_3
|
||||
; VENTUS-NEXT: .LBB0_2: # %if.else
|
||||
; VENTUS-NEXT: slli x10, x10, 2
|
||||
; VENTUS-NEXT: add x10, x8, x10
|
||||
; VENTUS-NEXT: sw x0, 0(x10)
|
||||
; VENTUS-NEXT: .LBB0_3: # %if.end
|
||||
; VENTUS-NEXT: lw x1, 44(x2) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw x8, 40(x2) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw x9, 36(x2) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: lw x18, 32(x2) # 4-byte Folded Reload
|
||||
; VENTUS-NEXT: addi x2, x2, 48
|
||||
; VENTUS-NEXT: addi x4, x4, 48
|
||||
; VENTUS-NEXT: ret
|
||||
entry:
|
||||
%a = alloca [5 x i32], align 4, addrspace(5)
|
||||
call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %a)
|
||||
call void @bar(ptr addrspace(5) noundef %a, ptr addrspace(3) noundef @foo.b, ptr addrspace(1) noundef %out)
|
||||
%call = call i32 @_Z12get_local_idj(i32 noundef 0)
|
||||
%cmp = icmp ult i32 %call, 5
|
||||
br i1 %cmp, label %if.then, label %if.else
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%arrayidx = getelementptr inbounds [5 x i32], ptr addrspace(5) %a, i32 0, i32 %call
|
||||
%0 = load i32, ptr addrspace(5) %arrayidx, align 4
|
||||
%arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(3) @foo.b, i32 0, i32 %call
|
||||
%1 = load i32, ptr addrspace(3) %arrayidx1, align 4
|
||||
%mul = mul nsw i32 %1, %0
|
||||
%arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call
|
||||
%2 = load i32, ptr addrspace(1) %arrayidx2, align 4
|
||||
%add = add nsw i32 %2, %mul
|
||||
store i32 %add, ptr addrspace(1) %arrayidx2, align 4
|
||||
br label %if.end
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call
|
||||
store i32 0, ptr addrspace(1) %arrayidx3, align 4
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.else, %if.then
|
||||
call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %a)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
|
||||
declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
|
||||
|
||||
; Function Attrs: convergent
|
||||
declare dso_local void @bar(ptr addrspace(5) noundef, ptr addrspace(3) noundef, ptr addrspace(1) noundef)
|
||||
|
||||
; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none)
|
||||
declare dso_local i32 @_Z12get_local_idj(i32 noundef)
|
||||
|
||||
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
|
||||
declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)
|
Loading…
Reference in New Issue