From 0b43b7032760f4e9dfe2012b6d297787ce71a152 Mon Sep 17 00:00:00 2001 From: Aries Date: Tue, 3 Jan 2023 10:45:58 +0800 Subject: [PATCH] Fix bug in addressing space mapping --- clang/lib/Basic/Targets/RISCV.h | 2 +- llvm/lib/Target/RISCV/RISCV.h | 6 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- .../CodeGen/RISCV/VentusGPGPU/addr-space2.ll | 103 ++++++++++++++++++ 4 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index 2ea010e878ff..e0cf21e5dd2d 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -28,8 +28,8 @@ class RISCVTargetInfo : public TargetInfo { enum AddrSpace { Generic = 0, Global = 1, + Constant = 1, Local = 3, - Constant = 4, Private = 5 }; diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 3d725d45dfeb..077539cf8951 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -84,13 +84,13 @@ InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &, namespace RISCVAS { enum : unsigned { // The maximum value for flat, generic, local, private, constant and region. - MAX_VENTUS_ADDRESS = 4, + MAX_VENTUS_ADDRESS = 5, FLAT_ADDRESS = 0, ///< Address space for flat memory. GLOBAL_ADDRESS = 1, ///< Address space for global memory CONSTANT_ADDRESS = 1, ///< Address space for constant memory - LOCAL_ADDRESS = 2, ///< Address space for local memory. - PRIVATE_ADDRESS = 3, ///< Address space for private memory. + LOCAL_ADDRESS = 3, ///< Address space for local memory. + PRIVATE_ADDRESS = 5, ///< Address space for private memory. // Some places use this if the address space can't be determined. 
UNKNOWN_ADDRESS_SPACE = ~0u, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 78faa0c49451..43b3ff6e468c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7308,7 +7308,7 @@ bool RISCVTargetLowering::isSDNodeSourceOfDivergence( } case ISD::LOAD: { const LoadSDNode *L = cast<LoadSDNode>(N); - return L->getAddressingMode() == RISCVAS::PRIVATE_ADDRESS; + return L->getAddressSpace() == RISCVAS::PRIVATE_ADDRESS; } case ISD::CALLSEQ_END: return true; diff --git a/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll new file mode 100644 index 000000000000..8662e0830326 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/VentusGPGPU/addr-space2.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=VENTUS %s + +@foo.b = internal addrspace(3) global [5 x i32] undef, align 4 + +define spir_kernel void @foo(ptr addrspace(1) noundef align 4 %out) { +; VENTUS-LABEL: foo: +; VENTUS: # %bb.0: # %entry +; VENTUS-NEXT: addi x2, x2, -48 +; VENTUS-NEXT: addi x4, x4, -48 +; VENTUS-NEXT: .cfi_def_cfa_offset 48 +; VENTUS-NEXT: sw x1, 44(x2) # 4-byte Folded Spill +; VENTUS-NEXT: sw x8, 40(x2) # 4-byte Folded Spill +; VENTUS-NEXT: sw x9, 36(x2) # 4-byte Folded Spill +; VENTUS-NEXT: sw x18, 32(x2) # 4-byte Folded Spill +; VENTUS-NEXT: .cfi_offset x1, -4 +; VENTUS-NEXT: .cfi_offset x8, -8 +; VENTUS-NEXT: .cfi_offset x9, -12 +; VENTUS-NEXT: .cfi_offset x18, -16 +; VENTUS-NEXT: lw x8, 0(x10) +; VENTUS-NEXT: lui x10, %hi(foo.b) +; VENTUS-NEXT: addi x9, x10, %lo(foo.b) +; VENTUS-NEXT: addi x18, x4, 12 +; VENTUS-NEXT: vmv.s.x v0, x18 +; VENTUS-NEXT: vmv.s.x v1, x9 +; VENTUS-NEXT: vmv.s.x v2, x8 +; VENTUS-NEXT: call bar +; VENTUS-NEXT: vmv.s.x v0, x0 +; VENTUS-NEXT: call _Z12get_local_idj +; 
VENTUS-NEXT: vmv.x.s x10, v0 +; VENTUS-NEXT: li x11, 4 +; VENTUS-NEXT: vmv.s.x v0, x11 +; VENTUS-NEXT: vmv.s.x v1, x10 +; VENTUS-NEXT: vbltu v0, v1, .LBB0_2 +; VENTUS-NEXT: # %bb.1: # %if.then +; VENTUS-NEXT: vmv.s.x v0, x0 +; VENTUS-NEXT: slli x10, x10, 2 +; VENTUS-NEXT: add x18, x18, x10 +; VENTUS-NEXT: add x9, x9, x10 +; VENTUS-NEXT: lw x11, 0(x9) +; VENTUS-NEXT: vluxei32.v v1, (x18), v0 +; VENTUS-NEXT: vmv.s.x v2, x11 +; VENTUS-NEXT: add x10, x8, x10 +; VENTUS-NEXT: lw x11, 0(x10) +; VENTUS-NEXT: vmul.vv v1, v2, v1 +; VENTUS-NEXT: vmv.s.x v2, x11 +; VENTUS-NEXT: vadd.vv v1, v2, v1 +; VENTUS-NEXT: vsuxei32.v v1, (x10), v0 +; VENTUS-NEXT: j .LBB0_3 +; VENTUS-NEXT: .LBB0_2: # %if.else +; VENTUS-NEXT: slli x10, x10, 2 +; VENTUS-NEXT: add x10, x8, x10 +; VENTUS-NEXT: sw x0, 0(x10) +; VENTUS-NEXT: .LBB0_3: # %if.end +; VENTUS-NEXT: lw x1, 44(x2) # 4-byte Folded Reload +; VENTUS-NEXT: lw x8, 40(x2) # 4-byte Folded Reload +; VENTUS-NEXT: lw x9, 36(x2) # 4-byte Folded Reload +; VENTUS-NEXT: lw x18, 32(x2) # 4-byte Folded Reload +; VENTUS-NEXT: addi x2, x2, 48 +; VENTUS-NEXT: addi x4, x4, 48 +; VENTUS-NEXT: ret +entry: + %a = alloca [5 x i32], align 4, addrspace(5) + call void @llvm.lifetime.start.p5(i64 20, ptr addrspace(5) %a) + call void @bar(ptr addrspace(5) noundef %a, ptr addrspace(3) noundef @foo.b, ptr addrspace(1) noundef %out) + %call = call i32 @_Z12get_local_idj(i32 noundef 0) + %cmp = icmp ult i32 %call, 5 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %arrayidx = getelementptr inbounds [5 x i32], ptr addrspace(5) %a, i32 0, i32 %call + %0 = load i32, ptr addrspace(5) %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(3) @foo.b, i32 0, i32 %call + %1 = load i32, ptr addrspace(3) %arrayidx1, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call + %2 = load i32, ptr addrspace(1) %arrayidx2, align 4 + %add = add nsw i32 %2, %mul + store i32 
%add, ptr addrspace(1) %arrayidx2, align 4 + br label %if.end + +if.else: ; preds = %entry + %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %call + store i32 0, ptr addrspace(1) %arrayidx3, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + call void @llvm.lifetime.end.p5(i64 20, ptr addrspace(5) %a) + ret void +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) + +; Function Attrs: convergent +declare dso_local void @bar(ptr addrspace(5) noundef, ptr addrspace(3) noundef, ptr addrspace(1) noundef) + +; Function Attrs: convergent mustprogress nofree nounwind willreturn memory(none) +declare dso_local i32 @_Z12get_local_idj(i32 noundef) + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)