From b3609c1ccc2ac78fbff6b1b3e162e8d5a632a9f0 Mon Sep 17 00:00:00 2001 From: Aries Wu Date: Tue, 17 Jan 2023 18:17:55 +0800 Subject: [PATCH] Successfully build crt0.o and libworkitem.a --- libclc/CMakeLists.txt | 5 ++ libclc/riscv32/lib/CMakeLists.txt | 14 ++++ .../lib/ventus => libclc/riscv32/lib}/crt0.S | 2 +- .../ventus => libclc/riscv32/lib}/ventus.h | 0 libclc/riscv32/lib/workitem/workitem.S | 67 +++++++++---------- 5 files changed, 53 insertions(+), 35 deletions(-) create mode 100644 libclc/riscv32/lib/CMakeLists.txt rename {compiler-rt/lib/ventus => libclc/riscv32/lib}/crt0.S (98%) rename {compiler-rt/lib/ventus => libclc/riscv32/lib}/ventus.h (100%) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 1cac7ccc4100..a2b25a07b9c1 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -194,6 +194,11 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND dirs amdgpu ) endif() + # Build Ventus GPGPU specific runtime and builtins + if( ${ARCH} STREQUAL riscv32 ) + add_subdirectory(riscv32/lib) + endif() + #nvptx is special if( ${ARCH} STREQUAL nvptx OR ${ARCH} STREQUAL nvptx64 ) set( DARCH ptx ) diff --git a/libclc/riscv32/lib/CMakeLists.txt b/libclc/riscv32/lib/CMakeLists.txt new file mode 100644 index 000000000000..fae638167acc --- /dev/null +++ b/libclc/riscv32/lib/CMakeLists.txt @@ -0,0 +1,14 @@ +# Build crt0 and workitem builtin implementation + +project(ventus-builtin VERSION 0.2.0 LANGUAGES C ASM) + +set(CMAKE_ASM_COMPILER clang) +set(CMAKE_ASM_FLAGS ${CMAKE_LLAsm_FLAGS}) + +# workitem builtins +add_library(workitem STATIC workitem/workitem.S) +target_include_directories(workitem PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + + +# crts +add_library(crt0 OBJECT crt0.S) diff --git a/compiler-rt/lib/ventus/crt0.S b/libclc/riscv32/lib/crt0.S similarity index 98% rename from compiler-rt/lib/ventus/crt0.S rename to libclc/riscv32/lib/crt0.S index 8df3d5d64c49..067bad9467ff 100644 --- a/compiler-rt/lib/ventus/crt0.S +++ b/libclc/riscv32/lib/crt0.S 
@@ -50,5 +50,5 @@ _start: # tail exit # End of warp execution - endprg + endprg x0, x0, x0 .size _start, .-_start diff --git a/compiler-rt/lib/ventus/ventus.h b/libclc/riscv32/lib/ventus.h similarity index 100% rename from compiler-rt/lib/ventus/ventus.h rename to libclc/riscv32/lib/ventus.h diff --git a/libclc/riscv32/lib/workitem/workitem.S b/libclc/riscv32/lib/workitem/workitem.S index f2215716a661..e22b094357c7 100644 --- a/libclc/riscv32/lib/workitem/workitem.S +++ b/libclc/riscv32/lib/workitem/workitem.S @@ -42,6 +42,29 @@ #include "ventus.h" + // Workaround for pocl driver + .type _local_id_x, @object + .section .sdata,"aw",@progbits + .globl _local_id_x + .p2align 2 +_local_id_x: + .word 0 + + .type _local_id_y, @object + .section .sdata,"aw",@progbits + .globl _local_id_y + .p2align 2 +_local_id_y: + .word 0 + + .type _local_id_z, @object + .section .sdata,"aw",@progbits + .globl _local_id_z + .p2align 2 +_local_id_z: + .word 0 + // End workaround for pocl driver + .text .global __builtin_riscv_workitem_linear_id .type __builtin_riscv_workitem_linear_id, @function @@ -51,7 +74,6 @@ __builtin_riscv_workitem_linear_id: vid.v v2 # current thread offset vadd.vx v0, v2, t1 # local_linear_id ret - .size __builtin_riscv_workitem_linear_id .- __builtin_riscv_workitem_linear_id .text @@ -67,8 +89,8 @@ __builtin_riscv_global_linear_id: beq t0, t5, .GLR # Return global_linear_id for 1 dim .GL_2DIM: call __builtin_riscv_global_id_y - lw t6, KL_GL_SIZE_X(a3) # global_size_x - lw t5, KL_GL_SIZE_Y(a3) # global_offset_y + lw t6, KNL_GL_SIZE_X(a3) # global_size_x + lw t5, KNL_GL_OFFSET_Y(a3) # global_offset_y vsub.vx v6, v0, t5 # tmp = global_id_y - global_offset_y vmul.vx v6, v6, t6 # tmp = tmp * global_size_x vadd.vv v5, v5, v6 # global_linear_id2 = tmp + global_linear_id1 @@ -76,17 +98,16 @@ __builtin_riscv_global_linear_id: beq t0, t5, .GLR # Return global_linear_id for 2 dim .GL_3DIM: call __builtin_riscv_global_id_z - lw t6, KL_GL_SIZE_X(a3) # global_size_x - lw t7, 
KL_GL_SIZE_Y(a3) # global_size_y - lw t5, KL_GL_OFFSET_Z(a3) # global_offset_z + lw t6, KNL_GL_SIZE_X(a3) # global_size_x + lw t1, KNL_GL_SIZE_Y(a3) # global_size_y + lw t5, KNL_GL_OFFSET_Z(a3) # global_offset_z vsub.vx v6, v0, t5 # tmp = global_id_z - global_offset_z vmul.vx v6, v6, t6 # tmp = tmp * global_size_x - vmul.vx v6, v6, t7 # tmp = tmp * global_size_y + vmul.vx v6, v6, t1 # tmp = tmp * global_size_y vadd.vv v5, v5, v6 # global_linear_id3 = tmp + global_linear_id2 .GLR: vadd.vx v0, v5, zero # Return global_linear_id for 1/2/3 dims ret - .size __builtin_riscv_global_linear_id .- __builtin_riscv_global_linear_id .text @@ -96,7 +117,6 @@ __builtin_riscv_workgroup_id_x: csrr a0, CSR_GID_X # Read group_id_x vmv.s.x v0, a0 ret - .size __builtin_riscv_workgroup_id_x .- __builtin_riscv_workgroup_id_x .text @@ -106,7 +126,6 @@ __builtin_riscv_workgroup_id_y: csrr a0, CSR_GID_Y # Read group_id_y vmv.s.x v0, a0 ret - .size __builtin_riscv_workgroup_id_y .- __builtin_riscv_workgroup_id_y .text @@ -116,7 +135,6 @@ __builtin_riscv_workgroup_id_z: csrr a0, CSR_GID_Z # Read group_id_z vmv.s.x v0, a0 ret - .size __builtin_riscv_workgroup_id_z .- __builtin_riscv_workgroup_id_z .text @@ -142,7 +160,6 @@ __builtin_riscv_workitem_id_x: vremu.vx v0, v0, t4 # local_id_x = local_liner_id % (local_size_x * local_size_y) .WIXR: ret - .size __builtin_riscv_workitem_id_x .- __builtin_riscv_workitem_id_x .text @@ -164,9 +181,8 @@ __builtin_riscv_workitem_id_y: lw t4, KNL_LC_SIZE_Y(a0) # local_size_y mul t4, t4, t3 # local_size_x * local_size_y vremu.vx v0, v0, t4 # x = local_linear_id % (local_size_x * local_size_y) - vdivu.ux v0, v0, t3 # x / local_size_x + vdivu.vx v0, v0, t3 # x / local_size_x ret - .size __builtin_riscv_workitem_id_y .- __builtin_riscv_workitem_id_y .text @@ -183,7 +199,6 @@ __builtin_riscv_workitem_id_z: vdivu.vx v0, v0, t4 # local_linear_id / (local_size_x * local_size_y) 7: ret - .size __builtin_riscv_workitem_id_z .- __builtin_riscv_workitem_id_z .text @@ 
-192,7 +207,7 @@ __builtin_riscv_workitem_id_z: __builtin_riscv_global_id_x: csrr a0, CSR_KNL # Get kernel metadata buffer csrr t1, CSR_GID_X # Get group_id_x - sub t1, t1, 1 # group_id_x - 1 + addi t1, t1, -1 # group_id_x - 1 csrr t2, CSR_TID vid.v v2 vadd.vx v2, v2, t2 # workitem_id_x @@ -200,7 +215,6 @@ __builtin_riscv_global_id_x: mul t3, t1, t3 # (CSR_GID_X - 1) * local_size_x vadd.vx v0, v2, t3 # global_id_x ret - .size __builtin_riscv_global_id_x .- __builtin_riscv_global_idx_x .text @@ -209,7 +223,7 @@ __builtin_riscv_global_id_x: __builtin_riscv_global_id_y: csrr a0, CSR_KNL # Get kernel metadata buffer csrr t1, CSR_GID_Y # Get group_id_y - sub t1, t1, 1 # group_id_y - 1 + addi t1, t1, -1 # group_id_y - 1 csrr t2, CSR_TID vid.v v2 vadd.vx v2, v2, t2 # workitem_id_y @@ -217,7 +231,6 @@ __builtin_riscv_global_id_y: mul t3, t1, t3 # (CSR_GID_Y - 1) * local_size_y vadd.vx v0, v2, t3 # global_id_y ret - .size __builtin_riscv_global_id_y .- __builtin_riscv_global_idx_y .text @@ -226,7 +239,7 @@ __builtin_riscv_global_id_y: __builtin_riscv_global_id_z: csrr a0, CSR_KNL # Get kernel metadata buffer csrr t1, CSR_GID_Z # Get group_id_z - sub t1, t1, 1 # group_id_z - 1 + addi t1, t1, -1 # group_id_z - 1 csrr t2, CSR_TID vid.v v2 vadd.vx v2, v2, t2 # workitem_id_z @@ -234,7 +247,6 @@ __builtin_riscv_global_id_z: mul t3, t1, t3 # (CSR_GID_Z - 1) * local_size_z vadd.vx v0, v2, t3 # global_id_z ret - .size __builtin_riscv_global_id_z .- __builtin_riscv_global_idx_z .text @@ -245,7 +257,6 @@ __builtin_riscv_local_size_x: lw t0, KNL_LC_SIZE_X(a0) # Load local_size_x vmv.s.x v0, t0 ret - .size __builtin_riscv_local_size_x, .-__builtin_riscv_local_size_x .text @@ -256,7 +267,6 @@ __builtin_riscv_local_size_y: lw t0, KNL_LC_SIZE_Y(a0) # Load local_size_y vmv.s.x v0, t0 ret - .size __builtin_riscv_local_size_y, .-__builtin_riscv_local_size_y .text @@ -267,7 +277,6 @@ __builtin_riscv_local_size_z: lw t0, KNL_LC_SIZE_Z(a0) # Load local_size_z vmv.s.x v0, t0 ret - .size 
__builtin_riscv_local_size_z, .-__builtin_riscv_local_size_z .text @@ -278,7 +287,6 @@ __builtin_riscv_global_size_x: lw t0, KNL_GL_SIZE_X(a0) # Get global_size_x vmv.s.x v0, t0 ret - .size __builtin_riscv_global_size_x, .-__builtin_riscv_global_size_x .text @@ -289,7 +297,6 @@ __builtin_riscv_global_size_y: lw t0, KNL_GL_SIZE_Y(a0) # Get global_size_y vmv.s.x v0, t0 ret - .size __builtin_riscv_global_size_y, .-__builtin_riscv_global_size_y .text @@ -300,7 +307,6 @@ __builtin_riscv_global_size_z: lw t0, KNL_GL_SIZE_Z(a0) # Get global_size_z vmv.s.x v0, t0 ret - .size __builtin_riscv_global_size_z, .-__builtin_riscv_global_size_z .text @@ -311,7 +317,6 @@ __builtin_riscv_global_offset_x: lw t0, KNL_GL_OFFSET_X(a0) # Get global_offset_x vmv.s.x v0, t0 ret - .size __builtin_riscv_global_offset_x, .-__builtin_riscv_global_offset_x .text @@ -322,7 +327,6 @@ __builtin_riscv_global_offset_y: lw t0, KNL_GL_OFFSET_Y(a0) # Get global_offset_y vmv.s.x v0, t0 ret - .size __builtin_riscv_global_offset_y, .-__builtin_riscv_global_offset_y .text @@ -333,7 +337,6 @@ __builtin_riscv_global_offset_z: lw t0, KNL_GL_OFFSET_Z(a0) # Get global_offset_z vmv.s.x v0, t0 ret - .size __builtin_riscv_global_offset_z, .-__builtin_riscv_global_offset_z .text @@ -346,7 +349,6 @@ __builtin_riscv_num_groups_x: divu t1, t1, t0 # global_size_x / local_size_x vmv.s.x v0, t1 ret - .size __builtin_riscv_num_groups_x, .-__builtin_riscv_num_groups_x .text @@ -359,7 +361,6 @@ __builtin_riscv_num_groups_y: divu t1, t1, t0 # global_size_y / local_size_y vmv.s.x v0, t1 ret - .size __builtin_riscv_num_groups_y, .-__builtin_riscv_num_groups_y .text @@ -372,7 +373,6 @@ __builtin_riscv_num_groups_z: divu t1, t1, t2 # global_size_z / local_size_z vmv.s.x v0, t1 ret - .size __builtin_riscv_num_groups_z, .-__builtin_riscv_num_groups_z .text @@ -383,4 +383,3 @@ __builtin_riscv_work_dim: lw t0, KNL_WORK_DIM(a0) # Get work_dim vmv.s.x v0, t0 ret - .size __builtin_riscv_work_dim, .-__builtin_riscv_work_dim