[OpenMP][CMake] Use in-project clang as CUDA->IR compiler.

If available, use the clang that is already built in the same project as CUDA compiler unless another executable is explicitly defined. This also ensures the generated deviceRTL IR will be consistent with the version of Clang. This patch is required to reliably test OpenMP offloading in a buildbot without either a two-stage build (e.g. with LLVM_ENABLE_RUNTIMES) or a separately installed clang on the worker that will eventually become outdated. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D101265
2021-04-30 12:38:55 -05:00 · 2021-04-30 12:38:55 -05:00 · 7308862ff5
parent adf4dc0561
commit 7308862ff5
2 changed files with 47 additions and 6 deletions
--- a/openmp/README.rst
+++ b/openmp/README.rst
@ -263,14 +263,18 @@ Options for ``NVPTX device RTL``
 **LIBOMPTARGET_NVPTX_CUDA_COMPILER** = ``""``
  Location of a CUDA compiler capable of emitting LLVM bitcode. Currently only
  the Clang compiler is supported. This is only used when building the CUDA LLVM
-  bitcode offloading device RTL. If unspecified and the CMake C compiler is
-  Clang, then Clang is used.
+  bitcode offloading device RTL. If unspecified, either the Clang from the build
+  itself is used (i.e. an in-tree build with LLVM_ENABLE_PROJECTS including
+  clang), or the Clang compiler that the build uses as its C compiler
+  (CMAKE_C_COMPILER; only if it is Clang). The latter is common for a
+  stage2-build or when using -DLLVM_ENABLE_RUNTIMES=openmp.

 **LIBOMPTARGET_NVPTX_BC_LINKER** = ``""``
  Location of a linker capable of linking LLVM bitcode objects. This is only
-  used when building the CUDA LLVM bitcode offloading device RTL. If unspecified
-  and the CMake C compiler is Clang and there exists a llvm-link binary in the
-  directory containing Clang, then this llvm-link binary is used.
+  used when building the CUDA LLVM bitcode offloading device RTL. If
+  unspecified, either the llvm-link in the same directory as
+  LIBOMPTARGET_NVPTX_CUDA_COMPILER is used, or the llvm-link from the
+  same build (available in an in-tree build).

 **LIBOMPTARGET_NVPTX_ALTERNATE_HOST_COMPILER** = ``""``
  Host compiler to use with NVCC. This compiler is not going to be used to
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@ -30,7 +30,17 @@ set(LIBOMPTARGET_NVPTX_BC_LINKER "" CACHE STRING

 if (NOT LIBOMPTARGET_NVPTX_CUDA_COMPILER STREQUAL "")
  set(cuda_compiler ${LIBOMPTARGET_NVPTX_CUDA_COMPILER})
+elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING)
+  # Compile the deviceRTL with the clang that is built in the project.
+  set(cuda_compiler "$<TARGET_FILE:clang>")
 elseif(${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
+  # Compile the device runtime with the compiler that OpenMP is built with.
+  # This is the case with LLVM_ENABLE_RUNTIMES=openmp.
+  # FIXME: This is unreliable; the compiler can be an older version of clang
+  # that does not support compiling CUDA, or only an older version of it. The
+  # risk is especially high on systems where clang is the default compiler
+  # (MacOS, BSDs). LLVM_ENABLE_RUNTIMES=openmp should itself set
+  # LIBOMPTARGET_NVPTX_CUDA_COMPILER instead.
  set(cuda_compiler ${CMAKE_C_COMPILER})
 else()
  libomptarget_say("Not building NVPTX deviceRTL: clang not found")
@ -44,7 +54,12 @@ set(llvm_link "${compiler_dir}/llvm-link")
 if (NOT LIBOMPTARGET_NVPTX_BC_LINKER STREQUAL "")
  set(bc_linker ${LIBOMPTARGET_NVPTX_BC_LINKER})
 elseif (EXISTS ${llvm_link})
+  # Try to use the linker consistent with the CUDA compiler unless explicitly
+  # set to a different linker.
  set(bc_linker ${llvm_link})
+elseif (NOT OPENMP_STANDALONE_BUILD AND NOT CMAKE_CROSSCOMPILING)
+  # Use the linker also built in the same project.
+  set(bc_linker "$<TARGET_FILE:llvm-link>")
 else()
  libomptarget_say("Not building NVPTX deviceRTL: llvm-link not found")
  return()
@ -113,7 +128,11 @@ endif()
 set(LIBOMPTARGET_NVPTX_DEBUG FALSE CACHE BOOL
  "Activate NVPTX device RTL debug messages.")

-libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
+if ("${cuda_compiler}" STREQUAL "$<TARGET_FILE:clang>")
+  libomptarget_say("Building CUDA LLVM bitcode offloading device RTL using in-tree clang.")
+else ()
+  libomptarget_say("Building CUDA LLVM bitcode offloading device RTL using ${cuda_compiler}")
+endif ()

 set(cuda_src_files
  ${devicertl_common_directory}/src/cancel.cu
@ -170,6 +189,15 @@ foreach(sm ${nvptx_sm_list})
      COMMENT "Building LLVM bitcode ${outfile}"
      VERBATIM
    )
+    if("${cuda_compiler}" STREQUAL "$<TARGET_FILE:clang>")
+      # Add a file-level dependency to ensure that clang is up-to-date.
+      # By default, add_custom_command only builds clang if the
+      # executable is missing.
+      add_custom_command(OUTPUT ${outfile}
+        DEPENDS clang
+        APPEND
+      )
+    endif()
    set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})

    list(APPEND bc_files ${outfile})
@ -184,6 +212,15 @@ foreach(sm ${nvptx_sm_list})
      DEPENDS ${bc_files}
      COMMENT "Linking LLVM bitcode ${bclib_name}"
  )
+  if("${bc_linker}" STREQUAL "$<TARGET_FILE:llvm-link>")
+    # Add a file-level dependency to ensure that llvm-link is up-to-date.
+    # By default, add_custom_command only builds llvm-link if the
+    # executable is missing.
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+      DEPENDS llvm-link
+      APPEND
+    )
+  endif()
  set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name})

  set(bclib_target_name "omptarget-nvptx-sm_${sm}-bc")