mirror of https://github.com/microsoft/clang.git
[CUDA] Give templated device functions internal linkage, templated kernels external linkage.
Summary: This lets LLVM perform interprocedural optimization (IPO) over these functions. In particular, it allows LLVM to emit ld.global.nc for loads from __restrict pointers that are never written to within a kernel. Reviewers: rsmith Subscribers: cfe-commits, tra Differential Revision: http://reviews.llvm.org/D21337 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274261 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a6983aca9b
commit
56a9798df3
|
@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
|
|||
// explicit instantiations can occur in multiple translation units
|
||||
// and must all be equivalent. However, we are not allowed to
|
||||
// throw away these explicit instantiations.
|
||||
if (Linkage == GVA_StrongODR)
|
||||
return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
|
||||
: llvm::Function::ExternalLinkage;
|
||||
//
|
||||
// We don't currently support CUDA device code spread out across multiple TUs,
|
||||
// so say that CUDA templates are either external (for kernels) or internal.
|
||||
// This lets llvm perform aggressive inter-procedural optimizations.
|
||||
if (Linkage == GVA_StrongODR) {
|
||||
if (Context.getLangOpts().AppleKext)
|
||||
return llvm::Function::ExternalLinkage;
|
||||
if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
|
||||
return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
|
||||
: llvm::Function::InternalLinkage;
|
||||
return llvm::Function::WeakODRLinkage;
|
||||
}
|
||||
|
||||
// C++ doesn't have tentative definitions and thus cannot have common
|
||||
// linkage.
|
||||
|
|
|
@ -19,11 +19,11 @@ __global__ void global_function() {
|
|||
|
||||
// Make sure host-instantiated kernels are preserved on device side.
|
||||
template <typename T> __global__ void templated_kernel(T param) {}
|
||||
// CHECK-DAG: define weak_odr void @_Z16templated_kernelIiEvT_(
|
||||
// CHECK-DAG: define void @_Z16templated_kernelIiEvT_(
|
||||
|
||||
namespace {
|
||||
__global__ void anonymous_ns_kernel() {}
|
||||
// CHECK-DAG: define weak_odr void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
|
||||
// CHECK-DAG: define void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
|
||||
}
|
||||
|
||||
void host_function() {
|
||||
|
|
Loading…
Reference in New Issue