mirror of https://github.com/microsoft/clang.git
[CUDA] Add #pragma clang force_cuda_host_device_{begin,end} pragmas.
Summary: These cause us to consider all functions in-between to be __host__ __device__. You can nest these pragmas; you just can't have more 'end's than 'begin's. Reviewers: rsmith Subscribers: tra, jhen, cfe-commits Differential Revision: https://reviews.llvm.org/D24975 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@283677 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e5f86e2485
commit
985220f468
|
@ -1026,6 +1026,12 @@ def warn_pragma_unroll_cuda_value_in_parens : Warning<
|
|||
def warn_cuda_attr_lambda_position : Warning<
|
||||
"nvcc does not allow '__%0__' to appear after '()' in lambdas">,
|
||||
InGroup<CudaCompat>;
|
||||
def warn_pragma_force_cuda_host_device_bad_arg : Warning<
|
||||
"incorrect use of #pragma clang force_cuda_host_device begin|end">,
|
||||
InGroup<IgnoredPragmas>;
|
||||
def err_pragma_cannot_end_force_cuda_host_device : Error<
|
||||
"force_cuda_host_device end pragma without matching "
|
||||
"force_cuda_host_device begin">;
|
||||
} // end of Parse Issue category.
|
||||
|
||||
let CategoryName = "Modules Issue" in {
|
||||
|
|
|
@ -173,6 +173,7 @@ class Parser : public CodeCompletionHandler {
|
|||
std::unique_ptr<PragmaHandler> MSSection;
|
||||
std::unique_ptr<PragmaHandler> MSRuntimeChecks;
|
||||
std::unique_ptr<PragmaHandler> MSIntrinsic;
|
||||
std::unique_ptr<PragmaHandler> CUDAForceHostDeviceHandler;
|
||||
std::unique_ptr<PragmaHandler> OptimizeHandler;
|
||||
std::unique_ptr<PragmaHandler> LoopHintHandler;
|
||||
std::unique_ptr<PragmaHandler> UnrollHintHandler;
|
||||
|
|
|
@ -9219,6 +9219,20 @@ public:
|
|||
QualType FieldTy, bool IsMsStruct,
|
||||
Expr *BitWidth, bool *ZeroWidth = nullptr);
|
||||
|
||||
private:
|
||||
unsigned ForceCUDAHostDeviceDepth = 0;
|
||||
|
||||
public:
|
||||
/// Increments our count of the number of times we've seen a pragma forcing
|
||||
/// functions to be __host__ __device__. So long as this count is greater
|
||||
/// than zero, all functions encountered will be __host__ __device__.
|
||||
void PushForceCUDAHostDevice();
|
||||
|
||||
/// Decrements our count of the number of times we've seen a pragma forcing
|
||||
/// functions to be __host__ __device__. Returns false if the count is 0
|
||||
/// before incrementing, so you can emit an error.
|
||||
bool PopForceCUDAHostDevice();
|
||||
|
||||
enum CUDAFunctionTarget {
|
||||
CFT_Device,
|
||||
CFT_Global,
|
||||
|
|
|
@ -580,7 +580,11 @@ namespace clang {
|
|||
MSSTRUCT_PRAGMA_OPTIONS = 55,
|
||||
|
||||
/// \brief Record code for \#pragma ms_struct options.
|
||||
POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56
|
||||
POINTERS_TO_MEMBERS_PRAGMA_OPTIONS = 56,
|
||||
|
||||
/// \brief Number of unmatched #pragma clang cuda_force_host_device begin
|
||||
/// directives we've seen.
|
||||
CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH = 57,
|
||||
};
|
||||
|
||||
/// \brief Record types used within a source manager block.
|
||||
|
|
|
@ -772,6 +772,10 @@ private:
|
|||
/// Sema tracks these to emit warnings.
|
||||
SmallVector<uint64_t, 16> UnusedLocalTypedefNameCandidates;
|
||||
|
||||
/// \brief Our current depth in #pragma cuda force_host_device begin/end
|
||||
/// macros.
|
||||
unsigned ForceCUDAHostDeviceDepth = 0;
|
||||
|
||||
/// \brief The IDs of the declarations Sema stores directly.
|
||||
///
|
||||
/// Sema tracks a few important decls, such as namespace std, directly.
|
||||
|
|
|
@ -459,6 +459,7 @@ private:
|
|||
void WriteDeclContextVisibleUpdate(const DeclContext *DC);
|
||||
void WriteFPPragmaOptions(const FPOptions &Opts);
|
||||
void WriteOpenCLExtensions(Sema &SemaRef);
|
||||
void WriteCUDAPragmas(Sema &SemaRef);
|
||||
void WriteObjCCategories();
|
||||
void WriteLateParsedTemplates(Sema &SemaRef);
|
||||
void WriteOptimizePragmaOptions(Sema &SemaRef);
|
||||
|
|
|
@ -167,6 +167,16 @@ struct PragmaMSIntrinsicHandler : public PragmaHandler {
|
|||
Token &FirstToken) override;
|
||||
};
|
||||
|
||||
struct PragmaForceCUDAHostDeviceHandler : public PragmaHandler {
|
||||
PragmaForceCUDAHostDeviceHandler(Sema &Actions)
|
||||
: PragmaHandler("force_cuda_host_device"), Actions(Actions) {}
|
||||
void HandlePragma(Preprocessor &PP, PragmaIntroducerKind Introducer,
|
||||
Token &FirstToken) override;
|
||||
|
||||
private:
|
||||
Sema &Actions;
|
||||
};
|
||||
|
||||
} // end namespace
|
||||
|
||||
void Parser::initializePragmaHandlers() {
|
||||
|
@ -239,6 +249,12 @@ void Parser::initializePragmaHandlers() {
|
|||
PP.AddPragmaHandler(MSIntrinsic.get());
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA) {
|
||||
CUDAForceHostDeviceHandler.reset(
|
||||
new PragmaForceCUDAHostDeviceHandler(Actions));
|
||||
PP.AddPragmaHandler("clang", CUDAForceHostDeviceHandler.get());
|
||||
}
|
||||
|
||||
OptimizeHandler.reset(new PragmaOptimizeHandler(Actions));
|
||||
PP.AddPragmaHandler("clang", OptimizeHandler.get());
|
||||
|
||||
|
@ -309,6 +325,11 @@ void Parser::resetPragmaHandlers() {
|
|||
MSIntrinsic.reset();
|
||||
}
|
||||
|
||||
if (getLangOpts().CUDA) {
|
||||
PP.RemovePragmaHandler("clang", CUDAForceHostDeviceHandler.get());
|
||||
CUDAForceHostDeviceHandler.reset();
|
||||
}
|
||||
|
||||
PP.RemovePragmaHandler("STDC", FPContractHandler.get());
|
||||
FPContractHandler.reset();
|
||||
|
||||
|
@ -2187,3 +2208,26 @@ void PragmaMSIntrinsicHandler::HandlePragma(Preprocessor &PP,
|
|||
PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol)
|
||||
<< "intrinsic";
|
||||
}
|
||||
void PragmaForceCUDAHostDeviceHandler::HandlePragma(
|
||||
Preprocessor &PP, PragmaIntroducerKind Introducer, Token &Tok) {
|
||||
Token FirstTok = Tok;
|
||||
|
||||
PP.Lex(Tok);
|
||||
IdentifierInfo *Info = Tok.getIdentifierInfo();
|
||||
if (!Info || (!Info->isStr("begin") && !Info->isStr("end"))) {
|
||||
PP.Diag(FirstTok.getLocation(),
|
||||
diag::warn_pragma_force_cuda_host_device_bad_arg);
|
||||
return;
|
||||
}
|
||||
|
||||
if (Info->isStr("begin"))
|
||||
Actions.PushForceCUDAHostDevice();
|
||||
else if (!Actions.PopForceCUDAHostDevice())
|
||||
PP.Diag(FirstTok.getLocation(),
|
||||
diag::err_pragma_cannot_end_force_cuda_host_device);
|
||||
|
||||
PP.Lex(Tok);
|
||||
if (!Tok.is(tok::eod))
|
||||
PP.Diag(FirstTok.getLocation(),
|
||||
diag::warn_pragma_force_cuda_host_device_bad_arg);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,19 @@
|
|||
#include "llvm/ADT/SmallVector.h"
|
||||
using namespace clang;
|
||||
|
||||
void Sema::PushForceCUDAHostDevice() {
|
||||
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
|
||||
ForceCUDAHostDeviceDepth++;
|
||||
}
|
||||
|
||||
bool Sema::PopForceCUDAHostDevice() {
|
||||
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
|
||||
if (ForceCUDAHostDeviceDepth == 0)
|
||||
return false;
|
||||
ForceCUDAHostDeviceDepth--;
|
||||
return true;
|
||||
}
|
||||
|
||||
ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
|
||||
MultiExprArg ExecConfig,
|
||||
SourceLocation GGGLoc) {
|
||||
|
@ -441,9 +454,23 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
|
|||
// * a __device__ function with this signature was already declared, in which
|
||||
// case in which case we output an error, unless the __device__ decl is in a
|
||||
// system header, in which case we leave the constexpr function unattributed.
|
||||
//
|
||||
// In addition, all function decls are treated as __host__ __device__ when
|
||||
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
|
||||
// #pragma clang force_cuda_host_device_begin/end
|
||||
// pair).
|
||||
void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
|
||||
const LookupResult &Previous) {
|
||||
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
|
||||
|
||||
if (ForceCUDAHostDeviceDepth > 0) {
|
||||
if (!NewD->hasAttr<CUDAHostAttr>())
|
||||
NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
|
||||
if (!NewD->hasAttr<CUDADeviceAttr>())
|
||||
NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
|
||||
return;
|
||||
}
|
||||
|
||||
if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
|
||||
NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
|
||||
NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
|
||||
|
|
|
@ -3275,6 +3275,14 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) {
|
|||
UnusedLocalTypedefNameCandidates.push_back(
|
||||
getGlobalDeclID(F, Record[I]));
|
||||
break;
|
||||
|
||||
case CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH:
|
||||
if (Record.size() != 1) {
|
||||
Error("invalid cuda pragma options record");
|
||||
return Failure;
|
||||
}
|
||||
ForceCUDAHostDeviceDepth = Record[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -7128,6 +7136,7 @@ void ASTReader::UpdateSema() {
|
|||
PragmaMSPointersToMembersState,
|
||||
PointersToMembersPragmaLocation);
|
||||
}
|
||||
SemaObj->ForceCUDAHostDeviceDepth = ForceCUDAHostDeviceDepth;
|
||||
}
|
||||
|
||||
IdentifierInfo *ASTReader::get(StringRef Name) {
|
||||
|
|
|
@ -1069,6 +1069,7 @@ void ASTWriter::WriteBlockInfoBlock() {
|
|||
RECORD(POINTERS_TO_MEMBERS_PRAGMA_OPTIONS);
|
||||
RECORD(UNUSED_LOCAL_TYPEDEF_NAME_CANDIDATES);
|
||||
RECORD(DELETE_EXPRS_TO_ANALYZE);
|
||||
RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
|
||||
|
||||
// SourceManager Block.
|
||||
BLOCK(SOURCE_MANAGER_BLOCK);
|
||||
|
@ -3942,6 +3943,13 @@ void ASTWriter::WriteOpenCLExtensions(Sema &SemaRef) {
|
|||
Stream.EmitRecord(OPENCL_EXTENSIONS, Record);
|
||||
}
|
||||
|
||||
void ASTWriter::WriteCUDAPragmas(Sema &SemaRef) {
|
||||
if (SemaRef.ForceCUDAHostDeviceDepth > 0) {
|
||||
RecordData::value_type Record[] = {SemaRef.ForceCUDAHostDeviceDepth};
|
||||
Stream.EmitRecord(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH, Record);
|
||||
}
|
||||
}
|
||||
|
||||
void ASTWriter::WriteObjCCategories() {
|
||||
SmallVector<ObjCCategoriesInfo, 2> CategoriesMap;
|
||||
RecordData Categories;
|
||||
|
@ -4619,6 +4627,7 @@ uint64_t ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot,
|
|||
WriteIdentifierTable(PP, SemaRef.IdResolver, isModule);
|
||||
WriteFPPragmaOptions(SemaRef.getFPOptions());
|
||||
WriteOpenCLExtensions(SemaRef);
|
||||
WriteCUDAPragmas(SemaRef);
|
||||
WritePragmaDiagnosticMappings(Context.getDiagnostics(), isModule);
|
||||
|
||||
// If we're emitting a module, write out the submodule information.
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
// RUN: %clang_cc1 -emit-pch %s -o %t
|
||||
// RUN: %clang_cc1 -verify -verify-ignore-unexpected=note -include-pch %t -S -o /dev/null %s
|
||||
|
||||
#ifndef HEADER
|
||||
#define HEADER
|
||||
|
||||
#pragma clang force_cuda_host_device begin
|
||||
#pragma clang force_cuda_host_device begin
|
||||
#pragma clang force_cuda_host_device end
|
||||
|
||||
void hd1() {}
|
||||
|
||||
#else
|
||||
|
||||
void hd2() {}
|
||||
|
||||
#pragma clang force_cuda_host_device end
|
||||
|
||||
void host_only() {}
|
||||
|
||||
__attribute__((device)) void device() {
|
||||
hd1();
|
||||
hd2();
|
||||
host_only(); // expected-error {{no matching function for call}}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,41 @@
|
|||
// RUN: %clang_cc1 -std=c++14 -S -verify -fcuda-is-device %s -o /dev/null
|
||||
|
||||
// Check how the force_cuda_host_device pragma interacts with template
|
||||
// instantiations. The errors here are emitted at codegen, so we can't do
|
||||
// -fsyntax-only.
|
||||
|
||||
template <typename T>
|
||||
auto foo() { // expected-note {{declared here}}
|
||||
return T();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct X {
|
||||
void foo(); // expected-note {{declared here}}
|
||||
};
|
||||
|
||||
#pragma clang force_cuda_host_device begin
|
||||
__attribute__((host)) __attribute__((device)) void test() {
|
||||
int n = foo<int>(); // expected-error {{reference to __host__ function 'foo<int>'}}
|
||||
X<int>().foo(); // expected-error {{reference to __host__ function 'foo'}}
|
||||
}
|
||||
#pragma clang force_cuda_host_device end
|
||||
|
||||
// Same thing as above, but within a force_cuda_host_device block without a
|
||||
// corresponding end.
|
||||
|
||||
template <typename T>
|
||||
T bar() { // expected-note {{declared here}}
|
||||
return T();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct Y {
|
||||
void bar(); // expected-note {{declared here}}
|
||||
};
|
||||
|
||||
#pragma clang force_cuda_host_device begin
|
||||
__attribute__((host)) __attribute__((device)) void test2() {
|
||||
int n = bar<int>(); // expected-error {{reference to __host__ function 'bar<int>'}}
|
||||
Y<int>().bar(); // expected-error {{reference to __host__ function 'bar'}}
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// RUN: %clang_cc1 -fsyntax-only -verify %s
|
||||
|
||||
// Check the force_cuda_host_device pragma.
|
||||
|
||||
#pragma clang force_cuda_host_device begin
|
||||
void f();
|
||||
#pragma clang force_cuda_host_device begin
|
||||
void g();
|
||||
#pragma clang force_cuda_host_device end
|
||||
void h();
|
||||
#pragma clang force_cuda_host_device end
|
||||
|
||||
void i(); // expected-note {{not viable}}
|
||||
|
||||
void host() {
|
||||
f();
|
||||
g();
|
||||
h();
|
||||
i();
|
||||
}
|
||||
|
||||
__attribute__((device)) void device() {
|
||||
f();
|
||||
g();
|
||||
h();
|
||||
i(); // expected-error {{no matching function}}
|
||||
}
|
||||
|
||||
#pragma clang force_cuda_host_device foo
|
||||
// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}}
|
||||
|
||||
#pragma clang force_cuda_host_device
|
||||
// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}}
|
||||
|
||||
#pragma clang force_cuda_host_device begin foo
|
||||
// expected-warning@-1 {{incorrect use of #pragma clang force_cuda_host_device begin|end}}
|
Loading…
Reference in New Issue