[CUDA] Add amdgpu sub archs

Patch by Greg Rodgers.
Revised and lit tests added by Yaxun Liu.

Differential Revision: https://reviews.llvm.org/D45277


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@329232 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Yaxun Liu 2018-04-04 21:19:27 +00:00
parent 893efa76e2
commit edaef2eeb1
8 changed files with 205 additions and 42 deletions

View File

@ -46,6 +46,19 @@ enum class CudaArch {
SM_62,
SM_70,
SM_72,
GFX600,
GFX601,
GFX700,
GFX701,
GFX702,
GFX703,
GFX704,
GFX801,
GFX802,
GFX803,
GFX810,
GFX900,
GFX902,
LAST,
};
const char *CudaArchToString(CudaArch A);
@ -68,6 +81,7 @@ enum class CudaVirtualArch {
COMPUTE_62,
COMPUTE_70,
COMPUTE_72,
COMPUTE_AMDGCN,
};
const char *CudaVirtualArchToString(CudaVirtualArch A);

View File

@ -58,6 +58,32 @@ const char *CudaArchToString(CudaArch A) {
return "sm_70";
case CudaArch::SM_72:
return "sm_72";
case CudaArch::GFX600: // tahiti
return "gfx600";
case CudaArch::GFX601: // pitcairn, verde, oland,hainan
return "gfx601";
case CudaArch::GFX700: // kaveri
return "gfx700";
case CudaArch::GFX701: // hawaii
return "gfx701";
case CudaArch::GFX702: // 290,290x,R390,R390x
return "gfx702";
case CudaArch::GFX703: // kabini mullins
return "gfx703";
case CudaArch::GFX704: // bonaire
return "gfx704";
case CudaArch::GFX801: // carrizo
return "gfx801";
case CudaArch::GFX802: // tonga,iceland
return "gfx802";
case CudaArch::GFX803: // fiji,polaris10
return "gfx803";
case CudaArch::GFX810: // stoney
return "gfx810";
case CudaArch::GFX900: // vega, instinct
return "gfx900";
case CudaArch::GFX902: // TBA
return "gfx902";
}
llvm_unreachable("invalid enum");
}
@ -78,6 +104,19 @@ CudaArch StringToCudaArch(llvm::StringRef S) {
.Case("sm_62", CudaArch::SM_62)
.Case("sm_70", CudaArch::SM_70)
.Case("sm_72", CudaArch::SM_72)
.Case("gfx600", CudaArch::GFX600)
.Case("gfx601", CudaArch::GFX601)
.Case("gfx700", CudaArch::GFX700)
.Case("gfx701", CudaArch::GFX701)
.Case("gfx702", CudaArch::GFX702)
.Case("gfx703", CudaArch::GFX703)
.Case("gfx704", CudaArch::GFX704)
.Case("gfx801", CudaArch::GFX801)
.Case("gfx802", CudaArch::GFX802)
.Case("gfx803", CudaArch::GFX803)
.Case("gfx810", CudaArch::GFX810)
.Case("gfx900", CudaArch::GFX900)
.Case("gfx902", CudaArch::GFX902)
.Default(CudaArch::UNKNOWN);
}
@ -111,6 +150,8 @@ const char *CudaVirtualArchToString(CudaVirtualArch A) {
return "compute_70";
case CudaVirtualArch::COMPUTE_72:
return "compute_72";
case CudaVirtualArch::COMPUTE_AMDGCN:
return "compute_amdgcn";
}
llvm_unreachable("invalid enum");
}
@ -130,6 +171,7 @@ CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
.Case("compute_62", CudaVirtualArch::COMPUTE_62)
.Case("compute_70", CudaVirtualArch::COMPUTE_70)
.Case("compute_72", CudaVirtualArch::COMPUTE_72)
.Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN)
.Default(CudaVirtualArch::UNKNOWN);
}
@ -166,6 +208,20 @@ CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
return CudaVirtualArch::COMPUTE_70;
case CudaArch::SM_72:
return CudaVirtualArch::COMPUTE_72;
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
return CudaVirtualArch::COMPUTE_AMDGCN;
}
llvm_unreachable("invalid enum");
}
@ -194,6 +250,20 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
return CudaVersion::CUDA_90;
case CudaArch::SM_72:
return CudaVersion::CUDA_91;
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
return CudaVersion::CUDA_70;
}
llvm_unreachable("invalid enum");
}
@ -204,6 +274,19 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) {
return CudaVersion::UNKNOWN;
case CudaArch::SM_20:
case CudaArch::SM_21:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX700:
case CudaArch::GFX701:
case CudaArch::GFX702:
case CudaArch::GFX703:
case CudaArch::GFX704:
case CudaArch::GFX801:
case CudaArch::GFX802:
case CudaArch::GFX803:
case CudaArch::GFX810:
case CudaArch::GFX900:
case CudaArch::GFX902:
return CudaVersion::CUDA_80;
default:
return CudaVersion::LATEST;

View File

@ -112,6 +112,61 @@ void addMinGWDefines(const llvm::Triple &Triple, const LangOptions &Opts,
addCygMingDefines(Opts, Builder);
}
/// Defines the `__CUDA_ARCH__` predefined macro for the CUDA/HIP device
/// architecture \p GPU.
///
/// Each SM_xy architecture yields the value "xy0"; every GFX* architecture
/// yields "320". CudaArch::UNKNOWN and CudaArch::LAST are not valid inputs and
/// end up in llvm_unreachable.
void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder) {
  // Stays null for the two enumerators that have no architecture code.
  const char *ArchCode = nullptr;

  switch (GPU) {
  case CudaArch::UNKNOWN:
  case CudaArch::LAST:
    break;
  case CudaArch::SM_20:
    ArchCode = "200";
    break;
  case CudaArch::SM_21:
    ArchCode = "210";
    break;
  case CudaArch::SM_30:
    ArchCode = "300";
    break;
  case CudaArch::SM_32:
    ArchCode = "320";
    break;
  case CudaArch::SM_35:
    ArchCode = "350";
    break;
  case CudaArch::SM_37:
    ArchCode = "370";
    break;
  case CudaArch::SM_50:
    ArchCode = "500";
    break;
  case CudaArch::SM_52:
    ArchCode = "520";
    break;
  case CudaArch::SM_53:
    ArchCode = "530";
    break;
  case CudaArch::SM_60:
    ArchCode = "600";
    break;
  case CudaArch::SM_61:
    ArchCode = "610";
    break;
  case CudaArch::SM_62:
    ArchCode = "620";
    break;
  case CudaArch::SM_70:
    ArchCode = "700";
    break;
  case CudaArch::SM_72:
    ArchCode = "720";
    break;
  // All GFX sub-architectures share a single architecture code.
  case CudaArch::GFX600:
  case CudaArch::GFX601:
  case CudaArch::GFX700:
  case CudaArch::GFX701:
  case CudaArch::GFX702:
  case CudaArch::GFX703:
  case CudaArch::GFX704:
  case CudaArch::GFX801:
  case CudaArch::GFX802:
  case CudaArch::GFX803:
  case CudaArch::GFX810:
  case CudaArch::GFX900:
  case CudaArch::GFX902:
    ArchCode = "320";
    break;
  }

  if (!ArchCode)
    llvm_unreachable("unhandled Cuda/HIP Arch");

  Builder.defineMacro("__CUDA_ARCH__", ArchCode);
}
//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//

View File

@ -16,6 +16,7 @@
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_H
#include "clang/Basic/Cuda.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetInfo.h"
@ -46,6 +47,9 @@ void addMinGWDefines(const llvm::Triple &Triple, const clang::LangOptions &Opts,
LLVM_LIBRARY_VISIBILITY
void addCygMingDefines(const clang::LangOptions &Opts,
clang::MacroBuilder &Builder);
/// Defines the `__CUDA_ARCH__` macro on \p Builder with the architecture code
/// that corresponds to the CUDA/HIP GPU architecture \p GPU.
LLVM_LIBRARY_VISIBILITY
void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder);
} // namespace targets
} // namespace clang
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_H

View File

@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "Targets.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
@ -263,6 +264,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
assert(DataLayout->getAllocaAddrSpace() == Private);
GCN_Subarch = CudaArch::GFX803; // Default to fiji
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
@ -307,6 +309,9 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPU.Kind != GK_NONE)
Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
if (Opts.CUDAIsDevice)
defineCudaArchMacro(GCN_Subarch, Builder);
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
// removed in the near future.
if (GPU.HasFMAF)

View File

@ -14,6 +14,7 @@
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
#include "clang/Basic/Cuda.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "llvm/ADT/StringSet.h"
@ -174,6 +175,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
static bool isAMDGCN(const llvm::Triple &TT) {
return TT.getArch() == llvm::Triple::amdgcn;
}
CudaArch GCN_Subarch;
public:
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
@ -330,6 +332,7 @@ public:
else
GPU = parseR600Name(Name);
GCN_Subarch = StringToCudaArch(Name);
return GK_NONE != GPU.Kind;
}

View File

@ -153,48 +153,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
MacroBuilder &Builder) const {
Builder.defineMacro("__PTX__");
Builder.defineMacro("__NVPTX__");
if (Opts.CUDAIsDevice) {
// Set __CUDA_ARCH__ for the GPU specified.
std::string CUDAArchCode = [this] {
switch (GPU) {
case CudaArch::LAST:
break;
case CudaArch::UNKNOWN:
assert(false && "No GPU arch when compiling CUDA device code.");
return "";
case CudaArch::SM_20:
return "200";
case CudaArch::SM_21:
return "210";
case CudaArch::SM_30:
return "300";
case CudaArch::SM_32:
return "320";
case CudaArch::SM_35:
return "350";
case CudaArch::SM_37:
return "370";
case CudaArch::SM_50:
return "500";
case CudaArch::SM_52:
return "520";
case CudaArch::SM_53:
return "530";
case CudaArch::SM_60:
return "600";
case CudaArch::SM_61:
return "610";
case CudaArch::SM_62:
return "620";
case CudaArch::SM_70:
return "700";
case CudaArch::SM_72:
return "720";
}
llvm_unreachable("unhandled CudaArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
}
if (Opts.CUDAIsDevice)
defineCudaArchMacro(GPU, Builder);
}
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {

View File

@ -31,6 +31,32 @@
// RUN: | FileCheck -check-prefixes=COMMON,SM62 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,SM70 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx600 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX600 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx601 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX601 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx700 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX700 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx701 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX701 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx702 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX702 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx703 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX703 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx704 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX704 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx801 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX801 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx802 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX802 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx803 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX803 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx810 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX810 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx900 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX900 %s
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=COMMON,GFX902 %s
// COMMON: ptxas
// COMMON-SAME: -m64
@ -49,3 +75,16 @@
// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
// GFX600:--image=profile=gfx600{{.*}}--image=profile=compute_amdgcn
// GFX601:--image=profile=gfx601{{.*}}--image=profile=compute_amdgcn
// GFX700:--image=profile=gfx700{{.*}}--image=profile=compute_amdgcn
// GFX701:--image=profile=gfx701{{.*}}--image=profile=compute_amdgcn
// GFX702:--image=profile=gfx702{{.*}}--image=profile=compute_amdgcn
// GFX703:--image=profile=gfx703{{.*}}--image=profile=compute_amdgcn
// GFX704:--image=profile=gfx704{{.*}}--image=profile=compute_amdgcn
// GFX801:--image=profile=gfx801{{.*}}--image=profile=compute_amdgcn
// GFX802:--image=profile=gfx802{{.*}}--image=profile=compute_amdgcn
// GFX803:--image=profile=gfx803{{.*}}--image=profile=compute_amdgcn
// GFX810:--image=profile=gfx810{{.*}}--image=profile=compute_amdgcn
// GFX900:--image=profile=gfx900{{.*}}--image=profile=compute_amdgcn
// GFX902:--image=profile=gfx902{{.*}}--image=profile=compute_amdgcn