mirror of https://github.com/microsoft/clang.git
[CUDA] Add amdgpu sub archs
Patch by Greg Rodgers. Revised and lit tests added by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D45277 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@329232 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
893efa76e2
commit
edaef2eeb1
|
@ -46,6 +46,19 @@ enum class CudaArch {
|
|||
SM_62,
|
||||
SM_70,
|
||||
SM_72,
|
||||
GFX600,
|
||||
GFX601,
|
||||
GFX700,
|
||||
GFX701,
|
||||
GFX702,
|
||||
GFX703,
|
||||
GFX704,
|
||||
GFX801,
|
||||
GFX802,
|
||||
GFX803,
|
||||
GFX810,
|
||||
GFX900,
|
||||
GFX902,
|
||||
LAST,
|
||||
};
|
||||
const char *CudaArchToString(CudaArch A);
|
||||
|
@ -68,6 +81,7 @@ enum class CudaVirtualArch {
|
|||
COMPUTE_62,
|
||||
COMPUTE_70,
|
||||
COMPUTE_72,
|
||||
COMPUTE_AMDGCN,
|
||||
};
|
||||
const char *CudaVirtualArchToString(CudaVirtualArch A);
|
||||
|
||||
|
|
|
@ -58,6 +58,32 @@ const char *CudaArchToString(CudaArch A) {
|
|||
return "sm_70";
|
||||
case CudaArch::SM_72:
|
||||
return "sm_72";
|
||||
case CudaArch::GFX600: // tahiti
|
||||
return "gfx600";
|
||||
case CudaArch::GFX601: // pitcairn, verde, oland,hainan
|
||||
return "gfx601";
|
||||
case CudaArch::GFX700: // kaveri
|
||||
return "gfx700";
|
||||
case CudaArch::GFX701: // hawaii
|
||||
return "gfx701";
|
||||
case CudaArch::GFX702: // 290,290x,R390,R390x
|
||||
return "gfx702";
|
||||
case CudaArch::GFX703: // kabini mullins
|
||||
return "gfx703";
|
||||
case CudaArch::GFX704: // bonaire
|
||||
return "gfx704";
|
||||
case CudaArch::GFX801: // carrizo
|
||||
return "gfx801";
|
||||
case CudaArch::GFX802: // tonga,iceland
|
||||
return "gfx802";
|
||||
case CudaArch::GFX803: // fiji,polaris10
|
||||
return "gfx803";
|
||||
case CudaArch::GFX810: // stoney
|
||||
return "gfx810";
|
||||
case CudaArch::GFX900: // vega, instinct
|
||||
return "gfx900";
|
||||
case CudaArch::GFX902: // TBA
|
||||
return "gfx902";
|
||||
}
|
||||
llvm_unreachable("invalid enum");
|
||||
}
|
||||
|
@ -78,6 +104,19 @@ CudaArch StringToCudaArch(llvm::StringRef S) {
|
|||
.Case("sm_62", CudaArch::SM_62)
|
||||
.Case("sm_70", CudaArch::SM_70)
|
||||
.Case("sm_72", CudaArch::SM_72)
|
||||
.Case("gfx600", CudaArch::GFX600)
|
||||
.Case("gfx601", CudaArch::GFX601)
|
||||
.Case("gfx700", CudaArch::GFX700)
|
||||
.Case("gfx701", CudaArch::GFX701)
|
||||
.Case("gfx702", CudaArch::GFX702)
|
||||
.Case("gfx703", CudaArch::GFX703)
|
||||
.Case("gfx704", CudaArch::GFX704)
|
||||
.Case("gfx801", CudaArch::GFX801)
|
||||
.Case("gfx802", CudaArch::GFX802)
|
||||
.Case("gfx803", CudaArch::GFX803)
|
||||
.Case("gfx810", CudaArch::GFX810)
|
||||
.Case("gfx900", CudaArch::GFX900)
|
||||
.Case("gfx902", CudaArch::GFX902)
|
||||
.Default(CudaArch::UNKNOWN);
|
||||
}
|
||||
|
||||
|
@ -111,6 +150,8 @@ const char *CudaVirtualArchToString(CudaVirtualArch A) {
|
|||
return "compute_70";
|
||||
case CudaVirtualArch::COMPUTE_72:
|
||||
return "compute_72";
|
||||
case CudaVirtualArch::COMPUTE_AMDGCN:
|
||||
return "compute_amdgcn";
|
||||
}
|
||||
llvm_unreachable("invalid enum");
|
||||
}
|
||||
|
@ -130,6 +171,7 @@ CudaVirtualArch StringToCudaVirtualArch(llvm::StringRef S) {
|
|||
.Case("compute_62", CudaVirtualArch::COMPUTE_62)
|
||||
.Case("compute_70", CudaVirtualArch::COMPUTE_70)
|
||||
.Case("compute_72", CudaVirtualArch::COMPUTE_72)
|
||||
.Case("compute_amdgcn", CudaVirtualArch::COMPUTE_AMDGCN)
|
||||
.Default(CudaVirtualArch::UNKNOWN);
|
||||
}
|
||||
|
||||
|
@ -166,6 +208,20 @@ CudaVirtualArch VirtualArchForCudaArch(CudaArch A) {
|
|||
return CudaVirtualArch::COMPUTE_70;
|
||||
case CudaArch::SM_72:
|
||||
return CudaVirtualArch::COMPUTE_72;
|
||||
case CudaArch::GFX600:
|
||||
case CudaArch::GFX601:
|
||||
case CudaArch::GFX700:
|
||||
case CudaArch::GFX701:
|
||||
case CudaArch::GFX702:
|
||||
case CudaArch::GFX703:
|
||||
case CudaArch::GFX704:
|
||||
case CudaArch::GFX801:
|
||||
case CudaArch::GFX802:
|
||||
case CudaArch::GFX803:
|
||||
case CudaArch::GFX810:
|
||||
case CudaArch::GFX900:
|
||||
case CudaArch::GFX902:
|
||||
return CudaVirtualArch::COMPUTE_AMDGCN;
|
||||
}
|
||||
llvm_unreachable("invalid enum");
|
||||
}
|
||||
|
@ -194,6 +250,20 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
|
|||
return CudaVersion::CUDA_90;
|
||||
case CudaArch::SM_72:
|
||||
return CudaVersion::CUDA_91;
|
||||
case CudaArch::GFX600:
|
||||
case CudaArch::GFX601:
|
||||
case CudaArch::GFX700:
|
||||
case CudaArch::GFX701:
|
||||
case CudaArch::GFX702:
|
||||
case CudaArch::GFX703:
|
||||
case CudaArch::GFX704:
|
||||
case CudaArch::GFX801:
|
||||
case CudaArch::GFX802:
|
||||
case CudaArch::GFX803:
|
||||
case CudaArch::GFX810:
|
||||
case CudaArch::GFX900:
|
||||
case CudaArch::GFX902:
|
||||
return CudaVersion::CUDA_70;
|
||||
}
|
||||
llvm_unreachable("invalid enum");
|
||||
}
|
||||
|
@ -204,6 +274,19 @@ CudaVersion MaxVersionForCudaArch(CudaArch A) {
|
|||
return CudaVersion::UNKNOWN;
|
||||
case CudaArch::SM_20:
|
||||
case CudaArch::SM_21:
|
||||
case CudaArch::GFX600:
|
||||
case CudaArch::GFX601:
|
||||
case CudaArch::GFX700:
|
||||
case CudaArch::GFX701:
|
||||
case CudaArch::GFX702:
|
||||
case CudaArch::GFX703:
|
||||
case CudaArch::GFX704:
|
||||
case CudaArch::GFX801:
|
||||
case CudaArch::GFX802:
|
||||
case CudaArch::GFX803:
|
||||
case CudaArch::GFX810:
|
||||
case CudaArch::GFX900:
|
||||
case CudaArch::GFX902:
|
||||
return CudaVersion::CUDA_80;
|
||||
default:
|
||||
return CudaVersion::LATEST;
|
||||
|
|
|
@ -112,6 +112,61 @@ void addMinGWDefines(const llvm::Triple &Triple, const LangOptions &Opts,
|
|||
addCygMingDefines(Opts, Builder);
|
||||
}
|
||||
|
||||
/// Map a CudaArch enumerator to the numeric text used as the value of the
/// __CUDA_ARCH__ macro.
///
/// Each SM_xy enumerator yields "xy0". Every GFX* (AMD GCN) enumerator yields
/// "320" -- presumably so HIP device code is treated like compute capability
/// 3.2; TODO confirm the intent with the HIP headers.
///
/// CudaArch::UNKNOWN and CudaArch::LAST are not valid inputs and end in
/// llvm_unreachable.
/// NOTE(review): a caller that stores the result of StringToCudaArch() without
/// checking for UNKNOWN would reach that path -- verify the callers.
static const char *cudaArchMacroValue(CudaArch GPU) {
  switch (GPU) {
  // Sentinel values: fall out of the switch to the unreachable below.
  case CudaArch::UNKNOWN:
  case CudaArch::LAST:
    break;

  // NVIDIA architectures.
  case CudaArch::SM_20:
    return "200";
  case CudaArch::SM_21:
    return "210";
  case CudaArch::SM_30:
    return "300";
  case CudaArch::SM_32:
    return "320";
  case CudaArch::SM_35:
    return "350";
  case CudaArch::SM_37:
    return "370";
  case CudaArch::SM_50:
    return "500";
  case CudaArch::SM_52:
    return "520";
  case CudaArch::SM_53:
    return "530";
  case CudaArch::SM_60:
    return "600";
  case CudaArch::SM_61:
    return "610";
  case CudaArch::SM_62:
    return "620";
  case CudaArch::SM_70:
    return "700";
  case CudaArch::SM_72:
    return "720";

  // AMD GCN architectures all share a single value.
  case CudaArch::GFX600:
  case CudaArch::GFX601:
  case CudaArch::GFX700:
  case CudaArch::GFX701:
  case CudaArch::GFX702:
  case CudaArch::GFX703:
  case CudaArch::GFX704:
  case CudaArch::GFX801:
  case CudaArch::GFX802:
  case CudaArch::GFX803:
  case CudaArch::GFX810:
  case CudaArch::GFX900:
  case CudaArch::GFX902:
    return "320";
  }
  llvm_unreachable("unhandled Cuda/HIP Arch");
}

/// Define the __CUDA_ARCH__ predefined macro for the given GPU architecture.
///
/// \param GPU      Architecture being compiled for; must be a concrete SM_* or
///                 GFX* enumerator (not UNKNOWN or LAST).
/// \param Builder  Macro builder that receives the __CUDA_ARCH__ definition.
void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder) {
  const std::string ArchValue = cudaArchMacroValue(GPU);
  Builder.defineMacro("__CUDA_ARCH__", ArchValue);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Driver code
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_H
|
||||
#define LLVM_CLANG_LIB_BASIC_TARGETS_H
|
||||
|
||||
#include "clang/Basic/Cuda.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/MacroBuilder.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
|
@ -46,6 +47,9 @@ void addMinGWDefines(const llvm::Triple &Triple, const clang::LangOptions &Opts,
|
|||
LLVM_LIBRARY_VISIBILITY
|
||||
void addCygMingDefines(const clang::LangOptions &Opts,
|
||||
clang::MacroBuilder &Builder);
|
||||
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void defineCudaArchMacro(CudaArch GPU, clang::MacroBuilder &Builder);
|
||||
} // namespace targets
|
||||
} // namespace clang
|
||||
#endif // LLVM_CLANG_LIB_BASIC_TARGETS_H
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "Targets.h"
|
||||
#include "clang/Basic/Builtins.h"
|
||||
#include "clang/Basic/LangOptions.h"
|
||||
#include "clang/Basic/MacroBuilder.h"
|
||||
|
@ -263,6 +264,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
|
|||
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
|
||||
: DataLayoutStringR600);
|
||||
assert(DataLayout->getAllocaAddrSpace() == Private);
|
||||
GCN_Subarch = CudaArch::GFX803; // Default to fiji
|
||||
|
||||
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
|
||||
!isAMDGCN(Triple));
|
||||
|
@ -307,6 +309,9 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
if (GPU.Kind != GK_NONE)
|
||||
Builder.defineMacro(Twine("__") + Twine(GPU.CanonicalName) + Twine("__"));
|
||||
|
||||
if (Opts.CUDAIsDevice)
|
||||
defineCudaArchMacro(GCN_Subarch, Builder);
|
||||
|
||||
// TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
|
||||
// removed in the near future.
|
||||
if (GPU.HasFMAF)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#ifndef LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
||||
#define LLVM_CLANG_LIB_BASIC_TARGETS_AMDGPU_H
|
||||
|
||||
#include "clang/Basic/Cuda.h"
|
||||
#include "clang/Basic/TargetInfo.h"
|
||||
#include "clang/Basic/TargetOptions.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
|
@ -174,6 +175,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
|
|||
static bool isAMDGCN(const llvm::Triple &TT) {
|
||||
return TT.getArch() == llvm::Triple::amdgcn;
|
||||
}
|
||||
CudaArch GCN_Subarch;
|
||||
|
||||
public:
|
||||
AMDGPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
|
||||
|
@ -330,6 +332,7 @@ public:
|
|||
else
|
||||
GPU = parseR600Name(Name);
|
||||
|
||||
GCN_Subarch = StringToCudaArch(Name);
|
||||
return GK_NONE != GPU.Kind;
|
||||
}
|
||||
|
||||
|
|
|
@ -153,48 +153,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
|
|||
MacroBuilder &Builder) const {
|
||||
Builder.defineMacro("__PTX__");
|
||||
Builder.defineMacro("__NVPTX__");
|
||||
if (Opts.CUDAIsDevice) {
|
||||
// Set __CUDA_ARCH__ for the GPU specified.
|
||||
std::string CUDAArchCode = [this] {
|
||||
switch (GPU) {
|
||||
case CudaArch::LAST:
|
||||
break;
|
||||
case CudaArch::UNKNOWN:
|
||||
assert(false && "No GPU arch when compiling CUDA device code.");
|
||||
return "";
|
||||
case CudaArch::SM_20:
|
||||
return "200";
|
||||
case CudaArch::SM_21:
|
||||
return "210";
|
||||
case CudaArch::SM_30:
|
||||
return "300";
|
||||
case CudaArch::SM_32:
|
||||
return "320";
|
||||
case CudaArch::SM_35:
|
||||
return "350";
|
||||
case CudaArch::SM_37:
|
||||
return "370";
|
||||
case CudaArch::SM_50:
|
||||
return "500";
|
||||
case CudaArch::SM_52:
|
||||
return "520";
|
||||
case CudaArch::SM_53:
|
||||
return "530";
|
||||
case CudaArch::SM_60:
|
||||
return "600";
|
||||
case CudaArch::SM_61:
|
||||
return "610";
|
||||
case CudaArch::SM_62:
|
||||
return "620";
|
||||
case CudaArch::SM_70:
|
||||
return "700";
|
||||
case CudaArch::SM_72:
|
||||
return "720";
|
||||
}
|
||||
llvm_unreachable("unhandled CudaArch");
|
||||
}();
|
||||
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
|
||||
}
|
||||
if (Opts.CUDAIsDevice)
|
||||
defineCudaArchMacro(GPU, Builder);
|
||||
}
|
||||
|
||||
ArrayRef<Builtin::Info> NVPTXTargetInfo::getTargetBuiltins() const {
|
||||
|
|
|
@ -31,6 +31,32 @@
|
|||
// RUN: | FileCheck -check-prefixes=COMMON,SM62 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=sm_70 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,SM70 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx600 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX600 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx601 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX601 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx700 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX700 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx701 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX701 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx702 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX702 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx703 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX703 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx704 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX704 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx801 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX801 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx802 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX802 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx803 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX803 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx810 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX810 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx900 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX900 %s
|
||||
// RUN: %clang -### -target x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 %s 2>&1 \
|
||||
// RUN: | FileCheck -check-prefixes=COMMON,GFX902 %s
|
||||
|
||||
// COMMON: ptxas
|
||||
// COMMON-SAME: -m64
|
||||
|
@ -49,3 +75,16 @@
|
|||
// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
|
||||
// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
|
||||
// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
|
||||
// GFX600:--image=profile=gfx600{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX601:--image=profile=gfx601{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX700:--image=profile=gfx700{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX701:--image=profile=gfx701{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX702:--image=profile=gfx702{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX703:--image=profile=gfx703{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX704:--image=profile=gfx704{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX801:--image=profile=gfx801{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX802:--image=profile=gfx802{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX803:--image=profile=gfx803{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX810:--image=profile=gfx810{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX900:--image=profile=gfx900{{.*}}--image=profile=compute_amdgcn
|
||||
// GFX902:--image=profile=gfx902{{.*}}--image=profile=compute_amdgcn
|
||||
|
|
Loading…
Reference in New Issue