Add initial support for half precision builtins
v2: fix fmax implementation; use consistent checks for __CLC_FPSIZE; add missing TODOs; fix whitespace in definitions.h
v3: undef ZERO in modf.inc
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
reviewer: Jeroen Ketema <j.ketema@xs4all.nl>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 332677
This commit is contained in:
parent
c762666e87
commit
70a270da5f
|
@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
|
|||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
/*
 * Scalar half-precision fmax.
 * A NaN in x yields y; otherwise a NaN in y yields x; otherwise the larger
 * operand is returned (y when the two compare equal).
 */
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
{
   return isnan(x) ? y
        : isnan(y) ? x
        : (y < x)  ? x
                   : y;
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <../../../generic/lib/math/fmax.inc>
|
||||
|
|
|
@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmin(double x, double y)
|
|||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
/*
 * Scalar half-precision fmin.
 * A NaN in x yields y; otherwise a NaN in y yields x; otherwise the smaller
 * operand is returned (x when the two compare equal).
 */
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
{
   return isnan(x) ? y
        : isnan(y) ? x
        : (y < x)  ? y
                   : x;
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <../../../generic/lib/math/fmin.inc>
|
||||
|
|
|
@ -8,3 +8,8 @@ _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
|
|||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half)
|
||||
#endif
|
||||
|
|
|
@ -26,6 +26,13 @@
|
|||
|
||||
_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
|
|
@ -66,3 +66,12 @@
|
|||
#define as_double8(x) __builtin_astype(x, double8)
|
||||
#define as_double16(x) __builtin_astype(x, double16)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#define as_half(x) __builtin_astype(x, half)
|
||||
#define as_half2(x) __builtin_astype(x, half2)
|
||||
#define as_half3(x) __builtin_astype(x, half3)
|
||||
#define as_half4(x) __builtin_astype(x, half4)
|
||||
#define as_half8(x) __builtin_astype(x, half8)
|
||||
#define as_half16(x) __builtin_astype(x, half16)
|
||||
#endif
|
||||
|
|
|
@ -204,4 +204,29 @@
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16: enable
|
||||
|
||||
#define __CLC_GENTYPE half
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
|
||||
#define __CLC_GENTYPE half2
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
|
||||
#define __CLC_GENTYPE half4
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
|
||||
#define __CLC_GENTYPE half8
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
|
||||
#define __CLC_GENTYPE half16
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
|
||||
#endif
|
||||
|
||||
#undef __CLC_BODY
|
||||
|
|
|
@ -64,7 +64,7 @@
|
|||
#define M_SQRT1_2 0x1.6a09e667f3bcdp-1
|
||||
|
||||
#ifdef __CLC_INTERNAL
|
||||
#define M_LOG210 0x1.a934f0979a371p+1
|
||||
#define M_LOG210 0x1.a934f0979a371p+1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -80,6 +80,11 @@
|
|||
#define HALF_MIN_10_EXP -4
|
||||
#define HALF_MIN_EXP -13
|
||||
|
||||
#define HALF_RADIX 2
|
||||
#define HALF_MAX 0x1.ffcp15h
|
||||
#define HALF_MIN 0x1.0p-14h
|
||||
#define HALF_EPSILON 0x1.0p-10h
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -53,4 +53,35 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __FLOAT_ONLY
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
#define __CLC_FLOAT half
|
||||
#define __CLC_FPSIZE 16
|
||||
|
||||
#define __CLC_FLOATN half
|
||||
#define __CLC_SCALAR
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_FLOATN
|
||||
#undef __CLC_SCALAR
|
||||
|
||||
#define __CLC_FLOATN half2
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half3
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half4
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#undef __CLC_FLOAT
|
||||
#undef __CLC_FPSIZE
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef __CLC_BODY
|
||||
|
|
|
@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v
|
|||
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_OVERLOAD half __CLC_FUNCTION(half, half) __asm(__CLC_INTRINSIC ".f16");
|
||||
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
|
||||
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
|
||||
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
|
||||
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
|
||||
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
|
||||
#endif
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __CLC_INTRINSIC
|
||||
|
|
|
@ -110,6 +110,66 @@
|
|||
#undef __CLC_FPSIZE
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __FLOAT_ONLY
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
#define __CLC_SCALAR_GENTYPE half
|
||||
#define __CLC_FPSIZE 16
|
||||
|
||||
#define __CLC_SCALAR
|
||||
#define __CLC_GENTYPE half
|
||||
#define __CLC_INTN int
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_SCALAR
|
||||
|
||||
#define __CLC_GENTYPE half2
|
||||
#define __CLC_INTN int2
|
||||
#define __CLC_VECSIZE 2
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
|
||||
#define __CLC_GENTYPE half3
|
||||
#define __CLC_INTN int3
|
||||
#define __CLC_VECSIZE 3
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
|
||||
#define __CLC_GENTYPE half4
|
||||
#define __CLC_INTN int4
|
||||
#define __CLC_VECSIZE 4
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
|
||||
#define __CLC_GENTYPE half8
|
||||
#define __CLC_INTN int8
|
||||
#define __CLC_VECSIZE 8
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
|
||||
#define __CLC_GENTYPE half16
|
||||
#define __CLC_INTN int16
|
||||
#define __CLC_VECSIZE 16
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_VECSIZE
|
||||
#undef __CLC_GENTYPE
|
||||
#undef __CLC_INTN
|
||||
|
||||
#undef __CLC_FPSIZE
|
||||
#undef __CLC_SCALAR_GENTYPE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef __CLC_BODY
|
||||
#endif
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE)
|
||||
#else
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE)
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE)
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code);
|
||||
|
|
|
@ -15,5 +15,16 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTR
|
|||
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16: enable
|
||||
_CLC_OVERLOAD half __CLC_FUNCTION(half, half, half) __asm(__CLC_INTRINSIC ".f16");
|
||||
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
|
||||
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
|
||||
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
|
||||
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
|
||||
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
|
||||
#endif
|
||||
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __CLC_INTRINSIC
|
||||
|
|
|
@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
|
|||
_CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16: enable
|
||||
_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
|
||||
_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
|
||||
_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
|
||||
_CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
|
||||
_CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
|
||||
_CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
|
||||
#endif
|
||||
|
||||
#undef __CLC_FUNCTION
|
||||
#undef __CLC_INTRINSIC
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#undef __CLC_INT
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
#define __CLC_FLOATN double
|
||||
#define __CLC_INTN int
|
||||
|
@ -76,6 +77,46 @@
|
|||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
#define __CLC_FLOATN half
|
||||
#define __CLC_INTN int
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half2
|
||||
#define __CLC_INTN short2
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half3
|
||||
#define __CLC_INTN short3
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half4
|
||||
#define __CLC_INTN short4
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half8
|
||||
#define __CLC_INTN short8
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#define __CLC_FLOATN half16
|
||||
#define __CLC_INTN short16
|
||||
#include __CLC_BODY
|
||||
#undef __CLC_INTN
|
||||
#undef __CLC_FLOATN
|
||||
|
||||
#endif
|
||||
|
||||
#undef __CLC_BODY
|
||||
|
|
|
@ -15,6 +15,10 @@ _CLC_VECTOR_ISEQUAL_DECL(float, int)
|
|||
_CLC_ISEQUAL_DECL(double, int)
|
||||
_CLC_VECTOR_ISEQUAL_DECL(double, long)
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_ISEQUAL_DECL(half, int)
|
||||
_CLC_VECTOR_ISEQUAL_DECL(half, short)
|
||||
#endif
|
||||
|
||||
/* Drop the declaration helper macros so they do not leak out of this header. */
#undef _CLC_ISEQUAL_DECL
#undef _CLC_VECTOR_ISEQUAL_DECL
|
||||
|
|
|
@ -17,5 +17,10 @@ _CLC_ISINF_DECL(int, double)
|
|||
_CLC_VECTOR_ISINF_DECL(long, double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_ISINF_DECL(int, half)
|
||||
_CLC_VECTOR_ISINF_DECL(short, half)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ISINF_DECL
|
||||
#undef _CLC_VECTOR_ISINF_DECL
|
||||
|
|
|
@ -17,5 +17,10 @@ _CLC_ISNAN_DECL(int, double)
|
|||
_CLC_VECTOR_ISNAN_DECL(long, double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
_CLC_ISNAN_DECL(int, half)
|
||||
_CLC_VECTOR_ISNAN_DECL(short, half)
|
||||
#endif
|
||||
|
||||
#undef _CLC_ISNAN_DECL
|
||||
#undef _CLC_VECTOR_ISNAN_DECL
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int);
|
||||
|
||||
#ifdef cl_khr_fp64
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(half, int);
|
||||
#endif
|
||||
|
|
|
@ -37,3 +37,25 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
|
||||
return p0*p1;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
|
||||
return p0.x*p1.x + p0.y*p1.y;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
|
||||
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
|
||||
return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -79,9 +79,47 @@ _CLC_OVERLOAD _CLC_DEF double length(double3 p) {
|
|||
V_DLENGTH(p);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF double
|
||||
length(double4 p) {
|
||||
_CLC_OVERLOAD _CLC_DEF double length(double4 p) {
|
||||
V_DLENGTH(p);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half length(half p){
|
||||
return fabs(p);
|
||||
}
|
||||
|
||||
// Only available in CLC1.2
|
||||
#ifndef HALF_MIN
|
||||
#define HALF_MIN 0x1.0p-14h
|
||||
#endif
|
||||
|
||||
/*
 * Function body shared by the half2/half3/half4 length() overloads.
 *
 * Computes sqrt(dot(p, p)), rescaling p by a power of two and retrying when
 * the squared length falls below HALF_MIN or overflows to INFINITY.
 *
 * The overflow rescale uses 2^-9: after scaling, a component as large as the
 * biggest finite half squares to less than 2^14, so a sum of up to four such
 * squares stays finite. A factor of 2^-7 is not enough: components of roughly
 * 2^15 and above still overflow when squared (e.g. 40000 * 2^-7 = 312.5 and
 * 312.5^2 is not representable in half).
 *
 * Expands to statements that `return`, and modifies `p` in place, so it must
 * be the whole body of a function whose vector parameter is passed as `p`.
 */
#define V_HLENGTH(p) \
  half l2 = dot(p, p); \
  \
  if (l2 < HALF_MIN) { \
    p *= 0x1.0p+12h; \
    return sqrt(dot(p, p)) * 0x1.0p-12h; \
  } else if (l2 == INFINITY) { \
    p *= 0x1.0p-9h; \
    return sqrt(dot(p, p)) * 0x1.0p+9h; \
  } \
  \
  return sqrt(l2);
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half length(half2 p) {
|
||||
V_HLENGTH(p);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half length(half3 p) {
|
||||
V_HLENGTH(p);
|
||||
}
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF half length(half4 p) {
|
||||
V_HLENGTH(p);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -11,10 +11,15 @@
|
|||
* precision of #4 may be better.
|
||||
*/
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
#define __CLC_CONST(x) x ## f
|
||||
#else
|
||||
// TODO: Enable half precision when atan2 is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define __CLC_CONST(x) x
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define __CLC_CONST(x) x ## f
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define __CLC_CONST(x) x ## h
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
|
||||
|
@ -27,3 +32,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
|
|||
}
|
||||
|
||||
#undef __CLC_CONST
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,12 +1,18 @@
|
|||
// TODO: Enable half precision when atan2 is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
#define __CLC_CONST(x) x ## f
|
||||
#else
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define __CLC_CONST(x) x
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define __CLC_CONST(x) x ## f
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define __CLC_CONST(x) x ## h
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) {
|
||||
return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) -(x*x) ));
|
||||
return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) ));
|
||||
}
|
||||
|
||||
#undef __CLC_CONST
|
||||
|
||||
#endif
|
||||
|
|
|
@ -37,3 +37,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, flo
|
|||
NEXTAFTER(double, ulong, long)
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
NEXTAFTER(half, ushort, short)
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_nextafter, half, half)
|
||||
#endif
|
||||
|
|
|
@ -20,14 +20,15 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
#define __CLC_NAN NAN
|
||||
#define ZERO 0.0f
|
||||
#elif __CLC_FPSIZE == 64
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define __CLC_NAN __builtin_nan("")
|
||||
#define ZERO 0.0
|
||||
#else
|
||||
#error "Invalid value for __CLC_FPSIZE"
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define __CLC_NAN NAN
|
||||
#define ZERO 0.0f
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define __CLC_NAN (half)NAN
|
||||
#define ZERO 0.0h
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
|
||||
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
|
||||
|
||||
// TODO: Enable half precision when the sw routine is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) {
|
||||
return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __CLC_SW_FUNC
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
|
||||
#define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
|
||||
|
||||
// TODO: Enable half precision when the sw routine is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
|
||||
return __CLC_SW_FUNC(__CLC_FUNC)(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#undef __CLC_SW_FUNC
|
||||
|
|
|
@ -10,3 +10,18 @@ _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
|
|||
_CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
/*
 * Scalar half-precision copysign: returns the magnitude of x combined with
 * the sign of y.
 *
 * Operates on the raw 16-bit pattern: 0x8000 selects the sign bit and 0x7fff
 * selects the remaining (exponent and mantissa) bits.
 */
_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
{
   ushort sign_y = as_ushort(y) & 0x8000u;
   ushort unsigned_x = as_ushort(x) & 0x7fffu;

   return as_half((ushort)(sign_y | unsigned_x));
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -12,5 +12,21 @@ _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
|
||||
{
|
||||
if (isnan(x))
|
||||
return y;
|
||||
if (isnan(y))
|
||||
return x;
|
||||
return (x < y) ? y : x;
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <fmax.inc>
|
||||
#include <clc/math/gentype.inc>
|
||||
|
|
|
@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) {
|
|||
|
||||
#endif // ifdef cl_khr_fp64
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) {
|
||||
return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
|
||||
}
|
||||
|
||||
#endif // ifdef cl_khr_fp16
|
||||
|
||||
#endif // !defined(__CLC_SCALAR)
|
||||
|
|
|
@ -10,6 +10,21 @@ _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
|
|||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
|
||||
{
|
||||
if (isnan(x))
|
||||
return y;
|
||||
if (isnan(y))
|
||||
return x;
|
||||
return (y < x) ? y : x;
|
||||
}
|
||||
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#define __CLC_BODY <fmin.inc>
|
||||
|
|
|
@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) {
|
|||
|
||||
#endif // ifdef cl_khr_fp64
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) {
|
||||
return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
|
||||
}
|
||||
|
||||
#endif // ifdef cl_khr_fp16
|
||||
|
||||
#endif // !defined(__CLC_SCALAR)
|
||||
|
|
|
@ -20,16 +20,21 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if __CLC_FPSIZE == 32
|
||||
#define MIN_CONSTANT 0x1.fffffep-1f
|
||||
#else
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define MIN_CONSTANT 0x1.fffffffffffffp-1
|
||||
#define ZERO 0.0
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define MIN_CONSTANT 0x1.fffffep-1f
|
||||
#define ZERO 0.0f
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define MIN_CONSTANT 0x1.ffcp-1h
|
||||
#define ZERO 0.0h
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
|
||||
*iptr = floor(x);
|
||||
__CLC_GENTYPE r = fmin(x - *iptr, MIN_CONSTANT);
|
||||
r = isinf(x) ? 0.0f : r;
|
||||
r = isinf(x) ? ZERO : r;
|
||||
r = isnan(x) ? x : r;
|
||||
return r;
|
||||
}
|
||||
|
@ -47,3 +52,4 @@ FRACT_DEF(local);
|
|||
FRACT_DEF(global);
|
||||
|
||||
#undef MIN_CONSTANT
|
||||
#undef ZERO
|
||||
|
|
|
@ -33,7 +33,13 @@ _CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int)
|
|||
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int)
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_BINARY_BUILTIN(half, ldexp, __clc_ldexp, half, int)
|
||||
#endif
|
||||
|
||||
// This defines all the ldexp(GENTYPE, int) variants
|
||||
|
|
|
@ -20,6 +20,9 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// TODO: Enable half precision when ldexp is implemented.
|
||||
#if __CLC_FPSIZE > 16
|
||||
|
||||
#ifndef __CLC_SCALAR
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
|
||||
|
@ -27,3 +30,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -21,10 +21,12 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// TODO: Enable half precision when the base version is implemented.
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
|
||||
__CLC_INTN private_iptr;
|
||||
__CLC_GENTYPE ret = lgamma_r(x, &private_iptr);
|
||||
*iptr = private_iptr;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -20,9 +20,17 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#if __CLC_FPSIZE == 64
|
||||
#define ZERO 0.0
|
||||
#elif __CLC_FPSIZE == 32
|
||||
#define ZERO 0.0f
|
||||
#elif __CLC_FPSIZE == 16
|
||||
#define ZERO 0.0h
|
||||
#endif
|
||||
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) {
|
||||
*iptr = trunc(x);
|
||||
return copysign(isinf(x) ? 0.0f : x - *iptr, x);
|
||||
return copysign(isinf(x) ? ZERO : x - *iptr, x);
|
||||
}
|
||||
|
||||
#define MODF_DEF(addrspace) \
|
||||
|
@ -35,3 +43,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr)
|
|||
|
||||
MODF_DEF(local);
|
||||
MODF_DEF(global);
|
||||
|
||||
#undef ZERO
|
||||
|
|
|
@ -7,11 +7,18 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) cod
|
|||
{
|
||||
return __CLC_AS_GENTYPE(code | 0x7ff0000000000000ul);
|
||||
}
|
||||
#else
|
||||
#elif __CLC_FPSIZE == 32
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code)
|
||||
{
|
||||
return __CLC_AS_GENTYPE(code | 0x7fc00000);
|
||||
}
|
||||
#elif __CLC_FPSIZE == 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code)
|
||||
{
|
||||
const ushort mask = 0x7e00;
|
||||
const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask;
|
||||
return __CLC_AS_GENTYPE(res);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// TODO: Enable half precision when the sw routine is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
|
||||
return __clc_pown(x, y);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
// TODO: Enable half precision when the sw routine is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
|
||||
__CLC_INTN local_q;
|
||||
__CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
|
||||
*q = local_q;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// TODO: Enable half precision when the sw routine is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
|
||||
return __clc_rootn(x, y);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// TODO: Enable half precision when sin/cos is implemented
|
||||
#if __CLC_FPSIZE > 16
|
||||
#define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
|
||||
_CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
|
||||
*cosval = cos(x); \
|
||||
|
@ -9,3 +11,4 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
|
|||
__CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
|
||||
|
||||
#undef __CLC_DECLARE_SINCOS
|
||||
#endif
|
||||
|
|
|
@ -33,3 +33,11 @@ _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
|
|||
_CLC_DEFINE_UNARY_BUILTIN(double, sqrt, __clc_sqrt, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -26,5 +26,19 @@ _CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8)
|
|||
_CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#undef _CLC_DEFINE_ISEQUAL
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isequal(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
_CLC_DEFINE_ISEQUAL(int, isequal, half, half)
|
||||
_CLC_DEFINE_ISEQUAL(short2, isequal, half2, half2)
|
||||
_CLC_DEFINE_ISEQUAL(short3, isequal, half3, half3)
|
||||
_CLC_DEFINE_ISEQUAL(short4, isequal, half4, half4)
|
||||
_CLC_DEFINE_ISEQUAL(short8, isequal, half8, half8)
|
||||
_CLC_DEFINE_ISEQUAL(short16, isequal, half16, half16)
|
||||
|
||||
#endif
|
||||
|
||||
#undef _CLC_DEFINE_ISEQUAL
|
||||
|
|
|
@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
|
|||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isfinite(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
_CLC_DEF _CLC_OVERLOAD int isfinite(half x) {
|
||||
return __builtin_isfinite(x);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,3 +20,18 @@ _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isgreater(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){
|
||||
return __builtin_isgreater(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isgreaterequal(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int isgreaterequal(half x, half y){
|
||||
return __builtin_isgreaterequal(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreaterequal, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -14,5 +14,17 @@ _CLC_DEF _CLC_OVERLOAD int isinf(double x) {
|
|||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isinf(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
_CLC_DEF _CLC_OVERLOAD int isinf(half x) {
|
||||
return __builtin_isinf(x);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half)
|
||||
#endif
|
||||
|
|
|
@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isless(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int isless(half x, half y){
|
||||
return __builtin_isless(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of islessequal(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){
|
||||
return __builtin_islessequal(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of islessgreater(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){
|
||||
return __builtin_islessgreater(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -16,3 +16,17 @@ _CLC_DEF _CLC_OVERLOAD int isnan(double x) {
|
|||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isnan(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
_CLC_DEF _CLC_OVERLOAD int isnan(half x) {
|
||||
return __builtin_isnan(x);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
|
|||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isnormal(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
_CLC_DEF _CLC_OVERLOAD int isnormal(half x) {
|
||||
return __builtin_isnormal(x);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,16 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float)
|
|||
_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isnotequal(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half)
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#undef _CLC_DEFINE_ISNOTEQUAL
|
||||
|
|
|
@ -18,6 +18,16 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float)
|
|||
_CLC_DEFINE_ISORDERED(int, isordered, double, double)
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isordered(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEFINE_ISORDERED(int, isordered, half, half)
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half)
|
||||
|
||||
#endif
|
||||
|
||||
#undef _CLC_DEFINE_ISORDERED
|
||||
|
|
|
@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
|
|||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of isunordered(half, half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){
|
||||
return __builtin_isunordered(x, y);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -17,3 +17,17 @@ _CLC_DEF _CLC_OVERLOAD int signbit(double x){
|
|||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
|
||||
|
||||
#endif
|
||||
#ifdef cl_khr_fp16
|
||||
|
||||
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
|
||||
|
||||
// The scalar version of signbit(half) returns an int, but the vector versions
|
||||
// return short.
|
||||
|
||||
_CLC_DEF _CLC_OVERLOAD int signbit(half x){
|
||||
return __builtin_signbit(x);
|
||||
}
|
||||
|
||||
_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half)
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
|
||||
// This does not exist for fp16
|
||||
#if __CLC_FPSIZE > 16
|
||||
#ifdef __CLC_VECSIZE
|
||||
|
||||
#if __CLC_VECSIZE == 3
|
||||
|
@ -17,3 +18,4 @@
|
|||
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
|
||||
FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
|
||||
#endif
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue