vstore: Cleanup and add vstore(half)

Add missing #undefs for the helper macros.
Make the helpers amdgpu-specific (NVPTX uses different numbering for the private address space).
Use clang builtins when building with clang >= 6.

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Tom Stellard <tstellar@redhat.com>
llvm-svn: 312838
This commit is contained in:
Jan Vesely 2017-09-08 23:58:57 +00:00
parent b9dbaae3fb
commit 661ac03a1b
6 changed files with 49 additions and 16 deletions

View File

@ -0,0 +1 @@
shared/vstore_half_helpers.ll

View File

@ -0,0 +1 @@
shared/vstore_half_helpers.ll

View File

@ -29,7 +29,6 @@
_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64: enable
_CLC_VECTOR_VSTORE_PRIM1(double)
_CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
_CLC_VSTORE_DECL(_half, half, double, , __private)
@ -37,7 +36,17 @@
_CLC_VSTORE_DECL(_half, half, double, , __global)
#endif
#ifdef cl_khr_fp16
_CLC_VECTOR_VSTORE_PRIM1(half)
#endif
_CLC_VECTOR_VSTORE_PRIM()
_CLC_VSTORE_DECL(_half, half, float, , __private)
_CLC_VSTORE_DECL(_half, half, float, , __local)
_CLC_VSTORE_DECL(_half, half, float, , __global)
/* Undefine the declaration helper macros; several of them are invoked above. */
#undef _CLC_VSTORE_DECL
#undef _CLC_VECTOR_VSTORE_DECL
#undef _CLC_VECTOR_VSTORE_PRIM3
#undef _CLC_VECTOR_VSTORE_PRIM1
#undef _CLC_VECTOR_VSTORE_PRIM

View File

@ -147,7 +147,6 @@ shared/max.cl
shared/min.cl
shared/vload.cl
shared/vstore.cl
shared/vstore_half_helpers.ll
workitem/get_global_id.cl
workitem/get_global_size.cl
image/get_image_dim.cl

View File

@ -50,23 +50,34 @@ VSTORE_TYPES()
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VSTORE_ADDR_SPACES(double)
#endif
/* vstore_half are legal even without cl_khr_fp16 */
#define DECLARE_HELPER(STYPE, AS) void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
DECLARE_HELPER(float, __private);
DECLARE_HELPER(float, __global);
DECLARE_HELPER(float, __local);
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
DECLARE_HELPER(double, __private);
DECLARE_HELPER(double, __global);
DECLARE_HELPER(double, __local);
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
VSTORE_ADDR_SPACES(half)
#endif
/* vstore_half are legal even without cl_khr_fp16 */
/*
 * DECLARE_HELPER(STYPE, AS, builtin) introduces the function
 * __clc_vstore_half_<STYPE>_helper<AS>, which takes a STYPE value and an
 * AS-qualified half pointer.
 *
 * When __clang_major__ is below 6 (an undefined __clang_major__ evaluates to 0
 * in #if and also lands here) the helper is only declared and the third macro
 * argument is ignored; its definition has to come from outside this file
 * (presumably vstore_half_helpers.ll -- TODO confirm).
 */
#if __clang_major__ < 6
#define DECLARE_HELPER(STYPE, AS, builtin) void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
#else
/*
 * Otherwise the helper is defined inline and simply forwards both of its
 * arguments to the builtin named by the third macro argument.
 */
#define DECLARE_HELPER(STYPE, AS, __builtin) \
inline void __clc_vstore_half_##STYPE##_helper##AS(STYPE s, AS half *d) \
{ \
__builtin(s, d); \
}
#endif
/* float helpers, one per address space, all paired with __builtin_store_halff */
DECLARE_HELPER(float, __private, __builtin_store_halff);
DECLARE_HELPER(float, __global, __builtin_store_halff);
DECLARE_HELPER(float, __local, __builtin_store_halff);
/* double helpers are paired with __builtin_store_half and exist only when cl_khr_fp64 is defined */
#ifdef cl_khr_fp64
DECLARE_HELPER(double, __private, __builtin_store_half);
DECLARE_HELPER(double, __global, __builtin_store_half);
DECLARE_HELPER(double, __local, __builtin_store_half);
#endif
/*
 * Store one scalar: pass `val` and the address of mem[offset] to the
 * __clc_vstore_half_<STYPE>_helper<AS> function, then post-increment `offset`.
 * `mem` and `offset` are not macro parameters, so they must be in scope
 * wherever this macro is expanded.
 */
#define VEC_STORE1(STYPE, AS, val) __clc_vstore_half_##STYPE##_helper##AS (val, &mem[offset++]);
/* Store a two-component value as two scalar stores: val.lo first, then val.hi. */
#define VEC_STORE2(STYPE, AS, val) \
VEC_STORE1(STYPE, AS, val.lo) \
VEC_STORE1(STYPE, AS, val.hi)
@ -94,4 +105,16 @@ DECLARE_HELPER(double, __local);
#define __CLC_BODY "vstore_half.inc"
#include <clc/math/gentype.inc>
#undef __CLC_BODY
/* Drop the helper macros so that none of them outlive this file. */
#undef FUNC
#undef __FUNC
/*
 * The per-width store helpers in this file are named VEC_STORE<N>
 * (VEC_STORE1 and VEC_STORE2 are defined above; the wider variants are
 * presumably defined alongside them). Undefining a name that is not
 * currently a macro is harmless, so listing every width is safe.
 */
#undef VEC_STORE16
#undef VEC_STORE8
#undef VEC_STORE4
#undef VEC_STORE3
#undef VEC_STORE2
#undef VEC_STORE1
#undef DECLARE_HELPER
#undef VSTORE_TYPES
#undef VSTORE_ADDR_SPACES
#undef VSTORE_VECTORIZE