487 lines
14 KiB
Python
487 lines
14 KiB
Python
# OpenCL built-in library: type conversion functions
|
|
#
|
|
# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
|
|
# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
# in the Software without restriction, including without limitation the rights
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
# furnished to do so, subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in
|
|
# all copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
# THE SOFTWARE.
|
|
|
|
# This script generates the file convert_type.cl, which contains all of the
|
|
# OpenCL functions in the form:
|
|
#
|
|
# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
|
|
|
|
# Scalar OpenCL type names handled by the generator, grouped by category.
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
float_types = ["float", "double"]
types = int_types + float_types
unsigned_types = [name for name in int_types if name.startswith("u")]

# Types whose use is wrapped in a preprocessor guard by conditional_guard().
int64_types = ["long", "ulong"]
float64_types = ["double"]

# Vector width suffixes ("" is the scalar case), and (width, half width)
# pairs used to build a wide vector conversion from its .lo/.hi halves.
vector_sizes = ["", "2", "3", "4", "8", "16"]
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]

# Optional function-name suffixes.
saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]

# Limit-macro prefix and literal suffix per floating-point type.
# NOTE(review): neither table is referenced by the code visible in this file.
float_prefix = dict(float="FLT_", double="DBL_")
float_suffix = dict(float="f", double="")

# Signed integer type used for the condition argument of select() when the
# result has the given type: integers map to their signed form, float and
# double to the signed integer of the same size.
bool_type = {
    name: (name[1:] if name in unsigned_types else name) for name in int_types
}
bool_type.update(float="int", double="long")

# Unsigned counterpart of each integer type (unsigned types map to themselves).
unsigned_type = {
    name: (name if name in unsigned_types else "u" + name) for name in int_types
}

# Size of each type in bytes.
sizeof_type = {
    name: width
    for width, names in (
        (1, ("char", "uchar")),
        (2, ("short", "ushort")),
        (4, ("int", "uint", "float")),
        (8, ("long", "ulong", "double")),
    )
    for name in names
}

# Largest / smallest representable value of each integer type, spelled as the
# C limit macro (or the literal "0" for the minimum of unsigned types).
limit_max = dict(
    zip(
        int_types,
        [
            "CHAR_MAX",
            "UCHAR_MAX",
            "SHRT_MAX",
            "USHRT_MAX",
            "INT_MAX",
            "UINT_MAX",
            "LONG_MAX",
            "ULONG_MAX",
        ],
    )
)
limit_min = {
    name: ("0" if name in unsigned_types else limit_max[name].replace("_MAX", "_MIN"))
    for name in int_types
}
|
|
|
|
|
|
def conditional_guard(src, dst):
    """Print the preprocessor guard required by a source/destination pair.

    Prints an opening ``#ifdef``/``#if`` line when either type is listed in
    ``float64_types`` or ``int64_types``.

    Returns True when a guard line was printed, so the caller knows it has to
    print the matching ``#endif``; returns False when nothing was printed.
    """
    uses_int64 = False
    uses_fp64 = False
    for type_name in (src, dst):
        if type_name in int64_types:
            uses_int64 = True
        elif type_name in float64_types:
            uses_fp64 = True

    if uses_fp64:
        # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has
        # to be supported as well (the preamble printed by this script emits
        # an #error otherwise), so the fp64 guard alone is printed even when
        # the other type is a 64-bit integer.
        print("#ifdef cl_khr_fp64")
        return True

    if uses_int64:
        print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
        return True

    return False
|
|
|
|
|
|
# Preamble of the generated file, printed once before any function.  The two
# adjacent literals below are concatenated by the parser into a single string.
print(
    # "Do not edit" banner and licence text, as one C block comment.
    """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
$ ./generate-conversion-type-cl.sh

OpenCL type conversion functions

Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
"""
    # Include line and extension pragmas.  The #error rejects an embedded
    # profile that has cl_khr_fp64 but lacks cles_khr_int64.
    """
#include <clc/clc.h>

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

"""
)
|
|
|
|
#
|
|
# Default Conversions
|
|
#
|
|
# All conversions are in accordance with the OpenCL specification,
|
|
# which cites the C99 conversion rules.
|
|
#
|
|
# Casting from floating point to integer results in conversions
|
|
# with truncation, so it should be suitable for the default convert
|
|
# functions.
|
|
#
|
|
# Conversions from integer to floating-point, and floating-point to
|
|
# floating-point through casting are done with the default rounding
|
|
# mode. While C99 allows dynamically changing the rounding mode
|
|
# during runtime, it is not a supported feature in OpenCL according
|
|
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
|
|
#
|
|
# Therefore, we can assume for optimization purposes that the
|
|
# rounding mode is fixed to round-to-nearest-even. Platform target
|
|
# authors should ensure that the rounding-control registers remain
|
|
# in this state, and that this invariant holds.
|
|
#
|
|
# Also note, even though the OpenCL specification isn't entirely
|
|
# clear on this matter, we implement all rounding mode combinations
|
|
# even for integer-to-integer conversions. When such a conversion
|
|
# is used, the rounding mode is ignored.
|
|
#
|
|
|
|
|
|
def generate_default_conversion(src, dst, mode):
    """Print the unsaturated ``convert_<dst><n><mode>`` overloads for one pair.

    One scalar overload and one overload per vector width (2, 4, 8, 16 and 3)
    are printed, wrapped in the preprocessor guard chosen by
    ``conditional_guard`` when one is needed.

    src  -- source scalar type name
    dst  -- destination scalar type name
    mode -- rounding-mode suffix appended to the function name ("" for none);
            it only affects the name, the generated bodies do not use it
    """
    needs_endif = conditional_guard(src, dst)

    # Every overload shares the same signature shape; only the width differs.
    signature = (
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)\n"
        "{{\n"
    )

    # Scalar overload: a plain C-style cast.
    scalar_body = "return ({DST})x;\n}}\n"
    print((signature + scalar_body).format(SRC=src, DST=dst, N="", M=mode))

    # Widths 2, 4, 8 and 16: convert the two halves with the suffix-less
    # conversion of half the width and concatenate the results.
    halves_body = (
        "return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));\n}}\n"
    )
    for width, half_width in half_sizes:
        print(
            (signature + halves_body).format(
                SRC=src, DST=dst, N=width, H=half_width, M=mode
            )
        )

    # Width 3: a 2-component conversion plus a scalar one.  Unlike the other
    # overloads, this text has no trailing newline of its own.
    three_body = "return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));\n}}"
    print((signature + three_body).format(SRC=src, DST=dst, N="3", M=mode))

    if needs_endif:
        print("#endif")
|
|
|
|
|
|
# Suffix-less conversions: every type to every type (including itself).
for src, dst in ((s, d) for s in types for d in types):
    generate_default_conversion(src, dst, "")

# Rounding-mode suffixed variants of the integer-to-integer conversions.
for src, dst, mode in (
    (s, d, m) for s in int_types for d in int_types for m in rounding_modes
):
    generate_default_conversion(src, dst, mode)
|
|
|
|
#
|
|
# Saturated Conversions To Integers
|
|
#
|
|
# These functions are dependent on the unsaturated conversion functions
|
|
# generated above, and use clamp, max, min, and select to eliminate
|
|
# branching and vectorize the conversions.
|
|
#
|
|
# Again, as above, we allow all rounding modes for integer-to-integer
|
|
# conversions with saturation.
|
|
#
|
|
|
|
|
|
def generate_saturated_conversion(src, dst, size):
    """Print one ``convert_<dst><size>_sat(<src><size>)`` function.

    src  -- source scalar type name
    dst  -- destination scalar type name (an integer type)
    size -- vector width suffix ("" for scalar)

    The generated body limits ``x`` to the destination range and then calls
    the unsaturated ``convert_<dst><size>``.  The whole function is wrapped in
    the guard chosen by ``conditional_guard`` when one is needed.
    """
    needs_endif = conditional_guard(src, dst)

    # Signature and opening brace.
    print(
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)\n"
        "{{".format(DST=dst, SRC=src, N=size)
    )

    # FIXME: This is a work around for lack of select function with
    # signed third argument when the first two arguments are unsigned types.
    # We cast to the signed type for sign-extension, then do a bitcast to
    # the unsigned type.
    bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
    bool_suffix = ""
    if dst in unsigned_types:
        bool_prefix = "as_{DST}{N}(".format(DST=dst, N=size) + bool_prefix
        bool_suffix = ")"

    if src == dst:
        # Same type on both sides: nothing to saturate.
        print(" return x;")

    elif src in float_types:
        # Float to integer: convert first, then replace the result by the
        # destination limit wherever the input lies outside the range.
        float_body = "\n".join(
            [
                " {DST}{N} y = convert_{DST}{N}(x);",
                "y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});",
                "y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});",
                "return y;",
            ]
        )
        print(
            float_body.format(
                SRC=src,
                DST=dst,
                N=size,
                DST_MIN=limit_min[dst],
                DST_MAX=limit_max[dst],
                BP=bool_prefix,
                BS=bool_suffix,
            )
        )

    else:
        # Integer to integer conversion: limit x in the source type, then
        # convert.  Which bound(s) can be exceeded depends on the relative
        # sizes and on the signedness of both types.
        src_bytes = sizeof_type[src]
        dst_bytes = sizeof_type[dst]
        src_is_unsigned = src in unsigned_types

        if src_bytes >= dst_bytes and src_is_unsigned:
            # Unsigned source that is not narrower: only the upper bound
            # can be exceeded.
            print(
                " x = min(x, ({SRC}){DST_MAX});".format(
                    SRC=src, DST_MAX=limit_max[dst]
                )
            )
        elif src_bytes > dst_bytes:
            # Wider signed source: both bounds can be exceeded.
            print(
                " x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});".format(
                    SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]
                )
            )
        elif not src_is_unsigned and (
            src_bytes == dst_bytes or dst in unsigned_types
        ):
            # Signed source of equal size, or narrower signed source going
            # to an unsigned destination: only negative values need fixing.
            print(" x = max(x, ({SRC})0);".format(SRC=src))
        # Remaining narrower-source cases need no limiting at all.

        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    # Closing brace and, if a guard was opened, its #endif.
    print("}")
    if needs_endif:
        print("#endif")
|
|
|
|
|
|
# Saturated conversions from every type to every integer type, at every
# vector width.
for src, dst, size in (
    (s, d, n) for s in types for d in int_types for n in vector_sizes
):
    generate_saturated_conversion(src, dst, size)
|
|
|
|
|
|
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print ``convert_<dst><size>_sat<mode>`` as a forwarder.

    The generated function simply returns ``convert_<dst><size>_sat(x)``; the
    rounding mode appears only in its name.

    src  -- source scalar type name
    dst  -- destination scalar type name
    size -- vector width suffix ("" for scalar)
    mode -- rounding-mode suffix appended after ``_sat``
    """
    needs_endif = conditional_guard(src, dst)

    forwarder = "\n".join(
        [
            "_CLC_DEF _CLC_OVERLOAD",
            "{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)",
            "{{",
            "return convert_{DST}{N}_sat(x);",
            "}}",
            "",
        ]
    )
    print(forwarder.format(DST=dst, SRC=src, N=size, M=mode))

    if needs_endif:
        print("#endif")
|
|
|
|
|
|
# Rounding-mode suffixed saturated conversions, integer-to-integer only.
for src, dst, size, mode in (
    (s, d, n, m)
    for s in int_types
    for d in int_types
    for n in vector_sizes
    for m in rounding_modes
):
    generate_saturated_conversion_with_rounding(src, dst, size, mode)
|
|
|
|
#
|
|
# Conversions To/From Floating-Point With Rounding
|
|
#
|
|
# Note that we assume as above that casts from floating-point to
|
|
# integer are done with truncation, and that the default rounding
|
|
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
|
|
# rounding rules.
|
|
#
|
|
# These functions rely on the use of abs, ceil, fabs, floor,
|
|
# nextafter, sign, rint and the above generated conversion functions.
|
|
#
|
|
# Only conversions to integers can have saturation.
|
|
#
|
|
|
|
|
|
def generate_float_conversion(src, dst, size, mode, sat):
    """Print one ``convert_<dst><size><sat><mode>(<src><size>)`` function.

    src  -- source scalar type name
    dst  -- destination scalar type name
    size -- vector width suffix ("" for scalar)
    mode -- rounding-mode suffix ("_rtz", "_rte", "_rtp" or "_rtn")
    sat  -- saturation suffix ("" or "_sat")

    Integer destination: ``x`` is first rounded with rint/ceil/floor as the
    mode requires (nothing for "_rtz") and then passed to the suffix-less
    conversion, keeping ``sat``.

    Floating-point destination: "_rte" forwards to the suffix-less
    conversion.  For the other modes the generated code converts, converts the
    result back to the source type, and steps the result by one value with
    nextafter() where that round trip shows it moved the wrong way.
    """
    needs_endif = conditional_guard(src, dst)

    # Signature and opening brace.
    print(
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)\n"
        "{{".format(SRC=src, DST=dst, N=size, M=mode, S=sat)
    )

    if dst in int_types:
        # Round in the source type first; "_rtz" needs no call because the
        # suffix-less conversion is assumed to truncate (see file comments).
        rounding_call = {"_rte": "rint", "_rtp": "ceil", "_rtn": "floor"}.get(mode)
        if rounding_call is not None:
            print(" x = {FN}(x);".format(FN=rounding_call))
        print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))

    elif mode == "_rte":
        # The suffix-less conversion is used directly for round-to-nearest-even.
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    else:
        # r is the result of the suffix-less conversion, y is r converted back
        # to the source type so it can be compared against x.
        print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
        print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))

        if mode == "_rtz":
            # Round-toward-zero compares magnitudes; integer sources use
            # abs() into the unsigned type, float sources use fabs().
            if src in int_types:
                abs_call, abs_type = "abs", unsigned_type[src]
            else:
                abs_call, abs_type = "fabs", src
            for operand in ("x", "y"):
                print(
                    " {T}{N} abs_{V} = {FN}({V});".format(
                        T=abs_type, N=size, V=operand, FN=abs_call
                    )
                )

        # Per mode: the nextafter() target and the condition under which the
        # result has to be stepped toward it.
        correction = {
            "_rtz": ("sign(r) * ({DST}{N})-INFINITY", "abs_y > abs_x"),
            "_rtp": ("({DST}{N})INFINITY", "y < x"),
            "_rtn": ("({DST}{N})-INFINITY", "y > x"),
        }.get(mode)
        if correction is not None:
            target, wrong_way = correction
            statement = (
                " return select(r, nextafter(r, "
                + target
                + "), convert_{BOOL}{N}("
                + wrong_way
                + "));"
            )
            print(statement.format(DST=dst, N=size, BOOL=bool_type[dst]))

    # Closing brace and, if a guard was opened, its #endif.
    print("}")
    if needs_endif:
        print("#endif")
|
|
|
|
|
|
# Floating-point to integer: every rounding mode, with and without saturation.
for src, dst, size, mode, sat in (
    (s, d, n, m, q)
    for s in float_types
    for d in int_types
    for n in vector_sizes
    for m in rounding_modes
    for q in saturation
):
    generate_float_conversion(src, dst, size, mode, sat)


# Any type to floating-point: every rounding mode, never saturated.
for src, dst, size, mode in (
    (s, d, n, m)
    for s in types
    for d in float_types
    for n in vector_sizes
    for m in rounding_modes
):
    generate_float_conversion(src, dst, size, mode, "")