diff options
author | Erik Schnetter <schnetter@gmail.com> | 2013-08-15 09:41:47 -0600 |
---|---|---|
committer | Erik Schnetter <schnetter@gmail.com> | 2013-08-15 09:41:47 -0600 |
commit | 7aaa6e4b35a3625039b2605e31ddfe7cd6f43286 (patch) | |
tree | ec8c8b6c23de6d381139cf0c570b1ddf74922172 | |
parent | af4e5e29311f019d70d1d0a74b206ab234e1cff1 (diff) | |
download | vecmathlib-7aaa6e4b35a3625039b2605e31ddfe7cd6f43286.zip vecmathlib-7aaa6e4b35a3625039b2605e31ddfe7cd6f43286.tar.gz |
Remove pocl subdirectory. This is now instead part of pocl.
-rw-r--r-- | pocl/cross.cl | 43 | ||||
-rw-r--r-- | pocl/distance.cl | 49 | ||||
-rw-r--r-- | pocl/dot.cl | 49 | ||||
-rw-r--r-- | pocl/fast_distance.cl | 23 | ||||
-rw-r--r-- | pocl/fast_length.cl | 26 | ||||
-rw-r--r-- | pocl/fast_normalize.cl | 26 | ||||
-rwxr-xr-x | pocl/generate-files.py | 657 | ||||
-rw-r--r-- | pocl/length.cl | 49 | ||||
-rw-r--r-- | pocl/normalize.cl | 49 | ||||
-rw-r--r-- | pocl/pocl-compat.h | 118 |
10 files changed, 0 insertions, 1089 deletions
diff --git a/pocl/cross.cl b/pocl/cross.cl deleted file mode 100644 index 7b8e861..0000000 --- a/pocl/cross.cl +++ /dev/null @@ -1,43 +0,0 @@ -__attribute__((__overloadable__)) -float4 cross(float4 p0, float4 p1) -{ - float4 r; - r.x = p0.y*p1.z - p0.z*p1.y; - r.y = p0.z*p1.x - p0.x*p1.z; - r.z = p0.x*p1.y - p0.y*p1.x; - r.w = 0.0f; - return r; -} - -__attribute__((__overloadable__)) -float3 cross(float3 p0, float3 p1) -{ - float3 r; - r.x = p0.y*p1.z - p0.z*p1.y; - r.y = p0.z*p1.x - p0.x*p1.z; - r.z = p0.x*p1.y - p0.y*p1.x; - return r; -} - -#ifdef cl_khr_fp64 -__attribute__((__overloadable__)) -double4 cross(double4 p0, double4 p1) -{ - double4 r; - r.x = p0.y*p1.z - p0.z*p1.y; - r.y = p0.z*p1.x - p0.x*p1.z; - r.z = p0.x*p1.y - p0.y*p1.x; - r.w = 0.0f; - return r; -} - -__attribute__((__overloadable__)) -double3 cross(double3 p0, double3 p1) -{ - double3 r; - r.x = p0.y*p1.z - p0.z*p1.y; - r.y = p0.z*p1.x - p0.x*p1.z; - r.z = p0.x*p1.y - p0.y*p1.x; - return r; -} -#endif diff --git a/pocl/distance.cl b/pocl/distance.cl deleted file mode 100644 index 5df8637..0000000 --- a/pocl/distance.cl +++ /dev/null @@ -1,49 +0,0 @@ -__attribute__((__overloadable__)) -float distance(float p0, float p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -float distance(float2 p0, float2 p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -float distance(float3 p0, float3 p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -float distance(float4 p0, float4 p1) -{ - return length(p0-p1); -} - -#ifdef cl_khr_fp64 -__attribute__((__overloadable__)) -double distance(double p0, double p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -double distance(double2 p0, double2 p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -double distance(double3 p0, double3 p1) -{ - return length(p0-p1); -} - -__attribute__((__overloadable__)) -double distance(double4 p0, double4 p1) -{ - return length(p0-p1); -} -#endif diff --git a/pocl/dot.cl b/pocl/dot.cl deleted file mode 100644 index 91bb400..0000000 --- a/pocl/dot.cl +++ /dev/null @@ -1,49 +0,0 @@ -__attribute__((__overloadable__)) -float dot(float p0, float p1) -{ - return p0*p1; -} - -__attribute__((__overloadable__)) -float dot(float2 p0, float2 p1) -{ - return p0.x*p1.x + p0.y*p1.y; -} - -__attribute__((__overloadable__)) -float dot(float3 p0, float3 p1) -{ - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; -} - -__attribute__((__overloadable__)) -float dot(float4 p0, float4 p1) -{ - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; -} - -#ifdef cl_khr_fp64 -__attribute__((__overloadable__)) -double dot(double p0, double p1) -{ - return p0*p1; -} - -__attribute__((__overloadable__)) -double dot(double2 p0, double2 p1) -{ - return p0.x*p1.x + p0.y*p1.y; -} - -__attribute__((__overloadable__)) -double dot(double3 p0, double3 p1) -{ - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; -} - -__attribute__((__overloadable__)) -double dot(double4 p0, double4 p1) -{ - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; -} -#endif diff --git a/pocl/fast_distance.cl b/pocl/fast_distance.cl deleted file mode 100644 index c2a7e9e..0000000 --- a/pocl/fast_distance.cl +++ /dev/null @@ -1,23 +0,0 @@ -__attribute__((__overloadable__)) -float fast_distance(float p0, float p1) -{ - return fast_length(p0-p1); -} - -__attribute__((__overloadable__)) -float fast_distance(float2 p0, float2 p1) -{ - return fast_length(p0-p1); -} - -__attribute__((__overloadable__)) -float fast_distance(float3 p0, float3 p1) -{ - return fast_length(p0-p1); -} - -__attribute__((__overloadable__)) -float fast_distance(float4 p0, float4 p1) -{ - return fast_length(p0-p1); -} diff --git a/pocl/fast_length.cl b/pocl/fast_length.cl deleted file mode 100644 index eb765b9..0000000 --- a/pocl/fast_length.cl +++ /dev/null @@ -1,26 +0,0 @@ -// Note: Chapter 6.12.5 of the OpenCL standard says to use half_sqrt, -// not fast_sqrt - -__attribute__((__overloadable__)) -float fast_length(float p) -{ - return half_sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float fast_length(float2 p) -{ - return half_sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float fast_length(float3 p) -{ - return half_sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float fast_length(float4 p) -{ - return half_sqrt(dot(p, p)); -} diff --git a/pocl/fast_normalize.cl b/pocl/fast_normalize.cl deleted file mode 100644 index ecdd524..0000000 --- a/pocl/fast_normalize.cl +++ /dev/null @@ -1,26 +0,0 @@ -// Note: Chapter 6.12.5 of the OpenCL standard says to use half_rsqrt, -// not fast_rsqrt - -__attribute__((__overloadable__)) -float fast_normalize(float p) -{ - return p * half_rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float2 fast_normalize(float2 p) -{ - return p * half_rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float3 fast_normalize(float3 p) -{ - return p * half_rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float4 fast_normalize(float4 p) -{ - return p * half_rsqrt(dot(p, p)); -} diff --git a/pocl/generate-files.py b/pocl/generate-files.py deleted file mode 100755 index b089334..0000000 --- a/pocl/generate-files.py +++ /dev/null @@ -1,657 +0,0 @@ -#! /usr/bin/env python - -import re, sys - - - -# Types: -SI = "SI" # int/long -SK = "SK" # int (even for double) -SF = "SF" # float/double -VB = "VB" # boolN -VI = "VI" # intN/longN -VJ = "VJ" # intN/longN (except int1 for double1) -VK = "VK" # intN (even for doubleN) -VU = "VU" # uintN/ulongN -VF = "VF" # floatN/doubleN -PVK = "PVK" # pointer to VK -PVF = "PVF" # pointer to VF - -# Each function is described by a tuple with the following entries: -# 1. name -# 2. external argument types (see above) -# 3. external return type -# 4. vecmathlib argument types (see above) -# 5. vecmathlib return type -# This allows generating externally visible functions with different -# signatures, e.g. to support OpenCL. -vmlfuncs = [ - # Section 6.12.2 - ("acos" , [VF ], VF, [VF ], VF), - ("acosh" , [VF ], VF, [VF ], VF), - ("asin" , [VF ], VF, [VF ], VF), - ("asinh" , [VF ], VF, [VF ], VF), - ("atan" , [VF ], VF, [VF ], VF), - ("atanh" , [VF ], VF, [VF ], VF), - ("cbrt" , [VF ], VF, [VF ], VF), - ("ceil" , [VF ], VF, [VF ], VF), - ("copysign" , [VF, VF ], VF, [VF, VF ], VF), - ("cos" , [VF ], VF, [VF ], VF), - ("cosh" , [VF ], VF, [VF ], VF), - ("exp" , [VF ], VF, [VF ], VF), - ("exp2" , [VF ], VF, [VF ], VF), - ("exp10" , [VF ], VF, [VF ], VF), - ("expm1" , [VF ], VF, [VF ], VF), - ("fabs" , [VF ], VF, [VF ], VF), - ("fdim" , [VF, VF ], VF, [VF, VF ], VF), - ("floor" , [VF ], VF, [VF ], VF), - ("fma" , [VF, VF, VF], VF, [VF, VF, VF], VF), - ("fmax" , [VF, VF ], VF, [VF, VF ], VF), - ("fmin" , [VF, VF ], VF, [VF, VF ], VF), - ("fmod" , [VF, VF ], VF, [VF, VF ], VF), - ("hypot" , [VF, VF ], VF, [VF, VF ], VF), - ("ilogb_" , [VF ], VJ, [VF ], VI), # should return VK - ("ldexp_" , [VF, VJ ], VF, [VF, VI ], VF), # should take VK - ("ldexp_" , [VF, SK ], VF, [VF, SI ], VF), # should take VK - ("log" , [VF ], VF, [VF ], VF), - ("log2" , [VF ], VF, [VF ], VF), - ("log10" , [VF ], VF, [VF ], VF), - ("log1p" , [VF ], VF, [VF ], VF), - ("pow" , [VF, VF ], VF, [VF, VF ], VF), - ("remainder", [VF, VF ], VF, [VF, VF ], VF), - ("rint" , [VF ], VF, [VF ], VF), - ("round" , [VF ], VF, [VF ], VF), - ("rsqrt" , [VF ], VF, [VF ], VF), - ("sin" , [VF ], VF, [VF ], VF), - ("sinh" , [VF ], VF, [VF ], VF), - ("sqrt" , [VF ], VF, [VF ], VF), - ("tan" , [VF ], VF, [VF ], VF), - ("tanh" , [VF ], VF, [VF ], VF), - ("trunc" , [VF ], VF, [VF ], VF), - - # Section 6.12.6 - ("isfinite" , [VF ], VJ, [VF ], VB), - ("isinf" , [VF ], VJ, [VF ], VB), - ("isnan" , [VF ], VJ, [VF ], VB), - ("isnormal" , [VF ], VJ, [VF ], VB), - ("signbit" , [VF ], VJ, [VF ], VB), - ] - -directfuncs = [ - # Section 6.12.2 - ("acospi" , [VF ], VF, "acos(x0)/(scalar_t)M_PI"), - ("asinpi" , [VF ], VF, "asin(x0)/(scalar_t)M_PI"), - ("atanpi" , [VF ], VF, "atan(x0)/(scalar_t)M_PI"), - ("atan2" , [VF, VF ], VF, "({ vector_t a=atan(x0/x1); x1>(scalar_t)0.0f ? a : x1<(scalar_t)0.0f ? a+copysign((scalar_t)M_PI,x0) : copysign((scalar_t)M_PI_2,x0); })"), - ("atan2pi" , [VF, VF ], VF, "atan2(x0,x1)/(scalar_t)M_PI"), - ("cospi" , [VF ], VF, "cos((scalar_t)M_PI*x0)"), - ("fmax" , [VF, SF ], VF, "fmax(x0,(vector_t)x1)"), - ("fmin" , [VF, SF ], VF, "fmin(x0,(vector_t)x1)"), - ("fract" , [VF, PVF ], VF, "*x1=floor(x0), fmin(x0-floor(x0), sizeof(scalar_t)==sizeof(float) ? (scalar_t)POCL_FRACT_MIN_F : (scalar_t)POCL_FRACT_MIN)"), - ("frexp" , [VF, PVK ], VF, "*x1=ilogb(x0), ldexp(x0,-ilogb(x0))"), - ("ilogb" , [VF ], VK, "convert_kvector_t(({ __attribute__((__overloadable__)) jvector_t ilogb_(vector_t); jvector_t jmin=sizeof(jvector_t)==sizeof(int)?INT_MIN:LONG_MIN; jvector_t r=ilogb_(x0); select(r, (jvector_t)FP_ILOGB0, r==jmin); }))"), - ("ldexp" , [VF, VK ], VF, "({ __attribute__((__overloadable__)) vector_t ldexp_(vector_t,jvector_t); ldexp_(x0,convert_ivector_t(x1)); })"), - ("ldexp" , [VF, SK ], VF, "({ __attribute__((__overloadable__)) vector_t ldexp_(vector_t,kscalar_t); ldexp_(x0,(kscalar_t)x1); })"), - ("logb" , [VF ], VF, "convert_vector_t(ilogb(x0))"), - ("mad" , [VF, VF, VF ], VF, "fma(x0,x1,x2)"), - ("maxmag" , [VF, VF ], VF, "fabs(x0)>fabs(x1) ? x0 : fabs(x1)>fabs(x0) ? x1 : fmax(x0,x1)"), - ("minmag" , [VF, VF ], VF, "fabs(x0)<fabs(x1) ? x0 : fabs(x1)<fabs(x0) ? x1 : fmin(x0,x1)"), - ("modf" , [VF, PVF ], VF, "*x1=trunc(x0), copysign(x0-trunc(x0),x0)"), - ("nan" , [VU ], VF, "(scalar_t)0.0f/(scalar_t)0.0f"), - ("pown" , [VF, VK ], VF, "pow(x0,convert_vector_t(x1))"), - ("powr" , [VF, VF ], VF, "pow(x0,x1)"), - ("remquo" , [VF, VF, PVK], VF, "({ vector_t k=rint(x0/x1); *x2=(convert_kvector_t(k)&0x7f)*(1-2*convert_kvector_t(signbit(k))); x0-k*x1; })"), - ("rootn" , [VF, VK ], VF, "pow(x0,(scalar_t)1.0f/convert_vector_t(x1))"), - ("sincos" , [VF, PVF ], VF, "*x1=cos(x0), sin(x0)"), - ("sinpi" , [VF ], VF, "sin((scalar_t)M_PI*x0)"), - ("tanpi" , [VF ], VF, "tan((scalar_t)M_PI*x0)"), - - # Section 6.12.2, half_ functions - ("half_cos" , [VF ], VF, "cos(x0)"), - ("half_divide" , [VF, VF ], VF, "x0/x1"), - ("half_exp" , [VF ], VF, "exp(x0)"), - ("half_exp2" , [VF ], VF, "exp2(x0)"), - ("half_exp10" , [VF ], VF, "exp10(x0)"), - ("half_log" , [VF ], VF, "log(x0)"), - ("half_log2" , [VF ], VF, "log2(x0)"), - ("half_log10" , [VF ], VF, "log10(x0)"), - ("half_powr" , [VF, VF ], VF, "powr(x0,x1)"), - ("half_recip" , [VF ], VF, "(scalar_t)1.0f/x0"), - ("half_rsqrt" , [VF ], VF, "rsqrt(x0)"), - ("half_sin" , [VF ], VF, "sin(x0)"), - ("half_sqrt" , [VF ], VF, "sqrt(x0)"), - ("half_tan" , [VF ], VF, "tan(x0)"), - # Section 6.12.2, native_ functions - ("native_cos" , [VF ], VF, "cos(x0)"), - ("native_divide" , [VF, VF ], VF, "x0/x1"), - ("native_exp" , [VF ], VF, "exp(x0)"), - ("native_exp2" , [VF ], VF, "exp2(x0)"), - ("native_exp10" , [VF ], VF, "exp10(x0)"), - ("native_log" , [VF ], VF, "log(x0)"), - ("native_log2" , [VF ], VF, "log2(x0)"), - ("native_log10" , [VF ], VF, "log10(x0)"), - ("native_powr" , [VF, VF ], VF, "powr(x0,x1)"), - ("native_recip" , [VF ], VF, "(scalar_t)1.0f/x0"), - ("native_rsqrt" , [VF ], VF, "rsqrt(x0)"), - ("native_sin" , [VF ], VF, "sin(x0)"), - ("native_sqrt" , [VF ], VF, "sqrt(x0)"), - ("native_tan" , [VF ], VF, "tan(x0)"), - - # Section 6.12.4 - ("clamp" , [VF, VF, VF ], VF, "fmin(fmax(x0,x1),x2)"), - ("clamp" , [VF, SF, SF ], VF, "fmin(fmax(x0,x1),x2)"), - ("degrees" , [VF ], VF, "(scalar_t)(180/M_PI)*x0"), - ("max" , [VF, VF ], VF, "fmax(x0,x1)"), - ("max" , [VF, SF ], VF, "fmax(x0,x1)"), - ("min" , [VF, VF ], VF, "fmin(x0,x1)"), - ("min" , [VF, SF ], VF, "fmin(x0,x1)"), - ("mix" , [VF, VF, VF ], VF, "x0+(x1-x0)*x2"), - ("mix" , [VF, VF, SF ], VF, "x0+(x1-x0)*x2"), - ("radians" , [VF ], VF, "(scalar_t)(M_PI/180)*x0"), - ("step" , [VF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0f : (vector_t)(scalar_t)1.0f"), - ("step" , [SF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0f : (vector_t)(scalar_t)1.0f"), - ("smoothstep" , [VF, VF, VF ], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0f, (scalar_t)1.0f); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"), - ("smoothstep" , [SF, SF, VF ], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0f, (scalar_t)1.0f); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"), - ("sign" , [VF ], VF, "copysign(x0!=(scalar_t)0.0f ? (vector_t)(scalar_t)1.0f : (vector_t)(scalar_t)0.0f,x0)"), - - # Section 6.12.6 - ("isequal" , [VF, VF ], VJ, "x0==x1"), - ("isnotequal" , [VF, VF ], VJ, "x0!=x1"), - ("isgreater" , [VF, VF ], VJ, "x0>x1"), - ("isgreaterequal", [VF, VF ], VJ, "x0>=x1"), - ("isless" , [VF, VF ], VJ, "x0<x1"), - ("islessequal" , [VF, VF ], VJ, "x0<=x1"), - ("islessgreater" , [VF, VF ], VJ, "x0<x1 || x0>x1"), - ("isordered" , [VF, VF ], VJ, "!isunordered(x0,x1)"), - ("isunordered" , [VF, VF ], VJ, "isnan(x0) || isnan(x1)"), -] - -# Missing functions from 6.12.2: erfc, erf, lgamma, lgamma_r, -# nextafter, tgamma - -# Unchecked: 6.12.3 (integer functions) - -# Missing functions from 6.12.6 (relational functions): any, all, -# bitselect, select - -# Unchecked: 6.12.7 (vector data load and store functions) - -# Unchecked: 6.12.12 (miscellaneous vector functions) - - - -# This is always prepended to the generated function names. -func_prefix = "_cl_" - -# Some of the functions need prefixes to avoid using the C standard -# library ones. -masked_functions = [ - "acos", - "asin", - "atan", - "atan2", - "ceil", - "copysign", - "cos", - "exp", - "exp2", - "fabs", - "floor", - "fma", - "fmax", - "fmin", - "log", - "log2", - "pow", - "rint", - "round", - "sin", - "sqrt", - "tan", - "trunc", -] - -# This is prepended to masked function names. -mask_prefix = "" - -def prefixed(name): - if name in masked_functions: name = mask_prefix + name - return func_prefix + name - - - -outfile = None -outfile_did_truncate = set() -def out(str): outfile.write("%s\n" % str) -def out_open(name): - global outfile - global outfile_did_truncate - if outfile: raise "file already open" - is_first_open = name not in outfile_did_truncate - if is_first_open: - outfile = open(name, "w") - outfile.close() - outfile_did_truncate.add(name) - print name, - sys.stdout.flush() - outfile = open(name, "a") - return is_first_open -def out_close(): - global outfile - outfile.close() - outfile = None - -declfile = None -def decl(str): - if str=="" or str.startswith("//") or str.startswith("#"): - declfile.write("%s\n" % str) - else: - declfile.write("__attribute__((__overloadable__)) %s;\n" % str) -def decl_open(name): - global declfile - declfile = open(name, "w") -def decl_close(): - global declfile - declfile.close() - declfile = None - - - -def mktype(tp, vectype): - (space, basetype, sizename) = ( - re.match("(global|local|private)?(float|double)([0-9]*)", vectype). - groups()) - size = 1 if sizename=="" else int(sizename) - if tp==SK: - if size==1: return "int" - return "int" if basetype=="float" else "long" - if tp==SF: - return basetype - if tp==VI: - ibasetype = "int" if basetype=="float" else "long" - return "%s%s" % (ibasetype, sizename) - if tp==VJ: - if size==1: return "int" - ibasetype = "int" if basetype=="float" else "long" - return "%s%s" % (ibasetype, sizename) - if tp==VK: - return "int%s" % sizename - if tp==PVK: - if space=="": raise "wrong address space" - return "%s int%s*" % (space, sizename) - if tp==VU: - ibasetype = "uint" if basetype=="float" else "ulong" - return "%s%s" % (ibasetype, sizename) - if tp==VF: - return "%s%s" % (basetype, sizename) - if tp==PVF: - if space=="": raise "wrong address space" - return "%s %s%s*" % (space, basetype, sizename) - raise "unreachable" - -def mkvmltype(tp, vectype): - if tp==SI: return vectype+"::int_t" - if tp==SF: return vectype+"::real_t" - if tp==VB: return vectype+"::boolvec_t" - if tp in (VI,VJ): return vectype+"::intvec_t" - if tp==VF: return vectype - raise "unreachable" - - - -def output_vmlfunc_vml(func, vectype): - (name, args, ret, vmlargs, vmlret) = func - out("// Implement %s by calling vecmathlib" % name) - (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups() - size = 1 if size=="" else int(size) - vmltype = "vecmathlib::realvec<%s,%d>" % (basetype, size) - vmlinttype = "%s::intvec_t" % vmltype - vmlbooltype = "%s::boolvec_t" % vmltype - funcargstr = ", ".join(map(lambda (n, arg): - "%s x%d" % (mktype(arg, vectype), n), - zip(range(0, 100), args))) - funcretstr = mktype(ret, vectype) - decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("{") - for (n, arg, vmlarg) in zip(range(0, 100), args, vmlargs): - out(" %s y%d = bitcast<%s,%s >(x%d);" % - (mkvmltype(vmlarg, vmltype), n, - mktype(arg, vectype), mkvmltype(vmlarg, vmltype), n)) - callargstr = ", ".join(map(lambda (n, arg): "y%d" % n, - zip(range(0, 100), args))) - callretstr = mkvmltype(vmlret, vmltype) - name1 = name[:-1] if name.endswith("_") else name - out(" %s r = vecmathlib::%s(%s);" % (callretstr, name1, callargstr)) - # We may need to convert from the VML type to the OpenCL type - # before bitcasting. This may be a real conversion, e.g. bool to - # int. This may also involve a change in size (e.g. long to int), - # but only if the type is scalar. These conversions are applied - # before bitcasting. - # convfunc: conversion function to call - # convtype: result type of conversion, also input to bitcast - # bitcasttype: output of bitcast; may differ from function result - # if a size change is needed - # TODO: Why is this here, and not e.g. near the signbit definition - # in the table above? - if vmlret==ret: - convfunc = "" - convtype = callretstr - bitcasttype = funcretstr - else: - if vmlret==VI and ret in (VJ,VK): - convfunc = "" - convtype = callretstr - elif vmlret==VB and ret in (VJ,VK): - if size==1: - # for scalars, true==+1 - convfunc = "vecmathlib::convert_int" - else: - # for vectors, true==-1 - convfunc = "-vecmathlib::convert_int" - convtype = vmlinttype - else: - raise "missing" - if ret in (VJ,VK): - bitcasttype = mktype(VI, vectype) - else: - raise "missing" - out(" return bitcast<%s,%s>(%s(r));" % (convtype, bitcasttype, convfunc)) - out("}") - -def output_vmlfunc_libm(func, vectype): - (name, args, ret, vmlargs, vmlret) = func - out("// Implement %s by calling libm" % name) - (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups() - size = 1 if size=="" else int(size) - othertype = "vecmathlib::realpseudovec<%s,%d>" % (basetype, size) - otherinttype = "%s::intvec_t" % othertype - funcargstr = ", ".join(map(lambda (n, arg): - "%s x%d" % (mktype(arg, vectype), n), - zip(range(0, 100), args))) - funcretstr = mktype(ret, vectype) - decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("{") - for (n, arg) in zip(range(0, 100), args): - out(" %s y%d = x%d;" % (mkvmltype(arg, othertype), n, n)) - callargstr = ", ".join(map(lambda (n, arg): "y%d" % n, - zip(range(0, 100), args))) - # callretstr = othertype if ret==VF else otherinttype - callretstr = mkvmltype(vmlret, othertype) - name1 = name[:-1] if name.endswith("_") else name - out(" %s r = %s(%s);" % (callretstr, name1, callargstr)) - # We may need to convert from the VML type to the OpenCL type - # before bitcasting. This may be a real conversion, e.g. bool to - # int. This may also involve a change in size (e.g. long to int), - # but only if the type is scalar. These conversions are applied - # before bitcasting. - # convfunc: conversion function to call - # convtype: result type of conversion, also input to bitcast - # bitcasttype: output of bitcast; may differ from function result - # if a size change is needed - # TODO: Why is this here, and not e.g. near the signbit definition - # in the table above? - if vmlret==ret: - convfunc = "" - else: - if vmlret==VI and ret in (VJ,VK): - convfunc = "" - elif vmlret==VB and ret in (VJ,VK): - if size==1: - # for scalars, true==+1 - convfunc = "vecmathlib::convert_int" - else: - # for vectors, true==-1 - convfunc = "-vecmathlib::convert_int" - else: - raise "missing" - out(" return %s(r)[0];" % convfunc) - out("}") - -def output_vmlfunc_upcast(func, vectype): - (name, args, ret, vmlargs, vmlret) = func - out("// Implement %s by using a larger vector size" % name) - (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups() - size = 1 if size=="" else int(size) - size2 = 4 if size==3 else size*2 # next power of 2 - size2 = "" if size2==1 else str(size2) - if size==1: raise "can't upcast scalars" - othertype = "%s%s" % (basetype, size2) - declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype), - zip(range(0, 100), args))) - out("%s %s(%s);" % (mktype(ret, othertype), prefixed(name), declargstr)) - funcargstr = ", ".join(map(lambda (n, arg): - "%s x%d" % (mktype(arg, vectype), n), - zip(range(0, 100), args))) - decl("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr)) - out("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr)) - out("{") - for (n, arg) in zip(range(0, 100), args): - out(" %s y%d = bitcast<%s,%s>(x%d);" % - (mktype(arg, othertype), n, - mktype(arg, vectype), mktype(arg, othertype), n)) - callargstr = ", ".join(map(lambda (n, arg): "y%d" % n, - zip(range(0, 100), args))) - out(" %s r = %s(%s);" % - (mktype(ret, othertype), prefixed(name), callargstr)) - out(" return bitcast<%s,%s>(r);" % - (mktype(ret, othertype), mktype(ret, vectype))) - out("}") - -def output_vmlfunc_split(func, vectype): - (name, args, ret, vmlargs, vmlret) = func - out("// Implement %s by splitting into a smaller vector size" % name) - (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups() - size = 1 if size=="" else int(size) - size2 = (size+1) / 2 # divide by 2, rounding up - size2 = "" if size2==1 else str(size2) - othertype = "%s%s" % (basetype, size2) - declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype), - zip(range(0, 100), args))) - out("%s %s(%s);" % (mktype(ret, othertype), prefixed(name), declargstr)) - funcargstr = ", ".join(map(lambda (n, arg): - "%s x%d" % (mktype(arg, vectype), n), - zip(range(0, 100), args))) - decl("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr)) - out("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr)) - out("{") - if ret in (SF, SK): - split_ret = SF - elif ret in (VI, VJ, VK): - split_ret = VI - elif ret in (VF): - split_ret = VF - else: - raise "missing" - for (n, arg) in zip(range(0, 100), args): - out(" pair_%s y%d = bitcast<%s,pair_%s>(x%d);" % - (mktype(arg, othertype), n, - mktype(arg, vectype), mktype(arg, othertype), n)) - out(" pair_%s r;" % mktype(split_ret, othertype)) - # in OpenCL: for scalars, true==+1, but for vectors, true==-1 - conv = "" - if vmlret==VB: - if ret in (VJ,VK): - if size2=="": - conv = "-" - else: - raise "missing" - for suffix in ("lo", "hi"): - callargstr = ", ".join(map(lambda (n, arg): "y%d.%s" % (n, suffix), - zip(range(0, 100), args))) - out(" r.%s = %s%s(%s);" % (suffix, conv, prefixed(name), callargstr)) - out(" pocl_static_assert(sizeof(pair_%s) == sizeof(%s));" % - (mktype(split_ret, othertype), mktype(ret, vectype))) - out(" return bitcast<pair_%s,%s>(r);" % - (mktype(split_ret, othertype), mktype(ret, vectype))) - out("}") - - - -def output_directfunc_direct(func, vectype): - (name, args, ret, impl) = func - out("// Implement %s directly" % name) - (space, basetype, sizename) = ( - re.match("(global|local|private)?(float|double)([0-9]*)", vectype). - groups()) - size = 1 if sizename=="" else int(sizename) - funcargstr = ", ".join(map(lambda (n, arg): - "%s x%d" % (mktype(arg, vectype), n), - zip(range(0, 100), args))) - funcretstr = mktype(ret, vectype) - decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("__attribute__((__overloadable__))"); - out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr)) - out("{") - out(" typedef %s kscalar_t;" % mktype(SK, vectype)) - out(" typedef %s scalar_t;" % mktype(SF, vectype)) - out(" typedef %s ivector_t;" % mktype(VI, vectype)) - out(" typedef %s jvector_t;" % mktype(VJ, vectype)) - out(" typedef %s kvector_t;" % mktype(VK, vectype)) - out(" typedef %s vector_t;" % mktype(VF, vectype)) - out("#define convert_ivector_t convert_%s" % mktype(VI, vectype)) - out("#define convert_jvector_t convert_%s" % mktype(VJ, vectype)) - out("#define convert_kvector_t convert_%s" % mktype(VK, vectype)) - out("#define convert_vector_t convert_%s" % mktype(VF, vectype)) - out(" return %s;" % impl) - out("#undef convert_ivector_t") - out("#undef convert_jvector_t") - out("#undef convert_kvector_t") - out("#undef convert_vector_t") - out("}") - - - -def output_vmlfunc(func): - (name, args, ret, vmlargs, vmlret) = func - is_first_open = out_open("%s.cc" % name) - if is_first_open: - out("// Note: This file has been automatically generated. Do not modify.") - out("") - out("#include \"pocl-compat.h\"") - out("") - else: - out("") - out("") - out("") - decl("") - decl("// %s: %s -> %s" % (name, args, ret)) - decl("#undef %s" % name) - if prefixed(name) != name: - decl("#define %s %s" % (name, prefixed(name))) - out("// %s: %s -> %s" % (name, args, ret)) - for basetype in ["float", "double"]: - if basetype=="double": - out("") - out("#ifdef cl_khr_fp64") - for size in [1, 2, 3, 4, 8, 16]: - # Ignore this prototype for size==1 if there are any - # scalar arguments; this prevents duplicate definitions - if size==1 and any(map(lambda arg: arg in (SI, SK, SF), args)): - continue - sizename = '' if size==1 else str(size) - vectype = basetype + sizename - # always use vecmathlib if available - out("") - out("// %s: VF=%s" % (name, vectype)) - out("#if defined VECMATHLIB_HAVE_VEC_%s_%d" % - (basetype.upper(), size)) - output_vmlfunc_vml(func, vectype) - if size==1: - # a scalar type: use libm - out("#else") - output_vmlfunc_libm(func, vectype) - else: - # a vector type: try upcasting to next power of 2 - size2 = 4 if size==3 else size*2 - out("#elif defined VECMATHLIB_HAVE_VEC_%s_%d" % - (basetype.upper(), size2)) - output_vmlfunc_upcast(func, vectype) - # a vector type: split into smaller vector type - out("#else") - output_vmlfunc_split(func, vectype) - out("#endif") - if basetype=="double": - out("") - out("#endif // #ifdef cl_khr_fp64") - out_close() - - - -def output_directfunc(func): - (name, args, ret, impl) = func - is_first_open = out_open("%s.cl" % name) - if is_first_open: - out("// Note: This file has been automatically generated. Do not modify.") - out("") - out("// Needed for fract()") - out("#define POCL_FRACT_MIN 0x1.fffffffffffffp-1") - out("#define POCL_FRACT_MIN_F 0x1.fffffep-1f") - out("") - out("// If double precision is not supported, then define") - out("// single-precision (dummy) values to avoid compiler warnings") - out("// for double precision values") - out("#ifndef khr_fp64") - out("# undef M_PI") - out("# define M_PI M_PI_F") - out("# undef M_PI_2") - out("# define M_PI_2 M_PI_2_F") - out("# undef LONG_MAX") - out("# define LONG_MAX INT_MAX") - out("# undef LONG_MIN") - out("# define LONG_MIN INT_MIN") - out("# undef POCL_FRACT_MIN") - out("# define POCL_FRACT_MIN POCL_FRACT_MIN_F") - out("#endif") - out("") - else: - out("") - out("") - out("") - decl("") - decl("// %s: %s -> %s" % (name, args, ret)) - decl("#undef %s" % name) - if prefixed(name) != name: - decl("#define %s %s" % (name, prefixed(name))) - out("// %s: %s -> %s" % (name, args, ret)) - if any(map(lambda arg: arg in (PVK, PVF), args)): - spaces = ["global", "local", "private"] - else: - spaces = [""] - for basetype in ["float", "double"]: - if ((name.startswith("half_") or name.startswith("native_")) and - basetype=="double"): - continue - if basetype=="double": - out("") - out("#ifdef cl_khr_fp64") - for size in [1, 2, 3, 4, 8, 16]: - # Ignore this prototype for size==1 if there are any - # scalar arguments; this prevents duplicate definitions - if size==1 and any(map(lambda arg: arg in (SI, SK, SF), args)): - continue - sizename = '' if size==1 else str(size) - for space in spaces: - vectype = space + basetype + sizename - # always use vecmathlib if available - out("") - out("// %s: VF=%s" % (name, vectype)) - output_directfunc_direct(func, vectype) - if basetype=="double": - out("") - out("#endif // #ifdef cl_khr_fp64") - out_close() - - - -decl_open("kernel-vecmathlib.h") -decl("// Note: This file has been automatically generated. Do not modify.") -decl("#ifndef KERNEL_VECMATHLIB_H") -decl("#define KERNEL_VECMATHLIB_H 1") -map(output_vmlfunc, vmlfuncs) -map(output_directfunc, directfuncs) -decl("") -decl("#endif // #ifndef KERNEL_VECMATHLIB_H") -decl_close() -print diff --git a/pocl/length.cl b/pocl/length.cl deleted file mode 100644 index 9715e59..0000000 --- a/pocl/length.cl +++ /dev/null @@ -1,49 +0,0 @@ -__attribute__((__overloadable__)) -float length(float p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float length(float2 p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float length(float3 p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float length(float4 p) -{ - return sqrt(dot(p, p)); -} - -#ifdef cl_khr_fp64 -__attribute__((__overloadable__)) -double length(double p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double length(double2 p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double length(double3 p) -{ - return sqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double length(double4 p) -{ - return sqrt(dot(p, p)); -} -#endif diff --git a/pocl/normalize.cl b/pocl/normalize.cl deleted file mode 100644 index e033567..0000000 --- a/pocl/normalize.cl +++ /dev/null @@ -1,49 +0,0 @@ -__attribute__((__overloadable__)) -float normalize(float p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float2 normalize(float2 p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float3 normalize(float3 p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -float4 normalize(float4 p) -{ - return p * rsqrt(dot(p, p)); -} - -#ifdef cl_khr_fp64 -__attribute__((__overloadable__)) -double normalize(double p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double2 normalize(double2 p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double3 normalize(double3 p) -{ - return p * rsqrt(dot(p, p)); -} - -__attribute__((__overloadable__)) -double4 normalize(double4 p) -{ - return p * rsqrt(dot(p, p)); -} -#endif diff --git a/pocl/pocl-compat.h b/pocl/pocl-compat.h deleted file mode 100644 index 10dc281..0000000 --- a/pocl/pocl-compat.h +++ /dev/null @@ -1,118 +0,0 @@ -// -*-C++-*- Compatibility layer to help instantiante functions to -// create a library that can be called from elsewhere - - - -// Make things go fast (and debugging difficult...) -#define VML_NODEBUG -#include "../vecmathlib.h" - -#include <algorithm> -#include <cstring> - -#define pocl_static_assert(b) typedef char _static_assert[(b)?+1:-1] - -// If double precision is not supported, then define single-precision -// (dummy) values to avoid compiler warnings for double precision -// values -#ifndef cl_khr_fp64 -# undef M_PI -# define M_PI M_PI_F -#endif - - - -// Define vector types - -#define int std::int32_t -typedef int int2 __attribute__((__ext_vector_type__( 2))); -typedef int int3 __attribute__((__ext_vector_type__( 3))); -typedef int int4 __attribute__((__ext_vector_type__( 4))); -typedef int int8 __attribute__((__ext_vector_type__( 8))); -typedef int int16 __attribute__((__ext_vector_type__(16))); - -#define uint std::uint32_t -typedef uint uint2 __attribute__((__ext_vector_type__( 2))); -typedef uint uint3 __attribute__((__ext_vector_type__( 3))); -typedef uint uint4 __attribute__((__ext_vector_type__( 4))); -typedef uint uint8 __attribute__((__ext_vector_type__( 8))); -typedef uint uint16 __attribute__((__ext_vector_type__(16))); - -#ifdef cles_khr_int64 -#define long std::int64_t -typedef long long2 __attribute__((__ext_vector_type__( 2))); -typedef long long3 __attribute__((__ext_vector_type__( 3))); -typedef long long4 __attribute__((__ext_vector_type__( 4))); -typedef long long8 __attribute__((__ext_vector_type__( 8))); -typedef long long16 __attribute__((__ext_vector_type__(16))); - -#define ulong std::uint64_t -typedef ulong ulong2 __attribute__((__ext_vector_type__( 2))); -typedef ulong ulong3 __attribute__((__ext_vector_type__( 3))); -typedef ulong ulong4 __attribute__((__ext_vector_type__( 4))); -typedef ulong ulong8 __attribute__((__ext_vector_type__( 8))); -typedef ulong ulong16 __attribute__((__ext_vector_type__(16))); -#endif - -typedef float float2 __attribute__((__ext_vector_type__( 2))); -typedef float float3 __attribute__((__ext_vector_type__( 3))); -typedef float float4 __attribute__((__ext_vector_type__( 4))); -typedef float float8 __attribute__((__ext_vector_type__( 8))); -typedef float float16 __attribute__((__ext_vector_type__(16))); - -#ifdef cl_khr_fp64 -typedef double double2 __attribute__((__ext_vector_type__( 2))); -typedef double double3 __attribute__((__ext_vector_type__( 3))); -typedef double double4 __attribute__((__ext_vector_type__( 4))); -typedef double double8 __attribute__((__ext_vector_type__( 8))); -typedef double double16 __attribute__((__ext_vector_type__(16))); -#endif - - - -// Declare pair types for assembling/disassembling vectors -struct pair_int { int lo, hi; }; -struct pair_int2 { int2 lo, hi; }; -struct pair_int3 { int3 lo, hi; }; -struct pair_int4 { int4 lo, hi; }; -struct pair_int8 { int8 lo, hi; }; -struct pair_int16 { int16 lo, hi; }; - -#ifdef cles_khr_int64 -struct pair_long { long lo, hi; }; -struct pair_long2 { long2 lo, hi; }; -struct pair_long3 { long3 lo, hi; }; -struct pair_long4 { long4 lo, hi; }; -struct pair_long8 { long8 lo, hi; }; -struct pair_long16 { long16 lo, hi; }; -#endif - -struct pair_float { float lo, hi; }; -struct pair_float2 { float2 lo, hi; }; -struct pair_float3 { float3 lo, hi; }; -struct pair_float4 { float4 lo, hi; }; -struct pair_float8 { float8 lo, hi; }; -struct pair_float16 { float16 lo, hi; }; - -#ifdef cl_khr_fp64 -struct pair_double { double lo, hi; }; -struct pair_double2 { double2 lo, hi; }; -struct pair_double3 { double3 lo, hi; }; -struct pair_double4 { double4 lo, hi; }; -struct pair_double8 { double8 lo, hi; }; -struct pair_double16 { double16 lo, hi; }; -#endif - - - -// Generic conversion function -template<typename A, typename B> -static B bitcast(A a) -{ - B b; - std::memcpy(&b, &a, std::min(sizeof a, sizeof b)); - if (sizeof b > sizeof a) { - std::memset((char*)&b + sizeof a, 0, sizeof b - sizeof a); - } - return b; -} |