summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Erik Schnetter <schnetter@gmail.com> 2013-08-15 09:41:47 -0600
committer Erik Schnetter <schnetter@gmail.com> 2013-08-15 09:41:47 -0600
commit 7aaa6e4b35a3625039b2605e31ddfe7cd6f43286 (patch)
tree ec8c8b6c23de6d381139cf0c570b1ddf74922172
parent af4e5e29311f019d70d1d0a74b206ab234e1cff1 (diff)
download vecmathlib-7aaa6e4b35a3625039b2605e31ddfe7cd6f43286.zip
vecmathlib-7aaa6e4b35a3625039b2605e31ddfe7cd6f43286.tar.gz
Remove pocl subdirectory. This is now instead part of pocl.
-rw-r--r-- pocl/cross.cl 43
-rw-r--r-- pocl/distance.cl 49
-rw-r--r-- pocl/dot.cl 49
-rw-r--r-- pocl/fast_distance.cl 23
-rw-r--r-- pocl/fast_length.cl 26
-rw-r--r-- pocl/fast_normalize.cl 26
-rwxr-xr-x pocl/generate-files.py 657
-rw-r--r-- pocl/length.cl 49
-rw-r--r-- pocl/normalize.cl 49
-rw-r--r-- pocl/pocl-compat.h 118
10 files changed, 0 insertions, 1089 deletions
diff --git a/pocl/cross.cl b/pocl/cross.cl
deleted file mode 100644
index 7b8e861..0000000
--- a/pocl/cross.cl
+++ /dev/null
@@ -1,43 +0,0 @@
-__attribute__((__overloadable__))
-float4 cross(float4 p0, float4 p1)
-{
- float4 r;
- r.x = p0.y*p1.z - p0.z*p1.y;
- r.y = p0.z*p1.x - p0.x*p1.z;
- r.z = p0.x*p1.y - p0.y*p1.x;
- r.w = 0.0f;
- return r;
-}
-
-__attribute__((__overloadable__))
-float3 cross(float3 p0, float3 p1)
-{
- float3 r;
- r.x = p0.y*p1.z - p0.z*p1.y;
- r.y = p0.z*p1.x - p0.x*p1.z;
- r.z = p0.x*p1.y - p0.y*p1.x;
- return r;
-}
-
-#ifdef cl_khr_fp64
-__attribute__((__overloadable__))
-double4 cross(double4 p0, double4 p1)
-{
- double4 r;
- r.x = p0.y*p1.z - p0.z*p1.y;
- r.y = p0.z*p1.x - p0.x*p1.z;
- r.z = p0.x*p1.y - p0.y*p1.x;
- r.w = 0.0f;
- return r;
-}
-
-__attribute__((__overloadable__))
-double3 cross(double3 p0, double3 p1)
-{
- double3 r;
- r.x = p0.y*p1.z - p0.z*p1.y;
- r.y = p0.z*p1.x - p0.x*p1.z;
- r.z = p0.x*p1.y - p0.y*p1.x;
- return r;
-}
-#endif
diff --git a/pocl/distance.cl b/pocl/distance.cl
deleted file mode 100644
index 5df8637..0000000
--- a/pocl/distance.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-__attribute__((__overloadable__))
-float distance(float p0, float p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float distance(float2 p0, float2 p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float distance(float3 p0, float3 p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float distance(float4 p0, float4 p1)
-{
- return length(p0-p1);
-}
-
-#ifdef cl_khr_fp64
-__attribute__((__overloadable__))
-double distance(double p0, double p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-double distance(double2 p0, double2 p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-double distance(double3 p0, double3 p1)
-{
- return length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-double distance(double4 p0, double4 p1)
-{
- return length(p0-p1);
-}
-#endif
diff --git a/pocl/dot.cl b/pocl/dot.cl
deleted file mode 100644
index 91bb400..0000000
--- a/pocl/dot.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-__attribute__((__overloadable__))
-float dot(float p0, float p1)
-{
- return p0*p1;
-}
-
-__attribute__((__overloadable__))
-float dot(float2 p0, float2 p1)
-{
- return p0.x*p1.x + p0.y*p1.y;
-}
-
-__attribute__((__overloadable__))
-float dot(float3 p0, float3 p1)
-{
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
-}
-
-__attribute__((__overloadable__))
-float dot(float4 p0, float4 p1)
-{
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
-}
-
-#ifdef cl_khr_fp64
-__attribute__((__overloadable__))
-double dot(double p0, double p1)
-{
- return p0*p1;
-}
-
-__attribute__((__overloadable__))
-double dot(double2 p0, double2 p1)
-{
- return p0.x*p1.x + p0.y*p1.y;
-}
-
-__attribute__((__overloadable__))
-double dot(double3 p0, double3 p1)
-{
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
-}
-
-__attribute__((__overloadable__))
-double dot(double4 p0, double4 p1)
-{
- return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
-}
-#endif
diff --git a/pocl/fast_distance.cl b/pocl/fast_distance.cl
deleted file mode 100644
index c2a7e9e..0000000
--- a/pocl/fast_distance.cl
+++ /dev/null
@@ -1,23 +0,0 @@
-__attribute__((__overloadable__))
-float fast_distance(float p0, float p1)
-{
- return fast_length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float fast_distance(float2 p0, float2 p1)
-{
- return fast_length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float fast_distance(float3 p0, float3 p1)
-{
- return fast_length(p0-p1);
-}
-
-__attribute__((__overloadable__))
-float fast_distance(float4 p0, float4 p1)
-{
- return fast_length(p0-p1);
-}
diff --git a/pocl/fast_length.cl b/pocl/fast_length.cl
deleted file mode 100644
index eb765b9..0000000
--- a/pocl/fast_length.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-// Note: Chapter 6.12.5 of the OpenCL standard says to use half_sqrt,
-// not fast_sqrt
-
-__attribute__((__overloadable__))
-float fast_length(float p)
-{
- return half_sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float fast_length(float2 p)
-{
- return half_sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float fast_length(float3 p)
-{
- return half_sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float fast_length(float4 p)
-{
- return half_sqrt(dot(p, p));
-}
diff --git a/pocl/fast_normalize.cl b/pocl/fast_normalize.cl
deleted file mode 100644
index ecdd524..0000000
--- a/pocl/fast_normalize.cl
+++ /dev/null
@@ -1,26 +0,0 @@
-// Note: Chapter 6.12.5 of the OpenCL standard says to use half_rsqrt,
-// not fast_rsqrt
-
-__attribute__((__overloadable__))
-float fast_normalize(float p)
-{
- return p * half_rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float2 fast_normalize(float2 p)
-{
- return p * half_rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float3 fast_normalize(float3 p)
-{
- return p * half_rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float4 fast_normalize(float4 p)
-{
- return p * half_rsqrt(dot(p, p));
-}
diff --git a/pocl/generate-files.py b/pocl/generate-files.py
deleted file mode 100755
index b089334..0000000
--- a/pocl/generate-files.py
+++ /dev/null
@@ -1,657 +0,0 @@
-#! /usr/bin/env python
-
-import re, sys
-
-
-
-# Types:
-SI = "SI" # int/long
-SK = "SK" # int (even for double)
-SF = "SF" # float/double
-VB = "VB" # boolN
-VI = "VI" # intN/longN
-VJ = "VJ" # intN/longN (except int1 for double1)
-VK = "VK" # intN (even for doubleN)
-VU = "VU" # uintN/ulongN
-VF = "VF" # floatN/doubleN
-PVK = "PVK" # pointer to VK
-PVF = "PVF" # pointer to VF
-
-# Each function is described by a tuple with the following entries:
-# 1. name
-# 2. external argument types (see above)
-# 3. external return type
-# 4. vecmathlib argument types (see above)
-# 5. vecmathlib return type
-# This allows generating externally visible functions with different
-# signatures, e.g. to support OpenCL.
-vmlfuncs = [
- # Section 6.12.2
- ("acos" , [VF ], VF, [VF ], VF),
- ("acosh" , [VF ], VF, [VF ], VF),
- ("asin" , [VF ], VF, [VF ], VF),
- ("asinh" , [VF ], VF, [VF ], VF),
- ("atan" , [VF ], VF, [VF ], VF),
- ("atanh" , [VF ], VF, [VF ], VF),
- ("cbrt" , [VF ], VF, [VF ], VF),
- ("ceil" , [VF ], VF, [VF ], VF),
- ("copysign" , [VF, VF ], VF, [VF, VF ], VF),
- ("cos" , [VF ], VF, [VF ], VF),
- ("cosh" , [VF ], VF, [VF ], VF),
- ("exp" , [VF ], VF, [VF ], VF),
- ("exp2" , [VF ], VF, [VF ], VF),
- ("exp10" , [VF ], VF, [VF ], VF),
- ("expm1" , [VF ], VF, [VF ], VF),
- ("fabs" , [VF ], VF, [VF ], VF),
- ("fdim" , [VF, VF ], VF, [VF, VF ], VF),
- ("floor" , [VF ], VF, [VF ], VF),
- ("fma" , [VF, VF, VF], VF, [VF, VF, VF], VF),
- ("fmax" , [VF, VF ], VF, [VF, VF ], VF),
- ("fmin" , [VF, VF ], VF, [VF, VF ], VF),
- ("fmod" , [VF, VF ], VF, [VF, VF ], VF),
- ("hypot" , [VF, VF ], VF, [VF, VF ], VF),
- ("ilogb_" , [VF ], VJ, [VF ], VI), # should return VK
- ("ldexp_" , [VF, VJ ], VF, [VF, VI ], VF), # should take VK
- ("ldexp_" , [VF, SK ], VF, [VF, SI ], VF), # should take VK
- ("log" , [VF ], VF, [VF ], VF),
- ("log2" , [VF ], VF, [VF ], VF),
- ("log10" , [VF ], VF, [VF ], VF),
- ("log1p" , [VF ], VF, [VF ], VF),
- ("pow" , [VF, VF ], VF, [VF, VF ], VF),
- ("remainder", [VF, VF ], VF, [VF, VF ], VF),
- ("rint" , [VF ], VF, [VF ], VF),
- ("round" , [VF ], VF, [VF ], VF),
- ("rsqrt" , [VF ], VF, [VF ], VF),
- ("sin" , [VF ], VF, [VF ], VF),
- ("sinh" , [VF ], VF, [VF ], VF),
- ("sqrt" , [VF ], VF, [VF ], VF),
- ("tan" , [VF ], VF, [VF ], VF),
- ("tanh" , [VF ], VF, [VF ], VF),
- ("trunc" , [VF ], VF, [VF ], VF),
-
- # Section 6.12.6
- ("isfinite" , [VF ], VJ, [VF ], VB),
- ("isinf" , [VF ], VJ, [VF ], VB),
- ("isnan" , [VF ], VJ, [VF ], VB),
- ("isnormal" , [VF ], VJ, [VF ], VB),
- ("signbit" , [VF ], VJ, [VF ], VB),
- ]
-
-directfuncs = [
- # Section 6.12.2
- ("acospi" , [VF ], VF, "acos(x0)/(scalar_t)M_PI"),
- ("asinpi" , [VF ], VF, "asin(x0)/(scalar_t)M_PI"),
- ("atanpi" , [VF ], VF, "atan(x0)/(scalar_t)M_PI"),
- ("atan2" , [VF, VF ], VF, "({ vector_t a=atan(x0/x1); x1>(scalar_t)0.0f ? a : x1<(scalar_t)0.0f ? a+copysign((scalar_t)M_PI,x0) : copysign((scalar_t)M_PI_2,x0); })"),
- ("atan2pi" , [VF, VF ], VF, "atan2(x0,x1)/(scalar_t)M_PI"),
- ("cospi" , [VF ], VF, "cos((scalar_t)M_PI*x0)"),
- ("fmax" , [VF, SF ], VF, "fmax(x0,(vector_t)x1)"),
- ("fmin" , [VF, SF ], VF, "fmin(x0,(vector_t)x1)"),
- ("fract" , [VF, PVF ], VF, "*x1=floor(x0), fmin(x0-floor(x0), sizeof(scalar_t)==sizeof(float) ? (scalar_t)POCL_FRACT_MIN_F : (scalar_t)POCL_FRACT_MIN)"),
- ("frexp" , [VF, PVK ], VF, "*x1=ilogb(x0), ldexp(x0,-ilogb(x0))"),
- ("ilogb" , [VF ], VK, "convert_kvector_t(({ __attribute__((__overloadable__)) jvector_t ilogb_(vector_t); jvector_t jmin=sizeof(jvector_t)==sizeof(int)?INT_MIN:LONG_MIN; jvector_t r=ilogb_(x0); select(r, (jvector_t)FP_ILOGB0, r==jmin); }))"),
- ("ldexp" , [VF, VK ], VF, "({ __attribute__((__overloadable__)) vector_t ldexp_(vector_t,jvector_t); ldexp_(x0,convert_ivector_t(x1)); })"),
- ("ldexp" , [VF, SK ], VF, "({ __attribute__((__overloadable__)) vector_t ldexp_(vector_t,kscalar_t); ldexp_(x0,(kscalar_t)x1); })"),
- ("logb" , [VF ], VF, "convert_vector_t(ilogb(x0))"),
- ("mad" , [VF, VF, VF ], VF, "fma(x0,x1,x2)"),
- ("maxmag" , [VF, VF ], VF, "fabs(x0)>fabs(x1) ? x0 : fabs(x1)>fabs(x0) ? x1 : fmax(x0,x1)"),
- ("minmag" , [VF, VF ], VF, "fabs(x0)<fabs(x1) ? x0 : fabs(x1)<fabs(x0) ? x1 : fmin(x0,x1)"),
- ("modf" , [VF, PVF ], VF, "*x1=trunc(x0), copysign(x0-trunc(x0),x0)"),
- ("nan" , [VU ], VF, "(scalar_t)0.0f/(scalar_t)0.0f"),
- ("pown" , [VF, VK ], VF, "pow(x0,convert_vector_t(x1))"),
- ("powr" , [VF, VF ], VF, "pow(x0,x1)"),
- ("remquo" , [VF, VF, PVK], VF, "({ vector_t k=rint(x0/x1); *x2=(convert_kvector_t(k)&0x7f)*(1-2*convert_kvector_t(signbit(k))); x0-k*x1; })"),
- ("rootn" , [VF, VK ], VF, "pow(x0,(scalar_t)1.0f/convert_vector_t(x1))"),
- ("sincos" , [VF, PVF ], VF, "*x1=cos(x0), sin(x0)"),
- ("sinpi" , [VF ], VF, "sin((scalar_t)M_PI*x0)"),
- ("tanpi" , [VF ], VF, "tan((scalar_t)M_PI*x0)"),
-
- # Section 6.12.2, half_ functions
- ("half_cos" , [VF ], VF, "cos(x0)"),
- ("half_divide" , [VF, VF ], VF, "x0/x1"),
- ("half_exp" , [VF ], VF, "exp(x0)"),
- ("half_exp2" , [VF ], VF, "exp2(x0)"),
- ("half_exp10" , [VF ], VF, "exp10(x0)"),
- ("half_log" , [VF ], VF, "log(x0)"),
- ("half_log2" , [VF ], VF, "log2(x0)"),
- ("half_log10" , [VF ], VF, "log10(x0)"),
- ("half_powr" , [VF, VF ], VF, "powr(x0,x1)"),
- ("half_recip" , [VF ], VF, "(scalar_t)1.0f/x0"),
- ("half_rsqrt" , [VF ], VF, "rsqrt(x0)"),
- ("half_sin" , [VF ], VF, "sin(x0)"),
- ("half_sqrt" , [VF ], VF, "sqrt(x0)"),
- ("half_tan" , [VF ], VF, "tan(x0)"),
- # Section 6.12.2, native_ functions
- ("native_cos" , [VF ], VF, "cos(x0)"),
- ("native_divide" , [VF, VF ], VF, "x0/x1"),
- ("native_exp" , [VF ], VF, "exp(x0)"),
- ("native_exp2" , [VF ], VF, "exp2(x0)"),
- ("native_exp10" , [VF ], VF, "exp10(x0)"),
- ("native_log" , [VF ], VF, "log(x0)"),
- ("native_log2" , [VF ], VF, "log2(x0)"),
- ("native_log10" , [VF ], VF, "log10(x0)"),
- ("native_powr" , [VF, VF ], VF, "powr(x0,x1)"),
- ("native_recip" , [VF ], VF, "(scalar_t)1.0f/x0"),
- ("native_rsqrt" , [VF ], VF, "rsqrt(x0)"),
- ("native_sin" , [VF ], VF, "sin(x0)"),
- ("native_sqrt" , [VF ], VF, "sqrt(x0)"),
- ("native_tan" , [VF ], VF, "tan(x0)"),
-
- # Section 6.12.4
- ("clamp" , [VF, VF, VF ], VF, "fmin(fmax(x0,x1),x2)"),
- ("clamp" , [VF, SF, SF ], VF, "fmin(fmax(x0,x1),x2)"),
- ("degrees" , [VF ], VF, "(scalar_t)(180/M_PI)*x0"),
- ("max" , [VF, VF ], VF, "fmax(x0,x1)"),
- ("max" , [VF, SF ], VF, "fmax(x0,x1)"),
- ("min" , [VF, VF ], VF, "fmin(x0,x1)"),
- ("min" , [VF, SF ], VF, "fmin(x0,x1)"),
- ("mix" , [VF, VF, VF ], VF, "x0+(x1-x0)*x2"),
- ("mix" , [VF, VF, SF ], VF, "x0+(x1-x0)*x2"),
- ("radians" , [VF ], VF, "(scalar_t)(M_PI/180)*x0"),
- ("step" , [VF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0f : (vector_t)(scalar_t)1.0f"),
- ("step" , [SF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0f : (vector_t)(scalar_t)1.0f"),
- ("smoothstep" , [VF, VF, VF ], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0f, (scalar_t)1.0f); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"),
- ("smoothstep" , [SF, SF, VF ], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0f, (scalar_t)1.0f); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"),
- ("sign" , [VF ], VF, "copysign(x0!=(scalar_t)0.0f ? (vector_t)(scalar_t)1.0f : (vector_t)(scalar_t)0.0f,x0)"),
-
- # Section 6.12.6
- ("isequal" , [VF, VF ], VJ, "x0==x1"),
- ("isnotequal" , [VF, VF ], VJ, "x0!=x1"),
- ("isgreater" , [VF, VF ], VJ, "x0>x1"),
- ("isgreaterequal", [VF, VF ], VJ, "x0>=x1"),
- ("isless" , [VF, VF ], VJ, "x0<x1"),
- ("islessequal" , [VF, VF ], VJ, "x0<=x1"),
- ("islessgreater" , [VF, VF ], VJ, "x0<x1 || x0>x1"),
- ("isordered" , [VF, VF ], VJ, "!isunordered(x0,x1)"),
- ("isunordered" , [VF, VF ], VJ, "isnan(x0) || isnan(x1)"),
-]
-
-# Missing functions from 6.12.2: erfc, erf, lgamma, lgamma_r,
-# nextafter, tgamma
-
-# Unchecked: 6.12.3 (integer functions)
-
-# Missing functions from 6.12.6 (relational functions): any, all,
-# bitselect, select
-
-# Unchecked: 6.12.7 (vector data load and store functions)
-
-# Unchecked: 6.12.12 (miscellaneous vector functions)
-
-
-
-# This is always prepended to the generated function names.
-func_prefix = "_cl_"
-
-# Some of the functions need prefixes to avoid using the C standard
-# library ones.
-masked_functions = [
- "acos",
- "asin",
- "atan",
- "atan2",
- "ceil",
- "copysign",
- "cos",
- "exp",
- "exp2",
- "fabs",
- "floor",
- "fma",
- "fmax",
- "fmin",
- "log",
- "log2",
- "pow",
- "rint",
- "round",
- "sin",
- "sqrt",
- "tan",
- "trunc",
-]
-
-# This is prepended to masked function names.
-mask_prefix = ""
-
-def prefixed(name):
- if name in masked_functions: name = mask_prefix + name
- return func_prefix + name
-
-
-
-outfile = None
-outfile_did_truncate = set()
-def out(str): outfile.write("%s\n" % str)
-def out_open(name):
- global outfile
- global outfile_did_truncate
- if outfile: raise "file already open"
- is_first_open = name not in outfile_did_truncate
- if is_first_open:
- outfile = open(name, "w")
- outfile.close()
- outfile_did_truncate.add(name)
- print name,
- sys.stdout.flush()
- outfile = open(name, "a")
- return is_first_open
-def out_close():
- global outfile
- outfile.close()
- outfile = None
-
-declfile = None
-def decl(str):
- if str=="" or str.startswith("//") or str.startswith("#"):
- declfile.write("%s\n" % str)
- else:
- declfile.write("__attribute__((__overloadable__)) %s;\n" % str)
-def decl_open(name):
- global declfile
- declfile = open(name, "w")
-def decl_close():
- global declfile
- declfile.close()
- declfile = None
-
-
-
-def mktype(tp, vectype):
- (space, basetype, sizename) = (
- re.match("(global|local|private)?(float|double)([0-9]*)", vectype).
- groups())
- size = 1 if sizename=="" else int(sizename)
- if tp==SK:
- if size==1: return "int"
- return "int" if basetype=="float" else "long"
- if tp==SF:
- return basetype
- if tp==VI:
- ibasetype = "int" if basetype=="float" else "long"
- return "%s%s" % (ibasetype, sizename)
- if tp==VJ:
- if size==1: return "int"
- ibasetype = "int" if basetype=="float" else "long"
- return "%s%s" % (ibasetype, sizename)
- if tp==VK:
- return "int%s" % sizename
- if tp==PVK:
- if space=="": raise "wrong address space"
- return "%s int%s*" % (space, sizename)
- if tp==VU:
- ibasetype = "uint" if basetype=="float" else "ulong"
- return "%s%s" % (ibasetype, sizename)
- if tp==VF:
- return "%s%s" % (basetype, sizename)
- if tp==PVF:
- if space=="": raise "wrong address space"
- return "%s %s%s*" % (space, basetype, sizename)
- raise "unreachable"
-
-def mkvmltype(tp, vectype):
- if tp==SI: return vectype+"::int_t"
- if tp==SF: return vectype+"::real_t"
- if tp==VB: return vectype+"::boolvec_t"
- if tp in (VI,VJ): return vectype+"::intvec_t"
- if tp==VF: return vectype
- raise "unreachable"
-
-
-
-def output_vmlfunc_vml(func, vectype):
- (name, args, ret, vmlargs, vmlret) = func
- out("// Implement %s by calling vecmathlib" % name)
- (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
- size = 1 if size=="" else int(size)
- vmltype = "vecmathlib::realvec<%s,%d>" % (basetype, size)
- vmlinttype = "%s::intvec_t" % vmltype
- vmlbooltype = "%s::boolvec_t" % vmltype
- funcargstr = ", ".join(map(lambda (n, arg):
- "%s x%d" % (mktype(arg, vectype), n),
- zip(range(0, 100), args)))
- funcretstr = mktype(ret, vectype)
- decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("{")
- for (n, arg, vmlarg) in zip(range(0, 100), args, vmlargs):
- out(" %s y%d = bitcast<%s,%s >(x%d);" %
- (mkvmltype(vmlarg, vmltype), n,
- mktype(arg, vectype), mkvmltype(vmlarg, vmltype), n))
- callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
- zip(range(0, 100), args)))
- callretstr = mkvmltype(vmlret, vmltype)
- name1 = name[:-1] if name.endswith("_") else name
- out(" %s r = vecmathlib::%s(%s);" % (callretstr, name1, callargstr))
- # We may need to convert from the VML type to the OpenCL type
- # before bitcasting. This may be a real conversion, e.g. bool to
- # int. This may also involve a change in size (e.g. long to int),
- # but only if the type is scalar. These conversions are applied
- # before bitcasting.
- # convfunc: conversion function to call
- # convtype: result type of conversion, also input to bitcast
- # bitcasttype: output of bitcast; may differ from function result
- # if a size change is needed
- # TODO: Why is this here, and not e.g. near the signbit definition
- # in the table above?
- if vmlret==ret:
- convfunc = ""
- convtype = callretstr
- bitcasttype = funcretstr
- else:
- if vmlret==VI and ret in (VJ,VK):
- convfunc = ""
- convtype = callretstr
- elif vmlret==VB and ret in (VJ,VK):
- if size==1:
- # for scalars, true==+1
- convfunc = "vecmathlib::convert_int"
- else:
- # for vectors, true==-1
- convfunc = "-vecmathlib::convert_int"
- convtype = vmlinttype
- else:
- raise "missing"
- if ret in (VJ,VK):
- bitcasttype = mktype(VI, vectype)
- else:
- raise "missing"
- out(" return bitcast<%s,%s>(%s(r));" % (convtype, bitcasttype, convfunc))
- out("}")
-
-def output_vmlfunc_libm(func, vectype):
- (name, args, ret, vmlargs, vmlret) = func
- out("// Implement %s by calling libm" % name)
- (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
- size = 1 if size=="" else int(size)
- othertype = "vecmathlib::realpseudovec<%s,%d>" % (basetype, size)
- otherinttype = "%s::intvec_t" % othertype
- funcargstr = ", ".join(map(lambda (n, arg):
- "%s x%d" % (mktype(arg, vectype), n),
- zip(range(0, 100), args)))
- funcretstr = mktype(ret, vectype)
- decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("{")
- for (n, arg) in zip(range(0, 100), args):
- out(" %s y%d = x%d;" % (mkvmltype(arg, othertype), n, n))
- callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
- zip(range(0, 100), args)))
- # callretstr = othertype if ret==VF else otherinttype
- callretstr = mkvmltype(vmlret, othertype)
- name1 = name[:-1] if name.endswith("_") else name
- out(" %s r = %s(%s);" % (callretstr, name1, callargstr))
- # We may need to convert from the VML type to the OpenCL type
- # before bitcasting. This may be a real conversion, e.g. bool to
- # int. This may also involve a change in size (e.g. long to int),
- # but only if the type is scalar. These conversions are applied
- # before bitcasting.
- # convfunc: conversion function to call
- # convtype: result type of conversion, also input to bitcast
- # bitcasttype: output of bitcast; may differ from function result
- # if a size change is needed
- # TODO: Why is this here, and not e.g. near the signbit definition
- # in the table above?
- if vmlret==ret:
- convfunc = ""
- else:
- if vmlret==VI and ret in (VJ,VK):
- convfunc = ""
- elif vmlret==VB and ret in (VJ,VK):
- if size==1:
- # for scalars, true==+1
- convfunc = "vecmathlib::convert_int"
- else:
- # for vectors, true==-1
- convfunc = "-vecmathlib::convert_int"
- else:
- raise "missing"
- out(" return %s(r)[0];" % convfunc)
- out("}")
-
-def output_vmlfunc_upcast(func, vectype):
- (name, args, ret, vmlargs, vmlret) = func
- out("// Implement %s by using a larger vector size" % name)
- (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
- size = 1 if size=="" else int(size)
- size2 = 4 if size==3 else size*2 # next power of 2
- size2 = "" if size2==1 else str(size2)
- if size==1: raise "can't upcast scalars"
- othertype = "%s%s" % (basetype, size2)
- declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype),
- zip(range(0, 100), args)))
- out("%s %s(%s);" % (mktype(ret, othertype), prefixed(name), declargstr))
- funcargstr = ", ".join(map(lambda (n, arg):
- "%s x%d" % (mktype(arg, vectype), n),
- zip(range(0, 100), args)))
- decl("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr))
- out("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr))
- out("{")
- for (n, arg) in zip(range(0, 100), args):
- out(" %s y%d = bitcast<%s,%s>(x%d);" %
- (mktype(arg, othertype), n,
- mktype(arg, vectype), mktype(arg, othertype), n))
- callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
- zip(range(0, 100), args)))
- out(" %s r = %s(%s);" %
- (mktype(ret, othertype), prefixed(name), callargstr))
- out(" return bitcast<%s,%s>(r);" %
- (mktype(ret, othertype), mktype(ret, vectype)))
- out("}")
-
-def output_vmlfunc_split(func, vectype):
- (name, args, ret, vmlargs, vmlret) = func
- out("// Implement %s by splitting into a smaller vector size" % name)
- (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
- size = 1 if size=="" else int(size)
- size2 = (size+1) / 2 # divide by 2, rounding up
- size2 = "" if size2==1 else str(size2)
- othertype = "%s%s" % (basetype, size2)
- declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype),
- zip(range(0, 100), args)))
- out("%s %s(%s);" % (mktype(ret, othertype), prefixed(name), declargstr))
- funcargstr = ", ".join(map(lambda (n, arg):
- "%s x%d" % (mktype(arg, vectype), n),
- zip(range(0, 100), args)))
- decl("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr))
- out("%s %s(%s)" % (mktype(ret, vectype), prefixed(name), funcargstr))
- out("{")
- if ret in (SF, SK):
- split_ret = SF
- elif ret in (VI, VJ, VK):
- split_ret = VI
- elif ret in (VF):
- split_ret = VF
- else:
- raise "missing"
- for (n, arg) in zip(range(0, 100), args):
- out(" pair_%s y%d = bitcast<%s,pair_%s>(x%d);" %
- (mktype(arg, othertype), n,
- mktype(arg, vectype), mktype(arg, othertype), n))
- out(" pair_%s r;" % mktype(split_ret, othertype))
- # in OpenCL: for scalars, true==+1, but for vectors, true==-1
- conv = ""
- if vmlret==VB:
- if ret in (VJ,VK):
- if size2=="":
- conv = "-"
- else:
- raise "missing"
- for suffix in ("lo", "hi"):
- callargstr = ", ".join(map(lambda (n, arg): "y%d.%s" % (n, suffix),
- zip(range(0, 100), args)))
- out(" r.%s = %s%s(%s);" % (suffix, conv, prefixed(name), callargstr))
- out(" pocl_static_assert(sizeof(pair_%s) == sizeof(%s));" %
- (mktype(split_ret, othertype), mktype(ret, vectype)))
- out(" return bitcast<pair_%s,%s>(r);" %
- (mktype(split_ret, othertype), mktype(ret, vectype)))
- out("}")
-
-
-
-def output_directfunc_direct(func, vectype):
- (name, args, ret, impl) = func
- out("// Implement %s directly" % name)
- (space, basetype, sizename) = (
- re.match("(global|local|private)?(float|double)([0-9]*)", vectype).
- groups())
- size = 1 if sizename=="" else int(sizename)
- funcargstr = ", ".join(map(lambda (n, arg):
- "%s x%d" % (mktype(arg, vectype), n),
- zip(range(0, 100), args)))
- funcretstr = mktype(ret, vectype)
- decl("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("__attribute__((__overloadable__))");
- out("%s %s(%s)" % (funcretstr, prefixed(name), funcargstr))
- out("{")
- out(" typedef %s kscalar_t;" % mktype(SK, vectype))
- out(" typedef %s scalar_t;" % mktype(SF, vectype))
- out(" typedef %s ivector_t;" % mktype(VI, vectype))
- out(" typedef %s jvector_t;" % mktype(VJ, vectype))
- out(" typedef %s kvector_t;" % mktype(VK, vectype))
- out(" typedef %s vector_t;" % mktype(VF, vectype))
- out("#define convert_ivector_t convert_%s" % mktype(VI, vectype))
- out("#define convert_jvector_t convert_%s" % mktype(VJ, vectype))
- out("#define convert_kvector_t convert_%s" % mktype(VK, vectype))
- out("#define convert_vector_t convert_%s" % mktype(VF, vectype))
- out(" return %s;" % impl)
- out("#undef convert_ivector_t")
- out("#undef convert_jvector_t")
- out("#undef convert_kvector_t")
- out("#undef convert_vector_t")
- out("}")
-
-
-
-def output_vmlfunc(func):
- (name, args, ret, vmlargs, vmlret) = func
- is_first_open = out_open("%s.cc" % name)
- if is_first_open:
- out("// Note: This file has been automatically generated. Do not modify.")
- out("")
- out("#include \"pocl-compat.h\"")
- out("")
- else:
- out("")
- out("")
- out("")
- decl("")
- decl("// %s: %s -> %s" % (name, args, ret))
- decl("#undef %s" % name)
- if prefixed(name) != name:
- decl("#define %s %s" % (name, prefixed(name)))
- out("// %s: %s -> %s" % (name, args, ret))
- for basetype in ["float", "double"]:
- if basetype=="double":
- out("")
- out("#ifdef cl_khr_fp64")
- for size in [1, 2, 3, 4, 8, 16]:
- # Ignore this prototype for size==1 if there are any
- # scalar arguments; this prevents duplicate definitions
- if size==1 and any(map(lambda arg: arg in (SI, SK, SF), args)):
- continue
- sizename = '' if size==1 else str(size)
- vectype = basetype + sizename
- # always use vecmathlib if available
- out("")
- out("// %s: VF=%s" % (name, vectype))
- out("#if defined VECMATHLIB_HAVE_VEC_%s_%d" %
- (basetype.upper(), size))
- output_vmlfunc_vml(func, vectype)
- if size==1:
- # a scalar type: use libm
- out("#else")
- output_vmlfunc_libm(func, vectype)
- else:
- # a vector type: try upcasting to next power of 2
- size2 = 4 if size==3 else size*2
- out("#elif defined VECMATHLIB_HAVE_VEC_%s_%d" %
- (basetype.upper(), size2))
- output_vmlfunc_upcast(func, vectype)
- # a vector type: split into smaller vector type
- out("#else")
- output_vmlfunc_split(func, vectype)
- out("#endif")
- if basetype=="double":
- out("")
- out("#endif // #ifdef cl_khr_fp64")
- out_close()
-
-
-
-def output_directfunc(func):
- (name, args, ret, impl) = func
- is_first_open = out_open("%s.cl" % name)
- if is_first_open:
- out("// Note: This file has been automatically generated. Do not modify.")
- out("")
- out("// Needed for fract()")
- out("#define POCL_FRACT_MIN 0x1.fffffffffffffp-1")
- out("#define POCL_FRACT_MIN_F 0x1.fffffep-1f")
- out("")
- out("// If double precision is not supported, then define")
- out("// single-precision (dummy) values to avoid compiler warnings")
- out("// for double precision values")
- out("#ifndef khr_fp64")
- out("# undef M_PI")
- out("# define M_PI M_PI_F")
- out("# undef M_PI_2")
- out("# define M_PI_2 M_PI_2_F")
- out("# undef LONG_MAX")
- out("# define LONG_MAX INT_MAX")
- out("# undef LONG_MIN")
- out("# define LONG_MIN INT_MIN")
- out("# undef POCL_FRACT_MIN")
- out("# define POCL_FRACT_MIN POCL_FRACT_MIN_F")
- out("#endif")
- out("")
- else:
- out("")
- out("")
- out("")
- decl("")
- decl("// %s: %s -> %s" % (name, args, ret))
- decl("#undef %s" % name)
- if prefixed(name) != name:
- decl("#define %s %s" % (name, prefixed(name)))
- out("// %s: %s -> %s" % (name, args, ret))
- if any(map(lambda arg: arg in (PVK, PVF), args)):
- spaces = ["global", "local", "private"]
- else:
- spaces = [""]
- for basetype in ["float", "double"]:
- if ((name.startswith("half_") or name.startswith("native_")) and
- basetype=="double"):
- continue
- if basetype=="double":
- out("")
- out("#ifdef cl_khr_fp64")
- for size in [1, 2, 3, 4, 8, 16]:
- # Ignore this prototype for size==1 if there are any
- # scalar arguments; this prevents duplicate definitions
- if size==1 and any(map(lambda arg: arg in (SI, SK, SF), args)):
- continue
- sizename = '' if size==1 else str(size)
- for space in spaces:
- vectype = space + basetype + sizename
- # always use vecmathlib if available
- out("")
- out("// %s: VF=%s" % (name, vectype))
- output_directfunc_direct(func, vectype)
- if basetype=="double":
- out("")
- out("#endif // #ifdef cl_khr_fp64")
- out_close()
-
-
-
-decl_open("kernel-vecmathlib.h")
-decl("// Note: This file has been automatically generated. Do not modify.")
-decl("#ifndef KERNEL_VECMATHLIB_H")
-decl("#define KERNEL_VECMATHLIB_H 1")
-map(output_vmlfunc, vmlfuncs)
-map(output_directfunc, directfuncs)
-decl("")
-decl("#endif // #ifndef KERNEL_VECMATHLIB_H")
-decl_close()
-print
diff --git a/pocl/length.cl b/pocl/length.cl
deleted file mode 100644
index 9715e59..0000000
--- a/pocl/length.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-__attribute__((__overloadable__))
-float length(float p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float length(float2 p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float length(float3 p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float length(float4 p)
-{
- return sqrt(dot(p, p));
-}
-
-#ifdef cl_khr_fp64
-__attribute__((__overloadable__))
-double length(double p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double length(double2 p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double length(double3 p)
-{
- return sqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double length(double4 p)
-{
- return sqrt(dot(p, p));
-}
-#endif
diff --git a/pocl/normalize.cl b/pocl/normalize.cl
deleted file mode 100644
index e033567..0000000
--- a/pocl/normalize.cl
+++ /dev/null
@@ -1,49 +0,0 @@
-__attribute__((__overloadable__))
-float normalize(float p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float2 normalize(float2 p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float3 normalize(float3 p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-float4 normalize(float4 p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-#ifdef cl_khr_fp64
-__attribute__((__overloadable__))
-double normalize(double p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double2 normalize(double2 p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double3 normalize(double3 p)
-{
- return p * rsqrt(dot(p, p));
-}
-
-__attribute__((__overloadable__))
-double4 normalize(double4 p)
-{
- return p * rsqrt(dot(p, p));
-}
-#endif
diff --git a/pocl/pocl-compat.h b/pocl/pocl-compat.h
deleted file mode 100644
index 10dc281..0000000
--- a/pocl/pocl-compat.h
+++ /dev/null
@@ -1,118 +0,0 @@
-// -*-C++-*- Compatibility layer to help instantiante functions to
-// create a library that can be called from elsewhere
-
-
-
-// Make things go fast (and debugging difficult...)
-#define VML_NODEBUG
-#include "../vecmathlib.h"
-
-#include <algorithm>
-#include <cstring>
-
-#define pocl_static_assert(b) typedef char _static_assert[(b)?+1:-1]
-
-// If double precision is not supported, then define single-precision
-// (dummy) values to avoid compiler warnings for double precision
-// values
-#ifndef cl_khr_fp64
-# undef M_PI
-# define M_PI M_PI_F
-#endif
-
-
-
-// Define vector types
-
-#define int std::int32_t
-typedef int int2 __attribute__((__ext_vector_type__( 2)));
-typedef int int3 __attribute__((__ext_vector_type__( 3)));
-typedef int int4 __attribute__((__ext_vector_type__( 4)));
-typedef int int8 __attribute__((__ext_vector_type__( 8)));
-typedef int int16 __attribute__((__ext_vector_type__(16)));
-
-#define uint std::uint32_t
-typedef uint uint2 __attribute__((__ext_vector_type__( 2)));
-typedef uint uint3 __attribute__((__ext_vector_type__( 3)));
-typedef uint uint4 __attribute__((__ext_vector_type__( 4)));
-typedef uint uint8 __attribute__((__ext_vector_type__( 8)));
-typedef uint uint16 __attribute__((__ext_vector_type__(16)));
-
-#ifdef cles_khr_int64
-#define long std::int64_t
-typedef long long2 __attribute__((__ext_vector_type__( 2)));
-typedef long long3 __attribute__((__ext_vector_type__( 3)));
-typedef long long4 __attribute__((__ext_vector_type__( 4)));
-typedef long long8 __attribute__((__ext_vector_type__( 8)));
-typedef long long16 __attribute__((__ext_vector_type__(16)));
-
-#define ulong std::uint64_t
-typedef ulong ulong2 __attribute__((__ext_vector_type__( 2)));
-typedef ulong ulong3 __attribute__((__ext_vector_type__( 3)));
-typedef ulong ulong4 __attribute__((__ext_vector_type__( 4)));
-typedef ulong ulong8 __attribute__((__ext_vector_type__( 8)));
-typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
-#endif
-
-typedef float float2 __attribute__((__ext_vector_type__( 2)));
-typedef float float3 __attribute__((__ext_vector_type__( 3)));
-typedef float float4 __attribute__((__ext_vector_type__( 4)));
-typedef float float8 __attribute__((__ext_vector_type__( 8)));
-typedef float float16 __attribute__((__ext_vector_type__(16)));
-
-#ifdef cl_khr_fp64
-typedef double double2 __attribute__((__ext_vector_type__( 2)));
-typedef double double3 __attribute__((__ext_vector_type__( 3)));
-typedef double double4 __attribute__((__ext_vector_type__( 4)));
-typedef double double8 __attribute__((__ext_vector_type__( 8)));
-typedef double double16 __attribute__((__ext_vector_type__(16)));
-#endif
-
-
-
-// Declare pair types for assembling/disassembling vectors
-struct pair_int { int lo, hi; };
-struct pair_int2 { int2 lo, hi; };
-struct pair_int3 { int3 lo, hi; };
-struct pair_int4 { int4 lo, hi; };
-struct pair_int8 { int8 lo, hi; };
-struct pair_int16 { int16 lo, hi; };
-
-#ifdef cles_khr_int64
-struct pair_long { long lo, hi; };
-struct pair_long2 { long2 lo, hi; };
-struct pair_long3 { long3 lo, hi; };
-struct pair_long4 { long4 lo, hi; };
-struct pair_long8 { long8 lo, hi; };
-struct pair_long16 { long16 lo, hi; };
-#endif
-
-struct pair_float { float lo, hi; };
-struct pair_float2 { float2 lo, hi; };
-struct pair_float3 { float3 lo, hi; };
-struct pair_float4 { float4 lo, hi; };
-struct pair_float8 { float8 lo, hi; };
-struct pair_float16 { float16 lo, hi; };
-
-#ifdef cl_khr_fp64
-struct pair_double { double lo, hi; };
-struct pair_double2 { double2 lo, hi; };
-struct pair_double3 { double3 lo, hi; };
-struct pair_double4 { double4 lo, hi; };
-struct pair_double8 { double8 lo, hi; };
-struct pair_double16 { double16 lo, hi; };
-#endif
-
-
-
-// Generic conversion function
-template<typename A, typename B>
-static B bitcast(A a)
-{
- B b;
- std::memcpy(&b, &a, std::min(sizeof a, sizeof b));
- if (sizeof b > sizeof a) {
- std::memset((char*)&b + sizeof a, 0, sizeof b - sizeof a);
- }
- return b;
-}
OpenPOWER on IntegriCloud