author    Erik Schnetter <schnetter@gmail.com>  2013-02-17 20:46:39 -0500
committer Erik Schnetter <schnetter@gmail.com>  2013-02-17 20:46:39 -0500
commit    1fccde626317f3c6aad9186200d169baa1673a0f (patch)
tree      36c0c7a22e35b0737dbf63212e1629a6caa09079 /pocl
parent    16627815106407a34b85ca691bd98ee18c92abe3 (diff)
download  vecmathlib-1fccde626317f3c6aad9186200d169baa1673a0f.zip
          vecmathlib-1fccde626317f3c6aad9186200d169baa1673a0f.tar.gz
Generate pocl-compatible OpenCL math function bindings
Diffstat (limited to 'pocl')
-rw-r--r--  pocl/CMakeLists.txt      33
-rw-r--r--  pocl/cross.cl            43
-rw-r--r--  pocl/distance.cl         49
-rw-r--r--  pocl/dot.cl              49
-rw-r--r--  pocl/fast_distance.cl    23
-rw-r--r--  pocl/fast_length.cl      26
-rw-r--r--  pocl/fast_normalize.cl   26
-rwxr-xr-x  pocl/generate-files.py  496
-rw-r--r--  pocl/length.cl           49
-rw-r--r--  pocl/normalize.cl        49
-rw-r--r--  pocl/pocl-compat.h       78
11 files changed, 921 insertions, 0 deletions
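The generated bindings follow a common pattern: each OpenCL math function gets a __vml_-prefixed wrapper that bitcasts its vector arguments to the corresponding vecmathlib type, calls the vecmathlib implementation, and bitcasts the result back; the generated kernel-vecmathlib.h then #defines the standard name to the wrapper. As a rough, illustrative sketch of what generate-files.py emits for acos on float4 (the real acos.cc is generated and additionally falls back to libm, upcasting, or splitting when a vector size is unavailable):

    float4 __vml_acos(float4 x0)
    {
      vecmathlib::realvec<float,4> y0 =
        bitcast<float4, vecmathlib::realvec<float,4> >(x0);
      vecmathlib::realvec<float,4> r = vecmathlib::acos(y0);
      return bitcast<vecmathlib::realvec<float,4>, float4>(r);
    }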
diff --git a/pocl/CMakeLists.txt b/pocl/CMakeLists.txt
new file mode 100644
index 0000000..2586bac
--- /dev/null
+++ b/pocl/CMakeLists.txt
@@ -0,0 +1,33 @@
+# See file "BUILD" for instructions
+
+set (GEN_SRCS
+ acos.cc acosh.cc asin.cc asinh.cc atan.cc atanh.cc cbrt.cc ceil.cc
+ copysign.cc cos.cc cosh.cc exp.cc exp2.cc exp10.cc expm1.cc fabs.cc
+ fdim.cc floor.cc fma.cc fmax.cc fmin.cc fmod.cc hypot.cc ilogb.cc
+ ldexp.cc log.cc log2.cc log10.cc log1p.cc pow.cc remainder.cc round.cc
+ rsqrt.cc sin.cc sinh.cc sqrt.cc tan.cc tanh.cc trunc.cc
+ isfinite.cc isinf.cc isnan.cc isnormal.cc signbit.cc
+ acospi.cl asinpi.cl atanpi.cl atan2pi.cl cospi.cl fmax.cl fmin.cl mad.cl
+ maxmag.cl minmag.cl nan.cl pown.cl powr.cl rint.cl rootn.cl sinpi.cl tanpi.cl
+ half_cos.cl half_divide.cl half_exp.cl half_exp2.cl half_exp10.cl half_log.cl
+ half_log2.cl half_log10.cl half_powr.cl half_recip.cl half_rsqrt.cl
+ half_sin.cl half_sqrt.cl half_tan.cl
+ native_cos.cl native_divide.cl native_exp.cl native_exp2.cl native_exp10.cl
+ native_log.cl native_log2.cl native_log10.cl native_powr.cl native_recip.cl
+ native_rsqrt.cl native_sin.cl native_sqrt.cl native_tan.cl
+ clamp.cl degrees.cl max.cl min.cl mix.cl radians.cl step.cl smoothstep.cl
+ sign.cl isequal.cl isnotequal.cl isgreater.cl isgreaterequal.cl isless.cl
+ islessequal.cl islessgreater.cl isordered.cl isunordered.cl
+ )
+
+set (SRCS
+ cross.cl dot.cl distance.cl length.cl normalize.cl fast_distance.cl
+ fast_length.cl fast_normalize.cl
+ ${GEN_SRCS}
+)
+
+add_custom_command(
+ OUTPUT ${GEN_SRCS} kernel-vecmathlib.h
+ COMMAND ./generate-files.py
+ DEPENDS generate-files.py)
+
+add_library (kernel-vecmathlib ${SRCS})
+
+
+
+# GCC:
+# set (CMAKE_CXX_COMPILER "g++")
+# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g -std=gnu++11 -march=native -Ofast")
+
+# Clang:
+# Note: This fails to link with -O4
+set (CMAKE_CXX_COMPILER "clang++-mp-3.3")
+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g -Dcl_khr_fp64 -Dcles_khr_int64 -std=gnu++11 -stdlib=libc++ -march=native -O3")
+
+# Intel:
+# set (CMAKE_CXX_COMPILER "icpc")
+# set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g -std=c++11 -fast")
diff --git a/pocl/cross.cl b/pocl/cross.cl
new file mode 100644
index 0000000..7b8e861
--- /dev/null
+++ b/pocl/cross.cl
@@ -0,0 +1,43 @@
+__attribute__((__overloadable__))
+float4 cross(float4 p0, float4 p1)
+{
+ float4 r;
+ r.x = p0.y*p1.z - p0.z*p1.y;
+ r.y = p0.z*p1.x - p0.x*p1.z;
+ r.z = p0.x*p1.y - p0.y*p1.x;
+ r.w = 0.0f;
+ return r;
+}
+
+__attribute__((__overloadable__))
+float3 cross(float3 p0, float3 p1)
+{
+ float3 r;
+ r.x = p0.y*p1.z - p0.z*p1.y;
+ r.y = p0.z*p1.x - p0.x*p1.z;
+ r.z = p0.x*p1.y - p0.y*p1.x;
+ return r;
+}
+
+#ifdef cl_khr_fp64
+__attribute__((__overloadable__))
+double4 cross(double4 p0, double4 p1)
+{
+ double4 r;
+ r.x = p0.y*p1.z - p0.z*p1.y;
+ r.y = p0.z*p1.x - p0.x*p1.z;
+ r.z = p0.x*p1.y - p0.y*p1.x;
+ r.w = 0.0;
+ return r;
+}
+
+__attribute__((__overloadable__))
+double3 cross(double3 p0, double3 p1)
+{
+ double3 r;
+ r.x = p0.y*p1.z - p0.z*p1.y;
+ r.y = p0.z*p1.x - p0.x*p1.z;
+ r.z = p0.x*p1.y - p0.y*p1.x;
+ return r;
+}
+#endif
diff --git a/pocl/distance.cl b/pocl/distance.cl
new file mode 100644
index 0000000..5df8637
--- /dev/null
+++ b/pocl/distance.cl
@@ -0,0 +1,49 @@
+__attribute__((__overloadable__))
+float distance(float p0, float p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float distance(float2 p0, float2 p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float distance(float3 p0, float3 p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float distance(float4 p0, float4 p1)
+{
+ return length(p0-p1);
+}
+
+#ifdef cl_khr_fp64
+__attribute__((__overloadable__))
+double distance(double p0, double p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+double distance(double2 p0, double2 p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+double distance(double3 p0, double3 p1)
+{
+ return length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+double distance(double4 p0, double4 p1)
+{
+ return length(p0-p1);
+}
+#endif
diff --git a/pocl/dot.cl b/pocl/dot.cl
new file mode 100644
index 0000000..91bb400
--- /dev/null
+++ b/pocl/dot.cl
@@ -0,0 +1,49 @@
+__attribute__((__overloadable__))
+float dot(float p0, float p1)
+{
+ return p0*p1;
+}
+
+__attribute__((__overloadable__))
+float dot(float2 p0, float2 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+__attribute__((__overloadable__))
+float dot(float3 p0, float3 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+__attribute__((__overloadable__))
+float dot(float4 p0, float4 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+
+#ifdef cl_khr_fp64
+__attribute__((__overloadable__))
+double dot(double p0, double p1)
+{
+ return p0*p1;
+}
+
+__attribute__((__overloadable__))
+double dot(double2 p0, double2 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y;
+}
+
+__attribute__((__overloadable__))
+double dot(double3 p0, double3 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+__attribute__((__overloadable__))
+double dot(double4 p0, double4 p1)
+{
+ return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+#endif
diff --git a/pocl/fast_distance.cl b/pocl/fast_distance.cl
new file mode 100644
index 0000000..c2a7e9e
--- /dev/null
+++ b/pocl/fast_distance.cl
@@ -0,0 +1,23 @@
+__attribute__((__overloadable__))
+float fast_distance(float p0, float p1)
+{
+ return fast_length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float fast_distance(float2 p0, float2 p1)
+{
+ return fast_length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float fast_distance(float3 p0, float3 p1)
+{
+ return fast_length(p0-p1);
+}
+
+__attribute__((__overloadable__))
+float fast_distance(float4 p0, float4 p1)
+{
+ return fast_length(p0-p1);
+}
diff --git a/pocl/fast_length.cl b/pocl/fast_length.cl
new file mode 100644
index 0000000..eb765b9
--- /dev/null
+++ b/pocl/fast_length.cl
@@ -0,0 +1,26 @@
+// Note: Section 6.12.5 of the OpenCL standard says to use half_sqrt,
+// not fast_sqrt
+
+__attribute__((__overloadable__))
+float fast_length(float p)
+{
+ return half_sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float fast_length(float2 p)
+{
+ return half_sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float fast_length(float3 p)
+{
+ return half_sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float fast_length(float4 p)
+{
+ return half_sqrt(dot(p, p));
+}
diff --git a/pocl/fast_normalize.cl b/pocl/fast_normalize.cl
new file mode 100644
index 0000000..ecdd524
--- /dev/null
+++ b/pocl/fast_normalize.cl
@@ -0,0 +1,26 @@
+// Note: Section 6.12.5 of the OpenCL standard says to use half_rsqrt,
+// not fast_rsqrt
+
+__attribute__((__overloadable__))
+float fast_normalize(float p)
+{
+ return p * half_rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float2 fast_normalize(float2 p)
+{
+ return p * half_rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float3 fast_normalize(float3 p)
+{
+ return p * half_rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float4 fast_normalize(float4 p)
+{
+ return p * half_rsqrt(dot(p, p));
+}
diff --git a/pocl/generate-files.py b/pocl/generate-files.py
new file mode 100755
index 0000000..00d8ce2
--- /dev/null
+++ b/pocl/generate-files.py
@@ -0,0 +1,496 @@
+#! /usr/bin/env python
+
+import re
+
+
+
+# Types:
+SI = "SI" # int/long
+SJ = "SJ" # int (even for double)
+SF = "SF" # float/double
+VB = "VB" # boolN
+VF = "VF" # floatN/doubleN
+VI = "VI" # intN/longN
+VJ = "VJ" # intN/longN (except int1 for double1)
+VK = "VK" # intN (even for doubleN)
+VU = "VU" # uintN/ulongN
+
+# Each function is described by a tuple with the following entries:
+# 1. name
+# 2. external argument types (see above)
+# 3. external return type
+# 4. vecmathlib argument types (see above)
+# 5. vecmathlib return type
+# This allows generating externally visible functions with different
+# signatures, e.g. to support OpenCL.
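+# For example, the ilogb entry below,
+#     ("ilogb", [VF], VJ, [VF], VI),
+# exposes intN ilogb(floatN) / longN ilogb(doubleN) to OpenCL (VJ), while the
+# underlying vecmathlib call returns its intvec_t (VI); the generated wrapper
+# bitcasts between the two representations.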
+vmlfuncs = [
+ ("acos" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("acosh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("asin" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("asinh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("atan" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("atanh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("cbrt" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("ceil" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("copysign" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("cos" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("cosh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("exp" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("exp2" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("exp10" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("expm1" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("fabs" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("fdim" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("floor" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("fma" , [VF, VF, VF], VF, [VF, VF, VF], VF), # 6.12.2
+ ("fmax" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("fmin" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("fmod" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("hypot" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("ilogb" , [VF ], VJ, [VF ], VI), # 6.12.2 (but should return VK)
+ ("ldexp" , [VF, VJ ], VF, [VF, VI ], VF), # 6.12.2 (but should take VK)
+ ("ldexp" , [VF, SJ ], VF, [VF, SI ], VF), # 6.12.2
+ ("log" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("log2" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("log10" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("log1p" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("pow" , [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("remainder", [VF, VF ], VF, [VF, VF ], VF), # 6.12.2
+ ("round" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("rsqrt" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("sin" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("sinh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("sqrt" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("tan" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("tanh" , [VF ], VF, [VF ], VF), # 6.12.2
+ ("trunc" , [VF ], VF, [VF ], VF), # 6.12.2
+
+ ("isfinite" , [VF ], VJ, [VF ], VB), # 6.12.6
+ ("isinf" , [VF ], VJ, [VF ], VB), # 6.12.6
+ ("isnan" , [VF ], VJ, [VF ], VB), # 6.12.6
+ ("isnormal" , [VF ], VJ, [VF ], VB), # 6.12.6
+ ("signbit" , [VF ], VJ, [VF ], VB), # 6.12.6
+ ]
+
+
+
+directfuncs = [
+ ("acospi" , [VF ], VF, "acos(x0)/(scalar_t)M_PI"), # 6.12.2
+ ("asinpi" , [VF ], VF, "asin(x0)/(scalar_t)M_PI"), # 6.12.2
+ ("atanpi" , [VF ], VF, "atan(x0)/(scalar_t)M_PI"), # 6.12.2
+ ("atan2pi" , [VF, VF ], VF, "atan2(x0,x1)/(scalar_t)M_PI"), # 6.12.2
+ ("cospi" , [VF ], VF, "cos((scalar_t)M_PI*x0)"), # 6.12.2
+ ("fmax" , [VF, SF ], VF, "fmax(x0,(vector_t)x1)"), # 6.12.2
+ ("fmin" , [VF, SF ], VF, "fmin(x0,(vector_t)x1)"), # 6.12.2
+ ("mad" , [VF, VF, VF], VF, "fma(x0,x1,x2)"), # 6.12.2
+ ("maxmag" , [VF, VF ], VF, "fabs(x0)>fabs(x1) ? x0 : fabs(x1)>fabs(x0) ? x1 : fmax(x0,x1)"), # 6.12.2
+ ("minmag" , [VF, VF ], VF, "fabs(x0)<fabs(x1) ? x0 : fabs(x1)<fabs(x0) ? x1 : fmin(x0,x1)"), # 6.12.2
+ ("nan" , [VU ], VF, "(scalar_t)0.0/(scalar_t)0.0"), # 6.12.2
+ ("pown" , [VF, VK ], VF, "pow(x0,convert_vector_t(x1))"), # 6.12.2
+ ("powr" , [VF, VF ], VF, "pow(x0,x1)"), # 6.12.2
+ ("rint" , [VF ], VF, "round(x0)"), # 6.12.2
+ ("rootn" , [VF, VK ], VF, "pow(x0,(scalar_t)1.0/convert_vector_t(x1))"), # 6.12.2
+ ("sinpi" , [VF ], VF, "sin((scalar_t)M_PI*x0)"), # 6.12.2
+ ("tanpi" , [VF ], VF, "tan((scalar_t)M_PI*x0)"), # 6.12.2
+
+ ("half_cos" , [VF ], VF, "cos(x0)"), # 6.12.2
+ ("half_divide" , [VF, VF ], VF, "x0/x1"), # 6.12.2
+ ("half_exp" , [VF ], VF, "exp(x0)"), # 6.12.2
+ ("half_exp2" , [VF ], VF, "exp2(x0)"), # 6.12.2
+ ("half_exp10" , [VF ], VF, "exp10(x0)"), # 6.12.2
+ ("half_log" , [VF ], VF, "log(x0)"), # 6.12.2
+ ("half_log2" , [VF ], VF, "log2(x0)"), # 6.12.2
+ ("half_log10" , [VF ], VF, "log10(x0)"), # 6.12.2
+ ("half_powr" , [VF, VF ], VF, "powr(x0,x1)"), # 6.12.2
+ ("half_recip" , [VF ], VF, "(scalar_t)1.0/x0"), # 6.12.2
+ ("half_rsqrt" , [VF ], VF, "rsqrt(x0)"), # 6.12.2
+ ("half_sin" , [VF ], VF, "sin(x0)"), # 6.12.2
+ ("half_sqrt" , [VF ], VF, "sqrt(x0)"), # 6.12.2
+ ("half_tan" , [VF ], VF, "tan(x0)"), # 6.12.2
+
+ ("native_cos" , [VF ], VF, "cos(x0)"), # 6.12.2
+ ("native_divide", [VF, VF ], VF, "x0/x1"), # 6.12.2
+ ("native_exp" , [VF ], VF, "exp(x0)"), # 6.12.2
+ ("native_exp2" , [VF ], VF, "exp2(x0)"), # 6.12.2
+ ("native_exp10" , [VF ], VF, "exp10(x0)"), # 6.12.2
+ ("native_log" , [VF ], VF, "log(x0)"), # 6.12.2
+ ("native_log2" , [VF ], VF, "log2(x0)"), # 6.12.2
+ ("native_log10" , [VF ], VF, "log10(x0)"), # 6.12.2
+ ("native_powr" , [VF, VF ], VF, "powr(x0,x1)"), # 6.12.2
+ ("native_recip" , [VF ], VF, "(scalar_t)1.0/x0"), # 6.12.2
+ ("native_rsqrt" , [VF ], VF, "rsqrt(x0)"), # 6.12.2
+ ("native_sin" , [VF ], VF, "sin(x0)"), # 6.12.2
+ ("native_sqrt" , [VF ], VF, "sqrt(x0)"), # 6.12.2
+ ("native_tan" , [VF ], VF, "tan(x0)"), # 6.12.2
+
+ ("clamp" , [VF, VF, VF], VF, "fmin(fmax(x0,x1),x2)"), # 6.12.4
+ ("clamp" , [VF, SF, SF], VF, "fmin(fmax(x0,x1),x2)"), # 6.12.4
+ ("degrees" , [VF ], VF, "(scalar_t)(180.0/M_PI)*x0"), # 6.12.4
+ ("max" , [VF, VF ], VF, "fmax(x0,x1)"), # 6.12.4
+ ("max" , [VF, SF ], VF, "fmax(x0,x1)"), # 6.12.4
+ ("min" , [VF, VF ], VF, "fmin(x0,x1)"), # 6.12.4
+ ("min" , [VF, SF ], VF, "fmin(x0,x1)"), # 6.12.4
+ ("mix" , [VF, VF, VF], VF, "x0+(x1-x0)*x2"), # 6.12.4
+ ("mix" , [VF, VF, SF], VF, "x0+(x1-x0)*x2"), # 6.12.4
+ ("radians" , [VF ], VF, "(scalar_t)(M_PI/180.0)*x0"), # 6.12.4
+ ("step" , [VF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0 : (vector_t)(scalar_t)1.0"), # 6.12.4
+ ("step" , [SF, VF ], VF, "x1<x0 ? (vector_t)(scalar_t)0.0 : (vector_t)(scalar_t)1.0"), # 6.12.4
+ ("smoothstep" , [VF, VF, VF], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0, (scalar_t)1.0); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"), # 6.12.4
+ ("smoothstep" , [SF, SF, VF], VF, "({ vector_t t = clamp((x2-x0)/(x1-x0), (scalar_t)0.0, (scalar_t)1.0); t*t*((scalar_t)3.0-(scalar_t)2.0*t); })"), # 6.12.4
+ ("sign" , [VF ], VF, "copysign(x0!=(scalar_t)0.0 ? (vector_t)(scalar_t)1.0 : (vector_t)(scalar_t)0.0,x0)"), # 6.12.4
+
+ ("isequal" , [VF, VF ], VJ, "x0==x1"), # 6.12.6
+ ("isnotequal" , [VF, VF ], VJ, "x0!=x1"), # 6.12.6
+ ("isgreater" , [VF, VF ], VJ, "x0>x1"), # 6.12.6
+ ("isgreaterequal", [VF, VF ], VJ, "x0>=x1"), # 6.12.6
+ ("isless" , [VF, VF ], VJ, "x0<x1"), # 6.12.6
+ ("islessequal" , [VF, VF ], VJ, "x0<=x1"), # 6.12.6
+ ("islessgreater" , [VF, VF ], VJ, "x0<x1 || x0>x1"), # 6.12.6
+ ("isordered" , [VF, VF ], VJ, "!isunordered(x0,x1)"), # 6.12.6
+ ("isunordered" , [VF, VF ], VJ, "isnan(x0) || isnan(x1)"), # 6.12.6
+]
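+
+# As a concrete illustration, the cospi entry above expands (via
+# output_directfunc_direct below) into one overload per vector type,
+# of roughly this shape for float4:
+#     __attribute__((__overloadable__))
+#     float4 __vml_cospi(float4 x0)
+#     {
+#       typedef float scalar_t;
+#       typedef float4 vector_t;
+#       return cos((scalar_t)M_PI*x0);
+#     }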
+
+# Missing functions from 6.12.2: atan2, erfc, erf, fract, frexp,
+# lgamma, lgamma_r, logb, modf, nextafter, remquo, sincos, tgamma
+
+# Unchecked: 6.12.3 (integer functions)
+
+# Missing functions from 6.12.6 (relational functions): any, all,
+# bitselect, select
+
+# Unchecked: 6.12.7 (vector data load and store functions)
+
+# Unchecked: 6.12.12 (miscellaneous vector functions)
+
+
+
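+# Output helpers: out_open() truncates each generated file the first time it
+# is opened during a run and appends on later opens, so several functions can
+# be written into the same generated file.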
+outfile = None
+outfile_did_truncate = set()
+def out(str): outfile.write("%s\n" % str)
+def out_open(name):
+ global outfile
+ global outfile_did_truncate
+ if outfile: raise Exception("file already open")
+ is_first_open = name not in outfile_did_truncate
+ if is_first_open:
+ outfile = open(name, "w")
+ outfile.close()
+ outfile_did_truncate.add(name)
+ print name
+ outfile = open(name, "a")
+ return is_first_open
+def out_close():
+ global outfile
+ outfile.close()
+ outfile = None
+
+declfile = None
+def decl(str):
+ if str=="" or str.startswith("//") or str.startswith("#"):
+ declfile.write("%s\n" % str)
+ else:
+ declfile.write("__attribute__((__overloadable__)) %s;\n" % str)
+def decl_open(name):
+ global declfile
+ declfile = open(name, "w")
+def decl_close():
+ global declfile
+ declfile.close()
+ declfile = None
+
+
+
+def mktype(tp, vectype):
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ if tp==SJ:
+ if size==1: return "int"
+ return "int" if basetype=="float" else "long"
+ if tp==SF:
+ return basetype
+ if tp==VI:
+ ibasetype = "int" if basetype=="float" else "long"
+ return "%s%s" % (ibasetype, "" if size==1 else str(size))
+ if tp==VJ:
+ if size==1: return "int"
+ ibasetype = "int" if basetype=="float" else "long"
+ return "%s%d" % (ibasetype, size)
+ if tp==VK:
+ if size==1: return "int"
+ return "int%d" % size
+ if tp==VU:
+ ibasetype = "uint" if basetype=="float" else "ulong"
+ return "%s%s" % (ibasetype, "" if size==1 else str(size))
+ if tp==VF:
+ return vectype
+ raise "unreachable"
+
+def mkvmltype(tp, vectype):
+ if tp==SI: return vectype+"::int_t"
+ if tp==SF: return vectype+"::real_t"
+ if tp==VB: return vectype+"::boolvec_t"
+ if tp==VI: return vectype+"::intvec_t"
+ if tp==VF: return vectype
+ raise "unreachable"
+
+
+
+def output_vmlfunc_vml(func, vectype):
+ (name, args, ret, vmlargs, vmlret) = func
+ out("// Implement %s by calling vecmathlib" % name)
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ vmltype = "vecmathlib::realvec<%s,%d>" % (basetype, size)
+ vmlinttype = "%s::intvec_t" % vmltype
+ vmlbooltype = "%s::boolvec_t" % vmltype
+ funcargstr = ", ".join(map(lambda (n, arg):
+ "%s x%d" % (mktype(arg, vectype), n),
+ zip(range(0, 100), args)))
+ funcretstr = mktype(ret, vectype)
+ decl("%s __vml_%s(%s)" % (funcretstr, name, funcargstr))
+ out("%s __vml_%s(%s)" % (funcretstr, name, funcargstr))
+ out("{")
+ for (n, arg, vmlarg) in zip(range(0, 100), args, vmlargs):
+ out(" %s y%d = bitcast<%s,%s>(x%d);" %
+ (mkvmltype(vmlarg, vmltype), n,
+ mktype(arg, vectype), mkvmltype(vmlarg, vmltype), n))
+ callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
+ zip(range(0, 100), args)))
+ callretstr = mkvmltype(vmlret, vmltype)
+ out(" %s r = vecmathlib::%s(%s);" % (callretstr, name, callargstr))
+ # We may need to convert from the VML type to the OpenCL type
+ # before bitcasting. This may be a real conversion, e.g. bool to
+ # int. This may also involve a change in size (e.g. long to int),
+ # but only if the type is scalar. These conversions are applied
+ # before bitcasting.
+ # convfunc: conversion function to call
+ # convtype: result type of conversion, also input to bitcast
+ # bitcasttype: output of bitcast; may differ from function result
+ # if a size change is needed
+ if vmlret==ret:
+ convfunc = ""
+ convtype = callretstr
+ bitcasttype = funcretstr
+ else:
+ if vmlret==VI and ret in (VJ,VK):
+ convfunc = ""
+ convtype = callretstr
+ elif vmlret==VB and ret in (VJ,VK):
+ convfunc = "vecmathlib::convert_int"
+ convtype = vmlinttype
+ else:
+ raise "missing"
+ if ret in (VJ,VK):
+ bitcasttype = mktype(VI, vectype)
+ else:
+ raise "missing"
+ out(" return bitcast<%s,%s>(%s(r));" % (convtype, bitcasttype, convfunc))
+ out("}")
+
+def output_vmlfunc_libm(func, vectype):
+ (name, args, ret, vmlargs, vmlret) = func
+ out("// Implement %s by calling libm" % name)
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ othertype = "vecmathlib::realpseudovec<%s,%d>" % (basetype, size)
+ otherinttype = "%s::intvec_t" % othertype
+ funcargstr = ", ".join(map(lambda (n, arg):
+ "%s x%d" % (mktype(arg, vectype), n),
+ zip(range(0, 100), args)))
+ decl("%s __vml_%s(%s)" % (vectype, name, funcargstr))
+ out("%s __vml_%s(%s)" % (vectype, name, funcargstr))
+ out("{")
+ for (n, arg) in zip(range(0, 100), args):
+ out(" %s y%d = x%d;" % (othertype, n, n))
+ callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
+ zip(range(0, 100), args)))
+ callretstr = othertype if ret==VF else otherinttype
+ out(" %s r = vecmathlib::%s(%s);" % (callretstr, name, callargstr))
+ out(" return r[0];")
+ out("}")
+
+def output_vmlfunc_upcast(func, vectype):
+ (name, args, ret, vmlargs, vmlret) = func
+ out("// Implement %s by using a larger vector size" % name)
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ size2 = 4 if size==3 else size*2 # next power of 2
+ size2 = "" if size2==1 else str(size2)
+ othertype = "%s%s" % (basetype, size2)
+ declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype),
+ zip(range(0, 100), args)))
+ out("%s __vml_%s(%s);" % (mktype(ret, othertype), name, declargstr))
+ funcargstr = ", ".join(map(lambda (n, arg):
+ "%s x%d" % (mktype(arg, vectype), n),
+ zip(range(0, 100), args)))
+ decl("%s __vml_%s(%s)" % (mktype(ret, vectype), name, funcargstr))
+ out("%s __vml_%s(%s)" % (mktype(ret, vectype), name, funcargstr))
+ out("{")
+ for (n, arg) in zip(range(0, 100), args):
+ out(" %s y%d = bitcast<%s,%s>(x%d);" %
+ (mktype(arg, othertype), n,
+ mktype(arg, vectype), mktype(arg, othertype), n))
+ callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
+ zip(range(0, 100), args)))
+ out(" %s r = __vml_%s(%s);" % (mktype(ret, othertype), name, callargstr))
+ out(" return bitcast<%s,%s>(r);" %
+ (mktype(ret, othertype), mktype(ret, vectype)))
+ out("}")
+
+def output_vmlfunc_split(func, vectype):
+ (name, args, ret, vmlargs, vmlret) = func
+ out("// Implement %s by splitting into a smaller vector size" % name)
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ size2 = (size+1) / 2 # divide by 2, rounding up
+ size2 = "" if size2==1 else str(size2)
+ othertype = "%s%s" % (basetype, size2)
+ declargstr = ", ".join(map(lambda (n, arg): "%s" % mktype(arg, othertype),
+ zip(range(0, 100), args)))
+ out("%s __vml_%s(%s);" % (mktype(ret, othertype), name, declargstr))
+ funcargstr = ", ".join(map(lambda (n, arg):
+ "%s x%d" % (mktype(arg, vectype), n),
+ zip(range(0, 100), args)))
+ decl("%s __vml_%s(%s)" % (mktype(ret, vectype), name, funcargstr))
+ out("%s __vml_%s(%s)" % (mktype(ret, vectype), name, funcargstr))
+ out("{")
+ out(" struct pair { %s lo, hi; };" % othertype)
+ for (n, arg) in zip(range(0, 100), args):
+ out(" %s y%d = bitcast<%s,%s>(x%d);" %
+ (mktype(arg, othertype), n,
+ mktype(arg, vectype), mktype(arg, othertype), n))
+ callargstr = ", ".join(map(lambda (n, arg): "y%d" % n,
+ zip(range(0, 100), args)))
+ out(" %s r = __vml_%s(%s);" % (mktype(ret, othertype), name, callargstr))
+ out(" return bitcast<%s,%s>(r);" %
+ (mktype(ret, othertype), mktype(ret, vectype)))
+ out("}")
+
+
+
+def output_directfunc_direct(func, vectype):
+ (name, args, ret, impl) = func
+ out("// Implement %s directly" % name)
+ (basetype, size) = re.match("([A-Za-z]+)([0-9]*)", vectype).groups()
+ size = 1 if size=="" else int(size)
+ funcargstr = ", ".join(map(lambda (n, arg):
+ "%s x%d" % (mktype(arg, vectype), n),
+ zip(range(0, 100), args)))
+ funcretstr = mktype(ret, vectype)
+ decl("%s __vml_%s(%s)" % (funcretstr, name, funcargstr))
+ out("__attribute__((__overloadable__))");
+ out("%s __vml_%s(%s)" % (funcretstr, name, funcargstr))
+ out("{")
+ out(" typedef %s scalar_t;" % basetype)
+ out(" typedef %s vector_t;" % vectype)
+ out("#define convert_vector_t convert_%s" % vectype)
+ out(" return %s;" % impl)
+ out("#undef convert_vector_t")
+ out("}")
+
+
+
+def output_vmlfunc(func):
+ (name, args, ret, vmlargs, vmlret) = func
+ is_first_open = out_open("%s.cc" % name)
+ if is_first_open:
+ out("// Note: This file has been automatically generated. Do not modify.")
+ out("")
+ out("#include \"pocl-compat.h\"")
+ out("")
+ else:
+ out("")
+ out("")
+ out("")
+ decl("")
+ decl("// %s: %s -> %s" % (name, args, ret))
+ decl("#undef %s" % name)
+ decl("#define %s __vml_%s" % (name, name))
+ out("// %s: %s -> %s" % (name, args, ret))
+ for basetype in ["float", "double"]:
+ if basetype=="double":
+ out("")
+ out("#ifdef cl_khr_fp64")
+ for size in [1, 2, 3, 4, 8, 16]:
+ # Ignore this prototype for size==1 if there are any
+ # scalar arguments; this prevents duplicate definitions
+ if size==1 and any(map(lambda arg: arg in (SI, SJ, SF), args)):
+ continue
+ sizename = '' if size==1 else str(size)
+ vectype = basetype + sizename
+ # always use vecmathlib if available
+ out("")
+ out("// %s: VF=%s" % (name, vectype))
+ out("#if defined VECMATHLIB_HAVE_VEC_%s_%d" %
+ (basetype.upper(), size))
+ output_vmlfunc_vml(func, vectype)
+ if size==1:
+ # a scalar type: use libm
+ out("#else")
+ output_vmlfunc_libm(func, vectype)
+ else:
+ # a vector type: try upcasting to next power of 2
+ size2 = 4 if size==3 else size*2
+ out("#elif defined VECMATHLIB_HAVE_VEC_%s_%d" %
+ (basetype.upper(), size2))
+ output_vmlfunc_upcast(func, vectype)
+ # a vector type: split into smaller vector type
+ out("#else")
+ output_vmlfunc_split(func, vectype)
+ out("#endif")
+ if basetype=="double":
+ out("")
+ out("#endif // #ifdef cl_khr_fp64")
+ out_close()
+
+
+
+def output_directfunc(func):
+ (name, args, ret, impl) = func
+ is_first_open = out_open("%s.cl" % name)
+ if is_first_open:
+ out("// Note: This file has been automatically generated. Do not modify.")
+ out("")
+ else:
+ out("")
+ out("")
+ out("")
+ decl("")
+ decl("// %s: %s -> %s" % (name, args, ret))
+ decl("#undef %s" % name)
+ decl("#define %s __vml_%s" % (name, name))
+ out("// %s: %s -> %s" % (name, args, ret))
+ for basetype in ["float", "double"]:
+ if ((name.startswith("half_") or name.startswith("native_")) and
+ basetype=="double"):
+ continue
+ if basetype=="double":
+ out("")
+ out("#ifdef cl_khr_fp64")
+ for size in [1, 2, 3, 4, 8, 16]:
+ # Ignore this prototype for size==1 if there are any
+ # scalar arguments; this prevents duplicate definitions
+ if size==1 and any(map(lambda arg: arg in (SI, SJ, SF), args)):
+ continue
+ sizename = '' if size==1 else str(size)
+ vectype = basetype + sizename
+ # always use vecmathlib if available
+ out("")
+ out("// %s: VF=%s" % (name, vectype))
+ output_directfunc_direct(func, vectype)
+ if basetype=="double":
+ out("")
+ out("#endif // #ifdef cl_khr_fp64")
+ out_close()
+
+
+
+decl_open("kernel-vecmathlib.h")
+decl("// Note: This file has been automatically generated. Do not modify.")
+decl("#ifndef KERNEL_VECMATHLIB_H")
+decl("#define KERNEL_VECMATHLIB_H 1")
+map(output_vmlfunc, vmlfuncs)
+map(output_directfunc, directfuncs)
+decl("")
+decl("#endif // #ifndef KERNEL_VECMATHLIB_H")
+decl_close()
diff --git a/pocl/length.cl b/pocl/length.cl
new file mode 100644
index 0000000..9715e59
--- /dev/null
+++ b/pocl/length.cl
@@ -0,0 +1,49 @@
+__attribute__((__overloadable__))
+float length(float p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float length(float2 p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float length(float3 p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float length(float4 p)
+{
+ return sqrt(dot(p, p));
+}
+
+#ifdef cl_khr_fp64
+__attribute__((__overloadable__))
+double length(double p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double length(double2 p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double length(double3 p)
+{
+ return sqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double length(double4 p)
+{
+ return sqrt(dot(p, p));
+}
+#endif
diff --git a/pocl/normalize.cl b/pocl/normalize.cl
new file mode 100644
index 0000000..e033567
--- /dev/null
+++ b/pocl/normalize.cl
@@ -0,0 +1,49 @@
+__attribute__((__overloadable__))
+float normalize(float p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float2 normalize(float2 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float3 normalize(float3 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+float4 normalize(float4 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+#ifdef cl_khr_fp64
+__attribute__((__overloadable__))
+double normalize(double p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double2 normalize(double2 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double3 normalize(double3 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+
+__attribute__((__overloadable__))
+double4 normalize(double4 p)
+{
+ return p * rsqrt(dot(p, p));
+}
+#endif
diff --git a/pocl/pocl-compat.h b/pocl/pocl-compat.h
new file mode 100644
index 0000000..8b4041e
--- /dev/null
+++ b/pocl/pocl-compat.h
@@ -0,0 +1,78 @@
+// -*-C++-*- Compatibility layer to help instantiate functions to
+// create a library that can be called from elsewhere
+
+
+
+// Make things go fast (and debugging difficult...)
+#define VML_NODEBUG
+#include "../vecmathlib.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+
+
+
+// Generic conversion function
+template<typename A, typename B>
+static B bitcast(A a)
+{
+ B b;
+ std::memcpy(&b, &a, std::min(sizeof a, sizeof b));
+ if (sizeof b > sizeof a) {
+ std::memset((char*)&b + sizeof a, 0, sizeof b - sizeof a);
+ }
+ return b;
+}
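+
+// Typical use, as in the generated wrappers: reinterpret an OpenCL vector as
+// the corresponding vecmathlib vector and back, e.g.
+//   vecmathlib::realvec<float,4> y = bitcast<float4, vecmathlib::realvec<float,4> >(x);
+// When the destination type is wider than the source, the extra bytes are
+// zero-filled.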
+
+
+
+// Define vector types
+
+using std::int32_t;
+#define int int32_t
+typedef int int2 __attribute__((__ext_vector_type__( 2)));
+typedef int int3 __attribute__((__ext_vector_type__( 3)));
+typedef int int4 __attribute__((__ext_vector_type__( 4)));
+typedef int int8 __attribute__((__ext_vector_type__( 8)));
+typedef int int16 __attribute__((__ext_vector_type__(16)));
+
+using std::uint32_t;
+#define uint uint32_t
+typedef uint uint2 __attribute__((__ext_vector_type__( 2)));
+typedef uint uint3 __attribute__((__ext_vector_type__( 3)));
+typedef uint uint4 __attribute__((__ext_vector_type__( 4)));
+typedef uint uint8 __attribute__((__ext_vector_type__( 8)));
+typedef uint uint16 __attribute__((__ext_vector_type__(16)));
+
+#ifdef cles_khr_int64
+using std::int64_t;
+#define long int64_t
+typedef long long2 __attribute__((__ext_vector_type__( 2)));
+typedef long long3 __attribute__((__ext_vector_type__( 3)));
+typedef long long4 __attribute__((__ext_vector_type__( 4)));
+typedef long long8 __attribute__((__ext_vector_type__( 8)));
+typedef long long16 __attribute__((__ext_vector_type__(16)));
+
+using std::uint64_t;
+#define ulong uint64_t
+typedef ulong ulong2 __attribute__((__ext_vector_type__( 2)));
+typedef ulong ulong3 __attribute__((__ext_vector_type__( 3)));
+typedef ulong ulong4 __attribute__((__ext_vector_type__( 4)));
+typedef ulong ulong8 __attribute__((__ext_vector_type__( 8)));
+typedef ulong ulong16 __attribute__((__ext_vector_type__(16)));
+#endif
+
+typedef float float2 __attribute__((__ext_vector_type__( 2)));
+typedef float float3 __attribute__((__ext_vector_type__( 3)));
+typedef float float4 __attribute__((__ext_vector_type__( 4)));
+typedef float float8 __attribute__((__ext_vector_type__( 8)));
+typedef float float16 __attribute__((__ext_vector_type__(16)));
+
+#ifdef cl_khr_fp64
+typedef double double2 __attribute__((__ext_vector_type__( 2)));
+typedef double double3 __attribute__((__ext_vector_type__( 3)));
+typedef double double4 __attribute__((__ext_vector_type__( 4)));
+typedef double double8 __attribute__((__ext_vector_type__( 8)));
+typedef double double16 __attribute__((__ext_vector_type__(16)));
+#endif