summary refs log tree commit diff stats
path: root/vecmathlib.h
diff options
context:
space:
mode:
Diffstat (limited to 'vecmathlib.h')
-rw-r--r-- vecmathlib.h 248
1 files changed, 119 insertions, 129 deletions
diff --git a/vecmathlib.h b/vecmathlib.h
index 9accd24..0d72add 100644
--- a/vecmathlib.h
+++ b/vecmathlib.h
@@ -4,16 +4,14 @@
#define VECMATHLIB_H
#if defined VML_DEBUG || defined VML_NODEBUG
-# if defined VML_DEBUG && defined VML_NODEBUG
-# error "Only one of VML_DEBUG or VML_NODEBUG may be defined"
-# endif
+#if defined VML_DEBUG && defined VML_NODEBUG
+#error "Only one of VML_DEBUG or VML_NODEBUG may be defined"
+#endif
#else
// default
-# define VML_DEBUG
+#define VML_DEBUG
#endif
-
-
// FP settings
// Possible effects of not having VML_HAVE_FP_CONTRACT:
@@ -23,7 +21,7 @@
// - can evaluate functions with reduced precision (80% of significant digits)
// default settings
-#undef VML_HAVE_DENORMALS // TODO
+#undef VML_HAVE_DENORMALS // TODO
#define VML_HAVE_FP_CONTRACT
#define VML_HAVE_INF
#define VML_HAVE_NAN
@@ -31,63 +29,59 @@
// optimized settings
#ifdef __FAST_MATH__
-# undef VML_HAVE_DENORMALS
-# undef VML_HAVE_FP_CONTRACT
-# undef VML_HAVE_INF
-# undef VML_HAVE_NAN
+#undef VML_HAVE_DENORMALS
+#undef VML_HAVE_FP_CONTRACT
+#undef VML_HAVE_INF
+#undef VML_HAVE_NAN
#endif
#ifdef VML_DEBUG
-# define VML_CONFIG_DEBUG " debug"
+#define VML_CONFIG_DEBUG " debug"
#else
-# define VML_CONFIG_DEBUG " no-debug"
+#define VML_CONFIG_DEBUG " no-debug"
#endif
#ifdef VML_DENORMALS
-# define VML_CONFIG_DENORMALS " denormals"
+#define VML_CONFIG_DENORMALS " denormals"
#else
-# define VML_CONFIG_DENORMALS " no-denormals"
+#define VML_CONFIG_DENORMALS " no-denormals"
#endif
#ifdef VML_FP_CONTRACT
-# define VML_CONFIG_FP_CONTRACT " fp-contract"
+#define VML_CONFIG_FP_CONTRACT " fp-contract"
#else
-# define VML_CONFIG_FP_CONTRACT " no-fp-contract"
+#define VML_CONFIG_FP_CONTRACT " no-fp-contract"
#endif
#ifdef VML_INF
-# define VML_CONFIG_INF " inf"
+#define VML_CONFIG_INF " inf"
#else
-# define VML_CONFIG_INF " no-inf"
+#define VML_CONFIG_INF " no-inf"
#endif
#ifdef VML_NAN
-# define VML_CONFIG_NAN " nan"
+#define VML_CONFIG_NAN " nan"
#else
-# define VML_CONFIG_NAN " no-nan"
+#define VML_CONFIG_NAN " no-nan"
#endif
// TODO: introduce mad, as fast version of fma (check FP_FAST_FMA)
// TODO: introduce ieee_isnan and friends
// TODO: switch between isnan and ieee_isnan at an outside level
-
-
// This workaround is needed for older libstdc++ versions such as the
// one in Debian 6.0 when compiled with clang++
// <http://lists.cs.uiuc.edu/pipermail/cfe-dev/2011-February/013207.html>.
// The version time stamp used below is the one in Debian 6.0.
-#include <cstring> // pull in __GLIBCXX__
+#include <cstring> // pull in __GLIBCXX__
#if defined __GLIBCXX__ && __GLIBCXX__ <= 20101114
-namespace std { class type_info; }
+namespace std {
+class type_info;
+}
#endif
-
-
#include <cassert>
-
-
#ifdef VML_DEBUG
-# define VML_ASSERT(x) assert(x)
+#define VML_ASSERT(x) assert(x)
#else
-# define VML_ASSERT(x) ((void)0)
+#define VML_ASSERT(x) ((void)0)
#endif
// Scalarise all vector operations, and use libm's functions (mostly
@@ -96,146 +90,142 @@ namespace std { class type_info; }
#ifdef __clang__
// Use compiler-provided vector types
-# include "vec_builtin.h"
+#include "vec_builtin.h"
#endif
// Scalarise all vector operations; don't use libm, use only
// Vecmathlib's functions (mostly useful for testing Vecmathlib)
#include "vec_test.h"
-#if defined __ARM_NEON__ // ARM NEON
-# include "vec_neon_float2.h"
-# include "vec_neon_float4.h"
-# define VML_CONFIG_NEON " NEON"
-#else
-# define VML_CONFIG_NEON
-#endif
-
-#if defined __SSE2__ // Intel SSE 2
-# include "vec_sse_float1.h"
-# include "vec_sse_float4.h"
-# include "vec_sse_double1.h"
-# include "vec_sse_double2.h"
-# if defined __SSE3__
-# define VML_CONFIG_SSE3 " SSE3"
-# else
-# define VML_CONFIG_SSE3
-# endif
-# if defined __SSSE3__
-# define VML_CONFIG_SSSE3 " SSSE3"
-# else
-# define VML_CONFIG_SSSE3
-# endif
-# if defined __SSE4_1__
-# define VML_CONFIG_SSE4_1 " SSE4.1"
-# else
-# define VML_CONFIG_SSE4_1
-# endif
-# if defined __SSE4a__
-# define VML_CONFIG_SSE4a " SSE4a"
-# else
-# define VML_CONFIG_SSE4a
-# endif
-# define VML_CONFIG_SSE2 " SSE2" VML_CONFIG_SSE3 VML_CONFIG_SSSE3 VML_CONFIG_SSE4_1 VML_CONFIG_SSE4a
-#else
-# define VML_CONFIG_SSE2
-#endif
-
-#if defined __AVX__ // Intel AVX
-# include "vec_avx_fp8_32.h"
-# include "vec_avx_fp16_16.h"
-# include "vec_avx_float8.h"
-# include "vec_avx_double4.h"
-# define VML_CONFIG_AVX " AVX"
-#else
-# define VML_CONFIG_AVX
-#endif
-
-#if defined __MIC__ // Intel MIC
+#if defined __ARM_NEON__ // ARM NEON
+#include "vec_neon_float2.h"
+#include "vec_neon_float4.h"
+#define VML_CONFIG_NEON " NEON"
+#else
+#define VML_CONFIG_NEON
+#endif
+
+#if defined __SSE2__ // Intel SSE 2
+#include "vec_sse_float1.h"
+#include "vec_sse_float4.h"
+#include "vec_sse_double1.h"
+#include "vec_sse_double2.h"
+#if defined __SSE3__
+#define VML_CONFIG_SSE3 " SSE3"
+#else
+#define VML_CONFIG_SSE3
+#endif
+#if defined __SSSE3__
+#define VML_CONFIG_SSSE3 " SSSE3"
+#else
+#define VML_CONFIG_SSSE3
+#endif
+#if defined __SSE4_1__
+#define VML_CONFIG_SSE4_1 " SSE4.1"
+#else
+#define VML_CONFIG_SSE4_1
+#endif
+#if defined __SSE4a__
+#define VML_CONFIG_SSE4a " SSE4a"
+#else
+#define VML_CONFIG_SSE4a
+#endif
+#define VML_CONFIG_SSE2 \
+ " SSE2" VML_CONFIG_SSE3 VML_CONFIG_SSSE3 VML_CONFIG_SSE4_1 VML_CONFIG_SSE4a
+#else
+#define VML_CONFIG_SSE2
+#endif
+
+#if defined __AVX__ // Intel AVX
+#include "vec_avx_fp8_32.h"
+#include "vec_avx_fp16_16.h"
+#include "vec_avx_float8.h"
+#include "vec_avx_double4.h"
+#define VML_CONFIG_AVX " AVX"
+#else
+#define VML_CONFIG_AVX
+#endif
+
+#if defined __MIC__ // Intel MIC
// TODO: single precision?
-# include "vec_mic_double8.h"
-# define VML_CONFIG_MIC " MIC"
+#include "vec_mic_double8.h"
+#define VML_CONFIG_MIC " MIC"
#else
-# define VML_CONFIG_MIC
+#define VML_CONFIG_MIC
#endif
-#if defined __ALTIVEC__ // IBM Altivec
-# include "vec_altivec_float4.h"
-# define VML_CONFIG_ALTIVEC " Altivec"
+#if defined __ALTIVEC__ // IBM Altivec
+#include "vec_altivec_float4.h"
+#define VML_CONFIG_ALTIVEC " Altivec"
#else
-# define VML_CONFIG_ALTIVEC
+#define VML_CONFIG_ALTIVEC
#endif
#if defined __ALTIVEC__ && defined _ARCH_PWR7 // IBM VSX
-# include "vec_vsx_double2.h"
-# define VML_CONFIG_VSX " VSX"
+#include "vec_vsx_double2.h"
+#define VML_CONFIG_VSX " VSX"
#else
-# define VML_CONFIG_VSX
+#define VML_CONFIG_VSX
#endif
// TODO: IBM Blue Gene/P DoubleHummer
#if defined __bgq__ && defined __VECTOR4DOUBLE__ // IBM Blue Gene/Q QPX
// TODO: vec_qpx_float4
-# include "vec_qpx_double4.h"
-# define VML_CONFIG_QPX " QPX"
+#include "vec_qpx_double4.h"
+#define VML_CONFIG_QPX " QPX"
#else
-# define VML_CONFIG_QPX
+#define VML_CONFIG_QPX
#endif
-#define VECMATHLIB_CONFIGURATION \
- "VecmathlibConfiguration" \
- VML_CONFIG_DEBUG \
- VML_CONFIG_DENORMALS VML_CONFIG_FP_CONTRACT VML_CONFIG_INF VML_CONFIG_NAN \
- VML_CONFIG_NEON \
- VML_CONFIG_SSE2 VML_CONFIG_AVX VML_CONFIG_MIC \
- VML_CONFIG_ALTIVEC VML_CONFIG_VSX \
- VML_CONFIG_QPX
-
-
+#define VECMATHLIB_CONFIGURATION \
+ "VecmathlibConfiguration" VML_CONFIG_DEBUG VML_CONFIG_DENORMALS \
+ VML_CONFIG_FP_CONTRACT VML_CONFIG_INF VML_CONFIG_NAN VML_CONFIG_NEON \
+ VML_CONFIG_SSE2 VML_CONFIG_AVX VML_CONFIG_MIC VML_CONFIG_ALTIVEC \
+ VML_CONFIG_VSX VML_CONFIG_QPX
// Define "best" vector types
namespace vecmathlib {
-
+
#if defined VECMATHLIB_HAVE_VEC_FLOAT_16
-# define VECMATHLIB_MAX_FLOAT_VECSIZE 16
+#define VECMATHLIB_MAX_FLOAT_VECSIZE 16
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_8
-# define VECMATHLIB_MAX_FLOAT_VECSIZE 8
+#define VECMATHLIB_MAX_FLOAT_VECSIZE 8
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_4
-# define VECMATHLIB_MAX_FLOAT_VECSIZE 4
+#define VECMATHLIB_MAX_FLOAT_VECSIZE 4
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_2
-# define VECMATHLIB_MAX_FLOAT_VECSIZE 2
+#define VECMATHLIB_MAX_FLOAT_VECSIZE 2
#elif defined VECMATHLIB_HAVE_VEC_FLOAT_1
-# define VECMATHLIB_MAX_FLOAT_VECSIZE 1
+#define VECMATHLIB_MAX_FLOAT_VECSIZE 1
#endif
-
+
#if defined VECMATHLIB_HAVE_VEC_DOUBLE_8
-# define VECMATHLIB_MAX_DOUBLE_VECSIZE 8
+#define VECMATHLIB_MAX_DOUBLE_VECSIZE 8
#elif defined VECMATHLIB_HAVE_VEC_DOUBLE_4
-# define VECMATHLIB_MAX_DOUBLE_VECSIZE 4
+#define VECMATHLIB_MAX_DOUBLE_VECSIZE 4
#elif defined VECMATHLIB_HAVE_VEC_DOUBLE_2
-# define VECMATHLIB_MAX_DOUBLE_VECSIZE 2
+#define VECMATHLIB_MAX_DOUBLE_VECSIZE 2
#elif defined VECMATHLIB_HAVE_VEC_DOUBLE_1
-# define VECMATHLIB_MAX_DOUBLE_VECSIZE 1
+#define VECMATHLIB_MAX_DOUBLE_VECSIZE 1
#endif
-
+
#ifdef VECMATHLIB_MAX_FLOAT_VECSIZE
- typedef realvec<float,VECMATHLIB_MAX_FLOAT_VECSIZE> float32_vec;
- typedef intvec<float,VECMATHLIB_MAX_FLOAT_VECSIZE> int32_vec;
- typedef boolvec<float,VECMATHLIB_MAX_FLOAT_VECSIZE> bool32_vec;
+typedef realvec<float, VECMATHLIB_MAX_FLOAT_VECSIZE> float32_vec;
+typedef intvec<float, VECMATHLIB_MAX_FLOAT_VECSIZE> int32_vec;
+typedef boolvec<float, VECMATHLIB_MAX_FLOAT_VECSIZE> bool32_vec;
#else
- typedef realpseudovec<float,1> float32_vec;
- typedef intpseudovec<float,1> int32_vec;
- typedef boolpseudovec<float,1> bool32_vec;
+typedef realpseudovec<float, 1> float32_vec;
+typedef intpseudovec<float, 1> int32_vec;
+typedef boolpseudovec<float, 1> bool32_vec;
#endif
-
+
#ifdef VECMATHLIB_MAX_DOUBLE_VECSIZE
- typedef realvec<double,VECMATHLIB_MAX_DOUBLE_VECSIZE> float64_vec;
- typedef intvec<double,VECMATHLIB_MAX_DOUBLE_VECSIZE> int64_vec;
- typedef boolvec<double,VECMATHLIB_MAX_DOUBLE_VECSIZE> bool64_vec;
+typedef realvec<double, VECMATHLIB_MAX_DOUBLE_VECSIZE> float64_vec;
+typedef intvec<double, VECMATHLIB_MAX_DOUBLE_VECSIZE> int64_vec;
+typedef boolvec<double, VECMATHLIB_MAX_DOUBLE_VECSIZE> bool64_vec;
#else
- typedef realpseudovec<double,1> float64_vec;
- typedef intpseudovec<double,1> int64_vec;
- typedef boolpseudovec<double,1> bool64_vec;
+typedef realpseudovec<double, 1> float64_vec;
+typedef intpseudovec<double, 1> int64_vec;
+typedef boolpseudovec<double, 1> bool64_vec;
#endif
}
OpenPOWER on IntegriCloud