summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Erik Schnetter <schnetter@gmail.com> 2013-06-21 16:47:44 -0400
committer: Erik Schnetter <schnetter@gmail.com> 2013-06-21 16:47:44 -0400
commit: 93f02d380232f57978f43658afe6c213fbc1e536 (patch)
tree: 120a341f15dec2108527d362c440c301968a1c99
parent: 437b6f9d3f1224e93f7a6bb976677d8044c0b1ba (diff)
download: vecmathlib-93f02d380232f57978f43658afe6c213fbc1e536.zip
vecmathlib-93f02d380232f57978f43658afe6c213fbc1e536.tar.gz
Optimize exp and log
-rw-r--r-- mathfuncs_exp.h 24
-rw-r--r-- mathfuncs_log.h 20
2 files changed, 44 insertions, 0 deletions
diff --git a/mathfuncs_exp.h b/mathfuncs_exp.h
index ceb89c0..99fd063 100644
--- a/mathfuncs_exp.h
+++ b/mathfuncs_exp.h
@@ -35,6 +35,7 @@ namespace vecmathlib {
realvec_t r;
switch (sizeof(real_t)) {
case 4:
+#ifdef VML_HAVE_FP_CONTRACT
// float, error=4.55549108005200277750378992345e-9
r = RV(0.000154653240842602623787395880898);
r = fma(r, x, RV(0.00133952915439234389712105060319));
@@ -43,8 +44,18 @@ namespace vecmathlib {
r = fma(r, x, RV(0.240226511015459465468737123346));
r = fma(r, x, RV(0.69314720007380208630542805293));
r = fma(r, x, RV(0.99999999997182023878745628977));
+#else
+ // float, error=1.62772721960621336664735896836e-7
+ r = RV(0.00133952915439234389712105060319);
+ r = fma(r, x, RV(0.009670773148229417605024318985));
+ r = fma(r, x, RV(0.055503406540531310853149866446));
+ r = fma(r, x, RV(0.240222115700585316818177639177));
+ r = fma(r, x, RV(0.69314720007380208630542805293));
+ r = fma(r, x, RV(1.00000005230745711373079206024));
+#endif
break;
case 8:
+#ifdef VML_HAVE_FP_CONTRACT
// double, error=9.32016781355638010975628074746e-18
r = RV(4.45623165388261696886670014471e-10);
r = fma(r, x, RV(7.0733589360775271430968224806e-9));
@@ -58,6 +69,19 @@ namespace vecmathlib {
r = fma(r, x, RV(0.240226506959101382690753994082));
r = fma(r, x, RV(0.69314718055994530864272481773));
r = fma(r, x, RV(0.9999999999999999978508676375));
+#else
+ // double, error=3.74939899823302048807873981077e-14
+ r = RV(1.02072375599725694063203809188e-7);
+ r = fma(r, x, RV(1.32573274434801314145133004073e-6));
+ r = fma(r, x, RV(0.0000152526647170731944840736190013));
+ r = fma(r, x, RV(0.000154034441925859828261898614555));
+ r = fma(r, x, RV(0.00133335582175770747495287552557));
+ r = fma(r, x, RV(0.0096181291794939392517233403183));
+ r = fma(r, x, RV(0.055504108664525029438908798685));
+ r = fma(r, x, RV(0.240226506957026959772247598695));
+ r = fma(r, x, RV(0.6931471805599487321347668143));
+ r = fma(r, x, RV(1.00000000000000942892870993489));
+#endif
break;
default:
__builtin_unreachable();
diff --git a/mathfuncs_log.h b/mathfuncs_log.h
index 8d99a62..0a40eec 100644
--- a/mathfuncs_log.h
+++ b/mathfuncs_log.h
@@ -31,14 +31,23 @@ namespace vecmathlib {
realvec_t r;
switch (sizeof(real_t)) {
case 4:
+#ifdef VML_HAVE_FP_CONTRACT
// float, error=5.98355642684398209498469870525e-9
r = RV(0.410981538282433293325329456838);
r = fma(r, y2, RV(0.402155483172044562892705980539));
r = fma(r, y2, RV(0.57755014627178237959721643293));
r = fma(r, y2, RV(0.96178780600659929206930296869));
r = fma(r, y2, RV(2.88539012786343587248965772685));
+#else
+ // float, error=2.25468184051947656525068987795e-7
+ r = RV(0.498866687070343238590910977481);
+ r = fma(r, y2, RV(0.57002741193682764193895550312));
+ r = fma(r, y2, RV(0.96200215034262628756932169194));
+ r = fma(r, y2, RV(2.88538850388042106595516956395));
+#endif
break;
case 8:
+#ifdef VML_HAVE_FP_CONTRACT
// double, error=9.45037202901655672811489051683e-17
r = RV(0.259935726478127940817401224248);
r = fma(r, y2, RV(0.140676370079882918464564658472));
@@ -50,6 +59,17 @@ namespace vecmathlib {
r = fma(r, y2, RV(0.57707801621733949207376840932));
r = fma(r, y2, RV(0.96179669392666302667713134701));
r = fma(r, y2, RV(2.88539008177792581277410991327));
+#else
+ // double, error=1.21820548287702216975532695788e-13
+ r = RV(0.293251364683280430617251942017);
+ r = fma(r, y2, RV(0.201364223624519571276587631354));
+ r = fma(r, y2, RV(0.264443947645547871780098560836));
+ r = fma(r, y2, RV(0.320475051320227723946459855458));
+ r = fma(r, y2, RV(0.412202612052105347480086431555));
+ r = fma(r, y2, RV(0.57707794741938820005328259256));
+ r = fma(r, y2, RV(0.96179669445173881282808321929));
+ r = fma(r, y2, RV(2.88539008177676567117601117274));
+#endif
break;
default:
__builtin_unreachable();
OpenPOWER on IntegriCloud