Estimate an initial overhead of 0 usec instead of 20 usec in DELAY().

I have code to calibrate the overhead fairly accurately, but there is little point in using it since it is most accurate on machines where an estimate of 0 usec already works well. On slow machines, the accuracy of DELAY() has a large variance even if the initial delay is calibrated perfectly, since it is limited by the resolution of getit().

Use fixed point and long longs to speed up scaling in DELAY(). The old method slowed down a lot when the frequency became variable. Assume the default frequency for short delays so that the fixed point calculation can be exact. Fast scaling is only important for small delays. Scaling is done after looking at the counter and outside the loop, so it doesn't decrease accuracy or resolution provided it completes before the delay is up. The comment in the code is still confused about this.
author: bde <bde@FreeBSD.org> 1997-01-29 22:51:44 +0000
committer: bde <bde@FreeBSD.org> 1997-01-29 22:51:44 +0000
commit: 264fc79b213bb942af9be8061cafa89eb07aff14 (patch)
tree: 87241d336d31c19008918f687d0430531b90f4b5 /sys
parent: d81f278ab499fe210ce54d4619e7d0493e7dc5ef (diff)
download: FreeBSD-src-264fc79b213bb942af9be8061cafa89eb07aff14.zip
FreeBSD-src-264fc79b213bb942af9be8061cafa89eb07aff14.tar.gz
5 files changed, 105 insertions, 50 deletions
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index faa38fd..60aa95c 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;
 
 	while (ticks_left > 0) {
 		tick = getit();
diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c
index faa38fd..60aa95c 100644
--- a/sys/amd64/isa/clock.c
+++ b/sys/amd64/isa/clock.c
@@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;
 
 	while (ticks_left > 0) {
 		tick = getit();
diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c
index faa38fd..60aa95c 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;
 
 	while (ticks_left > 0) {
 		tick = getit();
diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c
index faa38fd..60aa95c 100644
--- a/sys/i386/isa/clock.c
+++ b/sys/i386/isa/clock.c
@@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;
 
 	while (ticks_left > 0) {
 		tick = getit();
diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c
index faa38fd..60aa95c 100644
--- a/sys/isa/atrtc.c
+++ b/sys/isa/atrtc.c
@@ -362,7 +362,7 @@ getit(void)
 void
 DELAY(int n)
 {
-	int delta, prev_tick, tick, ticks_left, sec, usec;
+	int delta, prev_tick, tick, ticks_left;
 
 #ifdef DELAYDEBUG
 	int getit_calls = 1;
@@ -393,19 +393,30 @@ DELAY(int n)
 	 * multiplications and divisions to scale the count take a while).
 	 */
 	prev_tick = getit();
-	n -= 20;
+	n -= 0;			/* XXX actually guess no initial overhead */
 	/*
 	 * Calculate (n * (timer_freq / 1e6)) without using floating point
 	 * and without any avoidable overflows.
 	 */
-	sec = n / 1000000;
-	usec = n - sec * 1000000;
-	ticks_left = sec * timer_freq
-		     + usec * (timer_freq / 1000000)
-		     + usec * ((timer_freq % 1000000) / 1000) / 1000
-		     + usec * (timer_freq % 1000) / 1000000;
-	if (n < 0)
-		ticks_left = 0;	/* XXX timer_freq is unsigned */
+	if (n <= 0)
+		ticks_left = 0;
+	else if (n < 256)
+		/*
+		 * Use fixed point to avoid a slow division by 1000000.
+		 * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+		 * 2^15 is the first power of 2 that gives exact results
+		 * for n between 0 and 256.
+		 */
+		ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
+	else
+		/*
+		 * Don't bother using fixed point, although gcc-2.7.2
+		 * generates particularly poor code for the long long
+		 * division, since even the slow way will complete long
+		 * before the delay is up (unless we're interrupted).
+		 */
+		ticks_left = ((u_int)n * (long long)timer_freq + 999999)
+			     / 1000000;
 
 	while (ticks_left > 0) {
 		tick = getit();
author	bde <bde@FreeBSD.org>	1997-01-29 22:51:44 +0000
committer	bde <bde@FreeBSD.org>	1997-01-29 22:51:44 +0000
commit	264fc79b213bb942af9be8061cafa89eb07aff14 (patch)
tree	87241d336d31c19008918f687d0430531b90f4b5 /sys
parent	d81f278ab499fe210ce54d4619e7d0493e7dc5ef (diff)
download	FreeBSD-src-264fc79b213bb942af9be8061cafa89eb07aff14.zip FreeBSD-src-264fc79b213bb942af9be8061cafa89eb07aff14.tar.gz