diff options
author | avg <avg@FreeBSD.org> | 2009-07-15 17:07:39 +0000 |
---|---|---|
committer | avg <avg@FreeBSD.org> | 2009-07-15 17:07:39 +0000 |
commit | b898b874c6ac6d3ebccdf4844571b90e08f39f0e (patch) | |
tree | e8923eb934a3053e438f8f4b36433f78316a58fe /sys/cddl/dev/dtrace/i386/dtrace_subr.c | |
parent | 9990f66d4450ab695ddf3686db11797c224dea45 (diff) | |
download | FreeBSD-src-b898b874c6ac6d3ebccdf4844571b90e08f39f0e.zip FreeBSD-src-b898b874c6ac6d3ebccdf4844571b90e08f39f0e.tar.gz |
dtrace_gethrtime: improve scaling of TSC ticks to nanoseconds
Currently dtrace_gethrtime uses formula similar to the following for
converting TSC ticks to nanoseconds:
rdtsc() * 10^9 / tsc_freq
The dividend overflows 64-bit type and wraps-around every 2^64/10^9 =
18446744073 ticks which is just a few seconds on modern machines.
Now we instead use precalculated scaling factor of
10^9*2^N/tsc_freq < 2^32 and perform TSC value multiplication separately
for each 32-bit half. This allows to avoid overflow of the dividend
described above.
The idea is taken from OpenSolaris.
This has an added feature of always scaling TSC with invariant value
regardless of TSC frequency changes. Thus the timestamps will not be
accurate if TSC actually changes, but they are always proportional to
TSC ticks and thus monotonic. This should be much better than current
formula which produces wildly different non-monotonic results on when
tsc_freq changes.
Also drop write-only 'cp' variable from amd64 dtrace_gethrtime_init()
to make it identical to the i386 twin.
PR: kern/127441
Tested by: Thomas Backman <serenity@exscape.org>
Reviewed by: jhb
Discussed with: current@, bde, gnn
Silence from: jb
Approved by: re (gnn)
MFC after: 1 week
Diffstat (limited to 'sys/cddl/dev/dtrace/i386/dtrace_subr.c')
-rw-r--r-- | sys/cddl/dev/dtrace/i386/dtrace_subr.c | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 78d80f3..2839263 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -366,6 +366,10 @@ dtrace_safe_defer_signal(void) static int64_t tgt_cpu_tsc; static int64_t hst_cpu_tsc; static int64_t tsc_skew[MAXCPU]; +static uint64_t nsec_scale; + +/* See below for the explanation of this macro. */ +#define SCALE_SHIFT 28 static void dtrace_gethrtime_init_sync(void *arg) @@ -401,9 +405,37 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + uint64_t tsc_f; cpumask_t map; int i; + /* + * Get TSC frequency known at this moment. + * This should be constant if TSC is invariant. + * Otherwise tick->time conversion will be inaccurate, but + * will preserve monotonic property of TSC. + */ + tsc_f = tsc_freq; + + /* + * The following line checks that nsec_scale calculated below + * doesn't overflow 32-bit unsigned integer, so that it can multiply + * another 32-bit integer without overflowing 64-bit. + * Thus minimum supported TSC frequency is 62.5MHz. + */ + KASSERT(tsc_f > (NANOSEC >> (32 - SCALE_SHIFT)), ("TSC frequency is too low")); + + /* + * We scale up NANOSEC/tsc_f ratio to preserve as much precision + * as possible. + * 2^28 factor was chosen quite arbitrarily from practical + * considerations: + * - it supports TSC frequencies as low as 62.5MHz (see above); + * - it provides quite good precision (e < 0.01%) up to THz + * (terahertz) values; + */ + nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f; + /* The current CPU is the reference one. */ tsc_skew[curcpu] = 0; @@ -438,7 +470,21 @@ SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, uint64_t dtrace_gethrtime() { - return ((rdtsc() + tsc_skew[curcpu]) * (int64_t) 1000000000 / tsc_freq); + uint64_t tsc; + uint32_t lo; + uint32_t hi; + + /* + * We split TSC value into lower and higher 32-bit halves and separately + * scale them with nsec_scale, then we scale them down by 2^28 + * (see nsec_scale calculations) taking into account 32-bit shift of + * the higher half and finally add. + */ + tsc = rdtsc() + tsc_skew[curcpu]; + lo = tsc; + hi = tsc >> 32; + return (((lo * nsec_scale) >> SCALE_SHIFT) + + ((hi * nsec_scale) << (32 - SCALE_SHIFT))); } uint64_t |