From 24e4a8c3e8868874835b0f1ad6dd417341e99822 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 16 Jul 2014 21:03:53 +0000 Subject: ktime: Kill non-scalar ktime_t implementation for 2038 The non-scalar ktime_t implementation is basically a timespec which has to be changed to support dates past 2038 on 32bit systems. This patch removes the non-scalar ktime_t implementation, forcing the scalar s64 nanosecond version on all architectures. This may have additional performance overhead on some 32bit systems when converting between ktime_t and timespec structures, however the majority of 32bit systems (arm and i386) were already using scalar ktime_t, so no performance regressions will be seen on those platforms. On affected platforms, I'm open to finding optimizations, including avoiding converting to timespecs where possible. [ tglx: We can now cleanup the ktime_t.tv64 mess, but thats a different issue and we can throw a coccinelle script at it ] Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a8f749e..7fa17b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -111,7 +111,6 @@ config X86 select ARCH_CLOCKSOURCE_DATA select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL - select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 -- cgit v1.1 From bb0b58127c5add364cb597d58b1cf66eb279eae8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:52 +0000 Subject: x86: kvm: Use ktime_get_boot_ns() Use the new nanoseconds based interface and get rid of the timespec conversion dance. Signed-off-by: Thomas Gleixner Cc: Gleb Natapov Cc: kvm@vger.kernel.org Acked-by: Paolo Bonzini Signed-off-by: John Stultz --- arch/x86/kvm/x86.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f644933..65c4305 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1109,11 +1109,7 @@ static void kvm_get_time_scale(uint32_t scaled_khz, uint32_t base_khz, static inline u64 get_kernel_ns(void) { - struct timespec ts; - - ktime_get_ts(&ts); - monotonic_to_bootbased(&ts); - return timespec_to_ns(&ts); + return ktime_get_boot_ns(); } #ifdef CONFIG_X86_64 -- cgit v1.1 From cbcf2dd3b3d4d990610259e8d878fc8dc1f17d80 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:04:54 +0000 Subject: x86: kvm: Make kvm_get_time_and_clockread() nanoseconds based Convert the relevant base data right away to nanoseconds instead of doing the conversion on every readout. Reduces text size by 160 bytes. Signed-off-by: Thomas Gleixner Cc: Gleb Natapov Cc: kvm@vger.kernel.org Acked-by: Paolo Bonzini Signed-off-by: John Stultz --- arch/x86/kvm/x86.c | 44 ++++++++++++++------------------------------ 1 file changed, 14 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65c4305..63832f5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -984,9 +984,8 @@ struct pvclock_gtod_data { u32 shift; } clock; - /* open coded 'struct timespec' */ - u64 monotonic_time_snsec; - time_t monotonic_time_sec; + u64 boot_ns; + u64 nsec_base; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -994,6 +993,9 @@ static struct pvclock_gtod_data pvclock_gtod_data; static void update_pvclock_gtod(struct timekeeper *tk) { struct pvclock_gtod_data *vdata = &pvclock_gtod_data; + u64 boot_ns; + + boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); @@ -1004,17 +1006,8 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; - vdata->monotonic_time_sec = tk->xtime_sec - + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec - + (tk->wall_to_monotonic.tv_nsec - << tk->shift); - while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { - vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; - vdata->monotonic_time_sec++; - } + vdata->boot_ns = boot_ns; + vdata->nsec_base = tk->xtime_nsec; write_seqcount_end(&vdata->seq); } @@ -1371,23 +1364,22 @@ static inline u64 vgettsc(cycle_t *cycle_now) return v * gtod->clock.mult; } -static int do_monotonic(struct timespec *ts, cycle_t *cycle_now) +static int do_monotonic_boot(s64 *t, cycle_t *cycle_now) { + struct pvclock_gtod_data *gtod = &pvclock_gtod_data; unsigned long seq; - u64 ns; int mode; - struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + u64 ns; - ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; - ts->tv_sec = gtod->monotonic_time_sec; - ns = gtod->monotonic_time_snsec; + ns = gtod->nsec_base; ns += vgettsc(cycle_now); ns >>= gtod->clock.shift; + ns += gtod->boot_ns; } while (unlikely(read_seqcount_retry(>od->seq, seq))); - timespec_add_ns(ts, ns); + *t = ns; return mode; } @@ -1395,19 +1387,11 @@ static int do_monotonic(struct timespec *ts, cycle_t *cycle_now) /* returns true if host is using tsc clocksource */ static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now) { - struct timespec ts; - /* checked again under seqlock below */ if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) return false; - if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC) - return false; - - monotonic_to_bootbased(&ts); - *kernel_ns = timespec_to_ns(&ts); - - return true; + return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; } #endif -- cgit v1.1 From 09ec54429c6d10f87d1f084de53ae2c1c3a81108 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:12 +0000 Subject: clocksource: Move cycle_last validation to core code The only user of the cycle_last validation is the x86 TSC. In order to provide NMI safe accessor functions for clock monotonic and monotonic_raw we need to do that in the core. We can't do the TSC specific if (now < cycle_last) now = cycle_last; for the other wrapping around clocksources, but TSC has CLOCKSOURCE_MASK(64) which actually does not mask out anything so if now is less than cycle_last the subtraction will give a negative result. So we can check for that in clocksource_delta() and return 0 for that case. Implement and enable it for x86 Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/Kconfig | 1 + arch/x86/kernel/tsc.c | 21 +++++++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7fa17b5..d08e061 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,7 @@ config X86 select CLOCKSOURCE_WATCHDOG select GENERIC_CLOCKEVENTS select ARCH_CLOCKSOURCE_DATA + select CLOCKSOURCE_VALIDATE_LAST_CYCLE select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL select GENERIC_STRNCPY_FROM_USER diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 57e5ce1..456c0e6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -951,7 +951,7 @@ core_initcall(cpufreq_tsc); static struct clocksource clocksource_tsc; /* - * We compare the TSC to the cycle_last value in the clocksource + * We used to compare the TSC to the cycle_last value in the clocksource * structure to avoid a nasty time-warp. This can be observed in a * very small window right after one CPU updated cycle_last under * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which @@ -961,26 +961,23 @@ static struct clocksource clocksource_tsc; * due to the unsigned delta calculation of the time keeping core * code, which is necessary to support wrapping clocksources like pm * timer. + * + * This sanity check is now done in the core timekeeping code. + * checking the result of read_tsc() - cycle_last for being negative. + * That works because CLOCKSOURCE_MASK(64) does not mask out any bit. */ static cycle_t read_tsc(struct clocksource *cs) { - cycle_t ret = (cycle_t)get_cycles(); - - return ret >= clocksource_tsc.cycle_last ? - ret : clocksource_tsc.cycle_last; -} - -static void resume_tsc(struct clocksource *cs) -{ - if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) - clocksource_tsc.cycle_last = 0; + return (cycle_t)get_cycles(); } +/* + * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() + */ static struct clocksource clocksource_tsc = { .name = "tsc", .rating = 300, .read = read_tsc, - .resume = resume_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | CLOCK_SOURCE_MUST_VERIFY, -- cgit v1.1 From 4a0e637738f06673725792d74eed67f8779b62c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:13 +0000 Subject: clocksource: Get rid of cycle_last cycle_last was added to the clocksource to support the TSC validation. We moved that to the core code, so we can get rid of the extra copy. Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- arch/x86/kernel/vsyscall_gtod.c | 2 +- arch/x86/kvm/x86.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index 9531fbb..c3cb3c1 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -32,7 +32,7 @@ void update_vsyscall(struct timekeeper *tk) /* copy vsyscall data */ vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->clock->cycle_last; + vdata->cycle_last = tk->cycle_last; vdata->mask = tk->clock->mask; vdata->mult = tk->mult; vdata->shift = tk->shift; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63832f5..7b25125 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1001,7 +1001,7 @@ static void update_pvclock_gtod(struct timekeeper *tk) /* copy pvclock gtod data */ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.cycle_last = tk->cycle_last; vdata->clock.mask = tk->clock->mask; vdata->clock.mult = tk->mult; vdata->clock.shift = tk->shift; -- cgit v1.1 From d28ede83791defee9a81e558540699dc46dbbe13 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 16 Jul 2014 21:05:16 +0000 Subject: timekeeping: Create struct tk_read_base and use it in struct timekeeper The members of the new struct are the required ones for the new NMI safe accessor to clcok monotonic. In order to reuse the existing timekeeping code and to make the update of the fast NMI safe timekeepers a simple memcpy use the struct for the timekeeper as well and convert all users. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Mathieu Desnoyers Signed-off-by: John Stultz --- arch/x86/kernel/vsyscall_gtod.c | 23 ++++++++++++----------- arch/x86/kvm/x86.c | 14 +++++++------- 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c index c3cb3c1..c7d791f 100644 --- a/arch/x86/kernel/vsyscall_gtod.c +++ b/arch/x86/kernel/vsyscall_gtod.c @@ -31,29 +31,30 @@ void update_vsyscall(struct timekeeper *tk) gtod_write_begin(vdata); /* copy vsyscall data */ - vdata->vclock_mode = tk->clock->archdata.vclock_mode; - vdata->cycle_last = tk->cycle_last; - vdata->mask = tk->clock->mask; - vdata->mult = tk->mult; - vdata->shift = tk->shift; + vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->cycle_last = tk->tkr.cycle_last; + vdata->mask = tk->tkr.mask; + vdata->mult = tk->tkr.mult; + vdata->shift = tk->tkr.shift; vdata->wall_time_sec = tk->xtime_sec; - vdata->wall_time_snsec = tk->xtime_nsec; + vdata->wall_time_snsec = tk->tkr.xtime_nsec; vdata->monotonic_time_sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; - vdata->monotonic_time_snsec = tk->xtime_nsec + vdata->monotonic_time_snsec = tk->tkr.xtime_nsec + ((u64)tk->wall_to_monotonic.tv_nsec - << tk->shift); + << tk->tkr.shift); while (vdata->monotonic_time_snsec >= - (((u64)NSEC_PER_SEC) << tk->shift)) { + (((u64)NSEC_PER_SEC) << tk->tkr.shift)) { vdata->monotonic_time_snsec -= - ((u64)NSEC_PER_SEC) << tk->shift; + ((u64)NSEC_PER_SEC) << tk->tkr.shift; vdata->monotonic_time_sec++; } vdata->wall_time_coarse_sec = tk->xtime_sec; - vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); + vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >> + tk->tkr.shift); vdata->monotonic_time_coarse_sec = vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7b25125..b7e5794 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -995,19 +995,19 @@ static void update_pvclock_gtod(struct timekeeper *tk) struct pvclock_gtod_data *vdata = &pvclock_gtod_data; u64 boot_ns; - boot_ns = ktime_to_ns(ktime_add(tk->base_mono, tk->offs_boot)); + boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot)); write_seqcount_begin(&vdata->seq); /* copy pvclock gtod data */ - vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; - vdata->clock.cycle_last = tk->cycle_last; - vdata->clock.mask = tk->clock->mask; - vdata->clock.mult = tk->mult; - vdata->clock.shift = tk->shift; + vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr.cycle_last; + vdata->clock.mask = tk->tkr.mask; + vdata->clock.mult = tk->tkr.mult; + vdata->clock.shift = tk->tkr.shift; vdata->boot_ns = boot_ns; - vdata->nsec_base = tk->xtime_nsec; + vdata->nsec_base = tk->tkr.xtime_nsec; write_seqcount_end(&vdata->seq); } -- cgit v1.1