diff options
Diffstat (limited to 'tools')
31 files changed, 6460 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 52462ae..e032716 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -61,6 +61,9 @@ OPTIONS -r:: --realtime=:: Collect data with this RT SCHED_FIFO priority. +-D:: +--no-delay:: + Collect data without buffering. -A:: --append:: Append to the output file to do incremental profiling. diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1b9b13e..2b5387d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -227,7 +227,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 ALL_LDFLAGS = $(LDFLAGS) diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile new file mode 100644 index 0000000..15130b50 --- /dev/null +++ b/tools/perf/arch/s390/Makefile @@ -0,0 +1,4 @@ +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o +endif diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c new file mode 100644 index 0000000..e19653e --- /dev/null +++ b/tools/perf/arch/s390/util/dwarf-regs.c @@ -0,0 +1,22 @@ +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright IBM Corp. 
2010 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <libio.h> +#include <dwarf-regs.h> + +#define NUM_GPRS 16 + +static const char *gpr_names[NUM_GPRS] = { + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", +}; + +const char *get_arch_regstr(unsigned int n) +{ + return (n >= NUM_GPRS) ? NULL : gpr_names[n]; +} diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7bc0490..df6064a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,6 +49,7 @@ static int pipe_output = 0; static const char *output_name = "perf.data"; static int group = 0; static int realtime_prio = 0; +static bool nodelay = false; static bool raw_samples = false; static bool sample_id_all_avail = true; static bool system_wide = false; @@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu) attr->sample_type |= PERF_SAMPLE_CPU; } + if (nodelay) { + attr->watermark = 0; + attr->wakeup_events = 1; + } + attr->mmap = track; attr->comm = track; attr->inherit = !no_inherit; @@ -477,6 +483,7 @@ static void atexit_header(void) process_buildids(); perf_header__write(&session->header, output, true); perf_session__delete(session); + perf_evsel_list__delete(); symbol__exit(); } } @@ -842,6 +849,8 @@ const struct option record_options[] = { "record events on existing thread id"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), + OPT_BOOLEAN('D', "no-delay", &nodelay, + "collect data without buffering"), OPT_BOOLEAN('R', "raw-samples", &raw_samples, "collect raw sample records from all opened counters"), OPT_BOOLEAN('a', "all-cpus", &system_wide, diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7a4ebeb..29e7ffd8 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -489,7 +489,8 @@ static void create_tasks(void) err = 
pthread_attr_init(&attr); BUG_ON(err); - err = pthread_attr_setstacksize(&attr, (size_t)(16*1024)); + err = pthread_attr_setstacksize(&attr, + (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); BUG_ON(err); err = pthread_mutex_lock(&start_work_mutex); BUG_ON(err); @@ -1842,15 +1843,15 @@ static const char *record_args[] = { "-f", "-m", "1024", "-c", "1", - "-e", "sched:sched_switch:r", - "-e", "sched:sched_stat_wait:r", - "-e", "sched:sched_stat_sleep:r", - "-e", "sched:sched_stat_iowait:r", - "-e", "sched:sched_stat_runtime:r", - "-e", "sched:sched_process_exit:r", - "-e", "sched:sched_process_fork:r", - "-e", "sched:sched_wakeup:r", - "-e", "sched:sched_migrate_task:r", + "-e", "sched:sched_switch", + "-e", "sched:sched_stat_wait", + "-e", "sched:sched_stat_sleep", + "-e", "sched:sched_stat_iowait", + "-e", "sched:sched_stat_runtime", + "-e", "sched:sched_process_exit", + "-e", "sched:sched_process_fork", + "-e", "sched:sched_wakeup", + "-e", "sched:sched_migrate_task", }; static int __cmd_record(int argc, const char **argv) @@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv) rec_argc = ARRAY_SIZE(record_args) + argc - 1; rec_argv = calloc(rec_argc + 1, sizeof(char *)); - if (rec_argv) + if (rec_argv == NULL) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(record_args); i++) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 02b2d80..0ff11d9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv) "\t Consider tweaking" " /proc/sys/kernel/perf_event_paranoid or running as root.", system_wide ? "system-wide " : ""); + } else if (errno == ENOENT) { + error("%s event is not supported. ", event_name(counter)); } else { error("open_counter returned with %d (%s). 
" "/bin/dmesg may provide additional information.\n", @@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) nr_counters = ARRAY_SIZE(default_attrs); for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { - pos = perf_evsel__new(default_attrs[c].type, - default_attrs[c].config, + pos = perf_evsel__new(&default_attrs[c], nr_counters); if (pos == NULL) goto out; @@ -742,6 +743,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) out_free_fd: list_for_each_entry(pos, &evsel_list, node) perf_evsel__free_stat_priv(pos); + perf_evsel_list__delete(); out: thread_map__delete(threads); threads = NULL; diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 1c98434..ed56961 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -234,6 +234,7 @@ out: return err; } +#include "util/cpumap.h" #include "util/evsel.h" #include <sys/types.h> @@ -264,6 +265,7 @@ static int test__open_syscall_event(void) int err = -1, fd; struct thread_map *threads; struct perf_evsel *evsel; + struct perf_event_attr attr; unsigned int nr_open_calls = 111, i; int id = trace_event__id("sys_enter_open"); @@ -278,7 +280,10 @@ static int test__open_syscall_event(void) return -1; } - evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0); + memset(&attr, 0, sizeof(attr)); + attr.type = PERF_TYPE_TRACEPOINT; + attr.config = id; + evsel = perf_evsel__new(&attr, 0); if (evsel == NULL) { pr_debug("perf_evsel__new\n"); goto out_thread_map_delete; @@ -317,6 +322,111 @@ out_thread_map_delete: return err; } +#include <sched.h> + +static int test__open_syscall_event_on_all_cpus(void) +{ + int err = -1, fd, cpu; + struct thread_map *threads; + struct cpu_map *cpus; + struct perf_evsel *evsel; + struct perf_event_attr attr; + unsigned int nr_open_calls = 111, i; + cpu_set_t *cpu_set; + size_t cpu_set_size; + int id = trace_event__id("sys_enter_open"); + + if (id < 0) { + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + return 
-1; + } + + threads = thread_map__new(-1, getpid()); + if (threads == NULL) { + pr_debug("thread_map__new\n"); + return -1; + } + + cpus = cpu_map__new(NULL); + if (threads == NULL) { + pr_debug("thread_map__new\n"); + return -1; + } + + cpu_set = CPU_ALLOC(cpus->nr); + + if (cpu_set == NULL) + goto out_thread_map_delete; + + cpu_set_size = CPU_ALLOC_SIZE(cpus->nr); + CPU_ZERO_S(cpu_set_size, cpu_set); + + memset(&attr, 0, sizeof(attr)); + attr.type = PERF_TYPE_TRACEPOINT; + attr.config = id; + evsel = perf_evsel__new(&attr, 0); + if (evsel == NULL) { + pr_debug("perf_evsel__new\n"); + goto out_cpu_free; + } + + if (perf_evsel__open(evsel, cpus, threads) < 0) { + pr_debug("failed to open counter: %s, " + "tweak /proc/sys/kernel/perf_event_paranoid?\n", + strerror(errno)); + goto out_evsel_delete; + } + + for (cpu = 0; cpu < cpus->nr; ++cpu) { + unsigned int ncalls = nr_open_calls + cpu; + + CPU_SET(cpu, cpu_set); + sched_setaffinity(0, cpu_set_size, cpu_set); + for (i = 0; i < ncalls; ++i) { + fd = open("/etc/passwd", O_RDONLY); + close(fd); + } + CPU_CLR(cpu, cpu_set); + } + + /* + * Here we need to explicitely preallocate the counts, as if + * we use the auto allocation it will allocate just for 1 cpu, + * as we start by cpu 0. 
+ */ + if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { + pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); + goto out_close_fd; + } + + for (cpu = 0; cpu < cpus->nr; ++cpu) { + unsigned int expected; + + if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { + pr_debug("perf_evsel__open_read_on_cpu\n"); + goto out_close_fd; + } + + expected = nr_open_calls + cpu; + if (evsel->counts->cpu[cpu].val != expected) { + pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n", + expected, cpu, evsel->counts->cpu[cpu].val); + goto out_close_fd; + } + } + + err = 0; +out_close_fd: + perf_evsel__close_fd(evsel, 1, threads->nr); +out_evsel_delete: + perf_evsel__delete(evsel); +out_cpu_free: + CPU_FREE(cpu_set); +out_thread_map_delete: + thread_map__delete(threads); + return err; +} + static struct test { const char *desc; int (*func)(void); @@ -330,6 +440,10 @@ static struct test { .func = test__open_syscall_event, }, { + .desc = "detect open syscall event on all cpus", + .func = test__open_syscall_event_on_all_cpus, + }, + { .func = NULL, }, }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1e67ab9..05344c6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1471,6 +1471,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = default_interval; } + sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node); + symbol_conf.priv_size = (sizeof(struct sym_entry) + (nr_counters + 1) * sizeof(unsigned long)); @@ -1488,6 +1490,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) out_free_fd: list_for_each_entry(pos, &evsel_list, node) perf_evsel__free_mmap(pos); + perf_evsel_list__delete(); return status; } diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 5b1ecd6..595d0f4 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -286,8 +286,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) 
status = p->fn(argc, argv, prefix); exit_browser(status); - perf_evsel_list__delete(); - if (status) return status & 0xff; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c95267e..f5cfed6 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -6,14 +6,13 @@ #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx) +struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(sizeof(*evsel)); if (evsel != NULL) { evsel->idx = idx; - evsel->attr.type = type; - evsel->attr.config = config; + evsel->attr = *attr; INIT_LIST_HEAD(&evsel->node); } @@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel, return 0; } -int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) +static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, + struct thread_map *threads) { - int cpu; + int cpu, thread; - if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0) + if (evsel->fd == NULL && + perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) return -1; for (cpu = 0; cpu < cpus->nr; cpu++) { - FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1, - cpus->map[cpu], -1, 0); - if (FD(evsel, cpu, 0) < 0) - goto out_close; + for (thread = 0; thread < threads->nr; thread++) { + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, + threads->map[thread], + cpus->map[cpu], -1, 0); + if (FD(evsel, cpu, thread) < 0) + goto out_close; + } } return 0; out_close: - while (--cpu >= 0) { - close(FD(evsel, cpu, 0)); - FD(evsel, cpu, 0) = -1; - } + do { + while (--thread >= 0) { + close(FD(evsel, cpu, thread)); + FD(evsel, cpu, thread) = -1; + } + thread = threads->nr; + } while (--cpu >= 0); return -1; } -int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) +static struct { + struct cpu_map map; + int cpus[1]; +} empty_cpu_map = { + .map.nr 
= 1, + .cpus = { -1, }, +}; + +static struct { + struct thread_map map; + int threads[1]; +} empty_thread_map = { + .map.nr = 1, + .threads = { -1, }, +}; + +int perf_evsel__open(struct perf_evsel *evsel, + struct cpu_map *cpus, struct thread_map *threads) { - int thread; - - if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr)) - return -1; - for (thread = 0; thread < threads->nr; thread++) { - FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr, - threads->map[thread], -1, -1, 0); - if (FD(evsel, 0, thread) < 0) - goto out_close; + if (cpus == NULL) { + /* Work around old compiler warnings about strict aliasing */ + cpus = &empty_cpu_map.map; } - return 0; + if (threads == NULL) + threads = &empty_thread_map.map; -out_close: - while (--thread >= 0) { - close(FD(evsel, 0, thread)); - FD(evsel, 0, thread) = -1; - } - return -1; + return __perf_evsel__open(evsel, cpus, threads); } -int perf_evsel__open(struct perf_evsel *evsel, - struct cpu_map *cpus, struct thread_map *threads) +int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) { - if (threads == NULL) - return perf_evsel__open_per_cpu(evsel, cpus); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); +} - return perf_evsel__open_per_thread(evsel, threads); +int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) +{ + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a0ccd69..b2d755f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -37,7 +37,7 @@ struct perf_evsel { struct cpu_map; struct thread_map; -struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx); +struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); void perf_evsel__delete(struct perf_evsel *evsel); int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); diff --git a/tools/perf/util/parse-events.c 
b/tools/perf/util/parse-events.c index 649083f..5cb6f4b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp, return EVT_HANDLED_ALL; } +static int store_event_type(const char *orgname) +{ + char filename[PATH_MAX], *c; + FILE *file; + int id, n; + + sprintf(filename, "%s/", debugfs_path); + strncat(filename, orgname, strlen(orgname)); + strcat(filename, "/id"); + + c = strchr(filename, ':'); + if (c) + *c = '/'; + + file = fopen(filename, "r"); + if (!file) + return 0; + n = fscanf(file, "%i", &id); + fclose(file); + if (n < 1) { + pr_err("cannot store event ID\n"); + return -EINVAL; + } + return perf_header__push_event(id, orgname); +} static enum event_result parse_tracepoint_event(const char **strp, struct perf_event_attr *attr) @@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp, *strp += strlen(sys_name) + evt_length; return parse_multiple_tracepoint_event(sys_name, evt_name, flags); - } else + } else { + if (store_event_type(evt_name) < 0) + return EVT_FAILED; + return parse_single_tracepoint_event(sys_name, evt_name, evt_length, attr, strp); + } } static enum event_result @@ -778,41 +807,11 @@ modifier: return ret; } -static int store_event_type(const char *orgname) -{ - char filename[PATH_MAX], *c; - FILE *file; - int id, n; - - sprintf(filename, "%s/", debugfs_path); - strncat(filename, orgname, strlen(orgname)); - strcat(filename, "/id"); - - c = strchr(filename, ':'); - if (c) - *c = '/'; - - file = fopen(filename, "r"); - if (!file) - return 0; - n = fscanf(file, "%i", &id); - fclose(file); - if (n < 1) { - pr_err("cannot store event ID\n"); - return -EINVAL; - } - return perf_header__push_event(id, orgname); -} - int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_event_attr attr; enum event_result ret; - if (strchr(str, ':')) - if (store_event_type(str) < 
0) - return -1; - for (;;) { memset(&attr, 0, sizeof(attr)); ret = parse_event_symbols(&str, &attr); @@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u if (ret != EVT_HANDLED_ALL) { struct perf_evsel *evsel; - evsel = perf_evsel__new(attr.type, attr.config, + evsel = perf_evsel__new(&attr, nr_counters); if (evsel == NULL) return -1; @@ -1014,8 +1013,15 @@ void print_events(void) int perf_evsel_list__create_default(void) { - struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE, - PERF_COUNT_HW_CPU_CYCLES, 0); + struct perf_evsel *evsel; + struct perf_event_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + + evsel = perf_evsel__new(&attr, 0); + if (evsel == NULL) return -ENOMEM; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 6fb4694..313dac2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1007,7 +1007,7 @@ more: if (size == 0) size = 8; - if (head + event->header.size >= mmap_size) { + if (head + event->header.size > mmap_size) { if (mmaps[map_idx]) { munmap(mmaps[map_idx], mmap_size); mmaps[map_idx] = NULL; diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile new file mode 100644 index 0000000..fd8e1f1 --- /dev/null +++ b/tools/power/x86/turbostat/Makefile @@ -0,0 +1,8 @@ +turbostat : turbostat.c + +clean : + rm -f turbostat + +install : + install turbostat /usr/bin/turbostat + install turbostat.8 /usr/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 new file mode 100644 index 0000000..ff75125 --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.8 @@ -0,0 +1,172 @@ +.TH TURBOSTAT 8 +.SH NAME +turbostat \- Report processor frequency and idle statistics +.SH SYNOPSIS +.ft B +.B turbostat +.RB [ "\-v" ] +.RB [ "\-M MSR#" ] +.RB command +.br +.B turbostat +.RB [ "\-v" ] +.RB [ "\-M MSR#" ] +.RB [ 
"\-i interval_sec" ] +.SH DESCRIPTION +\fBturbostat \fP reports processor topology, frequency +and idle power state statistics on modern X86 processors. +Either \fBcommand\fP is forked and statistics are printed +upon its completion, or statistics are printed periodically. + +\fBturbostat \fP +requires that the processor +supports an "invariant" TSC, plus the APERF and MPERF MSRs. +\fBturbostat \fP will report idle cpu power state residency +on processors that additionally support C-state residency counters. + +.SS Options +The \fB-v\fP option increases verbosity. +.PP +The \fB-M MSR#\fP option dumps the specified MSR, +in addition to the usual frequency and idle statistics. +.PP +The \fB-i interval_sec\fP option prints statistics every \fIinterval_sec\fP seconds. +The default is 5 seconds. +.PP +The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit, +displays the statistics gathered since it was forked. +.PP +.SH FIELD DESCRIPTIONS +.nf +\fBpkg\fP processor package number. +\fBcore\fP processor core number. +\fBCPU\fP Linux CPU (logical processor) number. +\fB%c0\fP percent of the interval that the CPU retired instructions. +\fBGHz\fP average clock rate while the CPU was in c0 state. +\fBTSC\fP average GHz that the TSC ran during the entire interval. +\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states. +\fB%pc3, %pc6\fP percentage residency in hardware package idle states. +.fi +.PP +.SH EXAMPLE +Without any parameters, turbostat prints out counters every 5 seconds. +(override interval with "-i sec" option, or specify a command +for turbostat to fork). + +The first row of statistics reflects the average for the entire system. +Subsequent rows show per-CPU statistics. 
+ +.nf +[root@x980]# ./turbostat +core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07 + 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07 + 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07 + 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07 + 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07 + 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07 + 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07 + 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07 + 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07 + 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 + 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07 + 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07 + 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07 +.fi +.SH VERBOSE EXAMPLE +The "-v" option adds verbosity to the output: + +.nf +GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) +12 * 133 = 1600 MHz max efficiency +25 * 133 = 3333 MHz TSC frequency +26 * 133 = 3467 MHz max turbo 4 active cores +26 * 133 = 3467 MHz max turbo 3 active cores +27 * 133 = 3600 MHz max turbo 2 active cores +27 * 133 = 3600 MHz max turbo 1 active cores + +.fi +The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency +available at the minimum package voltage. The \fBTSC frequency\fP is the nominal +maximum frequency of the processor if turbo-mode were not available. This frequency +should be sustainable on all CPUs indefinitely, given nominal power and cooling. +The remaining rows show what maximum turbo frequency is possible +depending on the number of idle cores. Note that this information is +not available on all processors. +.SH FORK EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered when the command exits. +eg. 
Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds +until ^C while the other CPUs are mostly idle: + +.nf +[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null + +^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 + 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00 + 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00 + 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00 + 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00 + 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00 + 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00 + 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00 + 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00 + 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00 + 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00 + 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00 + 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00 + 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00 +6.950866 sec + +.fi +Above, the cycle soaker drives cpu9 up to its 3.6 GHz turbo limit +while the other processors are generally in various states of idle. + +Note that cpu3 is an HT sibling sharing core9 +with cpu9, and thus it is unable to get to an idle state +deeper than c1 while cpu9 is busy. + +Note that turbostat reports average GHz of 3.61, while +the arithmetic average of the GHz column above is 3.24. +This is a weighted average, where the weight is %c0. ie. it is the total number of +un-halted cycles elapsed per time divided by the number of CPUs. +.SH NOTES + +.B "turbostat " +must be run as root. + +.B "turbostat " +reads hardware counters, but doesn't write them. +So it will not interfere with the OS or other programs, including +multiple invocations of itself. + +\fBturbostat \fP +may work poorly on Linux-2.6.20 through 2.6.29, +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF +in those kernels. + +The APERF, MPERF MSRs are defined to count non-halted cycles. +Although it is not guaranteed by the architecture, turbostat assumes +that they count at TSC rate, which is true on all processors tested to date. 
+ +.SH REFERENCES +"Intel® Turbo Boost Technology +in Intel® Core™ Microarchitecture (Nehalem) Based Processors" +http://download.intel.com/design/processor/applnots/320354.pdf + +"Intel® 64 and IA-32 Architectures Software Developer's Manual +Volume 3B: System Programming Guide" +http://www.intel.com/products/processor/manuals/ + +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4), vmstat(8) +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c new file mode 100644 index 0000000..4c6983d --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.c @@ -0,0 +1,1048 @@ +/* + * turbostat -- show CPU frequency and C-state residency + * on modern Intel turbo-capable processors. + * + * Copyright (c) 2010, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <dirent.h> +#include <string.h> +#include <ctype.h> + +#define MSR_TSC 0x10 +#define MSR_NEHALEM_PLATFORM_INFO 0xCE +#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD +#define MSR_APERF 0xE8 +#define MSR_MPERF 0xE7 +#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ +#define MSR_PKG_C3_RESIDENCY 0x3F8 +#define MSR_PKG_C6_RESIDENCY 0x3F9 +#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ +#define MSR_CORE_C3_RESIDENCY 0x3FC +#define MSR_CORE_C6_RESIDENCY 0x3FD +#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ + +char *proc_stat = "/proc/stat"; +unsigned int interval_sec = 5; /* set with -i interval_sec */ +unsigned int verbose; /* set with -v */ +unsigned int skip_c0; +unsigned int skip_c1; +unsigned int do_nhm_cstates; +unsigned int do_snb_cstates; +unsigned int has_aperf; +unsigned int units = 1000000000; /* Ghz etc */ +unsigned int genuine_intel; +unsigned int has_invariant_tsc; +unsigned int do_nehalem_platform_info; +unsigned int do_nehalem_turbo_ratio_limit; +unsigned int extra_msr_offset; +double bclk; +unsigned int show_pkg; +unsigned int show_core; +unsigned int show_cpu; + +int aperf_mperf_unstable; +int backwards_count; +char *progname; +int need_reinitialize; + +int num_cpus; + +typedef struct per_cpu_counters { + unsigned long long tsc; /* per thread */ + unsigned long long aperf; /* per thread */ + unsigned long long mperf; /* per thread */ + unsigned long long c1; /* per thread (calculated) */ + unsigned long long c3; /* per core */ + unsigned long long c6; /* per core */ + unsigned long long c7; /* per core */ + unsigned long long pc2; /* per package */ + unsigned long long pc3; /* per package */ + unsigned long long pc6; /* per package */ + unsigned long long pc7; /* per package */ + unsigned long long extra_msr; /* 
per thread */ + int pkg; + int core; + int cpu; + struct per_cpu_counters *next; +} PCC; + +PCC *pcc_even; +PCC *pcc_odd; +PCC *pcc_delta; +PCC *pcc_average; +struct timeval tv_even; +struct timeval tv_odd; +struct timeval tv_delta; + +unsigned long long get_msr(int cpu, off_t offset) +{ + ssize_t retval; + unsigned long long msr; + char pathname[32]; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) { + perror(pathname); + need_reinitialize = 1; + return 0; + } + + retval = pread(fd, &msr, sizeof msr, offset); + if (retval != sizeof msr) { + fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n", + cpu, offset, retval); + exit(-2); + } + + close(fd); + return msr; +} + +void print_header() +{ + if (show_pkg) + fprintf(stderr, "pkg "); + if (show_core) + fprintf(stderr, "core"); + if (show_cpu) + fprintf(stderr, " CPU"); + if (do_nhm_cstates) + fprintf(stderr, " %%c0 "); + if (has_aperf) + fprintf(stderr, " GHz"); + fprintf(stderr, " TSC"); + if (do_nhm_cstates) + fprintf(stderr, " %%c1 "); + if (do_nhm_cstates) + fprintf(stderr, " %%c3 "); + if (do_nhm_cstates) + fprintf(stderr, " %%c6 "); + if (do_snb_cstates) + fprintf(stderr, " %%c7 "); + if (do_snb_cstates) + fprintf(stderr, " %%pc2 "); + if (do_nhm_cstates) + fprintf(stderr, " %%pc3 "); + if (do_nhm_cstates) + fprintf(stderr, " %%pc6 "); + if (do_snb_cstates) + fprintf(stderr, " %%pc7 "); + if (extra_msr_offset) + fprintf(stderr, " MSR 0x%x ", extra_msr_offset); + + putc('\n', stderr); +} + +void dump_pcc(PCC *pcc) +{ + fprintf(stderr, "package: %d ", pcc->pkg); + fprintf(stderr, "core:: %d ", pcc->core); + fprintf(stderr, "CPU: %d ", pcc->cpu); + fprintf(stderr, "TSC: %016llX\n", pcc->tsc); + fprintf(stderr, "c3: %016llX\n", pcc->c3); + fprintf(stderr, "c6: %016llX\n", pcc->c6); + fprintf(stderr, "c7: %016llX\n", pcc->c7); + fprintf(stderr, "aperf: %016llX\n", pcc->aperf); + fprintf(stderr, "pc2: %016llX\n", pcc->pc2); + fprintf(stderr, "pc3: %016llX\n", 
pcc->pc3); + fprintf(stderr, "pc6: %016llX\n", pcc->pc6); + fprintf(stderr, "pc7: %016llX\n", pcc->pc7); + fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr); +} + +void dump_list(PCC *pcc) +{ + printf("dump_list 0x%p\n", pcc); + + for (; pcc; pcc = pcc->next) + dump_pcc(pcc); +} + +void print_pcc(PCC *p) +{ + double interval_float; + + interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + + /* topology columns, print blanks on 1st (average) line */ + if (p == pcc_average) { + if (show_pkg) + fprintf(stderr, " "); + if (show_core) + fprintf(stderr, " "); + if (show_cpu) + fprintf(stderr, " "); + } else { + if (show_pkg) + fprintf(stderr, "%4d", p->pkg); + if (show_core) + fprintf(stderr, "%4d", p->core); + if (show_cpu) + fprintf(stderr, "%4d", p->cpu); + } + + /* %c0 */ + if (do_nhm_cstates) { + if (!skip_c0) + fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc); + else + fprintf(stderr, " ****"); + } + + /* GHz */ + if (has_aperf) { + if (!aperf_mperf_unstable) { + fprintf(stderr, "%5.2f", + 1.0 * p->tsc / units * p->aperf / + p->mperf / interval_float); + } else { + if (p->aperf > p->tsc || p->mperf > p->tsc) { + fprintf(stderr, " ****"); + } else { + fprintf(stderr, "%4.1f*", + 1.0 * p->tsc / + units * p->aperf / + p->mperf / interval_float); + } + } + } + + /* TSC */ + fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float); + + if (do_nhm_cstates) { + if (!skip_c1) + fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc); + else + fprintf(stderr, " ****"); + } + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc); + if (do_snb_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc); + if (do_snb_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc); + if (do_nhm_cstates) + fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc); + if (do_snb_cstates) + fprintf(stderr, 
"%7.2f", 100.0 * p->pc7/p->tsc); + if (extra_msr_offset) + fprintf(stderr, " 0x%016llx", p->extra_msr); + putc('\n', stderr); +} + +void print_counters(PCC *cnt) +{ + PCC *pcc; + + print_header(); + + if (num_cpus > 1) + print_pcc(pcc_average); + + for (pcc = cnt; pcc != NULL; pcc = pcc->next) + print_pcc(pcc); + +} + +#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after)) + + +int compute_delta(PCC *after, PCC *before, PCC *delta) +{ + int errors = 0; + int perf_err = 0; + + skip_c0 = skip_c1 = 0; + + for ( ; after && before && delta; + after = after->next, before = before->next, delta = delta->next) { + if (before->cpu != after->cpu) { + printf("cpu configuration changed: %d != %d\n", + before->cpu, after->cpu); + return -1; + } + + if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) { + fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n", + before->cpu, before->tsc, after->tsc); + errors++; + } + /* check for TSC < 1 Mcycles over interval */ + if (delta->tsc < (1000 * 1000)) { + fprintf(stderr, "Insanely slow TSC rate," + " TSC stops in idle?\n"); + fprintf(stderr, "You can disable all c-states" + " by booting with \"idle=poll\"\n"); + fprintf(stderr, "or just the deep ones with" + " \"processor.max_cstate=1\"\n"); + exit(-3); + } + if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) { + fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n", + before->cpu, before->c3, after->c3); + errors++; + } + if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) { + fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n", + before->cpu, before->c6, after->c6); + errors++; + } + if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) { + fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n", + before->cpu, before->c7, after->c7); + errors++; + } + if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) { + fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n", + before->cpu, 
before->pc2, after->pc2); + errors++; + } + if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) { + fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n", + before->cpu, before->pc3, after->pc3); + errors++; + } + if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) { + fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n", + before->cpu, before->pc6, after->pc6); + errors++; + } + if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) { + fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n", + before->cpu, before->pc7, after->pc7); + errors++; + } + + perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf); + if (perf_err) { + fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n", + before->cpu, before->aperf, after->aperf); + } + perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf); + if (perf_err) { + fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n", + before->cpu, before->mperf, after->mperf); + } + if (perf_err) { + if (!aperf_mperf_unstable) { + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(stderr, "* Frequency results do not cover entire interval *\n"); + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + + aperf_mperf_unstable = 1; + } + /* + * mperf delta is likely a huge "positive" number + * can not use it for calculating c0 time + */ + skip_c0 = 1; + skip_c1 = 1; + } + + /* + * As mperf and tsc collection are not atomic, + * it is possible for mperf's non-halted cycles + * to exceed TSC's all cycles: show c1 = 0% in that case. 
+ */ + if (delta->mperf > delta->tsc) + delta->c1 = 0; + else /* normal case, derive c1 */ + delta->c1 = delta->tsc - delta->mperf + - delta->c3 - delta->c6 - delta->c7; + + if (delta->mperf == 0) + delta->mperf = 1; /* divide by 0 protection */ + + /* + * for "extra msr", just copy the latest w/o subtracting + */ + delta->extra_msr = after->extra_msr; + if (errors) { + fprintf(stderr, "ERROR cpu%d before:\n", before->cpu); + dump_pcc(before); + fprintf(stderr, "ERROR cpu%d after:\n", before->cpu); + dump_pcc(after); + errors = 0; + } + } + return 0; +} + +void compute_average(PCC *delta, PCC *avg) +{ + PCC *sum; + + sum = calloc(1, sizeof(PCC)); + if (sum == NULL) { + perror("calloc sum"); + exit(1); + } + + for (; delta; delta = delta->next) { + sum->tsc += delta->tsc; + sum->c1 += delta->c1; + sum->c3 += delta->c3; + sum->c6 += delta->c6; + sum->c7 += delta->c7; + sum->aperf += delta->aperf; + sum->mperf += delta->mperf; + sum->pc2 += delta->pc2; + sum->pc3 += delta->pc3; + sum->pc6 += delta->pc6; + sum->pc7 += delta->pc7; + } + avg->tsc = sum->tsc/num_cpus; + avg->c1 = sum->c1/num_cpus; + avg->c3 = sum->c3/num_cpus; + avg->c6 = sum->c6/num_cpus; + avg->c7 = sum->c7/num_cpus; + avg->aperf = sum->aperf/num_cpus; + avg->mperf = sum->mperf/num_cpus; + avg->pc2 = sum->pc2/num_cpus; + avg->pc3 = sum->pc3/num_cpus; + avg->pc6 = sum->pc6/num_cpus; + avg->pc7 = sum->pc7/num_cpus; + + free(sum); +} + +void get_counters(PCC *pcc) +{ + for ( ; pcc; pcc = pcc->next) { + pcc->tsc = get_msr(pcc->cpu, MSR_TSC); + if (do_nhm_cstates) + pcc->c3 = get_msr(pcc->cpu, MSR_CORE_C3_RESIDENCY); + if (do_nhm_cstates) + pcc->c6 = get_msr(pcc->cpu, MSR_CORE_C6_RESIDENCY); + if (do_snb_cstates) + pcc->c7 = get_msr(pcc->cpu, MSR_CORE_C7_RESIDENCY); + if (has_aperf) + pcc->aperf = get_msr(pcc->cpu, MSR_APERF); + if (has_aperf) + pcc->mperf = get_msr(pcc->cpu, MSR_MPERF); + if (do_snb_cstates) + pcc->pc2 = get_msr(pcc->cpu, MSR_PKG_C2_RESIDENCY); + if (do_nhm_cstates) + pcc->pc3 = 
get_msr(pcc->cpu, MSR_PKG_C3_RESIDENCY); + if (do_nhm_cstates) + pcc->pc6 = get_msr(pcc->cpu, MSR_PKG_C6_RESIDENCY); + if (do_snb_cstates) + pcc->pc7 = get_msr(pcc->cpu, MSR_PKG_C7_RESIDENCY); + if (extra_msr_offset) + pcc->extra_msr = get_msr(pcc->cpu, extra_msr_offset); + } +} + + +void print_nehalem_info() +{ + unsigned long long msr; + unsigned int ratio; + + if (!do_nehalem_platform_info) + return; + + msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO); + + ratio = (msr >> 40) & 0xFF; + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", + ratio, bclk, ratio * bclk); + + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + + if (!do_nehalem_turbo_ratio_limit) + return; + + msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", + ratio, bclk, ratio * bclk); + +} + +void free_counter_list(PCC *list) +{ + PCC *p; + + for (p = list; p; ) { + PCC *free_me; + + free_me = p; + p = p->next; + free(free_me); + } + return; +} + +void free_all_counters(void) +{ + free_counter_list(pcc_even); + pcc_even = NULL; + + free_counter_list(pcc_odd); + pcc_odd = NULL; + + free_counter_list(pcc_delta); + pcc_delta = NULL; + + free_counter_list(pcc_average); + pcc_average = NULL; +} + +void insert_cpu_counters(PCC **list, PCC *new) +{ + PCC *prev; + + /* + * list was empty + */ + if (*list == NULL) { + 
new->next = *list; + *list = new; + return; + } + + show_cpu = 1; /* there is more than one CPU */ + + /* + * insert on front of list. + * It is sorted by ascending package#, core#, cpu# + */ + if (((*list)->pkg > new->pkg) || + (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) || + (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) { + new->next = *list; + *list = new; + return; + } + + prev = *list; + + while (prev->next && (prev->next->pkg < new->pkg)) { + prev = prev->next; + show_pkg = 1; /* there is more than 1 package */ + } + + while (prev->next && (prev->next->pkg == new->pkg) + && (prev->next->core < new->core)) { + prev = prev->next; + show_core = 1; /* there is more than 1 core */ + } + + while (prev->next && (prev->next->pkg == new->pkg) + && (prev->next->core == new->core) + && (prev->next->cpu < new->cpu)) { + prev = prev->next; + } + + /* + * insert after "prev" + */ + new->next = prev->next; + prev->next = new; + + return; +} + +void alloc_new_cpu_counters(int pkg, int core, int cpu) +{ + PCC *new; + + if (verbose > 1) + printf("pkg%d core%d, cpu%d\n", pkg, core, cpu); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_odd, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_even, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + insert_cpu_counters(&pcc_delta, new); + + new = (PCC *)calloc(1, sizeof(PCC)); + if (new == NULL) { + perror("calloc"); + exit(1); + } + new->pkg = pkg; + new->core = core; + new->cpu = cpu; + pcc_average = new; +} + +int get_physical_package_id(int cpu) +{ + char path[64]; + FILE *filep; + int pkg; + + 
sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + fscanf(filep, "%d", &pkg); + fclose(filep); + return pkg; +} + +int get_core_id(int cpu) +{ + char path[64]; + FILE *filep; + int core; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + fscanf(filep, "%d", &core); + fclose(filep); + return core; +} + +/* + * run func(index, cpu) on every cpu in /proc/stat + */ + +int for_all_cpus(void (func)(int, int, int)) +{ + FILE *fp; + int cpu_count; + int retval; + + fp = fopen(proc_stat, "r"); + if (fp == NULL) { + perror(proc_stat); + exit(1); + } + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) { + perror("/proc/stat format"); + exit(1); + } + + for (cpu_count = 0; ; cpu_count++) { + int cpu; + + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu); + if (retval != 1) + break; + + func(get_physical_package_id(cpu), get_core_id(cpu), cpu); + } + fclose(fp); + return cpu_count; +} + +void re_initialize(void) +{ + printf("turbostat: topology changed, re-initializing.\n"); + free_all_counters(); + num_cpus = for_all_cpus(alloc_new_cpu_counters); + need_reinitialize = 0; + printf("num_cpus is now %d\n", num_cpus); +} + +void dummy(int pkg, int core, int cpu) { return; } +/* + * check to see if a cpu came on-line + */ +void verify_num_cpus() +{ + int new_num_cpus; + + new_num_cpus = for_all_cpus(dummy); + + if (new_num_cpus != num_cpus) { + if (verbose) + printf("num_cpus was %d, is now %d\n", + num_cpus, new_num_cpus); + need_reinitialize = 1; + } + + return; +} + +void turbostat_loop() +{ +restart: + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + + while (1) { + verify_num_cpus(); + if (need_reinitialize) { + re_initialize(); + goto restart; + } + 
sleep(interval_sec); + get_counters(pcc_odd); + gettimeofday(&tv_odd, (struct timezone *)NULL); + + compute_delta(pcc_odd, pcc_even, pcc_delta); + timersub(&tv_odd, &tv_even, &tv_delta); + compute_average(pcc_delta, pcc_average); + print_counters(pcc_delta); + if (need_reinitialize) { + re_initialize(); + goto restart; + } + sleep(interval_sec); + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + compute_delta(pcc_even, pcc_odd, pcc_delta); + timersub(&tv_even, &tv_odd, &tv_delta); + compute_average(pcc_delta, pcc_average); + print_counters(pcc_delta); + } +} + +void check_dev_msr() +{ + struct stat sb; + + if (stat("/dev/cpu/0/msr", &sb)) { + fprintf(stderr, "no /dev/cpu/0/msr\n"); + fprintf(stderr, "Try \"# modprobe msr\"\n"); + exit(-5); + } +} + +void check_super_user() +{ + if (getuid() != 0) { + fprintf(stderr, "must be root\n"); + exit(-6); + } +} + +int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ + case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case 0x25: /* Westmere Client - Clarkdale, Arrandale */ + case 0x2C: /* Westmere EP - Gulftown */ + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + return 1; + case 0x2E: /* Nehalem-EX Xeon - Beckton */ + case 0x2F: /* Westmere-EX Xeon - Eagleton */ + default: + return 0; + } +} + +int is_snb(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case 0x2A: + case 0x2D: + return 1; + } + return 0; +} + +double discover_bclk(unsigned int family, unsigned int model) +{ + if (is_snb(family, model)) + return 100.00; + else + return 133.33; +} + +void check_cpuid() +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model, stepping; + + eax = 
ebx = ecx = edx = 0; + + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0)); + + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) + genuine_intel = 1; + + if (verbose) + fprintf(stderr, "%.4s%.4s%.4s ", + (char *)&ebx, (char *)&edx, (char *)&ecx); + + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (verbose) + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + max_level, family, model, stepping, family, model, stepping); + + if (!(edx & (1 << 5))) { + fprintf(stderr, "CPUID: no MSR\n"); + exit(1); + } + + /* + * check max extended function levels of CPUID. + * This is needed to check for invariant TSC. + * This check is valid for both Intel and AMD. + */ + ebx = ecx = edx = 0; + asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000)); + + if (max_level < 0x80000007) { + fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level); + exit(1); + } + + /* + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + * this check is valid for both Intel and AMD + */ + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007)); + has_invariant_tsc = edx && (1 << 8); + + if (!has_invariant_tsc) { + fprintf(stderr, "No invariant TSC\n"); + exit(1); + } + + /* + * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 + * this check is valid for both Intel and AMD + */ + + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); + has_aperf = ecx && (1 << 0); + if (!has_aperf) { + fprintf(stderr, "No APERF MSR\n"); + exit(1); + } + + do_nehalem_platform_info = genuine_intel && has_invariant_tsc; + do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ + do_snb_cstates = is_snb(family, model); + bclk = 
discover_bclk(family, model); + + do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); +} + + +void usage() +{ + fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n", + progname); + exit(1); +} + + +/* + * in /dev/cpu/ return success for names that are numbers + * ie. filter out ".", "..", "microcode". + */ +int dir_filter(const struct dirent *dirp) +{ + if (isdigit(dirp->d_name[0])) + return 1; + else + return 0; +} + +int open_dev_cpu_msr(int dummy1) +{ + return 0; +} + +void turbostat_init() +{ + check_cpuid(); + + check_dev_msr(); + check_super_user(); + + num_cpus = for_all_cpus(alloc_new_cpu_counters); + + if (verbose) + print_nehalem_info(); +} + +int fork_it(char **argv) +{ + int retval; + pid_t child_pid; + get_counters(pcc_even); + gettimeofday(&tv_even, (struct timezone *)NULL); + + child_pid = fork(); + if (!child_pid) { + /* child */ + execvp(argv[0], argv); + } else { + int status; + + /* parent */ + if (child_pid == -1) { + perror("fork"); + exit(1); + } + + signal(SIGINT, SIG_IGN); + signal(SIGQUIT, SIG_IGN); + if (waitpid(child_pid, &status, 0) == -1) { + perror("wait"); + exit(1); + } + } + get_counters(pcc_odd); + gettimeofday(&tv_odd, (struct timezone *)NULL); + retval = compute_delta(pcc_odd, pcc_even, pcc_delta); + + timersub(&tv_odd, &tv_even, &tv_delta); + compute_average(pcc_delta, pcc_average); + if (!retval) + print_counters(pcc_delta); + + fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);; + + return 0; +} + +void cmdline(int argc, char **argv) +{ + int opt; + + progname = argv[0]; + + while ((opt = getopt(argc, argv, "+vi:M:")) != -1) { + switch (opt) { + case 'v': + verbose++; + break; + case 'i': + interval_sec = atoi(optarg); + break; + case 'M': + sscanf(optarg, "%x", &extra_msr_offset); + if (verbose > 1) + fprintf(stderr, "MSR 0x%X\n", extra_msr_offset); + break; + default: + usage(); + } + } +} + +int main(int argc, char **argv) +{ + cmdline(argc, argv); + + if 
(verbose > 1) + fprintf(stderr, "turbostat Dec 6, 2010" + " - Len Brown <lenb@kernel.org>\n"); + if (verbose > 1) + fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n"); + + turbostat_init(); + + /* + * if any params left, it must be a command to fork + */ + if (argc - optind) + return fork_it(argv + optind); + else + turbostat_loop(); + + return 0; +} diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile new file mode 100644 index 0000000..f458237 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -0,0 +1,8 @@ +x86_energy_perf_policy : x86_energy_perf_policy.c + +clean : + rm -f x86_energy_perf_policy + +install : + install x86_energy_perf_policy /usr/bin/ + install x86_energy_perf_policy.8 /usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 new file mode 100644 index 0000000..8eaaad6 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -0,0 +1,104 @@ +.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> +.\" Distributed under the GPL, Copyleft 1994. +.TH X86_ENERGY_PERF_POLICY 8 +.SH NAME +x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS +.SH SYNOPSIS +.ft B +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB "\-r" +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'performance' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'normal' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'powersave' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB n +.br +.SH DESCRIPTION +\fBx86_energy_perf_policy\fP +allows software to convey +its policy for the relative importance of performance +versus energy savings to the processor. 
+ +The processor uses this information in model-specific ways +when it must select trade-offs between performance and +energy efficiency. + +This policy hint does not supersede Processor Performance states +(P-states) or CPU Idle power states (C-states), but allows +software to have influence where it would otherwise be unable +to express a preference. + +For example, this setting may tell the hardware how +aggressively or conservatively to control frequency +in the "turbo range" above the explicitly OS-controlled +P-state frequency range. It may also tell the hardware +how aggressively it should enter the OS-requested C-states. + +Support for this feature is indicated by CPUID.06H.ECX.bit3 +per the Intel 64 and IA-32 Architectures Software Developer's Manual. + +.SS Options +\fB-c\fP limits operation to a single CPU. +The default is to operate on all CPUs. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per +logical processor, but that the initial implementations +of the MSR were shared among all processors in each package. +.PP +\fB-v\fP increases verbosity. By default +x86_energy_perf_policy is silent. +.PP +\fB-r\fP is for "read-only" mode - the unchanged state +is read and displayed. +.PP +.I performance +Set a policy where performance is paramount. +The processor will be unwilling to sacrifice any performance +for the sake of energy saving. This is the hardware default. +.PP +.I normal +Set a policy with a normal balance between performance and energy efficiency. +The processor will tolerate minor performance compromise +for potentially significant energy savings. +This is a reasonable default for most desktops and servers. +.PP +.I powersave +Set a policy where the processor can accept +a measurable performance hit to maximize energy efficiency. +.PP +.I n +Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. +The range of valid numbers is 0-15, where 0 is maximum +performance and 15 is maximum energy efficiency. 
+ +.SH NOTES +.B "x86_energy_perf_policy " +runs only as root. +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4) +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c new file mode 100644 index 0000000..d9678a3 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -0,0 +1,325 @@ +/* + * x86_energy_perf_policy -- set the energy versus performance + * policy preference bias on recent X86 processors. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <string.h> + +unsigned int verbose; /* set with -v */ +unsigned int read_only; /* set with -r */ +char *progname; +unsigned long long new_bias; +int cpu = -1; + +/* + * Usage: + * + * -c cpu: limit action to a single CPU (default is all CPUs) + * -v: verbose output (can invoke more than once) + * -r: read-only, don't change any settings + * + * performance + * Performance is paramount. 
+ * Unwilling to sacrafice any performance + * for the sake of energy saving. (hardware default) + * + * normal + * Can tolerate minor performance compromise + * for potentially significant energy savings. + * (reasonable default for most desktops and servers) + * + * powersave + * Can tolerate significant performance hit + * to maximize energy savings. + * + * n + * a numerical value to write to the underlying MSR. + */ +void usage(void) +{ + printf("%s: [-c cpu] [-v] " + "(-r | 'performance' | 'normal' | 'powersave' | n)\n", + progname); + exit(1); +} + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 + +#define BIAS_PERFORMANCE 0 +#define BIAS_BALANCE 6 +#define BIAS_POWERSAVE 15 + +void cmdline(int argc, char **argv) +{ + int opt; + + progname = argv[0]; + + while ((opt = getopt(argc, argv, "+rvc:")) != -1) { + switch (opt) { + case 'c': + cpu = atoi(optarg); + break; + case 'r': + read_only = 1; + break; + case 'v': + verbose++; + break; + default: + usage(); + } + } + /* if -r, then should be no additional optind */ + if (read_only && (argc > optind)) + usage(); + + /* + * if no -r , then must be one additional optind + */ + if (!read_only) { + + if (argc != optind + 1) { + printf("must supply -r or policy param\n"); + usage(); + } + + if (!strcmp("performance", argv[optind])) { + new_bias = BIAS_PERFORMANCE; + } else if (!strcmp("normal", argv[optind])) { + new_bias = BIAS_BALANCE; + } else if (!strcmp("powersave", argv[optind])) { + new_bias = BIAS_POWERSAVE; + } else { + char *endptr; + + new_bias = strtoull(argv[optind], &endptr, 0); + if (endptr == argv[optind] || + new_bias > BIAS_POWERSAVE) { + fprintf(stderr, "invalid value: %s\n", + argv[optind]); + usage(); + } + } + } +} + +/* + * validate_cpuid() + * returns on success, quietly exits on failure (make verbose with -v) + */ +void validate_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + char brand[16]; + unsigned int fms, family, model, stepping; + + eax = ebx = ecx = edx = 0; + + 
asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), + "=d" (edx) : "a" (0)); + + if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { + if (verbose) + fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", + (char *)&ebx, (char *)&edx, (char *)&ecx); + exit(1); + } + + asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (verbose > 1) + printf("CPUID %s %d levels family:model:stepping " + "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, + family, model, stepping, family, model, stepping); + + if (!(edx & (1 << 5))) { + if (verbose) + printf("CPUID: no MSR\n"); + exit(1); + } + + /* + * Support for MSR_IA32_ENERGY_PERF_BIAS + * is indicated by CPUID.06H.ECX.bit3 + */ + asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); + if (verbose) + printf("CPUID.06H.ECX: 0x%x\n", ecx); + if (!(ecx & (1 << 3))) { + if (verbose) + printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); + exit(1); + } + return; /* success */ +} + +unsigned long long get_msr(int cpu, int offset) +{ + unsigned long long msr; + char msr_path[32]; + int retval; + int fd; + + sprintf(msr_path, "/dev/cpu/%d/msr", cpu); + fd = open(msr_path, O_RDONLY); + if (fd < 0) { + printf("Try \"# modprobe msr\"\n"); + perror(msr_path); + exit(1); + } + + retval = pread(fd, &msr, sizeof msr, offset); + + if (retval != sizeof msr) { + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + close(fd); + return msr; +} + +unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset) +{ + unsigned long long old_msr; + char msr_path[32]; + int retval; + int fd; + + sprintf(msr_path, "/dev/cpu/%d/msr", cpu); + fd = open(msr_path, O_RDWR); + if (fd < 0) { + perror(msr_path); + exit(1); + } + + retval = pread(fd, &old_msr, sizeof old_msr, offset); + if (retval != sizeof old_msr) { + 
perror("pwrite"); + printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + + retval = pwrite(fd, &new_msr, sizeof new_msr, offset); + if (retval != sizeof new_msr) { + perror("pwrite"); + printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval); + exit(-2); + } + + close(fd); + + return old_msr; +} + +void print_msr(int cpu) +{ + printf("cpu%d: 0x%016llx\n", + cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); +} + +void update_msr(int cpu) +{ + unsigned long long previous_msr; + + previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); + + if (verbose) + printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n", + cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); + + return; +} + +char *proc_stat = "/proc/stat"; +/* + * run func() on every cpu in /dev/cpu + */ +void for_every_cpu(void (func)(int)) +{ + FILE *fp; + int retval; + + fp = fopen(proc_stat, "r"); + if (fp == NULL) { + perror(proc_stat); + exit(1); + } + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) { + perror("/proc/stat format"); + exit(1); + } + + while (1) { + int cpu; + + retval = fscanf(fp, + "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", + &cpu); + if (retval != 1) + return; + + func(cpu); + } + fclose(fp); +} + +int main(int argc, char **argv) +{ + cmdline(argc, argv); + + if (verbose > 1) + printf("x86_energy_perf_policy Nov 24, 2010" + " - Len Brown <lenb@kernel.org>\n"); + if (verbose > 1 && !read_only) + printf("new_bias %lld\n", new_bias); + + validate_cpuid(); + + if (cpu != -1) { + if (read_only) + print_msr(cpu); + else + update_msr(cpu); + } else { + if (read_only) + for_every_cpu(print_msr); + else + for_every_cpu(update_msr); + } + + return 0; +} diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c new file mode 100644 index 0000000..516551c --- /dev/null +++ b/tools/slub/slabinfo.c @@ -0,0 +1,1364 @@ +/* + * Slabinfo: Tool to get reports about slabs + * + * (C) 2007 sgi, Christoph Lameter + * + * Compile by: + 
* + * gcc -o slabinfo slabinfo.c + */ +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <dirent.h> +#include <strings.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <getopt.h> +#include <regex.h> +#include <errno.h> + +#define MAX_SLABS 500 +#define MAX_ALIASES 500 +#define MAX_NODES 1024 + +struct slabinfo { + char *name; + int alias; + int refs; + int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu; + int hwcache_align, object_size, objs_per_slab; + int sanity_checks, slab_size, store_user, trace; + int order, poison, reclaim_account, red_zone; + unsigned long partial, objects, slabs, objects_partial, objects_total; + unsigned long alloc_fastpath, alloc_slowpath; + unsigned long free_fastpath, free_slowpath; + unsigned long free_frozen, free_add_partial, free_remove_partial; + unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; + unsigned long cpuslab_flush, deactivate_full, deactivate_empty; + unsigned long deactivate_to_head, deactivate_to_tail; + unsigned long deactivate_remote_frees, order_fallback; + int numa[MAX_NODES]; + int numa_partial[MAX_NODES]; +} slabinfo[MAX_SLABS]; + +struct aliasinfo { + char *name; + char *ref; + struct slabinfo *slab; +} aliasinfo[MAX_ALIASES]; + +int slabs = 0; +int actual_slabs = 0; +int aliases = 0; +int alias_targets = 0; +int highest_node = 0; + +char buffer[4096]; + +int show_empty = 0; +int show_report = 0; +int show_alias = 0; +int show_slab = 0; +int skip_zero = 1; +int show_numa = 0; +int show_track = 0; +int show_first_alias = 0; +int validate = 0; +int shrink = 0; +int show_inverted = 0; +int show_single_ref = 0; +int show_totals = 0; +int sort_size = 0; +int sort_active = 0; +int set_debug = 0; +int show_ops = 0; +int show_activity = 0; + +/* Debug options */ +int sanity = 0; +int redzone = 0; +int poison = 0; +int tracking = 0; +int tracing = 0; + +int page_size; + +regex_t pattern; + +static void fatal(const char *x, ...) 
+{ + va_list ap; + + va_start(ap, x); + vfprintf(stderr, x, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void usage(void) +{ + printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" + "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" + "-a|--aliases Show aliases\n" + "-A|--activity Most active slabs first\n" + "-d<options>|--debug=<options> Set/Clear Debug options\n" + "-D|--display-active Switch line format to activity\n" + "-e|--empty Show empty slabs\n" + "-f|--first-alias Show first alias\n" + "-h|--help Show usage information\n" + "-i|--inverted Inverted list\n" + "-l|--slabs Show slabs\n" + "-n|--numa Show NUMA information\n" + "-o|--ops Show kmem_cache_ops\n" + "-s|--shrink Shrink slabs\n" + "-r|--report Detailed report on single slabs\n" + "-S|--Size Sort by size\n" + "-t|--tracking Show alloc/free information\n" + "-T|--Totals Show summary information\n" + "-v|--validate Validate slabs\n" + "-z|--zero Include empty slabs\n" + "-1|--1ref Single reference\n" + "\nValid debug options (FZPUT may be combined)\n" + "a / A Switch on all debug options (=FZUP)\n" + "- Switch off all debug options\n" + "f / F Sanity Checks (SLAB_DEBUG_FREE)\n" + "z / Z Redzoning\n" + "p / P Poisoning\n" + "u / U Tracking\n" + "t / T Tracing\n" + ); +} + +static unsigned long read_obj(const char *name) +{ + FILE *f = fopen(name, "r"); + + if (!f) + buffer[0] = 0; + else { + if (!fgets(buffer, sizeof(buffer), f)) + buffer[0] = 0; + fclose(f); + if (buffer[strlen(buffer)] == '\n') + buffer[strlen(buffer)] = 0; + } + return strlen(buffer); +} + + +/* + * Get the contents of an attribute + */ +static unsigned long get_obj(const char *name) +{ + if (!read_obj(name)) + return 0; + + return atol(buffer); +} + +static unsigned long get_obj_and_str(const char *name, char **x) +{ + unsigned long result = 0; + char *p; + + *x = NULL; + + if (!read_obj(name)) { + x = NULL; + return 0; + } + result = strtoul(buffer, &p, 10); + while (*p == ' ') + p++; + if (*p) + *x = strdup(p); + return 
result; +} + +static void set_obj(struct slabinfo *s, const char *name, int n) +{ + char x[100]; + FILE *f; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "w"); + if (!f) + fatal("Cannot write to %s\n", x); + + fprintf(f, "%d\n", n); + fclose(f); +} + +static unsigned long read_slab_obj(struct slabinfo *s, const char *name) +{ + char x[100]; + FILE *f; + size_t l; + + snprintf(x, 100, "%s/%s", s->name, name); + f = fopen(x, "r"); + if (!f) { + buffer[0] = 0; + l = 0; + } else { + l = fread(buffer, 1, sizeof(buffer), f); + buffer[l] = 0; + fclose(f); + } + return l; +} + + +/* + * Put a size string together + */ +static int store_size(char *buffer, unsigned long value) +{ + unsigned long divisor = 1; + char trailer = 0; + int n; + + if (value > 1000000000UL) { + divisor = 100000000UL; + trailer = 'G'; + } else if (value > 1000000UL) { + divisor = 100000UL; + trailer = 'M'; + } else if (value > 1000UL) { + divisor = 100; + trailer = 'K'; + } + + value /= divisor; + n = sprintf(buffer, "%ld",value); + if (trailer) { + buffer[n] = trailer; + n++; + buffer[n] = 0; + } + if (divisor != 1) { + memmove(buffer + n - 2, buffer + n - 3, 4); + buffer[n-2] = '.'; + n++; + } + return n; +} + +static void decode_numa_list(int *numa, char *t) +{ + int node; + int nr; + + memset(numa, 0, MAX_NODES * sizeof(int)); + + if (!t) + return; + + while (*t == 'N') { + t++; + node = strtoul(t, &t, 10); + if (*t == '=') { + t++; + nr = strtoul(t, &t, 10); + numa[node] = nr; + if (node > highest_node) + highest_node = node; + } + while (*t == ' ') + t++; + } +} + +static void slab_validate(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "validate", 1); +} + +static void slab_shrink(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + set_obj(s, "shrink", 1); +} + +int line = 0; + +static void first_line(void) +{ + if (show_activity) + printf("Name Objects Alloc Free %%Fast Fallb O\n"); + else + printf("Name Objects Objsize Space 
" + "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); +} + +/* + * Find the shortest alias of a slab + */ +static struct aliasinfo *find_one_alias(struct slabinfo *find) +{ + struct aliasinfo *a; + struct aliasinfo *best = NULL; + + for(a = aliasinfo;a < aliasinfo + aliases; a++) { + if (a->slab == find && + (!best || strlen(best->name) < strlen(a->name))) { + best = a; + if (strncmp(a->name,"kmall", 5) == 0) + return best; + } + } + return best; +} + +static unsigned long slab_size(struct slabinfo *s) +{ + return s->slabs * (page_size << s->order); +} + +static unsigned long slab_activity(struct slabinfo *s) +{ + return s->alloc_fastpath + s->free_fastpath + + s->alloc_slowpath + s->free_slowpath; +} + +static void slab_numa(struct slabinfo *s, int mode) +{ + int node; + + if (strcmp(s->name, "*") == 0) + return; + + if (!highest_node) { + printf("\n%s: No NUMA information available.\n", s->name); + return; + } + + if (skip_zero && !s->slabs) + return; + + if (!line) { + printf("\n%-21s:", mode ? "NUMA nodes" : "Slab"); + for(node = 0; node <= highest_node; node++) + printf(" %4d", node); + printf("\n----------------------"); + for(node = 0; node <= highest_node; node++) + printf("-----"); + printf("\n"); + } + printf("%-21s ", mode ? 
"All slabs" : s->name); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa[node]); + printf(" %4s", b); + } + printf("\n"); + if (mode) { + printf("%-21s ", "Partial slabs"); + for(node = 0; node <= highest_node; node++) { + char b[20]; + + store_size(b, s->numa_partial[node]); + printf(" %4s", b); + } + printf("\n"); + } + line++; +} + +static void show_tracking(struct slabinfo *s) +{ + printf("\n%s: Kernel object allocation\n", s->name); + printf("-----------------------------------------------------------------------\n"); + if (read_slab_obj(s, "alloc_calls")) + printf(buffer); + else + printf("No Data\n"); + + printf("\n%s: Kernel object freeing\n", s->name); + printf("------------------------------------------------------------------------\n"); + if (read_slab_obj(s, "free_calls")) + printf(buffer); + else + printf("No Data\n"); + +} + +static void ops(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (read_slab_obj(s, "ops")) { + printf("\n%s: kmem_cache operations\n", s->name); + printf("--------------------------------------------\n"); + printf(buffer); + } else + printf("\n%s has no kmem_cache operations\n", s->name); +} + +static const char *onoff(int x) +{ + if (x) + return "On "; + return "Off"; +} + +static void slab_stats(struct slabinfo *s) +{ + unsigned long total_alloc; + unsigned long total_free; + unsigned long total; + + if (!s->alloc_slab) + return; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + if (!total_alloc) + return; + + printf("\n"); + printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); + printf("--------------------------------------------------\n"); + printf("Fastpath %8lu %8lu %3lu %3lu\n", + s->alloc_fastpath, s->free_fastpath, + s->alloc_fastpath * 100 / total_alloc, + s->free_fastpath * 100 / total_free); + printf("Slowpath %8lu %8lu %3lu %3lu\n", + total_alloc - s->alloc_fastpath, s->free_slowpath, + 
(total_alloc - s->alloc_fastpath) * 100 / total_alloc, + s->free_slowpath * 100 / total_free); + printf("Page Alloc %8lu %8lu %3lu %3lu\n", + s->alloc_slab, s->free_slab, + s->alloc_slab * 100 / total_alloc, + s->free_slab * 100 / total_free); + printf("Add partial %8lu %8lu %3lu %3lu\n", + s->deactivate_to_head + s->deactivate_to_tail, + s->free_add_partial, + (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, + s->free_add_partial * 100 / total_free); + printf("Remove partial %8lu %8lu %3lu %3lu\n", + s->alloc_from_partial, s->free_remove_partial, + s->alloc_from_partial * 100 / total_alloc, + s->free_remove_partial * 100 / total_free); + + printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", + s->deactivate_remote_frees, s->free_frozen, + s->deactivate_remote_frees * 100 / total_alloc, + s->free_frozen * 100 / total_free); + + printf("Total %8lu %8lu\n\n", total_alloc, total_free); + + if (s->cpuslab_flush) + printf("Flushes %8lu\n", s->cpuslab_flush); + + if (s->alloc_refill) + printf("Refill %8lu\n", s->alloc_refill); + + total = s->deactivate_full + s->deactivate_empty + + s->deactivate_to_head + s->deactivate_to_tail; + + if (total) + printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " + "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", + s->deactivate_full, (s->deactivate_full * 100) / total, + s->deactivate_empty, (s->deactivate_empty * 100) / total, + s->deactivate_to_head, (s->deactivate_to_head * 100) / total, + s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); +} + +static void report(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n", + s->name, s->aliases, s->order, s->objects); + if (s->hwcache_align) + printf("** Hardware cacheline aligned\n"); + if (s->cache_dma) + printf("** Memory is allocated in a special DMA zone\n"); + if (s->destroy_by_rcu) + printf("** Slabs are destroyed via RCU\n"); + if (s->reclaim_account) + printf("** Reclaim 
accounting active\n"); + + printf("\nSizes (bytes) Slabs Debug Memory\n"); + printf("------------------------------------------------------------------------\n"); + printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n", + s->object_size, s->slabs, onoff(s->sanity_checks), + s->slabs * (page_size << s->order)); + printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n", + s->slab_size, s->slabs - s->partial - s->cpu_slabs, + onoff(s->red_zone), s->objects * s->object_size); + printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n", + page_size << s->order, s->partial, onoff(s->poison), + s->slabs * (page_size << s->order) - s->objects * s->object_size); + printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n", + s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user), + (s->slab_size - s->object_size) * s->objects); + printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n", + s->align, s->objs_per_slab, onoff(s->trace), + ((page_size << s->order) - s->objs_per_slab * s->slab_size) * + s->slabs); + + ops(s); + show_tracking(s); + slab_numa(s, 1); + slab_stats(s); +} + +static void slabcache(struct slabinfo *s) +{ + char size_str[20]; + char dist_str[40]; + char flags[20]; + char *p = flags; + + if (strcmp(s->name, "*") == 0) + return; + + if (actual_slabs == 1) { + report(s); + return; + } + + if (skip_zero && !show_empty && !s->slabs) + return; + + if (show_empty && s->slabs) + return; + + store_size(size_str, slab_size(s)); + snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs, + s->partial, s->cpu_slabs); + + if (!line++) + first_line(); + + if (s->aliases) + *p++ = '*'; + if (s->cache_dma) + *p++ = 'd'; + if (s->hwcache_align) + *p++ = 'A'; + if (s->poison) + *p++ = 'P'; + if (s->reclaim_account) + *p++ = 'a'; + if (s->red_zone) + *p++ = 'Z'; + if (s->sanity_checks) + *p++ = 'F'; + if (s->store_user) + *p++ = 'U'; + if (s->trace) + *p++ = 'T'; + + *p = 0; + if (show_activity) { + unsigned long 
total_alloc; + unsigned long total_free; + + total_alloc = s->alloc_fastpath + s->alloc_slowpath; + total_free = s->free_fastpath + s->free_slowpath; + + printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n", + s->name, s->objects, + total_alloc, total_free, + total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, + total_free ? (s->free_fastpath * 100 / total_free) : 0, + s->order_fallback, s->order); + } + else + printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", + s->name, s->objects, s->object_size, size_str, dist_str, + s->objs_per_slab, s->order, + s->slabs ? (s->partial * 100) / s->slabs : 100, + s->slabs ? (s->objects * s->object_size * 100) / + (s->slabs * (page_size << s->order)) : 100, + flags); +} + +/* + * Analyze debug options. Return false if something is amiss. + */ +static int debug_opt_scan(char *opt) +{ + if (!opt || !opt[0] || strcmp(opt, "-") == 0) + return 1; + + if (strcasecmp(opt, "a") == 0) { + sanity = 1; + poison = 1; + redzone = 1; + tracking = 1; + return 1; + } + + for ( ; *opt; opt++) + switch (*opt) { + case 'F' : case 'f': + if (sanity) + return 0; + sanity = 1; + break; + case 'P' : case 'p': + if (poison) + return 0; + poison = 1; + break; + + case 'Z' : case 'z': + if (redzone) + return 0; + redzone = 1; + break; + + case 'U' : case 'u': + if (tracking) + return 0; + tracking = 1; + break; + + case 'T' : case 't': + if (tracing) + return 0; + tracing = 1; + break; + default: + return 0; + } + return 1; +} + +static int slab_empty(struct slabinfo *s) +{ + if (s->objects > 0) + return 0; + + /* + * We may still have slabs even if there are no objects. Shrinking will + * remove them. 
+ */ + if (s->slabs != 0) + set_obj(s, "shrink", 1); + + return 1; +} + +static void slab_debug(struct slabinfo *s) +{ + if (strcmp(s->name, "*") == 0) + return; + + if (sanity && !s->sanity_checks) { + set_obj(s, "sanity", 1); + } + if (!sanity && s->sanity_checks) { + if (slab_empty(s)) + set_obj(s, "sanity", 0); + else + fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name); + } + if (redzone && !s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 1); + else + fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name); + } + if (!redzone && s->red_zone) { + if (slab_empty(s)) + set_obj(s, "red_zone", 0); + else + fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name); + } + if (poison && !s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 1); + else + fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name); + } + if (!poison && s->poison) { + if (slab_empty(s)) + set_obj(s, "poison", 0); + else + fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name); + } + if (tracking && !s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 1); + else + fprintf(stderr, "%s not empty cannot enable tracking\n", s->name); + } + if (!tracking && s->store_user) { + if (slab_empty(s)) + set_obj(s, "store_user", 0); + else + fprintf(stderr, "%s not empty cannot disable tracking\n", s->name); + } + if (tracing && !s->trace) { + if (slabs == 1) + set_obj(s, "trace", 1); + else + fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name); + } + if (!tracing && s->trace) + set_obj(s, "trace", 1); +} + +static void totals(void) +{ + struct slabinfo *s; + + int used_slabs = 0; + char b1[20], b2[20], b3[20], b4[20]; + unsigned long long max = 1ULL << 63; + + /* Object size */ + unsigned long long min_objsize = max, max_objsize = 0, avg_objsize; + + /* Number of partial slabs in a slabcache */ + unsigned long long min_partial = max, max_partial = 0, + avg_partial, total_partial = 0; + 
+ /* Number of slabs in a slab cache */ + unsigned long long min_slabs = max, max_slabs = 0, + avg_slabs, total_slabs = 0; + + /* Size of the whole slab */ + unsigned long long min_size = max, max_size = 0, + avg_size, total_size = 0; + + /* Bytes used for object storage in a slab */ + unsigned long long min_used = max, max_used = 0, + avg_used, total_used = 0; + + /* Waste: Bytes used for alignment and padding */ + unsigned long long min_waste = max, max_waste = 0, + avg_waste, total_waste = 0; + /* Number of objects in a slab */ + unsigned long long min_objects = max, max_objects = 0, + avg_objects, total_objects = 0; + /* Waste per object */ + unsigned long long min_objwaste = max, + max_objwaste = 0, avg_objwaste, + total_objwaste = 0; + + /* Memory per object */ + unsigned long long min_memobj = max, + max_memobj = 0, avg_memobj, + total_objsize = 0; + + /* Percentage of partial slabs per slab */ + unsigned long min_ppart = 100, max_ppart = 0, + avg_ppart, total_ppart = 0; + + /* Number of objects in partial slabs */ + unsigned long min_partobj = max, max_partobj = 0, + avg_partobj, total_partobj = 0; + + /* Percentage of partial objects of all objects in a slab */ + unsigned long min_ppartobj = 100, max_ppartobj = 0, + avg_ppartobj, total_ppartobj = 0; + + + for (s = slabinfo; s < slabinfo + slabs; s++) { + unsigned long long size; + unsigned long used; + unsigned long long wasted; + unsigned long long objwaste; + unsigned long percentage_partial_slabs; + unsigned long percentage_partial_objs; + + if (!s->slabs || !s->objects) + continue; + + used_slabs++; + + size = slab_size(s); + used = s->objects * s->object_size; + wasted = size - used; + objwaste = s->slab_size - s->object_size; + + percentage_partial_slabs = s->partial * 100 / s->slabs; + if (percentage_partial_slabs > 100) + percentage_partial_slabs = 100; + + percentage_partial_objs = s->objects_partial * 100 + / s->objects; + + if (percentage_partial_objs > 100) + percentage_partial_objs = 100; + + 
if (s->object_size < min_objsize) + min_objsize = s->object_size; + if (s->partial < min_partial) + min_partial = s->partial; + if (s->slabs < min_slabs) + min_slabs = s->slabs; + if (size < min_size) + min_size = size; + if (wasted < min_waste) + min_waste = wasted; + if (objwaste < min_objwaste) + min_objwaste = objwaste; + if (s->objects < min_objects) + min_objects = s->objects; + if (used < min_used) + min_used = used; + if (s->objects_partial < min_partobj) + min_partobj = s->objects_partial; + if (percentage_partial_slabs < min_ppart) + min_ppart = percentage_partial_slabs; + if (percentage_partial_objs < min_ppartobj) + min_ppartobj = percentage_partial_objs; + if (s->slab_size < min_memobj) + min_memobj = s->slab_size; + + if (s->object_size > max_objsize) + max_objsize = s->object_size; + if (s->partial > max_partial) + max_partial = s->partial; + if (s->slabs > max_slabs) + max_slabs = s->slabs; + if (size > max_size) + max_size = size; + if (wasted > max_waste) + max_waste = wasted; + if (objwaste > max_objwaste) + max_objwaste = objwaste; + if (s->objects > max_objects) + max_objects = s->objects; + if (used > max_used) + max_used = used; + if (s->objects_partial > max_partobj) + max_partobj = s->objects_partial; + if (percentage_partial_slabs > max_ppart) + max_ppart = percentage_partial_slabs; + if (percentage_partial_objs > max_ppartobj) + max_ppartobj = percentage_partial_objs; + if (s->slab_size > max_memobj) + max_memobj = s->slab_size; + + total_partial += s->partial; + total_slabs += s->slabs; + total_size += size; + total_waste += wasted; + + total_objects += s->objects; + total_used += used; + total_partobj += s->objects_partial; + total_ppart += percentage_partial_slabs; + total_ppartobj += percentage_partial_objs; + + total_objwaste += s->objects * objwaste; + total_objsize += s->objects * s->slab_size; + } + + if (!total_objects) { + printf("No objects\n"); + return; + } + if (!used_slabs) { + printf("No slabs\n"); + return; + } + + /* Per 
slab averages */ + avg_partial = total_partial / used_slabs; + avg_slabs = total_slabs / used_slabs; + avg_size = total_size / used_slabs; + avg_waste = total_waste / used_slabs; + + avg_objects = total_objects / used_slabs; + avg_used = total_used / used_slabs; + avg_partobj = total_partobj / used_slabs; + avg_ppart = total_ppart / used_slabs; + avg_ppartobj = total_ppartobj / used_slabs; + + /* Per object object sizes */ + avg_objsize = total_used / total_objects; + avg_objwaste = total_objwaste / total_objects; + avg_partobj = total_partobj * 100 / total_objects; + avg_memobj = total_objsize / total_objects; + + printf("Slabcache Totals\n"); + printf("----------------\n"); + printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n", + slabs, aliases, alias_targets, used_slabs); + + store_size(b1, total_size);store_size(b2, total_waste); + store_size(b3, total_waste * 100 / total_used); + printf("Memory used: %6s # Loss : %6s MRatio:%6s%%\n", b1, b2, b3); + + store_size(b1, total_objects);store_size(b2, total_partobj); + store_size(b3, total_partobj * 100 / total_objects); + printf("# Objects : %6s # PartObj: %6s ORatio:%6s%%\n", b1, b2, b3); + + printf("\n"); + printf("Per Cache Average Min Max Total\n"); + printf("---------------------------------------------------------\n"); + + store_size(b1, avg_objects);store_size(b2, min_objects); + store_size(b3, max_objects);store_size(b4, total_objects); + printf("#Objects %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_slabs);store_size(b2, min_slabs); + store_size(b3, max_slabs);store_size(b4, total_slabs); + printf("#Slabs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_partial);store_size(b2, min_partial); + store_size(b3, max_partial);store_size(b4, total_partial); + printf("#PartSlab %10s %10s %10s %10s\n", + b1, b2, b3, b4); + store_size(b1, avg_ppart);store_size(b2, min_ppart); + store_size(b3, max_ppart); + store_size(b4, total_partial * 100 / total_slabs); + 
printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_partobj);store_size(b2, min_partobj); + store_size(b3, max_partobj); + store_size(b4, total_partobj); + printf("PartObjs %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj); + store_size(b3, max_ppartobj); + store_size(b4, total_partobj * 100 / total_objects); + printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n", + b1, b2, b3, b4); + + store_size(b1, avg_size);store_size(b2, min_size); + store_size(b3, max_size);store_size(b4, total_size); + printf("Memory %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_used);store_size(b2, min_used); + store_size(b3, max_used);store_size(b4, total_used); + printf("Used %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + store_size(b1, avg_waste);store_size(b2, min_waste); + store_size(b3, max_waste);store_size(b4, total_waste); + printf("Loss %10s %10s %10s %10s\n", + b1, b2, b3, b4); + + printf("\n"); + printf("Per Object Average Min Max\n"); + printf("---------------------------------------------\n"); + + store_size(b1, avg_memobj);store_size(b2, min_memobj); + store_size(b3, max_memobj); + printf("Memory %10s %10s %10s\n", + b1, b2, b3); + store_size(b1, avg_objsize);store_size(b2, min_objsize); + store_size(b3, max_objsize); + printf("User %10s %10s %10s\n", + b1, b2, b3); + + store_size(b1, avg_objwaste);store_size(b2, min_objwaste); + store_size(b3, max_objwaste); + printf("Loss %10s %10s %10s\n", + b1, b2, b3); +} + +static void sort_slabs(void) +{ + struct slabinfo *s1,*s2; + + for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) { + for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) { + int result; + + if (sort_size) + result = slab_size(s1) < slab_size(s2); + else if (sort_active) + result = slab_activity(s1) < slab_activity(s2); + else + result = strcasecmp(s1->name, s2->name); + + if (show_inverted) + result = -result; + + if (result > 0) { + struct slabinfo t; + + 
memcpy(&t, s1, sizeof(struct slabinfo)); + memcpy(s1, s2, sizeof(struct slabinfo)); + memcpy(s2, &t, sizeof(struct slabinfo)); + } + } + } +} + +static void sort_aliases(void) +{ + struct aliasinfo *a1,*a2; + + for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) { + for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) { + char *n1, *n2; + + n1 = a1->name; + n2 = a2->name; + if (show_alias && !show_inverted) { + n1 = a1->ref; + n2 = a2->ref; + } + if (strcasecmp(n1, n2) > 0) { + struct aliasinfo t; + + memcpy(&t, a1, sizeof(struct aliasinfo)); + memcpy(a1, a2, sizeof(struct aliasinfo)); + memcpy(a2, &t, sizeof(struct aliasinfo)); + } + } + } +} + +static void link_slabs(void) +{ + struct aliasinfo *a; + struct slabinfo *s; + + for (a = aliasinfo; a < aliasinfo + aliases; a++) { + + for (s = slabinfo; s < slabinfo + slabs; s++) + if (strcmp(a->ref, s->name) == 0) { + a->slab = s; + s->refs++; + break; + } + if (s == slabinfo + slabs) + fatal("Unresolved alias %s\n", a->ref); + } +} + +static void alias(void) +{ + struct aliasinfo *a; + char *active = NULL; + + sort_aliases(); + link_slabs(); + + for(a = aliasinfo; a < aliasinfo + aliases; a++) { + + if (!show_single_ref && a->slab->refs == 1) + continue; + + if (!show_inverted) { + if (active) { + if (strcmp(a->slab->name, active) == 0) { + printf(" %s", a->name); + continue; + } + } + printf("\n%-12s <- %s", a->slab->name, a->name); + active = a->slab->name; + } + else + printf("%-20s -> %s\n", a->name, a->slab->name); + } + if (active) + printf("\n"); +} + + +static void rename_slabs(void) +{ + struct slabinfo *s; + struct aliasinfo *a; + + for (s = slabinfo; s < slabinfo + slabs; s++) { + if (*s->name != ':') + continue; + + if (s->refs > 1 && !show_first_alias) + continue; + + a = find_one_alias(s); + + if (a) + s->name = a->name; + else { + s->name = "*"; + actual_slabs--; + } + } +} + +static int slab_mismatch(char *slab) +{ + return regexec(&pattern, slab, 0, NULL, 0); +} + +static void read_slab_dir(void) +{ + 
DIR *dir; + struct dirent *de; + struct slabinfo *slab = slabinfo; + struct aliasinfo *alias = aliasinfo; + char *p; + char *t; + int count; + + if (chdir("/sys/kernel/slab") && chdir("/sys/slab")) + fatal("SYSFS support for SLUB not active\n"); + + dir = opendir("."); + while ((de = readdir(dir))) { + if (de->d_name[0] == '.' || + (de->d_name[0] != ':' && slab_mismatch(de->d_name))) + continue; + switch (de->d_type) { + case DT_LNK: + alias->name = strdup(de->d_name); + count = readlink(de->d_name, buffer, sizeof(buffer)); + + if (count < 0) + fatal("Cannot read symlink %s\n", de->d_name); + + buffer[count] = 0; + p = buffer + count; + while (p > buffer && p[-1] != '/') + p--; + alias->ref = strdup(p); + alias++; + break; + case DT_DIR: + if (chdir(de->d_name)) + fatal("Unable to access slab %s\n", slab->name); + slab->name = strdup(de->d_name); + slab->alias = 0; + slab->refs = 0; + slab->aliases = get_obj("aliases"); + slab->align = get_obj("align"); + slab->cache_dma = get_obj("cache_dma"); + slab->cpu_slabs = get_obj("cpu_slabs"); + slab->destroy_by_rcu = get_obj("destroy_by_rcu"); + slab->hwcache_align = get_obj("hwcache_align"); + slab->object_size = get_obj("object_size"); + slab->objects = get_obj("objects"); + slab->objects_partial = get_obj("objects_partial"); + slab->objects_total = get_obj("objects_total"); + slab->objs_per_slab = get_obj("objs_per_slab"); + slab->order = get_obj("order"); + slab->partial = get_obj("partial"); + slab->partial = get_obj_and_str("partial", &t); + decode_numa_list(slab->numa_partial, t); + free(t); + slab->poison = get_obj("poison"); + slab->reclaim_account = get_obj("reclaim_account"); + slab->red_zone = get_obj("red_zone"); + slab->sanity_checks = get_obj("sanity_checks"); + slab->slab_size = get_obj("slab_size"); + slab->slabs = get_obj_and_str("slabs", &t); + decode_numa_list(slab->numa, t); + free(t); + slab->store_user = get_obj("store_user"); + slab->trace = get_obj("trace"); + slab->alloc_fastpath = 
get_obj("alloc_fastpath"); + slab->alloc_slowpath = get_obj("alloc_slowpath"); + slab->free_fastpath = get_obj("free_fastpath"); + slab->free_slowpath = get_obj("free_slowpath"); + slab->free_frozen= get_obj("free_frozen"); + slab->free_add_partial = get_obj("free_add_partial"); + slab->free_remove_partial = get_obj("free_remove_partial"); + slab->alloc_from_partial = get_obj("alloc_from_partial"); + slab->alloc_slab = get_obj("alloc_slab"); + slab->alloc_refill = get_obj("alloc_refill"); + slab->free_slab = get_obj("free_slab"); + slab->cpuslab_flush = get_obj("cpuslab_flush"); + slab->deactivate_full = get_obj("deactivate_full"); + slab->deactivate_empty = get_obj("deactivate_empty"); + slab->deactivate_to_head = get_obj("deactivate_to_head"); + slab->deactivate_to_tail = get_obj("deactivate_to_tail"); + slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); + slab->order_fallback = get_obj("order_fallback"); + chdir(".."); + if (slab->name[0] == ':') + alias_targets++; + slab++; + break; + default : + fatal("Unknown file type %lx\n", de->d_type); + } + } + closedir(dir); + slabs = slab - slabinfo; + actual_slabs = slabs; + aliases = alias - aliasinfo; + if (slabs > MAX_SLABS) + fatal("Too many slabs\n"); + if (aliases > MAX_ALIASES) + fatal("Too many aliases\n"); +} + +static void output_slabs(void) +{ + struct slabinfo *slab; + + for (slab = slabinfo; slab < slabinfo + slabs; slab++) { + + if (slab->alias) + continue; + + + if (show_numa) + slab_numa(slab, 0); + else if (show_track) + show_tracking(slab); + else if (validate) + slab_validate(slab); + else if (shrink) + slab_shrink(slab); + else if (set_debug) + slab_debug(slab); + else if (show_ops) + ops(slab); + else if (show_slab) + slabcache(slab); + else if (show_report) + report(slab); + } +} + +struct option opts[] = { + { "aliases", 0, NULL, 'a' }, + { "activity", 0, NULL, 'A' }, + { "debug", 2, NULL, 'd' }, + { "display-activity", 0, NULL, 'D' }, + { "empty", 0, NULL, 'e' }, + { 
"first-alias", 0, NULL, 'f' }, + { "help", 0, NULL, 'h' }, + { "inverted", 0, NULL, 'i'}, + { "numa", 0, NULL, 'n' }, + { "ops", 0, NULL, 'o' }, + { "report", 0, NULL, 'r' }, + { "shrink", 0, NULL, 's' }, + { "slabs", 0, NULL, 'l' }, + { "track", 0, NULL, 't'}, + { "validate", 0, NULL, 'v' }, + { "zero", 0, NULL, 'z' }, + { "1ref", 0, NULL, '1'}, + { NULL, 0, NULL, 0 } +}; + +int main(int argc, char *argv[]) +{ + int c; + int err; + char *pattern_source; + + page_size = getpagesize(); + + while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", + opts, NULL)) != -1) + switch (c) { + case '1': + show_single_ref = 1; + break; + case 'a': + show_alias = 1; + break; + case 'A': + sort_active = 1; + break; + case 'd': + set_debug = 1; + if (!debug_opt_scan(optarg)) + fatal("Invalid debug option '%s'\n", optarg); + break; + case 'D': + show_activity = 1; + break; + case 'e': + show_empty = 1; + break; + case 'f': + show_first_alias = 1; + break; + case 'h': + usage(); + return 0; + case 'i': + show_inverted = 1; + break; + case 'n': + show_numa = 1; + break; + case 'o': + show_ops = 1; + break; + case 'r': + show_report = 1; + break; + case 's': + shrink = 1; + break; + case 'l': + show_slab = 1; + break; + case 't': + show_track = 1; + break; + case 'v': + validate = 1; + break; + case 'z': + skip_zero = 0; + break; + case 'T': + show_totals = 1; + break; + case 'S': + sort_size = 1; + break; + + default: + fatal("%s: Invalid option '%c'\n", argv[0], optopt); + + } + + if (!show_slab && !show_alias && !show_track && !show_report + && !validate && !shrink && !set_debug && !show_ops) + show_slab = 1; + + if (argc > optind) + pattern_source = argv[optind]; + else + pattern_source = ".*"; + + err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB); + if (err) + fatal("%s: Invalid pattern '%s' code %d\n", + argv[0], pattern_source, err); + read_slab_dir(); + if (show_alias) + alias(); + else + if (show_totals) + totals(); + else { + link_slabs(); + rename_slabs(); 
#!/usr/bin/perl

# Cross-check the option names used by ktest.pl against the option names
# that appear in sample.conf. Both files are read from the current
# directory. An option found in only one of the two is reported as
# "opt = NAME" (used by ktest.pl, absent from sample.conf) or
# "samp = NAME" (in sample.conf, not used by ktest.pl).

use strict;
use warnings;

my %opt;	# option names referenced in ktest.pl
my %samp;	# option names assigned (possibly commented out) in sample.conf

# Without the script there is nothing meaningful to compare, so stop
# instead of reporting every sample option as unknown.
open(my $ktest, "<", "ktest.pl") or die "Cannot open ktest.pl: $!\n";
while (<$ktest>) {
    if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
	/set_test_option\("(.*?)"/) {
	$opt{$1} = 1;
    }
}
close($ktest);

open(my $sample, "<", "sample.conf") or die "Cannot open sample.conf: $!\n";
while (<$sample>) {
    if (/^\s*#?\s*(\S+)\s*=/) {
	$samp{$1} = 1;
    }
}
close($sample);

# Sorted so that two runs over the same files print the same report.
foreach my $name (sort keys %opt) {
    if (!defined($samp{$name})) {
	print "opt = $name\n";
    }
}

foreach my $name (sort keys %samp) {
    if (!defined($opt{$name})) {
	print "samp = $name\n";
    }
}
+# Licensed under the terms of the GNU GPL License version 2 +# + +use strict; +use IPC::Open2; +use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); +use File::Path qw(mkpath); +use File::Copy qw(cp); +use FileHandle; + +my $VERSION = "0.2"; + +$| = 1; + +my %opt; +my %repeat_tests; +my %repeats; +my %default; + +#default opts +$default{"NUM_TESTS"} = 1; +$default{"REBOOT_TYPE"} = "grub"; +$default{"TEST_TYPE"} = "test"; +$default{"BUILD_TYPE"} = "randconfig"; +$default{"MAKE_CMD"} = "make"; +$default{"TIMEOUT"} = 120; +$default{"TMP_DIR"} = "/tmp/ktest"; +$default{"SLEEP_TIME"} = 60; # sleep time between tests +$default{"BUILD_NOCLEAN"} = 0; +$default{"REBOOT_ON_ERROR"} = 0; +$default{"POWEROFF_ON_ERROR"} = 0; +$default{"REBOOT_ON_SUCCESS"} = 1; +$default{"POWEROFF_ON_SUCCESS"} = 0; +$default{"BUILD_OPTIONS"} = ""; +$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects +$default{"CLEAR_LOG"} = 0; +$default{"SUCCESS_LINE"} = "login:"; +$default{"BOOTED_TIMEOUT"} = 1; +$default{"DIE_ON_FAILURE"} = 1; +$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND"; +$default{"SCP_TO_TARGET"} = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE"; +$default{"REBOOT"} = "ssh \$SSH_USER\@\$MACHINE reboot"; +$default{"STOP_AFTER_SUCCESS"} = 10; +$default{"STOP_AFTER_FAILURE"} = 60; +$default{"LOCALVERSION"} = "-test"; + +my $ktest_config; +my $version; +my $machine; +my $ssh_user; +my $tmpdir; +my $builddir; +my $outputdir; +my $output_config; +my $test_type; +my $build_type; +my $build_options; +my $reboot_type; +my $reboot_script; +my $power_cycle; +my $reboot; +my $reboot_on_error; +my $poweroff_on_error; +my $die_on_failure; +my $powercycle_after_reboot; +my $poweroff_after_halt; +my $ssh_exec; +my $scp_to_target; +my $power_off; +my $grub_menu; +my $grub_number; +my $target; +my $make; +my $post_install; +my $noclean; +my $minconfig; +my $addconfig; +my $in_bisect = 0; +my $bisect_bad = ""; +my $reverse_bisect; +my $in_patchcheck = 0; +my $run_test; +my $redirect; 
+my $buildlog; +my $dmesg; +my $monitor_fp; +my $monitor_pid; +my $monitor_cnt = 0; +my $sleep_time; +my $bisect_sleep_time; +my $store_failures; +my $timeout; +my $booted_timeout; +my $console; +my $success_line; +my $stop_after_success; +my $stop_after_failure; +my $build_target; +my $target_image; +my $localversion; +my $iteration = 0; +my $successes = 0; + +my %entered_configs; +my %config_help; + +$config_help{"MACHINE"} = << "EOF" + The machine hostname that you will test. +EOF + ; +$config_help{"SSH_USER"} = << "EOF" + The box is expected to have ssh on normal bootup, provide the user + (most likely root, since you need privileged operations) +EOF + ; +$config_help{"BUILD_DIR"} = << "EOF" + The directory that contains the Linux source code (full path). +EOF + ; +$config_help{"OUTPUT_DIR"} = << "EOF" + The directory that the objects will be built (full path). + (can not be same as BUILD_DIR) +EOF + ; +$config_help{"BUILD_TARGET"} = << "EOF" + The location of the compiled file to copy to the target. + (relative to OUTPUT_DIR) +EOF + ; +$config_help{"TARGET_IMAGE"} = << "EOF" + The place to put your image on the test machine. +EOF + ; +$config_help{"POWER_CYCLE"} = << "EOF" + A script or command to reboot the box. + + Here is a digital loggers power switch example + POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL' + + Here is an example to reboot a virtual box on the current host + with the name "Guest". + POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest +EOF + ; +$config_help{"CONSOLE"} = << "EOF" + The script or command that reads the console + + If you use ttywatch server, something like the following would work. +CONSOLE = nc -d localhost 3001 + + For a virtual machine with guest name "Guest". +CONSOLE = virsh console Guest +EOF + ; +$config_help{"LOCALVERSION"} = << "EOF" + Required version ending to differentiate the test + from other linux builds on the system. 
# Ask the user for the value of $config unless the config file set it.
#
# Prints the help text for the option (when there is one), then prompts
# until a usable answer is given. An empty answer selects the default from
# %default when that default is a non-empty string. The answer is stored
# in %entered_configs; %opt is not modified here.
sub get_ktest_config {
    my ($config) = @_;

    return if (defined($opt{$config}));

    if (defined($config_help{$config})) {
	print "\n";
	print $config_help{$config};
    }

    for (;;) {
	print "$config = ";
	if (defined($default{$config})) {
	    print "\[$default{$config}\] ";
	}

	my $answer = <STDIN>;
	# At end of input no answer will ever arrive; asking again would
	# loop forever.
	if (!defined($answer)) {
	    die "\nNo value entered for $config (end of input)\n";
	}

	$answer =~ s/^\s*(.*\S)\s*$/$1/;
	if ($answer =~ /^\s*$/) {
	    # Test for a usable string rather than truth so that a
	    # default of "0" shown in the prompt is accepted too.
	    if (defined($default{$config}) && $default{$config} ne "") {
		$answer = $default{$config};
	    } else {
		print "Your answer can not be blank\n";
		next;
	    }
	}
	$entered_configs{$config} = $answer;
	last;
    }
}
get_ktest_config("TARGET_IMAGE"); + get_ktest_config("POWER_CYCLE"); + get_ktest_config("CONSOLE"); + get_ktest_config("LOCALVERSION"); + + my $rtype = $opt{"REBOOT_TYPE"}; + + if (!defined($rtype)) { + if (!defined($opt{"GRUB_MENU"})) { + get_ktest_config("REBOOT_TYPE"); + $rtype = $entered_configs{"REBOOT_TYPE"}; + } else { + $rtype = "grub"; + } + } + + if ($rtype eq "grub") { + get_ktest_config("GRUB_MENU"); + } else { + get_ktest_config("REBOOT_SCRIPT"); + } +} + +sub set_value { + my ($lvalue, $rvalue) = @_; + + if (defined($opt{$lvalue})) { + die "Error: Option $lvalue defined more than once!\n"; + } + if ($rvalue =~ /^\s*$/) { + delete $opt{$lvalue}; + } else { + $opt{$lvalue} = $rvalue; + } +} + +sub read_config { + my ($config) = @_; + + open(IN, $config) || die "can't read file $config"; + + my $name = $config; + $name =~ s,.*/(.*),$1,; + + my $test_num = 0; + my $default = 1; + my $repeat = 1; + my $num_tests_set = 0; + my $skip = 0; + my $rest; + + while (<IN>) { + + # ignore blank lines and comments + next if (/^\s*$/ || /\s*\#/); + + if (/^\s*TEST_START(.*)/) { + + $rest = $1; + + if ($num_tests_set) { + die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; + } + + my $old_test_num = $test_num; + my $old_repeat = $repeat; + + $test_num += $repeat; + $default = 0; + $repeat = 1; + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } else { + $skip = 0; + } + + if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) { + $repeat = $1; + $rest = $2; + $repeat_tests{"$test_num"} = $repeat; + } + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } + + if ($rest !~ /^\s*$/) { + die "$name: $.: Gargbage found after TEST_START\n$_"; + } + + if ($skip) { + $test_num = $old_test_num; + $repeat = $old_repeat; + } + + } elsif (/^\s*DEFAULTS(.*)$/) { + $default = 1; + + $rest = $1; + + if ($rest =~ /\s+SKIP(.*)/) { + $rest = $1; + $skip = 1; + } else { + $skip = 0; + } + + if ($rest !~ /^\s*$/) { + die "$name: $.: Gargbage found after 
DEFAULTS\n$_"; + } + + } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { + + next if ($skip); + + my $lvalue = $1; + my $rvalue = $2; + + if (!$default && + ($lvalue eq "NUM_TESTS" || + $lvalue eq "LOG_FILE" || + $lvalue eq "CLEAR_LOG")) { + die "$name: $.: $lvalue must be set in DEFAULTS section\n"; + } + + if ($lvalue eq "NUM_TESTS") { + if ($test_num) { + die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; + } + if (!$default) { + die "$name: $.: NUM_TESTS must be set in default section\n"; + } + $num_tests_set = 1; + } + + if ($default || $lvalue =~ /\[\d+\]$/) { + set_value($lvalue, $rvalue); + } else { + my $val = "$lvalue\[$test_num\]"; + set_value($val, $rvalue); + + if ($repeat > 1) { + $repeats{$val} = $repeat; + } + } + } else { + die "$name: $.: Garbage found in config\n$_"; + } + } + + close(IN); + + if ($test_num) { + $test_num += $repeat - 1; + $opt{"NUM_TESTS"} = $test_num; + } + + # make sure we have all mandatory configs + get_ktest_configs; + + # set any defaults + + foreach my $default (keys %default) { + if (!defined($opt{$default})) { + $opt{$default} = $default{$default}; + } + } +} + +sub _logit { + if (defined($opt{"LOG_FILE"})) { + open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + print OUT @_; + close(OUT); + } +} + +sub logit { + if (defined($opt{"LOG_FILE"})) { + _logit @_; + } else { + print @_; + } +} + +sub doprint { + print @_; + _logit @_; +} + +sub run_command; + +sub reboot { + # try to reboot normally + if (run_command $reboot) { + if (defined($powercycle_after_reboot)) { + sleep $powercycle_after_reboot; + run_command "$power_cycle"; + } + } else { + # nope? power cycle it. + run_command "$power_cycle"; + } +} + +sub do_not_reboot { + my $i = $iteration; + + return $test_type eq "build" || + ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || + ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); +} + +sub dodie { + doprint "CRITICAL FAILURE... 
", @_, "\n"; + + my $i = $iteration; + + if ($reboot_on_error && !do_not_reboot) { + + doprint "REBOOTING\n"; + reboot; + + } elsif ($poweroff_on_error && defined($power_off)) { + doprint "POWERING OFF\n"; + `$power_off`; + } + + die @_, "\n"; +} + +sub open_console { + my ($fp) = @_; + + my $flags; + + my $pid = open($fp, "$console|") or + dodie "Can't open console $console"; + + $flags = fcntl($fp, F_GETFL, 0) or + dodie "Can't get flags for the socket: $!"; + $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or + dodie "Can't set flags for the socket: $!"; + + return $pid; +} + +sub close_console { + my ($fp, $pid) = @_; + + doprint "kill child process $pid\n"; + kill 2, $pid; + + print "closing!\n"; + close($fp); +} + +sub start_monitor { + if ($monitor_cnt++) { + return; + } + $monitor_fp = \*MONFD; + $monitor_pid = open_console $monitor_fp; + + return; + + open(MONFD, "Stop perl from warning about single use of MONFD"); +} + +sub end_monitor { + if (--$monitor_cnt) { + return; + } + close_console($monitor_fp, $monitor_pid); +} + +sub wait_for_monitor { + my ($time) = @_; + my $line; + + doprint "** Wait for monitor to settle down **\n"; + + # read the monitor and wait for the system to calm down + do { + $line = wait_for_input($monitor_fp, $time); + print "$line" if (defined($line)); + } while (defined($line)); + print "** Monitor flushed **\n"; +} + +sub fail { + + if ($die_on_failure) { + dodie @_; + } + + doprint "FAILED\n"; + + my $i = $iteration; + + # no need to reboot for just building. 
+ if (!do_not_reboot) { + doprint "REBOOTING\n"; + reboot; + start_monitor; + wait_for_monitor $sleep_time; + end_monitor; + } + + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "KTEST RESULT: TEST $i Failed: ", @_, "\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + + return 1 if (!defined($store_failures)); + + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } + + my $dir = "$machine-$test_type-$type-fail-$date"; + my $faildir = "$store_failures/$dir"; + + if (!-d $faildir) { + mkpath($faildir) or + die "can't create $faildir"; + } + if (-f "$output_config") { + cp "$output_config", "$faildir/config" or + die "failed to copy .config"; + } + if (-f $buildlog) { + cp $buildlog, "$faildir/buildlog" or + die "failed to move $buildlog"; + } + if (-f $dmesg) { + cp $dmesg, "$faildir/dmesg" or + die "failed to move $dmesg"; + } + + doprint "*** Saved info to $faildir ***\n"; + + return 1; +} + +sub run_command { + my ($command) = @_; + my $dolog = 0; + my $dord = 0; + my $pid; + + $command =~ s/\$SSH_USER/$ssh_user/g; + $command =~ s/\$MACHINE/$machine/g; + + doprint("$command ... 
"); + + $pid = open(CMD, "$command 2>&1 |") or + (fail "unable to exec $command" and return 0); + + if (defined($opt{"LOG_FILE"})) { + open(LOG, ">>$opt{LOG_FILE}") or + dodie "failed to write to log"; + $dolog = 1; + } + + if (defined($redirect)) { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } + + while (<CMD>) { + print LOG if ($dolog); + print RD if ($dord); + } + + waitpid($pid, 0); + my $failed = $?; + + close(CMD); + close(LOG) if ($dolog); + close(RD) if ($dord); + + if ($failed) { + doprint "FAILED!\n"; + } else { + doprint "SUCCESS\n"; + } + + return !$failed; +} + +sub run_ssh { + my ($cmd) = @_; + my $cp_exec = $ssh_exec; + + $cp_exec =~ s/\$SSH_COMMAND/$cmd/g; + return run_command "$cp_exec"; +} + +sub run_scp { + my ($src, $dst) = @_; + my $cp_scp = $scp_to_target; + + $cp_scp =~ s/\$SRC_FILE/$src/g; + $cp_scp =~ s/\$DST_FILE/$dst/g; + + return run_command "$cp_scp"; +} + +sub get_grub_index { + + if ($reboot_type ne "grub") { + return; + } + return if (defined($grub_number)); + + doprint "Find grub menu ... 
"; + $grub_number = -1; + + my $ssh_grub = $ssh_exec; + $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g; + + open(IN, "$ssh_grub |") + or die "unable to get menu.lst"; + + while (<IN>) { + if (/^\s*title\s+$grub_menu\s*$/) { + $grub_number++; + last; + } elsif (/^\s*title\s/) { + $grub_number++; + } + } + close(IN); + + die "Could not find '$grub_menu' in /boot/grub/menu on $machine" + if ($grub_number < 0); + doprint "$grub_number\n"; +} + +sub wait_for_input +{ + my ($fp, $time) = @_; + my $rin; + my $ready; + my $line; + my $ch; + + if (!defined($time)) { + $time = $timeout; + } + + $rin = ''; + vec($rin, fileno($fp), 1) = 1; + $ready = select($rin, undef, undef, $time); + + $line = ""; + + # try to read one char at a time + while (sysread $fp, $ch, 1) { + $line .= $ch; + last if ($ch eq "\n"); + } + + if (!length($line)) { + return undef; + } + + return $line; +} + +sub reboot_to { + if ($reboot_type eq "grub") { + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'"; + return; + } + + run_command "$reboot_script"; +} + +sub get_sha1 { + my ($commit) = @_; + + doprint "git rev-list --max-count=1 $commit ... 
"; + my $sha1 = `git rev-list --max-count=1 $commit`; + my $ret = $?; + + logit $sha1; + + if ($ret) { + doprint "FAILED\n"; + dodie "Failed to get git $commit"; + } + + print "SUCCESS\n"; + + chomp $sha1; + + return $sha1; +} + +sub monitor { + my $booted = 0; + my $bug = 0; + my $skip_call_trace = 0; + my $loops; + + wait_for_monitor 5; + + my $line; + my $full_line = ""; + + open(DMESG, "> $dmesg") or + die "unable to write to $dmesg"; + + reboot_to; + + my $success_start; + my $failure_start; + + for (;;) { + + if ($booted) { + $line = wait_for_input($monitor_fp, $booted_timeout); + } else { + $line = wait_for_input($monitor_fp); + } + + last if (!defined($line)); + + doprint $line; + print DMESG $line; + + # we are not guaranteed to get a full line + $full_line .= $line; + + if ($full_line =~ /$success_line/) { + $booted = 1; + $success_start = time; + } + + if ($booted && defined($stop_after_success) && + $stop_after_success >= 0) { + my $now = time; + if ($now - $success_start >= $stop_after_success) { + doprint "Test forced to stop after $stop_after_success seconds after success\n"; + last; + } + } + + if ($full_line =~ /\[ backtrace testing \]/) { + $skip_call_trace = 1; + } + + if ($full_line =~ /call trace:/i) { + if (!$skip_call_trace) { + $bug = 1; + $failure_start = time; + } + } + + if ($bug && defined($stop_after_failure) && + $stop_after_failure >= 0) { + my $now = time; + if ($now - $failure_start >= $stop_after_failure) { + doprint "Test forced to stop after $stop_after_failure seconds after failure\n"; + last; + } + } + + if ($full_line =~ /\[ end of backtrace testing \]/) { + $skip_call_trace = 0; + } + + if ($full_line =~ /Kernel panic -/) { + $bug = 1; + } + + if ($line =~ /\n/) { + $full_line = ""; + } + } + + close(DMESG); + + if ($bug) { + return 0 if ($in_bisect); + fail "failed - got a bug report" and return 0; + } + + if (!$booted) { + return 0 if ($in_bisect); + fail "failed - never got a boot prompt." 
and return 0; + } + + return 1; +} + +sub install { + + run_scp "$outputdir/$build_target", "$target_image" or + dodie "failed to copy image"; + + my $install_mods = 0; + + # should we process modules? + $install_mods = 0; + open(IN, "$output_config") or dodie("Can't read config file"); + while (<IN>) { + if (/CONFIG_MODULES(=y)?/) { + $install_mods = 1 if (defined($1)); + last; + } + } + close(IN); + + if (!$install_mods) { + doprint "No modules needed\n"; + return; + } + + run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or + dodie "Failed to install modules"; + + my $modlib = "/lib/modules/$version"; + my $modtar = "ktest-mods.tar.bz2"; + + run_ssh "rm -rf $modlib" or + dodie "failed to remove old mods: $modlib"; + + # would be nice if scp -r did not follow symbolic links + run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or + dodie "making tarball"; + + run_scp "$tmpdir/$modtar", "/tmp" or + dodie "failed to copy modules"; + + unlink "$tmpdir/$modtar"; + + run_ssh "'(cd / && tar xf /tmp/$modtar)'" or + dodie "failed to tar modules"; + + run_ssh "rm -f /tmp/$modtar"; + + return if (!defined($post_install)); + + my $cp_post_install = $post_install; + $cp_post_install = s/\$KERNEL_VERSION/$version/g; + run_command "$cp_post_install" or + dodie "Failed to run post install"; +} + +sub check_buildlog { + my ($patch) = @_; + + my @files = `git show $patch | diffstat -l`; + + open(IN, "git show $patch |") or + dodie "failed to show $patch"; + while (<IN>) { + if (m,^--- a/(.*),) { + chomp $1; + $files[$#files] = $1; + } + } + close(IN); + + open(IN, $buildlog) or dodie "Can't open $buildlog"; + while (<IN>) { + if (/^\s*(.*?):.*(warning|error)/) { + my $err = $1; + foreach my $file (@files) { + my $fullpath = "$builddir/$file"; + if ($file eq $err || $fullpath eq $err) { + fail "$file built with warnings" and return 0; + } + } + } + } + close(IN); + + return 1; +} + +sub build { + my ($type) = @_; + my $defconfig = ""; + + unlink $buildlog; 
+ + if ($type =~ /^useconfig:(.*)/) { + run_command "cp $1 $output_config" or + dodie "could not copy $1 to .config"; + + $type = "oldconfig"; + } + + # old config can ask questions + if ($type eq "oldconfig") { + $type = "oldnoconfig"; + + # allow for empty configs + run_command "touch $output_config"; + + run_command "mv $output_config $outputdir/config_temp" or + dodie "moving .config"; + + if (!$noclean && !run_command "$make mrproper") { + dodie "make mrproper"; + } + + run_command "mv $outputdir/config_temp $output_config" or + dodie "moving config_temp"; + + } elsif (!$noclean) { + unlink "$output_config"; + run_command "$make mrproper" or + dodie "make mrproper"; + } + + # add something to distinguish this build + open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file"); + print OUT "$localversion\n"; + close(OUT); + + if (defined($minconfig)) { + $defconfig = "KCONFIG_ALLCONFIG=$minconfig"; + } + + run_command "$defconfig $make $type" or + dodie "failed make config"; + + $redirect = "$buildlog"; + if (!run_command "$make $build_options") { + undef $redirect; + # bisect may need this to pass + return 0 if ($in_bisect); + fail "failed build" and return 0; + } + undef $redirect; + + return 1; +} + +sub halt { + if (!run_ssh "halt" or defined($power_off)) { + if (defined($poweroff_after_halt)) { + sleep $poweroff_after_halt; + run_command "$power_off"; + } + } else { + # nope? the zap it! + run_command "$power_off"; + } +} + +sub success { + my ($i) = @_; + + $successes++; + + doprint "\n\n*******************************************\n"; + doprint "*******************************************\n"; + doprint "KTEST RESULT: TEST $i SUCCESS!!!! 
**\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; + + if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { + doprint "Reboot and wait $sleep_time seconds\n"; + reboot; + start_monitor; + wait_for_monitor $sleep_time; + end_monitor; + } +} + +sub get_version { + # get the release name + doprint "$make kernelrelease ... "; + $version = `$make kernelrelease | tail -1`; + chomp($version); + doprint "$version\n"; +} + +sub child_run_test { + my $failed = 0; + + # child should have no power + $reboot_on_error = 0; + $poweroff_on_error = 0; + $die_on_failure = 1; + + run_command $run_test or $failed = 1; + exit $failed; +} + +my $child_done; + +sub child_finished { + $child_done = 1; +} + +sub do_run_test { + my $child_pid; + my $child_exit; + my $line; + my $full_line; + my $bug = 0; + + wait_for_monitor 1; + + doprint "run test $run_test\n"; + + $child_done = 0; + + $SIG{CHLD} = qw(child_finished); + + $child_pid = fork; + + child_run_test if (!$child_pid); + + $full_line = ""; + + do { + $line = wait_for_input($monitor_fp, 1); + if (defined($line)) { + + # we are not guaranteed to get a full line + $full_line .= $line; + + if ($full_line =~ /call trace:/i) { + $bug = 1; + } + + if ($full_line =~ /Kernel panic -/) { + $bug = 1; + } + + if ($line =~ /\n/) { + $full_line = ""; + } + } + } while (!$child_done && !$bug); + + if ($bug) { + doprint "Detected kernel crash!\n"; + # kill the child with extreme prejudice + kill 9, $child_pid; + } + + waitpid $child_pid, 0; + $child_exit = $?; + + if ($bug || $child_exit) { + return 0 if $in_bisect; + fail "test failed" and return 0; + } + return 1; +} + +sub run_git_bisect { + my ($command) = @_; + + doprint "$command ... 
"; + + my $output = `$command 2>&1`; + my $ret = $?; + + logit $output; + + if ($ret) { + doprint "FAILED\n"; + dodie "Failed to git bisect"; + } + + doprint "SUCCESS\n"; + if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) { + doprint "$1 [$2]\n"; + } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) { + $bisect_bad = $1; + doprint "Found bad commit... $1\n"; + return 0; + } else { + # we already logged it, just print it now. + print $output; + } + + return 1; +} + +# returns 1 on success, 0 on failure +sub run_bisect_test { + my ($type, $buildtype) = @_; + + my $failed = 0; + my $result; + my $output; + my $ret; + + $in_bisect = 1; + + build $buildtype or $failed = 1; + + if ($type ne "build") { + dodie "Failed on build" if $failed; + + # Now boot the box + get_grub_index; + get_version; + install; + + start_monitor; + monitor or $failed = 1; + + if ($type ne "boot") { + dodie "Failed on boot" if $failed; + + do_run_test or $failed = 1; + } + end_monitor; + } + + if ($failed) { + $result = 0; + + # reboot the box to a good kernel + if ($type ne "build") { + doprint "Reboot and sleep $bisect_sleep_time seconds\n"; + reboot; + start_monitor; + wait_for_monitor $bisect_sleep_time; + end_monitor; + } + } else { + $result = 1; + } + $in_bisect = 0; + + return $result; +} + +sub run_bisect { + my ($type) = @_; + my $buildtype = "oldconfig"; + + # We should have a minconfig to use? + if (defined($minconfig)) { + $buildtype = "useconfig:$minconfig"; + } + + my $ret = run_bisect_test $type, $buildtype; + + + # Are we looking for where it worked, not failed? 
+ if ($reverse_bisect) { + $ret = !$ret; + } + + if ($ret) { + return "good"; + } else { + return "bad"; + } +} + +sub bisect { + my ($i) = @_; + + my $result; + + die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"})); + die "BISECT_BAD[$i] not defined\n" if (!defined($opt{"BISECT_BAD[$i]"})); + die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"})); + + my $good = $opt{"BISECT_GOOD[$i]"}; + my $bad = $opt{"BISECT_BAD[$i]"}; + my $type = $opt{"BISECT_TYPE[$i]"}; + my $start = $opt{"BISECT_START[$i]"}; + my $replay = $opt{"BISECT_REPLAY[$i]"}; + + # convert to true sha1's + $good = get_sha1($good); + $bad = get_sha1($bad); + + if (defined($opt{"BISECT_REVERSE[$i]"}) && + $opt{"BISECT_REVERSE[$i]"} == 1) { + doprint "Performing a reverse bisect (bad is good, good is bad!)\n"; + $reverse_bisect = 1; + } else { + $reverse_bisect = 0; + } + + # Can't have a test without having a test to run + if ($type eq "test" && !defined($run_test)) { + $type = "boot"; + } + + my $check = $opt{"BISECT_CHECK[$i]"}; + if (defined($check) && $check ne "0") { + + # get current HEAD + my $head = get_sha1("HEAD"); + + if ($check ne "good") { + doprint "TESTING BISECT BAD [$bad]\n"; + run_command "git checkout $bad" or + die "Failed to checkout $bad"; + + $result = run_bisect $type; + + if ($result ne "bad") { + fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0; + } + } + + if ($check ne "bad") { + doprint "TESTING BISECT GOOD [$good]\n"; + run_command "git checkout $good" or + die "Failed to checkout $good"; + + $result = run_bisect $type; + + if ($result ne "good") { + fail "Tested BISECT_GOOD [$good] and it failed" and return 0; + } + } + + # checkout where we started + run_command "git checkout $head" or + die "Failed to checkout $head"; + } + + run_command "git bisect start" or + dodie "could not start bisect"; + + run_command "git bisect good $good" or + dodie "could not set bisect good to $good"; + + run_git_bisect "git bisect bad 
$bad" or + dodie "could not set bisect bad to $bad"; + + if (defined($replay)) { + run_command "git bisect replay $replay" or + dodie "failed to run replay"; + } + + if (defined($start)) { + run_command "git checkout $start" or + dodie "failed to checkout $start"; + } + + my $test; + do { + $result = run_bisect $type; + $test = run_git_bisect "git bisect $result"; + } while ($test); + + run_command "git bisect log" or + dodie "could not capture git bisect log"; + + run_command "git bisect reset" or + dodie "could not reset git bisect"; + + doprint "Bad commit was [$bisect_bad]\n"; + + success $i; +} + +my %config_ignore; +my %config_set; + +my %config_list; +my %null_config; + +my %dependency; + +sub process_config_ignore { + my ($config) = @_; + + open (IN, $config) + or dodie "Failed to read $config"; + + while (<IN>) { + if (/^(.*?(CONFIG\S*)(=.*| is not set))/) { + $config_ignore{$2} = $1; + } + } + + close(IN); +} + +sub read_current_config { + my ($config_ref) = @_; + + %{$config_ref} = (); + undef %{$config_ref}; + + my @key = keys %{$config_ref}; + if ($#key >= 0) { + print "did not delete!\n"; + exit; + } + open (IN, "$output_config"); + + while (<IN>) { + if (/^(CONFIG\S+)=(.*)/) { + ${$config_ref}{$1} = $2; + } + } + close(IN); +} + +sub get_dependencies { + my ($config) = @_; + + my $arr = $dependency{$config}; + if (!defined($arr)) { + return (); + } + + my @deps = @{$arr}; + + foreach my $dep (@{$arr}) { + print "ADD DEP $dep\n"; + @deps = (@deps, get_dependencies $dep); + } + + return @deps; +} + +sub create_config { + my @configs = @_; + + open(OUT, ">$output_config") or dodie "Can not write to $output_config"; + + foreach my $config (@configs) { + print OUT "$config_set{$config}\n"; + my @deps = get_dependencies $config; + foreach my $dep (@deps) { + print OUT "$config_set{$dep}\n"; + } + } + + foreach my $config (keys %config_ignore) { + print OUT "$config_ignore{$config}\n"; + } + close(OUT); + +# exit; + run_command "$make oldnoconfig" or + 
dodie "failed make config oldconfig"; + +} + +sub compare_configs { + my (%a, %b) = @_; + + foreach my $item (keys %a) { + if (!defined($b{$item})) { + print "diff $item\n"; + return 1; + } + delete $b{$item}; + } + + my @keys = keys %b; + if ($#keys) { + print "diff2 $keys[0]\n"; + } + return -1 if ($#keys >= 0); + + return 0; +} + +sub run_config_bisect_test { + my ($type) = @_; + + return run_bisect_test $type, "oldconfig"; +} + +sub process_passed { + my (%configs) = @_; + + doprint "These configs had no failure: (Enabling them for further compiles)\n"; + # Passed! All these configs are part of a good compile. + # Add them to the min options. + foreach my $config (keys %configs) { + if (defined($config_list{$config})) { + doprint " removing $config\n"; + $config_ignore{$config} = $config_list{$config}; + delete $config_list{$config}; + } + } + doprint "config copied to $outputdir/config_good\n"; + run_command "cp -f $output_config $outputdir/config_good"; +} + +sub process_failed { + my ($config) = @_; + + doprint "\n\n***************************************\n"; + doprint "Found bad config: $config\n"; + doprint "***************************************\n\n"; +} + +sub run_config_bisect { + + my @start_list = keys %config_list; + + if ($#start_list < 0) { + doprint "No more configs to test!!!\n"; + return -1; + } + + doprint "***** RUN TEST ***\n"; + my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"}; + my $ret; + my %current_config; + + my $count = $#start_list + 1; + doprint " $count configs to test\n"; + + my $half = int($#start_list / 2); + + do { + my @tophalf = @start_list[0 .. 
$half]; + + create_config @tophalf; + read_current_config \%current_config; + + $count = $#tophalf + 1; + doprint "Testing $count configs\n"; + my $found = 0; + # make sure we test something + foreach my $config (@tophalf) { + if (defined($current_config{$config})) { + logit " $config\n"; + $found = 1; + } + } + if (!$found) { + # try the other half + doprint "Top half produced no set configs, trying bottom half\n"; + @tophalf = @start_list[$half .. $#start_list]; + create_config @tophalf; + read_current_config \%current_config; + foreach my $config (@tophalf) { + if (defined($current_config{$config})) { + logit " $config\n"; + $found = 1; + } + } + if (!$found) { + doprint "Failed: Can't make new config with current configs\n"; + foreach my $config (@start_list) { + doprint " CONFIG: $config\n"; + } + return -1; + } + $count = $#tophalf + 1; + doprint "Testing $count configs\n"; + } + + $ret = run_config_bisect_test $type; + + if ($ret) { + process_passed %current_config; + return 0; + } + + doprint "This config had a failure.\n"; + doprint "Removing these configs that were not set in this config:\n"; + doprint "config copied to $outputdir/config_bad\n"; + run_command "cp -f $output_config $outputdir/config_bad"; + + # A config exists in this group that was bad. + foreach my $config (keys %config_list) { + if (!defined($current_config{$config})) { + doprint " removing $config\n"; + delete $config_list{$config}; + } + } + + @start_list = @tophalf; + + if ($#start_list == 0) { + process_failed $start_list[0]; + return 1; + } + + # remove half the configs we are looking at and see if + # they are good. + $half = int($#start_list / 2); + } while ($half > 0); + + # we found a single config, try it again + my @tophalf = @start_list[0 .. 
0]; + + $ret = run_config_bisect_test $type; + if ($ret) { + process_passed %current_config; + return 0; + } + + process_failed $start_list[0]; + return 1; +} + +sub config_bisect { + my ($i) = @_; + + my $start_config = $opt{"CONFIG_BISECT[$i]"}; + + my $tmpconfig = "$tmpdir/use_config"; + + # Make the file with the bad config and the min config + if (defined($minconfig)) { + # read the min config for things to ignore + run_command "cp $minconfig $tmpconfig" or + dodie "failed to copy $minconfig to $tmpconfig"; + } else { + unlink $tmpconfig; + } + + # Add other configs + if (defined($addconfig)) { + run_command "cat $addconfig >> $tmpconfig" or + dodie "failed to append $addconfig"; + } + + my $defconfig = ""; + if (-f $tmpconfig) { + $defconfig = "KCONFIG_ALLCONFIG=$tmpconfig"; + process_config_ignore $tmpconfig; + } + + # now process the start config + run_command "cp $start_config $output_config" or + dodie "failed to copy $start_config to $output_config"; + + # read directly what we want to check + my %config_check; + open (IN, $output_config) + or dodie "faied to open $output_config"; + + while (<IN>) { + if (/^((CONFIG\S*)=.*)/) { + $config_check{$2} = $1; + } + } + close(IN); + + # Now run oldconfig with the minconfig (and addconfigs) + run_command "$defconfig $make oldnoconfig" or + dodie "failed make config oldconfig"; + + # check to see what we lost (or gained) + open (IN, $output_config) + or dodie "Failed to read $start_config"; + + my %removed_configs; + my %added_configs; + + while (<IN>) { + if (/^((CONFIG\S*)=.*)/) { + # save off all options + $config_set{$2} = $1; + if (defined($config_check{$2})) { + if (defined($config_ignore{$2})) { + $removed_configs{$2} = $1; + } else { + $config_list{$2} = $1; + } + } elsif (!defined($config_ignore{$2})) { + $added_configs{$2} = $1; + $config_list{$2} = $1; + } + } + } + close(IN); + + my @confs = keys %removed_configs; + if ($#confs >= 0) { + doprint "Configs overridden by default configs and removed from 
check:\n"; + foreach my $config (@confs) { + doprint " $config\n"; + } + } + @confs = keys %added_configs; + if ($#confs >= 0) { + doprint "Configs appearing in make oldconfig and added:\n"; + foreach my $config (@confs) { + doprint " $config\n"; + } + } + + my %config_test; + my $once = 0; + + # Sometimes kconfig does weird things. We must make sure + # that the config we autocreate has everything we need + # to test, otherwise we may miss testing configs, or + # may not be able to create a new config. + # Here we create a config with everything set. + create_config (keys %config_list); + read_current_config \%config_test; + foreach my $config (keys %config_list) { + if (!defined($config_test{$config})) { + if (!$once) { + $once = 1; + doprint "Configs not produced by kconfig (will not be checked):\n"; + } + doprint " $config\n"; + delete $config_list{$config}; + } + } + my $ret; + do { + $ret = run_config_bisect; + } while (!$ret); + + return $ret if ($ret < 0); + + success $i; +} + +sub patchcheck { + my ($i) = @_; + + die "PATCHCHECK_START[$i] not defined\n" + if (!defined($opt{"PATCHCHECK_START[$i]"})); + die "PATCHCHECK_TYPE[$i] not defined\n" + if (!defined($opt{"PATCHCHECK_TYPE[$i]"})); + + my $start = $opt{"PATCHCHECK_START[$i]"}; + + my $end = "HEAD"; + if (defined($opt{"PATCHCHECK_END[$i]"})) { + $end = $opt{"PATCHCHECK_END[$i]"}; + } + + # Get the true sha1's since we can use things like HEAD~3 + $start = get_sha1($start); + $end = get_sha1($end); + + my $type = $opt{"PATCHCHECK_TYPE[$i]"}; + + # Can't have a test without having a test to run + if ($type eq "test" && !defined($run_test)) { + $type = "boot"; + } + + open (IN, "git log --pretty=oneline $end|") or + dodie "could not get git list"; + + my @list; + + while (<IN>) { + chomp; + $list[$#list+1] = $_; + last if (/^$start/); + } + close(IN); + + if ($list[$#list] !~ /^$start/) { + fail "SHA1 $start not found"; + } + + # go backwards in the list + @list = reverse @list; + + my $save_clean = 
$noclean; + + $in_patchcheck = 1; + foreach my $item (@list) { + my $sha1 = $item; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + + doprint "\nProcessing commit $item\n\n"; + + run_command "git checkout $sha1" or + die "Failed to checkout $sha1"; + + # only clean on the first and last patch + if ($item eq $list[0] || + $item eq $list[$#list]) { + $noclean = $save_clean; + } else { + $noclean = 1; + } + + if (defined($minconfig)) { + build "useconfig:$minconfig" or return 0; + } else { + # ?? no config to use? + build "oldconfig" or return 0; + } + + check_buildlog $sha1 or return 0; + + next if ($type eq "build"); + + get_grub_index; + get_version; + install; + + my $failed = 0; + + start_monitor; + monitor or $failed = 1; + + if (!$failed && $type ne "boot"){ + do_run_test or $failed = 1; + } + end_monitor; + return 0 if ($failed); + + } + $in_patchcheck = 0; + success $i; + + return 1; +} + +$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl config-file\n"; + +if ($#ARGV == 0) { + $ktest_config = $ARGV[0]; + if (! -f $ktest_config) { + print "$ktest_config does not exist.\n"; + my $ans; + for (;;) { + print "Create it? [Y/n] "; + $ans = <STDIN>; + chomp $ans; + if ($ans =~ /^\s*$/) { + $ans = "y"; + } + last if ($ans =~ /^y$/i || $ans =~ /^n$/i); + print "Please answer either 'y' or 'n'.\n"; + } + if ($ans !~ /^y$/i) { + exit 0; + } + } +} else { + $ktest_config = "ktest.conf"; +} + +if (! -f $ktest_config) { + open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; + print OUT << "EOF" +# Generated by ktest.pl +# +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START + +DEFAULTS +EOF +; + close(OUT); +} +read_config $ktest_config; + +# Append any configs entered in manually to the config file. 
+# Persist every option that was entered interactively: append it to the
+# config file and make it visible to this run through %opt.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+    print "\nAppending entered in configs to $ktest_config\n";
+    open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+    foreach my $config (@new_configs) {
+        print OUT "$config = $entered_configs{$config}\n";
+        $opt{$config} = $entered_configs{$config};
+    }
+    # Close the handle so the appended options are flushed to disk now
+    # instead of at interpreter exit.
+    close(OUT);
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+    unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+# Dump the options: iteration 0 prints the defaults (keys without a
+# "[N]" suffix), iteration N prints the options specific to test N.
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+    if (!$i) {
+        doprint "DEFAULT OPTIONS:\n";
+    } else {
+        doprint "\nTEST $i OPTIONS";
+        # Reset the stride for every test; otherwise the ITERATE count of
+        # an earlier test is reused and later tests are skipped in the dump.
+        $repeat = 1;
+        if (defined($repeat_tests{$i})) {
+            $repeat = $repeat_tests{$i};
+            doprint " ITERATE $repeat";
+        }
+        doprint "\n";
+    }
+
+    foreach my $option (sort keys %opt) {
+
+        if ($option =~ /\[(\d+)\]$/) {
+            next if ($i != $1);
+        } else {
+            next if ($i);
+        }
+
+        doprint "$option = $opt{$option}\n";
+    }
+}
+
+# Look up option $name for test number $i.
+#
+# Resolution order:
+#   1. the test specific value "NAME[i]"
+#   2. the value "NAME[t]" of an iterated test t whose range
+#      [t, t + ITERATE) contains i
+#   3. the default value "NAME"
+# Returns undef when the option is not set anywhere.
+sub set_test_option {
+    my ($name, $i) = @_;
+
+    my $option = "$name\[$i\]";
+
+    if (defined($opt{$option})) {
+        return $opt{$option};
+    }
+
+    foreach my $test (keys %repeat_tests) {
+        if ($i >= $test &&
+            $i < $test + $repeat_tests{$test}) {
+            $option = "$name\[$test\]";
+            if (defined($opt{$option})) {
+                return $opt{$option};
+            }
+        }
+    }
+
+    if (defined($opt{$name})) {
+        return $opt{$name};
+    }
+
+    return undef;
+}
+
+# First we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+    $iteration = $i;
+
+    my $makecmd = set_test_option("MAKE_CMD", $i);
+
+    # Resolve every per-test option into the globals used by the helpers.
+    $machine = set_test_option("MACHINE", $i);
+    $ssh_user = set_test_option("SSH_USER", $i);
+    $tmpdir = set_test_option("TMP_DIR", $i);
+    $outputdir = set_test_option("OUTPUT_DIR", $i);
+    $builddir = set_test_option("BUILD_DIR", $i);
+    $test_type = set_test_option("TEST_TYPE", $i);
+    $build_type = set_test_option("BUILD_TYPE", $i);
+    $build_options = set_test_option("BUILD_OPTIONS", $i);
+    $power_cycle = set_test_option("POWER_CYCLE", $i);
+    $reboot = set_test_option("REBOOT", $i);
+    $noclean = set_test_option("BUILD_NOCLEAN", $i);
+    $minconfig = set_test_option("MIN_CONFIG", $i);
+    $run_test = set_test_option("TEST", $i);
+    $addconfig = set_test_option("ADD_CONFIG", $i);
+    $reboot_type = set_test_option("REBOOT_TYPE", $i);
+    $grub_menu = set_test_option("GRUB_MENU", $i);
+    $post_install = set_test_option("POST_INSTALL", $i);
+    $reboot_script = set_test_option("REBOOT_SCRIPT", $i);
+    $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
+    $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
+    $die_on_failure = set_test_option("DIE_ON_FAILURE", $i);
+    $power_off = set_test_option("POWER_OFF", $i);
+    $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i);
+    $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
+    $sleep_time = set_test_option("SLEEP_TIME", $i);
+    $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
+    $store_failures = set_test_option("STORE_FAILURES", $i);
+    $timeout = set_test_option("TIMEOUT", $i);
+    $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i);
+    $console = set_test_option("CONSOLE", $i);
+    $success_line = set_test_option("SUCCESS_LINE", $i);
+    $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
+    $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
+    $build_target = set_test_option("BUILD_TARGET", $i);
+    $ssh_exec = set_test_option("SSH_EXEC", $i);
+    $scp_to_target = set_test_option("SCP_TO_TARGET", $i);
+    $target_image = set_test_option("TARGET_IMAGE", $i);
+    $localversion = set_test_option("LOCALVERSION", $i);
+
+    # Use the low precedence "or": with "||" the die would bind to
+    # $builddir and a failing chdir would go unnoticed.
+    chdir $builddir or die "can't change directory to $builddir";
+
+    if (!-d $tmpdir) {
+        mkpath($tmpdir) or
+            die "can't create $tmpdir";
+    }
+
+    # Exported for the user supplied commands and scripts.
+    $ENV{"SSH_USER"} = $ssh_user;
+    $ENV{"MACHINE"} = $machine;
+
+    $target = "$ssh_user\@$machine";
+
+    $buildlog = "$tmpdir/buildlog-$machine";
+    $dmesg = "$tmpdir/dmesg-$machine";
+    $make = "$makecmd O=$outputdir";
+    $output_config = "$outputdir/.config";
+
+    if ($reboot_type eq "grub") {
+        dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+    } elsif (!defined($reboot_script)) {
+        dodie "REBOOT_SCRIPT not defined";
+    }
+
+    # For the composite test types the sub-type comes from a test
+    # specific option; it is only used for the banner printed below.
+    my $run_type = $build_type;
+    if ($test_type eq "patchcheck") {
+        $run_type = $opt{"PATCHCHECK_TYPE[$i]"};
+    } elsif ($test_type eq "bisect") {
+        $run_type = $opt{"BISECT_TYPE[$i]"};
+    } elsif ($test_type eq "config_bisect") {
+        $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"};
+    }
+
+    # mistake in config file?
+    if (!defined($run_type)) {
+        $run_type = "ERROR";
+    }
+
+    doprint "\n\n";
+    doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+
+    unlink $dmesg;
+    unlink $buildlog;
+
+    # ADD_CONFIG is prepended to MIN_CONFIG, or stands in for it when
+    # MIN_CONFIG is not set.
+    if (!defined($minconfig)) {
+        $minconfig = $addconfig;
+
+    } elsif (defined($addconfig)) {
+        run_command "cat $addconfig $minconfig > $tmpdir/add_config" or
+            dodie "Failed to create temp config";
+        $minconfig = "$tmpdir/add_config";
+    }
+
+    my $checkout = $opt{"CHECKOUT[$i]"};
+    if (defined($checkout)) {
+        run_command "git checkout $checkout" or
+            die "failed to checkout $checkout";
+    }
+
+    # The composite test types drive their own build/boot/test cycles.
+    if ($test_type eq "bisect") {
+        bisect $i;
+        next;
+    } elsif ($test_type eq "config_bisect") {
+        config_bisect $i;
+        next;
+    } elsif ($test_type eq "patchcheck") {
+        patchcheck $i;
+        next;
+    }
+
+    if ($build_type ne "nobuild") {
+        build $build_type or next;
+    }
+
+    if ($test_type ne "build") {
+        get_grub_index;
+        get_version;
+        install;
+
+        my $failed = 0;
+        start_monitor;
+        monitor or $failed = 1;
+
+        if (!$failed && $test_type ne "boot" && defined($run_test)) {
+            do_run_test or $failed = 1;
+        }
+        end_monitor;
+        next if ($failed);
+    }
+
+    success $i;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+    halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) {
+    reboot;
+}
+
+doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf new file mode 100644 index 0000000..3408c59 --- 
/dev/null +++ b/tools/testing/ktest/sample.conf @@ -0,0 +1,622 @@ +# +# Config file for ktest.pl +# +# Note, all paths must be absolute +# + +# Options set in the beginning of the file are considered to be +# default options. These options can be overridden by test specific +# options, with the following exceptions: +# +# LOG_FILE +# CLEAR_LOG +# POWEROFF_ON_SUCCESS +# REBOOT_ON_SUCCESS +# +# Test specific options are set after the label: +# +# TEST_START +# +# The options after a TEST_START label are specific to that test. +# Each TEST_START label will set up a new test. If you want to +# perform a test more than once, you can add the ITERATE label +# to it followed by the number of times you want that test +# to iterate. If the ITERATE is left off, the test will only +# be performed once. +# +# TEST_START ITERATE 10 +# +# You can skip a test by adding SKIP (before or after the ITERATE +# and number) +# +# TEST_START SKIP +# +# TEST_START SKIP ITERATE 10 +# +# TEST_START ITERATE 10 SKIP +# +# The SKIP label causes the options and the test itself to be ignored. +# This is useful to set up several different tests in one config file, and +# only enable the ones you want to use for a current test run. +# +# You can add default options anywhere in the file as well +# with the DEFAULTS tag. This allows you to have default options +# after the test options to keep the test options at the top +# of the file. You can even place the DEFAULTS tag between +# test cases (but not in the middle of a single test case) +# +# TEST_START +# MIN_CONFIG = /home/test/config-test1 +# +# DEFAULTS +# MIN_CONFIG = /home/test/config-default +# +# TEST_START ITERATE 10 +# +# The above will run the first test with MIN_CONFIG set to +# /home/test/config-test1. Then 10 tests will be executed +# with MIN_CONFIG set to /home/test/config-default. 
+# +# You can also disable defaults with the SKIP option +# +# DEFAULTS SKIP +# MIN_CONFIG = /home/test/config-use-sometimes +# +# DEFAULTS +# MIN_CONFIG = /home/test/config-most-times +# +# The above will ignore the first MIN_CONFIG. If you want to +# use the first MIN_CONFIG, remove the SKIP from the first +# DEFAULTS tag and add it to the second. Be careful, options +# may only be declared once per test or default. If you have +# the same option name under the same test or as default +# ktest will fail to execute, and no tests will run. +# + + +#### Mandatory Default Options #### + +# These options must be in the default section, although most +# may be overridden by test options. + +# The machine hostname that you will test +#MACHINE = target + +# The box is expected to have ssh on normal bootup, provide the user +# (most likely root, since you need privileged operations) +#SSH_USER = root + +# The directory that contains the Linux source code +#BUILD_DIR = /home/test/linux.git + +# The directory that the objects will be built +# (can not be same as BUILD_DIR) +#OUTPUT_DIR = /home/test/build/target + +# The location of the compiled file to copy to the target +# (relative to OUTPUT_DIR) +#BUILD_TARGET = arch/x86/boot/bzImage + +# The place to put your image on the test machine +#TARGET_IMAGE = /boot/vmlinuz-test + +# A script or command to reboot the box +# +# Here is a digital loggers power switch example +#POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL' +# +# Here is an example to reboot a virtual box on the current host +# with the name "Guest". +#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest + +# The script or command that reads the console +# +# If you use ttywatch server, something like the following would work. +#CONSOLE = nc -d localhost 3001 +# +# For a virtual machine with guest name "Guest". 
+#CONSOLE = virsh console Guest + +# Required version ending to differentiate the test +# from other linux builds on the system. +#LOCALVERSION = -test + +# The grub title name for the test kernel to boot +# (Only mandatory if REBOOT_TYPE = grub) +# +# Note, ktest.pl will not update the grub menu.lst, you need to +# manually add an option for the test. ktest.pl will search +# the grub menu.lst for this option to find what kernel to +# reboot into. +# +# For example, if in the /boot/grub/menu.lst the test kernel title has: +# title Test Kernel +# kernel vmlinuz-test +#GRUB_MENU = Test Kernel + +# A script to reboot the target into the test kernel +# (Only mandatory if REBOOT_TYPE = script) +#REBOOT_SCRIPT = + +#### Optional Config Options (all have defaults) #### + +# Start a test setup. If you leave this off, all options +# will be default and the test will run once. +# This is a label and not really an option (it takes no value). +# You can append ITERATE and a number after it to iterate the +# test a number of times, or SKIP to ignore this test. +# +#TEST_START +#TEST_START ITERATE 5 +#TEST_START SKIP + +# Have the following options as default again. Used after tests +# have already been defined by TEST_START. Optionally, you can +# just define all default options before the first TEST_START +# and you do not need this option. +# +# This is a label and not really an option (it takes no value). +# You can append SKIP to this label and the options within this +# section will be ignored. 
+# +# DEFAULTS +# DEFAULTS SKIP + +# The default test type (default test) +# The test types may be: +# build - only build the kernel, do nothing else +# boot - build and boot the kernel +# test - build, boot and if TEST is set, run the test script +# (If TEST is not set, it defaults back to boot) +# bisect - Perform a bisect on the kernel (see BISECT_TYPE below) +# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below) +#TEST_TYPE = test + +# Test to run if there is a successful boot and TEST_TYPE is test. +# Must exit with 0 on success and non zero on error +# default (undefined) +#TEST = ssh user@machine /root/run_test + +# The build type is any make config type or special command +# (default randconfig) +# nobuild - skip the clean and build step +# useconfig:/path/to/config - use the given config and run +# oldconfig on it. +# This option is ignored if TEST_TYPE is patchcheck or bisect +#BUILD_TYPE = randconfig + +# The make command (default make) +# If you are building a 32bit x86 on a 64 bit host +#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386 + +# Any build options for the make of the kernel (not for other makes, like configs) +# (default "") +#BUILD_OPTIONS = -j20 + +# If you need an initrd, you can add a script or code here to install +# it. The environment variable KERNEL_VERSION will be set to the +# kernel version that is used. Remember to add the initrd line +# to your grub menu.lst file. +# +# Here's a couple of examples to use: +#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION +# +# or on some systems: +#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION + +# Way to reboot the box to the test kernel. 
+# Only valid options so far are "grub" and "script" +# (default grub) +# If you specify grub, it will assume grub version 1 +# and will search in /boot/grub/menu.lst for the title $GRUB_MENU +# and select that target to reboot to the kernel. If this is not +# your setup, then specify "script" and have a command or script +# specified in REBOOT_SCRIPT to boot to the target. +# +# The entry in /boot/grub/menu.lst must be entered in manually. +# The test will not modify that file. +#REBOOT_TYPE = grub + +# The min config that is needed to build for the machine +# A nice way to create this is with the following: +# +# $ ssh target +# $ lsmod > mymods +# $ scp mymods host:/tmp +# $ exit +# $ cd linux.git +# $ rm .config +# $ make LSMOD=mymods localyesconfig +# $ grep '^CONFIG' .config > /home/test/config-min +# +# If you want even less configs: +# +# log in directly to target (do not ssh) +# +# $ su +# # lsmod | cut -d' ' -f1 | xargs rmmod +# +# repeat the above several times +# +# # lsmod > mymods +# # reboot +# +# May need to reboot to get your network back to copy the mymods +# to the host, and then remove the previous .config and run the +# localyesconfig again. The CONFIG_MIN generated like this will +# not guarantee network activity to the box so the TEST_TYPE of +# test may fail. +# +# You might also want to set: +# CONFIG_CMDLINE="<your options here>" +# randconfig may set the above and override your real command +# line options. +# (default undefined) +#MIN_CONFIG = /home/test/config-min + +# Sometimes there's options that just break the boot and +# you do not care about. Here are a few: +# # CONFIG_STAGING is not set +# Staging drivers are horrible, and can break the build. +# # CONFIG_SCSI_DEBUG is not set +# SCSI_DEBUG may change your root partition +# # CONFIG_KGDB_SERIAL_CONSOLE is not set +# KGDB may cause oops waiting for a connection that's not there. 
+# This option points to the file containing config options that will be prepended +# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set) +# +# Note, config options in MIN_CONFIG will override these options. +# +# (default undefined) +#ADD_CONFIG = /home/test/config-broken + +# The location on the host where to write temp files +# (default /tmp/ktest) +#TMP_DIR = /tmp/ktest + +# Optional log file to write the status (recommended) +# Note, this is a DEFAULT section only option. +# (default undefined) +#LOG_FILE = /home/test/logfiles/target.log + +# Remove old logfile if it exists before starting all tests. +# Note, this is a DEFAULT section only option. +# (default 0) +#CLEAR_LOG = 0 + +# Line to define a successful boot up in console output. +# This is what the line contains, not the entire line. If you need +# the entire line to match, then use regular expression syntax like: +# (do not add any quotes around it) +# +# SUCCESS_LINE = ^MyBox Login:$ +# +# (default "login:") +#SUCCESS_LINE = login: + +# In case the console constantly fills the screen, having +# a specified time to stop the test after success is recommended. +# (in seconds) +# (default 10) +#STOP_AFTER_SUCCESS = 10 + +# In case the console constantly fills the screen, having +# a specified time to stop the test after failure is recommended. +# (in seconds) +# (default 60) +#STOP_AFTER_FAILURE = 60 + +# Stop testing if a build fails. If set, the script will end if +# a failure is detected, otherwise it will save off the .config, +# dmesg and bootlog in a directory called +# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss +# if the STORE_FAILURES directory is set. +# (default 1) +# Note, even if this is set to zero, there are some errors that still +# stop the tests. +#DIE_ON_FAILURE = 1 + +# Directory to store failure directories on failure. If this is not +# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and +# bootlog. This option is ignored if DIE_ON_FAILURE is set. 
+# (default undefined) +#STORE_FAILURES = /home/test/failures + +# Build without doing a make mrproper, or removing .config +# (default 0) +#BUILD_NOCLEAN = 0 + +# As the test reads the console, after it hits the SUCCESS_LINE +# the time it waits for the monitor to settle down between reads +# can usually be lowered. +# (in seconds) (default 1) +#BOOTED_TIMEOUT = 1 + +# The timeout in seconds when we consider the box hung after +# the console stop producing output. Be sure to leave enough +# time here to get pass a reboot. Some machines may not produce +# any console output for a long time during a reboot. You do +# not want the test to fail just because the system was in +# the process of rebooting to the test kernel. +# (default 120) +#TIMEOUT = 120 + +# In between tests, a reboot of the box may occur, and this +# is the time to wait for the console after it stops producing +# output. Some machines may not produce a large lag on reboot +# so this should accommodate it. +# The difference between this and TIMEOUT, is that TIMEOUT happens +# when rebooting to the test kernel. This sleep time happens +# after a test has completed and we are about to start running +# another test. If a reboot to the reliable kernel happens, +# we wait SLEEP_TIME for the console to stop producing output +# before starting the next test. +# (default 60) +#SLEEP_TIME = 60 + +# The time in between bisects to sleep (in seconds) +# (default 60) +#BISECT_SLEEP_TIME = 60 + +# Reboot the target box on error (default 0) +#REBOOT_ON_ERROR = 0 + +# Power off the target on error (ignored if REBOOT_ON_ERROR is set) +# Note, this is a DEFAULT section only option. +# (default 0) +#POWEROFF_ON_ERROR = 0 + +# Power off the target after all tests have completed successfully +# Note, this is a DEFAULT section only option. 
+# (default 0) +#POWEROFF_ON_SUCCESS = 0 + +# Reboot the target after all tests have completed successfully (default 1) +# (ignored if POWEROFF_ON_SUCCESS is set) +#REBOOT_ON_SUCCESS = 1 + +# In case there are issues with rebooting, you can specify this +# to always powercycle after this amount of time after calling +# reboot. +# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just +# makes it powercycle immediately after rebooting. Do not define +# it if you do not want it. +# (default undefined) +#POWERCYCLE_AFTER_REBOOT = 5 + +# In case there are issues with halting, you can specify this +# to always poweroff after this amount of time after calling +# halt. +# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just +# makes it poweroff immediately after halting. Do not define +# it if you do not want it. +# (default undefined) +#POWEROFF_AFTER_HALT = 20 + +# A script or command to power off the box (default undefined) +# Needed for POWEROFF_ON_ERROR and POWEROFF_ON_SUCCESS +# +# Example for digital loggers power switch: +#POWER_OFF = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF' +# +# Example for a virtual guest called "Guest". +#POWER_OFF = virsh destroy Guest + +# The way to execute a command on the target +# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND) +# The variables SSH_USER, MACHINE and SSH_COMMAND are defined +#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND + +# The way to copy a file to the target +# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE) +# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined. +#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE + +# The nice way to reboot the target +# (default ssh $SSH_USER@$MACHINE reboot) +# The variables SSH_USER and MACHINE are defined. +#REBOOT = ssh $SSH_USER@$MACHINE reboot + +#### Per test run options #### +# The following options are only allowed in TEST_START sections. +# They are ignored in the DEFAULTS sections. 
+# +# All of these are optional and undefined by default, although +# some of these options are required for TEST_TYPE of patchcheck +# and bisect. +# +# +# CHECKOUT = branch +# +# If the BUILD_DIR is a git repository, then you can set this option +# to checkout the given branch before running the TEST. If you +# specify this for the first run, that branch will be used for +# all following tests until a new CHECKOUT is set. +# +# +# +# For TEST_TYPE = patchcheck +# +# This expects the BUILD_DIR to be a git repository, and +# will checkout the PATCHCHECK_START commit. +# +# The option BUILD_TYPE will be ignored. +# +# The MIN_CONFIG will be used for all builds of the patchcheck. The build type +# used for patchcheck is oldconfig. +# +# PATCHCHECK_START is required and is the first patch to +# test (the SHA1 of the commit). You may also specify anything +# that git checkout allows (branch name, tag, HEAD~3). +# +# PATCHCHECK_END is the last patch to check (default HEAD) +# +# PATCHCHECK_TYPE is required and is the type of test to run: +# build, boot, test. +# +# Note, the build test will look for warnings; if a warning occurred +# in a file that a commit touches, the build will fail. +# +# If BUILD_NOCLEAN is set, then make mrproper will not be run on +# any of the builds, just like all other TEST_TYPE tests. But +# what makes patchcheck different from the other tests, is if +# BUILD_NOCLEAN is not set, only the first and last patch run +# make mrproper. This helps speed up the test. +# +# Example: +# TEST_START +# TEST_TYPE = patchcheck +# CHECKOUT = mybranch +# PATCHCHECK_TYPE = boot +# PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7 +# PATCHCHECK_END = HEAD~2 +# +# +# +# For TEST_TYPE = bisect +# +# You can specify a git bisect if the BUILD_DIR is a git repository. +# The MIN_CONFIG will be used for all builds of the bisect. The build type +# used for bisecting is oldconfig. +# +# The option BUILD_TYPE will be ignored. 
+# +# BISECT_TYPE is the type of test to perform: +# build - bad fails to build +# boot - bad builds but fails to boot +# test - bad boots but fails a test +# +# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types) +# BISECT_BAD is the commit to label as bad (accepts all git bad commit types) +# +# The above three options are required for a bisect operation. +# +# BISECT_REPLAY = /path/to/replay/file (optional, default undefined) +# +# If an operation in the bisect fails that was not expected to +# fail, then the test ends. The state of the BUILD_DIR will be +# left off at where the failure occurred. You can examine the +# reason for the failure, and perhaps even find a git commit +# that would work to continue with. You can run: +# +# git bisect log > /path/to/replay/file +# +# Then add: +# +# BISECT_REPLAY= /path/to/replay/file +# +# and run the test again. The test will perform the initial +# git bisect start, git bisect good, and git bisect bad, and +# then it will run git bisect replay on this file, before +# continuing with the bisect. +# +# BISECT_START = commit (optional, default undefined) +# +# As with BISECT_REPLAY, if the test failed on a commit that +# just happened to have a bad commit in the middle of the bisect, +# and you need to skip it. If BISECT_START is defined, it +# will checkout that commit after doing the initial git bisect start, +# git bisect good, git bisect bad, and running the git bisect replay +# if the BISECT_REPLAY is set. +# +# BISECT_REVERSE = 1 (optional, default 0) +# +# In those strange instances where it was broken forever +# and you are trying to find where it started to work! +# Set BISECT_GOOD to the commit that was last known to fail +# Set BISECT_BAD to the commit that is known to start working. +# With BISECT_REVERSE = 1, the test will consider failures as +# good, and success as bad. 
+# +# BISECT_CHECK = 1 (optional, default 0) +# +# Just to be sure the good is good and bad is bad, setting +# BISECT_CHECK to 1 will start the bisect by first checking +# out BISECT_BAD and making sure it fails, then it will check +# out BISECT_GOOD and make sure it succeeds before starting +# the bisect (it works for BISECT_REVERSE too). +# +# You can limit the test to just check BISECT_GOOD or +# BISECT_BAD with BISECT_CHECK = good or +# BISECT_CHECK = bad, respectively. +# +# Example: +# TEST_START +# TEST_TYPE = bisect +# BISECT_GOOD = v2.6.36 +# BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e +# BISECT_TYPE = build +# MIN_CONFIG = /home/test/config-bisect +# +# +# +# For TEST_TYPE = config_bisect +# +# In those cases where you have two different configs, one of them +# works, the other does not, and you do not know what config causes +# the problem. +# The TEST_TYPE config_bisect will bisect the bad config looking for +# what config causes the failure. +# +# The way it works is this: +# +# First it finds a config to work with. Since a different version, or +# MIN_CONFIG may cause different dependencies, it must run through this +# preparation. +# +# Overwrites any config set in the bad config with a config set in +# either the MIN_CONFIG or ADD_CONFIG. Thus, make sure these configs +# are minimal and do not disable configs you want to test: +# (i.e. # CONFIG_FOO is not set). +# +# An oldconfig is run on the bad config and any new config that +# appears will be added to the configs to test. +# +# Finally, it generates a config with the above result and runs it +# again through make oldconfig to produce a config that should be +# satisfied by kconfig. +# +# Then it starts the bisect. +# +# The configs to test are cut in half. If all the configs in this +# half depend on a config in the other half, then the other half +# is tested instead. If no configs are enabled by either half, then +# this means a circular dependency exists and the test fails. 
+# +# A config is created with the test half, and the bisect test is run. +# +# If the bisect succeeds, then all configs in the generated config +# are removed from the configs to test and added to the configs that +# will be enabled for all builds (they will be enabled, but not be part +# of the configs to examine). +# +# If the bisect fails, then all test configs that were not enabled by +# the config file are removed from the test. These configs will not +# be enabled in future tests. Since the current config failed, we consider +# this to be a subset of the config that we started with. +# +# When we are down to one config, it is considered the bad config. +# +# Note, the config chosen may not be the true bad config. Due to +# dependencies and selections of the kbuild system, multiple +# configs may be needed to cause a failure. If you disable the +# config that was found and restart the test, if the test fails +# again, it is recommended to rerun the config_bisect with a new +# bad config without the found config enabled. +# +# The option BUILD_TYPE will be ignored. +# +# CONFIG_BISECT_TYPE is the type of test to perform: +# build - bad fails to build +# boot - bad builds but fails to boot +# test - bad boots but fails a test +# +# CONFIG_BISECT is the config that failed to boot +# +# Example: +# TEST_START +# TEST_TYPE = config_bisect +# CONFIG_BISECT_TYPE = build +# CONFIG_BISECT = /home/test/config-bad +# MIN_CONFIG = /home/test/config-min +# diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile new file mode 100644 index 0000000..d1d442e --- /dev/null +++ b/tools/virtio/Makefile @@ -0,0 +1,12 @@ +all: test mod +test: virtio_test +virtio_test: virtio_ring.o virtio_test.o +CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD +vpath %.c ../../drivers/virtio +mod: + ${MAKE} -C `pwd`/../.. 
M=`pwd`/vhost_test +.PHONY: all test mod clean +clean: + ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \ + vhost_test/Module.symvers vhost_test/modules.order *.d +-include *.d diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h new file mode 100644 index 0000000..4ad7e1d --- /dev/null +++ b/tools/virtio/linux/device.h @@ -0,0 +1,2 @@ +#ifndef LINUX_DEVICE_H +#endif diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h new file mode 100644 index 0000000..81baeac --- /dev/null +++ b/tools/virtio/linux/slab.h @@ -0,0 +1,2 @@ +#ifndef LINUX_SLAB_H +#endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h new file mode 100644 index 0000000..669bcdd --- /dev/null +++ b/tools/virtio/linux/virtio.h @@ -0,0 +1,223 @@ +#ifndef LINUX_VIRTIO_H +#define LINUX_VIRTIO_H + +#include <stdbool.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include <linux/types.h> +#include <errno.h> + +typedef unsigned long long dma_addr_t; + +struct scatterlist { + unsigned long page_link; + unsigned int offset; + unsigned int length; + dma_addr_t dma_address; +}; + +struct page { + unsigned long long dummy; +}; + +#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) + +/* Physical == Virtual */ +#define virt_to_phys(p) ((unsigned long)p) +#define phys_to_virt(a) ((void *)(unsigned long)(a)) +/* Page address: Virtual / 4K */ +#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \ + sizeof(struct page))) +#define offset_in_page(p) (((unsigned long)p) % 4096) +#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \ + sg->offset) +static inline void sg_mark_end(struct scatterlist *sg) +{ + /* + * Set termination bit, clear potential chain bit + */ + sg->page_link |= 0x02; + sg->page_link &= ~0x01; +} +static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); + sg_mark_end(&sgl[nents - 1]); 
+} /* Store page's address in sg->page_link, keeping the two low bits that are already set there. */ +static inline void sg_assign_page(struct scatterlist *sg, struct page *page) +{ + unsigned long page_link = sg->page_link & 0x3; + + /* + * In order for the low bit stealing approach to work, pages + * must be aligned at a 32-bit boundary as a minimum. + */ + BUG_ON((unsigned long) page & 0x03); + sg->page_link = page_link | (unsigned long) page; +} + /* Point sg at page and record its offset and length. */ +static inline void sg_set_page(struct scatterlist *sg, struct page *page, + unsigned int len, unsigned int offset) +{ + sg_assign_page(sg, page); + sg->offset = offset; + sg->length = len; +} + /* Describe buf via virt_to_page() and offset_in_page(). */ +static inline void sg_set_buf(struct scatterlist *sg, const void *buf, + unsigned int buflen) +{ + sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); +} + /* Initialise sg as a one-entry table (sg_init_table) and make it describe buf. */ +static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) +{ + sg_init_table(sg, 1); + sg_set_buf(sg, buf, buflen); +} + +typedef __u16 u16; + +typedef enum { + GFP_KERNEL, + GFP_ATOMIC, +} gfp_t; +typedef enum { + IRQ_NONE, + IRQ_HANDLED +} irqreturn_t; + /* Stand-in that forwards to malloc(); the gfp argument is not used. */ +static inline void *kmalloc(size_t s, gfp_t gfp) +{ + return malloc(s); +} + /* Stand-in that forwards to free(). */ +static inline void kfree(void *p) +{ + free(p); +} + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define uninitialized_var(x) x = x + +# ifndef likely +# define likely(x) (__builtin_expect(!!(x), 1)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_expect(!!(x), 0)) +# endif + +#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#ifdef DEBUG +#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#else +#define pr_debug(format, ...) do {} while (0) +#endif +#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) + +/* TODO: empty stubs for now. 
Broken but enough for virtio_ring.c */ +#define list_add_tail(a, b) do {} while (0) +#define list_del(a) do {} while (0) + +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +/* TODO: Not atomic as it should be: + * we don't use this for anything important. */ +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p &= ~mask; +} + /* Returns 1 if bit nr is set in the bitmap at addr, else 0. */ +static inline int test_bit(int nr, const volatile unsigned long *addr) +{ + return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); +} + +/* The only feature we care to support */ +#define virtio_has_feature(dev, feature) \ + test_bit((feature), (dev)->features) +/* end of stubs */ + /* Device stub: an opaque dev pointer and a one-word feature bitmap. */ +struct virtio_device { + void *dev; + unsigned long features[1]; +}; + +struct virtqueue { + /* TODO: commented as list macros are empty stubs for now. + * Broken but enough for virtio_ring.c + * struct list_head list; */ + void (*callback)(struct virtqueue *vq); + const char *name; + struct virtio_device *vdev; + void *priv; +}; + +#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \ + void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \ +} +#define MODULE_LICENSE(__MODULE_LICENSE_value) \ + const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value + +#define CONFIG_SMP + /* x86 only: smp_rmb()/smp_wmb() expand to barrier(), mb() to __sync_synchronize(). */ +#if defined(__i386__) || defined(__x86_64__) +#define barrier() asm volatile("" ::: "memory") +#define mb() __sync_synchronize() + +#define smp_mb() mb() +# define smp_rmb() barrier() +# define smp_wmb() barrier() +#else +#error Please fill in barrier macros +#endif + +/* Interfaces exported by virtio_ring. 
*/ +int virtqueue_add_buf_gfp(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data, + gfp_t gfp); + /* Wrapper around virtqueue_add_buf_gfp() that always passes GFP_ATOMIC. */ +static inline int virtqueue_add_buf(struct virtqueue *vq, + struct scatterlist sg[], + unsigned int out_num, + unsigned int in_num, + void *data) +{ + return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC); +} + +void virtqueue_kick(struct virtqueue *vq); + +void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); + +void virtqueue_disable_cb(struct virtqueue *vq); + +bool virtqueue_enable_cb(struct virtqueue *vq); + +void *virtqueue_detach_unused_buf(struct virtqueue *vq); +struct virtqueue *vring_new_virtqueue(unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + void *pages, + void (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name); +void vring_del_virtqueue(struct virtqueue *vq); + +#endif diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile new file mode 100644 index 0000000..a1d35b8 --- /dev/null +++ b/tools/virtio/vhost_test/Makefile @@ -0,0 +1,2 @@ +obj-m += vhost_test.o +EXTRA_CFLAGS += -Idrivers/vhost diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c new file mode 100644 index 0000000..1873518 --- /dev/null +++ b/tools/virtio/vhost_test/vhost_test.c @@ -0,0 +1 @@ +#include "test.c" diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c new file mode 100644 index 0000000..df0c6d2 --- /dev/null +++ b/tools/virtio/virtio_test.c @@ -0,0 +1,248 @@ +#define _GNU_SOURCE +#include <getopt.h> +#include <string.h> +#include <poll.h> +#include <sys/eventfd.h> +#include <stdlib.h> +#include <assert.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <linux/vhost.h> +#include <linux/virtio.h> +#include <linux/virtio_ring.h> +#include "../../drivers/vhost/test.h" + 
+struct vq_info { + int kick; + int call; + int num; + int idx; + void *ring; + /* copy used for control */ + struct vring vring; + struct virtqueue *vq; +}; + +struct vdev_info { + struct virtio_device vdev; + int control; + struct pollfd fds[1]; + struct vq_info vqs[1]; + int nvqs; + void *buf; + size_t buf_size; + struct vhost_memory *mem; +}; + +void vq_notify(struct virtqueue *vq) +{ + struct vq_info *info = vq->priv; + unsigned long long v = 1; + int r; + r = write(info->kick, &v, sizeof v); + assert(r == sizeof v); +} + +void vq_callback(struct virtqueue *vq) +{ +} + + +void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info) +{ + struct vhost_vring_state state = { .index = info->idx }; + struct vhost_vring_file file = { .index = info->idx }; + unsigned long long features = dev->vdev.features[0]; + struct vhost_vring_addr addr = { + .index = info->idx, + .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc, + .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail, + .used_user_addr = (uint64_t)(unsigned long)info->vring.used, + }; + int r; + r = ioctl(dev->control, VHOST_SET_FEATURES, &features); + assert(r >= 0); + state.num = info->vring.num; + r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state); + assert(r >= 0); + state.num = 0; + r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state); + assert(r >= 0); + r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr); + assert(r >= 0); + file.fd = info->kick; + r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file); + assert(r >= 0); + file.fd = info->call; + r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file); + assert(r >= 0); +} + +static void vq_info_add(struct vdev_info *dev, int num) +{ + struct vq_info *info = &dev->vqs[dev->nvqs]; + int r; + info->idx = dev->nvqs; + info->kick = eventfd(0, EFD_NONBLOCK); + info->call = eventfd(0, EFD_NONBLOCK); + r = posix_memalign(&info->ring, 4096, vring_size(num, 4096)); + assert(r >= 0); + memset(info->ring, 0, vring_size(num, 4096)); + 
vring_init(&info->vring, num, info->ring, 4096); + info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring, + vq_notify, vq_callback, "test"); + assert(info->vq); + info->vq->priv = info; + vhost_vq_setup(dev, info); + dev->fds[info->idx].fd = info->call; + dev->fds[info->idx].events = POLLIN; + dev->nvqs++; +} + +static void vdev_info_init(struct vdev_info* dev, unsigned long long features) +{ + int r; + memset(dev, 0, sizeof *dev); + dev->vdev.features[0] = features; + dev->vdev.features[1] = features >> 32; + dev->buf_size = 1024; + dev->buf = malloc(dev->buf_size); + assert(dev->buf); + dev->control = open("/dev/vhost-test", O_RDWR); + assert(dev->control >= 0); + r = ioctl(dev->control, VHOST_SET_OWNER, NULL); + assert(r >= 0); + dev->mem = malloc(offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + assert(dev->mem); + memset(dev->mem, 0, offsetof(struct vhost_memory, regions) + + sizeof dev->mem->regions[0]); + dev->mem->nregions = 1; + dev->mem->regions[0].guest_phys_addr = (long)dev->buf; + dev->mem->regions[0].userspace_addr = (long)dev->buf; + dev->mem->regions[0].memory_size = dev->buf_size; + r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem); + assert(r >= 0); +} + +/* TODO: this is pretty bad: we get a cache line bounce + * for the wait queue on poll and another one on read, + * plus the read which is there just to clear the + * current state. 
*/ +static void wait_for_interrupt(struct vdev_info *dev) +{ + int i; + unsigned long long val; + poll(dev->fds, dev->nvqs, -1); + for (i = 0; i < dev->nvqs; ++i) + if (dev->fds[i].revents & POLLIN) { + read(dev->fds[i].fd, &val, sizeof val); + } +} + +static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) +{ + struct scatterlist sl; + long started = 0, completed = 0; + long completed_before; + int r, test = 1; + unsigned len; + long long spurious = 0; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); + assert(r >= 0); + for (;;) { + virtqueue_disable_cb(vq->vq); + completed_before = completed; + do { + if (started < bufs) { + sg_init_one(&sl, dev->buf, dev->buf_size); + r = virtqueue_add_buf(vq->vq, &sl, 1, 0, + dev->buf + started); + if (likely(r >= 0)) { + ++started; + virtqueue_kick(vq->vq); + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (virtqueue_get_buf(vq->vq, &len)) { + ++completed; + r = 0; + } + + } while (r >= 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (completed == bufs) + break; + if (virtqueue_enable_cb(vq->vq)) { + wait_for_interrupt(dev); + } + } + test = 0; + r = ioctl(dev->control, VHOST_TEST_RUN, &test); + assert(r >= 0); + fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious); +} + +const char optstring[] = "h"; +const struct option longopts[] = { + { + .name = "help", + .val = 'h', + }, + { + .name = "indirect", + .val = 'I', + }, + { + .name = "no-indirect", + .val = 'i', + }, + { + } +}; + +static void help() +{ + fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); +} + +int main(int argc, char **argv) +{ + struct vdev_info dev; + unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + int o; + + for (;;) { + o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'h': + help(); + goto done; + case 'i': + features &= ~(1ULL 
<< VIRTIO_RING_F_INDIRECT_DESC); + break; + default: + assert(0); + break; + } + } + +done: + vdev_info_init(&dev, features); + vq_info_add(&dev, 256); + run_test(&dev, &dev.vqs[0], 0x100000); + return 0; +} |