diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/hv/hv_kvp_daemon.c | 40 | ||||
-rw-r--r-- | tools/power/cpupower/.gitignore | 7 | ||||
-rw-r--r-- | tools/power/cpupower/Makefile | 3 | ||||
-rw-r--r-- | tools/power/cpupower/debug/i386/Makefile | 5 | ||||
-rw-r--r-- | tools/power/cpupower/man/cpupower-monitor.1 | 15 | ||||
-rw-r--r-- | tools/power/cpupower/utils/helpers/cpuid.c | 2 | ||||
-rw-r--r-- | tools/power/cpupower/utils/helpers/helpers.h | 18 | ||||
-rw-r--r-- | tools/power/cpupower/utils/helpers/sysfs.c | 19 | ||||
-rw-r--r-- | tools/power/cpupower/utils/helpers/topology.c | 53 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 21 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 17 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/snb_idle.c | 10 | ||||
-rw-r--r-- | tools/testing/selftests/vm/Makefile | 4 | ||||
-rw-r--r-- | tools/testing/selftests/vm/thuge-gen.c | 254 |
14 files changed, 393 insertions, 75 deletions
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 5959aff..d25a469 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -43,6 +43,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <dirent.h> +#include <net/if.h> /* * KVP protocol: The user mode component first registers with the @@ -88,6 +89,7 @@ static char *os_major = ""; static char *os_minor = ""; static char *processor_arch; static char *os_build; +static char *os_version; static char *lic_version = "Unknown version"; static struct utsname uts_buf; @@ -297,7 +299,7 @@ static int kvp_file_init(void) return 0; } -static int kvp_key_delete(int pool, __u8 *key, int key_size) +static int kvp_key_delete(int pool, const char *key, int key_size) { int i; int j, k; @@ -340,7 +342,7 @@ static int kvp_key_delete(int pool, __u8 *key, int key_size) return 1; } -static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, +static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value, int value_size) { int i; @@ -394,7 +396,7 @@ static int kvp_key_add_or_modify(int pool, __u8 *key, int key_size, __u8 *value, return 0; } -static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, +static int kvp_get_value(int pool, const char *key, int key_size, char *value, int value_size) { int i; @@ -426,8 +428,8 @@ static int kvp_get_value(int pool, __u8 *key, int key_size, __u8 *value, return 1; } -static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, - __u8 *value, int value_size) +static int kvp_pool_enumerate(int pool, int index, char *key, int key_size, + char *value, int value_size) { struct kvp_record *record; @@ -453,7 +455,9 @@ void kvp_get_os_info(void) char *p, buf[512]; uname(&uts_buf); - os_build = uts_buf.release; + os_version = uts_buf.release; + os_build = strdup(uts_buf.release); + os_name = uts_buf.sysname; processor_arch = uts_buf.machine; @@ -462,7 +466,7 @@ void kvp_get_os_info(void) * string to be of the form: x.y.z * Strip additional information we may have. */ - p = strchr(os_build, '-'); + p = strchr(os_version, '-'); if (p) *p = '\0'; @@ -879,7 +883,7 @@ static int kvp_process_ip_address(void *addrp, addr_length = INET6_ADDRSTRLEN; } - if ((length - *offset) < addr_length + 1) + if ((length - *offset) < addr_length + 2) return HV_E_FAIL; if (str == NULL) { strcpy(buffer, "inet_ntop failed\n"); @@ -887,11 +891,13 @@ static int kvp_process_ip_address(void *addrp, } if (*offset == 0) strcpy(buffer, tmp); - else + else { + strcat(buffer, ";"); strcat(buffer, tmp); - strcat(buffer, ";"); + } *offset += strlen(str) + 1; + return 0; } @@ -953,7 +959,9 @@ kvp_get_ip_info(int family, char *if_name, int op, * supported address families; if not we gather info on * the specified address family. */ - if ((family != 0) && (curp->ifa_addr->sa_family != family)) { + if ((((family != 0) && + (curp->ifa_addr->sa_family != family))) || + (curp->ifa_flags & IFF_LOOPBACK)) { curp = curp->ifa_next; continue; } @@ -1478,13 +1486,19 @@ int main(void) len = recvfrom(fd, kvp_recv_buffer, sizeof(kvp_recv_buffer), 0, addr_p, &addr_l); - if (len < 0 || addr.nl_pid) { + if (len < 0) { syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", addr.nl_pid, errno, strerror(errno)); close(fd); return -1; } + if (addr.nl_pid) { + syslog(LOG_WARNING, "Received packet from untrusted pid:%u", + addr.nl_pid); + continue; + } + incoming_msg = (struct nlmsghdr *)kvp_recv_buffer; incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; @@ -1649,7 +1663,7 @@ int main(void) strcpy(key_name, "OSMinorVersion"); break; case OSVersion: - strcpy(key_value, os_build); + strcpy(key_value, os_version); strcpy(key_name, "OSVersion"); break; case ProcessorArchitecture: diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 8a83dd2..d42073f 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -20,3 +20,10 @@ utils/cpufreq-set.o utils/cpufreq-aperf.o cpupower bench/cpufreq-bench +debug/kernel/Module.symvers +debug/i386/centrino-decode +debug/i386/dump_psb +debug/i386/intel_gsic +debug/i386/powernow-k8-decode +debug/x86_64/centrino-decode +debug/x86_64/powernow-k8-decode diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index cf397bd..d875a74 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -253,7 +253,8 @@ clean: | xargs rm -f -rm -f $(OUTPUT)cpupower -rm -f $(OUTPUT)libcpupower.so* - -rm -rf $(OUTPUT)po/*.{gmo,pot} + -rm -rf $(OUTPUT)po/*.gmo + -rm -rf $(OUTPUT)po/*.pot $(MAKE) -C bench O=$(OUTPUT) clean diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile index 3ba158f..c05cc0a 100644 --- a/tools/power/cpupower/debug/i386/Makefile +++ b/tools/power/cpupower/debug/i386/Makefile @@ -26,7 +26,10 @@ $(OUTPUT)powernow-k8-decode: powernow-k8-decode.c all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb $(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode clean: - rm -rf $(OUTPUT){centrino-decode,dump_psb,intel_gsic,powernow-k8-decode} + rm -rf $(OUTPUT)centrino-decode + rm -rf $(OUTPUT)dump_psb + rm -rf $(OUTPUT)intel_gsic + rm -rf $(OUTPUT)powernow-k8-decode install: $(INSTALL) -d $(DESTDIR)${bindir} diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1 index 1141c20..e01c35d 100644 --- a/tools/power/cpupower/man/cpupower-monitor.1 +++ b/tools/power/cpupower/man/cpupower-monitor.1 @@ -7,11 +7,11 @@ cpupower\-monitor \- Report processor frequency and idle statistics .RB "\-l" .B cpupower monitor -.RB [ "\-m <mon1>," [ "<mon2>,..." ] ] +.RB [ -c ] [ "\-m <mon1>," [ "<mon2>,..." ] ] .RB [ "\-i seconds" ] .br .B cpupower monitor -.RB [ "\-m <mon1>," [ "<mon2>,..." ] ] +.RB [ -c ][ "\-m <mon1>," [ "<mon2>,..." ] ] .RB command .br .SH DESCRIPTION @@ -64,6 +64,17 @@ Only display specific monitors. Use the monitor string(s) provided by \-l option Measure intervall. .RE .PP +\-c +.RS 4 +Schedule the process on every core before starting and ending measuring. +This could be needed for the Idle_Stats monitor when no other MSR based +monitor (has to be run on the core that is measured) is run in parallel. +This is to wake up the processors from deeper sleep states and let the +kernel re +-account its cpuidle (C-state) information before reading the +cpuidle timings from sysfs. +.RE +.PP command .RS 4 Measure idle and frequency characteristics of an arbitrary command/workload. diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 906895d..93b0aa7 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -158,6 +158,8 @@ out: cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO; cpu_info->caps |= CPUPOWER_CAP_IS_SNB; break; diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 2eb584c..aa9e954 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -92,6 +92,14 @@ extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; /* CPU topology/hierarchy parsing ******************/ struct cpupower_topology { @@ -101,18 +109,12 @@ struct cpupower_topology { unsigned int threads; /* per core */ /* Array gets mallocated with cores entries, holding per core info */ - struct { - int pkg; - int core; - int cpu; - - /* flags */ - unsigned int is_online:1; - } *core_info; + struct cpuid_core_info *core_info; }; extern int get_cpu_topology(struct cpupower_topology *cpu_top); extern void cpu_topology_release(struct cpupower_topology cpu_top); + /* CPU topology/hierarchy parsing ******************/ /* X86 ONLY ****************************************/ diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 96e28c1..38ab916 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -37,25 +37,6 @@ unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen) return (unsigned int) numread; } -static unsigned int sysfs_write_file(const char *path, - const char *value, size_t len) -{ - int fd; - ssize_t numwrite; - - fd = open(path, O_WRONLY); - if (fd == -1) - return 0; - - numwrite = write(fd, value, len); - if (numwrite < 1) { - close(fd); - return 0; - } - close(fd); - return (unsigned int) numwrite; -} - /* * Detect whether a CPU is online * diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index 4eae2c4..c13120a 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -20,9 +20,8 @@ #include <helpers/sysfs.h> /* returns -1 on failure, 0 on success */ -int sysfs_topology_read_file(unsigned int cpu, const char *fname) +static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result) { - unsigned long value; char linebuf[MAX_LINE_LEN]; char *endp; char path[SYSFS_PATH_MAX]; @@ -31,20 +30,12 @@ int sysfs_topology_read_file(unsigned int cpu, const char *fname) cpu, fname); if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0) return -1; - value = strtoul(linebuf, &endp, 0); + *result = strtol(linebuf, &endp, 0); if (endp == linebuf || errno == ERANGE) return -1; - return value; + return 0; } -struct cpuid_core_info { - unsigned int pkg; - unsigned int thread; - unsigned int cpu; - /* flags */ - unsigned int is_online:1; -}; - static int __compare(const void *t1, const void *t2) { struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1; @@ -53,9 +44,9 @@ static int __compare(const void *t1, const void *t2) return -1; else if (top1->pkg > top2->pkg) return 1; - else if (top1->thread < top2->thread) + else if (top1->core < top2->core) return -1; - else if (top1->thread > top2->thread) + else if (top1->core > top2->core) return 1; else if (top1->cpu < top2->cpu) return -1; @@ -73,28 +64,42 @@ static int __compare(const void *t1, const void *t2) */ int get_cpu_topology(struct cpupower_topology *cpu_top) { - int cpu, cpus = sysconf(_SC_NPROCESSORS_CONF); + int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF); - cpu_top->core_info = malloc(sizeof(struct cpupower_topology) * cpus); + cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus); if (cpu_top->core_info == NULL) return -ENOMEM; cpu_top->pkgs = cpu_top->cores = 0; for (cpu = 0; cpu < cpus; cpu++) { cpu_top->core_info[cpu].cpu = cpu; cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); - cpu_top->core_info[cpu].pkg = - sysfs_topology_read_file(cpu, "physical_package_id"); - if ((int)cpu_top->core_info[cpu].pkg != -1 && - cpu_top->core_info[cpu].pkg > cpu_top->pkgs) - cpu_top->pkgs = cpu_top->core_info[cpu].pkg; - cpu_top->core_info[cpu].core = - sysfs_topology_read_file(cpu, "core_id"); + if(sysfs_topology_read_file( + cpu, + "physical_package_id", + &(cpu_top->core_info[cpu].pkg)) < 0) + return -1; + if(sysfs_topology_read_file( + cpu, + "core_id", + &(cpu_top->core_info[cpu].core)) < 0) + return -1; } - cpu_top->pkgs++; qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), __compare); + /* Count the number of distinct pkgs values. This works + because the primary sort of the core_info struct was just + done by pkg value. */ + last_pkg = cpu_top->core_info[0].pkg; + for(cpu = 1; cpu < cpus; cpu++) { + if(cpu_top->core_info[cpu].pkg != last_pkg) { + last_pkg = cpu_top->core_info[cpu].pkg; + cpu_top->pkgs++; + } + } + cpu_top->pkgs++; + /* Intel's cores count is not consecutively numbered, there may * be a core_id of 3, but none of 2. Assume there always is 0 * Get amount of cores by counting duplicates in a package diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 0d6571e..c4bae92 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -39,6 +39,7 @@ static int mode; static int interval = 1; static char *show_monitors_param; static struct cpupower_topology cpu_top; +static unsigned int wake_cpus; /* ToDo: Document this in the manpage */ static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', }; @@ -84,7 +85,7 @@ int fill_string_with_spaces(char *s, int n) void print_header(int topology_depth) { int unsigned mon; - int state, need_len, pr_mon_len; + int state, need_len; cstate_t s; char buf[128] = ""; int percent_width = 4; @@ -93,7 +94,6 @@ void print_header(int topology_depth) printf("%s|", buf); for (mon = 0; mon < avail_monitors; mon++) { - pr_mon_len = 0; need_len = monitors[mon]->hw_states_num * (percent_width + 3) - 1; if (mon != 0) { @@ -315,16 +315,28 @@ int fork_it(char **argv) int do_interval_measure(int i) { unsigned int num; + int cpu; + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); for (num = 0; num < avail_monitors; num++) { dprint("HW C-state residency monitor: %s - States: %d\n", monitors[num]->name, monitors[num]->hw_states_num); monitors[num]->start(); } + sleep(i); + + if (wake_cpus) + for (cpu = 0; cpu < cpu_count; cpu++) + bind_cpu(cpu); + for (num = 0; num < avail_monitors; num++) monitors[num]->stop(); + return 0; } @@ -333,7 +345,7 @@ static void cmdline(int argc, char *argv[]) int opt; progname = basename(argv[0]); - while ((opt = getopt(argc, argv, "+li:m:")) != -1) { + while ((opt = getopt(argc, argv, "+lci:m:")) != -1) { switch (opt) { case 'l': if (mode) @@ -352,6 +364,9 @@ static void cmdline(int argc, char *argv[]) mode = show; show_monitors_param = optarg; break; + case 'c': + wake_cpus = 1; + break; default: print_wrong_arg_exit(); } diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9312ee1..9e43f33 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -65,4 +65,21 @@ extern long long timespec_diff_us(struct timespec start, struct timespec end); "could be inaccurate\n"), mes, ov); \ } + +/* Taken over from x86info project sources -> return 0 on success */ +#include <sched.h> +#include <sys/types.h> +#include <unistd.h> +static inline int bind_cpu(int cpu) +{ + cpu_set_t set; + + if (sched_getaffinity(getpid(), sizeof(set), &set) == 0) { + CPU_ZERO(&set); + CPU_SET(cpu, &set); + return sched_setaffinity(getpid(), sizeof(set), &set); + } + return 1; +} + #endif /* __CPUIDLE_INFO_HW__ */ diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index a1bc07cd..a99b43b 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -150,9 +150,15 @@ static struct cpuidle_monitor *snb_register(void) || cpupower_cpu_info.family != 6) return NULL; - if (cpupower_cpu_info.model != 0x2A - && cpupower_cpu_info.model != 0x2D) + switch (cpupower_cpu_info.model) { + case 0x2A: /* SNB */ + case 0x2D: /* SNB Xeon */ + case 0x3A: /* IVB */ + case 0x3E: /* IVB Xeon */ + break; + default: return NULL; + } is_valid = calloc(cpu_count, sizeof(int)); for (num = 0; num < SNB_CSTATE_COUNT; num++) { diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index b336b24..7300d07 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,9 +1,9 @@ # Makefile for vm selftests CC = $(CROSS_COMPILE)gcc -CFLAGS = -Wall -Wextra +CFLAGS = -Wall -all: hugepage-mmap hugepage-shm map_hugetlb +all: hugepage-mmap hugepage-shm map_hugetlb thuge-gen %: %.c $(CC) $(CFLAGS) -o $@ $^ diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c new file mode 100644 index 0000000..c879572 --- /dev/null +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -0,0 +1,254 @@ +/* Test selecting other page sizes for mmap/shmget. + + Before running this huge pages for each huge page size must have been + reserved. + For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + Also shmmax must be increased. + And you need to run as root to work around some weird permissions in shm. + And nothing using huge pages should run in parallel. + When the program aborts you may need to clean up the shm segments with + ipcrm -m by hand, like this + sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m + (warning this will remove all if someone else uses them) */ + +#define _GNU_SOURCE 1 +#include <sys/mman.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <glob.h> +#include <assert.h> +#include <unistd.h> +#include <stdarg.h> +#include <string.h> + +#define err(x) perror(x), exit(1) + +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f +#define MAP_HUGETLB 0x40000 + +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_HUGE_SHIFT 26 +#define SHM_HUGE_MASK 0x3f +#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) +#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) + +#define NUM_PAGESIZES 5 + +#define NUM_PAGES 4 + +#define Dprintf(fmt...) // printf(fmt) + +unsigned long page_sizes[NUM_PAGESIZES]; +int num_page_sizes; + +int ilog2(unsigned long v) +{ + int l = 0; + while ((1UL << l) < v) + l++; + return l; +} + +void find_pagesizes(void) +{ + glob_t g; + int i; + glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g); + assert(g.gl_pathc <= NUM_PAGESIZES); + for (i = 0; i < g.gl_pathc; i++) { + sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB", + &page_sizes[i]); + page_sizes[i] <<= 10; + printf("Found %luMB\n", page_sizes[i] >> 20); + } + num_page_sizes = g.gl_pathc; + globfree(&g); +} + +unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + free(line); + return hps; +} + +void show(unsigned long ps) +{ + char buf[100]; + if (ps == getpagesize()) + return; + printf("%luMB: ", ps >> 20); + fflush(stdout); + snprintf(buf, sizeof buf, + "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); + system(buf); +} + +unsigned long read_sysfs(int warn, char *fmt, ...) +{ + char *line = NULL; + size_t linelen = 0; + char buf[100]; + FILE *f; + va_list ap; + unsigned long val = 0; + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + + f = fopen(buf, "r"); + if (!f) { + if (warn) + printf("missing %s\n", buf); + return 0; + } + if (getline(&line, &linelen, f) > 0) { + sscanf(line, "%lu", &val); + } + fclose(f); + free(line); + return val; +} + +unsigned long read_free(unsigned long ps) +{ + return read_sysfs(ps != getpagesize(), + "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", + ps >> 10); +} + +void test_mmap(unsigned long size, unsigned flags) +{ + char *map; + unsigned long before, after; + int err; + + before = read_free(size); + map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, 0, 0); + + if (map == (char *)-1) err("mmap"); + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = munmap(map, size); + assert(!err); +} + +void test_shmget(unsigned long size, unsigned flags) +{ + int id; + unsigned long before, after; + int err; + + before = read_free(size); + id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags); + if (id < 0) err("shmget"); + + struct shm_info i; + if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl"); + Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss); + + + Dprintf("id %d\n", id); + char *map = shmat(id, NULL, 0600); + if (map == (char*)-1) err("shmat"); + + shmctl(id, IPC_RMID, NULL); + + memset(map, 0xff, size*NUM_PAGES); + after = read_free(size); + + Dprintf("before %lu after %lu diff %ld size %lu\n", + before, after, before - after, size); + assert(size == getpagesize() || (before - after) == NUM_PAGES); + show(size); + err = shmdt(map); + assert(!err); +} + +void sanity_checks(void) +{ + int i; + unsigned long largest = getpagesize(); + + for (i = 0; i < num_page_sizes; i++) { + if (page_sizes[i] > largest) + largest = page_sizes[i]; + + if (read_free(page_sizes[i]) < NUM_PAGES) { + printf("Not enough huge pages for page size %lu MB, need %u\n", + page_sizes[i] >> 20, + NUM_PAGES); + exit(0); + } + } + + if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) { + printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); + exit(0); + } + +#if defined(__x86_64__) + if (largest != 1U<<30) { + printf("No GB pages available on x86-64\n" + "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES); + exit(0); + } +#endif +} + +int main(void) +{ + int i; + unsigned default_hps = default_huge_page_size(); + + find_pagesizes(); + + sanity_checks(); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << MAP_HUGE_SHIFT; + printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg); + test_mmap(ps, MAP_HUGETLB | arg); + } + printf("Testing default huge mmap\n"); + test_mmap(default_hps, SHM_HUGETLB); + + puts("Testing non-huge shmget"); + test_shmget(getpagesize(), 0); + + for (i = 0; i < num_page_sizes; i++) { + unsigned long ps = page_sizes[i]; + int arg = ilog2(ps) << SHM_HUGE_SHIFT; + printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg); + test_shmget(ps, SHM_HUGETLB | arg); + } + puts("default huge shmget"); + test_shmget(default_hps, SHM_HUGETLB); + + return 0; +} |