From 0d4dd797564cddc1f71ab0b239e9ea50ddd40b2a Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sat, 25 Jan 2014 23:50:23 +0200 Subject: perf/doc: Remove mention of non-existent set_perf_event_pending() from design.txt set_perf_event_pending() was removed in e360adbe ("irq_work: Add generic hardirq context callbacks"). Signed-off-by: Baruch Siach Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/4c54761865d40210be0628cb84701afc5d57b5d8.1390686193.git.baruch@tkos.co.il Signed-off-by: Ingo Molnar --- tools/perf/design.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 67e5d0c..63a0e6f 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -454,7 +454,6 @@ So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you will need at least this: - asm/perf_event.h - a basic stub will suffice at first - support for atomic64 types (and associated helper functions) - - set_perf_event_pending() implemented If your architecture does have hardware capabilities, you can override the weak stub hw_perf_event_init() to register hardware counters. -- cgit v1.1 From 950b8354716eb1f9c0b39777d379efa5f4125c04 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 22 Jan 2014 21:58:46 +0200 Subject: perf tools: Demangle kernel and kernel module symbols too Some kernels contain C++ code, and thus their symbols need to be demangled. This allows 'perf kvm top' to generate readable output. Signed-off-by: Avi Kivity Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/26f71bf5bf7ee1408e3f1a803556d5df18223ef1.1390420726.git.avi@cloudius-systems.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7594567..8f12f0f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -922,6 +922,7 @@ int dso__load_sym(struct dso *dso, struct map *map, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } +new_symbol: /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access @@ -933,7 +934,6 @@ int dso__load_sym(struct dso *dso, struct map *map, if (demangled != NULL) elf_name = demangled; } -new_symbol: f = symbol__new(sym.st_value, sym.st_size, GELF_ST_BIND(sym.st_info), elf_name); free(demangled); -- cgit v1.1 From f428ebd184c82a7914b2aa7e9f868918aaf7ea78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jan 2014 16:40:02 +0100 Subject: perf tools: Fix AAAAARGH64 memory barriers Someone got the load and store barriers mixed up for AAAAARGH64. Turn them the right side up. Reported-by: Will Deacon Signed-off-by: Peter Zijlstra Fixes: a94d342b9cb0 ("tools/perf: Add required memory barriers") Cc: Ingo Molnar Cc: Will Deacon Link: http://lkml.kernel.org/r/20140124154002.GF31570@twins.programming.kicks-ass.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 7daa806..e84fa26 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -100,8 +100,8 @@ #ifdef __aarch64__ #define mb() asm volatile("dmb ish" ::: "memory") -#define wmb() asm volatile("dmb ishld" ::: "memory") -#define rmb() asm volatile("dmb ishst" ::: "memory") +#define wmb() asm volatile("dmb ishst" ::: "memory") +#define rmb() asm volatile("dmb ishld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif -- cgit v1.1 From 6a02652df511029127406cf8fa89cdf5e987f963 Mon Sep 17 00:00:00 2001 From: Francesco Fusco Date: Mon, 27 Jan 2014 14:39:13 +0100 Subject: perf tools: Fix include for non x86 architectures Commit 71ae8aac ("lib: introduce arch optimized hash library") added an include to for setting up an architecture specific fast hash. Since perf includes directly the non-uapi kernel header, it cannot find on non-x86 and thus prevents perf to be compiled on every architecture other than x86. The problem is the inclusion of in hash.h that results in the following error originating from util/evlist.c: fatal error: asm/hash.h: No such file or directory This commit simply adds an empty stub/file to fix the compile issue on non-x86 architectures. As perf does not use any of these new functions, it fixes the compilation and therefore seems to be the most appropriate solution to go with. Signed-off-by: Francesco Fusco Link: http://lkml.kernel.org/r/2cf8143aad65a6aa6fe30325ef8a65847141afa2.1390829373.git.ffusco@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/asm/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tools/perf/util/include/asm/hash.h diff --git a/tools/perf/util/include/asm/hash.h b/tools/perf/util/include/asm/hash.h new file mode 100644 index 0000000..d82b170b --- /dev/null +++ b/tools/perf/util/include/asm/hash.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_HASH_H +#define __ASM_GENERIC_HASH_H + +/* Stub */ + +#endif /* __ASM_GENERIC_HASH_H */ -- cgit v1.1 From 9176753d1ed56951a6ee2a0f0a3f367904e35567 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:36 +0200 Subject: perf symbols: Fix symbol annotation for relocated kernel Kernel maps map memory addresses to file offsets. For symbol annotation, objdump needs the object VMA addresses. For an unrelocated kernel, that is the same as the memory address. The addresses passed to objdump for symbol annotation did not take into account kernel relocation. This patch fixes that. Reported-by: Linus Torvalds Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 5 +++-- tools/perf/util/map.h | 1 + tools/perf/util/symbol-elf.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 3b97513..39cd2d0 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -39,6 +39,7 @@ void map__init(struct map *map, enum map_type type, map->start = start; map->end = end; map->pgoff = pgoff; + map->reloc = 0; map->dso = dso; map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; @@ -288,7 +289,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip) if (map->dso->rel) return rip - map->pgoff; - return map->unmap_ip(map, rip); + return map->unmap_ip(map, rip) - map->reloc; } /** @@ -311,7 +312,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) if (map->dso->rel) return map->unmap_ip(map, ip + map->pgoff); - return ip; + return ip + map->reloc; } void map_groups__init(struct map_groups *mg) diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 18068c6..257e513 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -36,6 +36,7 @@ struct map { bool erange_warned; u32 priv; u64 pgoff; + u64 reloc; u32 maj, min; /* only valid for MMAP2 record */ u64 ino; /* only valid for MMAP2 record */ u64 ino_generation;/* only valid for MMAP2 record */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8f12f0f..3e9f336 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -751,6 +751,8 @@ int dso__load_sym(struct dso *dso, struct map *map, if (strcmp(elf_name, kmap->ref_reloc_sym->name)) continue; kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + map->reloc = kmap->ref_reloc_sym->addr - + kmap->ref_reloc_sym->unrelocated_addr; break; } } -- cgit v1.1 From 29b596b57426831fce92cd0ebb01c77627616fdf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:37 +0200 Subject: perf tools: Add kallsyms__get_function_start() Separate out the logic used to find the start address of the reference symbol used to track kernel relocation. kallsyms__get_function_start() is used in subsequent patches. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 18 +++++++++++++++--- tools/perf/util/event.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1fc1c2f..17476df 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -470,6 +470,17 @@ static int find_symbol_cb(void *arg, const char *name, char type, return 1; } +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name) +{ + struct process_symbol_args args = { .name = symbol_name, }; + + if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0) + return 0; + + return args.start; +} + int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, @@ -480,13 +491,13 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, char path[PATH_MAX]; char name_buff[PATH_MAX]; struct map *map; + u64 start; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is * available use this, and after it is use this as a fallback for older * kernels. */ - struct process_symbol_args args = { .name = symbol_name, }; union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -513,7 +524,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, } } - if (kallsyms__parse(filename, &args, find_symbol_cb) <= 0) { + start = kallsyms__get_function_start(filename, symbol_name); + if (!start) { free(event); return -ENOENT; } @@ -525,7 +537,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = args.start; + event->mmap.pgoff = start; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index faf6e21..66a0c03 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -279,4 +279,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); +u64 kallsyms__get_function_start(const char *kallsyms_filename, + const char *symbol_name); + #endif /* __PERF_RECORD_H */ -- cgit v1.1 From 15a0a8706c32bd38bff9ebf7c6ef24f32d1ea921 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:38 +0200 Subject: perf machine: Add machine__get_kallsyms_filename() Separate out the logic used to make the kallsyms full path name for a machine. It will be reused in a subsequent patch. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ded7459..290c2e6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -496,19 +496,22 @@ static int symbol__in_kernel(void *arg, const char *name, return 1; } +static void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz) +{ + if (machine__is_default_guest(machine)) + scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); + else + scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir); +} + /* Figure out the start address of kernel map from /proc/kallsyms */ static u64 machine__get_kernel_start_addr(struct machine *machine) { - const char *filename; - char path[PATH_MAX]; + char filename[PATH_MAX]; struct process_args args; - if (machine__is_default_guest(machine)) - filename = (char *)symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } + machine__get_kallsyms_filename(machine, filename, PATH_MAX); if (symbol__restricted_filename(filename, "/proc/kallsyms")) return 0; -- cgit v1.1 From 5512cf24bed2de56f1ef44b6cc9a0a9b15499cea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:39 +0200 Subject: perf machine: Set up ref_reloc_sym in machine__create_kernel_maps() The ref_reloc_sym is always needed for the kernel map in order to check for relocation. Consequently set it up when the kernel map is created. Otherwise it was only being set up by 'perf record'. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-5-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 23 +++++++++++++++++++++++ tools/perf/util/machine.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 290c2e6..c872991 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -832,9 +832,25 @@ static int machine__create_modules(struct machine *machine) return 0; } +const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL}; + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); + char filename[PATH_MAX]; + const char *name; + u64 addr = 0; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr = kallsyms__get_function_start(filename, name); + if (addr) + break; + } + if (!addr) + return -1; if (kernel == NULL || __machine__create_kernel_maps(machine, kernel) < 0) @@ -853,6 +869,13 @@ int machine__create_kernel_maps(struct machine *machine) * Now that we have all the maps created, just set the ->end of them: */ map_groups__fixup_end(&machine->kmaps); + + if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, + addr)) { + machine__destroy_kernel_maps(machine); + return -1; + } + return 0; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 4771330..f77e91e 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -18,6 +18,8 @@ union perf_event; #define HOST_KERNEL_ID (-1) #define DEFAULT_GUEST_KERNEL_ID (0) +extern const char *ref_reloc_sym_names[]; + struct machine { struct rb_node rb_node; pid_t pid; -- cgit v1.1 From 0ae617bedde062003fd70e566e9a2601e273ea0e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:40 +0200 Subject: perf record: Get ref_reloc_sym from kernel map Now that ref_reloc_sym is set up when the kernel map is created, 'perf record' does not need to pass the symbol names to perf_event__synthesize_kernel_mmap() which can read the values needed from ref_reloc_sym directly. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 10 ++-------- tools/perf/util/event.c | 26 ++++++-------------------- tools/perf/util/event.h | 3 +-- 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3c394bf..af47531 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -287,10 +287,7 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data) * have no _text sometimes. */ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record guest kernel [%d]'s reference" " relocation symbol.\n", machine->pid); @@ -457,10 +454,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_text"); - if (err < 0) - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine, "_stext"); + machine); if (err < 0) pr_err("Couldn't record kernel reference relocation symbol\n" "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 17476df..b0f3ca8 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -483,15 +483,13 @@ u64 kallsyms__get_function_start(const char *kallsyms_filename, int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name) + struct machine *machine) { size_t size; - const char *filename, *mmap_name; - char path[PATH_MAX]; + const char *mmap_name; char name_buff[PATH_MAX]; struct map *map; - u64 start; + struct kmap *kmap; int err; /* * We should get this from /sys/kernel/sections/.text, but till that is @@ -513,31 +511,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, * see kernel/perf_event.c __perf_event_mmap */ event->header.misc = PERF_RECORD_MISC_KERNEL; - filename = "/proc/kallsyms"; } else { event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - if (machine__is_default_guest(machine)) - filename = (char *) symbol_conf.default_guest_kallsyms; - else { - sprintf(path, "%s/proc/kallsyms", machine->root_dir); - filename = path; - } - } - - start = kallsyms__get_function_start(filename, symbol_name); - if (!start) { - free(event); - return -ENOENT; } map = machine->vmlinux_maps[MAP__FUNCTION]; + kmap = map__kmap(map); size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), - "%s%s", mmap_name, symbol_name) + 1; + "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap.header.type = PERF_RECORD_MMAP; event->mmap.header.size = (sizeof(event->mmap) - (sizeof(event->mmap.filename) - size) + machine->id_hdr_size); - event->mmap.pgoff = start; + event->mmap.pgoff = kmap->ref_reloc_sym->addr; event->mmap.start = map->start; event->mmap.len = map->end - event->mmap.start; event->mmap.pid = machine->pid; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 66a0c03..851fa06 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -214,8 +214,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, struct machine *machine, bool mmap_data); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, - const char *symbol_name); + struct machine *machine); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, -- cgit v1.1 From a00d28cb72d3629c6481fe21ba6c6b4f96caed49 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:41 +0200 Subject: perf symbols: Prevent the use of kcore if the kernel has moved Use of kcore is predicated upon it matching the recorded data. If the kernel has been relocated at boot time (i.e. since the data was recorded) then do not use kcore. Note that it is possible to make a copy of kcore at the time the data is recorded using 'perf buildid-cache'. Then the perf tools will use the copy because it does match the data. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-7-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 39ce9ad..4ac1f87 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -976,6 +976,23 @@ static int validate_kcore_modules(const char *kallsyms_filename, return 0; } +static int validate_kcore_addresses(const char *kallsyms_filename, + struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { + u64 start; + + start = kallsyms__get_function_start(kallsyms_filename, + kmap->ref_reloc_sym->name); + if (start != kmap->ref_reloc_sym->addr) + return -EINVAL; + } + + return validate_kcore_modules(kallsyms_filename, map); +} + struct kcore_mapfn_data { struct dso *dso; enum map_type type; @@ -1019,8 +1036,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, kallsyms_filename)) return -EINVAL; - /* All modules must be present at their original addresses */ - if (validate_kcore_modules(kallsyms_filename, map)) + /* Modules and kernel must be present at their original addresses */ + if (validate_kcore_addresses(kallsyms_filename, map)) return -EINVAL; md.dso = dso; @@ -1424,7 +1441,7 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) continue; scnprintf(kallsyms_filename, sizeof(kallsyms_filename), "%s/%s/kallsyms", dir, dent->d_name); - if (!validate_kcore_modules(kallsyms_filename, map)) { + if (!validate_kcore_addresses(kallsyms_filename, map)) { strlcpy(dir, kallsyms_filename, dir_sz); ret = 0; break; @@ -1479,7 +1496,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) if (fd != -1) { close(fd); /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_modules("/proc/kallsyms", map)) + if (!validate_kcore_addresses("/proc/kallsyms", map)) goto proc_kallsyms; } -- cgit v1.1 From c080f72753def150993144d755379941f8b14683 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:42 +0200 Subject: perf tests: No need to set up ref_reloc_sym Now that ref_reloc_sym is set up by machine__create_kernel_maps(), the "vmlinux symtab matches kallsyms" test does have to do it. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-8-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 2bd13ed..3d90880 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -26,7 +26,6 @@ int test__vmlinux_matches_kallsyms(void) struct map *kallsyms_map, *vmlinux_map; struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; - struct ref_reloc_sym ref_reloc_sym = { .name = "_stext", }; u64 mem_start, mem_end; /* @@ -70,14 +69,6 @@ int test__vmlinux_matches_kallsyms(void) */ kallsyms_map = machine__kernel_map(&kallsyms, type); - sym = map__find_symbol_by_name(kallsyms_map, ref_reloc_sym.name, NULL); - if (sym == NULL) { - pr_debug("dso__find_symbol_by_name "); - goto out; - } - - ref_reloc_sym.addr = UM(sym->start); - /* * Step 5: * @@ -89,7 +80,6 @@ int test__vmlinux_matches_kallsyms(void) } vmlinux_map = machine__kernel_map(&vmlinux, type); - map__kmap(vmlinux_map)->ref_reloc_sym = &ref_reloc_sym; /* * Step 6: -- cgit v1.1 From d9b62aba87a82939c73f451a166c7a21342350d6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:43 +0200 Subject: perf tools: Adjust kallsyms for relocated kernel If the kernel is relocated at boot time, kallsyms will not match data recorded previously. That does not matter for modules because they are corrected anyway. It also does not matter if vmlinux is being used for symbols. But if perf tools has only kallsyms then the symbols will not match. Fix by applying the delta gained by comparing the old and current addresses of the relocation reference symbol. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-9-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4ac1f87..a9d758a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -627,7 +627,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int dso__split_kallsyms(struct dso *dso, struct map *map, +static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { struct map_groups *kmaps = map__kmap(map)->kmaps; @@ -692,6 +692,12 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, char dso_name[PATH_MAX]; struct dso *ndso; + if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; + } + if (count == 0) { curr_map = map; goto filter_symbol; @@ -721,6 +727,10 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; map_groups__insert(kmaps, curr_map); ++kernel_range; + } else if (delta) { + /* Kernel was relocated at boot time */ + pos->start -= delta; + pos->end -= delta; } filter_symbol: if (filter && filter(curr_map, pos)) { @@ -1130,15 +1140,41 @@ out_err: return -EINVAL; } +/* + * If the kernel is relocated at boot time, kallsyms won't match. Compute the + * delta based on the relocation reference symbol. + */ +static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) +{ + struct kmap *kmap = map__kmap(map); + u64 addr; + + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) + return 0; + + addr = kallsyms__get_function_start(filename, + kmap->ref_reloc_sym->name); + if (!addr) + return -1; + + *delta = addr - kmap->ref_reloc_sym->addr; + return 0; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { + u64 delta = 0; + if (symbol__restricted_filename(filename, "/proc/kallsyms")) return -1; if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; + if (kallsyms__delta(map, filename, &delta)) + return -1; + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); @@ -1150,7 +1186,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, if (!dso__load_kcore(dso, map, filename)) return dso__split_kallsyms_for_kcore(dso, map, filter); else - return dso__split_kallsyms(dso, map, filter); + return dso__split_kallsyms(dso, map, delta, filter); } static int dso__load_perf_map(struct dso *dso, struct map *map, -- cgit v1.1 From d3b70220292c40d3b499797fd2f33f608fc35edb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 29 Jan 2014 16:14:44 +0200 Subject: perf buildid-cache: Check relocation when checking for existing kcore perf buildid-cache does not make another copy of kcore if the buildid and modules match an existing copy. That does not take into account the possibility that the kernel has been relocated. Extend the check to check if the reference relocation symbol matches too, otherwise do make a copy. Signed-off-by: Adrian Hunter Tested-by: Jiri Olsa Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1391004884-10334-10-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index cfede86..b22dbb1 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -63,11 +63,35 @@ static int build_id_cache__kcore_dir(char *dir, size_t sz) return 0; } +static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) +{ + char from[PATH_MAX]; + char to[PATH_MAX]; + const char *name; + u64 addr1 = 0, addr2 = 0; + int i; + + scnprintf(from, sizeof(from), "%s/kallsyms", from_dir); + scnprintf(to, sizeof(to), "%s/kallsyms", to_dir); + + for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) { + addr1 = kallsyms__get_function_start(from, name); + if (addr1) + break; + } + + if (name) + addr2 = kallsyms__get_function_start(to, name); + + return addr1 == addr2; +} + static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, size_t to_dir_sz) { char from[PATH_MAX]; char to[PATH_MAX]; + char to_subdir[PATH_MAX]; struct dirent *dent; int ret = -1; DIR *d; @@ -86,10 +110,11 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, continue; scnprintf(to, sizeof(to), "%s/%s/modules", to_dir, dent->d_name); - if (!compare_proc_modules(from, to)) { - scnprintf(to, sizeof(to), "%s/%s", to_dir, - dent->d_name); - strlcpy(to_dir, to, to_dir_sz); + scnprintf(to_subdir, sizeof(to_subdir), "%s/%s", + to_dir, dent->d_name); + if (!compare_proc_modules(from, to) && + same_kallsyms_reloc(from_dir, to_subdir)) { + strlcpy(to_dir, to_subdir, to_dir_sz); ret = 0; break; } -- cgit v1.1 From 6a96e15096da6e7491107321cfa660c7c2aa119d Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 28 Jan 2014 14:45:41 -0500 Subject: selinux: add SOCK_DIAG_BY_FAMILY to the list of netlink message types The SELinux AF_NETLINK/NETLINK_SOCK_DIAG socket class was missing the SOCK_DIAG_BY_FAMILY definition which caused SELINUX_ERR messages when the ss tool was run. # ss Netid State Recv-Q Send-Q Local Address:Port Peer Address:Port u_str ESTAB 0 0 * 14189 * 14190 u_str ESTAB 0 0 * 14145 * 14144 u_str ESTAB 0 0 * 14151 * 14150 {...} # ausearch -m SELINUX_ERR ---- time->Thu Jan 23 11:11:16 2014 type=SYSCALL msg=audit(1390493476.445:374): arch=c000003e syscall=44 success=yes exit=40 a0=3 a1=7fff03aa11f0 a2=28 a3=0 items=0 ppid=1852 pid=1895 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="ss" exe="/usr/sbin/ss" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=SELINUX_ERR msg=audit(1390493476.445:374): SELinux: unrecognized netlink message type=20 for sclass=32 Signed-off-by: Paul Moore --- security/selinux/nlmsgtab.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 332ac8a..2df7b90 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "flask.h" #include "av_permissions.h" @@ -78,6 +79,7 @@ static struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, + { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_xfrm_perms[] = -- cgit v1.1 From 2172fa709ab32ca60e86179dc67d0857be8e2c98 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 30 Jan 2014 11:26:59 -0500 Subject: SELinux: Fix kernel BUG on empty security contexts. Setting an empty security context (length=0) on a file will lead to incorrectly dereferencing the type and other fields of the security context structure, yielding a kernel BUG. As a zero-length security context is never valid, just reject all such security contexts whether coming from userspace via setxattr or coming from the filesystem upon a getxattr request by SELinux. Setting a security context value (empty or otherwise) unknown to SELinux in the first place is only possible for a root process (CAP_MAC_ADMIN), and, if running SELinux in enforcing mode, only if the corresponding SELinux mac_admin permission is also granted to the domain by policy. In Fedora policies, this is only allowed for specific domains such as livecd for setting down security contexts that are not defined in the build host policy. Reproducer: su setenforce 0 touch foo setfattr -n security.selinux foo Caveat: Relabeling or removing foo after doing the above may not be possible without booting with SELinux disabled. Any subsequent access to foo after doing the above will also trigger the BUG. BUG output from Matthew Thode: [ 473.893141] ------------[ cut here ]------------ [ 473.962110] kernel BUG at security/selinux/ss/services.c:654! [ 473.995314] invalid opcode: 0000 [#6] SMP [ 474.027196] Modules linked in: [ 474.058118] CPU: 0 PID: 8138 Comm: ls Tainted: G D I 3.13.0-grsec #1 [ 474.116637] Hardware name: Supermicro X8ST3/X8ST3, BIOS 2.0 07/29/10 [ 474.149768] task: ffff8805f50cd010 ti: ffff8805f50cd488 task.ti: ffff8805f50cd488 [ 474.183707] RIP: 0010:[] [] context_struct_compute_av+0xce/0x308 [ 474.219954] RSP: 0018:ffff8805c0ac3c38 EFLAGS: 00010246 [ 474.252253] RAX: 0000000000000000 RBX: ffff8805c0ac3d94 RCX: 0000000000000100 [ 474.287018] RDX: ffff8805e8aac000 RSI: 00000000ffffffff RDI: ffff8805e8aaa000 [ 474.321199] RBP: ffff8805c0ac3cb8 R08: 0000000000000010 R09: 0000000000000006 [ 474.357446] R10: 0000000000000000 R11: ffff8805c567a000 R12: 0000000000000006 [ 474.419191] R13: ffff8805c2b74e88 R14: 00000000000001da R15: 0000000000000000 [ 474.453816] FS: 00007f2e75220800(0000) GS:ffff88061fc00000(0000) knlGS:0000000000000000 [ 474.489254] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 474.522215] CR2: 00007f2e74716090 CR3: 00000005c085e000 CR4: 00000000000207f0 [ 474.556058] Stack: [ 474.584325] ffff8805c0ac3c98 ffffffff811b549b ffff8805c0ac3c98 ffff8805f1190a40 [ 474.618913] ffff8805a6202f08 ffff8805c2b74e88 00068800d0464990 ffff8805e8aac860 [ 474.653955] ffff8805c0ac3cb8 000700068113833a ffff880606c75060 ffff8805c0ac3d94 [ 474.690461] Call Trace: [ 474.723779] [] ? lookup_fast+0x1cd/0x22a [ 474.778049] [] security_compute_av+0xf4/0x20b [ 474.811398] [] avc_compute_av+0x2a/0x179 [ 474.843813] [] avc_has_perm+0x45/0xf4 [ 474.875694] [] inode_has_perm+0x2a/0x31 [ 474.907370] [] selinux_inode_getattr+0x3c/0x3e [ 474.938726] [] security_inode_getattr+0x1b/0x22 [ 474.970036] [] vfs_getattr+0x19/0x2d [ 475.000618] [] vfs_fstatat+0x54/0x91 [ 475.030402] [] vfs_lstat+0x19/0x1b [ 475.061097] [] SyS_newlstat+0x15/0x30 [ 475.094595] [] ? __audit_syscall_entry+0xa1/0xc3 [ 475.148405] [] system_call_fastpath+0x16/0x1b [ 475.179201] Code: 00 48 85 c0 48 89 45 b8 75 02 0f 0b 48 8b 45 a0 48 8b 3d 45 d0 b6 00 8b 40 08 89 c6 ff ce e8 d1 b0 06 00 48 85 c0 49 89 c7 75 02 <0f> 0b 48 8b 45 b8 4c 8b 28 eb 1e 49 8d 7d 08 be 80 01 00 00 e8 [ 475.255884] RIP [] context_struct_compute_av+0xce/0x308 [ 475.296120] RSP [ 475.328734] ---[ end trace f076482e9d754adc ]--- Reported-by: Matthew Thode Signed-off-by: Stephen Smalley Cc: stable@vger.kernel.org Signed-off-by: Paul Moore --- security/selinux/ss/services.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index fc5a63a..f1e46d7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1232,6 +1232,10 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, struct context context; int rc = 0; + /* An empty security context is never valid. */ + if (!scontext_len) + return -EINVAL; + if (!ss_initialized) { int i; -- cgit v1.1 From 6cc98d90f8d14f8ebce2391323929024d7eef39f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 5 Feb 2014 16:19:21 -0500 Subject: Btrfs: fix assert screwup for the pending move stuff Wang noticed that he was failing btrfs/030 even though me and Filipe couldn't reproduce. Turns out this is because Wang didn't have CONFIG_BTRFS_ASSERT set, which meant that a key part of Filipe's original patch was not being built in. This appears to be a mess up with merging Filipe's patch as it does not exist in his original patch. Fix this by changing how we make sure del_waiting_dir_move asserts that it did not error and take the function out of the ifdef check. This makes btrfs/030 pass with the assert on or off. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/send.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index cf9107a..9c8d1a3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2774,8 +2774,6 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) return 0; } -#ifdef CONFIG_BTRFS_ASSERT - static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) { struct rb_node *n = sctx->waiting_dir_moves.rb_node; @@ -2796,8 +2794,6 @@ static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) return -ENOENT; } -#endif - static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; @@ -2902,7 +2898,9 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) } sctx->send_progress = sctx->cur_ino + 1; - ASSERT(del_waiting_dir_move(sctx, pm->ino) == 0); + ret = del_waiting_dir_move(sctx, pm->ino); + ASSERT(ret == 0); + ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); if (ret < 0) goto out; -- cgit v1.1 From d0270aca88966641eb15306e9bd0c7ad15321440 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Fri, 7 Feb 2014 14:33:57 +0100 Subject: btrfs: commit transaction after setting label and features The set_fslabel ioctl uses btrfs_end_transaction, which means it's possible that the change will be lost if the system crashes, same for the newly set features. Let's use btrfs_commit_transaction instead. Signed-off-by: Jeff Mahoney Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 34772cb..5bbf6b7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4547,7 +4547,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) spin_lock(&root->fs_info->super_lock); strcpy(super_block->label, label); spin_unlock(&root->fs_info->super_lock); - ret = btrfs_end_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); out_unlock: mnt_drop_write_file(file); @@ -4711,7 +4711,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) btrfs_set_super_incompat_flags(super_block, newflags); spin_unlock(&root->fs_info->super_lock); - return btrfs_end_transaction(trans, root); + return btrfs_commit_transaction(trans, root); } long btrfs_ioctl(struct file *file, unsigned int -- cgit v1.1 From 8051aa1a3d5aaa7bd4c062cad94d09c3d567ef2e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 7 Feb 2014 14:34:04 +0100 Subject: btrfs: reserve no transaction units in btrfs_ioctl_set_features Added in patch "btrfs: add ioctls to query/change feature bits online" modifications to superblock don't need to reserve metadata blocks when starting a transaction. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5bbf6b7..ebdd866 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4690,7 +4690,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg) if (ret) return ret; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) return PTR_ERR(trans); -- cgit v1.1 From 27a377db745ed4d11b3b9b340756857cb8dde07f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 7 Feb 2014 13:57:59 -0500 Subject: Btrfs: don't loop forever if we can't run because of the tree mod log A user reported a 100% cpu hang with my new delayed ref code. Turns out I forgot to increase the count check when we can't run a delayed ref because of the tree mod log. If we can't run any delayed refs during this there is no point in continuing to look, and we need to break out. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9c9ecc9..32312e0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2385,6 +2385,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, spin_unlock(&delayed_refs->lock); locked_ref = NULL; cond_resched(); + count++; continue; } -- cgit v1.1 From a2aa75e18a21b21952dc6daa9bac7c9f4426f81f Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Sat, 8 Feb 2014 15:47:46 +0000 Subject: Btrfs: fix data corruption when reading/updating compressed extents When using a mix of compressed file extents and prealloc extents, it is possible to fill a page of a file with random, garbage data from some unrelated previous use of the page, instead of a sequence of zeroes. A simple sequence of steps to get into such case, taken from the test case I made for xfstests, is: _scratch_mkfs _scratch_mount "-o compress-force=lzo" $XFS_IO_PROG -f -c "pwrite -S 0x06 -b 18670 266978 18670" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "falloc 26450 665194" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "truncate 542872" $SCRATCH_MNT/foobar $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar This results in the following file items in the fs tree: item 4 key (257 INODE_ITEM 0) itemoff 15879 itemsize 160 inode generation 6 transid 6 size 542872 block group 0 mode 100600 item 5 key (257 INODE_REF 256) itemoff 15863 itemsize 16 inode ref index 2 namelen 6 name: foobar item 6 key (257 EXTENT_DATA 0) itemoff 15810 itemsize 53 extent data disk byte 0 nr 0 gen 6 extent data offset 0 nr 24576 ram 266240 extent compression 0 item 7 key (257 EXTENT_DATA 24576) itemoff 15757 itemsize 53 prealloc data disk byte 12849152 nr 241664 gen 6 prealloc data offset 0 nr 241664 item 8 key (257 EXTENT_DATA 266240) itemoff 15704 itemsize 53 extent data disk byte 12845056 nr 4096 gen 6 extent data offset 0 nr 20480 ram 20480 extent compression 2 item 9 key (257 EXTENT_DATA 286720) itemoff 15651 itemsize 53 prealloc data disk byte 13090816 nr 405504 gen 6 prealloc data offset 0 nr 258048 The on disk extent at offset 266240 (which corresponds to 1 single disk block), contains 5 compressed chunks of file data. Each of the first 4 compress 4096 bytes of file data, while the last one only compresses 3024 bytes of file data. Therefore a read into the file region [285648 ; 286720[ (length = 4096 - 3024 = 1072 bytes) should always return zeroes (our next extent is a prealloc one). The solution here is the compression code path to zero the remaining (untouched) bytes of the last page it uncompressed data into, as the information about how much space the file data consumes in the last page is not known in the upper layer fs/btrfs/extent_io.c:__do_readpage(). In __do_readpage we were correctly zeroing the remainder of the page but only if it corresponds to the last page of the inode and if the inode's size is not a multiple of the page size. This would cause not only returning random data on reads, but also permanently storing random data when updating parts of the region that should be zeroed. For the example above, it means updating a single byte in the region [285648 ; 286720[ would store that byte correctly but also store random data on disk. A test case for xfstests follows soon. Signed-off-by: Filipe David Borba Manana Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index af815eb..ed1ff1c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1011,6 +1011,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start, bytes = min(bytes, working_bytes); kaddr = kmap_atomic(page_out); memcpy(kaddr + *pg_offset, buf + buf_offset, bytes); + if (*pg_index == (vcnt - 1) && *pg_offset == 0) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); kunmap_atomic(kaddr); flush_dcache_page(page_out); -- cgit v1.1 From d311d79de305f1ada47cadd672e6ed1b28a949eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 15:18:09 -0500 Subject: fix O_SYNC|O_APPEND syncing the wrong range on write() It actually goes back to 2004 ([PATCH] Concurrent O_SYNC write support) when sync_page_range() had been introduced; generic_file_write{,v}() correctly synced pos_after_write - written .. pos_after_write - 1 but generic_file_aio_write() synced pos_before_write .. pos_before_write + written - 1 instead. Which is not the same thing with O_APPEND, obviously. A couple of years later correct variant had been killed off when everything switched to use of generic_file_aio_write(). All users of generic_file_aio_write() are affected, and the same bug has been copied into other instances of ->aio_write(). The fix is trivial; the only subtle point is that generic_write_sync() ought to be inlined to avoid calculations useless for the majority of calls. Signed-off-by: Al Viro --- fs/cifs/file.c | 4 ++-- fs/ext4/file.c | 2 +- fs/ntfs/file.c | 2 +- fs/sync.c | 17 ----------------- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 8 +++++++- mm/filemap.c | 4 ++-- 7 files changed, 14 insertions(+), 25 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 853d6d1..a7eda8e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2559,8 +2559,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, if (rc > 0) { ssize_t err; - err = generic_write_sync(file, pos, rc); - if (err < 0 && rc > 0) + err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) rc = err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 43e64f6..1a50739 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -152,7 +152,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0 && ret > 0) ret = err; } diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ea4ba9d..db9bd8a 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2134,7 +2134,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0) { - int err = generic_write_sync(file, pos, ret); + int err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/fs/sync.c b/fs/sync.c index f155374..e8ba024 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -222,23 +222,6 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return do_fsync(fd, 1); } -/** - * generic_write_sync - perform syncing after a write if file / inode is sync - * @file: file to which the write happened - * @pos: offset where the write started - * @count: length of the write - * - * This is just a simple wrapper about our general syncing function. - */ -int generic_write_sync(struct file *file, loff_t pos, loff_t count) -{ - if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) - return 0; - return vfs_fsync_range(file, pos, pos + count - 1, - (file->f_flags & __O_SYNC) ? 0 : 1); -} -EXPORT_SYMBOL(generic_write_sync); - /* * sys_sync_file_range() permits finely controlled syncing over a segment of * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 2e7989e..64b48ea 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -799,7 +799,7 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c..75ff961 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2273,7 +2273,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); +static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +{ + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + return 0; + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 0 : 1); +} extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK diff --git a/mm/filemap.c b/mm/filemap.c index d56d3c1..7a13f6a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2553,8 +2553,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret > 0) { ssize_t err; - err = generic_write_sync(file, pos, ret); - if (err < 0 && ret > 0) + err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (err < 0) ret = err; } return ret; -- cgit v1.1 From c9efe51165fa0aff57be54e3cb0201ac87f68980 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 07:05:05 -0500 Subject: fix a kmap leak in virtio_console While we are at it, don't do kmap() under kmap_atomic(), *especially* for a page we'd allocated with GFP_KERNEL. It's spelled "page_address", and had that been more than that, we'd have a real trouble - kmap_high() can block, and doing that while holding kmap_atomic() is a Bad Idea(tm). Signed-off-by: Al Viro --- drivers/char/virtio_console.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index feea87c..6928d09 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -890,12 +890,10 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, } else { /* Failback to copying a page */ struct page *page = alloc_page(GFP_KERNEL); - char *src = buf->ops->map(pipe, buf, 1); - char *dst; + char *src; if (!page) return -ENOMEM; - dst = kmap(page); offset = sd->pos & ~PAGE_MASK; @@ -903,9 +901,8 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf, if (len + offset > PAGE_SIZE) len = PAGE_SIZE - offset; - memcpy(dst + offset, src + buf->offset, len); - - kunmap(page); + src = buf->ops->map(pipe, buf, 1); + memcpy(page_address(page) + offset, src + buf->offset, len); buf->ops->unmap(pipe, buf, src); sg_set_page(&(sgl->sg[sgl->n]), page, len, offset); -- cgit v1.1 From b28a960c42fcd9cfc987441fa6d1c1a471f0f9ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Feb 2014 18:15:47 -0800 Subject: Linux 3.14-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 606ef7c..933e1de 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v1.1 From c55d016f7a930dd1c995336017123b469a8c8f5a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 14 Feb 2014 08:24:24 +0100 Subject: x86/efi: Fix 32-bit fallout We do not enable the new efi memmap on 32-bit and thus we need to run runtime_code_page_mkexec() unconditionally there. Fix that. Reported-and-tested-by: Lejun Zhu Signed-off-by: Borislav Petkov Signed-off-by: Matt Fleming --- arch/x86/include/asm/efi.h | 2 ++ arch/x86/platform/efi/efi.c | 5 ++--- arch/x86/platform/efi/efi_32.c | 6 ++++++ arch/x86/platform/efi/efi_64.c | 9 +++++++++ 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3b978c4..3d6b9f8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -132,6 +132,8 @@ extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); extern void efi_setup_page_tables(void); extern void __init old_map_region(efi_memory_desc_t *md); +extern void __init runtime_code_page_mkexec(void); +extern void __init efi_runtime_mkexec(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index d62ec87..1a201ac 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -792,7 +792,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) set_memory_nx(addr, npages); } -static void __init runtime_code_page_mkexec(void) +void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; @@ -1069,8 +1069,7 @@ void __init efi_enter_virtual_mode(void) efi.update_capsule = virt_efi_update_capsule; efi.query_capsule_caps = virt_efi_query_capsule_caps; - if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) - runtime_code_page_mkexec(); + efi_runtime_mkexec(); kfree(new_memmap); diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 249b183..0b74cdf 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -77,3 +77,9 @@ void efi_call_phys_epilog(void) local_irq_restore(efi_rt_eflags); } + +void __init efi_runtime_mkexec(void) +{ + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); +} diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 6284f15..0c2a234 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -233,3 +233,12 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) { efi_setup = phys_addr + sizeof(struct setup_data); } + +void __init efi_runtime_mkexec(void) +{ + if (!efi_enabled(EFI_OLD_MEMMAP)) + return; + + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); +} -- cgit v1.1 From 09503379dc99535b1bbfa51aa1aeef340f5d82ec Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 13 Feb 2014 17:17:54 +0000 Subject: x86/efi: Check status field to validate BGRT header Madper reported seeing the following crash, BUG: unable to handle kernel paging request at ffffffffff340003 IP: [] efi_bgrt_init+0x9d/0x133 Call Trace: [] efi_late_init+0x9/0xb [] start_kernel+0x436/0x450 [] ? repair_env_string+0x5c/0x5c [] ? early_idt_handlers+0x120/0x120 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0x13e/0x14d This is caused because the layout of the ACPI BGRT header on this system doesn't match the definition from the ACPI spec, and so we get a bogus physical address when dereferencing ->image_address in efi_bgrt_init(). Luckily the status field in the BGRT header clearly marks it as invalid, so we can check that field and skip BGRT initialisation. Reported-by: Madper Xie Suggested-by: Toshi Kani Cc: "Rafael J. Wysocki" Cc: Matthew Garrett Cc: Josh Triplett Signed-off-by: Matt Fleming --- arch/x86/platform/efi/efi-bgrt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 4df9591..f15103d 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -42,7 +42,7 @@ void __init efi_bgrt_init(void) if (bgrt_tab->header.length < sizeof(*bgrt_tab)) return; - if (bgrt_tab->version != 1) + if (bgrt_tab->version != 1 || bgrt_tab->status != 1) return; if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) return; -- cgit v1.1