diff options
author | jhb <jhb@FreeBSD.org> | 2016-02-10 00:08:51 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2016-02-10 00:08:51 +0000 |
commit | cb56e836d9a9db908a5f1e021e6483e1e8aab048 (patch) | |
tree | f465785d769592288d951ba36ff13f6bf757cd2a | |
parent | f1429e95c359a7e76f8385cdd47f887b7d1bbd0e (diff) | |
download | FreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.zip FreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.tar.gz |
MFC 287442,287537,288944:
Fix corruption of coredumps due to procstat notes changing size during
coredump generation. The changes in r287442 required some reworking
since the 'fo_fill_kinfo' file op does not exist in stable/10.
287442:
Detect badly behaved coredump note helpers
Coredump notes depend on being able to invoke dump routines twice; once
in a dry-run mode to get the size of the note, and another to actually
emit the note to the corefile.
When a note helper emits a different length section the second time
around than the length it requested the first time, the kernel produces
a corrupt coredump.
NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' fd table
via vn_fullpath. As vnodes may move around during dump, this is racy.
So:
- Detect badly behaved notes in putnote() and pad underfilled notes.
- Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to
exercise the NT_PROCSTAT_FILES corruption. It simply picks random
lengths to expand or truncate paths to in fo_fill_kinfo_vnode().
- Add a sysctl, kern.coredump_pack_fileinfo, to allow users to
disable kinfo packing for PROCSTAT_FILES notes. This should avoid
both FILES note corruption and truncation, even if filenames change,
at the cost of about 1 kiB in padding bloat per open fd. Document
the new sysctl in core.5.
- Fix note_procstat_files to self-limit in the 2nd pass. Since
sometimes this will result in a short write, pad up to our advertised
size. This addresses note corruption, at the risk of sometimes
truncating the last several fd info entries.
- Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the
zero padding.
287537:
Follow-up to r287442: Move sysctl to compiled-once file
Avoid duplicate sysctl nodes.
288944:
Fix core corruption caused by race in note_procstat_vmmap
This fix is spiritually similar to r287442 and was discovered thanks to
the KASSERT added in that revision.
NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' vm map
via vn_fullpath. As vnodes may move during coredump, this is racy.
We do not remove the race, only prevent it from causing coredump
corruption.
- Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable
kinfo packing for PROCSTAT_VMMAP notes. This avoids VMMAP corruption
and truncation, even if names change, at the cost of up to PATH_MAX
bytes per mapped object. The new sysctl is documented in core.5.
- Fix note_procstat_vmmap to self-limit in the second pass. This
addresses corruption, at the cost of sometimes producing a truncated
result.
- Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste)
to grok the new zero padding.
Approved by: re (gjb)
-rw-r--r-- | lib/libprocstat/libprocstat.c | 8 | ||||
-rw-r--r-- | lib/libutil/kinfo_getfile.c | 4 | ||||
-rw-r--r-- | lib/libutil/kinfo_getvmmap.c | 4 | ||||
-rw-r--r-- | share/man/man5/core.5 | 25 | ||||
-rw-r--r-- | sys/kern/imgact_elf.c | 77 | ||||
-rw-r--r-- | sys/kern/kern_descrip.c | 40 | ||||
-rw-r--r-- | sys/kern/kern_exec.c | 10 | ||||
-rw-r--r-- | sys/kern/kern_proc.c | 23 | ||||
-rw-r--r-- | sys/sys/exec.h | 3 | ||||
-rw-r--r-- | sys/sys/user.h | 11 |
10 files changed, 181 insertions, 24 deletions
diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 727419b..7b0722c 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -767,6 +767,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp) eb = buf + len; while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; bp += kf->kf_structsize; cnt++; } @@ -782,6 +784,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kf, kf->kf_structsize); /* Advance to next packed record */ @@ -1863,6 +1867,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp) eb = buf + len; while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; bp += kv->kve_structsize; cnt++; } @@ -1878,6 +1884,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kv, kv->kve_structsize); /* Advance to next packed record */ diff --git a/lib/libutil/kinfo_getfile.c b/lib/libutil/kinfo_getfile.c index 84b64db..8a5477f 100644 --- a/lib/libutil/kinfo_getfile.c +++ b/lib/libutil/kinfo_getfile.c @@ -44,6 +44,8 @@ kinfo_getfile(pid_t pid, int *cntp) eb = buf + len; while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; bp += kf->kf_structsize; cnt++; } @@ -59,6 +61,8 @@ kinfo_getfile(pid_t pid, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kf, kf->kf_structsize); /* Advance to next packed record */ diff --git a/lib/libutil/kinfo_getvmmap.c b/lib/libutil/kinfo_getvmmap.c index 129aa03..9d9e427 100644 --- a/lib/libutil/kinfo_getvmmap.c +++ b/lib/libutil/kinfo_getvmmap.c @@ -44,6 +44,8 @@ kinfo_getvmmap(pid_t pid, int *cntp) eb = buf + len; while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; bp += kv->kve_structsize; cnt++; } @@ -59,6 +61,8 @@ kinfo_getvmmap(pid_t pid, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kv, kv->kve_structsize); /* Advance to next packed record */ diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 index e8a1653..a5fefc0 100644 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -32,7 +32,7 @@ .\" @(#)core.5 8.3 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd November 22, 2012 +.Dd October 5, 2015 .Dt CORE 5 .Os .Sh NAME @@ -126,6 +126,29 @@ Core files will have the suffix .Em .gz appended to them. .El +.Sh NOTES +Corefiles are written with open file descriptor information as an ELF note. +By default, file paths are packed to only use as much space as needed. +However, file paths can change at any time, including during core dump, +and this can result in truncated file descriptor data. +.Pp +All file descriptor information can be preserved by disabling packing. +This potentially wastes up to PATH_MAX bytes per open fd. +Packing is disabled with +.Dl sysctl kern.coredump_pack_fileinfo=0 . +.Pp +Similarly, corefiles are written with vmmap information as an ELF note, which +contains file paths. +By default, they are packed to only use as much space as +needed. +By the same mechanism as for the open files note, these paths can also +change at any time and result in a truncated note. +.Pp +All vmmap information can be preserved by disabling packing. +Like the file information, this potentially wastes up to PATH_MAX bytes per +mapped object. +Packing is disabled with +.Dl sysctl kern.coredump_pack_vmmapinfo=0 . .Sh EXAMPLES In order to store all core images in per-user private areas under .Pa /var/coredumps , diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 394d61f..e3c08de 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1709,7 +1709,8 @@ static void __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; - ssize_t old_len; + ssize_t old_len, sect_len; + size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); @@ -1728,7 +1729,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); - sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); + sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); + if (sect_len < 0) + return; + + new_len = (size_t)sect_len; + descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); + if (new_len < descsz) { + /* + * It is expected that individual note emitters will correctly + * predict their expected output size and fill up to that size + * themselves, padding in a format-specific way if needed. + * However, in case they don't, just do it here with zeros. + */ + for (i = 0; i < descsz - new_len; i++) + sbuf_putc(sb, 0); + } else if (new_len > descsz) { + /* + * We can't always truncate sb -- we may have drained some + * of it already. + */ + KASSERT(new_len == descsz, ("%s: Note type %u changed as we " + "read it (%zu > %zu). Since it is longer than " + "expected, this coredump's notes are corrupt. THIS " + "IS A BUG in the note_procstat routine for type %u.\n", + __func__, (unsigned)note.n_type, new_len, descsz, + (unsigned)note.n_type)); + } } /* @@ -1909,25 +1936,47 @@ static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; - size_t size; - int structsize; + size_t size, sect_sz, i; + ssize_t start_len, sect_len; + int structsize, filedesc_flags; + + if (coredump_pack_fileinfo) + filedesc_flags = KERN_FILEDESC_PACK_KINFO; + else + filedesc_flags = 0; p = (struct proc *)arg; + structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_filedesc_out(p, sb, -1); + kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { - structsize = sizeof(struct kinfo_file); + sbuf_start_section(sb, &start_len); + sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_filedesc_out(p, sb, -1); + kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), + filedesc_flags); + + sect_len = sbuf_end_section(sb, start_len, 0, 0); + if (sect_len < 0) + return; + sect_sz = sect_len; + + KASSERT(sect_sz <= *sizep, + ("kern_proc_filedesc_out did not respect maxlen; " + "requested %zu, got %zu", *sizep - sizeof(structsize), + sect_sz - sizeof(structsize))); + + for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) + sbuf_putc(sb, 0); } } @@ -1940,24 +1989,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; - int structsize; + int structsize, vmmap_flags; + + if (coredump_pack_vmmapinfo) + vmmap_flags = KERN_VMMAP_PACK_KINFO; + else + vmmap_flags = 0; p = (struct proc *)arg; + structsize = sizeof(struct kinfo_vmentry); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_vmmap_out(p, sb); + kern_proc_vmmap_out(p, sb, -1, vmmap_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { - structsize = sizeof(struct kinfo_vmentry); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_vmmap_out(p, sb); + kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), + vmmap_flags); } } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 4930e2f..7561368 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/capsicum.h> #include <sys/conf.h> #include <sys/domain.h> +#include <sys/fail.h> #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> @@ -3299,6 +3300,7 @@ struct export_fd_buf { struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; + int flags; }; static int @@ -3385,9 +3387,12 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, kif->kf_type = type; kif->kf_ref_count = refcnt; kif->kf_offset = offset; - /* Pack record size down */ - kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + - strlen(kif->kf_path) + 1; + if ((efbuf->flags & KERN_FILEDESC_PACK_KINFO) != 0) + /* Pack record size down */ + kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + + strlen(kif->kf_path) + 1; + else + kif->kf_structsize = sizeof(*kif); kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); if (efbuf->remainder != -1) { if (efbuf->remainder < kif->kf_structsize) { @@ -3413,7 +3418,8 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, * Takes a locked proc as argument, and returns with the proc unlocked. */ int -kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) +kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags) { struct file *fp; struct filedesc *fdp; @@ -3448,6 +3454,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) efbuf->fdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; + efbuf->flags = flags; if (tracevp != NULL) export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, FREAD | FWRITE, -1, -1, NULL, efbuf); @@ -3597,7 +3604,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; - error = kern_proc_filedesc_out(p, &sb, maxlen); + error = kern_proc_filedesc_out(p, &sb, maxlen, + KERN_FILEDESC_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); @@ -3635,6 +3643,24 @@ vntype_to_kinfo(int vtype) return (KF_VTYPE_UNKNOWN); } +static inline void +vn_fill_junk(struct kinfo_file *kif) +{ + size_t len, olen; + + /* + * Simulate vn_fullpath returning changing values for a given + * vp during e.g. coredump. + */ + len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1; + olen = strlen(kif->kf_path); + if (len < olen) + strcpy(&kif->kf_path[len - 1], "$"); + else + for (; olen < len; olen++) + strcpy(&kif->kf_path[olen], "A"); +} + static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif) { @@ -3654,6 +3680,10 @@ fill_vnode_info(struct vnode *vp, struct kinfo_file *kif) if (freepath != NULL) free(freepath, M_TEMP); + KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path, + vn_fill_junk(kif); + ); + /* * Retrieve vnode attributes. */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index f82ffec..47ea9b0 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -102,6 +102,16 @@ SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); +int coredump_pack_fileinfo = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, + &coredump_pack_fileinfo, 0, + "Enable file path packing in 'procstat -f' coredump notes"); + +int coredump_pack_vmmapinfo = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, + &coredump_pack_vmmapinfo, 0, + "Enable file path packing in 'procstat -v' coredump notes"); + static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 6b60840..cfaa354 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -2228,7 +2228,7 @@ next:; * Must be called with the process locked and will return unlocked. */ int -kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) +kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; @@ -2252,7 +2252,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) PRELE(p); return (ESRCH); } - kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); + kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; @@ -2387,10 +2387,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) free(freepath, M_TEMP); /* Pack record size down */ - kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + - strlen(kve->kve_path) + 1; + if ((flags & KERN_VMMAP_PACK_KINFO) != 0) + kve->kve_structsize = + offsetof(struct kinfo_vmentry, kve_path) + + strlen(kve->kve_path) + 1; + else + kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); + + /* Halt filling and truncate rather than exceeding maxlen */ + if (maxlen != -1 && maxlen < kve->kve_structsize) { + error = 0; + vm_map_lock_read(map); + break; + } else if (maxlen != -1) + maxlen -= kve->kve_structsize; + if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); @@ -2422,7 +2435,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) sbuf_delete(&sb); return (error); } - error = kern_proc_vmmap_out(p, &sb); + error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); diff --git a/sys/sys/exec.h b/sys/sys/exec.h index cccebfd..0ccba24 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -83,6 +83,9 @@ void exec_unmap_first_page(struct image_params *); int exec_register(const struct execsw *); int exec_unregister(const struct execsw *); +extern int coredump_pack_fileinfo; +extern int coredump_pack_vmmapinfo; + /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the diff --git a/sys/sys/user.h b/sys/sys/user.h index 9374daf..a697b66 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -536,6 +536,11 @@ struct kinfo_sigtramp { #define KERN_PROC_NOTHREADS 0x1 #define KERN_PROC_MASK32 0x2 +/* Flags for kern_proc_filedesc_out. */ +#define KERN_FILEDESC_PACK_KINFO 0x00000001U + +/* Flags for kern_proc_vmmap_out. */ +#define KERN_VMMAP_PACK_KINFO 0x00000001U struct sbuf; /* @@ -547,9 +552,11 @@ struct sbuf; * to be locked on enter. On return the process is unlocked. */ -int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen); +int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags); int kern_proc_out(struct proc *p, struct sbuf *sb, int flags); -int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb); +int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags); int vntype_to_kinfo(int vtype); #endif /* !_KERNEL */ |