summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2016-02-10 00:08:51 +0000
committerjhb <jhb@FreeBSD.org>2016-02-10 00:08:51 +0000
commitcb56e836d9a9db908a5f1e021e6483e1e8aab048 (patch)
treef465785d769592288d951ba36ff13f6bf757cd2a /sys/kern
parentf1429e95c359a7e76f8385cdd47f887b7d1bbd0e (diff)
downloadFreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.zip
FreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.tar.gz
MFC 287442,287537,288944:
Fix corruption of coredumps due to procstat notes changing size during coredump generation. The changes in r287442 required some reworking since the 'fo_fill_kinfo' file op does not exist in stable/10. 287442: Detect badly behaved coredump note helpers Coredump notes depend on being able to invoke dump routines twice; once in a dry-run mode to get the size of the note, and another to actually emit the note to the corefile. When a note helper emits a different length section the second time around than the length it requested the first time, the kernel produces a corrupt coredump. NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' fd table via vn_fullpath. As vnodes may move around during dump, this is racy. So: - Detect badly behaved notes in putnote() and pad underfilled notes. - Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to exercise the NT_PROCSTAT_FILES corruption. It simply picks random lengths to expand or truncate paths to in fo_fill_kinfo_vnode(). - Add a sysctl, kern.coredump_pack_fileinfo, to allow users to disable kinfo packing for PROCSTAT_FILES notes. This should avoid both FILES note corruption and truncation, even if filenames change, at the cost of about 1 kiB in padding bloat per open fd. Document the new sysctl in core.5. - Fix note_procstat_files to self-limit in the 2nd pass. Since sometimes this will result in a short write, pad up to our advertised size. This addresses note corruption, at the risk of sometimes truncating the last several fd info entries. - Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the zero padding. 287537: Follow-up to r287442: Move sysctl to compiled-once file Avoid duplicate sysctl nodes. 288944: Fix core corruption caused by race in note_procstat_vmmap This fix is spiritually similar to r287442 and was discovered thanks to the KASSERT added in that revision. NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' vm map via vn_fullpath. As vnodes may move during coredump, this is racy. We do not remove the race, only prevent it from causing coredump corruption. - Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable kinfo packing for PROCSTAT_VMMAP notes. This avoids VMMAP corruption and truncation, even if names change, at the cost of up to PATH_MAX bytes per mapped object. The new sysctl is documented in core.5. - Fix note_procstat_vmmap to self-limit in the second pass. This addresses corruption, at the cost of sometimes producing a truncated result. - Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste) to grok the new zero padding. Approved by: re (gjb)
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/imgact_elf.c77
-rw-r--r--sys/kern/kern_descrip.c40
-rw-r--r--sys/kern/kern_exec.c10
-rw-r--r--sys/kern/kern_proc.c23
4 files changed, 129 insertions, 21 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 394d61f..e3c08de 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1709,7 +1709,8 @@ static void
__elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
{
Elf_Note note;
- ssize_t old_len;
+ ssize_t old_len, sect_len;
+ size_t new_len, descsz, i;
if (ninfo->type == -1) {
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
@@ -1728,7 +1729,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
return;
sbuf_start_section(sb, &old_len);
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
- sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
+ sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
+ if (sect_len < 0)
+ return;
+
+ new_len = (size_t)sect_len;
+ descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
+ if (new_len < descsz) {
+ /*
+ * It is expected that individual note emitters will correctly
+ * predict their expected output size and fill up to that size
+ * themselves, padding in a format-specific way if needed.
+ * However, in case they don't, just do it here with zeros.
+ */
+ for (i = 0; i < descsz - new_len; i++)
+ sbuf_putc(sb, 0);
+ } else if (new_len > descsz) {
+ /*
+ * We can't always truncate sb -- we may have drained some
+ * of it already.
+ */
+ KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
+ "read it (%zu > %zu). Since it is longer than "
+ "expected, this coredump's notes are corrupt. THIS "
+ "IS A BUG in the note_procstat routine for type %u.\n",
+ __func__, (unsigned)note.n_type, new_len, descsz,
+ (unsigned)note.n_type));
+ }
}
/*
@@ -1909,25 +1936,47 @@ static void
note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
- size_t size;
- int structsize;
+ size_t size, sect_sz, i;
+ ssize_t start_len, sect_len;
+ int structsize, filedesc_flags;
+
+ if (coredump_pack_fileinfo)
+ filedesc_flags = KERN_FILEDESC_PACK_KINFO;
+ else
+ filedesc_flags = 0;
p = (struct proc *)arg;
+ structsize = sizeof(struct kinfo_file);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_filedesc_out(p, sb, -1);
+ kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
- structsize = sizeof(struct kinfo_file);
+ sbuf_start_section(sb, &start_len);
+
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_filedesc_out(p, sb, -1);
+ kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
+ filedesc_flags);
+
+ sect_len = sbuf_end_section(sb, start_len, 0, 0);
+ if (sect_len < 0)
+ return;
+ sect_sz = sect_len;
+
+ KASSERT(sect_sz <= *sizep,
+ ("kern_proc_filedesc_out did not respect maxlen; "
+ "requested %zu, got %zu", *sizep - sizeof(structsize),
+ sect_sz - sizeof(structsize)));
+
+ for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
+ sbuf_putc(sb, 0);
}
}
@@ -1940,24 +1989,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
- int structsize;
+ int structsize, vmmap_flags;
+
+ if (coredump_pack_vmmapinfo)
+ vmmap_flags = KERN_VMMAP_PACK_KINFO;
+ else
+ vmmap_flags = 0;
p = (struct proc *)arg;
+ structsize = sizeof(struct kinfo_vmentry);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_vmmap_out(p, sb);
+ kern_proc_vmmap_out(p, sb, -1, vmmap_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
- structsize = sizeof(struct kinfo_vmentry);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_vmmap_out(p, sb);
+ kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
+ vmmap_flags);
}
}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 4930e2f..7561368 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#include <sys/conf.h>
#include <sys/domain.h>
+#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -3299,6 +3300,7 @@ struct export_fd_buf {
struct sbuf *sb;
ssize_t remainder;
struct kinfo_file kif;
+ int flags;
};
static int
@@ -3385,9 +3387,12 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
kif->kf_type = type;
kif->kf_ref_count = refcnt;
kif->kf_offset = offset;
- /* Pack record size down */
- kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
- strlen(kif->kf_path) + 1;
+ if ((efbuf->flags & KERN_FILEDESC_PACK_KINFO) != 0)
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ else
+ kif->kf_structsize = sizeof(*kif);
kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
if (efbuf->remainder != -1) {
if (efbuf->remainder < kif->kf_structsize) {
@@ -3413,7 +3418,8 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
* Takes a locked proc as argument, and returns with the proc unlocked.
*/
int
-kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
+kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags)
{
struct file *fp;
struct filedesc *fdp;
@@ -3448,6 +3454,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->fdp = NULL;
efbuf->sb = sb;
efbuf->remainder = maxlen;
+ efbuf->flags = flags;
if (tracevp != NULL)
export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
FREAD | FWRITE, -1, -1, NULL, efbuf);
@@ -3597,7 +3604,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
return (error);
}
maxlen = req->oldptr != NULL ? req->oldlen : -1;
- error = kern_proc_filedesc_out(p, &sb, maxlen);
+ error = kern_proc_filedesc_out(p, &sb, maxlen,
+ KERN_FILEDESC_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
@@ -3635,6 +3643,24 @@ vntype_to_kinfo(int vtype)
return (KF_VTYPE_UNKNOWN);
}
+static inline void
+vn_fill_junk(struct kinfo_file *kif)
+{
+ size_t len, olen;
+
+ /*
+ * Simulate vn_fullpath returning changing values for a given
+ * vp during e.g. coredump.
+ */
+ len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1;
+ olen = strlen(kif->kf_path);
+ if (len < olen)
+ strcpy(&kif->kf_path[len - 1], "$");
+ else
+ for (; olen < len; olen++)
+ strcpy(&kif->kf_path[olen], "A");
+}
+
static int
fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
{
@@ -3654,6 +3680,10 @@ fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
if (freepath != NULL)
free(freepath, M_TEMP);
+ KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path,
+ vn_fill_junk(kif);
+ );
+
/*
* Retrieve vnode attributes.
*/
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index f82ffec..47ea9b0 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -102,6 +102,16 @@ SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
+int coredump_pack_fileinfo = 1;
+SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
+ &coredump_pack_fileinfo, 0,
+ "Enable file path packing in 'procstat -f' coredump notes");
+
+int coredump_pack_vmmapinfo = 1;
+SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
+ &coredump_pack_vmmapinfo, 0,
+ "Enable file path packing in 'procstat -v' coredump notes");
+
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 6b60840..cfaa354 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2228,7 +2228,7 @@ next:;
* Must be called with the process locked and will return unlocked.
*/
int
-kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
+kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
{
vm_map_entry_t entry, tmp_entry;
struct vattr va;
@@ -2252,7 +2252,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
PRELE(p);
return (ESRCH);
}
- kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
+ kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
error = 0;
map = &vm->vm_map;
@@ -2387,10 +2387,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
free(freepath, M_TEMP);
/* Pack record size down */
- kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
- strlen(kve->kve_path) + 1;
+ if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
+ kve->kve_structsize =
+ offsetof(struct kinfo_vmentry, kve_path) +
+ strlen(kve->kve_path) + 1;
+ else
+ kve->kve_structsize = sizeof(*kve);
kve->kve_structsize = roundup(kve->kve_structsize,
sizeof(uint64_t));
+
+ /* Halt filling and truncate rather than exceeding maxlen */
+ if (maxlen != -1 && maxlen < kve->kve_structsize) {
+ error = 0;
+ vm_map_lock_read(map);
+ break;
+ } else if (maxlen != -1)
+ maxlen -= kve->kve_structsize;
+
if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
error = ENOMEM;
vm_map_lock_read(map);
@@ -2422,7 +2435,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
sbuf_delete(&sb);
return (error);
}
- error = kern_proc_vmmap_out(p, &sb);
+ error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
OpenPOWER on IntegriCloud