summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2016-02-10 00:08:51 +0000
committerjhb <jhb@FreeBSD.org>2016-02-10 00:08:51 +0000
commitcb56e836d9a9db908a5f1e021e6483e1e8aab048 (patch)
treef465785d769592288d951ba36ff13f6bf757cd2a
parentf1429e95c359a7e76f8385cdd47f887b7d1bbd0e (diff)
downloadFreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.zip
FreeBSD-src-cb56e836d9a9db908a5f1e021e6483e1e8aab048.tar.gz
MFC 287442,287537,288944:
Fix corruption of coredumps due to procstat notes changing size during coredump generation. The changes in r287442 required some reworking since the 'fo_fill_kinfo' file op does not exist in stable/10. 287442: Detect badly behaved coredump note helpers Coredump notes depend on being able to invoke dump routines twice; once in a dry-run mode to get the size of the note, and another to actually emit the note to the corefile. When a note helper emits a different length section the second time around than the length it requested the first time, the kernel produces a corrupt coredump. NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' fd table via vn_fullpath. As vnodes may move around during dump, this is racy. So: - Detect badly behaved notes in putnote() and pad underfilled notes. - Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to exercise the NT_PROCSTAT_FILES corruption. It simply picks random lengths to expand or truncate paths to in fo_fill_kinfo_vnode(). - Add a sysctl, kern.coredump_pack_fileinfo, to allow users to disable kinfo packing for PROCSTAT_FILES notes. This should avoid both FILES note corruption and truncation, even if filenames change, at the cost of about 1 kiB in padding bloat per open fd. Document the new sysctl in core.5. - Fix note_procstat_files to self-limit in the 2nd pass. Since sometimes this will result in a short write, pad up to our advertised size. This addresses note corruption, at the risk of sometimes truncating the last several fd info entries. - Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the zero padding. 287537: Follow-up to r287442: Move sysctl to compiled-once file Avoid duplicate sysctl nodes. 288944: Fix core corruption caused by race in note_procstat_vmmap This fix is spiritually similar to r287442 and was discovered thanks to the KASSERT added in that revision. NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' vm map via vn_fullpath. As vnodes may move during coredump, this is racy. We do not remove the race, only prevent it from causing coredump corruption. - Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable kinfo packing for PROCSTAT_VMMAP notes. This avoids VMMAP corruption and truncation, even if names change, at the cost of up to PATH_MAX bytes per mapped object. The new sysctl is documented in core.5. - Fix note_procstat_vmmap to self-limit in the second pass. This addresses corruption, at the cost of sometimes producing a truncated result. - Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste) to grok the new zero padding. Approved by: re (gjb)
-rw-r--r--lib/libprocstat/libprocstat.c8
-rw-r--r--lib/libutil/kinfo_getfile.c4
-rw-r--r--lib/libutil/kinfo_getvmmap.c4
-rw-r--r--share/man/man5/core.525
-rw-r--r--sys/kern/imgact_elf.c77
-rw-r--r--sys/kern/kern_descrip.c40
-rw-r--r--sys/kern/kern_exec.c10
-rw-r--r--sys/kern/kern_proc.c23
-rw-r--r--sys/sys/exec.h3
-rw-r--r--sys/sys/user.h11
10 files changed, 181 insertions, 24 deletions
diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c
index 727419b..7b0722c 100644
--- a/lib/libprocstat/libprocstat.c
+++ b/lib/libprocstat/libprocstat.c
@@ -767,6 +767,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
+ if (kf->kf_structsize == 0)
+ break;
bp += kf->kf_structsize;
cnt++;
}
@@ -782,6 +784,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
+ if (kf->kf_structsize == 0)
+ break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */
@@ -1863,6 +1867,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
+ if (kv->kve_structsize == 0)
+ break;
bp += kv->kve_structsize;
cnt++;
}
@@ -1878,6 +1884,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
+ if (kv->kve_structsize == 0)
+ break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */
diff --git a/lib/libutil/kinfo_getfile.c b/lib/libutil/kinfo_getfile.c
index 84b64db..8a5477f 100644
--- a/lib/libutil/kinfo_getfile.c
+++ b/lib/libutil/kinfo_getfile.c
@@ -44,6 +44,8 @@ kinfo_getfile(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
+ if (kf->kf_structsize == 0)
+ break;
bp += kf->kf_structsize;
cnt++;
}
@@ -59,6 +61,8 @@ kinfo_getfile(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
+ if (kf->kf_structsize == 0)
+ break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */
diff --git a/lib/libutil/kinfo_getvmmap.c b/lib/libutil/kinfo_getvmmap.c
index 129aa03..9d9e427 100644
--- a/lib/libutil/kinfo_getvmmap.c
+++ b/lib/libutil/kinfo_getvmmap.c
@@ -44,6 +44,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
+ if (kv->kve_structsize == 0)
+ break;
bp += kv->kve_structsize;
cnt++;
}
@@ -59,6 +61,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
+ if (kv->kve_structsize == 0)
+ break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */
diff --git a/share/man/man5/core.5 b/share/man/man5/core.5
index e8a1653..a5fefc0 100644
--- a/share/man/man5/core.5
+++ b/share/man/man5/core.5
@@ -32,7 +32,7 @@
.\" @(#)core.5 8.3 (Berkeley) 12/11/93
.\" $FreeBSD$
.\"
-.Dd November 22, 2012
+.Dd October 5, 2015
.Dt CORE 5
.Os
.Sh NAME
@@ -126,6 +126,29 @@ Core files will have the suffix
.Em .gz
appended to them.
.El
+.Sh NOTES
+Corefiles are written with open file descriptor information as an ELF note.
+By default, file paths are packed to only use as much space as needed.
+However, file paths can change at any time, including during core dump,
+and this can result in truncated file descriptor data.
+.Pp
+All file descriptor information can be preserved by disabling packing.
+This potentially wastes up to PATH_MAX bytes per open fd.
+Packing is disabled with
+.Dl sysctl kern.coredump_pack_fileinfo=0 .
+.Pp
+Similarly, corefiles are written with vmmap information as an ELF note, which
+contains file paths.
+By default, they are packed to only use as much space as
+needed.
+By the same mechanism as for the open files note, these paths can also
+change at any time and result in a truncated note.
+.Pp
+All vmmap information can be preserved by disabling packing.
+Like the file information, this potentially wastes up to PATH_MAX bytes per
+mapped object.
+Packing is disabled with
+.Dl sysctl kern.coredump_pack_vmmapinfo=0 .
.Sh EXAMPLES
In order to store all core images in per-user private areas under
.Pa /var/coredumps ,
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 394d61f..e3c08de 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -1709,7 +1709,8 @@ static void
__elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
{
Elf_Note note;
- ssize_t old_len;
+ ssize_t old_len, sect_len;
+ size_t new_len, descsz, i;
if (ninfo->type == -1) {
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
@@ -1728,7 +1729,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
return;
sbuf_start_section(sb, &old_len);
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
- sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
+ sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
+ if (sect_len < 0)
+ return;
+
+ new_len = (size_t)sect_len;
+ descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
+ if (new_len < descsz) {
+ /*
+ * It is expected that individual note emitters will correctly
+ * predict their expected output size and fill up to that size
+ * themselves, padding in a format-specific way if needed.
+ * However, in case they don't, just do it here with zeros.
+ */
+ for (i = 0; i < descsz - new_len; i++)
+ sbuf_putc(sb, 0);
+ } else if (new_len > descsz) {
+ /*
+ * We can't always truncate sb -- we may have drained some
+ * of it already.
+ */
+ KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
+ "read it (%zu > %zu). Since it is longer than "
+ "expected, this coredump's notes are corrupt. THIS "
+ "IS A BUG in the note_procstat routine for type %u.\n",
+ __func__, (unsigned)note.n_type, new_len, descsz,
+ (unsigned)note.n_type));
+ }
}
/*
@@ -1909,25 +1936,47 @@ static void
note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
- size_t size;
- int structsize;
+ size_t size, sect_sz, i;
+ ssize_t start_len, sect_len;
+ int structsize, filedesc_flags;
+
+ if (coredump_pack_fileinfo)
+ filedesc_flags = KERN_FILEDESC_PACK_KINFO;
+ else
+ filedesc_flags = 0;
p = (struct proc *)arg;
+ structsize = sizeof(struct kinfo_file);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_filedesc_out(p, sb, -1);
+ kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
- structsize = sizeof(struct kinfo_file);
+ sbuf_start_section(sb, &start_len);
+
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_filedesc_out(p, sb, -1);
+ kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
+ filedesc_flags);
+
+ sect_len = sbuf_end_section(sb, start_len, 0, 0);
+ if (sect_len < 0)
+ return;
+ sect_sz = sect_len;
+
+ KASSERT(sect_sz <= *sizep,
+ ("kern_proc_filedesc_out did not respect maxlen; "
+ "requested %zu, got %zu", *sizep - sizeof(structsize),
+ sect_sz - sizeof(structsize)));
+
+ for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
+ sbuf_putc(sb, 0);
}
}
@@ -1940,24 +1989,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
- int structsize;
+ int structsize, vmmap_flags;
+
+ if (coredump_pack_vmmapinfo)
+ vmmap_flags = KERN_VMMAP_PACK_KINFO;
+ else
+ vmmap_flags = 0;
p = (struct proc *)arg;
+ structsize = sizeof(struct kinfo_vmentry);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_vmmap_out(p, sb);
+ kern_proc_vmmap_out(p, sb, -1, vmmap_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
- structsize = sizeof(struct kinfo_vmentry);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
- kern_proc_vmmap_out(p, sb);
+ kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
+ vmmap_flags);
}
}
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 4930e2f..7561368 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#include <sys/conf.h>
#include <sys/domain.h>
+#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -3299,6 +3300,7 @@ struct export_fd_buf {
struct sbuf *sb;
ssize_t remainder;
struct kinfo_file kif;
+ int flags;
};
static int
@@ -3385,9 +3387,12 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
kif->kf_type = type;
kif->kf_ref_count = refcnt;
kif->kf_offset = offset;
- /* Pack record size down */
- kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
- strlen(kif->kf_path) + 1;
+ if ((efbuf->flags & KERN_FILEDESC_PACK_KINFO) != 0)
+ /* Pack record size down */
+ kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
+ strlen(kif->kf_path) + 1;
+ else
+ kif->kf_structsize = sizeof(*kif);
kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
if (efbuf->remainder != -1) {
if (efbuf->remainder < kif->kf_structsize) {
@@ -3413,7 +3418,8 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
* Takes a locked proc as argument, and returns with the proc unlocked.
*/
int
-kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
+kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags)
{
struct file *fp;
struct filedesc *fdp;
@@ -3448,6 +3454,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->fdp = NULL;
efbuf->sb = sb;
efbuf->remainder = maxlen;
+ efbuf->flags = flags;
if (tracevp != NULL)
export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
FREAD | FWRITE, -1, -1, NULL, efbuf);
@@ -3597,7 +3604,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
return (error);
}
maxlen = req->oldptr != NULL ? req->oldlen : -1;
- error = kern_proc_filedesc_out(p, &sb, maxlen);
+ error = kern_proc_filedesc_out(p, &sb, maxlen,
+ KERN_FILEDESC_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
@@ -3635,6 +3643,24 @@ vntype_to_kinfo(int vtype)
return (KF_VTYPE_UNKNOWN);
}
+static inline void
+vn_fill_junk(struct kinfo_file *kif)
+{
+ size_t len, olen;
+
+ /*
+ * Simulate vn_fullpath returning changing values for a given
+ * vp during e.g. coredump.
+ */
+ len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1;
+ olen = strlen(kif->kf_path);
+ if (len < olen)
+ strcpy(&kif->kf_path[len - 1], "$");
+ else
+ for (; olen < len; olen++)
+ strcpy(&kif->kf_path[olen], "A");
+}
+
static int
fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
{
@@ -3654,6 +3680,10 @@ fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
if (freepath != NULL)
free(freepath, M_TEMP);
+ KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path,
+ vn_fill_junk(kif);
+ );
+
/*
* Retrieve vnode attributes.
*/
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index f82ffec..47ea9b0 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -102,6 +102,16 @@ SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
+int coredump_pack_fileinfo = 1;
+SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
+ &coredump_pack_fileinfo, 0,
+ "Enable file path packing in 'procstat -f' coredump notes");
+
+int coredump_pack_vmmapinfo = 1;
+SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
+ &coredump_pack_vmmapinfo, 0,
+ "Enable file path packing in 'procstat -v' coredump notes");
+
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 6b60840..cfaa354 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2228,7 +2228,7 @@ next:;
* Must be called with the process locked and will return unlocked.
*/
int
-kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
+kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
{
vm_map_entry_t entry, tmp_entry;
struct vattr va;
@@ -2252,7 +2252,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
PRELE(p);
return (ESRCH);
}
- kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
+ kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
error = 0;
map = &vm->vm_map;
@@ -2387,10 +2387,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
free(freepath, M_TEMP);
/* Pack record size down */
- kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
- strlen(kve->kve_path) + 1;
+ if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
+ kve->kve_structsize =
+ offsetof(struct kinfo_vmentry, kve_path) +
+ strlen(kve->kve_path) + 1;
+ else
+ kve->kve_structsize = sizeof(*kve);
kve->kve_structsize = roundup(kve->kve_structsize,
sizeof(uint64_t));
+
+ /* Halt filling and truncate rather than exceeding maxlen */
+ if (maxlen != -1 && maxlen < kve->kve_structsize) {
+ error = 0;
+ vm_map_lock_read(map);
+ break;
+ } else if (maxlen != -1)
+ maxlen -= kve->kve_structsize;
+
if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
error = ENOMEM;
vm_map_lock_read(map);
@@ -2422,7 +2435,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
sbuf_delete(&sb);
return (error);
}
- error = kern_proc_vmmap_out(p, &sb);
+ error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
index cccebfd..0ccba24 100644
--- a/sys/sys/exec.h
+++ b/sys/sys/exec.h
@@ -83,6 +83,9 @@ void exec_unmap_first_page(struct image_params *);
int exec_register(const struct execsw *);
int exec_unregister(const struct execsw *);
+extern int coredump_pack_fileinfo;
+extern int coredump_pack_vmmapinfo;
+
/*
* note: name##_mod cannot be const storage because the
* linker_file_sysinit() function modifies _file in the
diff --git a/sys/sys/user.h b/sys/sys/user.h
index 9374daf..a697b66 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -536,6 +536,11 @@ struct kinfo_sigtramp {
#define KERN_PROC_NOTHREADS 0x1
#define KERN_PROC_MASK32 0x2
+/* Flags for kern_proc_filedesc_out. */
+#define KERN_FILEDESC_PACK_KINFO 0x00000001U
+
+/* Flags for kern_proc_vmmap_out. */
+#define KERN_VMMAP_PACK_KINFO 0x00000001U
struct sbuf;
/*
@@ -547,9 +552,11 @@ struct sbuf;
* to be locked on enter. On return the process is unlocked.
*/
-int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
+int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
-int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);
+int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
+ int flags);
int vntype_to_kinfo(int vtype);
#endif /* !_KERNEL */
OpenPOWER on IntegriCloud