summaryrefslogtreecommitdiffstats
path: root/sys/amd64
diff options
context:
space:
mode:
authorpeter <peter@FreeBSD.org>2005-06-29 22:28:46 +0000
committerpeter <peter@FreeBSD.org>2005-06-29 22:28:46 +0000
commit49bfde6bf0b6c2db289cd0ec08415985788c17e0 (patch)
treee189aafad3d106d28b146f6d568bf20ca8795202 /sys/amd64
parent76d3f5f6768a9a26b12823a0d37fda4d373ac9f6 (diff)
downloadFreeBSD-src-49bfde6bf0b6c2db289cd0ec08415985788c17e0.zip
FreeBSD-src-49bfde6bf0b6c2db289cd0ec08415985788c17e0.tar.gz
Switch AMD64 and i386 platforms to using ELF as their kernel crash
dump format. The key reason to do this is so that we can dump a sparse address space. For example, we need to be able to skip the PCI hole just below the 4GB boundary. Trying to destructively dump MMIO device registers is Really Bad(TM). The frequent result of trying to do a crash dump on a machine with 4GB or more RAM was ugly (lockup or reboot). This code has been taken directly from the IA64 dump_machdep.c code, with just a few (mostly minor) modifications. Introduce a dump_avail[] array in the machdep.c code so that we have a source of truth for what memory is present in a machine that needs to be dumped. We can't use phys_avail[] because all sorts of things slice memory out of it that we really need to dump, e.g. the vm page array and the dmesg buffer. dump_avail[] is pretty much an unmolested version of phys_avail[]. It does have the Maxmem correction applied. Bump the i386 and amd64 dump format to version 2, although nothing actually uses this version number yet. amd64 was actually using the i386 dump version number. libkvm support to follow. Approved by: re
Diffstat (limited to 'sys/amd64')
-rw-r--r--sys/amd64/amd64/dump_machdep.c418
-rw-r--r--sys/amd64/amd64/machdep.c45
-rw-r--r--sys/amd64/include/pmap.h1
3 files changed, 360 insertions, 104 deletions
diff --git a/sys/amd64/amd64/dump_machdep.c b/sys/amd64/amd64/dump_machdep.c
index b806791..b3dbd2f 100644
--- a/sys/amd64/amd64/dump_machdep.c
+++ b/sys/amd64/amd64/dump_machdep.c
@@ -1,36 +1,27 @@
/*-
- * Copyright (c) 2002 Poul-Henning Kamp
- * Copyright (c) 2002 Networks Associates Technology, Inc.
+ * Copyright (c) 2002 Marcel Moolenaar
* All rights reserved.
*
- * This software was developed for the FreeBSD Project by Poul-Henning Kamp
- * and NAI Labs, the Security Research Division of Network Associates, Inc.
- * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
- * DARPA CHATS research program.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
+ *
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. The names of the authors may not be used to endorse or promote
- * products derived from this software without specific prior written
- * permission.
*
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
@@ -44,92 +35,335 @@ __FBSDID("$FreeBSD$");
#include <sys/kerneldump.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <machine/elf.h>
#include <machine/md_var.h>
CTASSERT(sizeof(struct kerneldumpheader) == 512);
+/*
+ * Don't touch the first SIZEOF_METADATA bytes on the dump device. This
+ * is to protect us from metadata and to protect metadata from us.
+ */
+#define SIZEOF_METADATA (64*1024)
+
+#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK)
+#define DEV_ALIGN(x) (((off_t)(x) + (DEV_BSIZE-1)) & ~(DEV_BSIZE-1))
+
+struct md_pa {
+ vm_paddr_t md_start;
+ vm_paddr_t md_size;
+};
+
+typedef int callback_t(struct md_pa *, int, void *);
+
static struct kerneldumpheader kdh;
+static off_t dumplo, fileofs;
-void
-dumpsys(struct dumperinfo *di)
+/* Handle buffered writes. */
+static char buffer[DEV_BSIZE];
+static size_t fragsz;
+
+/* The 20 dump_avail[] entries form 10 (start, end) pairs, i.e. 10 md_pa's. */
+static struct md_pa dump_map[10];
+
+static void
+md_pa_init(void)
{
- off_t dumplo;
- vm_offset_t a, addr;
- u_int count, left, u;
- void *va;
- int i, mb;
- int c;
-
- printf("Dumping %ld MB\n", Maxmem / (1024*1024 / PAGE_SIZE));
-
- /* Fill in the kernel dump header */
- strcpy(kdh.magic, KERNELDUMPMAGIC);
- strcpy(kdh.architecture, "amd64");
- kdh.version = htod32(KERNELDUMPVERSION);
- kdh.architectureversion = htod32(KERNELDUMP_I386_VERSION);
- kdh.dumplength = htod64(Maxmem * (off_t)PAGE_SIZE);
- kdh.dumptime = htod64(time_second);
- kdh.blocksize = htod32(di->blocksize);
- strncpy(kdh.hostname, hostname, sizeof kdh.hostname);
- strncpy(kdh.versionstring, version, sizeof kdh.versionstring);
+ int n, idx;
+
+ bzero(dump_map, sizeof(dump_map));
+ for (n = 0; n < sizeof(dump_map) / sizeof(dump_map[0]); n++) {
+ idx = n * 2;
+ if (dump_avail[idx] == 0 && dump_avail[idx + 1] == 0)
+ break;
+ dump_map[n].md_start = dump_avail[idx];
+ dump_map[n].md_size = dump_avail[idx + 1] - dump_avail[idx];
+ }
+}
+
+/*
+ * Return a pointer to the first entry of dump_map[].
+ */
+static struct md_pa *
+md_pa_first(void)
+{
+
+	return (dump_map);
+}
+
+/*
+ * Advance to the dump_map[] entry that follows mdp.
+ *
+ * Returns NULL when there is no further chunk: either the next slot lies
+ * past the end of dump_map[] (every slot is in use, so there is no
+ * zero-sized terminator to find) or the next slot has a size of zero.
+ */
+static struct md_pa *
+md_pa_next(struct md_pa *mdp)
+{
+	struct md_pa *end;
+
+	end = dump_map + sizeof(dump_map) / sizeof(dump_map[0]);
+	mdp++;
+	/* Check the bound first so a full map is never read past its end. */
+	if (mdp >= end || mdp->md_size == 0)
+		mdp = NULL;
+	return (mdp);
+}
+
+/* XXX should be MI */
+static void
+mkdumpheader(struct kerneldumpheader *kdh, uint32_t archver, uint64_t dumplen,
+ uint32_t blksz)
+{
+
+ bzero(kdh, sizeof(*kdh));
+ strncpy(kdh->magic, KERNELDUMPMAGIC, sizeof(kdh->magic));
+ strncpy(kdh->architecture, MACHINE_ARCH, sizeof(kdh->architecture));
+ kdh->version = htod32(KERNELDUMPVERSION);
+ kdh->architectureversion = htod32(archver);
+ kdh->dumplength = htod64(dumplen);
+ kdh->dumptime = htod64(time_second);
+ kdh->blocksize = htod32(blksz);
+ strncpy(kdh->hostname, hostname, sizeof(kdh->hostname));
+ strncpy(kdh->versionstring, version, sizeof(kdh->versionstring));
if (panicstr != NULL)
- strncpy(kdh.panicstring, panicstr, sizeof kdh.panicstring);
- kdh.parity = kerneldump_parity(&kdh);
+ strncpy(kdh->panicstring, panicstr, sizeof(kdh->panicstring));
+ kdh->parity = kerneldump_parity(kdh);
+}
- /*
- * Check if we will have enough room to save the coredump.
- * The partition size needed is the sum of:
- * Memory to save + header + trailer + Room to leave untouched
- * at partition head. (an arbitrary amount).
- */
- if (di->mediasize <
- Maxmem * (off_t)PAGE_SIZE + sizeof kdh * 2 + 64*1024) {
- printf("\nDump failed. Partition too small.\n");
- return;
+static int
+buf_write(struct dumperinfo *di, char *ptr, size_t sz)
+{
+ size_t len;
+ int error;
+
+ while (sz) {
+ len = DEV_BSIZE - fragsz;
+ if (len > sz)
+ len = sz;
+ bcopy(ptr, buffer + fragsz, len);
+ fragsz += len;
+ ptr += len;
+ sz -= len;
+ if (fragsz == DEV_BSIZE) {
+ error = di->dumper(di->priv, buffer, 0, dumplo,
+ DEV_BSIZE);
+ if (error)
+ return error;
+ dumplo += DEV_BSIZE;
+ fragsz = 0;
+ }
}
- dumplo = di->mediaoffset + di->mediasize - Maxmem * (off_t)PAGE_SIZE;
- dumplo -= sizeof kdh * 2;
- i = di->dumper(di->priv, &kdh, 0, dumplo, sizeof kdh);
- if (i)
- printf("\nDump failed writing header (%d)\n", i);
- dumplo += sizeof kdh;
- i = 0;
- addr = 0;
+
+ return (0);
+}
+
+/*
+ * Write out the partially filled staging buffer, if any.
+ *
+ * A full DEV_BSIZE block is always written, and dumplo is advanced by
+ * DEV_BSIZE even when the write fails.  The fragment count is reset
+ * afterwards so that later buffered writes start from an empty buffer
+ * instead of appending to data that has already been written.
+ *
+ * Returns 0 if there was nothing to flush, otherwise the dumper's result.
+ */
+static int
+buf_flush(struct dumperinfo *di)
+{
+	int error;
+
+	if (fragsz == 0)
+		return (0);
+
+	error = di->dumper(di->priv, buffer, 0, dumplo, DEV_BSIZE);
+	dumplo += DEV_BSIZE;
+	fragsz = 0;
+	return (error);
+}
+
+#define PG2MB(pgs) ((pgs + (1 << 8) - 1) >> 8)
+
+static int
+cb_dumpdata(struct md_pa *mdp, int seqnr, void *arg)
+{
+ struct dumperinfo *di = (struct dumperinfo*)arg;
+ vm_paddr_t a, pa;
+ void *va;
+ uint64_t pgs;
+ size_t counter, sz, chunk;
+ int i, c, error, twiddle;
+
+ error = 0; /* catch case in which chunk size is 0 */
+ counter = 0; /* Update twiddle every 16MB */
+ twiddle = 0;
va = 0;
- mb = 0;
- for (count = 0; count < Maxmem;) {
- left = Maxmem - count;
- if (left > MAXDUMPPGS)
- left = MAXDUMPPGS;
- for (u = 0; u < left; u++) {
- a = addr + u * PAGE_SIZE;
- if (!is_physical_memory(a))
- a = 0;
- va = pmap_kenter_temporary(trunc_page(a), u);
+ pgs = mdp->md_size / PAGE_SIZE;
+ pa = mdp->md_start;
+
+ printf(" chunk %d: %ldMB (%ld pages)", seqnr, PG2MB(pgs), pgs);
+
+ while (pgs) {
+ chunk = pgs;
+ if (chunk > MAXDUMPPGS)
+ chunk = MAXDUMPPGS;
+ sz = chunk << PAGE_SHIFT;
+ counter += sz;
+ if (counter >> 24) {
+ printf(" %ld", PG2MB(pgs));
+ counter &= (1<<24) - 1;
}
- i = count / (16*1024*1024 / PAGE_SIZE);
- if (i != mb) {
- printf(" %d", count / (1024 * 1024 / PAGE_SIZE));
- mb = i;
+ for (i = 0; i < chunk; i++) {
+ a = pa + i * PAGE_SIZE;
+ va = pmap_kenter_temporary(trunc_page(a), i);
}
- i = di->dumper(di->priv, va, 0, dumplo, left * PAGE_SIZE);
- if (i)
+ error = di->dumper(di->priv, va, 0, dumplo, sz);
+ if (error)
break;
- count += left;
- dumplo += left * PAGE_SIZE;
- addr += left * PAGE_SIZE;
- if ((c = cncheckc()) == 0x03) {
- printf("\nDump aborted.\n");
- return;
- } else if (c != -1)
- printf("[CTRL-C to abort] ");
+ dumplo += sz;
+ pgs -= chunk;
+ pa += sz;
+
+ /* Check for user abort. */
+ c = cncheckc();
+ if (c == 0x03)
+ return (ECANCELED);
+ if (c != -1)
+ printf(" (CTRL-C to abort) ");
+ }
+ printf(" ... %s\n", (error) ? "fail" : "ok");
+ return (error);
+}
+
+/*
+ * Chunk callback: emit the ELF program header describing one chunk.
+ *
+ * arg is the struct dumperinfo to write through.  The segment's file
+ * offset is taken from fileofs, which is then advanced by the chunk size
+ * whether or not the write succeeded.  Returns the buf_write() result.
+ */
+static int
+cb_dumphdr(struct md_pa *mdp, int seqnr, void *arg)
+{
+	struct dumperinfo *di;
+	Elf_Phdr ph;
+	uint64_t len;
+	int rv;
+
+	di = (struct dumperinfo *)arg;
+	len = mdp->md_size;
+
+	bzero(&ph, sizeof(ph));
+	ph.p_type = PT_LOAD;
+	ph.p_flags = PF_R;	/* XXX */
+	ph.p_align = PAGE_SIZE;
+	ph.p_paddr = mdp->md_start;
+	ph.p_vaddr = mdp->md_start;
+	ph.p_offset = fileofs;
+	ph.p_memsz = len;
+	ph.p_filesz = len;
+
+	rv = buf_write(di, (char *)&ph, sizeof(ph));
+	fileofs += ph.p_filesz;
+	return (rv);
+}
+
+/*
+ * Chunk callback: add the size of one chunk to the running total that
+ * arg points to (a uint64_t).  Always returns 0.
+ */
+static int
+cb_size(struct md_pa *mdp, int seqnr, void *arg)
+{
+	uint64_t *total;
+
+	total = (uint64_t *)arg;
+	*total = *total + (uint64_t)mdp->md_size;
+	return (0);
+}
+
+static int
+foreach_chunk(callback_t cb, void *arg)
+{
+ struct md_pa *mdp;
+ int error, seqnr;
+
+ seqnr = 0;
+ mdp = md_pa_first();
+ while (mdp != NULL) {
+ error = (*cb)(mdp, seqnr++, arg);
+ if (error)
+ return (-error);
+ mdp = md_pa_next(mdp);
}
- if (i)
- printf("\nDump failed writing data (%d)\n", i);
- i = di->dumper(di->priv, &kdh, 0, dumplo, sizeof kdh);
- if (i)
- printf("\nDump failed writing trailer (%d)\n", i);
- di->dumper(di->priv, NULL, 0, 0, 0); /* tell them we are done */
+ return (seqnr);
+}
+
+/*
+ * Write a kernel crash dump through di as an ELF core file with one
+ * PT_LOAD segment per chunk of the dump map.
+ *
+ * The image is placed at the end of the media and consists of, in order:
+ * a kerneldumpheader (leader), the ELF header, the program headers, a gap
+ * up to the next page boundary of the file, the memory chunks, and a
+ * second copy of the kerneldumpheader (trailer).  Any failure, including
+ * a user abort, is reported on the console and ends the dump.
+ */
+void
+dumpsys(struct dumperinfo *di)
+{
+	Elf_Ehdr ehdr;
+	uint64_t dumpsize;
+	off_t hdrgap;
+	size_t hdrsz;
+	int error;
+
+	bzero(&ehdr, sizeof(ehdr));
+	ehdr.e_ident[EI_MAG0] = ELFMAG0;
+	ehdr.e_ident[EI_MAG1] = ELFMAG1;
+	ehdr.e_ident[EI_MAG2] = ELFMAG2;
+	ehdr.e_ident[EI_MAG3] = ELFMAG3;
+	ehdr.e_ident[EI_CLASS] = ELF_CLASS;
+#if BYTE_ORDER == LITTLE_ENDIAN
+	ehdr.e_ident[EI_DATA] = ELFDATA2LSB;
+#else
+	ehdr.e_ident[EI_DATA] = ELFDATA2MSB;
+#endif
+	ehdr.e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr.e_ident[EI_OSABI] = ELFOSABI_STANDALONE;	/* XXX big picture? */
+	ehdr.e_type = ET_CORE;
+	ehdr.e_machine = EM_X86_64;
+	ehdr.e_phoff = sizeof(ehdr);
+	ehdr.e_flags = 0;
+	ehdr.e_ehsize = sizeof(ehdr);
+	ehdr.e_phentsize = sizeof(Elf_Phdr);
+	ehdr.e_shentsize = sizeof(Elf_Shdr);
+
+	md_pa_init();
+
+	/* Calculate dump size. */
+	dumpsize = 0L;
+	ehdr.e_phnum = foreach_chunk(cb_size, &dumpsize);
+	hdrsz = ehdr.e_phoff + ehdr.e_phnum * ehdr.e_phentsize;
+	fileofs = MD_ALIGN(hdrsz);
+	dumpsize += fileofs;
+	hdrgap = fileofs - DEV_ALIGN(hdrsz);
+
+	/* Determine dump offset on device. */
+	if (di->mediasize < SIZEOF_METADATA + dumpsize + sizeof(kdh) * 2) {
+		error = ENOSPC;
+		goto fail;
+	}
+	dumplo = di->mediaoffset + di->mediasize - dumpsize;
+	dumplo -= sizeof(kdh) * 2;
+
+	mkdumpheader(&kdh, KERNELDUMP_AMD64_VERSION, dumpsize, di->blocksize);
+
+	/* %llu takes an unsigned argument, so cast to match the format. */
+	printf("Dumping %llu MB (%d chunks)\n",
+	    (unsigned long long)dumpsize >> 20, ehdr.e_phnum);
+
+	/* Dump leader */
+	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+	dumplo += sizeof(kdh);
+
+	/* Dump ELF header */
+	error = buf_write(di, (char*)&ehdr, sizeof(ehdr));
+	if (error)
+		goto fail;
+
+	/* Dump program headers */
+	error = foreach_chunk(cb_dumphdr, di);
+	if (error < 0)
+		goto fail;
+
+	/* Write the last, partially filled header block; it can fail too. */
+	error = buf_flush(di);
+	if (error)
+		goto fail;
+
+	/*
+	 * All headers are written using blocked I/O, so we know the
+	 * current offset is (still) block aligned. Skip the alignment
+	 * gap in the file so that the segment contents start at a page
+	 * boundary of the file. We cannot use MD_ALIGN on dumplo itself,
+	 * because dumplo is an offset on the dump device and may very
+	 * well be unaligned there; only the file offset matters.
+	 */
+	dumplo += hdrgap;
+
+	/* Dump memory chunks (updates dumplo) */
+	error = foreach_chunk(cb_dumpdata, di);
+	if (error < 0)
+		goto fail;
+
+	/* Dump trailer */
+	error = di->dumper(di->priv, &kdh, 0, dumplo, sizeof(kdh));
+	if (error)
+		goto fail;
+
+	/* Signal completion, signoff and exit stage left. */
+	di->dumper(di->priv, NULL, 0, 0, 0);
 	printf("\nDump complete\n");
 	return;
+
+ fail:
+	/* foreach_chunk() reports errors negated; normalize the sign. */
+	if (error < 0)
+		error = -error;
+
+	if (error == ECANCELED)
+		printf("\nDump aborted\n");
+	else if (error == ENOSPC)
+		printf("\nDump failed. Partition too small.\n");
+	else
+		printf("\n** DUMP FAILED (ERROR %d) **\n", error);
 }
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e027250..3e07952 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -159,9 +159,11 @@ long Maxmem = 0;
long realmem = 0;
vm_paddr_t phys_avail[20];
+vm_paddr_t dump_avail[20];
/* must be 2 less so 0 0 can signal end of chunks */
-#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2)
+#define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2)
+#define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2)
struct kva_md_info kmi;
@@ -864,7 +866,7 @@ u_int basemem;
static void
getmemsize(caddr_t kmdp, u_int64_t first)
{
- int i, physmap_idx, pa_indx;
+ int i, physmap_idx, pa_indx, da_indx;
vm_paddr_t pa, physmap[PHYSMAP_SIZE];
u_long physmem_tunable;
pt_entry_t *pte;
@@ -978,8 +980,10 @@ getmemsize(caddr_t kmdp, u_int64_t first)
*/
physmap[0] = PAGE_SIZE; /* mask off page 0 */
pa_indx = 0;
+ da_indx = 1;
phys_avail[pa_indx++] = physmap[0];
phys_avail[pa_indx] = physmap[0];
+ dump_avail[da_indx] = physmap[0];
pte = CMAP1;
/*
@@ -1000,22 +1004,23 @@ getmemsize(caddr_t kmdp, u_int64_t first)
if (physmap[i + 1] < end)
end = trunc_page(physmap[i + 1]);
for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) {
- int tmp, page_bad;
+ int tmp, page_bad, full;
int *ptr = (int *)CADDR1;
+ full = FALSE;
/*
* block out kernel memory as not available.
*/
if (pa >= 0x100000 && pa < first)
- continue;
+ goto do_dump_avail;
- /*
- * block out dcons buffer
- */
- if (dcons_addr > 0
- && pa >= trunc_page(dcons_addr)
- && pa < dcons_addr + dcons_size)
- continue;
+ /*
+ * block out dcons buffer
+ */
+ if (dcons_addr > 0
+ && pa >= trunc_page(dcons_addr)
+ && pa < dcons_addr + dcons_size)
+ goto do_dump_avail;
page_bad = FALSE;
@@ -1079,12 +1084,28 @@ getmemsize(caddr_t kmdp, u_int64_t first)
printf(
"Too many holes in the physical address space, giving up\n");
pa_indx--;
- break;
+ full = TRUE;
+ goto do_dump_avail;
}
phys_avail[pa_indx++] = pa; /* start */
phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */
}
physmem++;
+do_dump_avail:
+ if (dump_avail[da_indx] == pa) {
+ dump_avail[da_indx] += PAGE_SIZE;
+ } else {
+ da_indx++;
+ if (da_indx == DUMP_AVAIL_ARRAY_END) {
+ da_indx--;
+ goto do_next;
+ }
+ dump_avail[da_indx++] = pa; /* start */
+ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */
+ }
+do_next:
+ if (full)
+ break;
}
}
*pte = 0;
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index c4804e9..504d9f8 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -282,6 +282,7 @@ extern caddr_t CADDR1;
extern pt_entry_t *CMAP1;
extern vm_paddr_t avail_end;
extern vm_paddr_t phys_avail[];
+extern vm_paddr_t dump_avail[];
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
OpenPOWER on IntegriCloud