summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorae <ae@FreeBSD.org>2012-09-29 16:47:56 +0000
committerae <ae@FreeBSD.org>2012-09-29 16:47:56 +0000
commit75dfd50beb9ad0d46f0492739cbccf5795eb36a2 (patch)
tree403dd8dac6f6bc3959207afde75dad0ebb383086
parenta7f869fbc12c7a8560ce1231243042ee05c6b86a (diff)
downloadFreeBSD-src-75dfd50beb9ad0d46f0492739cbccf5795eb36a2.zip
FreeBSD-src-75dfd50beb9ad0d46f0492739cbccf5795eb36a2.tar.gz
Almost each time when loader opens a file, this leads to calling
disk_open(). Very often this is called several times for one file. This leads to reading partition table metadata for each call. To reduce the number of disk I/O we have a simple block cache, but it is very dumb and more than half of I/O operations related to reading metadata, misses this cache. Introduce new cache layer to resolve this problem. It is independent and doesn't need initialization like bcache, and will work by default for all loaders which use the new DISK API. A successful disk_open() call to each new disk or partition produces new entry in the cache. Even more, when disk was already open, now opening of any nested partitions does not require reading top level partition table. So, if without this cache, partition table metadata was read around 20-50 times during boot, now it reads only once. This affects the booting from GPT and MBR from the UFS.
-rw-r--r--sys/boot/common/disk.c212
-rw-r--r--sys/boot/common/disk.h1
-rw-r--r--sys/boot/i386/libi386/biosdisk.c10
-rw-r--r--sys/boot/uboot/lib/disk.c1
-rw-r--r--sys/boot/userboot/userboot/userboot_disk.c1
5 files changed, 190 insertions, 35 deletions
diff --git a/sys/boot/common/disk.c b/sys/boot/common/disk.c
index 0389956..1fcaf2b 100644
--- a/sys/boot/common/disk.c
+++ b/sys/boot/common/disk.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include <sys/disk.h>
+#include <sys/queue.h>
#include <stand.h>
#include <stdarg.h>
#include <bootstrap.h>
@@ -46,6 +47,7 @@ struct open_disk {
struct ptable *table;
off_t mediasize;
u_int sectorsize;
+ int rcnt;
};
struct print_args {
@@ -54,6 +56,96 @@ struct print_args {
int verbose;
};
+struct dentry {
+ const struct devsw *d_dev;
+ int d_unit;
+ int d_slice;
+ int d_partition;
+
+ struct open_disk *od;
+ off_t d_offset;
+ STAILQ_ENTRY(dentry) entry;
+#ifdef DISK_DEBUG
+ uint32_t count;
+#endif
+};
+
+static STAILQ_HEAD(, dentry) opened_disks =
+ STAILQ_HEAD_INITIALIZER(opened_disks);
+
+static int
+disk_lookup(struct disk_devdesc *dev)
+{
+ struct dentry *entry;
+ int rc;
+
+ rc = ENOENT;
+ STAILQ_FOREACH(entry, &opened_disks, entry) {
+ if (entry->d_dev != dev->d_dev ||
+ entry->d_unit != dev->d_unit)
+ continue;
+ dev->d_opendata = entry->od;
+ if (entry->d_slice == dev->d_slice &&
+ entry->d_partition == dev->d_partition) {
+ dev->d_offset = entry->d_offset;
+ DEBUG("%s offset %lld", disk_fmtdev(dev),
+ dev->d_offset);
+#ifdef DISK_DEBUG
+ entry->count++;
+#endif
+ return (0);
+ }
+ rc = EAGAIN;
+ }
+ return (rc);
+}
+
+static void
+disk_insert(struct disk_devdesc *dev)
+{
+ struct dentry *entry;
+
+ entry = (struct dentry *)malloc(sizeof(struct dentry));
+ if (entry == NULL) {
+ DEBUG("no memory");
+ return;
+ }
+ entry->d_dev = dev->d_dev;
+ entry->d_unit = dev->d_unit;
+ entry->d_slice = dev->d_slice;
+ entry->d_partition = dev->d_partition;
+ entry->od = (struct open_disk *)dev->d_opendata;
+ entry->od->rcnt++;
+ entry->d_offset = dev->d_offset;
+#ifdef DISK_DEBUG
+ entry->count = 1;
+#endif
+ STAILQ_INSERT_TAIL(&opened_disks, entry, entry);
+ DEBUG("%s cached", disk_fmtdev(dev));
+}
+
+#ifdef DISK_DEBUG
+COMMAND_SET(dcachestat, "dcachestat", "get disk cache stats",
+ command_dcachestat);
+
+static int
+command_dcachestat(int argc, char *argv[])
+{
+ struct disk_devdesc dev;
+ struct dentry *entry;
+
+ STAILQ_FOREACH(entry, &opened_disks, entry) {
+ dev.d_dev = (struct devsw *)entry->d_dev;
+ dev.d_unit = entry->d_unit;
+ dev.d_slice = entry->d_slice;
+ dev.d_partition = entry->d_partition;
+ printf("%s %d => %p [%d]\n", disk_fmtdev(&dev), entry->count,
+ entry->od, entry->od->rcnt);
+ }
+ return (CMD_OK);
+}
+#endif /* DISK_DEBUG */
+
/* Convert size to a human-readable number. */
static char *
display_size(uint64_t size, u_int sectorsize)
@@ -145,25 +237,43 @@ disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize)
struct open_disk *od;
struct ptable *table;
struct ptable_entry part;
- int rc;
+ int rc, slice, partition;
- od = (struct open_disk *)malloc(sizeof(struct open_disk));
- if (od == NULL) {
- DEBUG("no memory");
- return (ENOMEM);
- }
+ rc = disk_lookup(dev);
+ if (rc == 0)
+ return (0);
/*
* While we are reading disk metadata, make sure we do it relative
* to the start of the disk
*/
- rc = 0;
- table = NULL;
dev->d_offset = 0;
+ table = NULL;
+ slice = dev->d_slice;
+ partition = dev->d_partition;
+ if (rc == EAGAIN) {
+ /*
+ * This entire disk was already opened and there is no
+ * need to allocate new open_disk structure and open the
+ * main partition table.
+ */
+ od = (struct open_disk *)dev->d_opendata;
+ DEBUG("%s unit %d, slice %d, partition %d => %p (cached)",
+ disk_fmtdev(dev), dev->d_unit, dev->d_slice,
+ dev->d_partition, od);
+ goto opened;
+ } else {
+ od = (struct open_disk *)malloc(sizeof(struct open_disk));
+ if (od == NULL) {
+ DEBUG("no memory");
+ return (ENOMEM);
+ }
+ }
dev->d_opendata = od;
od->mediasize = mediasize;
od->sectorsize = sectorsize;
- DEBUG("%s unit %d, slice %d, partition %d",
- disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition);
+ od->rcnt = 0;
+ DEBUG("%s unit %d, slice %d, partition %d => %p",
+ disk_fmtdev(dev), dev->d_unit, dev->d_slice, dev->d_partition, od);
/* Determine disk layout. */
od->table = ptable_open(dev, mediasize / sectorsize, sectorsize,
@@ -173,35 +283,34 @@ disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize)
rc = ENXIO;
goto out;
}
+opened:
+ rc = 0;
if (ptable_gettype(od->table) == PTABLE_BSD &&
- dev->d_partition >= 0) {
+ partition >= 0) {
/* It doesn't matter what value has d_slice */
- rc = ptable_getpart(od->table, &part, dev->d_partition);
+ rc = ptable_getpart(od->table, &part, partition);
if (rc == 0)
dev->d_offset = part.start;
- } else if (dev->d_slice >= 0) {
+ } else if (slice >= 0) {
/* Try to get information about partition */
- if (dev->d_slice == 0)
+ if (slice == 0)
rc = ptable_getbestpart(od->table, &part);
else
- rc = ptable_getpart(od->table, &part, dev->d_slice);
+ rc = ptable_getpart(od->table, &part, slice);
if (rc != 0) /* Partition doesn't exist */
goto out;
dev->d_offset = part.start;
- if (dev->d_slice == 0) {
- /* Save the slice number of best partition to dev */
- dev->d_slice = part.index;
- if (ptable_gettype(od->table) == PTABLE_GPT)
- dev->d_partition = 255;
- }
- if (dev->d_partition == 255)
+ slice = part.index;
+ if (ptable_gettype(od->table) == PTABLE_GPT) {
+ partition = 255;
goto out; /* Nothing more to do */
+ }
/*
* If d_partition < 0 and we are looking at a BSD slice,
* then try to read BSD label, otherwise return the
* whole MBR slice.
*/
- if (dev->d_partition == -1 &&
+ if (partition == -1 &&
part.type != PART_FREEBSD)
goto out;
/* Try to read BSD label */
@@ -217,12 +326,12 @@ disk_open(struct disk_devdesc *dev, off_t mediasize, u_int sectorsize)
* assume the 'a' partition. Otherwise just return the
* whole MBR slice, because it can contain ZFS.
*/
- if (dev->d_partition < 0) {
+ if (partition < 0) {
if (ptable_gettype(table) != PTABLE_BSD)
goto out;
- dev->d_partition = 0;
+ partition = 0;
}
- rc = ptable_getpart(table, &part, dev->d_partition);
+ rc = ptable_getpart(table, &part, partition);
if (rc != 0)
goto out;
dev->d_offset += part.start;
@@ -232,12 +341,19 @@ out:
ptable_close(table);
if (rc != 0) {
- if (od->table != NULL)
- ptable_close(od->table);
- free(od);
+ if (od->rcnt < 1) {
+ if (od->table != NULL)
+ ptable_close(od->table);
+ free(od);
+ }
DEBUG("%s could not open", disk_fmtdev(dev));
} else {
- DEBUG("%s offset %lld", disk_fmtdev(dev), dev->d_offset);
+ disk_insert(dev);
+ /* Save the slice and partition number to the dev */
+ dev->d_slice = slice;
+ dev->d_partition = partition;
+ DEBUG("%s offset %lld => %p", disk_fmtdev(dev),
+ dev->d_offset, od);
}
return (rc);
}
@@ -245,15 +361,43 @@ out:
int
disk_close(struct disk_devdesc *dev)
{
+#if DISK_DEBUG
struct open_disk *od;
od = (struct open_disk *)dev->d_opendata;
- DEBUG("%s closed", disk_fmtdev(dev));
- ptable_close(od->table);
- free(od);
+ DEBUG("%s closed => %p [%d]", disk_fmtdev(dev), od, od->rcnt);
+#endif
return (0);
}
+void
+disk_cleanup(const struct devsw *d_dev)
+{
+ struct disk_devdesc dev;
+ struct dentry *entry, *tmp;
+
+ STAILQ_FOREACH_SAFE(entry, &opened_disks, entry, tmp) {
+ if (entry->d_dev != d_dev)
+ continue;
+ entry->od->rcnt--;
+#ifdef DISK_DEBUG
+ dev.d_dev = (struct devsw *)entry->d_dev;
+ dev.d_unit = entry->d_unit;
+ dev.d_slice = entry->d_slice;
+ dev.d_partition = entry->d_partition;
+ STAILQ_REMOVE(&opened_disks, entry, dentry, entry);
+ DEBUG("%s was freed => %p [%d]", disk_fmtdev(&dev),
+ entry->od, entry->od->rcnt);
+#endif
+ if (entry->od->rcnt < 1) {
+ if (entry->od->table != NULL)
+ ptable_close(entry->od->table);
+ free(entry->od);
+ }
+ free(entry);
+ }
+}
+
char*
disk_fmtdev(struct disk_devdesc *dev)
{
@@ -261,7 +405,7 @@ disk_fmtdev(struct disk_devdesc *dev)
char *cp;
cp = buf + sprintf(buf, "%s%d", dev->d_dev->dv_name, dev->d_unit);
- if (dev->d_slice > 0) {
+ if (dev->d_slice >= 0) {
#ifdef LOADER_GPT_SUPPORT
if (dev->d_partition == 255) {
sprintf(cp, "p%d:", dev->d_slice);
diff --git a/sys/boot/common/disk.h b/sys/boot/common/disk.h
index 4894661..3e8b250 100644
--- a/sys/boot/common/disk.h
+++ b/sys/boot/common/disk.h
@@ -95,6 +95,7 @@ struct disk_devdesc
extern int disk_open(struct disk_devdesc *dev, off_t mediasize,
u_int sectorsize);
extern int disk_close(struct disk_devdesc *dev);
+extern void disk_cleanup(const struct devsw *d_dev);
/*
* Print information about slices on a disk.
diff --git a/sys/boot/i386/libi386/biosdisk.c b/sys/boot/i386/libi386/biosdisk.c
index 53e7e6a..36bde12 100644
--- a/sys/boot/i386/libi386/biosdisk.c
+++ b/sys/boot/i386/libi386/biosdisk.c
@@ -106,6 +106,7 @@ static int bd_open(struct open_file *f, ...);
static int bd_close(struct open_file *f);
static int bd_ioctl(struct open_file *f, u_long cmd, void *data);
static void bd_print(int verbose);
+static void bd_cleanup(void);
struct devsw biosdisk = {
"disk",
@@ -116,7 +117,7 @@ struct devsw biosdisk = {
bd_close,
bd_ioctl,
bd_print,
- NULL
+ bd_cleanup
};
/*
@@ -181,6 +182,13 @@ bd_init(void)
return(0);
}
+static void
+bd_cleanup(void)
+{
+
+ disk_cleanup(&biosdisk);
+}
+
/*
* Try to detect a device supported by the legacy int13 BIOS
*/
diff --git a/sys/boot/uboot/lib/disk.c b/sys/boot/uboot/lib/disk.c
index 13f913a..5dc9a5a 100644
--- a/sys/boot/uboot/lib/disk.c
+++ b/sys/boot/uboot/lib/disk.c
@@ -142,6 +142,7 @@ stor_cleanup(void)
for (i = 0; i < stor_info_no; i++)
if (stor_info[i].opened > 0)
ub_dev_close(stor_info[i].handle);
+ disk_cleanup(&uboot_storage);
}
static int
diff --git a/sys/boot/userboot/userboot/userboot_disk.c b/sys/boot/userboot/userboot/userboot_disk.c
index 84c4038..8e1a7a2 100644
--- a/sys/boot/userboot/userboot/userboot_disk.c
+++ b/sys/boot/userboot/userboot/userboot_disk.c
@@ -104,6 +104,7 @@ userdisk_cleanup(void)
if (userdisk_maxunit > 0)
free(ud_info);
+ disk_cleanup(&userboot_disk);
}
/*
OpenPOWER on IntegriCloud