summary refs log tree commit diff stats
path: root/sbin/hastd/activemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'sbin/hastd/activemap.c')
-rw-r--r-- sbin/hastd/activemap.c 691
1 files changed, 691 insertions, 0 deletions
diff --git a/sbin/hastd/activemap.c b/sbin/hastd/activemap.c
new file mode 100644
index 0000000..10eb641
--- /dev/null
+++ b/sbin/hastd/activemap.c
@@ -0,0 +1,691 @@
+/*-
+ * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Pawel Jakub Dawidek under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h> /* powerof2() */
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <bitstring.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <activemap.h>
+
+/* Value kept in am_magic for as long as the structure is initialized. */
+#define ACTIVEMAP_MAGIC 0xac71e4
+/*
+ * State of one activemap: the media is divided into fixed-size extents and
+ * the map tracks which of them are dirty, how many requests are outstanding
+ * for each, which ones still have to be synchronized and which recently
+ * written ones are kept dirty in the on-disk bitmap.
+ */
+struct activemap {
+ int am_magic; /* ACTIVEMAP_MAGIC while initialized, 0 once
+ activemap_free() was called. */
+ off_t am_mediasize; /* Media size in bytes. */
+ uint32_t am_extentsize; /* Extent size in bytes,
+ must be power of 2. */
+ uint8_t am_extentshift;/* 2 ^ am_extentshift == am_extentsize */
+ int am_nextents; /* Number of extents. */
+ size_t am_mapsize; /* Bitmap size in bytes. */
+ uint16_t *am_memtab; /* An array that holds number of pending
+ writes per extent; extents waiting for
+ synchronization are accounted here as
+ ext2reqs() requests. */
+ bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */
+ bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */
+ size_t am_diskmapsize; /* Map size rounded up to sector size. */
+ uint64_t am_ndirty; /* Number of dirty regions. */
+ bitstr_t *am_syncmap; /* Bitmap of extents to sync. */
+ off_t am_syncoff; /* Next synchronization offset; -1 means
+ start from the first extent set in
+ am_syncmap, -2 means there is nothing
+ to synchronize. */
+ TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that
+ we keep dirty to reduce bitmap
+ updates; most recently written
+ extent is at the head. */
+ int am_nkeepdirty; /* Number of am_keepdirty elements. */
+ int am_nkeepdirty_limit; /* Maximum number of am_keepdirty
+ elements. */
+};
+
+/*
+ * One element of the am_keepdirty list: an extent that keepdirty_fill()
+ * marks as dirty in the on-disk bitmap.
+ */
+struct keepdirty {
+ int kd_extent; /* Extent number. */
+ TAILQ_ENTRY(keepdirty) kd_next; /* Linkage on the am_keepdirty list. */
+};
+
+/*
+ * Return the number of bits set in x.  activemap_init() uses it on
+ * (extentsize - 1) to calculate extentshift.
+ */
+static uint32_t
+bitcount32(uint32_t x)
+{
+	uint32_t nbits;
+
+	/* Every iteration clears the lowest bit that is still set. */
+	for (nbits = 0; x != 0; nbits++)
+		x &= x - 1;
+	return (nbits);
+}
+
+/*
+ * Convert a byte offset on the media into the number of the extent that
+ * contains it.  The offset has to lie within the media.
+ */
+static __inline int
+off2ext(const struct activemap *amp, off_t offset)
+{
+	int extent;
+
+	assert(offset >= 0 && offset < amp->am_mediasize);
+	/* The extent size is a power of 2, so dividing is a plain shift. */
+	extent = (int)(offset >> amp->am_extentshift);
+	assert(extent >= 0 && extent < amp->am_nextents);
+
+	return (extent);
+}
+
+/*
+ * Convert an extent number into the byte offset at which this extent
+ * starts on the media.
+ */
+static __inline off_t
+ext2off(const struct activemap *amp, int extent)
+{
+	off_t offset;
+
+	assert(extent >= 0 && extent < amp->am_nextents);
+	/* Widen before shifting so that large extent numbers don't overflow. */
+	offset = (off_t)extent;
+	offset <<= amp->am_extentshift;
+	assert(offset >= 0 && offset < amp->am_mediasize);
+
+	return (offset);
+}
+
+/*
+ * Return how many requests of at most MAXPHYS bytes it takes to synchronize
+ * the given extent.  Only the last extent can be shorter than the extent
+ * size, so only for it the real length is taken from the media size.
+ */
+static __inline int
+ext2reqs(const struct activemap *amp, int ext)
+{
+	off_t left;
+
+	if (ext >= amp->am_nextents - 1) {
+		assert(ext == amp->am_nextents - 1);
+		left = amp->am_mediasize % amp->am_extentsize;
+		/* No remainder means that the last extent is a full one. */
+		if (left == 0)
+			left = amp->am_extentsize;
+		return (((left - 1) / MAXPHYS) + 1);
+	}
+
+	return (((amp->am_extentsize - 1) / MAXPHYS) + 1);
+}
+
+/*
+ * Initialize activemap structure and allocate memory for internal needs.
+ *
+ * ampp       - place to store the new map; written only on success.
+ * mediasize  - media size in bytes, must be greater than 0.
+ * extentsize - extent size in bytes, must be a power of 2.
+ * sectorsize - sector size in bytes, must be a power of 2; the size of the
+ *              on-disk bitmap is rounded up to its multiple.
+ * keepdirty  - maximum number of elements on the am_keepdirty list, must be
+ *              greater than 0.
+ *
+ * Function returns 0 on success.  On failure it returns -1 and sets errno to
+ * ENOMEM if any of the allocations failed, or to EOVERFLOW if the number of
+ * extents cannot be represented by the int used for extent numbers.
+ */
+int
+activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize,
+    uint32_t sectorsize, uint32_t keepdirty)
+{
+	struct activemap *amp;
+	uint64_t nextents;
+
+	assert(ampp != NULL);
+	assert(mediasize > 0);
+	assert(extentsize > 0);
+	assert(powerof2(extentsize));
+	assert(sectorsize > 0);
+	assert(powerof2(sectorsize));
+	assert(keepdirty > 0);
+
+	/*
+	 * Extent numbers are kept in ints everywhere, so refuse geometries
+	 * that would be silently truncated when stored in am_nextents.
+	 */
+	nextents = ((mediasize - 1) / extentsize) + 1;
+	if (nextents > INT_MAX) {
+		errno = EOVERFLOW;
+		return (-1);
+	}
+
+	amp = malloc(sizeof(*amp));
+	if (amp == NULL) {
+		errno = ENOMEM;
+		return (-1);
+	}
+
+	amp->am_mediasize = mediasize;
+	amp->am_nkeepdirty_limit = keepdirty;
+	amp->am_extentsize = extentsize;
+	/* extentsize is a power of 2, so (extentsize - 1) has log2 bits set. */
+	amp->am_extentshift = bitcount32(extentsize - 1);
+	amp->am_nextents = (int)nextents;
+	amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents);
+	amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize);
+	amp->am_ndirty = 0;
+	/* -2 means that there is nothing to synchronize. */
+	amp->am_syncoff = -2;
+	TAILQ_INIT(&amp->am_keepdirty);
+	amp->am_nkeepdirty = 0;
+
+	amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0]));
+	amp->am_diskmap = calloc(1, amp->am_diskmapsize);
+	amp->am_memmap = bit_alloc(amp->am_nextents);
+	amp->am_syncmap = bit_alloc(amp->am_nextents);
+
+	/*
+	 * Check to see if any of the allocations above failed.
+	 * free(NULL) is a no-op, so everything can be released blindly.
+	 */
+	if (amp->am_memtab == NULL || amp->am_diskmap == NULL ||
+	    amp->am_memmap == NULL || amp->am_syncmap == NULL) {
+		free(amp->am_memtab);
+		free(amp->am_diskmap);
+		free(amp->am_memmap);
+		free(amp->am_syncmap);
+		amp->am_magic = 0;
+		free(amp);
+		errno = ENOMEM;
+		return (-1);
+	}
+
+	amp->am_magic = ACTIVEMAP_MAGIC;
+	*ampp = amp;
+
+	return (0);
+}
+
+/*
+ * Look up the given extent on the am_keepdirty list.
+ * Function returns the matching element or NULL if there is none.
+ */
+static struct keepdirty *
+keepdirty_find(struct activemap *amp, int extent)
+{
+	struct keepdirty *entry;
+
+	TAILQ_FOREACH(entry, &amp->am_keepdirty, kd_next) {
+		if (entry->kd_extent == extent)
+			return (entry);
+	}
+	return (NULL);
+}
+
+/*
+ * Record the given extent as the most recently written one.
+ *
+ * An extent that is already on the am_keepdirty list is moved to its head.
+ * Otherwise a new element is inserted at the head; if the list already
+ * holds am_nkeepdirty_limit elements, the one at the tail is taken off the
+ * list and reused for the new extent.
+ */
+static void
+keepdirty_add(struct activemap *amp, int extent)
+{
+	struct keepdirty *kd;
+
+	kd = keepdirty_find(amp, extent);
+	if (kd != NULL) {
+		/*
+		 * Only move element at the beginning.
+		 */
+		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
+		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
+		return;
+	}
+	/*
+	 * Add new element, but first remove the most unused one if
+	 * we have too many.
+	 */
+	if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) {
+		kd = TAILQ_LAST(&amp->am_keepdirty, skeepdirty);
+		assert(kd != NULL);
+		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
+		amp->am_nkeepdirty--;
+		/*
+		 * With a limit of one element the list is empty at this
+		 * point, so zero is a legitimate value here.
+		 */
+		assert(amp->am_nkeepdirty >= 0);
+	}
+	/* kd is still NULL unless an element was taken off the list above. */
+	if (kd == NULL)
+		kd = malloc(sizeof(*kd));
+	/* We can ignore allocation failure. */
+	if (kd != NULL) {
+		kd->kd_extent = extent;
+		amp->am_nkeepdirty++;
+		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
+	}
+}
+
+/*
+ * Mark every extent from the am_keepdirty list as dirty in the on-disk
+ * bitmap.
+ */
+static void
+keepdirty_fill(struct activemap *amp)
+{
+	struct keepdirty *entry;
+
+	for (entry = TAILQ_FIRST(&amp->am_keepdirty); entry != NULL;
+	    entry = TAILQ_NEXT(entry, kd_next)) {
+		bit_set(amp->am_diskmap, entry->kd_extent);
+	}
+}
+
+/*
+ * Remove and free all elements of the am_keepdirty list.
+ */
+static void
+keepdirty_free(struct activemap *amp)
+{
+	struct keepdirty *entry, *next;
+
+	for (entry = TAILQ_FIRST(&amp->am_keepdirty); entry != NULL;
+	    entry = next) {
+		/* Remember the successor before the element goes away. */
+		next = TAILQ_NEXT(entry, kd_next);
+		TAILQ_REMOVE(&amp->am_keepdirty, entry, kd_next);
+		amp->am_nkeepdirty--;
+		free(entry);
+	}
+	assert(amp->am_nkeepdirty == 0);
+}
+
+/*
+ * Release the keepdirty list, the per-extent counters and the three bitmaps
+ * of the given activemap.  The magic value is cleared first, so any further
+ * use of the map trips the assertions in the other functions.
+ *
+ * NOTE(review): the structure itself, which activemap_init() allocates, is
+ * not freed here - confirm whether callers are expected to release it.
+ */
+void
+activemap_free(struct activemap *amp)
+{
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	amp->am_magic = 0;
+
+	keepdirty_free(amp);
+	free(amp->am_syncmap);
+	free(amp->am_memmap);
+	free(amp->am_diskmap);
+	free(amp->am_memtab);
+}
+
+/*
+ * Function should be called before we handle write requests.  It accounts
+ * one more pending write for every extent touched by the given range and
+ * puts those extents on the keepdirty list.
+ * It returns true if on-disk metadata should be updated.
+ */
+bool
+activemap_write_start(struct activemap *amp, off_t offset, off_t length)
+{
+	bool changed;
+	off_t last;
+	int ext;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+	assert(length > 0);
+
+	changed = false;
+	last = offset + length - 1;
+
+	ext = off2ext(amp, offset);
+	while (ext <= off2ext(amp, last)) {
+		/*
+		 * First pending write for this extent: it becomes dirty, so
+		 * the caller is told (by returning true) that the bitmap
+		 * was modified and has to be flushed to disk.
+		 */
+		if (amp->am_memtab[ext] == 0) {
+			assert(!bit_test(amp->am_memmap, ext));
+			bit_set(amp->am_memmap, ext);
+			amp->am_ndirty++;
+			changed = true;
+		}
+		amp->am_memtab[ext]++;
+		keepdirty_add(amp, ext);
+		ext++;
+	}
+
+	return (changed);
+}
+
+/*
+ * Function should be called after receiving write confirmation.  It drops
+ * one pending write from every extent touched by the given range.
+ * It returns true if on-disk metadata should be updated.
+ */
+bool
+activemap_write_complete(struct activemap *amp, off_t offset, off_t length)
+{
+	bool changed;
+	off_t last;
+	int ext;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+	assert(length > 0);
+
+	changed = false;
+	last = offset + length - 1;
+
+	for (ext = off2ext(amp, offset); ext <= off2ext(amp, last); ext++) {
+		assert(amp->am_memtab[ext] > 0);
+		assert(bit_test(amp->am_memmap, ext));
+		amp->am_memtab[ext]--;
+		if (amp->am_memtab[ext] != 0)
+			continue;
+		/*
+		 * That was the last pending write, so the extent is clean
+		 * again.  By returning true we inform the caller that the
+		 * bitmap was modified and has to be flushed to disk.
+		 */
+		bit_clear(amp->am_memmap, ext);
+		amp->am_ndirty--;
+		changed = true;
+	}
+
+	return (changed);
+}
+
+/*
+ * Function should be called after finishing synchronization of one extent.
+ * It takes the requests needed to synchronize the extent off its pending
+ * count and returns true if the extent became clean, which means that
+ * on-disk metadata should be updated.
+ */
+bool
+activemap_extent_complete(struct activemap *amp, int extent)
+{
+	int reqs;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+	assert(extent >= 0 && extent < amp->am_nextents);
+
+	reqs = ext2reqs(amp, extent);
+	assert(amp->am_memtab[extent] >= reqs);
+	amp->am_memtab[extent] -= reqs;
+	assert(bit_test(amp->am_memmap, extent));
+
+	/* Regular writes are still pending, the extent has to stay dirty. */
+	if (amp->am_memtab[extent] != 0)
+		return (false);
+
+	bit_clear(amp->am_memmap, extent);
+	amp->am_ndirty--;
+	return (true);
+}
+
+/*
+ * Function returns the current value of the dirty regions counter.
+ */
+uint64_t
+activemap_ndirty(const struct activemap *amp)
+{
+	uint64_t ndirty;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	ndirty = amp->am_ndirty;
+	return (ndirty);
+}
+
+/*
+ * Function compares the on-disk bitmap with the in-memory bitmap and
+ * returns true if they differ and should be flushed to the disk.
+ */
+bool
+activemap_differ(const struct activemap *amp)
+{
+	int cmp;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	/* Only the first am_mapsize bytes of both bitmaps are compared. */
+	cmp = memcmp(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
+	return (cmp != 0);
+}
+
+/*
+ * Function returns the size of the bitmap in bytes, before rounding up
+ * to sector size.
+ */
+size_t
+activemap_size(const struct activemap *amp)
+{
+	size_t mapsize;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	mapsize = amp->am_mapsize;
+	return (mapsize);
+}
+
+/*
+ * Function returns the number of bytes the bitmap occupies on disk, which
+ * is activemap_size() rounded up to sector size.
+ */
+size_t
+activemap_ondisk_size(const struct activemap *amp)
+{
+	size_t ondisksize;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	ondisksize = amp->am_diskmapsize;
+	return (ondisksize);
+}
+
+/*
+ * Function loads a bitmap read from disk into the activemap.  The buffer
+ * has to hold at least activemap_size() bytes; it becomes the content of
+ * the on-disk, in-memory and synchronization bitmaps alike.
+ */
+void
+activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size)
+{
+	int ext;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+	assert(size >= amp->am_mapsize);
+
+	memcpy(amp->am_syncmap, buf, amp->am_mapsize);
+	memcpy(amp->am_memmap, buf, amp->am_mapsize);
+	memcpy(amp->am_diskmap, buf, amp->am_mapsize);
+
+	bit_ffs(amp->am_memmap, amp->am_nextents, &ext);
+	/* Without any dirty extent there is nothing more to set up. */
+	if (ext != -1) {
+		/*
+		 * Set synchronization offset to the first dirty extent.
+		 */
+		activemap_sync_rewind(amp);
+		/*
+		 * Dirty extents must stay dirty until they are synchronized,
+		 * so each of them gets as many pending writes as there are
+		 * requests needed to synchronize it.
+		 */
+		amp->am_ndirty = 0;
+		while (ext < amp->am_nextents) {
+			if (bit_test(amp->am_memmap, ext)) {
+				amp->am_memtab[ext] = ext2reqs(amp, ext);
+				amp->am_ndirty++;
+			}
+			ext++;
+		}
+	}
+}
+
+/*
+ * Function merges the given (remote) bitmap with the existing one.  Every
+ * extent that is dirty in the given bitmap, but not yet marked for
+ * synchronization locally, is marked in all three local bitmaps.
+ */
+void
+activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size)
+{
+	bitstr_t *remmap = __DECONST(bitstr_t *, buf);
+	int ext;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+	assert(size >= amp->am_mapsize);
+
+	bit_ffs(remmap, amp->am_nextents, &ext);
+	if (ext == -1) {
+		/* The given bitmap has no dirty extents, nothing to merge. */
+		return;
+	}
+	for (; ext < amp->am_nextents; ext++) {
+		/*
+		 * Only extents that are dirty remotely and not yet marked
+		 * for synchronization locally need any work.
+		 */
+		if (!bit_test(amp->am_syncmap, ext) && bit_test(remmap, ext)) {
+			bit_set(amp->am_diskmap, ext);
+			bit_set(amp->am_memmap, ext);
+			bit_set(amp->am_syncmap, ext);
+			if (amp->am_memtab[ext] == 0)
+				amp->am_ndirty++;
+			/*
+			 * Keep the extent dirty until it is synchronized by
+			 * setting the number of pending writes to the number
+			 * of requests needed to synchronize it.
+			 */
+			amp->am_memtab[ext] = ext2reqs(amp, ext);
+		}
+	}
+	/*
+	 * Set synchronization offset to the first dirty extent.
+	 */
+	activemap_sync_rewind(amp);
+}
+
+/*
+ * Function refreshes the on-disk bitmap and returns a pointer to it.  The
+ * on-disk bitmap is the in-memory bitmap with the extents from the
+ * keepdirty list marked dirty as well.  If sizep is not NULL, the size
+ * rounded up to sector size is stored there.
+ */
+const unsigned char *
+activemap_bitmap(struct activemap *amp, size_t *sizep)
+{
+	const unsigned char *bitmap;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	if (sizep != NULL)
+		*sizep = amp->am_diskmapsize;
+
+	memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
+	keepdirty_fill(amp);
+
+	bitmap = (const unsigned char *)amp->am_diskmap;
+	return (bitmap);
+}
+
+/*
+ * Function calculates, without creating an activemap, how many bytes are
+ * needed on disk to store a bitmap for the given media size and extent
+ * size.  The result is rounded up to sector size.
+ */
+size_t
+activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize,
+    uint32_t sectorsize)
+{
+	uint64_t nextents, nbytes;
+
+	assert(mediasize > 0);
+	assert(extentsize > 0);
+	assert(powerof2(extentsize));
+	assert(sectorsize > 0);
+	assert(powerof2(sectorsize));
+
+	/* One bit per extent; the last extent may be a partial one. */
+	nextents = ((mediasize - 1) / extentsize) + 1;
+	nbytes = sizeof(bitstr_t) * bitstr_size(nextents);
+
+	return (roundup2(nbytes, sectorsize));
+}
+
+/*
+ * Restart synchronization: the synchronization offset is set to -1 (start
+ * from the beginning, i.e. from the first extent marked in the sync bitmap)
+ * or to -2 if there are no extents to synchronize.
+ */
+void
+activemap_sync_rewind(struct activemap *amp)
+{
+	int first;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	bit_ffs(amp->am_syncmap, amp->am_nextents, &first);
+	amp->am_syncoff = (first == -1) ? -2 : -1;
+}
+
+/*
+ * Return next offset of where we should synchronize.
+ *
+ * Every call hands out one chunk of at most MAXPHYS bytes that lies within
+ * a single extent marked in the sync bitmap.  On success the offset of the
+ * chunk is returned and its length is stored in *lengthp.  If there is
+ * nothing (more) to synchronize, -1 is returned and *lengthp is left
+ * untouched.
+ *
+ * *syncextp is always written: it is -1, unless this call found that the
+ * previously returned chunk was the last one of its extent, in which case
+ * it holds the number of that extent (its sync bit was just cleared).
+ */
+off_t
+activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp)
+{
+ off_t syncoff, left;
+ int ext;
+
+ assert(amp->am_magic == ACTIVEMAP_MAGIC);
+ assert(lengthp != NULL);
+ assert(syncextp != NULL);
+
+ *syncextp = -1;
+
+ /* -2 means that there is nothing to synchronize. */
+ if (amp->am_syncoff == -2)
+ return (-1);
+
+ /*
+ * A non-negative am_syncoff is the offset returned by the previous
+ * call.  Check if the next MAXPHYS step would leave the media or
+ * cross into another extent; the media check comes first so off2ext()
+ * is never called with an offset beyond the media.
+ */
+ if (amp->am_syncoff >= 0 &&
+ (amp->am_syncoff + MAXPHYS >= amp->am_mediasize ||
+ off2ext(amp, amp->am_syncoff) !=
+ off2ext(amp, amp->am_syncoff + MAXPHYS))) {
+ /*
+ * We are about to change extent, so mark previous one as clean
+ * in the sync bitmap and report it to the caller.
+ */
+ ext = off2ext(amp, amp->am_syncoff);
+ bit_clear(amp->am_syncmap, ext);
+ *syncextp = ext;
+ amp->am_syncoff = -1;
+ }
+
+ if (amp->am_syncoff == -1) {
+ /*
+ * Let's find first extent to synchronize.
+ */
+ bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
+ if (ext == -1) {
+ amp->am_syncoff = -2;
+ return (-1);
+ }
+ amp->am_syncoff = ext2off(amp, ext);
+ } else {
+ /*
+ * We don't change extent, so just increase offset.
+ */
+ amp->am_syncoff += MAXPHYS;
+ if (amp->am_syncoff >= amp->am_mediasize) {
+ amp->am_syncoff = -2;
+ return (-1);
+ }
+ }
+
+ /*
+ * The chunk ends at the end of the extent, at the end of the media
+ * or after MAXPHYS bytes, whichever comes first.
+ */
+ syncoff = amp->am_syncoff;
+ left = ext2off(amp, off2ext(amp, syncoff)) +
+ amp->am_extentsize - syncoff;
+ if (syncoff + left > amp->am_mediasize)
+ left = amp->am_mediasize - syncoff;
+ if (left > MAXPHYS)
+ left = MAXPHYS;
+
+ assert(left >= 0 && left <= MAXPHYS);
+ assert(syncoff >= 0 && syncoff < amp->am_mediasize);
+ assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize);
+
+ *lengthp = left;
+ return (syncoff);
+}
+
+/*
+ * Mark extent(s) containing the given region for synchronization.
+ * Most likely one of the components is unavailable.
+ * Function returns true if at least one extent was newly marked.
+ */
+bool
+activemap_need_sync(struct activemap *amp, off_t offset, off_t length)
+{
+	bool changed;
+	off_t last;
+	int ext;
+
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	changed = false;
+	last = offset + length - 1;
+
+	for (ext = off2ext(amp, offset); ext <= off2ext(amp, last); ext++) {
+		if (!bit_test(amp->am_syncmap, ext)) {
+			bit_set(amp->am_syncmap, ext);
+			if (!bit_test(amp->am_memmap, ext)) {
+				bit_set(amp->am_memmap, ext);
+				amp->am_ndirty++;
+			}
+			/*
+			 * Account the requests needed to synchronize this
+			 * extent on top of the writes that are pending.
+			 */
+			amp->am_memtab[ext] += ext2reqs(amp, ext);
+			changed = true;
+		} else {
+			/* Already marked for synchronization. */
+			assert(bit_test(amp->am_memmap, ext));
+		}
+	}
+
+	return (changed);
+}
+
+/*
+ * Print the first nbits bits of the given bitmap to standard output as one
+ * line of 0/1 digits prefixed with "<label>: ".
+ */
+static void
+activemap_dump_map(const char *label, const bitstr_t *map, int nbits)
+{
+	int bit;
+
+	printf("%s: ", label);
+	for (bit = 0; bit < nbits; bit++)
+		printf("%d", bit_test(map, bit) ? 1 : 0);
+	printf("\n");
+}
+
+/*
+ * Debugging aid: print the in-memory (M), on-disk (D) and synchronization
+ * (S) bitmaps to standard output, one line per bitmap and one digit per
+ * extent.
+ */
+void
+activemap_dump(const struct activemap *amp)
+{
+
+	/* Validate the map like every other public function does. */
+	assert(amp->am_magic == ACTIVEMAP_MAGIC);
+
+	activemap_dump_map("M", amp->am_memmap, amp->am_nextents);
+	activemap_dump_map("D", amp->am_diskmap, amp->am_nextents);
+	activemap_dump_map("S", amp->am_syncmap, amp->am_nextents);
+}
OpenPOWER on IntegriCloud