summaryrefslogtreecommitdiffstats
path: root/sys/boot/zfs
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
committerpjd <pjd@FreeBSD.org>2011-02-27 19:41:40 +0000
commit1b03c5bf41222b723415638f03e00ed12cac076a (patch)
treeef515cadc08bf427e4d3f1360199ec9827b1596b /sys/boot/zfs
parentc67d387baf03726323703774b1b320235fb1f24b (diff)
downloadFreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.zip
FreeBSD-src-1b03c5bf41222b723415638f03e00ed12cac076a.tar.gz
Finally... Import the latest open-source ZFS version - (SPA) 28.
Few new things available from now on: - Data deduplication. - Triple parity RAIDZ (RAIDZ3). - zfs diff. - zpool split. - Snapshot holds. - zpool import -F. Allows to rewind corrupted pool to earlier transaction group. - Possibility to import pool in read-only mode. MFC after: 1 month
Diffstat (limited to 'sys/boot/zfs')
-rw-r--r--sys/boot/zfs/zfs.c40
-rw-r--r--sys/boot/zfs/zfsimpl.c211
2 files changed, 170 insertions, 81 deletions
diff --git a/sys/boot/zfs/zfs.c b/sys/boot/zfs/zfs.c
index a995f57..e313fde 100644
--- a/sys/boot/zfs/zfs.c
+++ b/sys/boot/zfs/zfs.c
@@ -144,13 +144,16 @@ zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */)
{
spa_t *spa = (spa_t *) f->f_devdata;
struct file *fp = (struct file *)f->f_fsdata;
- const znode_phys_t *zp = (const znode_phys_t *) fp->f_dnode.dn_bonus;
+ struct stat sb;
size_t n;
int rc;
+ rc = zfs_stat(f, &sb);
+ if (rc)
+ return (rc);
n = size;
- if (fp->f_seekp + n > zp->zp_size)
- n = zp->zp_size - fp->f_seekp;
+ if (fp->f_seekp + n > sb.st_size)
+ n = sb.st_size - fp->f_seekp;
rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
if (rc)
@@ -182,7 +185,6 @@ static off_t
zfs_seek(struct open_file *f, off_t offset, int where)
{
struct file *fp = (struct file *)f->f_fsdata;
- znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
switch (where) {
case SEEK_SET:
@@ -192,8 +194,18 @@ zfs_seek(struct open_file *f, off_t offset, int where)
fp->f_seekp += offset;
break;
case SEEK_END:
- fp->f_seekp = zp->zp_size - offset;
+ {
+ struct stat sb;
+ int error;
+
+ error = zfs_stat(f, &sb);
+ if (error != 0) {
+ errno = error;
+ return (-1);
+ }
+ fp->f_seekp = sb.st_size - offset;
break;
+ }
default:
errno = EINVAL;
return (-1);
@@ -204,16 +216,10 @@ zfs_seek(struct open_file *f, off_t offset, int where)
static int
zfs_stat(struct open_file *f, struct stat *sb)
{
+ spa_t *spa = (spa_t *) f->f_devdata;
struct file *fp = (struct file *)f->f_fsdata;
- znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
-
- /* only important stuff */
- sb->st_mode = zp->zp_mode;
- sb->st_uid = zp->zp_uid;
- sb->st_gid = zp->zp_gid;
- sb->st_size = zp->zp_size;
- return (0);
+ return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
}
static int
@@ -221,14 +227,16 @@ zfs_readdir(struct open_file *f, struct dirent *d)
{
spa_t *spa = (spa_t *) f->f_devdata;
struct file *fp = (struct file *)f->f_fsdata;
- znode_phys_t *zp = (znode_phys_t *) fp->f_dnode.dn_bonus;
mzap_ent_phys_t mze;
+ struct stat sb;
size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
int rc;
- if ((zp->zp_mode >> 12) != 0x4) {
+ rc = zfs_stat(f, &sb);
+ if (rc)
+ return (rc);
+ if (!S_ISDIR(sb.st_mode))
return (ENOTDIR);
- }
/*
* If this is the first read, get the zap type.
diff --git a/sys/boot/zfs/zfsimpl.c b/sys/boot/zfs/zfsimpl.c
index cb09120..497667a 100644
--- a/sys/boot/zfs/zfsimpl.c
+++ b/sys/boot/zfs/zfsimpl.c
@@ -31,6 +31,8 @@ __FBSDID("$FreeBSD$");
* Stand-alone ZFS file reader.
*/
+#include <sys/stat.h>
+
#include "zfsimpl.h"
#include "zfssubr.c"
@@ -70,26 +72,30 @@ zfs_init(void)
zfs_init_crc();
}
-static char *
-zfs_alloc_temp(size_t sz)
+static void *
+zfs_alloc(size_t size)
{
- char *p;
+ char *ptr;
- if (zfs_temp_ptr + sz > zfs_temp_end) {
+ if (zfs_temp_ptr + size > zfs_temp_end) {
printf("ZFS: out of temporary buffer space\n");
for (;;) ;
}
- p = zfs_temp_ptr;
- zfs_temp_ptr += sz;
+ ptr = zfs_temp_ptr;
+ zfs_temp_ptr += size;
- return (p);
+ return (ptr);
}
static void
-zfs_reset_temp(void)
+zfs_free(void *ptr, size_t size)
{
- zfs_temp_ptr = zfs_temp_buf;
+ zfs_temp_ptr -= size;
+ if (zfs_temp_ptr != ptr) {
+ printf("ZFS: zfs_alloc()/zfs_free() mismatch\n");
+ for (;;) ;
+ }
}
static int
@@ -341,7 +347,7 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,
rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize);
if (rc)
return (rc);
- if (bp && zio_checksum_error(bp, buf))
+ if (bp && zio_checksum_error(bp, buf, offset))
return (EIO);
return (0);
@@ -428,7 +434,8 @@ vdev_create(uint64_t guid, vdev_read_t *read)
}
static int
-vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
+vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev,
+ vdev_t **vdevp, int is_newer)
{
int rc;
uint64_t guid, id, ashift, nparity;
@@ -453,7 +460,7 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
&& strcmp(type, VDEV_TYPE_DISK)
&& strcmp(type, VDEV_TYPE_RAIDZ)
&& strcmp(type, VDEV_TYPE_REPLACING)) {
- printf("ZFS: can only boot from disk, mirror or raidz vdevs\n");
+ printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n");
return (EIO);
}
@@ -484,6 +491,7 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
vdev = vdev_create(guid, vdev_disk_read);
vdev->v_id = id;
+ vdev->v_top = pvdev != NULL ? pvdev : vdev;
if (nvlist_find(nvlist, ZPOOL_CONFIG_ASHIFT,
DATA_TYPE_UINT64, 0, &ashift) == 0)
vdev->v_ashift = ashift;
@@ -503,8 +511,14 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
if (!strcmp(type, "raidz")) {
if (vdev->v_nparity == 1)
vdev->v_name = "raidz1";
- else
+ else if (vdev->v_nparity == 2)
vdev->v_name = "raidz2";
+ else if (vdev->v_nparity == 3)
+ vdev->v_name = "raidz3";
+ else {
+ printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n");
+ return (EIO);
+ }
} else {
vdev->v_name = strdup(type);
}
@@ -541,7 +555,7 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t **vdevp, int is_newer)
if (rc == 0) {
vdev->v_nchildren = nkids;
for (i = 0; i < nkids; i++) {
- rc = vdev_init_from_nvlist(kids, &kid, is_newer);
+ rc = vdev_init_from_nvlist(kids, vdev, &kid, is_newer);
if (rc)
return (rc);
if (is_new)
@@ -770,7 +784,7 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
const char *pool_name;
const unsigned char *vdevs;
int i, rc, is_newer;
- char upbuf[1024];
+ char *upbuf;
const struct uberblock *up;
/*
@@ -814,17 +828,10 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
return (EIO);
}
-#ifndef TEST
- if (val != POOL_STATE_ACTIVE) {
- /*
- * Don't print a message here. If we happen to reboot
- * while where is an exported pool around, we don't
- * need a cascade of confusing messages during boot.
- */
- /*printf("ZFS: pool is not active\n");*/
+ if (val == POOL_STATE_DESTROYED) {
+ /* We don't boot only from destroyed pools. */
return (EIO);
}
-#endif
if (nvlist_find(nvlist,
ZPOOL_CONFIG_POOL_TXG,
@@ -884,7 +891,7 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
return (EIO);
}
- rc = vdev_init_from_nvlist(vdevs, &top_vdev, is_newer);
+ rc = vdev_init_from_nvlist(vdevs, NULL, &top_vdev, is_newer);
if (rc)
return (rc);
@@ -920,22 +927,23 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
* the best uberblock and then we can actually access
* the contents of the pool.
*/
+ upbuf = zfs_alloc(VDEV_UBERBLOCK_SIZE(vdev));
+ up = (const struct uberblock *)upbuf;
for (i = 0;
- i < VDEV_UBERBLOCK_RING >> UBERBLOCK_SHIFT;
+ i < VDEV_UBERBLOCK_COUNT(vdev);
i++) {
- off = offsetof(vdev_label_t, vl_uberblock);
- off += i << UBERBLOCK_SHIFT;
+ off = VDEV_UBERBLOCK_OFFSET(vdev, i);
BP_ZERO(&bp);
DVA_SET_OFFSET(&bp.blk_dva[0], off);
- BP_SET_LSIZE(&bp, 1 << UBERBLOCK_SHIFT);
- BP_SET_PSIZE(&bp, 1 << UBERBLOCK_SHIFT);
+ BP_SET_LSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
+ BP_SET_PSIZE(&bp, VDEV_UBERBLOCK_SIZE(vdev));
BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL);
BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF);
ZIO_SET_CHECKSUM(&bp.blk_cksum, off, 0, 0, 0);
- if (vdev_read_phys(vdev, &bp, upbuf, off, 0))
+
+ if (vdev_read_phys(vdev, NULL, upbuf, off, VDEV_UBERBLOCK_SIZE(vdev)))
continue;
- up = (const struct uberblock *) upbuf;
if (up->ub_magic != UBERBLOCK_MAGIC)
continue;
if (up->ub_txg < spa->spa_txg)
@@ -947,6 +955,7 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
spa->spa_uberblock = *up;
}
}
+ zfs_free(upbuf, VDEV_UBERBLOCK_SIZE(vdev));
if (spap)
*spap = spa;
@@ -1000,16 +1009,11 @@ static int
zio_read(spa_t *spa, const blkptr_t *bp, void *buf)
{
int cpfunc = BP_GET_COMPRESS(bp);
- size_t lsize = BP_GET_LSIZE(bp);
- size_t psize = BP_GET_PSIZE(bp);
+ uint64_t align, size;
void *pbuf;
- int i;
+ int i, error;
- zfs_reset_temp();
- if (cpfunc != ZIO_COMPRESS_OFF)
- pbuf = zfs_alloc_temp(psize);
- else
- pbuf = buf;
+ error = EIO;
for (i = 0; i < SPA_DVAS_PER_BP; i++) {
const dva_t *dva = &bp->blk_dva[i];
@@ -1021,32 +1025,49 @@ zio_read(spa_t *spa, const blkptr_t *bp, void *buf)
continue;
if (DVA_GET_GANG(dva)) {
- if (zio_read_gang(spa, bp, dva, buf))
+ error = zio_read_gang(spa, bp, dva, buf);
+ if (error != 0)
continue;
} else {
vdevid = DVA_GET_VDEV(dva);
offset = DVA_GET_OFFSET(dva);
- STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink)
+ STAILQ_FOREACH(vdev, &spa->spa_vdevs, v_childlink) {
if (vdev->v_id == vdevid)
break;
- if (!vdev || !vdev->v_read) {
- continue;
}
- if (vdev->v_read(vdev, bp, pbuf, offset, psize))
+ if (!vdev || !vdev->v_read)
continue;
- if (cpfunc != ZIO_COMPRESS_OFF) {
- if (zio_decompress_data(cpfunc, pbuf, psize,
- buf, lsize))
- return (EIO);
+ size = BP_GET_PSIZE(bp);
+ align = 1ULL << vdev->v_top->v_ashift;
+ if (P2PHASE(size, align) != 0)
+ size = P2ROUNDUP(size, align);
+ if (size != BP_GET_PSIZE(bp) || cpfunc != ZIO_COMPRESS_OFF)
+ pbuf = zfs_alloc(size);
+ else
+ pbuf = buf;
+
+ error = vdev->v_read(vdev, bp, pbuf, offset, size);
+ if (error == 0) {
+ if (cpfunc != ZIO_COMPRESS_OFF) {
+ error = zio_decompress_data(cpfunc,
+ pbuf, BP_GET_PSIZE(bp), buf,
+ BP_GET_LSIZE(bp));
+ } else if (size != BP_GET_PSIZE(bp)) {
+ bcopy(pbuf, buf, BP_GET_PSIZE(bp));
+ }
}
+ if (buf != pbuf)
+ zfs_free(pbuf, size);
+ if (error != 0)
+ continue;
}
-
- return (0);
+ error = 0;
+ break;
}
- printf("ZFS: i/o error - all block copies unavailable\n");
-
- return (EIO);
+ if (error != 0)
+ printf("ZFS: i/o error - all block copies unavailable\n");
+ return (error);
}
static int
@@ -1276,7 +1297,7 @@ zap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *va
{
int rc;
uint64_t zap_type;
- size_t size = dnode->dn_datablkszsec * 512;
+ size_t size = dnode->dn_datablkszsec << SPA_MINBLOCKSHIFT;
rc = dnode_read(spa, dnode, 0, zap_scratch, size);
if (rc)
@@ -1285,8 +1306,10 @@ zap_lookup(spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t *va
zap_type = *(uint64_t *) zap_scratch;
if (zap_type == ZBT_MICRO)
return mzap_lookup(spa, dnode, name, value);
- else
+ else if (zap_type == ZBT_HEADER)
return fzap_lookup(spa, dnode, name, value);
+ printf("ZFS: invalid zap_type=%d\n", (int)zap_type);
+ return (EIO);
}
#ifdef BOOT2
@@ -1497,6 +1520,7 @@ zfs_mount_root(spa_t *spa, objset_phys_t *objset)
static int
zfs_mount_pool(spa_t *spa)
{
+
/*
* Find the MOS and work our way in from there.
*/
@@ -1516,6 +1540,58 @@ zfs_mount_pool(spa_t *spa)
return (0);
}
+static int
+zfs_dnode_stat(spa_t *spa, dnode_phys_t *dn, struct stat *sb)
+{
+
+ if (dn->dn_bonustype != DMU_OT_SA) {
+ znode_phys_t *zp = (znode_phys_t *)dn->dn_bonus;
+
+ sb->st_mode = zp->zp_mode;
+ sb->st_uid = zp->zp_uid;
+ sb->st_gid = zp->zp_gid;
+ sb->st_size = zp->zp_size;
+ } else {
+ sa_hdr_phys_t *sahdrp;
+ int hdrsize;
+ size_t size = 0;
+ void *buf = NULL;
+
+ if (dn->dn_bonuslen != 0)
+ sahdrp = (sa_hdr_phys_t *)DN_BONUS(dn);
+ else {
+ if ((dn->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0) {
+ blkptr_t *bp = &dn->dn_spill;
+ int error;
+
+ size = BP_GET_LSIZE(bp);
+ buf = zfs_alloc(size);
+ error = zio_read(spa, bp, buf);
+ if (error != 0) {
+ zfs_free(buf, size);
+ return (error);
+ }
+ sahdrp = buf;
+ } else {
+ return (EIO);
+ }
+ }
+ hdrsize = SA_HDR_SIZE(sahdrp);
+ sb->st_mode = *(uint64_t *)((char *)sahdrp + hdrsize +
+ SA_MODE_OFFSET);
+ sb->st_uid = *(uint64_t *)((char *)sahdrp + hdrsize +
+ SA_UID_OFFSET);
+ sb->st_gid = *(uint64_t *)((char *)sahdrp + hdrsize +
+ SA_GID_OFFSET);
+ sb->st_size = *(uint64_t *)((char *)sahdrp + hdrsize +
+ SA_SIZE_OFFSET);
+ if (buf != NULL)
+ zfs_free(buf, size);
+ }
+
+ return (0);
+}
+
/*
* Lookup a file and return its dnode.
*/
@@ -1525,11 +1601,11 @@ zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
int rc;
uint64_t objnum, rootnum, parentnum;
dnode_phys_t dn;
- const znode_phys_t *zp = (const znode_phys_t *) dn.dn_bonus;
const char *p, *q;
char element[256];
char path[1024];
int symlinks_followed = 0;
+ struct stat sb;
if (spa->spa_root_objset.os_type != DMU_OST_ZFS) {
printf("ZFS: unexpected object set type %llu\n",
@@ -1569,9 +1645,11 @@ zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
p = 0;
}
- if ((zp->zp_mode >> 12) != 0x4) {
+ rc = zfs_dnode_stat(spa, &dn, &sb);
+ if (rc)
+ return (rc);
+ if (!S_ISDIR(sb.st_mode))
return (ENOTDIR);
- }
parentnum = objnum;
rc = zap_lookup(spa, &dn, element, &objnum);
@@ -1586,7 +1664,10 @@ zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
/*
* Check for symlink.
*/
- if ((zp->zp_mode >> 12) == 0xa) {
+ rc = zfs_dnode_stat(spa, &dn, &sb);
+ if (rc)
+ return (rc);
+ if (S_ISLNK(sb.st_mode)) {
if (symlinks_followed > 10)
return (EMLINK);
symlinks_followed++;
@@ -1596,14 +1677,14 @@ zfs_lookup(spa_t *spa, const char *upath, dnode_phys_t *dnode)
* current path onto the end.
*/
if (p)
- strcpy(&path[zp->zp_size], p);
+ strcpy(&path[sb.st_size], p);
else
- path[zp->zp_size] = 0;
- if (zp->zp_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) {
+ path[sb.st_size] = 0;
+ if (sb.st_size + sizeof(znode_phys_t) <= dn.dn_bonuslen) {
memcpy(path, &dn.dn_bonus[sizeof(znode_phys_t)],
- zp->zp_size);
+ sb.st_size);
} else {
- rc = dnode_read(spa, &dn, 0, path, zp->zp_size);
+ rc = dnode_read(spa, &dn, 0, path, sb.st_size);
if (rc)
return (rc);
}
OpenPOWER on IntegriCloud