summaryrefslogtreecommitdiffstats
path: root/sys/kern
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2008-11-17 20:49:29 +0000
committerpjd <pjd@FreeBSD.org>2008-11-17 20:49:29 +0000
commitbbe899b96e388a8b82439f81ed3707e0d9c6070d (patch)
tree81b89fa4ac6467771d5aa291a97f4665981a6108 /sys/kern
parentd2f579595c362ce27b4d87e2c40e1c4e09b929e3 (diff)
downloadFreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.zip
FreeBSD-src-bbe899b96e388a8b82439f81ed3707e0d9c6070d.tar.gz
Update ZFS from version 6 to 13 and bring some FreeBSD-specific changes.
This brings a huge number of changes; I'll enumerate only the user-visible ones: - Delegated Administration: allows regular users to perform ZFS operations, like file system creation, snapshot creation, etc. - L2ARC: level 2 cache for ZFS - allows using additional disks for cache. Huge performance improvements, mostly for random reads of mostly static content. - slog: allows using additional disks for the ZFS Intent Log to speed up operations like fsync(2). - vfs.zfs.super_owner: allows regular users to perform privileged operations on files stored on ZFS file systems that they own. Be very careful with this one. - chflags(2): not all the flags are supported. This still needs work. - ZFSBoot: support for booting off of a ZFS pool. Not finished, AFAIK. Submitted by: dfr - Snapshot properties - New failure modes: previously, if a write request failed, the system panicked. Now one can select one of three failure modes: panic (panic on write error), wait (wait for the disk to reappear), continue (serve read requests if possible, block write requests) - Refquota, refreservation properties: like the quota and reservation properties, but they don't count space consumed by child file systems, clones and snapshots. - Sparse volumes: ZVOLs that don't reserve space in the pool. - External attributes: compatible with extattr(2). - NFSv4-ACLs: not sure about the status, might not be complete yet. Submitted by: trasz - Creation-time properties - Regression tests for the zpool(8) command. Obtained from: OpenSolaris
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/kern_jail.c262
-rw-r--r--sys/kern/kern_osd.c301
-rw-r--r--sys/kern/kern_proc.c3
-rw-r--r--sys/kern/kern_thread.c3
-rw-r--r--sys/kern/vfs_lookup.c31
-rw-r--r--sys/kern/vfs_subr.c14
6 files changed, 367 insertions, 247 deletions
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 3afbc91..cee1ca6 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
+#include <sys/osd.h>
#include <net/if.h>
#include <netinet/in.h>
@@ -86,22 +87,6 @@ struct sx allprison_lock;
int lastprid = 0;
int prisoncount = 0;
-/*
- * List of jail services. Protected by allprison_lock.
- */
-TAILQ_HEAD(prison_services_head, prison_service);
-static struct prison_services_head prison_services =
- TAILQ_HEAD_INITIALIZER(prison_services);
-static int prison_service_slots = 0;
-
-struct prison_service {
- prison_create_t ps_create;
- prison_destroy_t ps_destroy;
- int ps_slotno;
- TAILQ_ENTRY(prison_service) ps_next;
- char ps_name[0];
-};
-
static void init_prison(void *);
static void prison_complete(void *context, int pending);
static int sysctl_jail_list(SYSCTL_HANDLER_ARGS);
@@ -126,7 +111,6 @@ jail(struct thread *td, struct jail_args *uap)
{
struct nameidata nd;
struct prison *pr, *tpr;
- struct prison_service *psrv;
struct jail j;
struct jail_attach_args jaa;
int vfslocked, error, tryprid;
@@ -159,12 +143,7 @@ jail(struct thread *td, struct jail_args *uap)
pr->pr_ip = j.ip_number;
pr->pr_linux = NULL;
pr->pr_securelevel = securelevel;
- if (prison_service_slots == 0)
- pr->pr_slots = NULL;
- else {
- pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots,
- M_PRISON, M_ZERO | M_WAITOK);
- }
+ bzero(&pr->pr_osd, sizeof(pr->pr_osd));
/* Determine next pr_id and add prison to allprison list. */
sx_xlock(&allprison_lock);
@@ -186,11 +165,7 @@ next:
pr->pr_id = jaa.jid = lastprid = tryprid;
LIST_INSERT_HEAD(&allprison, pr, pr_list);
prisoncount++;
- sx_downgrade(&allprison_lock);
- TAILQ_FOREACH(psrv, &prison_services, ps_next) {
- psrv->ps_create(psrv, pr);
- }
- sx_sunlock(&allprison_lock);
+ sx_xunlock(&allprison_lock);
error = jail_attach(td, &jaa);
if (error)
@@ -204,14 +179,8 @@ e_dropprref:
sx_xlock(&allprison_lock);
LIST_REMOVE(pr, pr_list);
prisoncount--;
- sx_downgrade(&allprison_lock);
- TAILQ_FOREACH(psrv, &prison_services, ps_next) {
- psrv->ps_destroy(psrv, pr);
- }
- sx_sunlock(&allprison_lock);
+ sx_xunlock(&allprison_lock);
e_dropvnref:
- if (pr->pr_slots != NULL)
- free(pr->pr_slots, M_PRISON);
vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
vrele(pr->pr_root);
VFS_UNLOCK_GIANT(vfslocked);
@@ -311,10 +280,10 @@ prison_find(int prid)
}
void
-prison_free(struct prison *pr)
+prison_free_locked(struct prison *pr)
{
- mtx_lock(&pr->pr_mtx);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
pr->pr_ref--;
if (pr->pr_ref == 0) {
mtx_unlock(&pr->pr_mtx);
@@ -325,10 +294,17 @@ prison_free(struct prison *pr)
mtx_unlock(&pr->pr_mtx);
}
+void
+prison_free(struct prison *pr)
+{
+
+ mtx_lock(&pr->pr_mtx);
+ prison_free_locked(pr);
+}
+
static void
prison_complete(void *context, int pending)
{
- struct prison_service *psrv;
struct prison *pr;
int vfslocked;
@@ -337,13 +313,10 @@ prison_complete(void *context, int pending)
sx_xlock(&allprison_lock);
LIST_REMOVE(pr, pr_list);
prisoncount--;
- sx_downgrade(&allprison_lock);
- TAILQ_FOREACH(psrv, &prison_services, ps_next) {
- psrv->ps_destroy(psrv, pr);
- }
- sx_sunlock(&allprison_lock);
- if (pr->pr_slots != NULL)
- free(pr->pr_slots, M_PRISON);
+ sx_xunlock(&allprison_lock);
+
+ /* Free all OSD associated to this jail. */
+ osd_jail_exit(pr);
vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
vrele(pr->pr_root);
@@ -356,13 +329,21 @@ prison_complete(void *context, int pending)
}
void
-prison_hold(struct prison *pr)
+prison_hold_locked(struct prison *pr)
{
- mtx_lock(&pr->pr_mtx);
+ mtx_assert(&pr->pr_mtx, MA_OWNED);
KASSERT(pr->pr_ref > 0,
("Trying to hold dead prison (id=%d).", pr->pr_id));
pr->pr_ref++;
+}
+
+void
+prison_hold(struct prison *pr)
+{
+
+ mtx_lock(&pr->pr_mtx);
+ prison_hold_locked(pr);
mtx_unlock(&pr->pr_mtx);
}
@@ -762,193 +743,6 @@ prison_priv_check(struct ucred *cred, int priv)
}
}
-/*
- * Register jail service. Provides 'create' and 'destroy' methods.
- * 'create' method will be called for every existing jail and all
- * jails in the future as they beeing created.
- * 'destroy' method will be called for every jail going away and
- * for all existing jails at the time of service deregistration.
- */
-struct prison_service *
-prison_service_register(const char *name, prison_create_t create,
- prison_destroy_t destroy)
-{
- struct prison_service *psrv, *psrv2;
- struct prison *pr;
- int reallocate = 1, slotno = 0;
- void **slots, **oldslots;
-
- psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON,
- M_WAITOK | M_ZERO);
- psrv->ps_create = create;
- psrv->ps_destroy = destroy;
- strcpy(psrv->ps_name, name);
- /*
- * Grab the allprison_lock here, so we won't miss any jail
- * creation/destruction.
- */
- sx_xlock(&allprison_lock);
-#ifdef INVARIANTS
- /*
- * Verify if service is not already registered.
- */
- TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
- KASSERT(strcmp(psrv2->ps_name, name) != 0,
- ("jail service %s already registered", name));
- }
-#endif
- /*
- * Find free slot. When there is no existing free slot available,
- * allocate one at the end.
- */
- TAILQ_FOREACH(psrv2, &prison_services, ps_next) {
- if (psrv2->ps_slotno != slotno) {
- KASSERT(slotno < psrv2->ps_slotno,
- ("Invalid slotno (slotno=%d >= ps_slotno=%d",
- slotno, psrv2->ps_slotno));
- /* We found free slot. */
- reallocate = 0;
- break;
- }
- slotno++;
- }
- psrv->ps_slotno = slotno;
- /*
- * Keep the list sorted by slot number.
- */
- if (psrv2 != NULL) {
- KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0"));
- TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next);
- } else {
- KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0"));
- TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next);
- }
- prison_service_slots++;
- sx_downgrade(&allprison_lock);
- /*
- * Allocate memory for new slot if we didn't found empty one.
- * Do not use realloc(9), because pr_slots is protected with a mutex,
- * so we can't sleep.
- */
- LIST_FOREACH(pr, &allprison, pr_list) {
- if (reallocate) {
- /* First allocate memory with M_WAITOK. */
- slots = malloc(sizeof(*slots) * prison_service_slots,
- M_PRISON, M_WAITOK);
- /* Now grab the mutex and replace pr_slots. */
- mtx_lock(&pr->pr_mtx);
- oldslots = pr->pr_slots;
- if (psrv->ps_slotno > 0) {
- bcopy(oldslots, slots,
- sizeof(*slots) * (prison_service_slots - 1));
- }
- slots[psrv->ps_slotno] = NULL;
- pr->pr_slots = slots;
- mtx_unlock(&pr->pr_mtx);
- if (oldslots != NULL)
- free(oldslots, M_PRISON);
- }
- /*
- * Call 'create' method for each existing jail.
- */
- psrv->ps_create(psrv, pr);
- }
- sx_sunlock(&allprison_lock);
-
- return (psrv);
-}
-
-void
-prison_service_deregister(struct prison_service *psrv)
-{
- struct prison *pr;
- void **slots, **oldslots;
- int last = 0;
-
- sx_xlock(&allprison_lock);
- if (TAILQ_LAST(&prison_services, prison_services_head) == psrv)
- last = 1;
- TAILQ_REMOVE(&prison_services, psrv, ps_next);
- prison_service_slots--;
- sx_downgrade(&allprison_lock);
- LIST_FOREACH(pr, &allprison, pr_list) {
- /*
- * Call 'destroy' method for every currently existing jail.
- */
- psrv->ps_destroy(psrv, pr);
- /*
- * If this is the last slot, free the memory allocated for it.
- */
- if (last) {
- if (prison_service_slots == 0)
- slots = NULL;
- else {
- slots = malloc(sizeof(*slots) * prison_service_slots,
- M_PRISON, M_WAITOK);
- }
- mtx_lock(&pr->pr_mtx);
- oldslots = pr->pr_slots;
- /*
- * We require setting slot to NULL after freeing it,
- * this way we can check for memory leaks here.
- */
- KASSERT(oldslots[psrv->ps_slotno] == NULL,
- ("Slot %d (service %s, jailid=%d) still contains data?",
- psrv->ps_slotno, psrv->ps_name, pr->pr_id));
- if (psrv->ps_slotno > 0) {
- bcopy(oldslots, slots,
- sizeof(*slots) * prison_service_slots);
- }
- pr->pr_slots = slots;
- mtx_unlock(&pr->pr_mtx);
- KASSERT(oldslots != NULL, ("oldslots == NULL"));
- free(oldslots, M_PRISON);
- }
- }
- sx_sunlock(&allprison_lock);
- free(psrv, M_PRISON);
-}
-
-/*
- * Function sets data for the given jail in slot assigned for the given
- * jail service.
- */
-void
-prison_service_data_set(struct prison_service *psrv, struct prison *pr,
- void *data)
-{
-
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- pr->pr_slots[psrv->ps_slotno] = data;
-}
-
-/*
- * Function clears slots assigned for the given jail service in the given
- * prison structure and returns current slot data.
- */
-void *
-prison_service_data_del(struct prison_service *psrv, struct prison *pr)
-{
- void *data;
-
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- data = pr->pr_slots[psrv->ps_slotno];
- pr->pr_slots[psrv->ps_slotno] = NULL;
- return (data);
-}
-
-/*
- * Function returns current data from the slot assigned to the given jail
- * service for the given jail.
- */
-void *
-prison_service_data_get(struct prison_service *psrv, struct prison *pr)
-{
-
- mtx_assert(&pr->pr_mtx, MA_OWNED);
- return (pr->pr_slots[psrv->ps_slotno]);
-}
-
static int
sysctl_jail_list(SYSCTL_HANDLER_ARGS)
{
diff --git a/sys/kern/kern_osd.c b/sys/kern/kern_osd.c
new file mode 100644
index 0000000..d9563e6
--- /dev/null
+++ b/sys/kern/kern_osd.c
@@ -0,0 +1,301 @@
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/proc.h>
+#include <sys/osd.h>
+
+/* OSD (Object Specific Data) */
+
+static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
+
+static int osd_debug = 0;
+TUNABLE_INT("debug.osd", &osd_debug);
+SYSCTL_INT(_debug, OID_AUTO, osd, CTLFLAG_RW, &osd_debug, 0, "OSD debug level");
+
+#define OSD_DEBUG(...) do { \
+ if (osd_debug) { \
+ printf("OSD (%s:%u): ", __func__, __LINE__); \
+ printf(__VA_ARGS__); \
+ printf("\n"); \
+ } \
+} while (0)
+
+/*
+ * Lists of objects with OSD.
+ */
+static LIST_HEAD(, osd) osd_list[OSD_LAST + 1];
+static osd_destructor_t *osd_destructors[OSD_LAST + 1];
+static u_int osd_nslots[OSD_LAST + 1];
+static struct mtx osd_lock[OSD_LAST + 1];
+
+static void
+osd_default_destructor(void *value __unused)
+{
+ /* Do nothing. */
+}
+
+int
+osd_register(u_int type, osd_destructor_t destructor)
+{
+ void *newptr;
+ u_int i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+
+ /*
+ * If no destructor is given, use default one. We need to use some
+ * destructor, because NULL destructor means unused slot.
+ */
+ if (destructor == NULL)
+ destructor = osd_default_destructor;
+
+ mtx_lock(&osd_lock[type]);
+ /*
+ * First, we try to find unused slot.
+ */
+ for (i = 0; i < osd_nslots[type]; i++) {
+ if (osd_destructors[type][i] == NULL) {
+ OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
+ type, i);
+ break;
+ }
+ }
+ /*
+ * If no unused slot was found, allocate one.
+ */
+ if (i == osd_nslots[type]) {
+ osd_nslots[type]++;
+ newptr = realloc(osd_destructors[type],
+ sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ M_NOWAIT | M_ZERO);
+ if (newptr == NULL) {
+ mtx_unlock(&osd_lock[type]);
+ return (0);
+ }
+ osd_destructors[type] = newptr;
+ OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
+ type, i + 1);
+ }
+ osd_destructors[type][i] = destructor;
+ mtx_unlock(&osd_lock[type]);
+ return (i + 1);
+}
+
+void
+osd_deregister(u_int type, u_int slot)
+{
+ struct osd *osd, *tosd;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ mtx_lock(&osd_lock[type]);
+ /*
+ * Free all OSD for the given slot.
+ */
+ LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd) {
+ osd_del(type, osd, slot);
+ }
+ /*
+ * Set destructor to NULL to free the slot.
+ */
+ osd_destructors[type][slot - 1] = NULL;
+ if (slot == osd_nslots[type]) {
+ osd_nslots[type]--;
+ osd_destructors[type] = realloc(osd_destructors[type],
+ sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ M_NOWAIT | M_ZERO);
+ /*
+ * We always reallocate to smaller size, so we assume it will
+ * always succeed.
+ */
+ KASSERT(osd_destructors[type] != NULL, ("realloc() failed"));
+ OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
+ type, slot);
+ } else {
+ OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
+ type, slot);
+ }
+ mtx_unlock(&osd_lock[type]);
+}
+
+int
+osd_set(u_int type, struct osd *osd, u_int slot, void *value)
+{
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to allocate space and
+ * put it onto the list.
+ */
+ osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
+ M_NOWAIT | M_ZERO);
+ if (osd->osd_slots == NULL)
+ return (ENOMEM);
+ osd->osd_nslots = slot;
+ mtx_lock(&osd_lock[type]);
+ LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
+ mtx_unlock(&osd_lock[type]);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else if (slot > osd->osd_nslots) {
+ void *newptr;
+
+ /*
+ * Too few slots allocated here, needs to extend the array.
+ */
+ newptr = realloc(osd->osd_slots, sizeof(void *) * slot, M_OSD,
+ M_NOWAIT | M_ZERO);
+ if (newptr == NULL)
+ return (ENOMEM);
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ OSD_DEBUG("Growing slots array (type=%u).", type);
+ }
+ OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
+ slot, value);
+ osd->osd_slots[slot - 1] = value;
+ return (0);
+}
+
+void *
+osd_get(u_int type, struct osd *osd, u_int slot)
+{
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ if (slot > osd->osd_nslots) {
+ OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
+ return (NULL);
+ }
+
+ OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).", type,
+ slot, osd->osd_slots[slot - 1]);
+ return (osd->osd_slots[slot - 1]);
+}
+
+void
+osd_del(u_int type, struct osd *osd, u_int slot)
+{
+ int i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
+
+ if (slot > osd->osd_nslots) {
+ OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
+ return;
+ }
+ osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osd->osd_slots[slot - 1] = NULL;
+ for (i = osd->osd_nslots - 1; i >= 0; i--) {
+ if (osd->osd_slots[i] != NULL) {
+ OSD_DEBUG("Slot still has a value (type=%u, slot=%u).", type, i + 1);
+ break;
+ }
+ }
+ if (i == -1) {
+ int unlock;
+
+ /* No values left for this object. */
+ OSD_DEBUG("No more slots left (type=%u).", type);
+ if ((unlock = !mtx_owned(&osd_lock[type])))
+ mtx_lock(&osd_lock[type]);
+ LIST_REMOVE(osd, osd_next);
+ if (unlock)
+ mtx_unlock(&osd_lock[type]);
+ free(osd->osd_slots, M_OSD);
+ osd->osd_slots = NULL;
+ osd->osd_nslots = 0;
+ } else if (slot == osd->osd_nslots) {
+ /* This was the last slot. */
+ osd->osd_slots = realloc(osd->osd_slots,
+ sizeof(void *) * (i + 1), M_OSD, M_NOWAIT | M_ZERO);
+ /*
+ * We always reallocate to smaller size, so we assume it will
+ * always succeed.
+ */
+ KASSERT(osd->osd_slots != NULL, ("realloc() failed"));
+ osd->osd_nslots = i + 1;
+ OSD_DEBUG("Reducing slots array to %u (type=%u).",
+ osd->osd_nslots, type);
+ }
+}
+
+void
+osd_exit(u_int type, struct osd *osd)
+{
+ u_int i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+
+ if (osd->osd_nslots == 0) {
+ KASSERT(osd->osd_slots == NULL, ("Non-null osd_slots."));
+ /* No OSD attached, just leave. */
+ return;
+ }
+
+ mtx_lock(&osd_lock[type]);
+ for (i = 1; i <= osd->osd_nslots; i++)
+ osd_del(type, osd, i);
+ mtx_unlock(&osd_lock[type]);
+ OSD_DEBUG("Object exit (type=%u).", type);
+}
+
+static void
+osd_init(void *arg __unused)
+{
+ u_int i;
+
+ for (i = OSD_FIRST; i <= OSD_LAST; i++) {
+ osd_nslots[i] = 0;
+ LIST_INIT(&osd_list[i]);
+ mtx_init(&osd_lock[i], "osd", NULL, MTX_DEF);
+ osd_destructors[i] = NULL;
+ }
+}
+SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index f8f639c..a331a42 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -199,6 +199,9 @@ proc_dtor(void *mem, int size, void *arg)
("bad number of threads in exiting process"));
KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
#endif
+ /* Free all OSD associated to this thread. */
+ osd_thread_exit(td);
+
/* Dispose of an alternate kstack, if it exists.
* XXX What if there are more than one thread in the proc?
* The first thread in the proc is special and not
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 3bde08e..4606005 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -141,6 +141,9 @@ thread_dtor(void *mem, int size, void *arg)
#ifdef AUDIT
audit_thread_free(td);
#endif
+ /* Free all OSD associated to this thread. */
+ osd_thread_exit(td);
+
EVENTHANDLER_INVOKE(thread_dtor, td);
free_unr(tid_unrhdr, td->td_tid);
}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index c3e2e80..06682d5 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -189,14 +189,21 @@ namei(struct nameidata *ndp)
ndp->ni_rootdir = fdp->fd_rdir;
ndp->ni_topdir = fdp->fd_jdir;
- if (cnp->cn_pnbuf[0] != '/' && ndp->ni_dirfd != AT_FDCWD) {
- error = fgetvp(td, ndp->ni_dirfd, &dp);
- FILEDESC_SUNLOCK(fdp);
- if (error == 0 && dp->v_type != VDIR) {
- vfslocked = VFS_LOCK_GIANT(dp->v_mount);
- vrele(dp);
- VFS_UNLOCK_GIANT(vfslocked);
- error = ENOTDIR;
+ dp = NULL;
+ if (cnp->cn_pnbuf[0] != '/') {
+ if (ndp->ni_startdir != NULL) {
+ dp = ndp->ni_startdir;
+ error = 0;
+ } else if (ndp->ni_dirfd != AT_FDCWD)
+ error = fgetvp(td, ndp->ni_dirfd, &dp);
+ if (error != 0 || dp != NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ if (error == 0 && dp->v_type != VDIR) {
+ vfslocked = VFS_LOCK_GIANT(dp->v_mount);
+ vrele(dp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ error = ENOTDIR;
+ }
}
if (error) {
uma_zfree(namei_zone, cnp->cn_pnbuf);
@@ -206,10 +213,16 @@ namei(struct nameidata *ndp)
#endif
return (error);
}
- } else {
+ }
+ if (dp == NULL) {
dp = fdp->fd_cdir;
VREF(dp);
FILEDESC_SUNLOCK(fdp);
+ if (ndp->ni_startdir != NULL) {
+ vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount);
+ vrele(ndp->ni_startdir);
+ VFS_UNLOCK_GIANT(vfslocked);
+ }
}
vfslocked = VFS_LOCK_GIANT(dp->v_mount);
for (;;) {
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 327a9c5..32093c0 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -108,7 +108,6 @@ static void v_decr_useonly(struct vnode *);
static void v_upgrade_usecount(struct vnode *);
static void vfree(struct vnode *);
static void vnlru_free(int);
-static void vdestroy(struct vnode *);
static void vgonel(struct vnode *);
static void vfs_knllock(void *arg);
static void vfs_knlunlock(void *arg);
@@ -419,7 +418,7 @@ vfs_suser(struct mount *mp, struct thread *td)
* If the thread is jailed, but this is not a jail-friendly file
* system, deny immediately.
*/
- if (jailed(td->td_ucred) && !(mp->mnt_vfc->vfc_flags & VFCF_JAIL))
+ if (!(mp->mnt_vfc->vfc_flags & VFCF_JAIL) && jailed(td->td_ucred))
return (EPERM);
/*
@@ -438,7 +437,14 @@ vfs_suser(struct mount *mp, struct thread *td)
return (EPERM);
}
- if ((mp->mnt_flag & MNT_USER) == 0 ||
+ /*
+ * If file system supports delegated administration, we don't check
+ * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified
+ * by the file system itself.
+ * If this is not the user that did original mount, we check for
+ * the PRIV_VFS_MOUNT_OWNER privilege.
+ */
+ if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) &&
mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) {
if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0)
return (error);
@@ -793,7 +799,7 @@ SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
* Routines having to do with the management of the vnode table.
*/
-static void
+void
vdestroy(struct vnode *vp)
{
struct bufobj *bo;
OpenPOWER on IntegriCloud