author     Renato Botelho <renato@netgate.com>    2016-08-17 15:23:38 -0300
committer  Renato Botelho <renato@netgate.com>    2016-08-17 15:23:38 -0300
commit     75cd8d40056c799f03b759475d9bfd10ba266a6c (patch)
tree       60433235501684bffeab90e65139a8285fcf46a9 /sys
parent     99990a0d149f0eae805aa1f49d4a61be30c3b000 (diff)
parent     ad413762f28e3be343987e707b9cf4f10f963693 (diff)
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/initcpu.c | 14
-rw-r--r--  sys/amd64/amd64/trap.c | 4
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 20
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c | 148
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c | 15
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c | 8
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c | 24
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c | 24
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h | 10
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h | 10
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c | 211
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c | 31
-rw-r--r--  sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 7
-rw-r--r--  sys/dev/usb/input/ukbd.c | 12
-rw-r--r--  sys/dev/usb/usbdevs | 1
-rw-r--r--  sys/dev/virtio/network/if_vtnet.c | 61
-rw-r--r--  sys/dev/virtio/network/if_vtnetvar.h | 1
-rw-r--r--  sys/fs/tmpfs/tmpfs_subr.c | 11
-rw-r--r--  sys/i386/i386/trap.c | 4
-rw-r--r--  sys/i386/linux/linux_ptrace.c | 4
-rw-r--r--  sys/kern/kern_exec.c | 4
-rw-r--r--  sys/kern/kern_exit.c | 9
-rw-r--r--  sys/kern/kern_fork.c | 19
-rw-r--r--  sys/kern/kern_kthread.c | 11
-rw-r--r--  sys/kern/kern_sig.c | 56
-rw-r--r--  sys/kern/kern_thr.c | 64
-rw-r--r--  sys/kern/subr_syscall.c | 4
-rw-r--r--  sys/kern/sys_process.c | 114
-rw-r--r--  sys/netpfil/pf/pf_ioctl.c | 16
-rw-r--r--  sys/sys/proc.h | 9
-rw-r--r--  sys/sys/ptrace.h | 23
-rw-r--r--  sys/vm/uma_core.c | 5
-rw-r--r--  sys/vm/vm_page.c | 2
-rw-r--r--  sys/vm/vm_pageout.c | 38
36 files changed, 690 insertions, 308 deletions
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 8868cd3..e20a271 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -125,6 +125,20 @@ init_amd(void)
wrmsr(MSR_NB_CFG1, msr);
}
}
+
+ /*
+ * BIOS may configure Family 10h processors to convert WC+ cache type
+ * to CD. That can hurt performance of guest VMs using nested paging.
+ * The relevant MSR bit is not documented in the BKDG,
+ * the fix is borrowed from Linux.
+ */
+ if (CPUID_TO_FAMILY(cpu_id) == 0x10) {
+ if ((cpu_feature2 & CPUID2_HV) == 0) {
+ msr = rdmsr(0xc001102a);
+ msr &= ~((uint64_t)1 << 24);
+ wrmsr(0xc001102a, msr);
+ }
+ }
}
/*
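
The init_amd() hunk above is a standard MSR read-modify-write, gated to bare metal (CPUID2_HV clear) because poking an undocumented MSR under a hypervisor could fault. A minimal sketch of the pattern, with a hypothetical helper name; the MSR number 0xc001102a and bit 24 come from the hunk itself:

	/*
	 * Illustrative sketch only: clear a single bit in a
	 * model-specific register, as the Family 10h workaround
	 * above does. msr_clear_bit() is a hypothetical helper,
	 * not an existing kernel API.
	 */
	static void
	msr_clear_bit(u_int msr_num, int bit)
	{
		uint64_t msr;

		msr = rdmsr(msr_num);		/* read current value */
		msr &= ~((uint64_t)1 << bit);	/* clear the target bit */
		wrmsr(msr_num, msr);		/* write it back */
	}
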
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 218c8cf..f487c44 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -448,8 +448,8 @@ trap(struct trapframe *frame)
goto out;
case T_DNA:
- KASSERT(!PCB_USER_FPU(td->td_pcb),
- ("Unregistered use of FPU in kernel"));
+ if (PCB_USER_FPU(td->td_pcb))
+ panic("Unregistered use of FPU in kernel");
fpudna();
goto out;
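
This T_DNA change (mirrored for i386 later in this diff) matters because KASSERT() compiles to nothing on kernels built without "options INVARIANTS"; converting it to an explicit panic keeps the sanity check live on production kernels. Roughly, in sys/systm.h terms:

	/*
	 * Sketch (paraphrased, not the verbatim header text):
	 *
	 *   #ifdef INVARIANTS
	 *   #define KASSERT(exp, msg) do { if (!(exp)) panic msg; } while (0)
	 *   #else
	 *   #define KASSERT(exp, msg) do { } while (0)
	 *   #endif
	 *
	 * so after the change the test is evaluated unconditionally:
	 */
	if (PCB_USER_FPU(td->td_pcb))
		panic("Unregistered use of FPU in kernel");
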
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index 9430037..1700194 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -782,6 +782,7 @@ typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
void *awcb_private;
arc_done_func_t *awcb_ready;
+ arc_done_func_t *awcb_children_ready;
arc_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
@@ -5045,6 +5046,15 @@ arc_write_ready(zio_t *zio)
hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
}
+static void
+arc_write_children_ready(zio_t *zio)
+{
+ arc_write_callback_t *callback = zio->io_private;
+ arc_buf_t *buf = callback->awcb_buf;
+
+ callback->awcb_children_ready(zio, buf, callback->awcb_private);
+}
+
/*
* The SPA calls this callback for each physical write that happens on behalf
* of a logical write. See the comment in dbuf_write_physdone() for details.
@@ -5141,7 +5151,8 @@ arc_write_done(zio_t *zio)
zio_t *
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
- const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
+ const zio_prop_t *zp, arc_done_func_t *ready,
+ arc_done_func_t *children_ready, arc_done_func_t *physdone,
arc_done_func_t *done, void *private, zio_priority_t priority,
int zio_flags, const zbookmark_phys_t *zb)
{
@@ -5161,13 +5172,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
hdr->b_flags |= ARC_FLAG_L2COMPRESS;
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
+ callback->awcb_children_ready = children_ready;
callback->awcb_physdone = physdone;
callback->awcb_done = done;
callback->awcb_private = private;
callback->awcb_buf = buf;
zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp,
- arc_write_ready, arc_write_physdone, arc_write_done, callback,
+ arc_write_ready,
+ (children_ready != NULL) ? arc_write_children_ready : NULL,
+ arc_write_physdone, arc_write_done, callback,
priority, zio_flags, zb);
return (zio);
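
Both arc_write() and zio_write() grow a children_ready slot between the ready and physdone callbacks. A hedged sketch of the widened call shape with the positional arguments labeled (the callback names here are placeholders, not real functions):

	zio = arc_write(pio, spa, txg, bp, buf,
	    l2arc, l2arc_compress, &zp,
	    my_ready_cb,		/* ready: buffer contents frozen */
	    my_children_ready_cb,	/* children ready; may be NULL */
	    my_physdone_cb,		/* per physical write completion */
	    my_done_cb,			/* logical write complete */
	    cb_arg, ZIO_PRIORITY_ASYNC_WRITE, zio_flags, &zb);

Passing NULL for children_ready preserves the old behavior, which is exactly what the mechanical call-site updates throughout the rest of this merge do.
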
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
index 2f26e9fb..9d5c398 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
@@ -481,13 +481,49 @@ dbuf_verify(dmu_buf_impl_t *db)
* If the blkptr isn't set but they have nonzero data,
* it had better be dirty, otherwise we'll lose that
* data when we evict this buffer.
+ *
+ * There is an exception to this rule for indirect blocks; in
+ * this case, if the indirect block is a hole, we fill in a few
+ * fields on each of the child blocks (importantly, birth time)
+ * to prevent hole birth times from being lost when you
+ * partially fill in a hole.
*/
if (db->db_dirtycnt == 0) {
- uint64_t *buf = db->db.db_data;
- int i;
+ if (db->db_level == 0) {
+ uint64_t *buf = db->db.db_data;
+ int i;
- for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ for (i = 0; i < db->db.db_size >> 3; i++) {
+ ASSERT(buf[i] == 0);
+ }
+ } else {
+ blkptr_t *bps = db->db.db_data;
+ ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
+ db->db.db_size);
+ /*
+ * We want to verify that all the blkptrs in the
+ * indirect block are holes, but we may have
+ * automatically set up a few fields for them.
+ * We iterate through each blkptr and verify
+ * they only have those fields set.
+ */
+ for (int i = 0;
+ i < db->db.db_size / sizeof (blkptr_t);
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(
+ &bp->blk_cksum));
+ ASSERT(
+ DVA_IS_EMPTY(&bp->blk_dva[0]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[1]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[2]));
+ ASSERT0(bp->blk_fill);
+ ASSERT0(bp->blk_pad[0]);
+ ASSERT0(bp->blk_pad[1]);
+ ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT(BP_IS_HOLE(bp));
+ ASSERT0(bp->blk_phys_birth);
+ }
}
}
}
@@ -655,10 +691,31 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- DB_DNODE_EXIT(db);
dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
db->db.db_size, db, type));
bzero(db->db.db_data, db->db.db_size);
+
+ if (db->db_blkptr != NULL && db->db_level > 0 &&
+ BP_IS_HOLE(db->db_blkptr) &&
+ db->db_blkptr->blk_birth != 0) {
+ blkptr_t *bps = db->db.db_data;
+ for (int i = 0; i < ((1 <<
+ DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
+ 1 << dn->dn_indblkshift);
+ BP_SET_LSIZE(bp,
+ BP_GET_LEVEL(db->db_blkptr) == 1 ?
+ dn->dn_datablksz :
+ BP_GET_LSIZE(db->db_blkptr));
+ BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
+ BP_SET_LEVEL(bp,
+ BP_GET_LEVEL(db->db_blkptr) - 1);
+ BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
+ }
+ }
+ DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
mutex_exit(&db->db_mtx);
return;
@@ -2876,7 +2933,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
uint64_t fill = 0;
int i;
- ASSERT3P(db->db_blkptr, ==, bp);
+ ASSERT3P(db->db_blkptr, !=, NULL);
+ ASSERT3P(&db->db_data_pending->dr_bp_copy, ==, bp);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
@@ -2898,7 +2956,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
#ifdef ZFS_DEBUG
if (db->db_blkid == DMU_SPILL_BLKID) {
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
- ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
+ ASSERT(!(BP_IS_HOLE(bp)) &&
db->db_blkptr == &dn->dn_phys->dn_spill);
}
#endif
@@ -2939,6 +2997,49 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
bp->blk_fill = fill;
mutex_exit(&db->db_mtx);
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ *db->db_blkptr = *bp;
+ rw_exit(&dn->dn_struct_rwlock);
+}
+
+/* ARGSUSED */
+/*
+ * This function gets called just prior to running through the compression
+ * stage of the zio pipeline. If we're an indirect block comprised of only
+ * holes, then we want this indirect to be compressed away to a hole. In
+ * order to do that we must zero out any information about the holes that
+ * this indirect points to prior to the time we try to compress it.
+ */
+static void
+dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
+{
+ dmu_buf_impl_t *db = vdb;
+ dnode_t *dn;
+ blkptr_t *bp;
+ uint64_t i;
+ int epbs;
+
+ ASSERT3U(db->db_level, >, 0);
+ DB_DNODE_ENTER(db);
+ dn = DB_DNODE(db);
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+
+ /* Determine if all our children are holes */
+ for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+ if (!BP_IS_HOLE(bp))
+ break;
+ }
+
+ /*
+ * If all the children are holes, then zero them all out so that
+ * we may get compressed away.
+ */
+ if (i == 1 << epbs) {
+ /* didn't find any non-holes */
+ bzero(db->db.db_data, db->db.db_size);
+ }
+ DB_DNODE_EXIT(db);
}
/*
@@ -3117,6 +3218,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
zio_t *zio;
int wp_flag = 0;
+ ASSERT(dmu_tx_is_syncing(tx));
+
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
os = dn->dn_objset;
@@ -3175,6 +3278,14 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
DB_DNODE_EXIT(db);
+ /*
+ * We copy the blkptr now (rather than when we instantiate the dirty
+ * record), because its value can change between open context and
+ * syncing context. We do not need to hold dn_struct_rwlock to read
+ * db_blkptr because we are in syncing context.
+ */
+ dr->dr_bp_copy = *db->db_blkptr;
+
if (db->db_level == 0 &&
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
/*
@@ -3184,8 +3295,9 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
void *contents = (data != NULL) ? data->b_data : NULL;
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, contents, db->db.db_size, &zp,
- dbuf_write_override_ready, NULL, dbuf_write_override_done,
+ &dr->dr_bp_copy, contents, db->db.db_size, &zp,
+ dbuf_write_override_ready, NULL, NULL,
+ dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3196,15 +3308,27 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, NULL, db->db.db_size, &zp,
- dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
+ &dr->dr_bp_copy, NULL, db->db.db_size, &zp,
+ dbuf_write_nofill_ready, NULL, NULL,
+ dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
ASSERT(arc_released(data));
+
+ /*
+ * For indirect blocks, we want to setup the children
+ * ready callback so that we can properly handle an indirect
+ * block that only contains holes.
+ */
+ arc_done_func_t *children_ready_cb = NULL;
+ if (db->db_level != 0)
+ children_ready_cb = dbuf_write_children_ready;
+
dr->dr_zio = arc_write(zio, os->os_spa, txg,
- db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
+ &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
+ children_ready_cb,
dbuf_write_physdone, dbuf_write_done, db,
ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
}
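
The dbuf.c hunks implement the "hole birth" bookkeeping: when an indirect block is itself a hole with a nonzero birth time, its would-be children are synthesized in memory with matching size, type, level, and birth txg, so an incremental zfs send can tell pre-existing holes from newly punched ones. Condensed into a hypothetical helper (the logic mirrors the dbuf_read_impl() hunk above; fill_hole_child() is not a real function):

	static void
	fill_hole_child(blkptr_t *bp, const blkptr_t *parent, int datablksz)
	{
		/* level-1 children are data blocks; deeper ones inherit */
		BP_SET_LSIZE(bp, BP_GET_LEVEL(parent) == 1 ?
		    datablksz : BP_GET_LSIZE(parent));
		BP_SET_TYPE(bp, BP_GET_TYPE(parent));
		BP_SET_LEVEL(bp, BP_GET_LEVEL(parent) - 1);
		/* logical birth only; no DVAs, checksum, or fill count */
		BP_SET_BIRTH(bp, parent->blk_birth, 0);
	}

The dbuf_verify() hunk is the flip side: it asserts that such synthesized children carry only those fields and nothing else.
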
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index d8d186c..9ce9665 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/
/* Copyright (c) 2013 by Saso Kiselkov. All rights reserved. */
/* Copyright (c) 2013, Joyent, Inc. All rights reserved. */
@@ -1590,10 +1590,11 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
- zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
- zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
- dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
- ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
+ zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx),
+ zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size,
+ zp, dmu_sync_late_arrival_ready, NULL,
+ NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
+ ZIO_FLAG_CANFAIL, zb));
return (0);
}
@@ -1746,8 +1747,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
zio_nowait(arc_write(pio, os->os_spa, txg,
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
- NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
- ZIO_FLAG_CANFAIL, &zb));
+ NULL, NULL, dmu_sync_done, dsa,
+ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
index 367dbcb..82c5854 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -1116,9 +1116,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
zio = arc_write(pio, os->os_spa, tx->tx_txg,
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
- DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
- NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
- ZIO_FLAG_MUSTSUCCEED, &zb);
+ DMU_OS_IS_L2COMPRESSIBLE(os),
+ &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
+ os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
* Sync special dnodes - the parent IO for the sync is the root block
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
index 9aee513..7179c41 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -60,20 +60,14 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
dn->dn_object, dn->dn_phys->dn_nlevels);
- /* check for existing blkptrs in the dnode */
- for (i = 0; i < nblkptr; i++)
- if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
- break;
- if (i != nblkptr) {
- /* transfer dnode's block pointers to new indirect block */
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
- ASSERT(db->db.db_data);
- ASSERT(arc_released(db->db_buf));
- ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
- bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
- sizeof (blkptr_t) * nblkptr);
- arc_buf_freeze(db->db_buf);
- }
+ /* transfer dnode's block pointers to new indirect block */
+ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
+ ASSERT(db->db.db_data);
+ ASSERT(arc_released(db->db_buf));
+ ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
+ bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
+ sizeof (blkptr_t) * nblkptr);
+ arc_buf_freeze(db->db_buf);
/* set dbuf's parent pointers to new indirect buf */
for (i = 0; i < nblkptr; i++) {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
index a157dfe..9880de9 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -56,7 +56,8 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *,
static scan_cb_t dsl_scan_scrub_cb;
static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
-static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
+static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *);
+static boolean_t dsl_scan_restarting(dsl_scan_t *, dmu_tx_t *);
unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
unsigned int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
@@ -329,8 +330,15 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
else
scn->scn_phys.scn_state = DSS_CANCELED;
- spa_history_log_internal(spa, "scan done", tx,
- "complete=%u", complete);
+ if (dsl_scan_restarting(scn, tx))
+ spa_history_log_internal(spa, "scan aborted, restarting", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ else if (!complete)
+ spa_history_log_internal(spa, "scan cancelled", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ else
+ spa_history_log_internal(spa, "scan done", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
mutex_enter(&spa->spa_scrub_lock);
@@ -1485,8 +1493,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* that we can restart an old-style scan while the pool is being
* imported (see dsl_scan_init).
*/
- if (scn->scn_restart_txg != 0 &&
- scn->scn_restart_txg <= tx->tx_txg) {
+ if (dsl_scan_restarting(scn, tx)) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@@ -1913,3 +1920,10 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));
}
+
+static boolean_t
+dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
+{
+ return (scn->scn_restart_txg != 0 &&
+ scn->scn_restart_txg <= tx->tx_txg);
+}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
index 04a80f7..714c528 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -141,9 +141,11 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
- const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
- arc_done_func_t *done, void *priv, zio_priority_t priority,
- int zio_flags, const zbookmark_phys_t *zb);
+ const zio_prop_t *zp,
+ arc_done_func_t *ready, arc_done_func_t *child_ready,
+ arc_done_func_t *physdone, arc_done_func_t *done,
+ void *priv, zio_priority_t priority, int zio_flags,
+ const zbookmark_phys_t *zb);
void arc_freed(spa_t *spa, const blkptr_t *bp);
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
index 233d541..4964126 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
@@ -121,6 +121,9 @@ typedef struct dbuf_dirty_record {
/* How much space was changed to dsl_pool_dirty_space() for this? */
unsigned int dr_accounted;
+ /* A copy of the bp that points to us */
+ blkptr_t dr_bp_copy;
+
union dirty_types {
struct dirty_indirect {
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
index 56c821a..fc7be5b 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -436,6 +436,7 @@ struct zio {
/* Callback info */
zio_done_func_t *io_ready;
+ zio_done_func_t *io_children_ready;
zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
@@ -503,9 +504,10 @@ extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *priv,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *priv, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *priv,
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 9bb6091..8523bc4 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -847,72 +847,46 @@ zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
}
-int
-zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
+/*
+ * Associate this zfsvfs with the given objset, which must be owned.
+ * This will cache a bunch of on-disk state from the objset in the
+ * zfsvfs.
+ */
+static int
+zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
- objset_t *os;
- zfsvfs_t *zfsvfs;
- uint64_t zval;
- int i, error;
- uint64_t sa_obj;
-
- /*
- * XXX: Fix struct statfs so this isn't necessary!
- *
- * The 'osname' is used as the filesystem's special node, which means
- * it must fit in statfs.f_mntfromname, or else it can't be
- * enumerated, so libzfs_mnttab_find() returns NULL, which causes
- * 'zfs unmount' to think it's not mounted when it is.
- */
- if (strlen(osname) >= MNAMELEN)
- return (SET_ERROR(ENAMETOOLONG));
-
- zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
-
- /*
- * We claim to always be readonly so we can open snapshots;
- * other ZPL code will prevent us from writing to snapshots.
- */
- error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
- if (error) {
- kmem_free(zfsvfs, sizeof (zfsvfs_t));
- return (error);
- }
+ int error;
+ uint64_t val;
- /*
- * Initialize the zfs-specific filesystem structure.
- * Should probably make this a kmem cache, shuffle fields,
- * and just bzero up to z_hold_mtx[].
- */
- zfsvfs->z_vfs = NULL;
- zfsvfs->z_parent = zfsvfs;
zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
zfsvfs->z_os = os;
error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
- if (error) {
- goto out;
- } else if (zfsvfs->z_version >
+ if (error != 0)
+ return (error);
+ if (zfsvfs->z_version >
zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
(void) printf("Can't mount a version %lld file system "
"on a version %lld pool\n. Pool must be upgraded to mount "
"this file system.", (u_longlong_t)zfsvfs->z_version,
(u_longlong_t)spa_version(dmu_objset_spa(os)));
- error = SET_ERROR(ENOTSUP);
- goto out;
+ return (SET_ERROR(ENOTSUP));
}
- if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
- goto out;
- zfsvfs->z_norm = (int)zval;
+ error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_norm = (int)val;
- if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
- goto out;
- zfsvfs->z_utf8 = (zval != 0);
+ error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_utf8 = (val != 0);
- if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
- goto out;
- zfsvfs->z_case = (uint_t)zval;
+ error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_case = (uint_t)val;
/*
* Fold case on file systems that are always or sometimes case
@@ -925,60 +899,99 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
+ uint64_t sa_obj = 0;
if (zfsvfs->z_use_sa) {
/* should either have both of these objects or none */
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
&sa_obj);
- if (error)
- goto out;
- } else {
- /*
- * Pre SA versions file systems should never touch
- * either the attribute registration or layout objects.
- */
- sa_obj = 0;
+ if (error != 0)
+ return (error);
}
error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
&zfsvfs->z_attr_table);
- if (error)
- goto out;
+ if (error != 0)
+ return (error);
if (zfsvfs->z_version >= ZPL_VERSION_SA)
sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
&zfsvfs->z_root);
- if (error)
- goto out;
+ if (error != 0)
+ return (error);
ASSERT(zfsvfs->z_root != 0);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
&zfsvfs->z_unlinkedobj);
- if (error)
- goto out;
+ if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
8, 1, &zfsvfs->z_userquota_obj);
- if (error && error != ENOENT)
- goto out;
+ if (error == ENOENT)
+ zfsvfs->z_userquota_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
8, 1, &zfsvfs->z_groupquota_obj);
- if (error && error != ENOENT)
- goto out;
+ if (error == ENOENT)
+ zfsvfs->z_groupquota_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
&zfsvfs->z_fuid_obj);
- if (error && error != ENOENT)
- goto out;
+ if (error == ENOENT)
+ zfsvfs->z_fuid_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
&zfsvfs->z_shares_dir);
- if (error && error != ENOENT)
- goto out;
+ if (error == ENOENT)
+ zfsvfs->z_shares_dir = 0;
+ else if (error != 0)
+ return (error);
+
+ return (0);
+}
+
+int
+zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
+{
+ objset_t *os;
+ zfsvfs_t *zfsvfs;
+ int error;
+
+ /*
+ * XXX: Fix struct statfs so this isn't necessary!
+ *
+ * The 'osname' is used as the filesystem's special node, which means
+ * it must fit in statfs.f_mntfromname, or else it can't be
+ * enumerated, so libzfs_mnttab_find() returns NULL, which causes
+ * 'zfs unmount' to think it's not mounted when it is.
+ */
+ if (strlen(osname) >= MNAMELEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+
+ /*
+ * We claim to always be readonly so we can open snapshots;
+ * other ZPL code will prevent us from writing to snapshots.
+ */
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ if (error) {
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
+ return (error);
+ }
+
+ zfsvfs->z_vfs = NULL;
+ zfsvfs->z_parent = zfsvfs;
mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -987,17 +1000,19 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
- for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+ for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+ error = zfsvfs_init(zfsvfs, os);
+ if (error != 0) {
+ dmu_objset_disown(os, zfsvfs);
+ *zfvp = NULL;
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
+ return (error);
+ }
+
*zfvp = zfsvfs;
return (0);
-
-out:
- dmu_objset_disown(os, zfsvfs);
- *zfvp = NULL;
- kmem_free(zfsvfs, sizeof (zfsvfs_t));
- return (error);
}
static int
@@ -2199,7 +2214,6 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
{
int err;
znode_t *zp;
- uint64_t sa_obj = 0;
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
@@ -2208,35 +2222,16 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
* We already own this, so just hold and rele it to update the
* objset_t, as the one we had before may have been evicted.
*/
- VERIFY0(dmu_objset_hold(osname, zfsvfs, &zfsvfs->z_os));
- VERIFY3P(zfsvfs->z_os->os_dsl_dataset->ds_owner, ==, zfsvfs);
- VERIFY(dsl_dataset_long_held(zfsvfs->z_os->os_dsl_dataset));
- dmu_objset_rele(zfsvfs->z_os, zfsvfs);
-
- /*
- * Make sure version hasn't changed
- */
-
- err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
- &zfsvfs->z_version);
-
- if (err)
- goto bail;
-
- err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
- ZFS_SA_ATTRS, 8, 1, &sa_obj);
-
- if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
- goto bail;
+ objset_t *os;
+ VERIFY0(dmu_objset_hold(osname, zfsvfs, &os));
+ VERIFY3P(os->os_dsl_dataset->ds_owner, ==, zfsvfs);
+ VERIFY(dsl_dataset_long_held(os->os_dsl_dataset));
+ dmu_objset_rele(os, zfsvfs);
- if ((err = sa_setup(zfsvfs->z_os, sa_obj,
- zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0)
+ err = zfsvfs_init(zfsvfs, os);
+ if (err != 0)
goto bail;
- if (zfsvfs->z_version >= ZPL_VERSION_SA)
- sa_register_update_callback(zfsvfs->z_os,
- zfs_sa_upgrade);
-
VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
zfs_set_fuid_feature(zfsvfs);
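
The zfs_vfsops.c refactor pulls the on-disk-state caching out of zfsvfs_create() into zfsvfs_init() so that zfs_resume_fs() can reuse it (e.g. after a rollback), replacing "goto out" unwinding with early returns and leaving cleanup to the one caller that owns the objset. A condensed sketch of the resulting shape (bodies abbreviated):

	static int
	zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
	{
		uint64_t val;
		int error;

		error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
		if (error != 0)
			return (error);	/* nothing to unwind here */
		zfsvfs->z_norm = (int)val;
		/* the remaining zap_lookup()/sa_setup() calls follow suit */
		return (0);
	}

On failure, zfsvfs_create() alone disowns the objset and frees the zfsvfs, so the error paths cannot leak or double-free.
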
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index b0f11ac..17179f6 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -2438,6 +2438,7 @@ top:
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
index 616aa79..98fd449 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -771,9 +771,10 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zio_t *
zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -792,6 +793,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
zio->io_ready = ready;
+ zio->io_children_ready = children_ready;
zio->io_physdone = physdone;
zio->io_prop = *zp;
@@ -1189,6 +1191,16 @@ zio_write_bp_init(zio_t *zio)
if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE);
+ if (zio->io_children_ready != NULL) {
+ /*
+ * Now that all our children are ready, run the callback
+ * associated with this zio in case it wants to modify the
+ * data to be written.
+ */
+ ASSERT3U(zp->zp_level, >, 0);
+ zio->io_children_ready(zio);
+ }
+
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
if (zio->io_bp_override) {
@@ -2118,9 +2130,9 @@ zio_write_gang_block(zio_t *pio)
zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
(char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
- zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
- &pio->io_bookmark));
+ zio_write_gang_member_ready, NULL, NULL, NULL,
+ &gn->gn_child[g], pio->io_priority,
+ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark));
}
/*
@@ -2509,7 +2521,7 @@ zio_ddt_write(zio_t *zio)
dio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
zio->io_orig_size, &czp, NULL, NULL,
- zio_ddt_ditto_write_done, dde, zio->io_priority,
+ NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL);
@@ -2530,7 +2542,8 @@ zio_ddt_write(zio_t *zio)
ddt_phys_addref(ddp);
} else {
cio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
- zio->io_orig_size, zp, zio_ddt_child_write_ready, NULL,
+ zio->io_orig_size, zp,
+ zio_ddt_child_write_ready, NULL, NULL,
zio_ddt_child_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 4a8029b..03d9c15 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -3114,6 +3114,13 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
/* don't allow more IOCTLs */
priv->gone = 1;
+ /*
+ * Clear the device description to avoid use after free,
+ * because the bsddev is not destroyed when this module is
+ * unloaded:
+ */
+ device_set_desc(mdev->pdev->dev.bsddev, NULL);
+
/* XXX wait a bit to allow IOCTL handlers to complete */
pause("W", hz);
diff --git a/sys/dev/usb/input/ukbd.c b/sys/dev/usb/input/ukbd.c
index 3ee6b8bf..506ee45 100644
--- a/sys/dev/usb/input/ukbd.c
+++ b/sys/dev/usb/input/ukbd.c
@@ -201,6 +201,7 @@ struct ukbd_softc {
int sc_mode; /* input mode (K_XLATE,K_RAW,K_CODE) */
int sc_state; /* shift/lock key state */
int sc_accents; /* accent key index (> 0) */
+ int sc_polling; /* polling recursion count */
int sc_led_size;
int sc_kbd_size;
@@ -1986,7 +1987,16 @@ ukbd_poll(keyboard_t *kbd, int on)
struct ukbd_softc *sc = kbd->kb_data;
UKBD_LOCK();
- if (on) {
+ /*
+ * Keep a reference count on polling to allow recursive
+ * cngrab() during a panic for example.
+ */
+ if (on)
+ sc->sc_polling++;
+ else
+ sc->sc_polling--;
+
+ if (sc->sc_polling != 0) {
sc->sc_flags |= UKBD_FLAG_POLLING;
sc->sc_poll_thread = curthread;
} else {
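
The ukbd change replaces a boolean with a reference count so that nested polling requests, e.g. a recursive cngrab() while panicking inside the debugger, do not switch polling off prematurely. The idiom, reduced to a generic hedged sketch (enter_polling_mode() and leave_polling_mode() are hypothetical):

	static int poll_refs;		/* protected by the driver lock */

	static void
	poll_set(int on)
	{
		if (on)
			poll_refs++;
		else
			poll_refs--;

		/* polling stays on until every enable is matched */
		if (poll_refs != 0)
			enter_polling_mode();
		else
			leave_polling_mode();
	}
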
diff --git a/sys/dev/usb/usbdevs b/sys/dev/usb/usbdevs
index a53e15b..84db769 100644
--- a/sys/dev/usb/usbdevs
+++ b/sys/dev/usb/usbdevs
@@ -2726,6 +2726,7 @@ product LOGITECH BB13 0xc401 USB-PS/2 Trackball
product LOGITECH RK53 0xc501 Cordless mouse
product LOGITECH RB6 0xc503 Cordless keyboard
product LOGITECH MX700 0xc506 Cordless optical mouse
+product LOGITECH UNIFYING 0xc52b Logitech Unifying Receiver
product LOGITECH QUICKCAMPRO2 0xd001 QuickCam Pro
/* Logitec Corp. products */
diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index a5e6383..8474edb 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -228,18 +228,32 @@ static void vtnet_disable_interrupts(struct vtnet_softc *);
static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
/* Tunables. */
+static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters");
static int vtnet_csum_disable = 0;
TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
+SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
+ &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
+SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
+ 0, "Disables TCP Segmentation Offload");
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
+SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
+ 0, "Disables TCP Large Receive Offload");
static int vtnet_mq_disable = 0;
TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
-static int vtnet_mq_max_pairs = 0;
+SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
+ 0, "Disables Multi Queue support");
+static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
+SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
+ &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
+SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
+ &vtnet_rx_process_limit, 0,
+ "Limits the number RX segments processed in a single pass");
static uma_zone_t vtnet_tx_header_zone;
@@ -589,7 +603,6 @@ static void
vtnet_setup_features(struct vtnet_softc *sc)
{
device_t dev;
- int max_pairs, max;
dev = sc->vtnet_dev;
@@ -638,32 +651,31 @@ vtnet_setup_features(struct vtnet_softc *sc)
if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
- max_pairs = virtio_read_dev_config_2(dev,
+ sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
offsetof(struct virtio_net_config, max_virtqueue_pairs));
- if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
- max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
- max_pairs = 1;
} else
- max_pairs = 1;
+ sc->vtnet_max_vq_pairs = 1;
- if (max_pairs > 1) {
+ if (sc->vtnet_max_vq_pairs > 1) {
/*
- * Limit the maximum number of queue pairs to the number of
- * CPUs or the configured maximum. The actual number of
- * queues that get used may be less.
+ * Limit the maximum number of queue pairs to the lower of
+ * the number of CPUs and the configured maximum.
+ * The actual number of queues that get used may be less.
*/
+ int max;
+
max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
- if (max > 0 && max_pairs > max)
- max_pairs = max;
- if (max_pairs > mp_ncpus)
- max_pairs = mp_ncpus;
- if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
- max_pairs = VTNET_MAX_QUEUE_PAIRS;
- if (max_pairs > 1)
- sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
+ if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
+ if (max > mp_ncpus)
+ max = mp_ncpus;
+ if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
+ max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
+ if (max > 1) {
+ sc->vtnet_requested_vq_pairs = max;
+ sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
+ }
+ }
}
-
- sc->vtnet_max_vq_pairs = max_pairs;
}
static int
@@ -2989,13 +3001,11 @@ vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
dev = sc->vtnet_dev;
if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
- MPASS(sc->vtnet_max_vq_pairs == 1);
sc->vtnet_act_vq_pairs = 1;
return;
}
- /* BMV: Just use the maximum configured for now. */
- npairs = sc->vtnet_max_vq_pairs;
+ npairs = sc->vtnet_requested_vq_pairs;
if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
device_printf(dev,
@@ -3851,6 +3861,9 @@ vtnet_setup_sysctl(struct vtnet_softc *sc)
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
"Maximum number of supported virtqueue pairs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
+ CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
+ "Requested number of virtqueue pairs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
"Number of active virtqueue pairs");
diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h
index f89f6b1..15436d9 100644
--- a/sys/dev/virtio/network/if_vtnetvar.h
+++ b/sys/dev/virtio/network/if_vtnetvar.h
@@ -155,6 +155,7 @@ struct vtnet_softc {
int vtnet_if_flags;
int vtnet_act_vq_pairs;
int vtnet_max_vq_pairs;
+ int vtnet_requested_vq_pairs;
struct virtqueue *vtnet_ctrl_vq;
struct vtnet_mac_filter *vtnet_mac_filter;
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index f06958e..9781b2c 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -818,10 +818,13 @@ tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
goto out;
}
- MPASS((cookie & TMPFS_DIRCOOKIE_MASK) == cookie);
- dekey.td_hash = cookie;
- /* Recover if direntry for cookie was removed */
- de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
+ if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
+ de = NULL;
+ } else {
+ dekey.td_hash = cookie;
+ /* Recover if direntry for cookie was removed */
+ de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
+ }
dc->tdc_tree = de;
dc->tdc_current = de;
if (de != NULL && tmpfs_dirent_duphead(de)) {
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 2fcf690..a186ff5 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -534,8 +534,8 @@ trap(struct trapframe *frame)
case T_DNA:
#ifdef DEV_NPX
- KASSERT(!PCB_USER_FPU(td->td_pcb),
- ("Unregistered use of FPU in kernel"));
+ if (PCB_USER_FPU(td->td_pcb))
+ panic("Unregistered use of FPU in kernel");
if (npxdna())
goto out;
#endif
diff --git a/sys/i386/linux/linux_ptrace.c b/sys/i386/linux/linux_ptrace.c
index 2925e6b..fbe66b4 100644
--- a/sys/i386/linux/linux_ptrace.c
+++ b/sys/i386/linux/linux_ptrace.c
@@ -69,7 +69,7 @@ __FBSDID("$FreeBSD$");
#define PTRACE_ATTACH 16
#define PTRACE_DETACH 17
-#define PTRACE_SYSCALL 24
+#define LINUX_PTRACE_SYSCALL 24
#define PTRACE_GETREGS 12
#define PTRACE_SETREGS 13
@@ -473,7 +473,7 @@ linux_ptrace(struct thread *td, struct linux_ptrace_args *uap)
break;
}
- case PTRACE_SYSCALL:
+ case LINUX_PTRACE_SYSCALL:
/* fall through */
default:
printf("linux: ptrace(%u, ...) not implemented\n",
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 8357ab8..6b34066 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
+#include <sys/ptrace.h>
#include <sys/namei.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
@@ -899,7 +900,8 @@ exec_fail_dealloc:
if (error == 0) {
PROC_LOCK(p);
- td->td_dbgflags |= TDB_EXEC;
+ if (p->p_ptevents & PTRACE_EXEC)
+ td->td_dbgflags |= TDB_EXEC;
PROC_UNLOCK(p);
/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index f9244af..c362530 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -336,6 +336,7 @@ exit1(struct thread *td, int rv)
rv = p->p_xstat; /* Event handler could change exit status */
stopprofclock(p);
p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);
+ p->p_ptevents = 0;
/*
* Stop the real interval timer. If the handler is currently
@@ -519,8 +520,12 @@ exit1(struct thread *td, int rv)
*/
clear_orphan(q);
q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
- FOREACH_THREAD_IN_PROC(q, tdt)
- tdt->td_dbgflags &= ~TDB_SUSPEND;
+ q->p_flag2 &= ~P2_PTRACE_FSTP;
+ q->p_ptevents = 0;
+ FOREACH_THREAD_IN_PROC(q, tdt) {
+ tdt->td_dbgflags &= ~(TDB_SUSPEND | TDB_XSIG |
+ TDB_FSTP);
+ }
kern_psignal(q, SIGKILL);
}
PROC_UNLOCK(q);
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index a84b619..4d96840 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -411,6 +411,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
__rangeof(struct proc, p_startzero, p_endzero));
p2->p_treeflag = 0;
p2->p_filemon = NULL;
+ p2->p_ptevents = 0;
/* Tell the prison that we exist. */
prison_proc_hold(p2->p_ucred->cr_prison);
@@ -710,8 +711,7 @@ do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
if ((flags & RFMEM) == 0 && dtrace_fasttrap_fork)
dtrace_fasttrap_fork(p1, p2);
#endif
- if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED |
- P_FOLLOWFORK)) {
+ if (p1->p_ptevents & PTRACE_FORK) {
/*
* Arrange for debugger to receive the fork event.
*
@@ -1057,21 +1057,19 @@ fork_return(struct thread *td, struct trapframe *frame)
if (td->td_dbgflags & TDB_STOPATFORK) {
sx_xlock(&proctree_lock);
PROC_LOCK(p);
- if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) ==
- (P_TRACED | P_FOLLOWFORK)) {
+ if (p->p_pptr->p_ptevents & PTRACE_FORK) {
/*
* If debugger still wants auto-attach for the
* parent's children, do it now.
*/
dbg = p->p_pptr->p_pptr;
- p->p_flag |= P_TRACED;
- p->p_oppid = p->p_pptr->p_pid;
+ proc_set_traced(p);
CTR2(KTR_PTRACE,
"fork_return: attaching to new child pid %d: oppid %d",
p->p_pid, p->p_oppid);
proc_reparent(p, dbg);
sx_xunlock(&proctree_lock);
- td->td_dbgflags |= TDB_CHILD | TDB_SCX;
+ td->td_dbgflags |= TDB_CHILD | TDB_SCX | TDB_FSTP;
ptracestop(td, SIGSTOP);
td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
} else {
@@ -1083,7 +1081,7 @@ fork_return(struct thread *td, struct trapframe *frame)
cv_broadcast(&p->p_dbgwait);
}
PROC_UNLOCK(p);
- } else if (p->p_flag & P_TRACED) {
+ } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) {
/*
* This is the start of a new thread in a traced
* process. Report a system call exit event.
@@ -1091,9 +1089,10 @@ fork_return(struct thread *td, struct trapframe *frame)
PROC_LOCK(p);
td->td_dbgflags |= TDB_SCX;
_STOPEVENT(p, S_SCX, td->td_dbg_sc_code);
- if ((p->p_stops & S_PT_SCX) != 0)
+ if ((p->p_ptevents & PTRACE_SCX) != 0 ||
+ (td->td_dbgflags & TDB_BORN) != 0)
ptracestop(td, SIGTRAP);
- td->td_dbgflags &= ~TDB_SCX;
+ td->td_dbgflags &= ~(TDB_SCX | TDB_BORN);
PROC_UNLOCK(p);
}
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index 70d95fa..fb46025 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -322,11 +322,13 @@ void
kthread_exit(void)
{
struct proc *p;
+ struct thread *td;
- p = curthread->td_proc;
+ td = curthread;
+ p = td->td_proc;
/* A module may be waiting for us to exit. */
- wakeup(curthread);
+ wakeup(td);
/*
* The last exiting thread in a kernel process must tear down
@@ -339,9 +341,10 @@ kthread_exit(void)
rw_wunlock(&tidhash_lock);
kproc_exit(0);
}
- LIST_REMOVE(curthread, td_hash);
+ LIST_REMOVE(td, td_hash);
rw_wunlock(&tidhash_lock);
- umtx_thread_exit(curthread);
+ umtx_thread_exit(td);
+ tdsigcleanup(td);
PROC_SLOCK(p);
thread_exit();
}
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 29783f8..2c37d76 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2175,9 +2175,10 @@ tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
!((prop & SA_CONT) && (p->p_flag & P_STOPPED_SIG)))
return (ret);
/*
- * SIGKILL: Remove procfs STOPEVENTs.
+ * SIGKILL: Remove procfs STOPEVENTs and ptrace events.
*/
if (sig == SIGKILL) {
+ p->p_ptevents = 0;
/* from procfs_ioctl.c: PIOCBIC */
p->p_stops = 0;
/* from procfs_ioctl.c: PIOCCONT */
@@ -2488,19 +2489,36 @@ ptracestop(struct thread *td, int sig)
td->td_tid, p->p_pid, td->td_dbgflags, sig);
PROC_SLOCK(p);
while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) {
- if (p->p_flag & P_SINGLE_EXIT) {
+ if (p->p_flag & P_SINGLE_EXIT &&
+ !(td->td_dbgflags & TDB_EXIT)) {
+ /*
+ * Ignore ptrace stops except for thread exit
+ * events when the process exits.
+ */
td->td_dbgflags &= ~TDB_XSIG;
PROC_SUNLOCK(p);
return (sig);
}
+
/*
- * Just make wait() to work, the last stopped thread
- * will win.
+ * Make wait(2) work. Ensure that right after the
+ * attach, the thread which was decided to become the
+ * leader of attach gets reported to the waiter.
+ * Otherwise, just avoid overwriting another thread's
+ * assignment to p_xthread. If another thread has
+ * already set p_xthread, the current thread will get
+ * a chance to report itself upon the next iteration.
*/
- p->p_xstat = sig;
- p->p_xthread = td;
- p->p_flag |= (P_STOPPED_SIG|P_STOPPED_TRACE);
- sig_suspend_threads(td, p, 0);
+ if ((td->td_dbgflags & TDB_FSTP) != 0 ||
+ ((p->p_flag2 & P2_PTRACE_FSTP) == 0 &&
+ p->p_xthread == NULL)) {
+ p->p_xstat = sig;
+ p->p_xthread = td;
+ td->td_dbgflags &= ~TDB_FSTP;
+ p->p_flag2 &= ~P2_PTRACE_FSTP;
+ p->p_flag |= P_STOPPED_SIG | P_STOPPED_TRACE;
+ sig_suspend_threads(td, p, 0);
+ }
if ((td->td_dbgflags & TDB_STOPATFORK) != 0) {
td->td_dbgflags &= ~TDB_STOPATFORK;
cv_broadcast(&p->p_dbgwait);
@@ -2651,7 +2669,20 @@ issignal(struct thread *td)
SIG_STOPSIGMASK(sigpending);
if (SIGISEMPTY(sigpending)) /* no signal to send */
return (0);
- sig = sig_ffs(&sigpending);
+ if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED &&
+ (p->p_flag2 & P2_PTRACE_FSTP) != 0 &&
+ SIGISMEMBER(sigpending, SIGSTOP)) {
+ /*
+ * If debugger just attached, always consume
+ * SIGSTOP from ptrace(PT_ATTACH) first, to
+ * execute the debugger attach ritual in
+ * order.
+ */
+ sig = SIGSTOP;
+ td->td_dbgflags |= TDB_FSTP;
+ } else {
+ sig = sig_ffs(&sigpending);
+ }
if (p->p_stops & S_SIG) {
mtx_unlock(&ps->ps_mtx);
@@ -2668,7 +2699,7 @@ issignal(struct thread *td)
sigqueue_delete(&p->p_sigqueue, sig);
continue;
}
- if (p->p_flag & P_TRACED && (p->p_flag & P_PPTRACE) == 0) {
+ if ((p->p_flag & (P_TRACED | P_PPTRACE)) == P_TRACED) {
/*
* If traced, always stop.
* Remove old signal from queue before the stop.
@@ -2766,6 +2797,8 @@ issignal(struct thread *td)
mtx_unlock(&ps->ps_mtx);
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
&p->p_mtx.lock_object, "Catching SIGSTOP");
+ sigqueue_delete(&td->td_sigqueue, sig);
+ sigqueue_delete(&p->p_sigqueue, sig);
p->p_flag |= P_STOPPED_SIG;
p->p_xstat = sig;
PROC_SLOCK(p);
@@ -2773,7 +2806,7 @@ issignal(struct thread *td)
thread_suspend_switch(td, p);
PROC_SUNLOCK(p);
mtx_lock(&ps->ps_mtx);
- break;
+ goto next;
} else if (prop & SA_IGNORE) {
/*
* Except for SIGCONT, shouldn't get here.
@@ -2804,6 +2837,7 @@ issignal(struct thread *td)
}
sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */
sigqueue_delete(&p->p_sigqueue, sig);
+next:;
}
/* NOTREACHED */
}
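
The issignal()/ptracestop() changes make the kernel consume the SIGSTOP queued by PT_ATTACH before any other pending signal and elect exactly one thread to report the stop, so attaching to a busy multithreaded process behaves deterministically. From the debugger's side, the ritual these hunks serve looks roughly like this (userland sketch, error handling abbreviated):

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <err.h>
	#include <signal.h>

	static void
	attach_and_wait(pid_t pid)
	{
		int status;

		if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
			err(1, "PT_ATTACH");
		/* the attach SIGSTOP is now reported first */
		if (waitpid(pid, &status, 0) == -1)
			err(1, "waitpid");
		if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP)
			errx(1, "unexpected wait status");
	}
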
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 98965b1..b01aecb 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/posix4.h>
+#include <sys/ptrace.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
@@ -252,6 +253,8 @@ thread_create(struct thread *td, struct rtprio *rtp,
thread_unlock(td);
if (P_SHOULDSTOP(p))
newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+ if (p->p_ptevents & PTRACE_LWP)
+ newtd->td_dbgflags |= TDB_BORN;
PROC_UNLOCK(p);
tidhash_add(newtd);
@@ -314,29 +317,54 @@ kern_thr_exit(struct thread *td)
p = td->td_proc;
- rw_wlock(&tidhash_lock);
+ /*
+ * If all of the threads in a process call this routine to
+ * exit (e.g. all threads call pthread_exit()), exactly one
+ * thread should return to the caller to terminate the process
+ * instead of the thread.
+ *
+ * Checking p_numthreads alone is not sufficient since threads
+ * might be committed to terminating while the PROC_LOCK is
+ * dropped in either ptracestop() or while removing this thread
+ * from the tidhash. Instead, the p_pendingexits field holds
+ * the count of threads in either of those states and a thread
+ * is considered the "last" thread if all of the other threads
+ * in a process are already terminating.
+ */
PROC_LOCK(p);
-
- if (p->p_numthreads != 1) {
- racct_sub(p, RACCT_NTHR, 1);
- LIST_REMOVE(td, td_hash);
- rw_wunlock(&tidhash_lock);
- tdsigcleanup(td);
- umtx_thread_exit(td);
- PROC_SLOCK(p);
- thread_stopped(p);
- thread_exit();
- /* NOTREACHED */
+ if (p->p_numthreads == p->p_pendingexits + 1) {
+ /*
+ * Ignore attempts to shut down last thread in the
+ * proc. This will actually call _exit(2) in the
+ * usermode trampoline when it returns.
+ */
+ PROC_UNLOCK(p);
+ return (0);
}
+ p->p_pendingexits++;
+ td->td_dbgflags |= TDB_EXIT;
+ if (p->p_ptevents & PTRACE_LWP)
+ ptracestop(td, SIGTRAP);
+ PROC_UNLOCK(p);
+ tidhash_remove(td);
+ PROC_LOCK(p);
+ p->p_pendingexits--;
+
/*
- * Ignore attempts to shut down last thread in the proc. This
- * will actually call _exit(2) in the usermode trampoline when
- * it returns.
+ * The check above should prevent all other threads from this
+ * process from exiting while the PROC_LOCK is dropped, so
+ * there must be at least one other thread other than the
+ * current thread.
*/
- PROC_UNLOCK(p);
- rw_wunlock(&tidhash_lock);
- return (0);
+ KASSERT(p->p_numthreads > 1, ("too few threads"));
+ racct_sub(p, RACCT_NTHR, 1);
+ tdsigcleanup(td);
+ umtx_thread_exit(td);
+ PROC_SLOCK(p);
+ thread_stopped(p);
+ thread_exit();
+ /* NOTREACHED */
}
int
diff --git a/sys/kern/subr_syscall.c b/sys/kern/subr_syscall.c
index 1ad7dd0..f2b83f0 100644
--- a/sys/kern/subr_syscall.c
+++ b/sys/kern/subr_syscall.c
@@ -88,7 +88,7 @@ syscallenter(struct thread *td, struct syscall_args *sa)
PROC_LOCK(p);
td->td_dbg_sc_code = sa->code;
td->td_dbg_sc_narg = sa->narg;
- if (p->p_stops & S_PT_SCE)
+ if (p->p_ptevents & PTRACE_SCE)
ptracestop((td), SIGTRAP);
PROC_UNLOCK(p);
}
@@ -215,7 +215,7 @@ syscallret(struct thread *td, int error, struct syscall_args *sa __unused)
*/
if (traced &&
((td->td_dbgflags & (TDB_FORK | TDB_EXEC)) != 0 ||
- (p->p_stops & S_PT_SCX) != 0))
+ (p->p_ptevents & PTRACE_SCX) != 0))
ptracestop(td, SIGTRAP);
td->td_dbgflags &= ~(TDB_SCX | TDB_EXEC | TDB_FORK);
PROC_UNLOCK(p);
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 96e0181..c4533ce 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -542,6 +542,7 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
struct ptrace_lwpinfo32 pl32;
struct ptrace_vm_entry32 pve32;
#endif
+ int ptevents;
} r;
void *addr;
int error = 0;
@@ -556,6 +557,7 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
AUDIT_ARG_VALUE(uap->data);
addr = &r;
switch (uap->req) {
+ case PT_GET_EVENT_MASK:
case PT_GETREGS:
case PT_GETFPREGS:
case PT_GETDBREGS:
@@ -570,6 +572,12 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
case PT_SETDBREGS:
error = COPYIN(uap->addr, &r.dbreg, sizeof r.dbreg);
break;
+ case PT_SET_EVENT_MASK:
+ if (uap->data != sizeof(r.ptevents))
+ error = EINVAL;
+ else
+ error = copyin(uap->addr, &r.ptevents, uap->data);
+ break;
case PT_IO:
error = COPYIN(uap->addr, &r.piod, sizeof r.piod);
break;
@@ -603,7 +611,12 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
case PT_GETDBREGS:
error = COPYOUT(&r.dbreg, uap->addr, sizeof r.dbreg);
break;
+ case PT_GET_EVENT_MASK:
+ /* NB: The size in uap->data is validated in kern_ptrace(). */
+ error = copyout(&r.ptevents, uap->addr, uap->data);
+ break;
case PT_LWPINFO:
+ /* NB: The size in uap->data is validated in kern_ptrace(). */
error = copyout(&r.pl, uap->addr, uap->data);
break;
}
@@ -635,6 +648,17 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
#define PROC_WRITE(w, t, a) proc_write_ ## w (t, a)
#endif
+void
+proc_set_traced(struct proc *p)
+{
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ p->p_flag |= P_TRACED;
+ p->p_flag2 |= P2_PTRACE_FSTP;
+ p->p_ptevents = PTRACE_DEFAULT;
+ p->p_oppid = p->p_pptr->p_pid;
+}
+
int
kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
{
@@ -666,6 +690,9 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
case PT_TO_SCX:
case PT_SYSCALL:
case PT_FOLLOW_FORK:
+ case PT_LWP_EVENTS:
+ case PT_GET_EVENT_MASK:
+ case PT_SET_EVENT_MASK:
case PT_DETACH:
sx_xlock(&proctree_lock);
proctree_locked = 1;
@@ -840,10 +867,9 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
switch (req) {
case PT_TRACE_ME:
/* set my trace flag and "owner" so it can read/write me */
- p->p_flag |= P_TRACED;
+ proc_set_traced(p);
if (p->p_flag & P_PPWAIT)
p->p_flag |= P_PPTRACE;
- p->p_oppid = p->p_pptr->p_pid;
CTR1(KTR_PTRACE, "PT_TRACE_ME: pid %d", p->p_pid);
break;
@@ -858,8 +884,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
* The old parent is remembered so we can put things back
* on a "detach".
*/
- p->p_flag |= P_TRACED;
- p->p_oppid = p->p_pptr->p_pid;
+ proc_set_traced(p);
if (p->p_pptr != td->td_proc) {
proc_reparent(p, td->td_proc);
}
@@ -897,14 +922,50 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
case PT_FOLLOW_FORK:
CTR3(KTR_PTRACE, "PT_FOLLOW_FORK: pid %d %s -> %s", p->p_pid,
- p->p_flag & P_FOLLOWFORK ? "enabled" : "disabled",
+ p->p_ptevents & PTRACE_FORK ? "enabled" : "disabled",
+ data ? "enabled" : "disabled");
+ if (data)
+ p->p_ptevents |= PTRACE_FORK;
+ else
+ p->p_ptevents &= ~PTRACE_FORK;
+ break;
+
+ case PT_LWP_EVENTS:
+ CTR3(KTR_PTRACE, "PT_LWP_EVENTS: pid %d %s -> %s", p->p_pid,
+ p->p_ptevents & PTRACE_LWP ? "enabled" : "disabled",
data ? "enabled" : "disabled");
if (data)
- p->p_flag |= P_FOLLOWFORK;
+ p->p_ptevents |= PTRACE_LWP;
else
- p->p_flag &= ~P_FOLLOWFORK;
+ p->p_ptevents &= ~PTRACE_LWP;
break;
+ case PT_GET_EVENT_MASK:
+ if (data != sizeof(p->p_ptevents)) {
+ error = EINVAL;
+ break;
+ }
+ CTR2(KTR_PTRACE, "PT_GET_EVENT_MASK: pid %d mask %#x", p->p_pid,
+ p->p_ptevents);
+ *(int *)addr = p->p_ptevents;
+ break;
+
+ case PT_SET_EVENT_MASK:
+ if (data != sizeof(p->p_ptevents)) {
+ error = EINVAL;
+ break;
+ }
+ tmp = *(int *)addr;
+ if ((tmp & ~(PTRACE_EXEC | PTRACE_SCE | PTRACE_SCX |
+ PTRACE_FORK | PTRACE_LWP)) != 0) {
+ error = EINVAL;
+ break;
+ }
+ CTR3(KTR_PTRACE, "PT_SET_EVENT_MASK: pid %d mask %#x -> %#x",
+ p->p_pid, p->p_ptevents, tmp);
+ p->p_ptevents = tmp;
+ break;
+
case PT_STEP:
case PT_CONTINUE:
case PT_TO_SCE:
@@ -937,24 +998,24 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
}
switch (req) {
case PT_TO_SCE:
- p->p_stops |= S_PT_SCE;
+ p->p_ptevents |= PTRACE_SCE;
CTR4(KTR_PTRACE,
- "PT_TO_SCE: pid %d, stops = %#x, PC = %#lx, sig = %d",
- p->p_pid, p->p_stops,
+ "PT_TO_SCE: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
(u_long)(uintfptr_t)addr, data);
break;
case PT_TO_SCX:
- p->p_stops |= S_PT_SCX;
+ p->p_ptevents |= PTRACE_SCX;
CTR4(KTR_PTRACE,
- "PT_TO_SCX: pid %d, stops = %#x, PC = %#lx, sig = %d",
- p->p_pid, p->p_stops,
+ "PT_TO_SCX: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
(u_long)(uintfptr_t)addr, data);
break;
case PT_SYSCALL:
- p->p_stops |= S_PT_SCE | S_PT_SCX;
+ p->p_ptevents |= PTRACE_SYSCALL;
CTR4(KTR_PTRACE,
- "PT_SYSCALL: pid %d, stops = %#x, PC = %#lx, sig = %d",
- p->p_pid, p->p_stops,
+ "PT_SYSCALL: pid %d, events = %#x, PC = %#lx, sig = %d",
+ p->p_pid, p->p_ptevents,
(u_long)(uintfptr_t)addr, data);
break;
case PT_CONTINUE:
@@ -973,7 +1034,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
* parent. Otherwise the debuggee will be set
* as an orphan of the debugger.
*/
- p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);
+ p->p_flag &= ~(P_TRACED | P_WAITED);
if (p->p_oppid != p->p_pptr->p_pid) {
PROC_LOCK(p->p_pptr);
sigqueue_take(p->p_ksi);
@@ -990,7 +1051,18 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
CTR2(KTR_PTRACE, "PT_DETACH: pid %d, sig %d",
p->p_pid, data);
p->p_oppid = 0;
- p->p_stops = 0;
+ p->p_ptevents = 0;
+ FOREACH_THREAD_IN_PROC(p, td3) {
+ if ((td3->td_dbgflags & TDB_FSTP) != 0) {
+ sigqueue_delete(&td3->td_sigqueue,
+ SIGSTOP);
+ }
+ td3->td_dbgflags &= ~(TDB_XSIG | TDB_FSTP);
+ }
+ if ((p->p_flag2 & P2_PTRACE_FSTP) != 0) {
+ sigqueue_delete(&p->p_sigqueue, SIGSTOP);
+ p->p_flag2 &= ~P2_PTRACE_FSTP;
+ }
/* should we send SIGCHLD? */
/* childproc_continued(p); */
@@ -1011,7 +1083,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
if (req == PT_DETACH) {
FOREACH_THREAD_IN_PROC(p, td3)
- td3->td_dbgflags &= ~TDB_SUSPEND;
+ td3->td_dbgflags &= ~TDB_SUSPEND;
}
/*
* unsuspend all threads, to not let a thread run,
@@ -1227,6 +1299,10 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
}
if (td2->td_dbgflags & TDB_CHILD)
pl->pl_flags |= PL_FLAG_CHILD;
+ if (td2->td_dbgflags & TDB_BORN)
+ pl->pl_flags |= PL_FLAG_BORN;
+ if (td2->td_dbgflags & TDB_EXIT)
+ pl->pl_flags |= PL_FLAG_EXITED;
pl->pl_sigmask = td2->td_sigmask;
pl->pl_siglist = td2->td_siglist;
strcpy(pl->pl_tdname, td2->td_name);
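With PT_LWP_EVENTS enabled, the two new pl_flags bits let a debugger tell thread birth from thread death at a stop. A minimal sketch, assuming pid has just reported a SIGTRAP stop to waitpid():

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <stdio.h>

	static void
	report_lwp_event(pid_t pid)
	{
		struct ptrace_lwpinfo pl;

		if (ptrace(PT_LWPINFO, pid, (caddr_t)&pl, sizeof(pl)) == -1)
			return;
		if (pl.pl_flags & PL_FLAG_BORN)
			printf("LWP %d was created\n", (int)pl.pl_lwpid);
		else if (pl.pl_flags & PL_FLAG_EXITED)
			printf("LWP %d is exiting\n", (int)pl.pl_lwpid);
	}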
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index b00952c..2543879 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -3626,7 +3626,9 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
*m = NULL;
}
- return (chk);
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
}
static int
@@ -3641,7 +3643,9 @@ pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
*m = NULL;
}
- return (chk);
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
}
#endif
@@ -3664,7 +3668,9 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
m_freem(*m);
*m = NULL;
}
- return chk;
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
}
static int
@@ -3680,7 +3686,9 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir,
m_freem(*m);
*m = NULL;
}
- return chk;
+ if (chk != PF_PASS)
+ return (EACCES);
+ return (0);
}
#endif /* INET6 */
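The pf_check_* fixes above restore the pfil(9) contract: a hook returns 0 to let the packet continue and an errno value, with the mbuf consumed, to drop it; PF_PASS and friends are pf-internal verdicts that must not leak out to the pfil framework. A minimal kernel-side sketch of a conforming hook under the same signature (packet_should_drop() is a hypothetical predicate standing in for the real filter logic):

	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/mbuf.h>

	static int
	example_pfil_hook(void *arg, struct mbuf **m, struct ifnet *ifp,
	    int dir, struct inpcb *inp)
	{

		if (packet_should_drop(*m)) {
			m_freem(*m);		/* hook owns the dropped mbuf */
			*m = NULL;
			return (EACCES);	/* errno, never PF_DROP */
		}
		return (0);			/* let the packet continue */
	}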
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index b2de02d..1b8bda5 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -396,6 +396,9 @@ do { \
#define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child
only) */
#define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */
+#define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */
+#define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */
+#define TDB_FSTP 0x00001000 /* The thread is the PT_ATTACH leader */
/*
* "Private" flags kept in td_pflags:
@@ -551,6 +554,7 @@ struct proc {
int p_pendingcnt; /* how many signals are pending */
struct itimers *p_itimers; /* (c) POSIX interval timers. */
struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */
+ int p_pendingexits; /* (c) Count of pending thread exits. */
/* End area that is zeroed on creation. */
#define p_endzero p_magic
@@ -608,6 +612,7 @@ struct proc {
our subtree. */
struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */
struct filemon *p_filemon; /* (c) filemon-specific data. */
+ u_int p_ptevents; /* (c) ptrace() event mask. */
};
#define p_session p_pgrp->pg_session
@@ -635,7 +640,7 @@ struct proc {
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
#define P_CONTROLT 0x00002 /* Has a controlling terminal. */
#define P_KTHREAD 0x00004 /* Kernel thread (*). */
-#define P_FOLLOWFORK 0x00008 /* Attach parent debugger to children. */
+#define P_UNUSED3 0x00008 /* --available-- */
#define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */
#define P_PROFIL 0x00020 /* Has started profiling. */
#define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */
@@ -674,6 +679,7 @@ struct proc {
#define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */
#define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOTRACE on exec(2). */
#define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */
+#define P2_PTRACE_FSTP 0x00000010 /* SIGSTOP from PT_ATTACH not yet handled. */
/* Flags protected by proctree_lock, kept in p_treeflags. */
#define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */
@@ -926,6 +932,7 @@ void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);
void proc_reap(struct thread *td, struct proc *p, int *status, int options);
void proc_reparent(struct proc *child, struct proc *newparent);
+void proc_set_traced(struct proc *p);
struct pstats *pstats_alloc(void);
void pstats_fork(struct pstats *src, struct pstats *dst);
void pstats_free(struct pstats *ps);
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index 7135767..e2b6a5f 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -64,6 +64,10 @@
#define PT_SYSCALL 22
#define PT_FOLLOW_FORK 23
+#define PT_LWP_EVENTS 24 /* report LWP birth and exit */
+
+#define PT_GET_EVENT_MASK 25 /* get mask of optional events */
+#define PT_SET_EVENT_MASK 26 /* set mask of optional events */
#define PT_GETREGS 33 /* get general-purpose registers */
#define PT_SETREGS 34 /* set general-purpose registers */
@@ -78,6 +82,16 @@
#define PT_FIRSTMACH 64 /* for machine-specific requests */
#include <machine/ptrace.h> /* machine-specific requests, if any */
+/* Events used with PT_GET_EVENT_MASK and PT_SET_EVENT_MASK */
+#define PTRACE_EXEC 0x0001
+#define PTRACE_SCE 0x0002
+#define PTRACE_SCX 0x0004
+#define PTRACE_SYSCALL (PTRACE_SCE | PTRACE_SCX)
+#define PTRACE_FORK 0x0008
+#define PTRACE_LWP 0x0010
+
+#define PTRACE_DEFAULT (PTRACE_EXEC)
+
struct ptrace_io_desc {
int piod_op; /* I/O operation */
void *piod_offs; /* child offset */
@@ -108,6 +122,8 @@ struct ptrace_lwpinfo {
#define PL_FLAG_SI 0x20 /* siginfo is valid */
#define PL_FLAG_FORKED 0x40 /* new child */
#define PL_FLAG_CHILD 0x80 /* I am from child */
+#define PL_FLAG_BORN 0x100 /* new LWP */
+#define PL_FLAG_EXITED 0x200 /* exiting LWP */
sigset_t pl_sigmask; /* LWP signal mask */
sigset_t pl_siglist; /* LWP pending signal */
struct __siginfo pl_siginfo; /* siginfo for signal */
@@ -133,13 +149,6 @@ struct ptrace_vm_entry {
#ifdef _KERNEL
-/*
- * The flags below are used for ptrace(2) tracing and have no relation
- * to procfs. They are stored in struct proc's p_stops member.
- */
-#define S_PT_SCE 0x000010000
-#define S_PT_SCX 0x000020000
-
int ptrace_set_pc(struct thread *_td, unsigned long _addr);
int ptrace_single_step(struct thread *_td);
int ptrace_clear_single_step(struct thread *_td);
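From the debugger's side the mask is read-modify-write, and PT_SET_EVENT_MASK rejects any bit outside the set above with EINVAL. A minimal sketch that turns on fork and LWP reporting for an attached, stopped child:

	#include <sys/types.h>
	#include <sys/ptrace.h>

	static int
	enable_fork_and_lwp_events(pid_t pid)
	{
		int events;

		if (ptrace(PT_GET_EVENT_MASK, pid, (caddr_t)&events,
		    sizeof(events)) == -1)
			return (-1);
		events |= PTRACE_FORK | PTRACE_LWP;
		/* data must be sizeof(int) or the request fails. */
		return (ptrace(PT_SET_EVENT_MASK, pid, (caddr_t)&events,
		    sizeof(events)));
	}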
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index ee0b207..11dceeb 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -2770,6 +2770,8 @@ zfree_start:
goto zfree_start;
}
cache->uc_freebucket = NULL;
+ /* We are no longer associated with this CPU. */
+ critical_exit();
/* Can we throw this on the zone full list? */
if (bucket != NULL) {
@@ -2782,9 +2784,6 @@ zfree_start:
LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
}
- /* We are no longer associated with this CPU. */
- critical_exit();
-
/*
* We bump the uz count when the cache size is insufficient to
* handle the working set.
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 7010580..433c875 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2651,7 +2651,7 @@ vm_page_advise(vm_page_t m, int advice)
* But we do make the page as freeable as we can without
* actually taking the step of unmapping it.
*/
- m->dirty = 0;
+ vm_page_undirty(m);
else if (advice != MADV_DONTNEED)
return;
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 0d81be4..c9c1271 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -262,7 +262,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
*
* Lock vm object currently associated with `m'. VM_OBJECT_TRYWLOCK is
* known to have failed and page queue must be either PQ_ACTIVE or
- * PQ_INACTIVE. To avoid lock order violation, unlock the page queues
+ * PQ_INACTIVE. To avoid lock order violation, unlock the page queue
* while locking the vm object. Use marker page to detect page queue
* changes and maintain notion of next page on page queue. Return
* TRUE if no changes were detected, FALSE otherwise. vm object is
@@ -950,7 +950,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
int vnodes_skipped = 0;
int maxlaunder, scan_tick, scanned, starting_page_shortage;
int lockmode;
- boolean_t queues_locked;
+ boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -1015,12 +1015,12 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
pq = &vmd->vmd_pagequeues[PQ_INACTIVE];
maxscan = pq->pq_cnt;
vm_pagequeue_lock(pq);
- queues_locked = TRUE;
+ queue_locked = TRUE;
for (m = TAILQ_FIRST(&pq->pq_pl);
m != NULL && maxscan-- > 0 && page_shortage > 0;
m = next) {
vm_pagequeue_assert_locked(pq);
- KASSERT(queues_locked, ("unlocked queues"));
+ KASSERT(queue_locked, ("unlocked inactive queue"));
KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
PCPU_INC(cnt.v_pdpages);
@@ -1076,7 +1076,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
*/
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
vm_pagequeue_unlock(pq);
- queues_locked = FALSE;
+ queue_locked = FALSE;
/*
* We bump the activation count if the page has been
@@ -1109,12 +1109,12 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
m->act_count += act_delta + ACT_ADVANCE;
} else {
vm_pagequeue_lock(pq);
- queues_locked = TRUE;
+ queue_locked = TRUE;
vm_page_requeue_locked(m);
}
VM_OBJECT_WUNLOCK(object);
vm_page_unlock(m);
- goto relock_queues;
+ goto relock_queue;
}
if (m->hold_count != 0) {
@@ -1129,7 +1129,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* loop over the active queue below.
*/
addl_page_shortage++;
- goto relock_queues;
+ goto relock_queue;
}
/*
@@ -1175,7 +1175,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
*/
m->flags |= PG_WINATCFLS;
vm_pagequeue_lock(pq);
- queues_locked = TRUE;
+ queue_locked = TRUE;
vm_page_requeue_locked(m);
} else if (maxlaunder > 0) {
/*
@@ -1206,9 +1206,9 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_pagequeue_lock(pq);
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
- queues_locked = TRUE;
+ queue_locked = TRUE;
vm_page_requeue_locked(m);
- goto relock_queues;
+ goto relock_queue;
}
/*
@@ -1263,7 +1263,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
VM_OBJECT_WLOCK(object);
vm_page_lock(m);
vm_pagequeue_lock(pq);
- queues_locked = TRUE;
+ queue_locked = TRUE;
/*
* The page might have been moved to another
* queue during potential blocking in vget()
@@ -1303,7 +1303,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
goto unlock_and_continue;
}
vm_pagequeue_unlock(pq);
- queues_locked = FALSE;
+ queue_locked = FALSE;
}
/*
@@ -1324,9 +1324,9 @@ unlock_and_continue:
vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_WUNLOCK(object);
if (mp != NULL) {
- if (queues_locked) {
+ if (queue_locked) {
vm_pagequeue_unlock(pq);
- queues_locked = FALSE;
+ queue_locked = FALSE;
}
if (vp != NULL)
vput(vp);
@@ -1334,14 +1334,14 @@ unlock_and_continue:
vn_finished_write(mp);
}
vm_page_lock_assert(m, MA_NOTOWNED);
- goto relock_queues;
+ goto relock_queue;
}
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
-relock_queues:
- if (!queues_locked) {
+relock_queue:
+ if (!queue_locked) {
vm_pagequeue_lock(pq);
- queues_locked = TRUE;
+ queue_locked = TRUE;
}
next = TAILQ_NEXT(&vmd->vmd_marker, plinks.q);
TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_marker, plinks.q);