diff options
author | dim <dim@FreeBSD.org> | 2015-09-24 21:48:04 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-09-24 21:48:04 +0000 |
commit | 5c80b187639f9d0f9013ea4068039582c115f9f8 (patch) | |
tree | 1bcad8efc17a77f9140e1a6bdc016fcf18546365 /sys | |
parent | 86e5497c676342d8d53c9f1035bc44653cab1fee (diff) | |
parent | 47a49e1e6cd1f4b2d0c68abdf3186595a39423c1 (diff) | |
download | FreeBSD-src-5c80b187639f9d0f9013ea4068039582c115f9f8.zip FreeBSD-src-5c80b187639f9d0f9013ea4068039582c115f9f8.tar.gz |
Merge ^/head r288126 through r288196.
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/include/smp.h | 1 | ||||
-rw-r--r-- | sys/arm64/conf/GENERIC | 3 | ||||
-rw-r--r-- | sys/cam/ctl/ctl.c | 219 | ||||
-rw-r--r-- | sys/cam/ctl/ctl.h | 2 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_backend_block.c | 7 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_cmd_table.c | 12 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_ha.c | 35 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_ha.h | 1 | ||||
-rw-r--r-- | sys/cam/ctl/ctl_io.h | 37 | ||||
-rw-r--r-- | sys/cam/ctl/scsi_ctl.c | 1 | ||||
-rw-r--r-- | sys/cam/scsi/scsi_all.h | 28 | ||||
-rw-r--r-- | sys/conf/kern.post.mk | 26 | ||||
-rw-r--r-- | sys/conf/kmod.mk | 13 | ||||
-rw-r--r-- | sys/dev/iwn/if_iwn.c | 18 | ||||
-rw-r--r-- | sys/dev/usb/quirk/usb_quirk.c | 139 | ||||
-rw-r--r-- | sys/geom/geom_dev.c | 9 | ||||
-rw-r--r-- | sys/kern/kern_event.c | 24 | ||||
-rw-r--r-- | sys/kern/vfs_bio.c | 740 | ||||
-rw-r--r-- | sys/netinet6/nd6.c | 22 | ||||
-rw-r--r-- | sys/netinet6/nd6.h | 2 |
20 files changed, 855 insertions, 484 deletions
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 4fd6aac..850289a 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -35,7 +35,6 @@ extern int mp_naps; extern int boot_cpu_id; extern struct pcb stoppcbs[]; extern int cpu_apic_ids[]; -extern int bootAP; extern void *dpcpu; extern char *bootSTK; extern int bootAP; diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC index 17f9bc9..26ca51d 100644 --- a/sys/arm64/conf/GENERIC +++ b/sys/arm64/conf/GENERIC @@ -107,6 +107,9 @@ device ahci device scbus device da +# ATA/SCSI peripherals +device pass # Passthrough device (direct ATA/SCSI access) + # MMC/SD/SDIO Card slot support device mmc # mmc/sd bus device mmcsd # mmc/sd flash cards diff --git a/sys/cam/ctl/ctl.c b/sys/cam/ctl/ctl.c index 13c474b..8d85bc55 100644 --- a/sys/cam/ctl/ctl.c +++ b/sys/cam/ctl/ctl.c @@ -611,6 +611,14 @@ alloc: ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg->port, sizeof(msg->port) + i, M_WAITOK); free(msg, M_CTL); + + if (lun->flags & CTL_LUN_PRIMARY_SC) { + for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { + ctl_isc_announce_mode(lun, -1, + lun->mode_pages.index[i].page_code & SMPH_PC_MASK, + lun->mode_pages.index[i].subpage); + } + } } void @@ -710,13 +718,57 @@ ctl_isc_announce_iid(struct ctl_port *port, int iid) free(msg, M_CTL); } +void +ctl_isc_announce_mode(struct ctl_lun *lun, uint32_t initidx, + uint8_t page, uint8_t subpage) +{ + struct ctl_softc *softc = lun->ctl_softc; + union ctl_ha_msg msg; + int i; + + if (softc->ha_link != CTL_HA_LINK_ONLINE) + return; + for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { + if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) == + page && lun->mode_pages.index[i].subpage == subpage) + break; + } + if (i == CTL_NUM_MODE_PAGES) + return; + bzero(&msg.mode, sizeof(msg.mode)); + msg.hdr.msg_type = CTL_MSG_MODE_SYNC; + msg.hdr.nexus.targ_port = initidx / CTL_MAX_INIT_PER_PORT; + msg.hdr.nexus.initid = initidx % CTL_MAX_INIT_PER_PORT; + msg.hdr.nexus.targ_lun = lun->lun; + msg.hdr.nexus.targ_mapped_lun = lun->lun; + msg.mode.page_code = page; + msg.mode.subpage = subpage; + msg.mode.page_len = lun->mode_pages.index[i].page_len; + memcpy(msg.mode.data, lun->mode_pages.index[i].page_data, + msg.mode.page_len); + ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg.mode, sizeof(msg.mode), + M_WAITOK); +} + static void ctl_isc_ha_link_up(struct ctl_softc *softc) { struct ctl_port *port; struct ctl_lun *lun; + union ctl_ha_msg msg; int i; + /* Announce this node parameters to peer for validation. */ + msg.login.msg_type = CTL_MSG_LOGIN; + msg.login.version = CTL_HA_VERSION; + msg.login.ha_mode = softc->ha_mode; + msg.login.ha_id = softc->ha_id; + msg.login.max_luns = CTL_MAX_LUNS; + msg.login.max_ports = CTL_MAX_PORTS; + msg.login.max_init_per_port = CTL_MAX_INIT_PER_PORT; + ctl_ha_msg_send(CTL_HA_CHAN_CTL, &msg.login, sizeof(msg.login), + M_WAITOK); + STAILQ_FOREACH(port, &softc->port_list, links) { ctl_isc_announce_port(port); for (i = 0; i < CTL_MAX_INIT_PER_PORT; i++) { @@ -999,6 +1051,74 @@ ctl_isc_iid_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) port->wwpn_iid[iid].name = NULL; } +static void +ctl_isc_login(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) +{ + + if (msg->login.version != CTL_HA_VERSION) { + printf("CTL HA peers have different versions %d != %d\n", + msg->login.version, CTL_HA_VERSION); + ctl_ha_msg_abort(CTL_HA_CHAN_CTL); + return; + } + if (msg->login.ha_mode != softc->ha_mode) { + printf("CTL HA peers have different ha_mode %d != %d\n", + msg->login.ha_mode, softc->ha_mode); + ctl_ha_msg_abort(CTL_HA_CHAN_CTL); + return; + } + if (msg->login.ha_id == softc->ha_id) { + printf("CTL HA peers have same ha_id %d\n", msg->login.ha_id); + ctl_ha_msg_abort(CTL_HA_CHAN_CTL); + return; + } + if (msg->login.max_luns != CTL_MAX_LUNS || + msg->login.max_ports != CTL_MAX_PORTS || + msg->login.max_init_per_port != CTL_MAX_INIT_PER_PORT) { + printf("CTL HA peers have different limits\n"); + ctl_ha_msg_abort(CTL_HA_CHAN_CTL); + return; + } +} + +static void +ctl_isc_mode_sync(struct ctl_softc *softc, union ctl_ha_msg *msg, int len) +{ + struct ctl_lun *lun; + int i; + uint32_t initidx, targ_lun; + + targ_lun = msg->hdr.nexus.targ_mapped_lun; + mtx_lock(&softc->ctl_lock); + if ((targ_lun >= CTL_MAX_LUNS) || + ((lun = softc->ctl_luns[targ_lun]) == NULL)) { + mtx_unlock(&softc->ctl_lock); + return; + } + mtx_lock(&lun->lun_lock); + mtx_unlock(&softc->ctl_lock); + if (lun->flags & CTL_LUN_DISABLED) { + mtx_unlock(&lun->lun_lock); + return; + } + for (i = 0; i < CTL_NUM_MODE_PAGES; i++) { + if ((lun->mode_pages.index[i].page_code & SMPH_PC_MASK) == + msg->mode.page_code && + lun->mode_pages.index[i].subpage == msg->mode.subpage) + break; + } + if (i == CTL_NUM_MODE_PAGES) { + mtx_unlock(&lun->lun_lock); + return; + } + memcpy(lun->mode_pages.index[i].page_data, msg->mode.data, + lun->mode_pages.index[i].page_len); + initidx = ctl_get_initindex(&msg->hdr.nexus); + if (initidx != -1) + ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); + mtx_unlock(&lun->lun_lock); +} + /* * ISC (Inter Shelf Communication) event handler. Events from the HA * subsystem come in here. @@ -1275,9 +1395,16 @@ ctl_isc_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param) case CTL_MSG_IID_SYNC: ctl_isc_iid_sync(softc, msg, param); break; + case CTL_MSG_LOGIN: + ctl_isc_login(softc, msg, param); + break; + case CTL_MSG_MODE_SYNC: + ctl_isc_mode_sync(softc, msg, param); + break; default: printf("Received HA message of unknown type %d\n", msg->hdr.msg_type); + ctl_ha_msg_abort(CTL_HA_CHAN_CTL); break; } if (msg != &msgbuf) @@ -5483,20 +5610,43 @@ bailout: int ctl_read_buffer(struct ctl_scsiio *ctsio) { - struct scsi_read_buffer *cdb; struct ctl_lun *lun; - int buffer_offset, len; + uint64_t buffer_offset; + uint32_t len; + uint8_t byte2; static uint8_t descr[4]; static uint8_t echo_descr[4] = { 0 }; CTL_DEBUG_PRINT(("ctl_read_buffer\n")); - lun = (struct ctl_lun *)ctsio->io_hdr.ctl_private[CTL_PRIV_LUN].ptr; - cdb = (struct scsi_read_buffer *)ctsio->cdb; + switch (ctsio->cdb[0]) { + case READ_BUFFER: { + struct scsi_read_buffer *cdb; - if ((cdb->byte2 & RWB_MODE) != RWB_MODE_DATA && - (cdb->byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR && - (cdb->byte2 & RWB_MODE) != RWB_MODE_DESCR) { + cdb = (struct scsi_read_buffer *)ctsio->cdb; + buffer_offset = scsi_3btoul(cdb->offset); + len = scsi_3btoul(cdb->length); + byte2 = cdb->byte2; + break; + } + case READ_BUFFER_16: { + struct scsi_read_buffer_16 *cdb; + + cdb = (struct scsi_read_buffer_16 *)ctsio->cdb; + buffer_offset = scsi_8btou64(cdb->offset); + len = scsi_4btoul(cdb->length); + byte2 = cdb->byte2; + break; + } + default: /* This shouldn't happen. */ + ctl_set_invalid_opcode(ctsio); + ctl_done((union ctl_io *)ctsio); + return (CTL_RETVAL_COMPLETE); + } + + if ((byte2 & RWB_MODE) != RWB_MODE_DATA && + (byte2 & RWB_MODE) != RWB_MODE_ECHO_DESCR && + (byte2 & RWB_MODE) != RWB_MODE_DESCR) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, @@ -5507,10 +5657,8 @@ ctl_read_buffer(struct ctl_scsiio *ctsio) return (CTL_RETVAL_COMPLETE); } - len = scsi_3btoul(cdb->length); - buffer_offset = scsi_3btoul(cdb->offset); - - if (buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { + if (buffer_offset > CTL_WRITE_BUFFER_SIZE || + buffer_offset + len > CTL_WRITE_BUFFER_SIZE) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, @@ -5521,12 +5669,12 @@ ctl_read_buffer(struct ctl_scsiio *ctsio) return (CTL_RETVAL_COMPLETE); } - if ((cdb->byte2 & RWB_MODE) == RWB_MODE_DESCR) { + if ((byte2 & RWB_MODE) == RWB_MODE_DESCR) { descr[0] = 0; scsi_ulto3b(CTL_WRITE_BUFFER_SIZE, &descr[1]); ctsio->kern_data_ptr = descr; len = min(len, sizeof(descr)); - } else if ((cdb->byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) { + } else if ((byte2 & RWB_MODE) == RWB_MODE_ECHO_DESCR) { ctsio->kern_data_ptr = echo_descr; len = min(len, sizeof(echo_descr)); } else { @@ -5660,9 +5808,8 @@ ctl_write_same(struct ctl_scsiio *ctsio) break; /* NOTREACHED */ } - /* NDOB and ANCHOR flags can be used only together with UNMAP */ - if ((byte2 & SWS_UNMAP) == 0 && - (byte2 & (SWS_NDOB | SWS_ANCHOR)) != 0) { + /* ANCHOR flag can be used only together with UNMAP */ + if ((byte2 & SWS_UNMAP) == 0 && (byte2 & SWS_ANCHOR) != 0) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 1, /*bit_valid*/ 1, /*bit*/ 0); ctl_done((union ctl_io *)ctsio); @@ -5906,7 +6053,11 @@ ctl_control_page_handler(struct ctl_scsiio *ctsio, if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); - + if (set_ua) { + ctl_isc_announce_mode(lun, + ctl_get_initindex(&ctsio->io_hdr.nexus), + page_index->page_code, page_index->subpage); + } return (0); } @@ -5943,7 +6094,11 @@ ctl_caching_sp_handler(struct ctl_scsiio *ctsio, if (set_ua != 0) ctl_est_ua_all(lun, initidx, CTL_UA_MODE_CHANGE); mtx_unlock(&lun->lun_lock); - + if (set_ua) { + ctl_isc_announce_mode(lun, + ctl_get_initindex(&ctsio->io_hdr.nexus), + page_index->page_code, page_index->subpage); + } return (0); } @@ -8784,7 +8939,7 @@ ctl_read_write(struct ctl_scsiio *ctsio) break; } case WRITE_ATOMIC_16: { - struct scsi_rw_16 *cdb; + struct scsi_write_atomic_16 *cdb; if (lun->be_lun->atomicblock == 0) { ctl_set_invalid_opcode(ctsio); @@ -8792,13 +8947,13 @@ ctl_read_write(struct ctl_scsiio *ctsio) return (CTL_RETVAL_COMPLETE); } - cdb = (struct scsi_rw_16 *)ctsio->cdb; + cdb = (struct scsi_write_atomic_16 *)ctsio->cdb; if (cdb->byte2 & SRW12_FUA) flags |= CTL_LLF_FUA; if (cdb->byte2 & SRW12_DPO) flags |= CTL_LLF_DPO; lba = scsi_8btou64(cdb->addr); - num_blocks = scsi_4btoul(cdb->length); + num_blocks = scsi_2btoul(cdb->length); if (num_blocks > lun->be_lun->atomicblock) { ctl_set_invalid_field(ctsio, /*sks_valid*/ 1, /*command*/ 1, /*field*/ 12, /*bit_valid*/ 0, @@ -9109,12 +9264,10 @@ ctl_report_luns(struct ctl_scsiio *ctsio) struct ctl_port *port; int num_luns, retval; uint32_t alloc_len, lun_datalen; - int num_filled, well_known; + int num_filled; uint32_t initidx, targ_lun_id, lun_id; retval = CTL_RETVAL_COMPLETE; - well_known = 0; - cdb = (struct scsi_report_luns *)ctsio->cdb; port = ctl_io_port(&ctsio->io_hdr); @@ -9131,9 +9284,11 @@ ctl_report_luns(struct ctl_scsiio *ctsio) switch (cdb->select_report) { case RPL_REPORT_DEFAULT: case RPL_REPORT_ALL: + case RPL_REPORT_NONSUBSID: break; case RPL_REPORT_WELLKNOWN: - well_known = 1; + case RPL_REPORT_ADMIN: + case RPL_REPORT_CONGLOM: num_luns = 0; break; default: @@ -9992,6 +10147,8 @@ ctl_inquiry_evpd_block_limits(struct ctl_scsiio *ctsio, int alloc_len) bl_ptr->max_atomic_transfer_length); scsi_ulto4b(0, bl_ptr->atomic_alignment); scsi_ulto4b(0, bl_ptr->atomic_transfer_length_granularity); + scsi_ulto4b(0, bl_ptr->max_atomic_transfer_length_with_atomic_boundary); + scsi_ulto4b(0, bl_ptr->max_atomic_boundary_size); } scsi_u64to8b(UINT64_MAX, bl_ptr->max_write_same_length); @@ -10491,8 +10648,7 @@ ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) break; } case READ_16: - case WRITE_16: - case WRITE_ATOMIC_16: { + case WRITE_16: { struct scsi_rw_16 *cdb; cdb = (struct scsi_rw_16 *)io->scsiio.cdb; @@ -10501,6 +10657,15 @@ ctl_get_lba_len(union ctl_io *io, uint64_t *lba, uint64_t *len) *len = scsi_4btoul(cdb->length); break; } + case WRITE_ATOMIC_16: { + struct scsi_write_atomic_16 *cdb; + + cdb = (struct scsi_write_atomic_16 *)io->scsiio.cdb; + + *lba = scsi_8btou64(cdb->addr); + *len = scsi_2btoul(cdb->length); + break; + } case WRITE_VERIFY_16: { struct scsi_write_verify_16 *cdb; diff --git a/sys/cam/ctl/ctl.h b/sys/cam/ctl/ctl.h index c024336..9fd6cce 100644 --- a/sys/cam/ctl/ctl.h +++ b/sys/cam/ctl/ctl.h @@ -193,6 +193,8 @@ void ctl_clr_ua_allluns(struct ctl_softc *ctl_softc, uint32_t initidx, void ctl_isc_announce_lun(struct ctl_lun *lun); void ctl_isc_announce_port(struct ctl_port *port); void ctl_isc_announce_iid(struct ctl_port *port, int iid); +void ctl_isc_announce_mode(struct ctl_lun *lun, uint32_t initidx, + uint8_t page, uint8_t subpage); /* * KPI to manipulate LUN/port options diff --git a/sys/cam/ctl/ctl_backend_block.c b/sys/cam/ctl/ctl_backend_block.c index 2ef5598..a37ac7b 100644 --- a/sys/cam/ctl/ctl_backend_block.c +++ b/sys/cam/ctl/ctl_backend_block.c @@ -1357,7 +1357,12 @@ ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun, buf = beio->sg_segs[i].addr; end = buf + seglen; for (; buf < end; buf += cbe_lun->blocksize) { - memcpy(buf, io->scsiio.kern_data_ptr, cbe_lun->blocksize); + if (lbalen->flags & SWS_NDOB) { + memset(buf, 0, cbe_lun->blocksize); + } else { + memcpy(buf, io->scsiio.kern_data_ptr, + cbe_lun->blocksize); + } if (lbalen->flags & SWS_LBDATA) scsi_ulto4b(lbalen->lba + lba, buf); lba++; diff --git a/sys/cam/ctl/ctl_cmd_table.c b/sys/cam/ctl/ctl_cmd_table.c index 7711b31..0753f28 100644 --- a/sys/cam/ctl/ctl_cmd_table.c +++ b/sys/cam/ctl/ctl_cmd_table.c @@ -1155,8 +1155,16 @@ const struct ctl_cmd_entry ctl_cmd_table[256] = /* 9A */ {NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE}, -/* 9B */ -{NULL, CTL_SERIDX_INVLD, CTL_CMD_FLAG_NONE, CTL_LUN_PAT_NONE}, +/* 9B READ BUFFER(16) */ +{ctl_read_buffer, CTL_SERIDX_MD_SNS, CTL_CMD_FLAG_OK_ON_BOTH | + CTL_CMD_FLAG_OK_ON_STOPPED | + CTL_CMD_FLAG_OK_ON_INOPERABLE | + CTL_CMD_FLAG_OK_ON_STANDBY | + CTL_FLAG_DATA_IN | + CTL_CMD_FLAG_ALLOW_ON_PR_WRESV, + CTL_LUN_PAT_NONE, + 10, {0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0x07}}, /* 9C WRITE ATOMIC (16) */ {ctl_read_write, CTL_SERIDX_WRITE, CTL_CMD_FLAG_OK_ON_SLUN| CTL_FLAG_DATA_OUT, diff --git a/sys/cam/ctl/ctl_ha.c b/sys/cam/ctl/ctl_ha.c index 13cb2e1..03401ae 100644 --- a/sys/cam/ctl/ctl_ha.c +++ b/sys/cam/ctl/ctl_ha.c @@ -283,8 +283,9 @@ ctl_ha_rx_thread(void *arg) else next = sizeof(wire_hdr); SOCKBUF_LOCK(&so->so_rcv); - while (sbavail(&so->so_rcv) < next) { - if (softc->ha_connected == 0 || so->so_error || + while (sbavail(&so->so_rcv) < next || softc->ha_disconnect) { + if (softc->ha_connected == 0 || softc->ha_disconnect || + so->so_error || (so->so_rcv.sb_state & SBS_CANTRCVMORE)) { goto errout; } @@ -541,6 +542,18 @@ ctl_ha_listen(struct ha_softc *softc) printf("%s: REUSEADDR setting failed %d\n", __func__, error); } + bzero(&opt, sizeof(struct sockopt)); + opt.sopt_dir = SOPT_SET; + opt.sopt_level = SOL_SOCKET; + opt.sopt_name = SO_REUSEPORT; + opt.sopt_val = &val; + opt.sopt_valsize = sizeof(val); + val = 1; + error = sosetopt(softc->ha_lso, &opt); + if (error) { + printf("%s: REUSEPORT setting failed %d\n", + __func__, error); + } SOCKBUF_LOCK(&softc->ha_lso->so_rcv); soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc); SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv); @@ -572,7 +585,8 @@ ctl_ha_conn_thread(void *arg) while (1) { if (softc->ha_disconnect || softc->ha_shutdown) { ctl_ha_close(softc); - ctl_ha_lclose(softc); + if (softc->ha_disconnect == 2 || softc->ha_shutdown) + ctl_ha_lclose(softc); softc->ha_disconnect = 0; if (softc->ha_shutdown) break; @@ -666,7 +680,7 @@ ctl_ha_peer_sysctl(SYSCTL_HANDLER_ARGS) sa->sin_addr.s_addr = htonl((b1 << 24) + (b2 << 16) + (b3 << 8) + b4); } - softc->ha_disconnect = 1; + softc->ha_disconnect = 2; softc->ha_wakeup = 1; mtx_unlock(&softc->ha_lock); wakeup(&softc->ha_wakeup); @@ -811,6 +825,19 @@ ctl_ha_msg_send(ctl_ha_channel channel, const void *addr, size_t len, return (ctl_ha_msg_send2(channel, addr, len, NULL, 0, wait)); } +ctl_ha_status +ctl_ha_msg_abort(ctl_ha_channel channel) +{ + struct ha_softc *softc = &ha_softc; + + mtx_lock(&softc->ha_lock); + softc->ha_disconnect = 1; + softc->ha_wakeup = 1; + mtx_unlock(&softc->ha_lock); + wakeup(&softc->ha_wakeup); + return (CTL_HA_STATUS_SUCCESS); +} + /* * Allocate a data transfer request structure. */ diff --git a/sys/cam/ctl/ctl_ha.h b/sys/cam/ctl/ctl_ha.h index 0d2c011..f38f640 100644 --- a/sys/cam/ctl/ctl_ha.h +++ b/sys/cam/ctl/ctl_ha.h @@ -125,6 +125,7 @@ ctl_ha_status ctl_ha_msg_send(ctl_ha_channel channel, const void *addr, size_t len, int wait); ctl_ha_status ctl_ha_msg_send2(ctl_ha_channel channel, const void *addr, size_t len, const void *addr2, size_t len2, int wait); +ctl_ha_status ctl_ha_msg_abort(ctl_ha_channel channel); ctl_ha_status ctl_ha_msg_deregister(ctl_ha_channel channel); struct ctl_ha_dt_req * ctl_dt_req_alloc(void); diff --git a/sys/cam/ctl/ctl_io.h b/sys/cam/ctl/ctl_io.h index 805a4ce..17fce7e 100644 --- a/sys/cam/ctl/ctl_io.h +++ b/sys/cam/ctl/ctl_io.h @@ -197,6 +197,8 @@ typedef enum { CTL_MSG_PORT_SYNC, /* Information about port. */ CTL_MSG_LUN_SYNC, /* Information about LUN. */ CTL_MSG_IID_SYNC, /* Information about initiator. */ + CTL_MSG_LOGIN, /* Information about HA peer. */ + CTL_MSG_MODE_SYNC, /* Mode page current content. */ CTL_MSG_FAILOVER /* Fake, never sent though the wire */ } ctl_msg_type; @@ -358,6 +360,25 @@ struct ctl_taskio { uint8_t task_resp[3];/* Response information */ }; + +/* + * HA link messages. + */ +#define CTL_HA_VERSION 1 + +/* + * Used for CTL_MSG_LOGIN. + */ +struct ctl_ha_msg_login { + ctl_msg_type msg_type; + int version; + int ha_mode; + int ha_id; + int max_luns; + int max_ports; + int max_init_per_port; +}; + typedef enum { CTL_PR_REG_KEY, CTL_PR_UNREG_KEY, @@ -513,6 +534,17 @@ struct ctl_ha_msg_iid { uint8_t data[]; }; +/* + * Used for CTL_MSG_MODE_SYNC. + */ +struct ctl_ha_msg_mode { + struct ctl_ha_msg_hdr hdr; + uint8_t page_code; + uint8_t subpage; + uint16_t page_len; + uint8_t data[]; +}; + union ctl_ha_msg { struct ctl_ha_msg_hdr hdr; struct ctl_ha_msg_task task; @@ -523,16 +555,15 @@ union ctl_ha_msg { struct ctl_ha_msg_port port; struct ctl_ha_msg_lun lun; struct ctl_ha_msg_iid iid; + struct ctl_ha_msg_login login; + struct ctl_ha_msg_mode mode; }; - struct ctl_prio { struct ctl_io_hdr io_hdr; struct ctl_ha_msg_pr pr_msg; }; - - union ctl_io { struct ctl_io_hdr io_hdr; /* common to all I/O types */ struct ctl_scsiio scsiio; /* Normal SCSI commands */ diff --git a/sys/cam/ctl/scsi_ctl.c b/sys/cam/ctl/scsi_ctl.c index 655afd8..25c745b 100644 --- a/sys/cam/ctl/scsi_ctl.c +++ b/sys/cam/ctl/scsi_ctl.c @@ -1068,7 +1068,6 @@ ctlfe_adjust_cdb(struct ccb_accept_tio *atio, uint32_t offset) } case READ_16: case WRITE_16: - case WRITE_ATOMIC_16: { struct scsi_rw_16 *cdb = (struct scsi_rw_16 *)cmdbyt; lba = scsi_8btou64(cdb->addr); diff --git a/sys/cam/scsi/scsi_all.h b/sys/cam/scsi/scsi_all.h index 4f1b5b3..f2b4b21 100644 --- a/sys/cam/scsi/scsi_all.h +++ b/sys/cam/scsi/scsi_all.h @@ -1002,6 +1002,16 @@ struct scsi_read_buffer u_int8_t control; }; +struct scsi_read_buffer_16 +{ + uint8_t opcode; + uint8_t byte2; + uint8_t offset[8]; + uint8_t length[4]; + uint8_t buffer_id; + uint8_t control; +}; + struct scsi_write_buffer { u_int8_t opcode; @@ -1273,6 +1283,17 @@ struct scsi_rw_16 u_int8_t control; }; +struct scsi_write_atomic_16 +{ + uint8_t opcode; + uint8_t byte2; + uint8_t addr[8]; + uint8_t boundary[2]; + uint8_t length[2]; + uint8_t group; + uint8_t control; +}; + struct scsi_write_same_10 { uint8_t opcode; @@ -1988,6 +2009,7 @@ struct ata_pass_16 { #define VERIFY_16 0x8F #define SYNCHRONIZE_CACHE_16 0x91 #define WRITE_SAME_16 0x93 +#define READ_BUFFER_16 0x9B #define WRITE_ATOMIC_16 0x9C #define SERVICE_ACTION_IN 0x9E #define REPORT_LUNS 0xA0 @@ -2746,7 +2768,8 @@ struct scsi_vpd_block_limits u_int8_t max_atomic_transfer_length[4]; u_int8_t atomic_alignment[4]; u_int8_t atomic_transfer_length_granularity[4]; - u_int8_t reserved2[8]; + u_int8_t max_atomic_transfer_length_with_atomic_boundary[4]; + u_int8_t max_atomic_boundary_size[4]; }; struct scsi_read_capacity @@ -2841,6 +2864,9 @@ struct scsi_report_luns #define RPL_REPORT_DEFAULT 0x00 #define RPL_REPORT_WELLKNOWN 0x01 #define RPL_REPORT_ALL 0x02 +#define RPL_REPORT_ADMIN 0x10 +#define RPL_REPORT_NONSUBSID 0x11 +#define RPL_REPORT_CONGLOM 0x12 uint8_t select_report; uint8_t reserved2[3]; uint8_t length[4]; diff --git a/sys/conf/kern.post.mk b/sys/conf/kern.post.mk index 137e72c..0703cc8 100644 --- a/sys/conf/kern.post.mk +++ b/sys/conf/kern.post.mk @@ -23,6 +23,11 @@ MKMODULESENV+= CONF_CFLAGS="${CONF_CFLAGS}" MKMODULESENV+= WITH_CTF="${WITH_CTF}" .endif +# Allow overriding the kernel debug directory, so kernel and user debug may be +# installed in different directories. Setting it to "" restores the historical +# behavior of installing debug files in the kernel directory. +KERN_DEBUGDIR?= ${DEBUGDIR} + .MAIN: all .for target in all clean cleandepend cleandir clobber depend install \ @@ -101,11 +106,11 @@ modules-all modules-depend: modules-obj .if !defined(DEBUG) FULLKERNEL= ${KERNEL_KO} .else -FULLKERNEL= ${KERNEL_KO}.debug -${KERNEL_KO}: ${FULLKERNEL} ${KERNEL_KO}.symbols - ${OBJCOPY} --strip-debug --add-gnu-debuglink=${KERNEL_KO}.symbols\ +FULLKERNEL= ${KERNEL_KO}.full +${KERNEL_KO}: ${FULLKERNEL} ${KERNEL_KO}.debug + ${OBJCOPY} --strip-debug --add-gnu-debuglink=${KERNEL_KO}.debug \ ${FULLKERNEL} ${.TARGET} -${KERNEL_KO}.symbols: ${FULLKERNEL} +${KERNEL_KO}.debug: ${FULLKERNEL} ${OBJCOPY} --only-keep-debug ${FULLKERNEL} ${.TARGET} install.debug reinstall.debug: gdbinit cd ${.CURDIR}; ${MAKE} ${.TARGET:R} @@ -151,7 +156,7 @@ ${mfile:T:S/.m$/.h/}: ${mfile} kernel-clean: rm -f *.o *.so *.So *.ko *.s eddep errs \ - ${FULLKERNEL} ${KERNEL_KO} ${KERNEL_KO}.symbols \ + ${FULLKERNEL} ${KERNEL_KO} ${KERNEL_KO}.debug \ linterrs tags vers.c \ vnode_if.c vnode_if.h vnode_if_newproto.h vnode_if_typedef.h \ ${MFILES:T:S/.m$/.c/} ${MFILES:T:S/.m$/.h/} \ @@ -249,19 +254,26 @@ kernel-install: if [ ! "`dirname "$$thiskernel"`" -ef ${DESTDIR}${KODIR} ] ; then \ chflags -R noschg ${DESTDIR}${KODIR} ; \ rm -rf ${DESTDIR}${KODIR} ; \ + rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ; \ else \ if [ -d ${DESTDIR}${KODIR}.old ] ; then \ chflags -R noschg ${DESTDIR}${KODIR}.old ; \ rm -rf ${DESTDIR}${KODIR}.old ; \ fi ; \ mv ${DESTDIR}${KODIR} ${DESTDIR}${KODIR}.old ; \ + if [ -n "${KERN_DEBUGDIR}" -a \ + -d ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ]; then \ + rm -rf ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ + mv ${DESTDIR}${KERN_DEBUGDIR}${KODIR} ${DESTDIR}${KERN_DEBUGDIR}${KODIR}.old ; \ + fi ; \ sysctl kern.bootfile=${DESTDIR}${KODIR}.old/"`basename "$$thiskernel"`" ; \ fi .endif mkdir -p ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR} .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" - ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.symbols ${DESTDIR}${KODIR} + mkdir -p ${DESTDIR}${KERN_DEBUGDIR}${KODIR} + ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR} .endif .if defined(KERNEL_EXTRA_INSTALL) ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_EXTRA_INSTALL} ${DESTDIR}${KODIR} @@ -273,7 +285,7 @@ kernel-reinstall: @-chflags -R noschg ${DESTDIR}${KODIR} ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO} ${DESTDIR}${KODIR} .if defined(DEBUG) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" - ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.symbols ${DESTDIR}${KODIR} + ${INSTALL} -p -m 555 -o ${KMODOWN} -g ${KMODGRP} ${KERNEL_KO}.debug ${DESTDIR}${KERN_DEBUGDIR}${KODIR} .endif config.o env.o hints.o vers.o vnode_if.o: diff --git a/sys/conf/kmod.mk b/sys/conf/kmod.mk index 7e3bc43..47bc593 100644 --- a/sys/conf/kmod.mk +++ b/sys/conf/kmod.mk @@ -172,11 +172,11 @@ PROG= ${KMOD}.ko .if !defined(DEBUG_FLAGS) FULLPROG= ${PROG} .else -FULLPROG= ${PROG}.debug -${PROG}: ${FULLPROG} ${PROG}.symbols - ${OBJCOPY} --strip-debug --add-gnu-debuglink=${PROG}.symbols\ +FULLPROG= ${PROG}.full +${PROG}: ${FULLPROG} ${PROG}.debug + ${OBJCOPY} --strip-debug --add-gnu-debuglink=${PROG}.debug \ ${FULLPROG} ${.TARGET} -${PROG}.symbols: ${FULLPROG} +${PROG}.debug: ${FULLPROG} ${OBJCOPY} --only-keep-debug ${FULLPROG} ${.TARGET} .endif @@ -266,7 +266,7 @@ ${_ILINKS}: CLEANFILES+= ${PROG} ${KMOD}.kld ${OBJS} .if defined(DEBUG_FLAGS) -CLEANFILES+= ${FULLPROG} ${PROG}.symbols +CLEANFILES+= ${FULLPROG} ${PROG}.debug .endif .if !target(install) @@ -277,6 +277,7 @@ _INSTALLFLAGS:= ${_INSTALLFLAGS${ie}} .endfor .if !target(realinstall) +KERN_DEBUGDIR?= ${DEBUGDIR} realinstall: _kmodinstall .ORDER: beforeinstall _kmodinstall _kmodinstall: @@ -284,7 +285,7 @@ _kmodinstall: ${_INSTALLFLAGS} ${PROG} ${DESTDIR}${KMODDIR} .if defined(DEBUG_FLAGS) && !defined(INSTALL_NODEBUG) && ${MK_KERNEL_SYMBOLS} != "no" ${INSTALL} -o ${KMODOWN} -g ${KMODGRP} -m ${KMODMODE} \ - ${_INSTALLFLAGS} ${PROG}.symbols ${DESTDIR}${KMODDIR} + ${_INSTALLFLAGS} ${PROG}.debug ${DESTDIR}${KERN_DEBUGDIR}${KMODDIR} .endif .include <bsd.links.mk> diff --git a/sys/dev/iwn/if_iwn.c b/sys/dev/iwn/if_iwn.c index 5e6876d..54c8c83 100644 --- a/sys/dev/iwn/if_iwn.c +++ b/sys/dev/iwn/if_iwn.c @@ -4368,7 +4368,6 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) struct ieee80211_tx_ampdu *tap = &ni->ni_tx_ampdu[ac]; if (!IEEE80211_AMPDU_RUNNING(tap)) { - m_freem(m); return EINVAL; } @@ -4420,7 +4419,6 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) /* Retrieve key for TX. */ k = ieee80211_crypto_encap(ni, m); if (k == NULL) { - m_freem(m); return ENOBUFS; } /* 802.11 header may have moved. */ @@ -4551,7 +4549,6 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); - m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ @@ -4559,7 +4556,6 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); - m_freem(m); return ENOBUFS; } m = m1; @@ -4569,7 +4565,6 @@ iwn_tx_data(struct iwn_softc *sc, struct mbuf *m, struct ieee80211_node *ni) if (error != 0) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); - m_freem(m); return error; } } @@ -4755,7 +4750,6 @@ iwn_tx_data_raw(struct iwn_softc *sc, struct mbuf *m, if (error != EFBIG) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); - m_freem(m); return error; } /* Too many DMA segments, linearize mbuf. */ @@ -4763,7 +4757,6 @@ iwn_tx_data_raw(struct iwn_softc *sc, struct mbuf *m, if (m1 == NULL) { device_printf(sc->sc_dev, "%s: could not defrag mbuf\n", __func__); - m_freem(m); return ENOBUFS; } m = m1; @@ -4773,7 +4766,6 @@ iwn_tx_data_raw(struct iwn_softc *sc, struct mbuf *m, if (error != 0) { device_printf(sc->sc_dev, "%s: can't map mbuf (error %d)\n", __func__, error); - m_freem(m); return error; } } @@ -4869,6 +4861,9 @@ iwn_xmit_task(void *arg0, int pending) IWN_UNLOCK(sc); } +/* + * raw frame xmit - free node/reference if failed. + */ static int iwn_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) @@ -4931,6 +4926,9 @@ iwn_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, return error; } +/* + * transmit - don't free mbuf if failed; don't free node ref if failed. + */ static int iwn_transmit(struct ieee80211com *ic, struct mbuf *m) { @@ -4938,6 +4936,8 @@ iwn_transmit(struct ieee80211com *ic, struct mbuf *m) struct ieee80211_node *ni; int error; + ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; + IWN_LOCK(sc); if ((sc->sc_flags & IWN_FLAG_RUNNING) == 0 || sc->sc_beacon_wait) { IWN_UNLOCK(sc); @@ -4949,11 +4949,9 @@ iwn_transmit(struct ieee80211com *ic, struct mbuf *m) return (ENOBUFS); } - ni = (struct ieee80211_node *)m->m_pkthdr.rcvif; error = iwn_tx_data(sc, m, ni); if (error) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); - ieee80211_free_node(ni); } else sc->sc_tx_timer = 5; IWN_UNLOCK(sc); diff --git a/sys/dev/usb/quirk/usb_quirk.c b/sys/dev/usb/quirk/usb_quirk.c index 459dc2a..927cec1 100644 --- a/sys/dev/usb/quirk/usb_quirk.c +++ b/sys/dev/usb/quirk/usb_quirk.c @@ -61,6 +61,7 @@ MODULE_VERSION(usb_quirk, 1); #define USB_DEV_QUIRKS_MAX 384 #define USB_SUB_QUIRKS_MAX 8 +#define USB_QUIRK_ENVROOT "hw.usb.quirk." struct usb_quirk_entry { uint16_t vid; @@ -608,8 +609,32 @@ static const char *usb_quirk_str[USB_QUIRK_MAX] = { static const char * usb_quirkstr(uint16_t quirk) { - return ((quirk < USB_QUIRK_MAX) ? - usb_quirk_str[quirk] : "USB_QUIRK_UNKNOWN"); + return ((quirk < USB_QUIRK_MAX && usb_quirk_str[quirk] != NULL) ? + usb_quirk_str[quirk] : "UQ_UNKNOWN"); +} + +/*------------------------------------------------------------------------* + * usb_strquirk + * + * This function converts a string into a USB quirk code. + * + * Returns: + * Less than USB_QUIRK_MAX: Quirk code + * Else: Quirk code not found + *------------------------------------------------------------------------*/ +static uint16_t +usb_strquirk(const char *str, size_t len) +{ + const char *quirk; + uint16_t x; + + for (x = 0; x != USB_QUIRK_MAX; x++) { + quirk = usb_quirkstr(x); + if (strncmp(str, quirk, len) == 0 && + quirk[len] == 0) + break; + } + return (x); } /*------------------------------------------------------------------------* @@ -854,12 +879,122 @@ usb_quirk_ioctl(unsigned long cmd, caddr_t data, return (ENOIOCTL); } +/*------------------------------------------------------------------------* + * usb_quirk_strtou16 + * + * Helper function to scan a 16-bit integer. + *------------------------------------------------------------------------*/ +static uint16_t +usb_quirk_strtou16(const char **pptr, const char *name, const char *what) +{ + unsigned long value; + char *end; + + value = strtoul(*pptr, &end, 0); + if (value > 65535 || *pptr == end || (*end != ' ' && *end != '\t')) { + printf("%s: %s 16-bit %s value set to zero\n", + name, what, *end == 0 ? "incomplete" : "invalid"); + return (0); + } + *pptr = end + 1; + return ((uint16_t)value); +} + +/*------------------------------------------------------------------------* + * usb_quirk_add_entry_from_str + * + * Add a USB quirk entry from string. + * "VENDOR PRODUCT LO_REV HI_REV QUIRK[,QUIRK[,...]]" + *------------------------------------------------------------------------*/ +static void +usb_quirk_add_entry_from_str(const char *name, const char *env) +{ + struct usb_quirk_entry entry = { }; + struct usb_quirk_entry *new; + uint16_t quirk_idx; + uint16_t quirk; + const char *end; + + /* check for invalid environment variable */ + if (name == NULL || env == NULL) + return; + + if (bootverbose) + printf("Adding USB QUIRK '%s' = '%s'\n", name, env); + + /* parse device information */ + entry.vid = usb_quirk_strtou16(&env, name, "Vendor ID"); + entry.pid = usb_quirk_strtou16(&env, name, "Product ID"); + entry.lo_rev = usb_quirk_strtou16(&env, name, "Low revision"); + entry.hi_rev = usb_quirk_strtou16(&env, name, "High revision"); + + /* parse quirk information */ + quirk_idx = 0; + while (*env != 0 && quirk_idx != USB_SUB_QUIRKS_MAX) { + /* skip whitespace before quirks */ + while (*env == ' ' || *env == '\t') + env++; + + /* look for quirk separation character */ + end = strchr(env, ','); + if (end == NULL) + end = env + strlen(env); + + /* lookup quirk in string table */ + quirk = usb_strquirk(env, end - env); + if (quirk < USB_QUIRK_MAX) { + entry.quirks[quirk_idx++] = quirk; + } else { + printf("%s: unknown USB quirk '%.*s' (skipped)\n", + name, (int)(end - env), env); + } + env = end; + + /* skip quirk delimiter, if any */ + if (*env != 0) + env++; + } + + /* register quirk */ + if (quirk_idx != 0) { + if (*env != 0) { + printf("%s: Too many USB quirks, only %d allowed!\n", + name, USB_SUB_QUIRKS_MAX); + } + mtx_lock(&usb_quirk_mtx); + new = usb_quirk_get_entry(entry.vid, entry.pid, + entry.lo_rev, entry.hi_rev, 1); + if (new == NULL) + printf("%s: USB quirks table is full!\n", name); + else + memcpy(new->quirks, entry.quirks, sizeof(entry.quirks)); + mtx_unlock(&usb_quirk_mtx); + } else { + printf("%s: No USB quirks found!\n", name); + } +} + static void usb_quirk_init(void *arg) { + char envkey[sizeof(USB_QUIRK_ENVROOT) + 2]; /* 2 digits max, 0 to 99 */ + int i; + /* initialize mutex */ mtx_init(&usb_quirk_mtx, "USB quirk", NULL, MTX_DEF); + /* look for quirks defined by the environment variable */ + for (i = 0; i != 100; i++) { + snprintf(envkey, sizeof(envkey), USB_QUIRK_ENVROOT "%d", i); + + /* Stop at first undefined var */ + if (!testenv(envkey)) + break; + + /* parse environment variable */ + usb_quirk_add_entry_from_str(envkey, kern_getenv(envkey)); + } + /* register our function */ usb_test_quirk_p = &usb_test_quirk_by_info; usb_quirk_ioctl_p = &usb_quirk_ioctl; diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 0d34ef5..789de4d 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -124,6 +124,7 @@ g_dev_fini(struct g_class *mp) { freeenv(dumpdev); + dumpdev = NULL; } static int @@ -152,10 +153,16 @@ g_dev_setdumpdev(struct cdev *dev, struct thread *td) static void init_dumpdev(struct cdev *dev) { + const char *devprefix = "/dev/", *devname; + size_t len; if (dumpdev == NULL) return; - if (strcmp(devtoname(dev), dumpdev) != 0) + len = strlen(devprefix); + devname = devtoname(dev); + if (strcmp(devname, dumpdev) != 0 && + (strncmp(dumpdev, devprefix, len) != 0 || + strcmp(devname, dumpdev + len) != 0)) return; if (g_dev_setdumpdev(dev, curthread) == 0) { freeenv(dumpdev); diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 0e26a78..d41ac96 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -759,28 +759,25 @@ kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) struct filedesc *fdp; struct kqueue *kq; struct file *fp; - struct proc *p; struct ucred *cred; int fd, error; - p = td->td_proc; + fdp = td->td_proc->p_fd; cred = td->td_ucred; - crhold(cred); - if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) { - crfree(cred); + if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); - } - fdp = p->p_fd; error = falloc_caps(td, &fp, &fd, flags, fcaps); - if (error) - goto done2; + if (error != 0) { + chgkqcnt(cred->cr_ruidinfo, -1, 0); + return (error); + } /* An extra reference on `fp' has been held for us by falloc(). */ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); kqueue_init(kq); kq->kq_fdp = fdp; - kq->kq_cred = cred; + kq->kq_cred = crhold(cred); FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); @@ -790,12 +787,7 @@ kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) fdrop(fp, td); td->td_retval[0] = fd; -done2: - if (error != 0) { - chgkqcnt(cred->cr_ruidinfo, -1, 0); - crfree(cred); - } - return (error); + return (0); } #ifndef _SYS_SYSPROTO_H_ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index ff4ea73..5053fd1 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -110,7 +110,10 @@ static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, vm_page_t m); static void vfs_clean_pages_dirty_buf(struct buf *bp); static void vfs_setdirty_locked_object(struct buf *bp); +static void vfs_vmio_invalidate(struct buf *bp); static void vfs_vmio_release(struct buf *bp); +static void vfs_vmio_truncate(struct buf *bp, int npages); +static void vfs_vmio_extend(struct buf *bp, int npages, int size); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); static int buf_flush(struct vnode *vp, int); @@ -661,11 +664,9 @@ waitrunningbufspace(void) * bit if the newly extended portion of the buffer does not contain * valid data. */ -static __inline -void -vfs_buf_test_cache(struct buf *bp, - vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, - vm_page_t m) +static __inline void +vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, + vm_offset_t size, vm_page_t m) { VM_OBJECT_ASSERT_LOCKED(m->object); @@ -1865,105 +1866,16 @@ brelse(struct buf *bp) * around to prevent it from being reconstituted and starting a second * background write. */ - if ((bp->b_flags & B_VMIO) - && !(bp->b_vp->v_mount != NULL && - (bp->b_vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && - !vn_isdisk(bp->b_vp, NULL) && - (bp->b_flags & B_DELWRI)) - ) { - - int i, j, resid; - vm_page_t m; - off_t foff; - vm_pindex_t poff; - vm_object_t obj; - - obj = bp->b_bufobj->bo_object; - - /* - * Get the base offset and length of the buffer. Note that - * in the VMIO case if the buffer block size is not - * page-aligned then b_data pointer may not be page-aligned. - * But our b_pages[] array *IS* page aligned. - * - * block sizes less then DEV_BSIZE (usually 512) are not - * supported due to the page granularity bits (m->valid, - * m->dirty, etc...). - * - * See man buf(9) for more information - */ - resid = bp->b_bufsize; - foff = bp->b_offset; - for (i = 0; i < bp->b_npages; i++) { - int had_bogus = 0; - - m = bp->b_pages[i]; - - /* - * If we hit a bogus page, fixup *all* the bogus pages - * now. - */ - if (m == bogus_page) { - poff = OFF_TO_IDX(bp->b_offset); - had_bogus = 1; - - VM_OBJECT_RLOCK(obj); - for (j = i; j < bp->b_npages; j++) { - vm_page_t mtmp; - mtmp = bp->b_pages[j]; - if (mtmp == bogus_page) { - mtmp = vm_page_lookup(obj, poff + j); - if (!mtmp) { - panic("brelse: page missing\n"); - } - bp->b_pages[j] = mtmp; - } - } - VM_OBJECT_RUNLOCK(obj); - - if ((bp->b_flags & B_INVAL) == 0 && - buf_mapped(bp)) { - BUF_CHECK_MAPPED(bp); - pmap_qenter( - trunc_page((vm_offset_t)bp->b_data), - bp->b_pages, bp->b_npages); - } - m = bp->b_pages[i]; - } - if ((bp->b_flags & B_NOCACHE) || - (bp->b_ioflags & BIO_ERROR && - bp->b_iocmd == BIO_READ)) { - int poffset = foff & PAGE_MASK; - int presid = resid > (PAGE_SIZE - poffset) ? - (PAGE_SIZE - poffset) : resid; - - KASSERT(presid >= 0, ("brelse: extra page")); - VM_OBJECT_WLOCK(obj); - while (vm_page_xbusied(m)) { - vm_page_lock(m); - VM_OBJECT_WUNLOCK(obj); - vm_page_busy_sleep(m, "mbncsh"); - VM_OBJECT_WLOCK(obj); - } - if (pmap_page_wired_mappings(m) == 0) - vm_page_set_invalid(m, poffset, presid); - VM_OBJECT_WUNLOCK(obj); - if (had_bogus) - printf("avoided corruption bug in bogus_page/brelse code\n"); - } - resid -= PAGE_SIZE - (foff & PAGE_MASK); - foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; - } - if (bp->b_flags & (B_INVAL | B_RELBUF)) - vfs_vmio_release(bp); - - } else if (bp->b_flags & B_VMIO) { - - if (bp->b_flags & (B_INVAL | B_RELBUF)) { + if ((bp->b_flags & B_VMIO) && (bp->b_flags & B_NOCACHE || + (bp->b_ioflags & BIO_ERROR && bp->b_iocmd == BIO_READ)) && + !(bp->b_vp->v_mount != NULL && + (bp->b_vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && + !vn_isdisk(bp->b_vp, NULL) && (bp->b_flags & B_DELWRI))) + vfs_vmio_invalidate(bp); + + if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0) { + if (bp->b_flags & B_VMIO) vfs_vmio_release(bp); - } - - } else if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0) { if (bp->b_bufsize != 0) allocbuf(bp, 0); if (bp->b_vp != NULL) @@ -2069,6 +1981,132 @@ out: BUF_UNLOCK(bp); } +/* + * Complete I/O to a VMIO backed page. Validate the pages as appropriate, + * restore bogus pages. + */ +static void +vfs_vmio_iodone(struct buf *bp) +{ + vm_ooffset_t foff; + vm_page_t m; + vm_object_t obj; + struct vnode *vp; + int bogus, i, iosize; + + obj = bp->b_bufobj->bo_object; + KASSERT(obj->paging_in_progress >= bp->b_npages, + ("vfs_vmio_iodone: paging in progress(%d) < b_npages(%d)", + obj->paging_in_progress, bp->b_npages)); + + vp = bp->b_vp; + KASSERT(vp->v_holdcnt > 0, + ("vfs_vmio_iodone: vnode %p has zero hold count", vp)); + KASSERT(vp->v_object != NULL, + ("vfs_vmio_iodone: vnode %p has no vm_object", vp)); + + foff = bp->b_offset; + KASSERT(bp->b_offset != NOOFFSET, + ("vfs_vmio_iodone: bp %p has no buffer offset", bp)); + + bogus = 0; + iosize = bp->b_bcount - bp->b_resid; + VM_OBJECT_WLOCK(obj); + for (i = 0; i < bp->b_npages; i++) { + int resid; + + resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff; + if (resid > iosize) + resid = iosize; + + /* + * cleanup bogus pages, restoring the originals + */ + m = bp->b_pages[i]; + if (m == bogus_page) { + bogus = 1; + m = vm_page_lookup(obj, OFF_TO_IDX(foff)); + if (m == NULL) + panic("biodone: page disappeared!"); + bp->b_pages[i] = m; + } else if ((bp->b_iocmd == BIO_READ) && resid > 0) { + /* + * In the write case, the valid and clean bits are + * already changed correctly ( see bdwrite() ), so we + * only need to do this here in the read case. + */ + KASSERT((m->dirty & vm_page_bits(foff & PAGE_MASK, + resid)) == 0, ("vfs_vmio_iodone: page %p " + "has unexpected dirty bits", m)); + vfs_page_set_valid(bp, foff, m); + } + KASSERT(OFF_TO_IDX(foff) == m->pindex, + ("vfs_vmio_iodone: foff(%jd)/pindex(%ju) mismatch", + (intmax_t)foff, (uintmax_t)m->pindex)); + + vm_page_sunbusy(m); + vm_object_pip_subtract(obj, 1); + foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; + iosize -= resid; + } + vm_object_pip_wakeupn(obj, 0); + VM_OBJECT_WUNLOCK(obj); + if (bogus && buf_mapped(bp)) { + BUF_CHECK_MAPPED(bp); + pmap_qenter(trunc_page((vm_offset_t)bp->b_data), + bp->b_pages, bp->b_npages); + } +} + +/* + * Perform page invalidation when a buffer is released. The fully invalid + * pages will be reclaimed later in vfs_vmio_release(). + */ +static void +vfs_vmio_invalidate(struct buf *bp) +{ + vm_object_t obj; + vm_page_t m; + int i, resid, poffset, presid; + + /* + * Get the base offset and length of the buffer. Note that + * in the VMIO case if the buffer block size is not + * page-aligned then b_data pointer may not be page-aligned. + * But our b_pages[] array *IS* page aligned. + * + * block sizes less then DEV_BSIZE (usually 512) are not + * supported due to the page granularity bits (m->valid, + * m->dirty, etc...). + * + * See man buf(9) for more information + */ + obj = bp->b_bufobj->bo_object; + resid = bp->b_bufsize; + poffset = bp->b_offset & PAGE_MASK; + VM_OBJECT_WLOCK(obj); + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + if (m == bogus_page) + panic("vfs_vmio_invalidate: Unexpected bogus page."); + + presid = resid > (PAGE_SIZE - poffset) ? + (PAGE_SIZE - poffset) : resid; + KASSERT(presid >= 0, ("brelse: extra page")); + while (vm_page_xbusied(m)) { + vm_page_lock(m); + VM_OBJECT_WUNLOCK(obj); + vm_page_busy_sleep(m, "mbncsh"); + VM_OBJECT_WLOCK(obj); + } + if (pmap_page_wired_mappings(m) == 0) + vm_page_set_invalid(m, poffset, presid); + resid -= presid; + poffset = 0; + } + VM_OBJECT_WUNLOCK(obj); +} + /* Give pages used by the bp back to the VM system (where possible) */ static void vfs_vmio_release(struct buf *bp) @@ -2120,8 +2158,124 @@ vfs_vmio_release(struct buf *bp) bufspaceadjust(bp, 0); bp->b_npages = 0; bp->b_flags &= ~B_VMIO; - if (bp->b_vp) - brelvp(bp); +} + +/* + * Page-granular truncation of an existing VMIO buffer. + */ +static void +vfs_vmio_truncate(struct buf *bp, int desiredpages) +{ + vm_page_t m; + int i; + + if (bp->b_npages == desiredpages) + return; + + if (buf_mapped(bp)) { + BUF_CHECK_MAPPED(bp); + pmap_qremove((vm_offset_t)trunc_page((vm_offset_t)bp->b_data) + + (desiredpages << PAGE_SHIFT), bp->b_npages - desiredpages); + } else + BUF_CHECK_UNMAPPED(bp); + VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); + for (i = desiredpages; i < bp->b_npages; i++) { + /* + * The page is not freed here -- it is the responsibility of + * vnode_pager_setsize. + */ + m = bp->b_pages[i]; + KASSERT(m != bogus_page, ("allocbuf: bogus page found")); + while (vm_page_sleep_if_busy(m, "biodep")) + continue; + bp->b_pages[i] = NULL; + vm_page_lock(m); + vm_page_unwire(m, PQ_INACTIVE); + vm_page_unlock(m); + } + VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); + bp->b_npages = desiredpages; +} + +/* + * Byte granular extension of VMIO buffers. + */ +static void +vfs_vmio_extend(struct buf *bp, int desiredpages, int size) +{ + /* + * We are growing the buffer, possibly in a + * byte-granular fashion. + */ + vm_object_t obj; + vm_offset_t toff; + vm_offset_t tinc; + vm_page_t m; + + /* + * Step 1, bring in the VM pages from the object, allocating + * them if necessary. We must clear B_CACHE if these pages + * are not valid for the range covered by the buffer. + */ + obj = bp->b_bufobj->bo_object; + VM_OBJECT_WLOCK(obj); + while (bp->b_npages < desiredpages) { + /* + * We must allocate system pages since blocking + * here could interfere with paging I/O, no + * matter which process we are. + * + * Only exclusive busy can be tested here. + * Blocking on shared busy might lead to + * deadlocks once allocbuf() is called after + * pages are vfs_busy_pages(). + */ + m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) + bp->b_npages, + VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | + VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | + VM_ALLOC_COUNT(desiredpages - bp->b_npages)); + if (m->valid == 0) + bp->b_flags &= ~B_CACHE; + bp->b_pages[bp->b_npages] = m; + ++bp->b_npages; + } + + /* + * Step 2. We've loaded the pages into the buffer, + * we have to figure out if we can still have B_CACHE + * set. Note that B_CACHE is set according to the + * byte-granular range ( bcount and size ), not the + * aligned range ( newbsize ). + * + * The VM test is against m->valid, which is DEV_BSIZE + * aligned. Needless to say, the validity of the data + * needs to also be DEV_BSIZE aligned. Note that this + * fails with NFS if the server or some other client + * extends the file's EOF. If our buffer is resized, + * B_CACHE may remain set! XXX + */ + toff = bp->b_bcount; + tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); + while ((bp->b_flags & B_CACHE) && toff < size) { + vm_pindex_t pi; + + if (tinc > (size - toff)) + tinc = size - toff; + pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT; + m = bp->b_pages[pi]; + vfs_buf_test_cache(bp, bp->b_offset, toff, tinc, m); + toff += tinc; + tinc = PAGE_SIZE; + } + VM_OBJECT_WUNLOCK(obj); + + /* + * Step 3, fixup the KVA pmap. + */ + if (buf_mapped(bp)) + bpmap_qenter(bp); + else + BUF_CHECK_UNMAPPED(bp); } /* @@ -3430,6 +3584,80 @@ geteblk(int size, int flags) } /* + * Truncate the backing store for a non-vmio buffer. + */ +static void +vfs_nonvmio_truncate(struct buf *bp, int newbsize) +{ + + if (bp->b_flags & B_MALLOC) { + /* + * malloced buffers are not shrunk + */ + if (newbsize == 0) { + bufmallocadjust(bp, 0); + free(bp->b_data, M_BIOBUF); + bp->b_data = bp->b_kvabase; + bp->b_flags &= ~B_MALLOC; + } + return; + } + vm_hold_free_pages(bp, newbsize); + bufspaceadjust(bp, newbsize); +} + +/* + * Extend the backing for a non-VMIO buffer. + */ +static void +vfs_nonvmio_extend(struct buf *bp, int newbsize) +{ + caddr_t origbuf; + int origbufsize; + + /* + * We only use malloced memory on the first allocation. + * and revert to page-allocated memory when the buffer + * grows. + * + * There is a potential smp race here that could lead + * to bufmallocspace slightly passing the max. It + * is probably extremely rare and not worth worrying + * over. + */ + if (bp->b_bufsize == 0 && newbsize <= PAGE_SIZE/2 && + bufmallocspace < maxbufmallocspace) { + bp->b_data = malloc(newbsize, M_BIOBUF, M_WAITOK); + bp->b_flags |= B_MALLOC; + bufmallocadjust(bp, newbsize); + return; + } + + /* + * If the buffer is growing on its other-than-first + * allocation then we revert to the page-allocation + * scheme. + */ + origbuf = NULL; + origbufsize = 0; + if (bp->b_flags & B_MALLOC) { + origbuf = bp->b_data; + origbufsize = bp->b_bufsize; + bp->b_data = bp->b_kvabase; + bufmallocadjust(bp, 0); + bp->b_flags &= ~B_MALLOC; + newbsize = round_page(newbsize); + } + vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize, + (vm_offset_t) bp->b_data + newbsize); + if (origbuf != NULL) { + bcopy(origbuf, bp->b_data, origbufsize); + free(origbuf, M_BIOBUF); + } + bufspaceadjust(bp, newbsize); +} + +/* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). This code is able to @@ -3443,100 +3671,33 @@ geteblk(int size, int flags) * allocbuf() only adjusts B_CACHE for VMIO buffers. getblk() deals with * B_CACHE for the non-VMIO case. */ - int allocbuf(struct buf *bp, int size) { - int newbsize, mbsize; - int i; + int newbsize; BUF_ASSERT_HELD(bp); if (bp->b_kvasize != 0 && bp->b_kvasize < size) panic("allocbuf: buffer too small"); + newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); if ((bp->b_flags & B_VMIO) == 0) { - caddr_t origbuf; - int origbufsize; + if ((bp->b_flags & B_MALLOC) == 0) + newbsize = round_page(newbsize); /* * Just get anonymous memory from the kernel. Don't * mess with B_CACHE. */ - mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); - if (bp->b_flags & B_MALLOC) - newbsize = mbsize; - else - newbsize = round_page(size); - - if (newbsize < bp->b_bufsize) { - /* - * malloced buffers are not shrunk - */ - if (bp->b_flags & B_MALLOC) { - if (newbsize) { - bp->b_bcount = size; - } else { - free(bp->b_data, M_BIOBUF); - bufmallocadjust(bp, 0); - bp->b_data = bp->b_kvabase; - bp->b_bcount = 0; - bp->b_flags &= ~B_MALLOC; - } - return 1; - } - vm_hold_free_pages(bp, newbsize); - } else if (newbsize > bp->b_bufsize) { - /* - * We only use malloced memory on the first allocation. - * and revert to page-allocated memory when the buffer - * grows. - */ - /* - * There is a potential smp race here that could lead - * to bufmallocspace slightly passing the max. It - * is probably extremely rare and not worth worrying - * over. - */ - if ((bufmallocspace < maxbufmallocspace) && - (bp->b_bufsize == 0) && - (mbsize <= PAGE_SIZE/2)) { - - bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); - bp->b_bcount = size; - bp->b_flags |= B_MALLOC; - bufmallocadjust(bp, mbsize); - return 1; - } - origbuf = NULL; - origbufsize = 0; - /* - * If the buffer is growing on its other-than-first - * allocation then we revert to the page-allocation - * scheme. - */ - if (bp->b_flags & B_MALLOC) { - origbuf = bp->b_data; - origbufsize = bp->b_bufsize; - bp->b_data = bp->b_kvabase; - bufmallocadjust(bp, 0); - bp->b_flags &= ~B_MALLOC; - newbsize = round_page(newbsize); - } - vm_hold_load_pages( - bp, - (vm_offset_t) bp->b_data + bp->b_bufsize, - (vm_offset_t) bp->b_data + newbsize); - if (origbuf) { - bcopy(origbuf, bp->b_data, origbufsize); - free(origbuf, M_BIOBUF); - } - } + if (newbsize < bp->b_bufsize) + vfs_nonvmio_truncate(bp, newbsize); + else if (newbsize > bp->b_bufsize) + vfs_nonvmio_extend(bp, newbsize); } else { int desiredpages; - newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (size == 0) ? 0 : - num_pages((bp->b_offset & PAGE_MASK) + newbsize); + num_pages((bp->b_offset & PAGE_MASK) + newbsize); if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); @@ -3547,139 +3708,13 @@ allocbuf(struct buf *bp, int size) if (size == 0 || bp->b_bufsize == 0) bp->b_flags |= B_CACHE; - if (newbsize < bp->b_bufsize) { - /* - * DEV_BSIZE aligned new buffer size is less then the - * DEV_BSIZE aligned existing buffer size. Figure out - * if we have to remove any pages. - */ - if (desiredpages < bp->b_npages) { - vm_page_t m; - - if (buf_mapped(bp)) { - BUF_CHECK_MAPPED(bp); - pmap_qremove((vm_offset_t)trunc_page( - (vm_offset_t)bp->b_data) + - (desiredpages << PAGE_SHIFT), - (bp->b_npages - desiredpages)); - } else - BUF_CHECK_UNMAPPED(bp); - VM_OBJECT_WLOCK(bp->b_bufobj->bo_object); - for (i = desiredpages; i < bp->b_npages; i++) { - /* - * the page is not freed here -- it - * is the responsibility of - * vnode_pager_setsize - */ - m = bp->b_pages[i]; - KASSERT(m != bogus_page, - ("allocbuf: bogus page found")); - while (vm_page_sleep_if_busy(m, - "biodep")) - continue; - - bp->b_pages[i] = NULL; - vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); - } - VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object); - bp->b_npages = desiredpages; - } - } else if (size > bp->b_bcount) { - /* - * We are growing the buffer, possibly in a - * byte-granular fashion. - */ - vm_object_t obj; - vm_offset_t toff; - vm_offset_t tinc; - - /* - * Step 1, bring in the VM pages from the object, - * allocating them if necessary. We must clear - * B_CACHE if these pages are not valid for the - * range covered by the buffer. - */ - - obj = bp->b_bufobj->bo_object; - - VM_OBJECT_WLOCK(obj); - while (bp->b_npages < desiredpages) { - vm_page_t m; - - /* - * We must allocate system pages since blocking - * here could interfere with paging I/O, no - * matter which process we are. - * - * Only exclusive busy can be tested here. - * Blocking on shared busy might lead to - * deadlocks once allocbuf() is called after - * pages are vfs_busy_pages(). - */ - m = vm_page_grab(obj, OFF_TO_IDX(bp->b_offset) + - bp->b_npages, VM_ALLOC_NOBUSY | - VM_ALLOC_SYSTEM | VM_ALLOC_WIRED | - VM_ALLOC_IGN_SBUSY | - VM_ALLOC_COUNT(desiredpages - bp->b_npages)); - if (m->valid == 0) - bp->b_flags &= ~B_CACHE; - bp->b_pages[bp->b_npages] = m; - ++bp->b_npages; - } - - /* - * Step 2. We've loaded the pages into the buffer, - * we have to figure out if we can still have B_CACHE - * set. Note that B_CACHE is set according to the - * byte-granular range ( bcount and size ), new the - * aligned range ( newbsize ). - * - * The VM test is against m->valid, which is DEV_BSIZE - * aligned. Needless to say, the validity of the data - * needs to also be DEV_BSIZE aligned. Note that this - * fails with NFS if the server or some other client - * extends the file's EOF. If our buffer is resized, - * B_CACHE may remain set! XXX - */ - - toff = bp->b_bcount; - tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); - - while ((bp->b_flags & B_CACHE) && toff < size) { - vm_pindex_t pi; - - if (tinc > (size - toff)) - tinc = size - toff; - - pi = ((bp->b_offset & PAGE_MASK) + toff) >> - PAGE_SHIFT; - - vfs_buf_test_cache( - bp, - bp->b_offset, - toff, - tinc, - bp->b_pages[pi] - ); - toff += tinc; - tinc = PAGE_SIZE; - } - VM_OBJECT_WUNLOCK(obj); - - /* - * Step 3, fixup the KVA pmap. - */ - if (buf_mapped(bp)) - bpmap_qenter(bp); - else - BUF_CHECK_UNMAPPED(bp); - } - } - /* Record changes in allocation size. */ - if (bp->b_bufsize != newbsize) + if (newbsize < bp->b_bufsize) + vfs_vmio_truncate(bp, desiredpages); + /* XXX This looks as if it should be newbsize > b_bufsize */ + else if (size > bp->b_bcount) + vfs_vmio_extend(bp, desiredpages, size); bufspaceadjust(bp, newbsize); + } bp->b_bcount = size; /* requested buffer size. */ return 1; } @@ -3833,87 +3868,16 @@ bufdone_finish(struct buf *bp) buf_complete(bp); if (bp->b_flags & B_VMIO) { - vm_ooffset_t foff; - vm_page_t m; - vm_object_t obj; - struct vnode *vp; - int bogus, i, iosize; - - obj = bp->b_bufobj->bo_object; - KASSERT(obj->paging_in_progress >= bp->b_npages, - ("biodone_finish: paging in progress(%d) < b_npages(%d)", - obj->paging_in_progress, bp->b_npages)); - - vp = bp->b_vp; - KASSERT(vp->v_holdcnt > 0, - ("biodone_finish: vnode %p has zero hold count", vp)); - KASSERT(vp->v_object != NULL, - ("biodone_finish: vnode %p has no vm_object", vp)); - - foff = bp->b_offset; - KASSERT(bp->b_offset != NOOFFSET, - ("biodone_finish: bp %p has no buffer offset", bp)); - /* * Set B_CACHE if the op was a normal read and no error * occured. B_CACHE is set for writes in the b*write() * routines. */ - iosize = bp->b_bcount - bp->b_resid; if (bp->b_iocmd == BIO_READ && !(bp->b_flags & (B_INVAL|B_NOCACHE)) && - !(bp->b_ioflags & BIO_ERROR)) { + !(bp->b_ioflags & BIO_ERROR)) bp->b_flags |= B_CACHE; - } - bogus = 0; - VM_OBJECT_WLOCK(obj); - for (i = 0; i < bp->b_npages; i++) { - int bogusflag = 0; - int resid; - - resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff; - if (resid > iosize) - resid = iosize; - - /* - * cleanup bogus pages, restoring the originals - */ - m = bp->b_pages[i]; - if (m == bogus_page) { - bogus = bogusflag = 1; - m = vm_page_lookup(obj, OFF_TO_IDX(foff)); - if (m == NULL) - panic("biodone: page disappeared!"); - bp->b_pages[i] = m; - } - KASSERT(OFF_TO_IDX(foff) == m->pindex, - ("biodone_finish: foff(%jd)/pindex(%ju) mismatch", - (intmax_t)foff, (uintmax_t)m->pindex)); - - /* - * In the write case, the valid and clean bits are - * already changed correctly ( see bdwrite() ), so we - * only need to do this here in the read case. - */ - if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) { - KASSERT((m->dirty & vm_page_bits(foff & - PAGE_MASK, resid)) == 0, ("bufdone_finish:" - " page %p has unexpected dirty bits", m)); - vfs_page_set_valid(bp, foff, m); - } - - vm_page_sunbusy(m); - vm_object_pip_subtract(obj, 1); - foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK; - iosize -= resid; - } - vm_object_pip_wakeupn(obj, 0); - VM_OBJECT_WUNLOCK(obj); - if (bogus && buf_mapped(bp)) { - BUF_CHECK_MAPPED(bp); - pmap_qenter(trunc_page((vm_offset_t)bp->b_data), - bp->b_pages, bp->b_npages); - } + vfs_vmio_iodone(bp); } /* @@ -3921,9 +3885,9 @@ bufdone_finish(struct buf *bp) * will do a wakeup there if necessary - so no need to do a wakeup * here in the async case. The sync case always needs to do a wakeup. */ - if (bp->b_flags & B_ASYNC) { - if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || (bp->b_ioflags & BIO_ERROR)) + if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || + (bp->b_ioflags & BIO_ERROR)) brelse(bp); else bqrelse(bp); diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 7ede15e..b842319 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -991,7 +991,6 @@ nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; - int llflags; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); @@ -1000,8 +999,7 @@ nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) IF_AFDATA_LOCK_ASSERT(ifp); - llflags = (flags & ND6_EXCLUSIVE) ? LLE_EXCLUSIVE : 0; - ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); + ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); return (ln); } @@ -1331,7 +1329,7 @@ nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) ifp = rt->rt_ifp; IF_AFDATA_RLOCK(ifp); - ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); + ln = nd6_lookup(dst6, LLE_EXCLUSIVE, NULL); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) return; @@ -1741,13 +1739,13 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ - flags = lladdr ? ND6_EXCLUSIVE : 0; + flags = lladdr ? LLE_EXCLUSIVE : 0; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); IF_AFDATA_RUNLOCK(ifp); is_newentry = 0; if (ln == NULL) { - flags |= ND6_EXCLUSIVE; + flags |= LLE_EXCLUSIVE; ln = nd6_alloc(from, 0, ifp); if (ln == NULL) return; @@ -1763,7 +1761,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ - ln_tmp = nd6_lookup(from, ND6_EXCLUSIVE, ifp); + ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); if (ln_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); @@ -1779,7 +1777,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, } /* do nothing if static ndp is set */ if ((ln->la_flags & LLE_STATIC)) { - if (flags & ND6_EXCLUSIVE) + if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); @@ -1836,7 +1834,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) ln->la_flags |= LLE_REDIRECT; - if (flags & ND6_EXCLUSIVE) + if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); @@ -2053,7 +2051,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, * * Heavy version. * Function assume that destination LLE does not exist, - * is invalid or stale, so ND6_EXCLUSIVE lock needs to be acquired. + * is invalid or stale, so LLE_EXCLUSIVE lock needs to be acquired. */ static int nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, @@ -2071,7 +2069,7 @@ nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, */ if (lle == NULL) { IF_AFDATA_RLOCK(ifp); - lle = nd6_lookup(&dst->sin6_addr, ND6_EXCLUSIVE, ifp); + lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); IF_AFDATA_RUNLOCK(ifp); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* @@ -2093,7 +2091,7 @@ nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m, IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* Prefer any existing entry over newly-created one */ - lle_tmp = nd6_lookup(&dst->sin6_addr, ND6_EXCLUSIVE, ifp); + lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if (lle_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), lle); IF_AFDATA_WUNLOCK(ifp); diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h index 4765687..304b8fa 100644 --- a/sys/netinet6/nd6.h +++ b/sys/netinet6/nd6.h @@ -89,8 +89,6 @@ struct nd_ifinfo { #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ #define ND6_IFF_NO_DAD 0x100 -#define ND6_EXCLUSIVE LLE_EXCLUSIVE - #ifdef _KERNEL #define ND_IFINFO(ifp) \ (((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo) |