Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/mp_machdep.c | 7
-rw-r--r--  sys/cddl/compat/opensolaris/sys/atomic.h | 2
-rw-r--r--  sys/conf/files | 2
-rw-r--r--  sys/conf/options | 1
-rw-r--r--  sys/dev/ath/if_ath.c | 10
-rw-r--r--  sys/dev/cardbus/cardbus_cis.c | 5
-rw-r--r--  sys/dev/cxgbe/adapter.h | 1
-rw-r--r--  sys/dev/cxgbe/t4_main.c | 2
-rw-r--r--  sys/dev/cxgbe/t4_sge.c | 10
-rw-r--r--  sys/dev/pccard/pccard.c | 8
-rw-r--r--  sys/dev/pci/pci.c | 11
-rw-r--r--  sys/dev/puc/pucdata.c | 10
-rw-r--r--  sys/fs/nfs/nfs_var.h | 19
-rw-r--r--  sys/fs/nfsclient/nfs_clport.c | 66
-rw-r--r--  sys/fs/nfsclient/nfs_clrpcops.c | 18
-rw-r--r--  sys/fs/nfsclient/nfs_clstate.c | 32
-rw-r--r--  sys/fs/nfsclient/nfs_clvnops.c | 12
-rw-r--r--  sys/i386/i386/mp_machdep.c | 7
-rw-r--r--  sys/ia64/acpica/acpi_machdep.c | 7
-rw-r--r--  sys/ia64/ia64/machdep.c | 33
-rw-r--r--  sys/ia64/ia64/pal.S | 25
-rw-r--r--  sys/kern/subr_rman.c | 1
-rw-r--r--  sys/netinet/icmp6.h | 21
-rw-r--r--  sys/netinet/in_pcb.c | 229
-rw-r--r--  sys/netinet/in_pcb.h | 86
-rw-r--r--  sys/netinet/in_pcbgroup.c | 457
-rw-r--r--  sys/netinet/ip_divert.c | 3
-rw-r--r--  sys/netinet/ipfw/ip_fw2.c | 4
-rw-r--r--  sys/netinet/ipfw/ip_fw_sockopt.c | 7
-rw-r--r--  sys/netinet/raw_ip.c | 3
-rw-r--r--  sys/netinet/tcp_subr.c | 3
-rw-r--r--  sys/netinet/tcp_syncache.c | 9
-rw-r--r--  sys/netinet/udp_usrreq.c | 3
-rw-r--r--  sys/netinet6/in6.c | 26
-rw-r--r--  sys/netinet6/in6.h | 5
-rw-r--r--  sys/netinet6/in6_pcb.c | 167
-rw-r--r--  sys/netinet6/in6_pcb.h | 10
-rw-r--r--  sys/netinet6/in6_pcbgroup.c | 103
-rw-r--r--  sys/netinet6/in6_proto.c | 11
-rw-r--r--  sys/netinet6/ip6_var.h | 5
-rw-r--r--  sys/netinet6/nd6.c | 48
-rw-r--r--  sys/netinet6/nd6.h | 1
-rw-r--r--  sys/netinet6/nd6_nbr.c | 14
-rw-r--r--  sys/netinet6/nd6_rtr.c | 40
-rw-r--r--  sys/netinet6/send.h | 2
-rw-r--r--  sys/sys/mbuf.h | 40
-rw-r--r--  sys/sys/soundcard.h | 3
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c | 6
48 files changed, 1425 insertions(+), 170 deletions(-)
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 9e20f95..d72afd6 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -242,8 +242,11 @@ topo_probe_0x4(void)
* logical processors that belong to the same core
* as BSP thus deducing number of threads per core.
*/
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
core_id_bits = mask_width(max_logical/max_cores);
if (core_id_bits < 0)
return;
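The guard above (mirrored for i386 later in this diff) only issues the leaf-0x04 CPUID query when cpu_high, the highest supported standard CPUID leaf, is at least 4, and otherwise assumes one core per package. A minimal user-space sketch of the same extraction, using the GCC/Clang <cpuid.h> helpers; the sample program is an editorial illustration, not part of the commit:

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx, max_leaf, max_cores;

	/* Leaf 0 reports the highest supported standard leaf (cpu_high). */
	__get_cpuid(0, &max_leaf, &ebx, &ecx, &edx);
	if (max_leaf >= 0x4) {
		/* Leaf 4, subleaf 0: EAX[31:26] = maximum core IDs per package - 1. */
		__get_cpuid_count(0x04, 0, &eax, &ebx, &ecx, &edx);
		max_cores = ((eax >> 26) & 0x3f) + 1;
	} else
		max_cores = 1;
	printf("cores per package: %u\n", max_cores);
	return (0);
}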
diff --git a/sys/cddl/compat/opensolaris/sys/atomic.h b/sys/cddl/compat/opensolaris/sys/atomic.h
index af9cc5d..f34d77e 100644
--- a/sys/cddl/compat/opensolaris/sys/atomic.h
+++ b/sys/cddl/compat/opensolaris/sys/atomic.h
@@ -40,8 +40,6 @@
extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
extern void atomic_dec_64(volatile uint64_t *target);
#endif
-#ifndef __LP64__
-#endif
#ifndef __sparc64__
extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp,
uint32_t newval);
diff --git a/sys/conf/files b/sys/conf/files
index 59286a5..d654c6f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2748,6 +2748,7 @@ netinet/ip_gre.c optional gre inet
netinet/ip_id.c optional inet
netinet/in_mcast.c optional inet
netinet/in_pcb.c optional inet | inet6
+netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup
netinet/in_proto.c optional inet | inet6 \
compile-with "${NORMAL_C} -I$S/contrib/pf"
netinet/in_rmx.c optional inet
@@ -2825,6 +2826,7 @@ netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6
netinet6/in6_ifattach.c optional inet6
netinet6/in6_mcast.c optional inet6
netinet6/in6_pcb.c optional inet6
+netinet6/in6_pcbgroup.c optional inet6 pcbgroup
netinet6/in6_proto.c optional inet6
netinet6/in6_rmx.c optional inet6
netinet6/in6_src.c optional inet6
diff --git a/sys/conf/options b/sys/conf/options
index a608d86..ee696a8 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -419,6 +419,7 @@ MROUTING opt_mrouting.h
NCP
NETATALK opt_atalk.h
NFSLOCKD
+PCBGROUP opt_pcbgroup.h
RADIX_MPATH opt_mpath.h
ROUTETABLES opt_route.h
SLIP_IFF_OPTS opt_slip.h
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c
index 451bbeaf..6c7b0e7 100644
--- a/sys/dev/ath/if_ath.c
+++ b/sys/dev/ath/if_ath.c
@@ -3473,7 +3473,15 @@ ath_rx_proc(void *arg, int npending)
if (rs->rs_status & HAL_RXERR_PHY) {
sc->sc_stats.ast_rx_phyerr++;
/* Process DFS radar events */
- ath_dfs_process_phy_err(sc, mtod(m, char *), tsf, rs);
+ if ((rs->rs_phyerr == HAL_PHYERR_RADAR) ||
+ (rs->rs_phyerr == HAL_PHYERR_FALSE_RADAR_EXT)) {
+ /* Since we're touching the frame data, sync it */
+ bus_dmamap_sync(sc->sc_dmat,
+ bf->bf_dmamap,
+ BUS_DMASYNC_POSTREAD);
+ /* Now pass it to the radar processing code */
+ ath_dfs_process_phy_err(sc, mtod(m, char *), tsf, rs);
+ }
/* Be suitably paranoid about receiving phy errors out of the stats array bounds */
if (rs->rs_phyerr < 64)
diff --git a/sys/dev/cardbus/cardbus_cis.c b/sys/dev/cardbus/cardbus_cis.c
index 2cfea19..3352a56 100644
--- a/sys/dev/cardbus/cardbus_cis.c
+++ b/sys/dev/cardbus/cardbus_cis.c
@@ -324,7 +324,7 @@ decode_tuple_bar(device_t cbdev, device_t child, int id,
* hint when the cardbus bridge is a child of pci0 (the main
* bus). The PC Card spec seems to indicate that this should
* only be done on x86 based machines, which suggests that on
- * non-x86 machines the adddresses can be anywhere. Since the
+ * non-x86 machines the addresses can be anywhere. Since the
* hardware can do it on non-x86 machines, it should be able
* to do it on x86 machines too. Therefore, we can and should
* ignore this hint. Furthermore, the PC Card spec recommends
@@ -430,7 +430,6 @@ cardbus_read_tuple_finish(device_t cbdev, device_t child, int rid,
{
if (res != CIS_CONFIG_SPACE) {
bus_release_resource(child, SYS_RES_MEMORY, rid, res);
- bus_delete_resource(child, SYS_RES_MEMORY, rid);
}
}
@@ -467,7 +466,7 @@ cardbus_read_tuple_init(device_t cbdev, device_t child, uint32_t *start,
}
/* allocate the memory space to read CIS */
- res = bus_alloc_resource(child, SYS_RES_MEMORY, rid, 0, ~0, 1,
+ res = bus_alloc_resource_any(child, SYS_RES_MEMORY, rid,
rman_make_alignment_flags(4096) | RF_ACTIVE);
if (res == NULL) {
device_printf(cbdev, "Unable to allocate resource "
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 58ff04e..0b33b67 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -396,6 +396,7 @@ struct sge_ctrlq {
struct sge {
uint16_t timer_val[SGE_NTIMERS];
uint8_t counter_val[SGE_NCOUNTERS];
+ int fl_starve_threshold;
int nrxq; /* total rx queues (all ports and the rest) */
int ntxq; /* total tx queues (all ports and the rest) */
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 165a677..90e5001 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -492,6 +492,8 @@ t4_attach(device_t dev)
V_RXTSHIFTMAXR2(15) | V_PERSHIFTBACKOFFMAX(8) | V_PERSHIFTMAX(8) |
V_KEEPALIVEMAXR1(4) | V_KEEPALIVEMAXR2(9));
t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
+ t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
+ F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0);
setup_memwin(sc);
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index f8efda5..b3e3567 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -203,6 +203,9 @@ t4_sge_init(struct adapter *sc)
FL_BUF_SIZE(i));
}
+ i = t4_read_reg(sc, A_SGE_CONM_CTRL);
+ s->fl_starve_threshold = G_EGRTHRESHOLD(i) * 2 + 1;
+
t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
V_THRESHOLD_0(s->counter_val[0]) |
V_THRESHOLD_1(s->counter_val[1]) |
@@ -1233,7 +1236,8 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
sc->sge.eqmap[cntxt_id] = (void *)fl;
FL_LOCK(fl);
- refill_fl(sc, fl, -1, 8);
+ /* Just enough to make sure it doesn't starve right away. */
+ refill_fl(sc, fl, roundup(sc->sge.fl_starve_threshold, 8), 8);
FL_UNLOCK(fl);
}
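For concreteness (the numbers are assumed, not taken from the diff): if G_EGRTHRESHOLD() reads back an egress threshold of 64 descriptors from A_SGE_CONM_CTRL, the new fl_starve_threshold becomes 64 * 2 + 1 = 129, and the initial fill above is rounded up to the next multiple of 8, roundup(129, 8) = 136 buffers, just enough to keep the freelist from starving immediately.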
@@ -1389,6 +1393,10 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
if (rc != 0)
return (rc);
+ FL_LOCK(&rxq->fl);
+ refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8, 8);
+ FL_UNLOCK(&rxq->fl);
+
#ifdef INET
rc = tcp_lro_init(&rxq->lro);
if (rc != 0)
diff --git a/sys/dev/pccard/pccard.c b/sys/dev/pccard/pccard.c
index 00cd1dc..1de571c 100644
--- a/sys/dev/pccard/pccard.c
+++ b/sys/dev/pccard/pccard.c
@@ -1405,8 +1405,8 @@ pccard_ccr_read_impl(device_t brdev, device_t child, uint32_t offset,
struct pccard_ivar *devi = PCCARD_IVAR(child);
*val = pccard_ccr_read(devi->pf, offset);
- device_printf(child, "ccr_read of %#x (%#x) is %#x\n", offset,
- devi->pf->pf_ccr_offset, *val);
+ DEVPRINTF((child, "ccr_read of %#x (%#x) is %#x\n", offset,
+ devi->pf->pf_ccr_offset, *val));
return 0;
}
@@ -1421,8 +1421,8 @@ pccard_ccr_write_impl(device_t brdev, device_t child, uint32_t offset,
* Can't use pccard_ccr_write since client drivers may access
* registers not contained in the 'mask' if they are non-standard.
*/
- device_printf(child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
- devi->pf->pf_ccr_offset);
+ DEVPRINTF((child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
+ devi->pf->pf_ccr_offset));
bus_space_write_1(pf->pf_ccrt, pf->pf_ccrh, pf->pf_ccr_offset + offset,
val);
return 0;
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 22046c1..9cd5a1c 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -2576,6 +2576,17 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
uint16_t cmd;
struct resource *res;
+ /*
+ * The BAR may already exist if the device is a CardBus card
+ * whose CIS is stored in this BAR.
+ */
+ pm = pci_find_bar(dev, reg);
+ if (pm != NULL) {
+ maprange = pci_maprange(pm->pm_value);
+ barlen = maprange == 64 ? 2 : 1;
+ return (barlen);
+ }
+
pci_read_bar(dev, reg, &map, &testval);
if (PCI_BAR_MEM(map)) {
type = SYS_RES_MEMORY;
diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c
index a56971e..2b38d9b 100644
--- a/sys/dev/puc/pucdata.c
+++ b/sys/dev/puc/pucdata.c
@@ -51,12 +51,12 @@ static puc_config_f puc_config_amc;
static puc_config_f puc_config_diva;
static puc_config_f puc_config_exar;
static puc_config_f puc_config_icbook;
+static puc_config_f puc_config_oxford_pcie;
static puc_config_f puc_config_quatech;
static puc_config_f puc_config_syba;
static puc_config_f puc_config_siig;
static puc_config_f puc_config_timedia;
static puc_config_f puc_config_titan;
-static puc_config_f puc_config_oxford_pcie;
const struct puc_cfg puc_pci_devices[] = {
@@ -1366,14 +1366,12 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
bar = puc_get_bar(sc, cfg->rid);
if (bar == NULL)
return (ENXIO);
-
for (idx = 0; idx < sc->sc_nports; idx++) {
- value = bus_read_1(bar->b_res, 0x1000 + (idx << 9)
- + 0x92);
+ value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) +
+ 0x92);
bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92,
- value | 0x10);
+ value | 0x10);
}
-
return (0);
case PUC_CFG_GET_LEN:
*res = 0x200;
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 8ed60a7..5f944b5 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -401,10 +401,10 @@ int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *,
int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *,
NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *);
int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lockt(struct nfsrv_descript *, vnode_t,
struct nfsclclient *, u_int64_t, u_int64_t, struct flock *,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t,
u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t,
u_int64_t, short, struct ucred *, NFSPROC_T *, int);
@@ -439,16 +439,16 @@ struct nfsclclient *nfscl_findcl(struct nfsmount *);
void nfscl_clientrelease(struct nfsclclient *);
void nfscl_freelock(struct nfscllock *, int);
int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short,
- struct ucred *, NFSPROC_T *, struct nfsclclient *, int, u_int8_t *,
- u_int8_t *, struct nfscllockowner **, int *, int *);
+ struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int,
+ u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *);
int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t,
struct ucred *, NFSPROC_T *, int, struct nfsclclient *,
- struct nfscllockowner **, int *);
+ void *, int, struct nfscllockowner **, int *);
int nfscl_checkwritelocked(vnode_t, struct flock *,
- struct ucred *, NFSPROC_T *);
+ struct ucred *, NFSPROC_T *, void *, int);
void nfscl_lockrelease(struct nfscllockowner *, int, int);
void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t);
-void nfscl_filllockowner(NFSPROC_T *, u_int8_t *);
+void nfscl_filllockowner(void *, u_int8_t *, int);
void nfscl_freeopen(struct nfsclopen *, int);
void nfscl_umount(struct nfsmount *, NFSPROC_T *);
void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *);
@@ -466,9 +466,10 @@ void nfscl_lockexcl(struct nfsv4lock *, void *);
void nfscl_lockunlock(struct nfsv4lock *);
void nfscl_lockderef(struct nfsv4lock *);
void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
-void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *);
+void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
+ int);
int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t,
- u_int64_t, struct flock *, NFSPROC_T *);
+ u_int64_t, struct flock *, NFSPROC_T *, void *, int);
int nfscl_mustflush(vnode_t);
int nfscl_nodeleg(vnode_t, int);
int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *);
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 0c3a4c9..4d88bd2 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -500,7 +500,7 @@ nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
* Fill in a lock owner name. For now, pid + the process's creation time.
*/
void
-nfscl_filllockowner(struct thread *td, u_int8_t *cp)
+nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
{
union {
u_int32_t lval;
@@ -508,37 +508,35 @@ nfscl_filllockowner(struct thread *td, u_int8_t *cp)
} tl;
struct proc *p;
-if (td == NULL) {
- printf("NULL td\n");
- bzero(cp, 12);
- return;
-}
- p = td->td_proc;
-if (p == NULL) {
- printf("NULL pid\n");
- bzero(cp, 12);
- return;
-}
- tl.lval = p->p_pid;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp++ = tl.cval[3];
-if (p->p_stats == NULL) {
- printf("pstats null\n");
- bzero(cp, 8);
- return;
-}
- tl.lval = p->p_stats->p_start.tv_sec;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp++ = tl.cval[3];
- tl.lval = p->p_stats->p_start.tv_usec;
- *cp++ = tl.cval[0];
- *cp++ = tl.cval[1];
- *cp++ = tl.cval[2];
- *cp = tl.cval[3];
+ if (id == NULL) {
+ printf("NULL id\n");
+ bzero(cp, NFSV4CL_LOCKNAMELEN);
+ return;
+ }
+ if ((flags & F_POSIX) != 0) {
+ p = (struct proc *)id;
+ tl.lval = p->p_pid;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp++ = tl.cval[3];
+ tl.lval = p->p_stats->p_start.tv_sec;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp++ = tl.cval[3];
+ tl.lval = p->p_stats->p_start.tv_usec;
+ *cp++ = tl.cval[0];
+ *cp++ = tl.cval[1];
+ *cp++ = tl.cval[2];
+ *cp = tl.cval[3];
+ } else if ((flags & F_FLOCK) != 0) {
+ bcopy(&id, cp, sizeof(id));
+ bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
+ } else {
+ printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
+ bzero(cp, NFSV4CL_LOCKNAMELEN);
+ }
}
/*
@@ -943,6 +941,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
sad.sin_family = AF_INET;
sad.sin_len = sizeof (struct sockaddr_in);
sad.sin_addr.s_addr = sin->sin_addr.s_addr;
+ CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL);
if (rt != NULL) {
if (rt->rt_ifp != NULL &&
@@ -956,6 +955,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
}
RTFREE_LOCKED(rt);
}
+ CURVNET_RESTORE();
#ifdef INET6
} else if (nmp->nm_nam->sa_family == AF_INET6) {
struct sockaddr_in6 sad6, *sin6;
@@ -966,6 +966,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
sad6.sin6_family = AF_INET6;
sad6.sin6_len = sizeof (struct sockaddr_in6);
sad6.sin6_addr = sin6->sin6_addr;
+ CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL);
if (rt != NULL) {
if (rt->rt_ifp != NULL &&
@@ -980,6 +981,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
}
RTFREE_LOCKED(rt);
}
+ CURVNET_RESTORE();
#endif
}
return (retp);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 0fc9bfd..5d83d0b 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3459,7 +3459,7 @@ nfsmout:
*/
APPLESTATIC int
nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
- int reclaim, struct ucred *cred, NFSPROC_T *p)
+ int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
struct nfscllockowner *lp;
struct nfsclclient *clp;
@@ -3511,11 +3511,11 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
error = nfscl_getcl(vp, cred, p, &clp);
if (error)
return (error);
- error = nfscl_lockt(vp, clp, off, len, fl, p);
+ error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
if (!error) {
clidrev = clp->nfsc_clientidrev;
error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
- p);
+ p, id, flags);
} else if (error == -1) {
error = 0;
}
@@ -3530,7 +3530,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
return (error);
do {
error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
- clp, &lp, &dorpc);
+ clp, id, flags, &lp, &dorpc);
/*
* If it returns a NULL lp, we're done.
*/
@@ -3538,7 +3538,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
if (callcnt == 0)
nfscl_clientrelease(clp);
else
- nfscl_releasealllocks(clp, vp, p);
+ nfscl_releasealllocks(clp, vp, p, id, flags);
return (error);
}
if (nmp->nm_clp != NULL)
@@ -3572,10 +3572,10 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
}
callcnt++;
} while (error == 0 && nd->nd_repstat == 0);
- nfscl_releasealllocks(clp, vp, p);
+ nfscl_releasealllocks(clp, vp, p, id, flags);
} else if (op == F_SETLK) {
error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
- NULL, 0, NULL, NULL, &lp, &newone, &donelocally);
+ NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
if (error || donelocally) {
return (error);
}
@@ -3625,7 +3625,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
APPLESTATIC int
nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
- struct ucred *cred, NFSPROC_T *p)
+ struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
u_int32_t *tl;
int error, type, size;
@@ -3643,7 +3643,7 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
tl += 2;
*tl++ = clp->nfsc_clientid.lval[0];
*tl = clp->nfsc_clientid.lval[1];
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
(void) nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
error = nfscl_request(nd, vp, p, cred, NULL);
if (error)
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 86d71b6..aa81437 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -226,7 +226,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
* If none found, add the new one or return error, depending upon
* "create".
*/
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
NFSLOCKCLSTATE();
dp = NULL;
/* First check the delegation list */
@@ -521,7 +521,7 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
* If p != NULL, we want to search the parentage tree
* for a matching OpenOwner and use that.
*/
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, NULL, p,
mode, NULL, &op);
if (error == 0) {
@@ -596,7 +596,7 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
op = NULL;
while (op == NULL && (nproc != NULL || rown != NULL)) {
if (nproc != NULL) {
- nfscl_filllockowner(nproc, own);
+ nfscl_filllockowner(nproc->td_proc, own, F_POSIX);
ownp = own;
} else {
ownp = rown;
@@ -881,7 +881,7 @@ nfscl_clientrelease(struct nfsclclient *clp)
APPLESTATIC int
nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
- int recovery, u_int8_t *rownp, u_int8_t *ropenownp,
+ int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
{
struct nfscllockowner *lp;
@@ -942,7 +942,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
if (recovery) {
ownp = rownp;
} else {
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
ownp = own;
}
if (!recovery) {
@@ -1079,7 +1079,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
APPLESTATIC int
nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
__unused struct ucred *cred, NFSPROC_T *p, int callcnt,
- struct nfsclclient *clp, struct nfscllockowner **lpp, int *dorpcp)
+ struct nfsclclient *clp, void *id, int flags,
+ struct nfscllockowner **lpp, int *dorpcp)
{
struct nfscllockowner *lp;
struct nfsclowner *owp;
@@ -1116,7 +1117,7 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
*other_lop = *nlop;
}
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
dp = NULL;
NFSLOCKCLSTATE();
if (callcnt == 0)
@@ -1188,7 +1189,8 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
 * Release all lockowners marked in progress for this process and file.
*/
APPLESTATIC void
-nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
+nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
+ void *id, int flags)
{
struct nfsclowner *owp;
struct nfsclopen *op;
@@ -1197,7 +1199,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
u_int8_t own[NFSV4CL_LOCKNAMELEN];
np = VTONFS(vp);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
@@ -1226,7 +1228,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
*/
APPLESTATIC int
nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
- struct ucred *cred, NFSPROC_T *p)
+ struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
struct nfsclowner *owp;
struct nfscllockowner *lp;
@@ -1266,7 +1268,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
error = nfscl_getcl(vp, cred, p, &clp);
if (error)
return (1);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
/*
@@ -1641,7 +1643,7 @@ nfscl_cleanup(NFSPROC_T *p)
if (!nfscl_inited)
return;
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(p->td_proc, own, F_POSIX);
NFSLOCKCLSTATE();
/*
@@ -3322,7 +3324,7 @@ nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
*/
APPLESTATIC int
nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
- u_int64_t len, struct flock *fl, NFSPROC_T *p)
+ u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
{
struct nfscllock *lop, nlck;
struct nfscldeleg *dp;
@@ -3340,7 +3342,7 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
return (NFSERR_INVAL);
}
np = VTONFS(vp);
- nfscl_filllockowner(p, own);
+ nfscl_filllockowner(id, own, flags);
NFSLOCKCLSTATE();
dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
@@ -3615,7 +3617,7 @@ nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
off = lop->nfslo_first;
len = lop->nfslo_end - lop->nfslo_first;
error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
- clp, 1, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
+ clp, 1, NULL, 0, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
&donelocally);
if (error || donelocally)
return (error);
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 984724d..3e1c66d 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -2884,8 +2884,11 @@ nfs_advlock(struct vop_advlock_args *ap)
int ret, error = EOPNOTSUPP;
u_quad_t size;
- if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
- cred = p->p_ucred;
+ if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
+ if ((ap->a_flags & F_POSIX) != 0)
+ cred = p->p_ucred;
+ else
+ cred = td->td_ucred;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_iflag & VI_DOOMED) {
VOP_UNLOCK(vp, 0);
@@ -2898,7 +2901,8 @@ nfs_advlock(struct vop_advlock_args *ap)
* RFC3530 Sec. 9.3.2.
*/
if (ap->a_op == F_UNLCK &&
- nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
+ nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
+ ap->a_flags))
(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
/*
@@ -2907,7 +2911,7 @@ nfs_advlock(struct vop_advlock_args *ap)
*/
do {
ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
- ap->a_fl, 0, cred, td);
+ ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
ap->a_op == F_SETLK) {
VOP_UNLOCK(vp, 0);
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index fbaae89..be603eb 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -290,8 +290,11 @@ topo_probe_0x4(void)
* logical processors that belong to the same core
* as BSP thus deducing number of threads per core.
*/
- cpuid_count(0x04, 0, p);
- max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ if (cpu_high >= 0x4) {
+ cpuid_count(0x04, 0, p);
+ max_cores = ((p[0] >> 26) & 0x3f) + 1;
+ } else
+ max_cores = 1;
core_id_bits = mask_width(max_logical/max_cores);
if (core_id_bits < 0)
return;
diff --git a/sys/ia64/acpica/acpi_machdep.c b/sys/ia64/acpica/acpi_machdep.c
index b7b612f..1466cfe 100644
--- a/sys/ia64/acpica/acpi_machdep.c
+++ b/sys/ia64/acpica/acpi_machdep.c
@@ -56,7 +56,14 @@ acpi_machdep_quirks(int *quirks)
void
acpi_cpu_c1()
{
+#ifdef INVARIANTS
+ register_t ie;
+
+ ie = intr_disable();
+ KASSERT(ie == 0, ("%s called with interrupts enabled\n", __func__));
+#endif
ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ ia64_enable_intr();
}
void *
diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c
index 7252865..fc7df7a 100644
--- a/sys/ia64/ia64/machdep.c
+++ b/sys/ia64/ia64/machdep.c
@@ -411,12 +411,34 @@ cpu_halt()
void
cpu_idle(int busy)
{
- struct ia64_pal_result res;
+ register_t ie;
- if (cpu_idle_hook != NULL)
+#if 0
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+#endif
+
+ ie = intr_disable();
+ KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));
+
+ if (sched_runnable())
+ ia64_enable_intr();
+ else if (cpu_idle_hook != NULL) {
(*cpu_idle_hook)();
- else
- res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ /* The hook must enable interrupts! */
+ } else {
+ ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+ ia64_enable_intr();
+ }
+
+#if 0
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+#endif
}
int
@@ -644,9 +666,12 @@ calculate_frequencies(void)
{
struct ia64_sal_result sal;
struct ia64_pal_result pal;
+ register_t ie;
+ ie = intr_disable();
sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
+ intr_restore(ie);
if (sal.sal_status == 0 && pal.pal_status == 0) {
if (bootverbose) {
diff --git a/sys/ia64/ia64/pal.S b/sys/ia64/ia64/pal.S
index 2f0d0da..2e3f4cd 100644
--- a/sys/ia64/ia64/pal.S
+++ b/sys/ia64/ia64/pal.S
@@ -38,43 +38,40 @@ ia64_pal_entry: .quad 0
* u_int64_t arg1, u_int64_t arg2, u_int64_t arg3)
*/
ENTRY(ia64_call_pal_static, 4)
-
- .regstk 4,5,0,0
+
+ .regstk 4,4,0,0
palret = loc0
entry = loc1
rpsave = loc2
pfssave = loc3
-psrsave = loc4
- alloc pfssave=ar.pfs,4,5,0,0
+ alloc pfssave=ar.pfs,4,4,0,0
;;
mov rpsave=rp
-
movl entry=@gprel(ia64_pal_entry)
+
1: mov palret=ip // for return address
;;
add entry=entry,gp
- mov psrsave=psr
+ add palret=2f-1b,palret // calculate return address
mov r28=in0 // procedure number
- ;;
- ld8 entry=[entry] // read entry point
mov r29=in1 // copy arguments
mov r30=in2
mov r31=in3
;;
- mov b6=entry
- add palret=2f-1b,palret // calculate return address
- ;;
+ ld8 entry=[entry] // read entry point
mov b0=palret
- rsm psr.i // disable interrupts
+ ;;
+ mov b6=entry
;;
br.cond.sptk b6 // call into firmware
-2: mov psr.l=psrsave
+ ;;
+2:
mov rp=rpsave
mov ar.pfs=pfssave
;;
- srlz.d
br.ret.sptk rp
+ ;;
END(ia64_call_pal_static)
/*
diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c
index 3014b19..abd72c0 100644
--- a/sys/kern/subr_rman.c
+++ b/sys/kern/subr_rman.c
@@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r)
* without freeing anything.
*/
r->r_flags &= ~RF_ALLOCATED;
+ r->r_dev = NULL;
return 0;
}
diff --git a/sys/netinet/icmp6.h b/sys/netinet/icmp6.h
index 5faae7c..c9da86a 100644
--- a/sys/netinet/icmp6.h
+++ b/sys/netinet/icmp6.h
@@ -297,8 +297,9 @@ struct nd_opt_hdr { /* Neighbor discovery option header */
#define ND_OPT_PREFIX_INFORMATION 3
#define ND_OPT_REDIRECTED_HEADER 4
#define ND_OPT_MTU 5
-
-#define ND_OPT_ROUTE_INFO 200 /* draft-ietf-ipngwg-router-preference, not officially assigned yet */
+#define ND_OPT_ROUTE_INFO 24 /* RFC 4191 */
+#define ND_OPT_RDNSS 25 /* RFC 6106 */
+#define ND_OPT_DNSSL 31 /* RFC 6106 */
struct nd_opt_prefix_info { /* prefix information */
u_int8_t nd_opt_pi_type;
@@ -338,6 +339,22 @@ struct nd_opt_route_info { /* route info */
/* prefix follows */
} __packed;
+struct nd_opt_rdnss { /* RDNSS option (RFC 6106) */
+ u_int8_t nd_opt_rdnss_type;
+ u_int8_t nd_opt_rdnss_len;
+ u_int16_t nd_opt_rdnss_reserved;
+ u_int32_t nd_opt_rdnss_lifetime;
+ /* followed by list of recursive DNS servers */
+} __packed;
+
+struct nd_opt_dnssl { /* DNSSL option (RFC 6106) */
+ u_int8_t nd_opt_dnssl_type;
+ u_int8_t nd_opt_dnssl_len;
+ u_int16_t nd_opt_dnssl_reserved;
+ u_int32_t nd_opt_dnssl_lifetime;
+ /* followed by list of DNS search domains */
+} __packed;
+
/*
* icmp6 namelookup
*/
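As a hedged illustration of how the new option headers are consumed (editorial, not part of this change): RFC 6106 expresses the option length in units of 8 octets, one unit for the header plus two units per IPv6 address, so the number of servers carried by an RDNSS option follows directly from nd_opt_rdnss_len:

/*
 * Illustrative helper assuming the nd_opt_rdnss layout added above;
 * a well-formed RDNSS option has an odd length of at least 3 units.
 */
static int
rdnss_addr_count(const struct nd_opt_rdnss *opt)
{

	if (opt->nd_opt_rdnss_len < 3 || (opt->nd_opt_rdnss_len & 1) == 0)
		return (-1);	/* malformed */
	return ((opt->nd_opt_rdnss_len - 1) / 2);
}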
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 4aa998f..4eb309a 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -212,7 +213,7 @@ void
in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
- uint32_t inpcbzone_flags)
+ uint32_t inpcbzone_flags, u_int hashfields)
{
INP_INFO_LOCK_INIT(pcbinfo, name);
@@ -227,6 +228,9 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
&pcbinfo->ipi_hashmask);
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
inpcbzone_flags);
@@ -246,6 +250,9 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+ in_pcbgroup_destroy(pcbinfo);
+#endif
uma_zdestroy(pcbinfo->ipi_zone);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
@@ -1053,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp)
* in_pcbref() bumps the reference count on an inpcb in order to maintain
* stability of an inpcb pointer despite the inpcb lock being released. This
* is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already be held, or when acquiring a reference
+ * via a pcbgroup.
*
* in_pcbref() should be used only to provide brief memory stability, and
* must always be followed by a call to INP_WLOCK() and in_pcbrele() to
@@ -1223,6 +1231,9 @@ in_pcbdrop(struct inpcb *inp)
}
INP_HASH_WUNLOCK(inp->inp_pcbinfo);
inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
}
@@ -1472,6 +1483,148 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
}
#undef INP_LOOKUP_MAPPED_PCB_COST
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr == faddr.s_addr &&
+ inp->inp_laddr.s_addr == laddr.s_addr &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP4))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+ struct inpcb *local_wild_mapped = NULL;
+#endif
+ struct inpcb *jail_wild = NULL;
+ struct inpcbhead *head;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV4) == 0)
+ continue;
+#endif
+ if (inp->inp_faddr.s_addr != INADDR_ANY ||
+ inp->inp_lport != lport)
+ continue;
+
+ /* XXX inp locking */
+ if (ifp && ifp->if_type == IFT_FAITH &&
+ (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP4);
+ if (injail) {
+ if (prison_check_ip4(inp->inp_cred,
+ &laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (inp->inp_laddr.s_addr == laddr.s_addr) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+ /* XXX inp locking, NULL check */
+ if (inp->inp_vflag & INP_IPV6PROTO)
+ local_wild_mapped = inp;
+ else
+#endif /* INET6 */
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+#ifdef INET6
+ if (inp == NULL)
+ inp = local_wild_mapped;
+#endif /* defined(INET6) */
+ if (inp != NULL)
+ goto found;
+ } /* if (lookupflags & INPLOOKUP_WILDCARD) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking bug", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
+
/*
* Lookup PCB in hash list, using pcbinfo tables. This variation assumes
* that the caller has locked the hash list, and will not perform any further
@@ -1636,17 +1789,30 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
* from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
*/
struct inpcb *
in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1656,12 +1822,28 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
struct ifnet *ifp, struct mbuf *m)
{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
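A hedged sketch of a caller (editorial, not from the commit): both public routines insist on INPLOOKUP_RLOCKPCB or INPLOOKUP_WLOCKPCB and return the matching inpcb already locked, so a protocol input path might use the mbuf-aware variant roughly as below; the pcbinfo and header variable names are assumptions for illustration.

	struct inpcb *inp;

	/* Let the lookup reuse any hardware-computed flow id carried in m. */
	inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
	    ip->ip_dst, th->th_dport, INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
	    m->m_pkthdr.rcvif, m);
	if (inp == NULL)
		goto dropunlock;	/* no connection or listener found */
	/* ... process the segment; inp is returned write-locked ... */
	INP_WUNLOCK(inp);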
@@ -1670,8 +1852,8 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
/*
* Insert PCB onto various hash lists.
*/
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@@ -1721,10 +1903,39 @@ in_pcbinshash(struct inpcb *inp)
LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+ if (do_pcbgroup_update)
+ in_pcbgroup_update(inp);
+#endif
return (0);
}
/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't. The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature. in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+ return (in_pcbinshash_internal(inp, 0));
+}
+
+/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
* hashed port list would have to be updated as well), so the lport must
@@ -1755,6 +1966,13 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+ if (m != NULL)
+ in_pcbgroup_update_mbuf(inp, m);
+ else
+ in_pcbgroup_update(inp);
+#endif
}
void
@@ -1791,6 +2009,9 @@ in_pcbremlists(struct inpcb *inp)
}
LIST_REMOVE(inp, inp_list);
pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+ in_pcbgroup_remove(inp);
+#endif
}
/*
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 809bc05..a8524be 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -141,6 +141,7 @@ struct icmp6_filter;
*
* Key:
* (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
* (s) - Protected by another subsystem's locks
@@ -160,9 +161,12 @@ struct icmp6_filter;
*/
struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
+ struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -272,13 +276,14 @@ struct inpcbport {
* the former covering mutable global fields (such as the global pcb list),
* and the latter covering the hashed lookup tables. The lock order is:
*
- * ipi_lock (before) inpcb locks (before) ipi_hash_lock
+ * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
@@ -312,7 +317,16 @@ struct inpcbinfo {
struct uma_zone *ipi_zone; /* (c) */
/*
- * Global lock protecting hash lookup tables.
+ * Connection groups associated with this protocol. These fields are
+ * constant, but pcbgroup structures themselves are protected by
+ * per-pcbgroup locks.
+ */
+ struct inpcbgroup *ipi_pcbgroups; /* (c) */
+ u_int ipi_npcbgroups; /* (c) */
+ u_int ipi_hashfields; /* (c) */
+
+ /*
+ * Global lock protecting non-pcbgroup hash lookup tables.
*/
struct rwlock ipi_hash_lock;
@@ -330,6 +344,14 @@ struct inpcbinfo {
u_long ipi_porthashmask; /* (h) */
/*
+ * List of wildcard inpcbs for use with pcbgroups. In the past, was
+ * per-pcbgroup but is now global. All pcbgroup locks must be held
+ * to modify the list, so any is sufficient to read it.
+ */
+ struct inpcbhead *ipi_wildbase; /* (p) */
+ u_long ipi_wildmask; /* (p) */
+
+ /*
* Pointer to network stack instance
*/
struct vnet *ipi_vnet; /* (c) */
@@ -340,6 +362,31 @@ struct inpcbinfo {
void *ipi_pspare[2];
};
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity. Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+ /*
+ * Per-connection group hash of inpcbs, hashed by local and foreign
+ * addresses and port numbers.
+ */
+ struct inpcbhead *ipg_hashbase; /* (c) */
+ u_long ipg_hashmask; /* (c) */
+
+ /*
+ * Notional affinity of this pcbgroup.
+ */
+ u_int ipg_cpu; /* (p) */
+
+ /*
+ * Per-connection group lock, not to be confused with ipi_lock.
+ * Protects the hash table hung off the group, but also the global
+ * wildcard list in inpcbinfo.
+ */
+ struct mtx ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
#define INP_LOCK_INIT(inp, d, t) \
rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK)
#define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock)
@@ -423,6 +470,14 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \
RA_WLOCKED)
+#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+ MTX_DEF | MTX_DUPOK)
+#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock)
+
+#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock)
+#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock)
+
#define INP_PCBHASH(faddr, lport, fport, mask) \
(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
#define INP_PCBPORTHASH(lport, mask) \
@@ -482,6 +537,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
*/
#define INP_LLE_VALID 0x00000001 /* cached lle is valid */
#define INP_RT_VALID 0x00000002 /* cached rtentry is valid */
+#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */
/*
* Flags passed to in_pcblookup*() functions.
@@ -500,6 +556,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
#define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af)
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define IPI_HASHFIELDS_NONE 0
+#define IPI_HASHFIELDS_2TUPLE 1
+#define IPI_HASHFIELDS_4TUPLE 2
+
#ifdef _KERNEL
VNET_DECLARE(int, ipport_reservedhigh);
VNET_DECLARE(int, ipport_reservedlow);
@@ -531,7 +594,21 @@ VNET_DECLARE(int, ipport_tcpallocs);
void in_pcbinfo_destroy(struct inpcbinfo *);
void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
- int, int, char *, uma_init, uma_fini, uint32_t);
+ int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+ in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+ in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+ struct in_addr, u_short);
+void in_pcbgroup_destroy(struct inpcbinfo *);
+int in_pcbgroup_enabled(struct inpcbinfo *);
+void in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void in_pcbgroup_remove(struct inpcb *);
+void in_pcbgroup_update(struct inpcb *);
+void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -551,6 +628,7 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
+int in_pcbinshash_nopcbgroup(struct inpcb *);
struct inpcb *
in_pcblookup_local(struct inpcbinfo *,
struct in_addr, u_short, int, struct ucred *);
diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c
new file mode 100644
index 0000000..c9f5c70
--- /dev/null
+++ b/sys/netinet/in_pcbgroup.c
@@ -0,0 +1,457 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
+ * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
+ * Strategies in Modern Operating Systems". This implementation differs
+ * significantly from that described in the paper, in that it attempts to
+ * introduce not just notions of affinity for connections and distribute work
+ * so as to reduce lock contention, but also align those notions with
+ * hardware work distribution strategies such as RSS. In this construction,
+ * connection groups supplement, rather than replace, existing reservation
+ * tables for protocol 4-tuples, offering CPU-affine lookup tables with
+ * minimal cache line migration and lock contention during steady state
+ * operation.
+ *
+ * Internet protocols, such as UDP and TCP, register to use connection groups
+ * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
+ * indicates to the connection group code whether a 2-tuple or 4-tuple is
+ * used as an argument to hashes that assign a connection to a particular
+ * group. This must be aligned with any hardware offloaded distribution
+ * model, such as RSS or similar approaches taken in embedded network boards.
+ * Wildcard sockets require special handling, as in Willman 2006, and are
+ * shared between connection groups -- while being protected by group-local
+ * locks. This means that connection establishment and teardown can be
+ * significantly more expensive than without connection groups, but that
+ * steady-state processing can be significantly faster.
+ *
+ * Most of the implementation of connection groups is in this file; however,
+ * connection group lookup is implemented in in_pcb.c alongside reservation
+ * table lookups -- see in_pcblookup_group().
+ *
+ * TODO:
+ *
+ * Implement dynamic rebalancing of buckets with connection groups; when
+ * load is unevenly distributed, search for more optimal balancing on
+ * demand. This might require scaling up the number of connection groups
+ * by <<1.
+ *
+ * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
+ * groups for ip_input and ip6_input, allowing non-offloaded work
+ * distribution.
+ *
+ * Expose effective CPU affinity of connections to userspace using socket
+ * options.
+ *
+ * Investigate per-connection affinity overrides based on socket options; an
+ * option could be set, certainly resulting in work being distributed
+ * differently in software, and possibly propagated to supporting hardware
+ * with TCAMs or hardware hash tables. This might require connections to
+ * exist in more than one connection group at a time.
+ *
+ * Hook netisr thread reconfiguration events, and propagate those to RSS so
+ * that rebalancing can occur when the thread pool grows or shrinks.
+ *
+ * Expose per-pcbgroup statistics to userspace monitoring tools such as
+ * netstat, in order to allow better debugging and profiling.
+ */
+
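An editorial sketch of how a protocol opts in (the real per-protocol arguments are in tcp_subr.c and udp_usrreq.c, which appear in the diffstat but not in this excerpt, so treat the calls below as assumptions): passing an ipi_hashfields value other than IPI_HASHFIELDS_NONE to the extended in_pcbinfo_init() is what enables pcbgroups for a given pcbinfo.

	/* TCP: distribute established connections by full 4-tuple (sketch). */
	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
	    IPI_HASHFIELDS_4TUPLE);

	/* UDP: only a 2-tuple is meaningful for unconnected sockets (sketch). */
	in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
	    "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
	    IPI_HASHFIELDS_2TUPLE);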
+void
+in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
+ int hash_nelements)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int numpcbgroups, pgn;
+
+ /*
+ * Only enable connection groups for a protocol if it has been
+ * specifically requested.
+ */
+ if (hashfields == IPI_HASHFIELDS_NONE)
+ return;
+
+ /*
+ * Connection groups are about multi-processor load distribution,
+ * lock contention, and connection CPU affinity. As such, no point
+ * in turning them on for a uniprocessor machine, it only wastes
+ * memory.
+ */
+ if (mp_ncpus == 1)
+ return;
+
+ /*
+ * Use one group per CPU for now. If we decide to do dynamic
+ * rebalancing a la RSS, we'll need to shift left by at least 1.
+ */
+ numpcbgroups = mp_ncpus;
+
+ pcbinfo->ipi_hashfields = hashfields;
+ pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
+ sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
+ pcbinfo->ipi_npcbgroups = numpcbgroups;
+ pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
+ &pcbinfo->ipi_wildmask);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
+ &pcbgroup->ipg_hashmask);
+ INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
+
+ /*
+ * Initialise notional affinity of the pcbgroup -- for RSS,
+ * we want the same notion of affinity as NICs to be used.
+ * Just round robin for the time being.
+ */
+ pcbgroup->ipg_cpu = (pgn % mp_ncpus);
+ }
+}
+
+void
+in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
+{
+ struct inpcbgroup *pcbgroup;
+ u_int pgn;
+
+ if (pcbinfo->ipi_npcbgroups == 0)
+ return;
+
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+ pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+ KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
+ ("in_pcbinfo_destroy: listhead not empty"));
+ INP_GROUP_LOCK_DESTROY(pcbgroup);
+ hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
+ pcbgroup->ipg_hashmask);
+ }
+ hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
+ free(pcbinfo->ipi_pcbgroups, M_PCB);
+ pcbinfo->ipi_pcbgroups = NULL;
+ pcbinfo->ipi_npcbgroups = 0;
+ pcbinfo->ipi_hashfields = 0;
+}
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+
+ return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+ return (NULL);
+}
+
+static struct inpcbgroup *
+in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+
+ return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
+ u_short lport, struct in_addr faddr, u_short fport)
+{
+ uint32_t hash;
+
+ switch (pcbinfo->ipi_hashfields) {
+ case IPI_HASHFIELDS_4TUPLE:
+ hash = faddr.s_addr ^ fport;
+ break;
+
+ case IPI_HASHFIELDS_2TUPLE:
+ hash = faddr.s_addr ^ laddr.s_addr;
+ break;
+
+ default:
+ hash = 0;
+ }
+ return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
+ hash)]);
+}
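A small stand-alone model of the mapping above (editorial; the addresses, ports and CPU count are invented): the chosen hash is reduced to a group index by in_pcbgroup_getbucket(), i.e. hash % ipi_npcbgroups, with one group per CPU in this commit.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int
main(void)
{
	uint32_t laddr = inet_addr("10.0.0.1");		/* local address */
	uint32_t faddr = inet_addr("192.0.2.7");	/* foreign address */
	uint16_t fport = htons(49152);			/* foreign port */
	unsigned int npcbgroups = 8;			/* e.g. mp_ncpus == 8 */

	/* IPI_HASHFIELDS_4TUPLE: hash the foreign address and port. */
	printf("4-tuple group: %u\n", (faddr ^ fport) % npcbgroups);
	/* IPI_HASHFIELDS_2TUPLE: hash the local/foreign address pair. */
	printf("2-tuple group: %u\n", (faddr ^ laddr) % npcbgroups);
	return (0);
}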
+
+struct inpcbgroup *
+in_pcbgroup_byinpcb(struct inpcb *inp)
+{
+
+ return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
+ inp->inp_lport, inp->inp_faddr, inp->inp_fport));
+}
+
+static void
+in_pcbwild_add(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbhead *head;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: is wild",__func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
+ inp->inp_flags2 |= INP_PCBGROUPWILD;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+}
+
+static void
+in_pcbwild_remove(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ u_int pgn;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
+ ("%s: not wild", __func__));
+
+ pcbinfo = inp->inp_pcbinfo;
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ LIST_REMOVE(inp, inp_pcbgroup_wild);
+ for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+ INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+ inp->inp_flags2 &= ~INP_PCBGROUPWILD;
+}
+
+static __inline int
+in_pcbwild_needed(struct inpcb *inp)
+{
+
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
+ else
+#endif
+ return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
+}
+
+static void
+in_pcbwild_update_internal(struct inpcb *inp)
+{
+ int wildcard_needed;
+
+ wildcard_needed = in_pcbwild_needed(inp);
+ if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_add(inp);
+ else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD))
+ in_pcbwild_remove(inp);
+}
+
+/*
+ * Update the pcbgroup of an inpcb, which might include removing an old
+ * pcbgroup reference and/or adding a new one. Wildcard processing is not
+ * performed here, although ideally we'll never install a pcbgroup for a
+ * wildcard inpcb (asserted below).
+ */
+static void
+in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
+ struct inpcbgroup *newpcbgroup, struct inpcb *inp)
+{
+ struct inpcbgroup *oldpcbgroup;
+ struct inpcbhead *pcbhash;
+ uint32_t hashkey_faddr;
+
+ INP_WLOCK_ASSERT(inp);
+
+ oldpcbgroup = inp->inp_pcbgroup;
+ if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+ INP_GROUP_LOCK(oldpcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(oldpcbgroup);
+ }
+ if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */
+ else
+#endif
+ hashkey_faddr = inp->inp_faddr.s_addr;
+ INP_GROUP_LOCK(newpcbgroup);
+ pcbhash = &newpcbgroup->ipg_hashbase[
+ INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport,
+ newpcbgroup->ipg_hashmask)];
+ LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = newpcbgroup;
+ INP_GROUP_UNLOCK(newpcbgroup);
+ }
+
+ KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
+ ("%s: pcbgroup and wildcard!", __func__));
+}
+
+/*
+ * Two update paths: one in which the 4-tuple on an inpcb has been updated
+ * and therefore connection groups may need to change (or a wildcard entry
+ * may need to be installed), and another in which the 4-tuple has been
+ * set as a result of a packet received, in which case we may be able to use
+ * the hash on the mbuf to avoid doing a software hash calculation for RSS.
+ *
+ * In each case: first, let the wildcard code have a go at placing it as a
+ * wildcard socket. If it was a wildcard, or if the connection has been
+ * dropped, then no pcbgroup is required (so potentially clear it);
+ * otherwise, calculate and update the pcbgroup for the inpcb.
+ */
+void
+in_pcbgroup_update(struct inpcb *inp)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ else
+#endif
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
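
To make the ordering above concrete, a small userland model of the decision is sketched below (hypothetical names, a boolean/int stand-in for the real inpcb and lock state; not part of this change): the wildcard placement is refreshed first, and only a non-wildcard, non-dropped socket receives a group.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical miniature model of the update logic above. */
struct mini_inp {
	bool	wildcard_needed;	/* foreign address still unspecified? */
	bool	dropped;		/* connection has been dropped? */
	bool	on_wild_list;		/* models INP_PCBGROUPWILD */
	int	group;			/* -1 models "no pcbgroup" */
};

static void
mini_update(struct mini_inp *inp, int computed_group)
{
	/* First, wildcard placement. */
	if (inp->wildcard_needed && !inp->on_wild_list)
		inp->on_wild_list = true;
	else if (!inp->wildcard_needed && inp->on_wild_list)
		inp->on_wild_list = false;

	/* Then, group placement only for non-wild, non-dropped sockets. */
	if (!inp->on_wild_list && !inp->dropped)
		inp->group = computed_group;
	else
		inp->group = -1;
}

int
main(void)
{
	struct mini_inp inp = { .wildcard_needed = true, .group = -1 };

	mini_update(&inp, 2);		/* bound, not connected: wildcard only */
	printf("wild=%d group=%d\n", inp.on_wild_list, inp.group);

	inp.wildcard_needed = false;	/* connect() filled in the 4-tuple */
	mini_update(&inp, 2);
	printf("wild=%d group=%d\n", inp.on_wild_list, inp.group);
	return (0);
}
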
+
+void
+in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
+{
+ struct inpcbinfo *pcbinfo;
+ struct inpcbgroup *newpcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = inp->inp_pcbinfo;
+ if (!in_pcbgroup_enabled(pcbinfo))
+ return;
+
+ /*
+ * Possibly should assert !INP_PCBGROUPWILD rather than testing for
+ * it; presumably this function should only ever be called for a
+ * non-wildcard socket?
+ */
+ in_pcbwild_update_internal(inp);
+ if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+ !(inp->inp_flags & INP_DROPPED)) {
+ newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m);
+#ifdef INET6
+ if (inp->inp_vflag & INP_IPV6) {
+ if (newpcbgroup == NULL)
+ newpcbgroup = in6_pcbgroup_byinpcb(inp);
+ } else {
+#endif
+ if (newpcbgroup == NULL)
+ newpcbgroup = in_pcbgroup_byinpcb(inp);
+#ifdef INET6
+ }
+#endif
+ } else
+ newpcbgroup = NULL;
+ in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+/*
+ * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb.
+ */
+void
+in_pcbgroup_remove(struct inpcb *inp)
+{
+ struct inpcbgroup *pcbgroup;
+
+ INP_WLOCK_ASSERT(inp);
+
+ if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
+ return;
+
+ if (inp->inp_flags2 & INP_PCBGROUPWILD)
+ in_pcbwild_remove(inp);
+
+ pcbgroup = inp->inp_pcbgroup;
+ if (pcbgroup != NULL) {
+ INP_GROUP_LOCK(pcbgroup);
+ LIST_REMOVE(inp, inp_pcbgrouphash);
+ inp->inp_pcbgroup = NULL;
+ INP_GROUP_UNLOCK(pcbgroup);
+ }
+}
+
+/*
+ * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
+ * for a protocol.
+ */
+int
+in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
+{
+
+ return (pcbinfo->ipi_npcbgroups > 0);
+}
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index 6f5bce7..527ce56 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -153,7 +153,8 @@ div_init(void)
* place for hashbase == NULL.
*/
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
- div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE);
+ div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
}
static void
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index b4d3abb..49c48b9 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -692,6 +692,10 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
lookupflags |= INPLOOKUP_RLOCKPCB;
match = 0;
if (*ugid_lookupp == 0) {
+ /*
+ * XXXRW: If we had the mbuf here, could use
+ * in_pcblookup_mbuf().
+ */
pcb = (oif) ?
in_pcblookup(pi,
dst_ip, htons(dst_port),
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
index f81d57d..2347456 100644
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ b/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
}
if (n == 0) {
- /* A flush request (arg == 0) on empty ruleset
- * returns with no error. On the contrary,
+ /* A flush request (arg == 0 or cmd == 1) on empty
+ * ruleset returns with no error. On the contrary,
* if there is no match on a specific request,
* we return EINVAL.
*/
- error = (arg == 0) ? 0 : EINVAL;
+ if (arg != 0 && cmd != 1)
+ error = EINVAL;
break;
}
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 635f08f..e754b88 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -205,7 +205,8 @@ rip_init(void)
{
in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
- 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_NONE);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 06854ec..6ed58911 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -300,7 +300,8 @@ tcp_init(void)
hashsize = 512; /* safe default */
}
in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
- "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_4TUPLE);
/*
* These have to be type stable for the benefit of the timers.
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 5125134..66e4732 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -676,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#ifdef INET6
}
#endif
+
+ /*
+ * Install in the reservation hash table for now, but don't yet
+ * install a connection group since the full 4-tuple isn't yet
+ * configured.
+ */
inp->inp_lport = sc->sc_inc.inc_lport;
- if ((error = in_pcbinshash(inp)) != 0) {
+ if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
/*
* Undo the assignments above if we failed to
* put the PCB on the hash lists.
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index fd864c0..28eb8fd 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -186,7 +186,8 @@ udp_init(void)
{
in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+ "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+ IPI_HASHFIELDS_2TUPLE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(V_udpcb_zone, maxsockets);
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index 9e8e5cd..9558d1b 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -652,8 +652,32 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
- if (error == 0 && ia)
+ if (error == 0 && ia) {
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) {
+ /*
+ * Try to clear the ifdisabled flag
+ * when a new IPv6 address is
+ * successfully added onto an
+ * IFDISABLED interface.
+ */
+ struct in6_ndireq nd;
+
+ memset(&nd, 0, sizeof(nd));
+ nd.ndi.flags = ND_IFINFO(ifp)->flags;
+ nd.ndi.flags &= ~ND6_IFF_IFDISABLED;
+ if (nd6_ioctl(SIOCSIFINFO_FLAGS,
+ (caddr_t)&nd, ifp) < 0)
+ log(LOG_NOTICE, "SIOCAIFADDR_IN6: "
+ "SIOCSIFINFO_FLAGS for -ifdisabled "
+ "failed.");
+ /*
+ * Intentionally ignore a failure to
+ * clear the flag; such a failure means
+ * address duplication was detected.
+ */
+ }
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
+ }
break;
}
diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h
index 32759af..ae0da6a 100644
--- a/sys/netinet6/in6.h
+++ b/sys/netinet6/in6.h
@@ -611,7 +611,10 @@ struct ip6_mtuinfo {
#define IPV6CTL_STEALTH 45
#define ICMPV6CTL_ND6_ONLINKNSRFC4861 47
-#define IPV6CTL_MAXID 48
+#define IPV6CTL_NO_RADR 48 /* No defroute from RA */
+#define IPV6CTL_NORBIT_RAIF 49 /* Disable R-bit in NA on RA
+ * receiving IF. */
+#define IPV6CTL_MAXID 50
#endif /* __BSD_VISIBLE */
/*
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index da73f21..d15c605 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -827,6 +828,141 @@ in6_rtchange(struct inpcb *inp, int errno)
return inp;
}
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+ struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
+ u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+ struct inpcbhead *head;
+ struct inpcb *inp, *tmpinp;
+ u_short fport = fport_arg, lport = lport_arg;
+ int faith;
+
+ if (faithprefix_p != NULL)
+ faith = (*faithprefix_p)(laddr);
+ else
+ faith = 0;
+
+ /*
+ * First look for an exact match.
+ */
+ tmpinp = NULL;
+ INP_GROUP_LOCK(pcbgroup);
+ head = &pcbgroup->ipg_hashbase[
+ INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+ pcbgroup->ipg_hashmask)];
+ LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+ IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+ inp->inp_fport == fport &&
+ inp->inp_lport == lport) {
+ /*
+ * XXX We should be able to directly return
+ * the inp here, without any checks.
+ * Well unless both bound with SO_REUSEPORT?
+ */
+ if (prison_flag(inp->inp_cred, PR_IP6))
+ goto found;
+ if (tmpinp == NULL)
+ tmpinp = inp;
+ }
+ }
+ if (tmpinp != NULL) {
+ inp = tmpinp;
+ goto found;
+ }
+
+ /*
+ * Then look for a wildcard match, if requested.
+ */
+ if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+ struct inpcb *local_wild = NULL, *local_exact = NULL;
+ struct inpcb *jail_wild = NULL;
+ int injail;
+
+ /*
+ * Order of socket selection - we always prefer jails.
+ * 1. jailed, non-wild.
+ * 2. jailed, wild.
+ * 3. non-jailed, non-wild.
+ * 4. non-jailed, wild.
+ */
+ head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+ 0, pcbinfo->ipi_wildmask)];
+ LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+ /* XXX inp locking */
+ if ((inp->inp_vflag & INP_IPV6) == 0)
+ continue;
+
+ if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+ inp->inp_lport != lport) {
+ continue;
+ }
+
+ /* XXX inp locking */
+ if (faith && (inp->inp_flags & INP_FAITH) == 0)
+ continue;
+
+ injail = prison_flag(inp->inp_cred, PR_IP6);
+ if (injail) {
+ if (prison_check_ip6(inp->inp_cred,
+ laddr) != 0)
+ continue;
+ } else {
+ if (local_exact != NULL)
+ continue;
+ }
+
+ if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+ if (injail)
+ goto found;
+ else
+ local_exact = inp;
+ } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+ if (injail)
+ jail_wild = inp;
+ else
+ local_wild = inp;
+ }
+ } /* LIST_FOREACH */
+
+ inp = jail_wild;
+ if (inp == NULL)
+ inp = local_exact;
+ if (inp == NULL)
+ inp = local_wild;
+ if (inp != NULL)
+ goto found;
+ } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
+ INP_GROUP_UNLOCK(pcbgroup);
+ return (NULL);
+
+found:
+ in_pcbref(inp);
+ INP_GROUP_UNLOCK(pcbgroup);
+ if (lookupflags & INPLOOKUP_WLOCKPCB) {
+ INP_WLOCK(inp);
+ if (in_pcbrele_wlocked(inp))
+ return (NULL);
+ } else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+ INP_RLOCK(inp);
+ if (in_pcbrele_rlocked(inp))
+ return (NULL);
+ } else
+ panic("%s: locking buf", __func__);
+ return (inp);
+}
+#endif /* PCBGROUP */
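
The wildcard scan above prefers candidates in the order given in its comment: jailed exact match, then jailed wildcard, then non-jailed exact, then non-jailed wildcard. A hedged userland sketch of just that selection order follows (hypothetical struct and names; the credential and FAITH checks are omitted):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cand {
	const char	*name;
	bool		 jailed;
	bool		 exact;		/* local address matches exactly */
};

/* Select per the order: jailed exact, jailed wild, non-jailed exact,
 * non-jailed wild. */
static const struct cand *
select_wild(const struct cand *c, size_t n)
{
	const struct cand *jail_wild = NULL, *local_exact = NULL;
	const struct cand *local_wild = NULL;
	size_t i;

	for (i = 0; i < n; i++) {
		if (c[i].jailed) {
			if (c[i].exact)
				return (&c[i]);		/* best match: stop */
			jail_wild = &c[i];
		} else if (c[i].exact)
			local_exact = &c[i];
		else
			local_wild = &c[i];
	}
	if (jail_wild != NULL)
		return (jail_wild);
	if (local_exact != NULL)
		return (local_exact);
	return (local_wild);
}

int
main(void)
{
	struct cand socks[] = {
		{ "non-jailed wild", false, false },
		{ "non-jailed exact", false, true },
		{ "jailed wild", true, false },
	};

	printf("%s\n", select_wild(socks, 3)->name);	/* "jailed wild" */
	return (0);
}
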
+
/*
* Lookup PCB in hash list.
*/
@@ -983,16 +1119,30 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
/*
* Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
* from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in6_pcbgroup.c.
*/
struct inpcb *
in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
{
+#if defined(PCBGROUP)
+ struct inpcbgroup *pcbgroup;
+#endif
+
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#if defined(PCBGROUP)
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
@@ -1002,11 +1152,28 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
struct ifnet *ifp, struct mbuf *m)
{
+#ifdef PCBGROUP
+ struct inpcbgroup *pcbgroup;
+#endif
+
KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
("%s: LOCKPCB not set", __func__));
+#ifdef PCBGROUP
+ if (in_pcbgroup_enabled(pcbinfo)) {
+ pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid);
+ if (pcbgroup != NULL)
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
+ fport, laddr, lport, lookupflags, ifp));
+ pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+ fport);
+ return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+ laddr, lport, lookupflags, ifp));
+ }
+#endif
return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
lookupflags, ifp));
}
diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h
index cf24704..8398d54 100644
--- a/sys/netinet6/in6_pcb.h
+++ b/sys/netinet6/in6_pcb.h
@@ -69,6 +69,16 @@
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
+struct inpcbgroup *
+ in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+ in6_pcbgroup_byinpcb __P((struct inpcb *));
+struct inpcbgroup *
+ in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *);
+struct inpcbgroup *
+ in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *,
+ u_short, const struct in6_addr *, u_short));
+
void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
void in6_losing __P((struct inpcb *));
int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
diff --git a/sys/netinet6/in6_pcbgroup.c b/sys/netinet6/in6_pcbgroup.c
new file mode 100644
index 0000000..850d7f4
--- /dev/null
+++ b/sys/netinet6/in6_pcbgroup.c
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+
+ return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+ return (NULL);
+}
+
+struct inpcbgroup *
+in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+
+ return (in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+ m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp,
+ u_short lport, const struct in6_addr *faddrp, u_short fport)
+{
+ uint32_t hash;
+
+ switch (pcbinfo->ipi_hashfields) {
+ case IPI_HASHFIELDS_4TUPLE:
+ hash = faddrp->s6_addr32[3] ^ fport;
+ break;
+
+ case IPI_HASHFIELDS_2TUPLE:
+ hash = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3];
+ break;
+
+ default:
+ hash = 0;
+ }
+ return (&pcbinfo->ipi_pcbgroups[in6_pcbgroup_getbucket(pcbinfo,
+ hash)]);
+}
+
+struct inpcbgroup *
+in6_pcbgroup_byinpcb(struct inpcb *inp)
+{
+
+ return (in6_pcbgroup_bytuple(inp->inp_pcbinfo, &inp->in6p_laddr,
+ inp->inp_lport, &inp->in6p_faddr, inp->inp_fport));
+}
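
Note that in6_pcbgroup_bytuple() above folds only the low 32 bits of each IPv6 address (s6_addr32[3], flagged XXX), so addresses that differ only in their upper 96 bits select the same group. A small hypothetical userland demonstration (using the portable s6_addr byte array instead of the kernel-only s6_addr32 macro; not part of this change):

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct in6_addr a, b;
	uint32_t low_a, low_b;
	unsigned int ngroups = 8;

	inet_pton(AF_INET6, "2001:db8:1::1234", &a);
	inet_pton(AF_INET6, "2001:db8:2::1234", &b);

	/* Mirrors the IPI_HASHFIELDS_2TUPLE case with an unspecified local
	 * address: only the last four address bytes feed the hash, so both
	 * addresses land in the same group. */
	memcpy(&low_a, &a.s6_addr[12], sizeof(low_a));
	memcpy(&low_b, &b.s6_addr[12], sizeof(low_b));
	printf("group(a)=%u group(b)=%u\n",
	    (unsigned int)(low_a % ngroups), (unsigned int)(low_b % ngroups));
	return (0);
}
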
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index ab54755..9e78e9a 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -409,6 +409,8 @@ VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS;
VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM;
VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS;
VNET_DEFINE(int, ip6_accept_rtadv) = 0;
+VNET_DEFINE(int, ip6_no_radr) = 0;
+VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
@@ -537,6 +539,15 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
"Default value of per-interface flag for accepting ICMPv6 Router"
"Advertisement messages");
+SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
+ CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
+ "Default value of per-interface flag to control whether routers "
+ "sending ICMPv6 RA messages on that interface are added into the "
+ "default router list.");
+SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif, CTLFLAG_RW,
+ &VNET_NAME(ip6_norbit_raif), 0,
+ "Always set 0 to R flag in ICMPv6 NA messages when accepting RA"
+ " on the interface.");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
&VNET_NAME(ip6_keepfaith), 0, "");
SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h
index 4e44d9f..dbfba9a 100644
--- a/sys/netinet6/ip6_var.h
+++ b/sys/netinet6/ip6_var.h
@@ -316,6 +316,9 @@ VNET_DECLARE(int, ip6_maxfragpackets); /* Maximum packets in reassembly
VNET_DECLARE(int, ip6_maxfrags); /* Maximum fragments in reassembly
* queue */
VNET_DECLARE(int, ip6_accept_rtadv); /* Acts as a host not a router */
+VNET_DECLARE(int, ip6_no_radr); /* No defroute from RA */
+VNET_DECLARE(int, ip6_norbit_raif); /* Disable R-bit in NA on RA
+ * receiving IF. */
VNET_DECLARE(int, ip6_keepfaith); /* Firewall Aided Internet Translator */
VNET_DECLARE(int, ip6_log_interval);
VNET_DECLARE(time_t, ip6_log_time);
@@ -327,6 +330,8 @@ VNET_DECLARE(int, ip6_dad_count); /* DupAddrDetectionTransmits */
#define V_ip6_maxfragpackets VNET(ip6_maxfragpackets)
#define V_ip6_maxfrags VNET(ip6_maxfrags)
#define V_ip6_accept_rtadv VNET(ip6_accept_rtadv)
+#define V_ip6_no_radr VNET(ip6_no_radr)
+#define V_ip6_norbit_raif VNET(ip6_norbit_raif)
#define V_ip6_keepfaith VNET(ip6_keepfaith)
#define V_ip6_log_interval VNET(ip6_log_interval)
#define V_ip6_log_time VNET(ip6_log_time)
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index f1e48ea..2b51e43 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -193,6 +193,8 @@ nd6_ifattach(struct ifnet *ifp)
/* A loopback interface does not need to accept RTADV. */
if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK))
nd->flags |= ND6_IFF_ACCEPT_RTADV;
+ if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK))
+ nd->flags |= ND6_IFF_NO_RADR;
/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
nd6_setmtu0(ifp, nd);
@@ -825,7 +827,7 @@ nd6_purge(struct ifnet *ifp)
if (V_nd6_defifindex == ifp->if_index)
nd6_setdefaultiface(0);
- if (!V_ip6_forwarding && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
/* Refresh default router list. */
defrouter_select();
}
@@ -958,10 +960,9 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
/*
* If the default router list is empty, all addresses are regarded
* as on-link, and thus, as a neighbor.
- * XXX: we restrict the condition to hosts, because routers usually do
- * not have the "default router list".
*/
- if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV &&
+ TAILQ_FIRST(&V_nd_defrouter) == NULL &&
V_nd6_defifindex == ifp->if_index) {
return (1);
}
@@ -1022,8 +1023,7 @@ nd6_free(struct llentry *ln, int gc)
ifp = ln->lle_tbl->llt_ifp;
- if (!V_ip6_forwarding) {
-
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
if (dr != NULL && dr->expire &&
@@ -1322,6 +1322,16 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
struct ifaddr *ifa;
struct in6_ifaddr *ia;
+ /*
+ * Try to clear ifdisabled flag when enabling
+ * accept_rtadv or auto_linklocal.
+ */
+ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
+ !(ND.flags & ND6_IFF_IFDISABLED) &&
+ (ND.flags & (ND6_IFF_ACCEPT_RTADV |
+ ND6_IFF_AUTO_LINKLOCAL)))
+ ND.flags &= ~ND6_IFF_IFDISABLED;
+
if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
!(ND.flags & ND6_IFF_IFDISABLED)) {
 /* ifdisabled 1->0 transition */
@@ -1340,7 +1350,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
continue;
ia = (struct in6_ifaddr *)ifa;
if ((ia->ia6_flags & IN6_IFF_DUPLICATED) &&
- IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
+ IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
duplicated_linklocal = 1;
break;
}
@@ -1379,6 +1389,28 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
/* If no link-local address on ifp, configure */
ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL;
in6_ifattach(ifp, NULL);
+ } else if ((ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) &&
+ !(ND.flags & ND6_IFF_IFDISABLED)) {
+ /*
+ * When the IF already has
+ * ND6_IFF_AUTO_LINKLOCAL and no link-local
+ * address is assigned, try to assign one.
+ */
+ int haslinklocal = 0;
+
+ IF_ADDR_LOCK(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr->sa_family != AF_INET6)
+ continue;
+ ia = (struct in6_ifaddr *)ifa;
+ if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) {
+ haslinklocal = 1;
+ break;
+ }
+ }
+ IF_ADDR_UNLOCK(ifp);
+ if (!haslinklocal)
+ in6_ifattach(ifp, NULL);
}
}
ND_IFINFO(ifp)->flags = ND.flags;
@@ -1718,7 +1750,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* for those are not autoconfigured hosts, we explicitly avoid such
* cases for safety.
*/
- if (do_update && router && !V_ip6_forwarding &&
+ if (do_update && router &&
ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) {
/*
* guaranteed recursion
diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h
index abcfcb7..6f63192 100644
--- a/sys/netinet6/nd6.h
+++ b/sys/netinet6/nd6.h
@@ -85,6 +85,7 @@ struct nd_ifinfo {
*/
#define ND6_IFF_DONT_SET_IFROUTE 0x10
#define ND6_IFF_AUTO_LINKLOCAL 0x20
+#define ND6_IFF_NO_RADR 0x40
#define ND6_CREATE LLE_CREATE
#define ND6_EXCLUSIVE LLE_EXCLUSIVE
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index fd5bcf2..fb8e379 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -112,10 +112,14 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
int lladdrlen = 0;
int anycast = 0, proxy = 0, tentative = 0;
int tlladdr;
+ int rflag;
union nd_opts ndopts;
struct sockaddr_dl proxydl;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
+ rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0;
+ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif)
+ rflag = 0;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
@@ -339,8 +343,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
goto bad;
nd6_na_output(ifp, &in6_all, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0),
- tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
+ rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
goto freeit;
}
@@ -349,8 +352,8 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
nd6_na_output(ifp, &saddr6, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- (V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED,
- tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL);
+ rflag | ND_NA_FLAG_SOLICITED, tlladdr,
+ proxy ? (struct sockaddr *)&proxydl : NULL);
freeit:
if (ifa != NULL)
ifa_free(ifa);
@@ -862,7 +865,8 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
if (dr)
defrtrlist_del(dr);
- else if (!V_ip6_forwarding) {
+ else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags &
+ ND6_IFF_ACCEPT_RTADV) {
/*
* Even if the neighbor is not in the default
* router list, the neighbor may be used
diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
index 19ec989..e791e2e 100644
--- a/sys/netinet6/nd6_rtr.c
+++ b/sys/netinet6/nd6_rtr.c
@@ -127,8 +127,11 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len)
union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
- /* If I'm not a router, ignore it. */
- if (!V_ip6_forwarding)
+ /*
+ * Accept RS only when V_ip6_forwarding=1 and ND6_IFF_ACCEPT_RTADV
+ * is not set on the receiving interface.
+ */
+ if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
goto freeit;
/* Sanity checks */
@@ -213,11 +216,10 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/*
- * We only accept RAs only when
- * the node is not a router and
- * per-interface variable allows RAs on the receiving interface.
+ * We accept RAs only when the per-interface flag
+ * ND6_IFF_ACCEPT_RTADV is set on the receiving interface.
*/
- if (V_ip6_forwarding || !(ndi->flags & ND6_IFF_ACCEPT_RTADV))
+ if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
goto freeit;
if (ip6->ip6_hlim != 255) {
@@ -266,7 +268,15 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len)
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
dr0.flags = nd_ra->nd_ra_flags_reserved;
- dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
+ /*
+ * Effectively disable the default route advertised in this
+ * RA when ND6_IFF_NO_RADR is set on the receiving interface
+ * or ip6.forwarding=1.
+ */
+ if (ndi->flags & ND6_IFF_NO_RADR || V_ip6_forwarding)
+ dr0.rtlifetime = 0;
+ else
+ dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
dr0.expire = time_second + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
@@ -557,7 +567,7 @@ defrtrlist_del(struct nd_defrouter *dr)
* Flush all the routing table entries that use the router
* as a next hop.
*/
- if (!V_ip6_forwarding)
+ if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
rt6_flush(&dr->rtaddr, dr->ifp);
if (dr->installed) {
@@ -616,20 +626,6 @@ defrouter_select(void)
struct llentry *ln = NULL;
/*
- * This function should be called only when acting as an autoconfigured
- * host. Although the remaining part of this function is not effective
- * if the node is not an autoconfigured host, we explicitly exclude
- * such cases here for safety.
- */
- if (V_ip6_forwarding) {
- nd6log((LOG_WARNING,
- "defrouter_select: called unexpectedly (forwarding=%d)\n",
- V_ip6_forwarding));
- splx(s);
- return;
- }
-
- /*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
diff --git a/sys/netinet6/send.h b/sys/netinet6/send.h
index 36ba571..9795d14 100644
--- a/sys/netinet6/send.h
+++ b/sys/netinet6/send.h
@@ -33,7 +33,7 @@
#define SND_IN 1 /* Incoming traffic. */
struct sockaddr_send {
- unsigned char send_len; /* total length */
+ uint8_t send_len; /* total length */
sa_family_t send_family; /* address family */
int send_direction;
int send_ifidx;
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 654f145..c2b7081 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -199,7 +199,9 @@ struct mbuf {
#define M_PROTO6 0x00080000 /* protocol-specific */
#define M_PROTO7 0x00100000 /* protocol-specific */
#define M_PROTO8 0x00200000 /* protocol-specific */
-#define M_FLOWID 0x00400000 /* flowid is valid */
+#define M_FLOWID 0x00400000 /* deprecated: flowid is valid */
+#define M_HASHTYPEBITS 0x0F000000 /* mask of bits holding flowid hash type */
+
/*
* For RELENG_{6,7} steal these flags for limited multiple routing table
* support. In RELENG_8 and beyond, use just one flag and a tag.
@@ -215,11 +217,45 @@ struct mbuf {
(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
/*
+ * Network interface cards are able to hash protocol fields (such as IPv4
+ * addresses and TCP port numbers) to classify packets into flows. These flows
+ * can then be used to maintain ordering while delivering packets to the OS
+ * via parallel input queues, as well as to provide a stateless affinity
+ * model. NIC drivers can pass up the hash via m->m_pkthdr.flowid, and set
+ * m_flag fields to indicate how the hash should be interpreted by the
+ * network stack.
+ *
+ * Most NICs support RSS, which provides ordering and explicit affinity, and
+ * use the hash m_flag bits to indicate what header fields were covered by
+ * the hash. M_HASHTYPE_OPAQUE can be set by non-RSS cards or configurations
+ * that provide an opaque flow identifier, allowing for ordering and
+ * distribution without explicit affinity.
+ */
+#define M_HASHTYPE_SHIFT 24
+#define M_HASHTYPE_NONE 0x0
+#define M_HASHTYPE_RSS_IPV4 0x1 /* IPv4 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV4 0x2 /* TCPv4 4-tuple */
+#define M_HASHTYPE_RSS_IPV6 0x3 /* IPv6 2-tuple */
+#define M_HASHTYPE_RSS_TCP_IPV6 0x4 /* TCPv6 4-tuple */
+#define M_HASHTYPE_RSS_IPV6_EX 0x5 /* IPv6 2-tuple + ext hdrs */
+#define M_HASHTYPE_RSS_TCP_IPV6_EX 0x6 /* TCPv6 4-tuple + ext hdrs */
+#define M_HASHTYPE_OPAQUE 0xf /* ordering, not affinity */
+
+#define M_HASHTYPE_CLEAR(m) (m)->m_flags &= ~(M_HASHTYPEBITS)
+#define M_HASHTYPE_GET(m) (((m)->m_flags & M_HASHTYPEBITS) >> \
+ M_HASHTYPE_SHIFT)
+#define M_HASHTYPE_SET(m, v) do { \
+ (m)->m_flags &= ~M_HASHTYPEBITS; \
+ (m)->m_flags |= ((v) << M_HASHTYPE_SHIFT); \
+} while (0)
+#define M_HASHTYPE_TEST(m, v) (M_HASHTYPE_GET(m) == (v))
+
+/*
* Flags preserved when copying m_pkthdr.
*/
#define M_COPYFLAGS \
(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB)
+ M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_FIB|M_HASHTYPEBITS)
/*
* External buffer types: identify ext_buf type.
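
As a rough illustration of the new M_HASHTYPE_* encoding: the macros pack a four-bit hash type into the packet-header flags at bit 24, alongside the 32-bit m_pkthdr.flowid supplied by the driver. A minimal userland sketch using a plain flags word in place of a real mbuf (hypothetical names; not part of this change):

#include <stdint.h>
#include <stdio.h>

/* Mirror of the M_HASHTYPE_* encoding above, applied to a plain flags word. */
#define SKETCH_HASHTYPEBITS		0x0F000000
#define SKETCH_HASHTYPE_SHIFT		24
#define SKETCH_HASHTYPE_RSS_TCP_IPV4	0x2

#define SKETCH_HASHTYPE_SET(flags, v)					\
	((flags) = ((flags) & ~SKETCH_HASHTYPEBITS) |			\
	    ((uint32_t)(v) << SKETCH_HASHTYPE_SHIFT))
#define SKETCH_HASHTYPE_GET(flags)					\
	(((flags) & SKETCH_HASHTYPEBITS) >> SKETCH_HASHTYPE_SHIFT)

int
main(void)
{
	uint32_t flags = 0;
	uint32_t flowid = 0xdeadbeef;	/* as a NIC would report per packet */

	/* A driver tagging a received TCP/IPv4 packet with its RSS hash. */
	SKETCH_HASHTYPE_SET(flags, SKETCH_HASHTYPE_RSS_TCP_IPV4);
	printf("hashtype=%#x flowid=%#x\n",
	    (unsigned int)SKETCH_HASHTYPE_GET(flags), flowid);
	return (0);
}
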
diff --git a/sys/sys/soundcard.h b/sys/sys/soundcard.h
index c4cfc27..a6817df 100644
--- a/sys/sys/soundcard.h
+++ b/sys/sys/soundcard.h
@@ -311,7 +311,8 @@ typedef struct _snd_capabilities {
* IOCTL Commands for /dev/sequencer
*/
-#define SNDCTL_SEQ_RESET _IO ('Q', 0)
+#define SNDCTL_SEQ_HALT _IO ('Q', 0)
+#define SNDCTL_SEQ_RESET SNDCTL_SEQ_HALT /* Historic interface */
#define SNDCTL_SEQ_SYNC _IO ('Q', 1)
#define SNDCTL_SYNTH_INFO _IOWR('Q', 2, struct synth_info)
#define SNDCTL_SEQ_CTRLRATE _IOWR('Q', 3, int) /* Set/get timer res.(hz) */
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 5b8f3c8..7f5d1b4 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -420,13 +420,13 @@ nospace:
*/
if (reclaimed == 0) {
reclaimed = 1;
- softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
if (bp) {
brelse(bp);
bp = NULL;
}
UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
goto retry;
}
UFS_UNLOCK(ump);
@@ -2356,8 +2356,8 @@ ffs_fserr(fs, inum, cp)
* specified inode by the specified amount. Under normal
* operation the count should always go down. Decrementing
* the count to zero will cause the inode to be freed.
- * adjblkcnt(inode, amt) - adjust the number of blocks used to
- * by the specifed amount.
+ * adjblkcnt(inode, amt) - adjust the number of blocks used by the
+ * inode by the specified amount.
* adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
* adjust the superblock summary.
* freedirs(inode, count) - directory inodes [inode..inode + count - 1]