Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/kern_poll.c        |   47
-rw-r--r--  sys/net/netisr.c            | 1186
-rw-r--r--  sys/net/netisr.h            |  138
-rw-r--r--  sys/net/rtsock.c            |   34
-rw-r--r--  sys/netatalk/ddp_usrreq.c   |   35
-rw-r--r--  sys/netinet/if_ether.c      |   13
-rw-r--r--  sys/netinet/igmp.c          |   20
-rw-r--r--  sys/netinet/ip_divert.c     |    2
-rw-r--r--  sys/netinet/ip_input.c      |   56
-rw-r--r--  sys/netinet6/ip6_input.c    |   14
-rw-r--r--  sys/netinet6/vinet6.h       |    2
-rw-r--r--  sys/netipsec/ipsec_input.c  |    2
-rw-r--r--  sys/netipx/ipx_input.c      |   22
-rw-r--r--  sys/netnatm/natm_proto.c    |   15
-rw-r--r--  sys/sys/param.h             |    2
-rw-r--r--  sys/sys/pcpu.h              |    1
16 files changed, 1312 insertions, 277 deletions
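Every per-protocol change in the diff below follows the same pattern: the old netisr_register(num, handler, ifqueue, flags) call and its struct ifqueue are replaced by a struct netisr_handler that describes the protocol and is passed to the new netisr_register(). The following minimal sketch illustrates that pattern for orientation; it is not part of the commit, and the "example" names and the NETISR_EXAMPLE slot are hypothetical placeholders rather than a real protocol.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>

#include <net/netisr.h>

#define NETISR_EXAMPLE  20      /* Hypothetical slot; must be < NETISR_MAXPROT. */

static void
example_intr(struct mbuf *m)
{

        /* Protocol input processing would happen here. */
        m_freem(m);
}

static const struct netisr_handler example_nh = {
        .nh_name = "example",
        .nh_handler = example_intr,
        .nh_proto = NETISR_EXAMPLE,
        .nh_qlimit = 256,                  /* 0 selects net.isr.defaultqlimit. */
        .nh_policy = NETISR_POLICY_SOURCE, /* Maintain per-source ordering. */
};

static void
example_init(void *arg)
{

        netisr_register(&example_nh);
}
SYSINIT(example, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, example_init, NULL);
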
diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index 2952a88..fbe9027 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/kthread.h> #include <sys/proc.h> +#include <sys/eventhandler.h> #include <sys/resourcevar.h> #include <sys/socket.h> /* needed by net/if.h */ #include <sys/sockio.h> @@ -48,8 +49,6 @@ __FBSDID("$FreeBSD$"); #include <net/route.h> #include <net/vnet.h> -static void netisr_poll(void); /* the two netisr handlers */ -static void netisr_pollmore(void); static int poll_switch(SYSCTL_HANDLER_ARGS); void hardclock_device_poll(void); /* hook from hardclock */ @@ -110,6 +109,10 @@ SYSCTL_NODE(_kern, OID_AUTO, polling, CTLFLAG_RW, 0, SYSCTL_UINT(_kern_polling, OID_AUTO, burst, CTLFLAG_RD, &poll_burst, 0, "Current polling burst size"); +static int netisr_poll_scheduled; +static int netisr_pollmore_scheduled; +static int poll_shutting_down; + static int poll_burst_max_sysctl(SYSCTL_HANDLER_ARGS) { uint32_t val = poll_burst_max; @@ -260,12 +263,19 @@ struct pollrec { static struct pollrec pr[POLL_LIST_LEN]; static void +poll_shutdown(void *arg, int howto) +{ + + poll_shutting_down = 1; +} + +static void init_device_poll(void) { mtx_init(&poll_mtx, "polling", NULL, MTX_DEF); - netisr_register(NETISR_POLL, (netisr_t *)netisr_poll, NULL, 0); - netisr_register(NETISR_POLLMORE, (netisr_t *)netisr_pollmore, NULL, 0); + EVENTHANDLER_REGISTER(shutdown_post_sync, poll_shutdown, NULL, + SHUTDOWN_PRI_LAST); } SYSINIT(device_poll, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, init_device_poll, NULL); @@ -289,7 +299,7 @@ hardclock_device_poll(void) static struct timeval prev_t, t; int delta; - if (poll_handlers == 0) + if (poll_handlers == 0 || poll_shutting_down) return; microuptime(&t); @@ -314,7 +324,9 @@ hardclock_device_poll(void) if (phase != 0) suspect++; phase = 1; - schednetisrbits(1 << NETISR_POLL | 1 << NETISR_POLLMORE); + netisr_poll_scheduled = 1; + netisr_pollmore_scheduled = 1; + netisr_sched_poll(); phase = 2; } if (pending_polls++ > 0) @@ -365,9 +377,16 @@ netisr_pollmore() int kern_load; mtx_lock(&poll_mtx); + if (!netisr_pollmore_scheduled) { + mtx_unlock(&poll_mtx); + return; + } + netisr_pollmore_scheduled = 0; phase = 5; if (residual_burst > 0) { - schednetisrbits(1 << NETISR_POLL | 1 << NETISR_POLLMORE); + netisr_poll_scheduled = 1; + netisr_pollmore_scheduled = 1; + netisr_sched_poll(); mtx_unlock(&poll_mtx); /* will run immediately on return, followed by netisrs */ return; @@ -397,23 +416,29 @@ netisr_pollmore() poll_burst -= (poll_burst / 8); if (poll_burst < 1) poll_burst = 1; - schednetisrbits(1 << NETISR_POLL | 1 << NETISR_POLLMORE); + netisr_poll_scheduled = 1; + netisr_pollmore_scheduled = 1; + netisr_sched_poll(); phase = 6; } mtx_unlock(&poll_mtx); } /* - * netisr_poll is scheduled by schednetisr when appropriate, typically once - * per tick. + * netisr_poll is typically scheduled once per tick. 
*/ -static void +void netisr_poll(void) { int i, cycles; enum poll_cmd arg = POLL_ONLY; mtx_lock(&poll_mtx); + if (!netisr_poll_scheduled) { + mtx_unlock(&poll_mtx); + return; + } + netisr_poll_scheduled = 0; phase = 3; if (residual_burst == 0) { /* first call in this tick */ microuptime(&poll_start_t); diff --git a/sys/net/netisr.c b/sys/net/netisr.c index eecf315..1a75ae8 100644 --- a/sys/net/netisr.c +++ b/sys/net/netisr.c @@ -1,6 +1,5 @@ /*- - * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org> - * Copyright (c) 1997, Stefan Esser <se@freebsd.org> + * Copyright (c) 2007-2009 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -23,230 +22,1103 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * netisr is a packet dispatch service, allowing synchronous (directly + * dispatched) and asynchronous (deferred dispatch) processing of packets by + * registered protocol handlers. Callers pass a protocol identifier and + * packet to netisr, along with a direct dispatch hint, and work will either + * be immediately processed with the registered handler, or passed to a + * kernel software interrupt (SWI) thread for deferred dispatch. Callers + * will generally select one or the other based on: + * + * - Might directly dispatching a netisr handler lead to code reentrance or + * lock recursion, such as entering the socket code from the socket code. + * - Might directly dispatching a netisr handler lead to recursive + * processing, such as when decapsulating several wrapped layers of tunnel + * information (IPSEC within IPSEC within ...). * - * $FreeBSD$ + * Maintaining ordering for protocol streams is a critical design concern. + * Enforcing ordering limits the opportunity for concurrency, but maintains + * the strong ordering requirements found in some protocols, such as TCP. Of + * related concern is CPU affinity--it is desirable to process all data + * associated with a particular stream on the same CPU over time in order to + * avoid acquiring locks associated with the connection on different CPUs, + * keep connection data in one cache, and to generally encourage associated + * user threads to live on the same CPU as the stream. It's also desirable + * to avoid lock migration and contention where locks are associated with + * more than one flow. + * + * netisr supports several policy variations, represented by the + * NETISR_POLICY_* constants, allowing protocols to play a varying role in + * identifying flows, assigning work to CPUs, etc. These are described in + * detail in netisr.h. 
*/ +#include "opt_ddb.h" #include "opt_device_polling.h" #include <sys/param.h> #include <sys/bus.h> -#include <sys/rtprio.h> -#include <sys/systm.h> -#include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/kthread.h> +#include <sys/interrupt.h> #include <sys/lock.h> -#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> #include <sys/proc.h> -#include <sys/random.h> -#include <sys/resourcevar.h> +#include <sys/rmlock.h> +#include <sys/sched.h> +#include <sys/smp.h> +#include <sys/socket.h> #include <sys/sysctl.h> -#include <sys/unistd.h> +#include <sys/systm.h> #include <sys/vimage.h> -#include <machine/atomic.h> -#include <machine/cpu.h> -#include <machine/stdarg.h> -#include <sys/mbuf.h> -#include <sys/socket.h> +#ifdef DDB +#include <ddb/ddb.h> +#endif #include <net/if.h> -#include <net/if_types.h> #include <net/if_var.h> #include <net/netisr.h> -volatile unsigned int netisr; /* scheduling bits for network */ +/*- + * Synchronize use and modification of the registered netisr data structures; + * acquire a read lock while modifying the set of registered protocols to + * prevent partially registered or unregistered protocols from being run. + * + * The following data structures and fields are protected by this lock: + * + * - The np array, including all fields of struct netisr_proto. + * - The nws array, including all fields of struct netisr_worker. + * - The nws_array array. + * + * Note: the NETISR_LOCKING define controls whether read locks are acquired + * in packet processing paths requiring netisr registration stability. This + * is disabled by default as it can lead to a measurable performance + * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and + * because netisr registration and unregistration is extremely rare at + * runtime. If it becomes more common, this decision should be revisited. + * + * XXXRW: rmlocks don't support assertions. + */ +static struct rmlock netisr_rmlock; +#define NETISR_LOCK_INIT() rm_init_flags(&netisr_rmlock, "netisr", \ + RM_NOWITNESS) +#define NETISR_LOCK_ASSERT() +#define NETISR_RLOCK(tracker) rm_rlock(&netisr_rmlock, (tracker)) +#define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker)) +#define NETISR_WLOCK() rm_wlock(&netisr_rmlock) +#define NETISR_WUNLOCK() rm_wunlock(&netisr_rmlock) +/* #define NETISR_LOCKING */ + +SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr"); + +/*- + * Three direct dispatch policies are supported: + * + * - Always defer: all work is scheduled for a netisr, regardless of context. + * (!direct) + * + * - Hybrid: if the executing context allows direct dispatch, and we're + * running on the CPU the work would be done on, then direct dispatch if it + * wouldn't violate ordering constraints on the workstream. + * (direct && !direct_force) + * + * - Always direct: if the executing context allows direct dispatch, always + * direct dispatch. (direct && direct_force) + * + * Notice that changing the global policy could lead to short periods of + * misordered processing, but this is considered acceptable as compared to + * the complexity of enforcing ordering during policy changes. + */ +static int netisr_direct_force = 1; /* Always direct dispatch. */ +TUNABLE_INT("net.isr.direct_force", &netisr_direct_force); +SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW, + &netisr_direct_force, 0, "Force direct dispatch"); + +static int netisr_direct = 1; /* Enable direct dispatch. 
*/ +TUNABLE_INT("net.isr.direct", &netisr_direct); +SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, + &netisr_direct, 0, "Enable direct dispatch"); + +/* + * Allow the administrator to limit the number of threads (CPUs) to use for + * netisr. We don't check netisr_maxthreads before creating the thread for + * CPU 0, so in practice we ignore values <= 1. This must be set at boot. + * We will create at most one thread per CPU. + */ +static int netisr_maxthreads = 1; /* Max number of threads. */ +TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads); +SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD, + &netisr_maxthreads, 0, + "Use at most this many CPUs for netisr processing"); + +static int netisr_bindthreads = 0; /* Bind threads to CPUs. */ +TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads); +SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD, + &netisr_bindthreads, 0, "Bind netisr threads to CPUs."); + +/* + * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial + * configuration and later modification using netisr_setqlimit(). + */ +#define NETISR_DEFAULT_MAXQLIMIT 10240 +static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT; +TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit); +SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD, + &netisr_maxqlimit, 0, + "Maximum netisr per-protocol, per-CPU queue depth."); + +/* + * The default per-workstream queue limit for protocols that don't initialize + * the nh_qlimit field of their struct netisr_handler. If this is set above + * netisr_maxqlimit, we truncate it to the maximum during boot. + */ +#define NETISR_DEFAULT_DEFAULTQLIMIT 256 +static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT; +TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit); +SYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD, + &netisr_defaultqlimit, 0, + "Default netisr per-protocol, per-CPU queue limit if not set by protocol"); + +/* + * Each protocol is described by a struct netisr_proto, which holds all + * global per-protocol information. This data structure is set up by + * netisr_register(), and derived from the public struct netisr_handler. + */ +struct netisr_proto { + const char *np_name; /* Character string protocol name. */ + netisr_handler_t *np_handler; /* Protocol handler. */ + netisr_m2flow_t *np_m2flow; /* Query flow for untagged packet. */ + netisr_m2cpuid_t *np_m2cpuid; /* Query CPU to process packet on. */ + u_int np_qlimit; /* Maximum per-CPU queue depth. */ + u_int np_policy; /* Work placement policy. */ +}; + +#define NETISR_MAXPROT 32 /* Compile-time limit. */ + +/* + * The np array describes all registered protocols, indexed by protocol + * number. + */ +static struct netisr_proto np[NETISR_MAXPROT]; + +/* + * Protocol-specific work for each workstream is described by struct + * netisr_work. Each work descriptor consists of an mbuf queue and + * statistics. + */ +struct netisr_work { + /* + * Packet queue, linked by m_nextpkt. + */ + struct mbuf *nw_head; + struct mbuf *nw_tail; + u_int nw_len; + u_int nw_qlimit; + u_int nw_watermark; + + /* + * Statistics -- written unlocked, but mostly from curcpu. + */ + u_int64_t nw_dispatched; /* Number of direct dispatches. */ + u_int64_t nw_hybrid_dispatched; /* "" hybrid dispatches. */ + u_int64_t nw_qdrops; /* "" drops. */ + u_int64_t nw_queued; /* "" enqueues. */ + u_int64_t nw_handled; /* "" handled in worker. */ +}; + +/* + * Workstreams hold a set of ordered work across each protocol, and are + * described by netisr_workstream. 
Each workstream is associated with a + * worker thread, which in turn is pinned to a CPU. Work associated with a + * workstream can be processd in other threads during direct dispatch; + * concurrent processing is prevented by the NWS_RUNNING flag, which + * indicates that a thread is already processing the work queue. + */ +struct netisr_workstream { + struct intr_event *nws_intr_event; /* Handler for stream. */ + void *nws_swi_cookie; /* swi(9) cookie for stream. */ + struct mtx nws_mtx; /* Synchronize work. */ + u_int nws_cpu; /* CPU pinning. */ + u_int nws_flags; /* Wakeup flags. */ + u_int nws_pendingbits; /* Scheduled protocols. */ + + /* + * Each protocol has per-workstream data. + */ + struct netisr_work nws_work[NETISR_MAXPROT]; +} __aligned(CACHE_LINE_SIZE); + +/* + * Per-CPU workstream data, indexed by CPU ID. + */ +static struct netisr_workstream nws[MAXCPU]; + +/* + * Map contiguous values between 0 and nws_count into CPU IDs appropriate for + * indexing the nws[] array. This allows constructions of the form + * nws[nws_array(arbitraryvalue % nws_count)]. + */ +static u_int nws_array[MAXCPU]; + +/* + * Number of registered workstreams. Will be at most the number of running + * CPUs once fully started. + */ +static u_int nws_count; +SYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD, + &nws_count, 0, "Number of extant netisr threads."); + +/* + * Per-workstream flags. + */ +#define NWS_RUNNING 0x00000001 /* Currently running in a thread. */ +#define NWS_DISPATCHING 0x00000002 /* Currently being direct-dispatched. */ +#define NWS_SCHEDULED 0x00000004 /* Signal issued. */ + +/* + * Synchronization for each workstream: a mutex protects all mutable fields + * in each stream, including per-protocol state (mbuf queues). The SWI is + * woken up if asynchronous dispatch is required. + */ +#define NWS_LOCK(s) mtx_lock(&(s)->nws_mtx) +#define NWS_LOCK_ASSERT(s) mtx_assert(&(s)->nws_mtx, MA_OWNED) +#define NWS_UNLOCK(s) mtx_unlock(&(s)->nws_mtx) +#define NWS_SIGNAL(s) swi_sched((s)->nws_swi_cookie, 0) + +/* + * Utility routines for protocols that implement their own mapping of flows + * to CPUs. + */ +u_int +netisr_get_cpucount(void) +{ + + return (nws_count); +} + +u_int +netisr_get_cpuid(u_int cpunumber) +{ -struct netisr { - netisr_t *ni_handler; - struct ifqueue *ni_queue; - int ni_flags; -} netisrs[32]; + KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber, + nws_count)); -static void *net_ih; + return (nws_array[cpunumber]); +} + +/* + * The default implementation of -> CPU ID mapping. + * + * Non-static so that protocols can use it to map their own work to specific + * CPUs in a manner consistent to netisr for affinity purposes. + */ +u_int +netisr_default_flow2cpu(u_int flowid) +{ + + return (nws_array[flowid % nws_count]); +} +/* + * Register a new netisr handler, which requires initializing per-protocol + * fields for each workstream. All netisr work is briefly suspended while + * the protocol is installed. + */ void -legacy_setsoftnet(void) +netisr_register(const struct netisr_handler *nhp) { - swi_sched(net_ih, 0); + struct netisr_work *npwp; + const char *name; + u_int i, proto; + + proto = nhp->nh_proto; + name = nhp->nh_name; + + /* + * Test that the requested registration is valid. 
+ */ + KASSERT(nhp->nh_name != NULL, + ("%s: nh_name NULL for %u", __func__, proto)); + KASSERT(nhp->nh_handler != NULL, + ("%s: nh_handler NULL for %s", __func__, name)); + KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE || + nhp->nh_policy == NETISR_POLICY_FLOW || + nhp->nh_policy == NETISR_POLICY_CPU, + ("%s: unsupported nh_policy %u for %s", __func__, + nhp->nh_policy, name)); + KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW || + nhp->nh_m2flow == NULL, + ("%s: nh_policy != FLOW but m2flow defined for %s", __func__, + name)); + KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL, + ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__, + name)); + KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, + ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, + name)); + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u, %s): protocol too big", __func__, proto, name)); + + /* + * Test that no existing registration exists for this protocol. + */ + NETISR_WLOCK(); + KASSERT(np[proto].np_name == NULL, + ("%s(%u, %s): name present", __func__, proto, name)); + KASSERT(np[proto].np_handler == NULL, + ("%s(%u, %s): handler present", __func__, proto, name)); + + np[proto].np_name = name; + np[proto].np_handler = nhp->nh_handler; + np[proto].np_m2flow = nhp->nh_m2flow; + np[proto].np_m2cpuid = nhp->nh_m2cpuid; + if (nhp->nh_qlimit == 0) + np[proto].np_qlimit = netisr_defaultqlimit; + else if (nhp->nh_qlimit > netisr_maxqlimit) { + printf("%s: %s requested queue limit %u capped to " + "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit, + netisr_maxqlimit); + np[proto].np_qlimit = netisr_maxqlimit; + } else + np[proto].np_qlimit = nhp->nh_qlimit; + np[proto].np_policy = nhp->nh_policy; + for (i = 0; i < MAXCPU; i++) { + npwp = &nws[i].nws_work[proto]; + bzero(npwp, sizeof(*npwp)); + npwp->nw_qlimit = np[proto].np_qlimit; + } + NETISR_WUNLOCK(); } +/* + * Clear drop counters across all workstreams for a protocol. + */ void -netisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags) +netisr_clearqdrops(const struct netisr_handler *nhp) { - - KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), - ("bad isr %d", num)); - KASSERT(flags == 0, ("netisr_register: bad flags 0x%x\n", flags)); - netisrs[num].ni_handler = handler; - netisrs[num].ni_queue = inq; - netisrs[num].ni_flags = flags; + struct netisr_work *npwp; +#ifdef INVARIANTS + const char *name; +#endif + u_int i, proto; + + proto = nhp->nh_proto; +#ifdef INVARIANTS + name = nhp->nh_name; +#endif + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, name)); + + NETISR_WLOCK(); + KASSERT(np[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + name)); + + for (i = 0; i < MAXCPU; i++) { + npwp = &nws[i].nws_work[proto]; + npwp->nw_qdrops = 0; + } + NETISR_WUNLOCK(); } +/* + * Query the current drop counters across all workstreams for a protocol. 
+ */ void -netisr_unregister(int num) -{ - struct netisr *ni; - - KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), - ("bad isr %d", num)); - ni = &netisrs[num]; - ni->ni_handler = NULL; - if (ni->ni_queue != NULL) - IF_DRAIN(ni->ni_queue); - ni->ni_queue = NULL; -} - -struct isrstat { - int isrs_count; /* dispatch count */ - int isrs_directed; /* ...directly dispatched */ - int isrs_deferred; /* ...queued instead */ - int isrs_queued; /* intentionally queueued */ - int isrs_drop; /* dropped 'cuz no handler */ - int isrs_swi_count; /* swi_net handlers called */ -}; -static struct isrstat isrstat; +netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp) +{ + struct netisr_work *npwp; + struct rm_priotracker tracker; +#ifdef INVARIANTS + const char *name; +#endif + u_int i, proto; -SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters"); + *qdropp = 0; + proto = nhp->nh_proto; +#ifdef INVARIANTS + name = nhp->nh_name; +#endif + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, name)); -static int netisr_direct = 1; -SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW, - &netisr_direct, 0, "enable direct dispatch"); -TUNABLE_INT("net.isr.direct", &netisr_direct); + NETISR_RLOCK(&tracker); + KASSERT(np[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + name)); + + for (i = 0; i < MAXCPU; i++) { + npwp = &nws[i].nws_work[proto]; + *qdropp += npwp->nw_qdrops; + } + NETISR_RUNLOCK(&tracker); +} + +/* + * Query the current queue limit for per-workstream queues for a protocol. + */ +void +netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp) +{ + struct rm_priotracker tracker; +#ifdef INVARIANTS + const char *name; +#endif + u_int proto; + + proto = nhp->nh_proto; +#ifdef INVARIANTS + name = nhp->nh_name; +#endif + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, name)); + + NETISR_RLOCK(&tracker); + KASSERT(np[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + name)); + *qlimitp = np[proto].np_qlimit; + NETISR_RUNLOCK(&tracker); +} + +/* + * Update the queue limit across per-workstream queues for a protocol. We + * simply change the limits, and don't drain overflowed packets as they will + * (hopefully) take care of themselves shortly. 
+ */ +int +netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit) +{ + struct netisr_work *npwp; +#ifdef INVARIANTS + const char *name; +#endif + u_int i, proto; -SYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD, - &isrstat.isrs_count, 0, ""); -SYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, - &isrstat.isrs_directed, 0, ""); -SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, - &isrstat.isrs_deferred, 0, ""); -SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, - &isrstat.isrs_queued, 0, ""); -SYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD, - &isrstat.isrs_drop, 0, ""); -SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, - &isrstat.isrs_swi_count, 0, ""); + if (qlimit > netisr_maxqlimit) + return (EINVAL); + + proto = nhp->nh_proto; +#ifdef INVARIANTS + name = nhp->nh_name; +#endif + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, name)); + + NETISR_WLOCK(); + KASSERT(np[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + name)); + + np[proto].np_qlimit = qlimit; + for (i = 0; i < MAXCPU; i++) { + npwp = &nws[i].nws_work[proto]; + npwp->nw_qlimit = qlimit; + } + NETISR_WUNLOCK(); + return (0); +} /* - * Process all packets currently present in a netisr queue. Used to - * drain an existing set of packets waiting for processing when we - * begin direct dispatch, to avoid processing packets out of order. + * Drain all packets currently held in a particular protocol work queue. */ static void -netisr_processqueue(struct netisr *ni) +netisr_drain_proto(struct netisr_work *npwp) { struct mbuf *m; - for (;;) { - IF_DEQUEUE(ni->ni_queue, m); - if (m == NULL) - break; - VNET_ASSERT(m->m_pkthdr.rcvif != NULL); - CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); - ni->ni_handler(m); - CURVNET_RESTORE(); + /* + * We would assert the lock on the workstream but it's not passed in. + */ + while ((m = npwp->nw_head) != NULL) { + npwp->nw_head = m->m_nextpkt; + m->m_nextpkt = NULL; + if (npwp->nw_head == NULL) + npwp->nw_tail = NULL; + npwp->nw_len--; + m_freem(m); } + KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__)); + KASSERT(npwp->nw_len == 0, ("%s: len", __func__)); } /* - * Call the netisr directly instead of queueing the packet, if possible. + * Remove the registration of a network protocol, which requires clearing + * per-protocol fields across all workstreams, including freeing all mbufs in + * the queues at time of unregister. All work in netisr is briefly suspended + * while this takes place. 
*/ void -netisr_dispatch(int num, struct mbuf *m) -{ - struct netisr *ni; - - isrstat.isrs_count++; /* XXX redundant */ - KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), - ("bad isr %d", num)); - ni = &netisrs[num]; - if (ni->ni_queue == NULL) { - isrstat.isrs_drop++; - m_freem(m); - return; +netisr_unregister(const struct netisr_handler *nhp) +{ + struct netisr_work *npwp; +#ifdef INVARIANTS + const char *name; +#endif + u_int i, proto; + + proto = nhp->nh_proto; +#ifdef INVARIANTS + name = nhp->nh_name; +#endif + KASSERT(proto < NETISR_MAXPROT, + ("%s(%u): protocol too big for %s", __func__, proto, name)); + + NETISR_WLOCK(); + KASSERT(np[proto].np_handler != NULL, + ("%s(%u): protocol not registered for %s", __func__, proto, + name)); + + np[proto].np_name = NULL; + np[proto].np_handler = NULL; + np[proto].np_m2flow = NULL; + np[proto].np_m2cpuid = NULL; + np[proto].np_qlimit = 0; + np[proto].np_policy = 0; + for (i = 0; i < MAXCPU; i++) { + npwp = &nws[i].nws_work[proto]; + netisr_drain_proto(npwp); + bzero(npwp, sizeof(*npwp)); + } + NETISR_WUNLOCK(); +} + +/* + * Look up the workstream given a packet and source identifier. Do this by + * checking the protocol's policy, and optionally call out to the protocol + * for assistance if required. + */ +static struct mbuf * +netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source, + struct mbuf *m, u_int *cpuidp) +{ + struct ifnet *ifp; + + NETISR_LOCK_ASSERT(); + + /* + * In the event we have only one worker, shortcut and deliver to it + * without further ado. + */ + if (nws_count == 1) { + *cpuidp = nws_array[0]; + return (m); } /* - * Directly dispatch handling of this packet, if permitted by global - * policy. Source ordering is maintained by virtue of callers - * consistently calling one of queued or direct dispatch. + * What happens next depends on the policy selected by the protocol. + * If we want to support per-interface policies, we should do that + * here first. */ - if (netisr_direct) { - isrstat.isrs_directed++; - ni->ni_handler(m); + switch (npp->np_policy) { + case NETISR_POLICY_CPU: + return (npp->np_m2cpuid(m, source, cpuidp)); + + case NETISR_POLICY_FLOW: + if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) { + m = npp->np_m2flow(m, source); + if (m == NULL) + return (NULL); + } + if (m->m_flags & M_FLOWID) { + *cpuidp = + netisr_default_flow2cpu(m->m_pkthdr.flowid); + return (m); + } + /* FALLTHROUGH */ + + case NETISR_POLICY_SOURCE: + ifp = m->m_pkthdr.rcvif; + if (ifp != NULL) + *cpuidp = nws_array[(ifp->if_index + source) % + nws_count]; + else + *cpuidp = nws_array[source % nws_count]; + return (m); + + default: + panic("%s: invalid policy %u for %s", __func__, + npp->np_policy, npp->np_name); + } +} + +/* + * Process packets associated with a workstream and protocol. For reasons of + * fairness, we process up to one complete netisr queue at a time, moving the + * queue to a stack-local queue for processing, but do not loop refreshing + * from the global queue. The caller is responsible for deciding whether to + * loop, and for setting the NWS_RUNNING flag. The passed workstream will be + * locked on entry and relocked before return, but will be released while + * processing. The number of packets processed is returned. 
+ */ +static u_int +netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) +{ + struct netisr_work local_npw, *npwp; + u_int handled; + struct mbuf *m; + + NETISR_LOCK_ASSERT(); + NWS_LOCK_ASSERT(nwsp); + + KASSERT(nwsp->nws_flags & NWS_RUNNING, + ("%s(%u): not running", __func__, proto)); + KASSERT(proto >= 0 && proto < NETISR_MAXPROT, + ("%s(%u): invalid proto\n", __func__, proto)); + + npwp = &nwsp->nws_work[proto]; + if (npwp->nw_len == 0) + return (0); + + /* + * Move the global work queue to a thread-local work queue. + * + * Notice that this means the effective maximum length of the queue + * is actually twice that of the maximum queue length specified in + * the protocol registration call. + */ + handled = npwp->nw_len; + local_npw = *npwp; + npwp->nw_head = NULL; + npwp->nw_tail = NULL; + npwp->nw_len = 0; + nwsp->nws_pendingbits &= ~(1 << proto); + NWS_UNLOCK(nwsp); + while ((m = local_npw.nw_head) != NULL) { + local_npw.nw_head = m->m_nextpkt; + m->m_nextpkt = NULL; + if (local_npw.nw_head == NULL) + local_npw.nw_tail = NULL; + local_npw.nw_len--; + VNET_ASSERT(m->m_pkthdr.rcvif != NULL); + CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); + np[proto].np_handler(m); + CURVNET_RESTORE(); + } + KASSERT(local_npw.nw_len == 0, + ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); + NWS_LOCK(nwsp); + npwp->nw_handled += handled; + return (handled); +} + +/* + * SWI handler for netisr -- processes prackets in a set of workstreams that + * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already + * being direct dispatched, go back to sleep and wait for the dispatching + * thread to wake us up again. + */ +static void +swi_net(void *arg) +{ +#ifdef NETISR_LOCKING + struct rm_priotracker tracker; +#endif + struct netisr_workstream *nwsp; + u_int bits, prot; + + nwsp = arg; + +#ifdef DEVICE_POLLING + KASSERT(nws_count == 1, + ("%s: device_polling but nws_count != 1", __func__)); + netisr_poll(); +#endif +#ifdef NETISR_LOCKING + NETISR_RLOCK(&tracker); +#endif + NWS_LOCK(nwsp); + KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running")); + if (nwsp->nws_flags & NWS_DISPATCHING) + goto out; + nwsp->nws_flags |= NWS_RUNNING; + nwsp->nws_flags &= ~NWS_SCHEDULED; + while ((bits = nwsp->nws_pendingbits) != 0) { + while ((prot = ffs(bits)) != 0) { + prot--; + bits &= ~(1 << prot); + (void)netisr_process_workstream_proto(nwsp, prot); + } + } + nwsp->nws_flags &= ~NWS_RUNNING; +out: + NWS_UNLOCK(nwsp); +#ifdef NETISR_LOCKING + NETISR_RUNLOCK(&tracker); +#endif +#ifdef DEVICE_POLLING + netisr_pollmore(); +#endif +} + +static int +netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto, + struct netisr_work *npwp, struct mbuf *m, int *dosignalp) +{ + + NWS_LOCK_ASSERT(nwsp); + + *dosignalp = 0; + if (npwp->nw_len < npwp->nw_qlimit) { + m->m_nextpkt = NULL; + if (npwp->nw_head == NULL) { + npwp->nw_head = m; + npwp->nw_tail = m; + } else { + npwp->nw_tail->m_nextpkt = m; + npwp->nw_tail = m; + } + npwp->nw_len++; + if (npwp->nw_len > npwp->nw_watermark) + npwp->nw_watermark = npwp->nw_len; + nwsp->nws_pendingbits |= (1 << proto); + if (!(nwsp->nws_flags & + (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) { + nwsp->nws_flags |= NWS_SCHEDULED; + *dosignalp = 1; /* Defer until unlocked. 
*/ + } + npwp->nw_queued++; + return (0); } else { - isrstat.isrs_deferred++; - if (IF_HANDOFF(ni->ni_queue, m, NULL)) - schednetisr(num); + npwp->nw_qdrops++; + return (ENOBUFS); } } +static int +netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid) +{ + struct netisr_workstream *nwsp; + struct netisr_work *npwp; + int dosignal, error; + +#ifdef NETISR_LOCKING + NETISR_LOCK_ASSERT(); +#endif + KASSERT(cpuid < MAXCPU, ("%s: cpuid too big (%u, %u)", __func__, + cpuid, MAXCPU)); + + dosignal = 0; + error = 0; + nwsp = &nws[cpuid]; + npwp = &nwsp->nws_work[proto]; + NWS_LOCK(nwsp); + error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal); + NWS_UNLOCK(nwsp); + if (dosignal) + NWS_SIGNAL(nwsp); + return (error); +} + +int +netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) +{ +#ifdef NETISR_LOCKING + struct rm_priotracker tracker; +#endif + u_int cpuid; + int error; + + KASSERT(proto < NETISR_MAXPROT, + ("%s: invalid proto %u", __func__, proto)); + +#ifdef NETISR_LOCKING + NETISR_RLOCK(&tracker); +#endif + KASSERT(np[proto].np_handler != NULL, + ("%s: invalid proto %u", __func__, proto)); + + m = netisr_select_cpuid(&np[proto], source, m, &cpuid); + if (m != NULL) + error = netisr_queue_internal(proto, m, cpuid); + else + error = ENOBUFS; +#ifdef NETISR_LOCKING + NETISR_RUNLOCK(&tracker); +#endif + return (error); +} + +int +netisr_queue(u_int proto, struct mbuf *m) +{ + + return (netisr_queue_src(proto, 0, m)); +} + /* - * Same as above, but always queue. - * This is either used in places where we are not confident that - * direct dispatch is possible, or where queueing is required. - * It returns (0) on success and ERRNO on failure. On failure the - * mbuf has been free'd. + * Dispatch a packet for netisr processing, direct dispatch permitted by + * calling context. */ int -netisr_queue(int num, struct mbuf *m) -{ - struct netisr *ni; - - KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), - ("bad isr %d", num)); - ni = &netisrs[num]; - if (ni->ni_queue == NULL) { - isrstat.isrs_drop++; - m_freem(m); - return (ENXIO); +netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) +{ +#ifdef NETISR_LOCKING + struct rm_priotracker tracker; +#endif + struct netisr_workstream *nwsp; + struct netisr_work *npwp; + int dosignal, error; + u_int cpuid; + + /* + * If direct dispatch is entirely disabled, fall back on queueing. + */ + if (!netisr_direct) + return (netisr_queue_src(proto, source, m)); + + KASSERT(proto < NETISR_MAXPROT, + ("%s: invalid proto %u", __func__, proto)); +#ifdef NETISR_LOCKING + NETISR_RLOCK(&tracker); +#endif + KASSERT(np[proto].np_handler != NULL, + ("%s: invalid proto %u", __func__, proto)); + + /* + * If direct dispatch is forced, then unconditionally dispatch + * without a formal CPU selection. Borrow the current CPU's stats, + * even if there's no worker on it. In this case we don't update + * nws_flags because all netisr processing will be source ordered due + * to always being forced to directly dispatch. 
+ */ + if (netisr_direct_force) { + nwsp = &nws[curcpu]; + npwp = &nwsp->nws_work[proto]; + NWS_LOCK(nwsp); + npwp->nw_dispatched++; + npwp->nw_handled++; + NWS_UNLOCK(nwsp); + np[proto].np_handler(m); + error = 0; + goto out_unlock; } - isrstat.isrs_queued++; - if (!IF_HANDOFF(ni->ni_queue, m, NULL)) - return (ENOBUFS); /* IF_HANDOFF has free'd the mbuf */ - schednetisr(num); - return (0); + + /* + * Otherwise, we execute in a hybrid mode where we will try to direct + * dispatch if we're on the right CPU and the netisr worker isn't + * already running. + */ + m = netisr_select_cpuid(&np[proto], source, m, &cpuid); + if (m == NULL) { + error = ENOBUFS; + goto out_unlock; + } + sched_pin(); + if (cpuid != curcpu) + goto queue_fallback; + nwsp = &nws[cpuid]; + npwp = &nwsp->nws_work[proto]; + + /*- + * We are willing to direct dispatch only if three conditions hold: + * + * (1) The netisr worker isn't already running, + * (2) Another thread isn't already directly dispatching, and + * (3) The netisr hasn't already been woken up. + */ + NWS_LOCK(nwsp); + if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) { + error = netisr_queue_workstream(nwsp, proto, npwp, m, + &dosignal); + NWS_UNLOCK(nws); + if (dosignal) + NWS_SIGNAL(nwsp); + goto out_unpin; + } + + /* + * The current thread is now effectively the netisr worker, so set + * the dispatching flag to prevent concurrent processing of the + * stream from another thread (even the netisr worker), which could + * otherwise lead to effective misordering of the stream. + */ + nwsp->nws_flags |= NWS_DISPATCHING; + NWS_UNLOCK(nwsp); + np[proto].np_handler(m); + NWS_LOCK(nwsp); + nwsp->nws_flags &= ~NWS_DISPATCHING; + npwp->nw_handled++; + npwp->nw_hybrid_dispatched++; + + /* + * If other work was enqueued by another thread while we were direct + * dispatching, we need to signal the netisr worker to do that work. + * In the future, we might want to do some of that work in the + * current thread, rather than trigger further context switches. If + * so, we'll want to establish a reasonable bound on the work done in + * the "borrowed" context. + */ + if (nwsp->nws_pendingbits != 0) { + nwsp->nws_flags |= NWS_SCHEDULED; + dosignal = 1; + } else + dosignal = 0; + NWS_UNLOCK(nwsp); + if (dosignal) + NWS_SIGNAL(nwsp); + error = 0; + goto out_unpin; + +queue_fallback: + error = netisr_queue_internal(proto, m, cpuid); +out_unpin: + sched_unpin(); +out_unlock: +#ifdef NETISR_LOCKING + NETISR_RUNLOCK(&tracker); +#endif + return (error); +} + +int +netisr_dispatch(u_int proto, struct mbuf *m) +{ + + return (netisr_dispatch_src(proto, 0, m)); +} + +#ifdef DEVICE_POLLING +/* + * Kernel polling borrows a netisr thread to run interface polling in; this + * function allows kernel polling to request that the netisr thread be + * scheduled even if no packets are pending for protocols. 
+ */ +void +netisr_sched_poll(void) +{ + struct netisr_workstream *nwsp; + + nwsp = &nws[nws_array[0]]; + NWS_SIGNAL(nwsp); } +#endif static void -swi_net(void *dummy) +netisr_start_swi(u_int cpuid, struct pcpu *pc) +{ + char swiname[12]; + struct netisr_workstream *nwsp; + int error; + + nwsp = &nws[cpuid]; + mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); + nwsp->nws_cpu = cpuid; + snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); + error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, + SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie); + if (error) + panic("%s: swi_add %d", __func__, error); + pc->pc_netisr = nwsp->nws_intr_event; + if (netisr_bindthreads) { + error = intr_event_bind(nwsp->nws_intr_event, cpuid); + if (error != 0) + printf("%s: cpu %u: intr_event_bind: %d", __func__, + cpuid, error); + } + NETISR_WLOCK(); + nws_array[nws_count] = nwsp->nws_cpu; + nws_count++; + NETISR_WUNLOCK(); +} + +/* + * Initialize the netisr subsystem. We rely on BSS and static initialization + * of most fields in global data structures. + * + * Start a worker thread for the boot CPU so that we can support network + * traffic immediately in case the network stack is used before additional + * CPUs are started (for example, diskless boot). + */ +static void +netisr_init(void *arg) { - struct netisr *ni; - u_int bits; - int i; + + KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__)); + + NETISR_LOCK_INIT(); + if (netisr_maxthreads < 1) + netisr_maxthreads = 1; + if (netisr_maxthreads > MAXCPU) + netisr_maxthreads = MAXCPU; + if (netisr_defaultqlimit > netisr_maxqlimit) + netisr_defaultqlimit = netisr_maxqlimit; #ifdef DEVICE_POLLING - const int polling = 1; -#else - const int polling = 0; + /* + * The device polling code is not yet aware of how to deal with + * multiple netisr threads, so for the time being compiling in device + * polling disables parallel netisr workers. + */ + netisr_maxthreads = 1; + netisr_bindthreads = 0; #endif - do { - bits = atomic_readandclear_int(&netisr); - if (bits == 0) - break; - while ((i = ffs(bits)) != 0) { - isrstat.isrs_swi_count++; - i--; - bits &= ~(1 << i); - ni = &netisrs[i]; - if (ni->ni_handler == NULL) { - printf("swi_net: unregistered isr %d.\n", i); - continue; - } - if (ni->ni_queue == NULL) - ni->ni_handler(NULL); - else - netisr_processqueue(ni); - } - } while (polling); + netisr_start_swi(curcpu, pcpu_find(curcpu)); } +SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); +/* + * Start worker threads for additional CPUs. No attempt to gracefully handle + * work reassignment, we don't yet support dynamic reconfiguration. + */ static void -start_netisr(void *dummy) +netisr_start(void *arg) { + struct pcpu *pc; - if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih)) - panic("start_netisr"); + SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + if (nws_count >= netisr_maxthreads) + break; + /* XXXRW: Is skipping absent CPUs still required here? */ + if (CPU_ABSENT(pc->pc_cpuid)) + continue; + /* Worker will already be present for boot CPU. 
*/ + if (pc->pc_netisr != NULL) + continue; + netisr_start_swi(pc->pc_cpuid, pc); + } } -SYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL); +SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); + +#ifdef DDB +DB_SHOW_COMMAND(netisr, db_show_netisr) +{ + struct netisr_workstream *nwsp; + struct netisr_work *nwp; + int first, proto; + u_int cpu; + + db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", + "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); + for (cpu = 0; cpu < MAXCPU; cpu++) { + nwsp = &nws[cpu]; + if (nwsp->nws_intr_event == NULL) + continue; + first = 1; + for (proto = 0; proto < NETISR_MAXPROT; proto++) { + if (np[proto].np_handler == NULL) + continue; + nwp = &nwsp->nws_work[proto]; + if (first) { + db_printf("%3d ", cpu); + first = 0; + } else + db_printf("%3s ", ""); + db_printf( + "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", + np[proto].np_name, nwp->nw_len, + nwp->nw_watermark, nwp->nw_qlimit, + nwp->nw_dispatched, nwp->nw_hybrid_dispatched, + nwp->nw_qdrops, nwp->nw_queued); + } + } +} +#endif diff --git a/sys/net/netisr.h b/sys/net/netisr.h index 7929302..f299b2e 100644 --- a/sys/net/netisr.h +++ b/sys/net/netisr.h @@ -1,6 +1,6 @@ /*- - * Copyright (c) 1980, 1986, 1989, 1993 - * The Regents of the University of California. All rights reserved. + * Copyright (c) 2007-2009 Robert N. M. Watson + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,14 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) @@ -26,20 +23,18 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)netisr.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_NETISR_H_ #define _NET_NETISR_H_ +#ifdef _KERNEL /* * The netisr (network interrupt service routine) provides a deferred * execution evironment in which (generally inbound) network processing can - * take place. Protocols register handlers and, optionally, packet queues; - * when packets are delivered to the queue, the protocol handler will be - * executed directly, or via deferred dispatch depending on the - * circumstances. + * take place. Protocols register handlers which will be executed directly, + * or via deferred dispatch, depending on the circumstances. * * Historically, this was implemented by the BSD software ISR facility; it is * now implemented via a software ithread (SWI). 
@@ -53,37 +48,108 @@ #define NETISR_ATALK1 17 /* Appletalk phase 1 */ #define NETISR_ARP 18 /* same as AF_LINK */ #define NETISR_IPX 23 /* same as AF_IPX */ +#define NETISR_ETHER 24 /* ethernet input */ #define NETISR_IPV6 27 #define NETISR_NATM 28 #define NETISR_POLLMORE 31 /* polling callback, must be last */ -#ifndef LOCORE -#ifdef _KERNEL +/*- + * Protocols express ordering constraints and affinity preferences by + * implementing one or neither of nh_m2flow and nh_m2cpuid, which are used by + * netisr to determine which per-CPU workstream to assign mbufs to. + * + * The following policies may be used by protocols: + * + * NETISR_POLICY_SOURCE - netisr should maintain source ordering without + * advice from the protocol. netisr will ignore any + * flow IDs present on the mbuf for the purposes of + * work placement. + * + * NETISR_POLICY_FLOW - netisr should maintain flow ordering as defined by + * the mbuf header flow ID field. If the protocol + * implements nh_m2flow, then netisr will query the + * protocol in the event that the mbuf doesn't have a + * flow ID, falling back on source ordering. + * + * NETISR_POLICY_CPU - netisr will delegate all work placement decisions to + * the protocol, querying nh_m2cpuid for each packet. + * + * Protocols might make decisions about work placement based on an existing + * calculated flow ID on the mbuf, such as one provided in hardware, the + * receive interface pointed to by the mbuf (if any), the optional source + * identifier passed at some dispatch points, or even parse packet headers to + * calculate a flow. Both protocol handlers may return a new mbuf pointer + * for the chain, or NULL if the packet proves invalid or m_pullup() fails. + * + * XXXRW: If we eventually support dynamic reconfiguration, there should be + * protocol handlers to notify them of CPU configuration changes so that they + * can rebalance work. + */ +struct mbuf; +typedef void netisr_handler_t (struct mbuf *m); +typedef struct mbuf *netisr_m2cpuid_t(struct mbuf *m, uintptr_t source, + u_int *cpuid); +typedef struct mbuf *netisr_m2flow_t(struct mbuf *m, uintptr_t source); + +#define NETISR_POLICY_SOURCE 1 /* Maintain source ordering. */ +#define NETISR_POLICY_FLOW 2 /* Maintain flow ordering. */ +#define NETISR_POLICY_CPU 3 /* Protocol determines CPU placement. */ + +/* + * Data structure describing a protocol handler. + */ +struct netisr_handler { + const char *nh_name; /* Character string protocol name. */ + netisr_handler_t *nh_handler; /* Protocol handler. */ + netisr_m2flow_t *nh_m2flow; /* Query flow for untagged packet. */ + netisr_m2cpuid_t *nh_m2cpuid; /* Query CPU to process mbuf on. */ + u_int nh_proto; /* Integer protocol ID. */ + u_int nh_qlimit; /* Maximum per-CPU queue depth. */ + u_int nh_policy; /* Work placement policy. */ + u_int nh_ispare[5]; /* For future use. */ + void *nh_pspare[4]; /* For future use. */ +}; -void legacy_setsoftnet(void); +/* + * Register, unregister, and other netisr handler management functions. 
+ */ +void netisr_clearqdrops(const struct netisr_handler *nhp); +void netisr_getqdrops(const struct netisr_handler *nhp, + u_int64_t *qdropsp); +void netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp); +void netisr_register(const struct netisr_handler *nhp); +int netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit); +void netisr_unregister(const struct netisr_handler *nhp); -extern volatile unsigned int netisr; /* scheduling bits for network */ -#define schednetisr(anisr) do { \ - atomic_set_rel_int(&netisr, 1 << (anisr)); \ - legacy_setsoftnet(); \ -} while (0) -/* used to atomically schedule multiple netisrs */ -#define schednetisrbits(isrbits) do { \ - atomic_set_rel_int(&netisr, isrbits); \ - legacy_setsoftnet(); \ -} while (0) +/* + * Process a packet destined for a protocol, and attempt direct dispatch. + * Supplemental source ordering information can be passed using the _src + * variant. + */ +int netisr_dispatch(u_int proto, struct mbuf *m); +int netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m); +int netisr_queue(u_int proto, struct mbuf *m); +int netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m); -struct ifqueue; -struct mbuf; +/* + * Provide a default implementation of "map an ID to a CPU ID". + */ +u_int netisr_default_flow2cpu(u_int flowid); -typedef void netisr_t (struct mbuf *); - -void netisr_dispatch(int, struct mbuf *); -int netisr_queue(int, struct mbuf *); -void netisr_register(int, netisr_t *, struct ifqueue *, int); -void netisr_unregister(int); +/* + * Utility routines to return the number of CPUs participting in netisr, and + * to return a mapping from a number to a CPU ID that can be used with the + * scheduler. + */ +u_int netisr_get_cpucount(void); +u_int netisr_get_cpuid(u_int cpunumber); -#endif -#endif +/* + * Interfaces between DEVICE_POLLING and netisr. 
+ */ +void netisr_sched_poll(void); +void netisr_poll(void); +void netisr_pollmore(void); -#endif +#endif /* !_KERNEL */ +#endif /* !_NET_NETISR_H_ */ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 125db40..c9f76af 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -90,11 +90,7 @@ MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF); #define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx) #define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED) -static struct ifqueue rtsintrq; - SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, ""); -SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW, - &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length"); struct walkarg { int w_tmemsize; @@ -119,16 +115,38 @@ static void rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out); static void rt_dispatch(struct mbuf *, const struct sockaddr *); +static struct netisr_handler rtsock_nh = { + .nh_name = "rtsock", + .nh_handler = rts_input, + .nh_proto = NETISR_ROUTE, + .nh_policy = NETISR_POLICY_SOURCE, +}; + +static int +sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&rtsock_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&rtsock_nh, qlimit)); +} +SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen, CTLTYPE_INT|CTLFLAG_RW, + 0, 0, sysctl_route_netisr_maxqlen, "I", + "maximum routing socket dispatch queue length"); + static void rts_init(void) { int tmp; - rtsintrq.ifq_maxlen = 256; if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp)) - rtsintrq.ifq_maxlen = tmp; - mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF); - netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, 0); + rtsock_nh.nh_qlimit = tmp; + netisr_register(&rtsock_nh); } SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0); diff --git a/sys/netatalk/ddp_usrreq.c b/sys/netatalk/ddp_usrreq.c index 7ea8f4f..1c2f601 100644 --- a/sys/netatalk/ddp_usrreq.c +++ b/sys/netatalk/ddp_usrreq.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2004-2005 Robert N. M. Watson + * Copyright (c) 2004-2009 Robert N. M. Watson * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -70,7 +70,26 @@ static u_long ddp_sendspace = DDP_MAXSZ; /* Max ddp size + 1 (ddp_type) */ static u_long ddp_recvspace = 10 * (587 + sizeof(struct sockaddr_at)); -static struct ifqueue atintrq1, atintrq2, aarpintrq; +static const struct netisr_handler atalk1_nh = { + .nh_name = "atalk1", + .nh_handler = at1intr, + .nh_proto = NETISR_ATALK1, + .nh_policy = NETISR_POLICY_SOURCE, +}; + +static const struct netisr_handler atalk2_nh = { + .nh_name = "atalk2", + .nh_handler = at2intr, + .nh_proto = NETISR_ATALK2, + .nh_policy = NETISR_POLICY_SOURCE, +}; + +static const struct netisr_handler aarp_nh = { + .nh_name = "aarp", + .nh_handler = aarpintr, + .nh_proto = NETISR_AARP, + .nh_policy = NETISR_POLICY_SOURCE, +}; static int ddp_attach(struct socket *so, int proto, struct thread *td) @@ -256,16 +275,10 @@ void ddp_init(void) { - atintrq1.ifq_maxlen = IFQ_MAXLEN; - atintrq2.ifq_maxlen = IFQ_MAXLEN; - aarpintrq.ifq_maxlen = IFQ_MAXLEN; - mtx_init(&atintrq1.ifq_mtx, "at1_inq", NULL, MTX_DEF); - mtx_init(&atintrq2.ifq_mtx, "at2_inq", NULL, MTX_DEF); - mtx_init(&aarpintrq.ifq_mtx, "aarp_inq", NULL, MTX_DEF); DDP_LIST_LOCK_INIT(); - netisr_register(NETISR_ATALK1, at1intr, &atintrq1, 0); - netisr_register(NETISR_ATALK2, at2intr, &atintrq2, 0); - netisr_register(NETISR_AARP, aarpintr, &aarpintrq, 0); + netisr_register(&atalk1_nh); + netisr_register(&atalk2_nh); + netisr_register(&aarp_nh); } #if 0 diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index d24b459..734dd02 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -96,8 +96,6 @@ static int arp_proxyall; SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds"); -static struct ifqueue arpintrq; - SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, arp_maxtries, 0, "ARP resolution attempts before returning error"); @@ -118,6 +116,13 @@ static void arptimer(void *); static void in_arpinput(struct mbuf *); #endif +static const struct netisr_handler arp_nh = { + .nh_name = "arp", + .nh_handler = arpintr, + .nh_proto = NETISR_ARP, + .nh_policy = NETISR_POLICY_SOURCE, +}; + #ifndef VIMAGE_GLOBALS static const vnet_modinfo_t vnet_arp_modinfo = { .vmi_id = VNET_MOD_ARP, @@ -823,8 +828,6 @@ arp_init(void) arp_iattach(NULL); #endif - arpintrq.ifq_maxlen = 50; - mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF); - netisr_register(NETISR_ARP, arpintr, &arpintrq, 0); + netisr_register(&arp_nh); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index 1a311f2..b5b6302 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -144,6 +144,13 @@ static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); static vnet_attach_fn vnet_igmp_iattach; static vnet_detach_fn vnet_igmp_idetach; +static const struct netisr_handler igmp_nh = { + .nh_name = "igmp", + .nh_handler = igmp_intr, + .nh_proto = NETISR_IGMP, + .nh_policy = NETISR_POLICY_SOURCE, +}; + /* * System-wide globals. * @@ -190,11 +197,6 @@ struct mbuf *m_raopt; /* Router Alert option */ MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); /* - * Global netisr output queue. - */ -struct ifqueue igmpoq; - -/* * VIMAGE-wide globals. 
* * The IGMPv3 timers themselves need to run per-image, however, @@ -3537,12 +3539,9 @@ igmp_sysinit(void) IGMP_LOCK_INIT(); - mtx_init(&igmpoq.ifq_mtx, "igmpoq_mtx", NULL, MTX_DEF); - IFQ_SET_MAXLEN(&igmpoq, IFQ_MAXLEN); - m_raopt = igmp_ra_alloc(); - netisr_register(NETISR_IGMP, igmp_intr, &igmpoq, 0); + netisr_register(&igmp_nh); } static void @@ -3551,8 +3550,7 @@ igmp_sysuninit(void) CTR1(KTR_IGMPV3, "%s: tearing down", __func__); - netisr_unregister(NETISR_IGMP); - mtx_destroy(&igmpoq.ifq_mtx); + netisr_unregister(&igmp_nh); m_free(m_raopt); m_raopt = NULL; diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 3bd3049..5e71d4d 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -472,7 +472,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, SOCK_UNLOCK(so); #endif /* Send packet to input processing via netisr */ - netisr_queue(NETISR_IP, m); + netisr_queue_src(NETISR_IP, (uintptr_t)so, m); } return error; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 48143d7..2859b8c 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -164,18 +164,17 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, struct pfil_head inet_pfil_hook; /* Packet filter hooks */ -static struct ifqueue ipintrq; -static int ipqmaxlen = IFQ_MAXLEN; +static struct netisr_handler ip_nh = { + .nh_name = "ip", + .nh_handler = ip_input, + .nh_proto = NETISR_IP, + .nh_policy = NETISR_POLICY_FLOW, +}; extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, - &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, - &ipintrq.ifq_drops, 0, - "Number of packets dropped from the IP input queue"); SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); @@ -249,6 +248,44 @@ static void vnet_inet_register() SYSINIT(inet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, vnet_inet_register, 0); #endif +static int +sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ip_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip_nh, qlimit)); +} +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", + "Maximum size of the IP input queue"); + +static int +sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) +{ + u_int64_t qdrops_long; + int error, qdrops; + + netisr_getqdrops(&ip_nh, &qdrops_long); + qdrops = qdrops_long; + error = sysctl_handle_int(oidp, &qdrops, 0, req); + if (error || !req->newptr) + return (error); + if (qdrops != 0) + return (EINVAL); + netisr_clearqdrops(&ip_nh); + return (0); +} + +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, + CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", + "Number of packets dropped from the IP input queue"); + /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. @@ -347,10 +384,7 @@ ip_init(void) /* Initialize various other remaining things. 
*/ IPQ_LOCK_INIT(); - ipintrq.ifq_maxlen = ipqmaxlen; - mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); - netisr_register(NETISR_IP, ip_input, &ipintrq, 0); - + netisr_register(&ip_nh); ip_ft = flowtable_alloc(ip_output_flowtable_size, FL_PCPU); } diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 7498860..3847e2f 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -120,7 +120,13 @@ __FBSDID("$FreeBSD$"); extern struct domain inet6domain; u_char ip6_protox[IPPROTO_MAX]; -static struct ifqueue ip6intrq; + +static struct netisr_handler ip6_nh = { + .nh_name = "ip6", + .nh_handler = ip6_input, + .nh_proto = NETISR_IPV6, + .nh_policy = NETISR_POLICY_FLOW, +}; #ifndef VIMAGE #ifndef VIMAGE_GLOBALS @@ -129,7 +135,6 @@ struct vnet_inet6 vnet_inet6_0; #endif #ifdef VIMAGE_GLOBALS -static int ip6qmaxlen; struct in6_ifaddr *in6_ifaddr; struct ip6stat ip6stat; @@ -186,7 +191,6 @@ ip6_init(void) struct ip6protosw *pr; int i; - V_ip6qmaxlen = IFQ_MAXLEN; V_in6_maxmtu = 0; #ifdef IP6_AUTO_LINKLOCAL V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; @@ -296,9 +300,7 @@ ip6_init(void) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); - ip6intrq.ifq_maxlen = V_ip6qmaxlen; /* XXX */ - mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF); - netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0); + netisr_register(&ip6_nh); } static int diff --git a/sys/netinet6/vinet6.h b/sys/netinet6/vinet6.h index 0d08cac..f0ea4b9 100644 --- a/sys/netinet6/vinet6.h +++ b/sys/netinet6/vinet6.h @@ -118,7 +118,6 @@ struct vnet_inet6 { int _icmp6_nodeinfo; int _udp6_sendspace; int _udp6_recvspace; - int _ip6qmaxlen; int _ip6_prefer_tempaddr; int _nd6_prune; @@ -224,7 +223,6 @@ extern struct vnet_inet6 vnet_inet6_0; #define V_ip6_use_tempaddr VNET_INET6(ip6_use_tempaddr) #define V_ip6_v6only VNET_INET6(ip6_v6only) #define V_ip6q VNET_INET6(ip6q) -#define V_ip6qmaxlen VNET_INET6(ip6qmaxlen) #define V_ip6stat VNET_INET6(ip6stat) #define V_ip6stealth VNET_INET6(ip6stealth) #define V_llinfo_nd6 VNET_INET6(llinfo_nd6) diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 018e0c5..62ded7b 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -481,7 +481,7 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, /* * Re-dispatch via software interrupt. */ - if ((error = netisr_queue(NETISR_IP, m))) { + if ((error = netisr_queue_src(NETISR_IP, (uintptr_t)sav, m))) { IPSEC_ISTAT(sproto, V_espstat.esps_qfull, V_ahstat.ahs_qfull, V_ipcompstat.ipcomps_qfull); diff --git a/sys/netipx/ipx_input.c b/sys/netipx/ipx_input.c index 9f9b8d4..2a0980a 100644 --- a/sys/netipx/ipx_input.c +++ b/sys/netipx/ipx_input.c @@ -100,6 +100,11 @@ static int ipxnetbios = 0; SYSCTL_INT(_net_ipx, OID_AUTO, ipxnetbios, CTLFLAG_RW, &ipxnetbios, 0, "Propagate netbios over ipx"); +static int ipx_do_route(struct ipx_addr *src, struct route *ro); +static void ipx_undo_route(struct route *ro); +static void ipx_forward(struct mbuf *m); +static void ipxintr(struct mbuf *m); + const union ipx_net ipx_zeronet; const union ipx_host ipx_zerohost; @@ -119,16 +124,15 @@ struct mtx ipxpcb_list_mtx; struct ipxpcbhead ipxpcb_list; struct ipxpcbhead ipxrawpcb_list; -static int ipxqmaxlen = IFQ_MAXLEN; -static struct ifqueue ipxintrq; +static struct netisr_handler ipx_nh = { + .nh_name = "ipx", + .nh_handler = ipxintr, + .nh_proto = NETISR_IPX, + .nh_policy = NETISR_POLICY_SOURCE, +}; long ipx_pexseq; /* Locked with ipxpcb_list_mtx. 
*/ -static int ipx_do_route(struct ipx_addr *src, struct route *ro); -static void ipx_undo_route(struct route *ro); -static void ipx_forward(struct mbuf *m); -static void ipxintr(struct mbuf *m); - /* * IPX initialization. */ @@ -151,9 +155,7 @@ ipx_init(void) ipx_hostmask.sipx_addr.x_net = ipx_broadnet; ipx_hostmask.sipx_addr.x_host = ipx_broadhost; - ipxintrq.ifq_maxlen = ipxqmaxlen; - mtx_init(&ipxintrq.ifq_mtx, "ipx_inq", NULL, MTX_DEF); - netisr_register(NETISR_IPX, ipxintr, &ipxintrq, 0); + netisr_register(&ipx_nh); } /* diff --git a/sys/netnatm/natm_proto.c b/sys/netnatm/natm_proto.c index 145fb58..d97809b 100644 --- a/sys/netnatm/natm_proto.c +++ b/sys/netnatm/natm_proto.c @@ -88,8 +88,14 @@ static struct domain natmdomain = { .dom_protoswNPROTOSW = &natmsw[sizeof(natmsw)/sizeof(natmsw[0])], }; -static int natmqmaxlen = 1000 /* IFQ_MAXLEN */; /* max # of packets on queue */ -static struct ifqueue natmintrq; +static struct netisr_handler natm_nh = { + .nh_name = "natm", + .nh_handler = natmintr, + .nh_proto = NETISR_NATM, + .nh_qlimit = 1000, + .nh_policy = NETISR_POLICY_SOURCE, +}; + #ifdef NATM_STAT u_int natm_sodropcnt; /* # mbufs dropped due to full sb */ u_int natm_sodropbytes; /* # of bytes dropped */ @@ -101,11 +107,8 @@ static void natm_init(void) { LIST_INIT(&natm_pcbs); - bzero(&natmintrq, sizeof(natmintrq)); - natmintrq.ifq_maxlen = natmqmaxlen; NATM_LOCK_INIT(); - mtx_init(&natmintrq.ifq_mtx, "natm_inq", NULL, MTX_DEF); - netisr_register(NETISR_NATM, natmintr, &natmintrq, 0); + netisr_register(&natm_nh); } DOMAIN_SET(natm); diff --git a/sys/sys/param.h b/sys/sys/param.h index 9defb92..bf3892d 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -57,7 +57,7 @@ * is created, otherwise 1. */ #undef __FreeBSD_version -#define __FreeBSD_version 800095 /* Master, propagated to newvers */ +#define __FreeBSD_version 800096 /* Master, propagated to newvers */ #ifndef LOCORE #include <sys/types.h> diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index a1052e6..63c3fa3 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -86,6 +86,7 @@ struct pcpu { struct vmmeter pc_cnt; /* VM stats counters */ long pc_cp_time[CPUSTATES]; /* statclock ticks */ struct device *pc_device; + void *pc_netisr; /* netisr SWI cookie. */ /* * Stuff for read mostly lock |
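On the dispatch side, callers now hand packets to netisr by protocol number alone, either requesting direct dispatch or deferring to the worker thread, and may pass a source identifier for ordering, as ip_divert.c and ipsec_input.c do above with netisr_queue_src(). A short caller-side sketch, again not part of the commit and using hypothetical names:

#include <sys/param.h>
#include <sys/mbuf.h>

#include <net/netisr.h>

/*
 * Hypothetical helper: hand an IP packet to netisr.  netisr_dispatch()
 * permits direct dispatch in the calling context (subject to the
 * net.isr.direct policy); netisr_queue_src() always defers to the netisr
 * worker and carries an opaque source cookie as an ordering hint.
 */
static int
example_deliver(struct mbuf *m, void *cookie, int may_dispatch)
{

        if (may_dispatch)
                return (netisr_dispatch(NETISR_IP, m));
        return (netisr_queue_src(NETISR_IP, (uintptr_t)cookie, m));
}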