diff options
author | rwatson <rwatson@FreeBSD.org> | 2009-06-01 10:41:38 +0000 |
---|---|---|
committer | rwatson <rwatson@FreeBSD.org> | 2009-06-01 10:41:38 +0000 |
commit | 2bab6955606e0d2046018b3a7a9f06775f06b145 (patch) | |
tree | 786f045f4da789d697041f2eeca0ba057458f558 /sys/netinet | |
parent | 3683dec3736f1a8cb93ab52595216d6f60c54ca4 (diff) | |
download | FreeBSD-src-2bab6955606e0d2046018b3a7a9f06775f06b145.zip FreeBSD-src-2bab6955606e0d2046018b3a7a9f06775f06b145.tar.gz |
Reimplement the netisr framework in order to support parallel netisr
threads:
- Support up to one netisr thread per CPU, each processing its own
workstream, or set of per-protocol queues. Threads may be bound
to specific CPUs, or allowed to migrate, based on a global policy.
In the future it would be desirable to support topology-centric
policies, such as "one netisr per package".
- Allow each protocol to advertise an ordering policy, which can
currently be one of:
NETISR_POLICY_SOURCE: packets must maintain ordering with respect to
an implicit or explicit source (such as an interface or socket).
NETISR_POLICY_FLOW: make use of mbuf flow identifiers to place work,
as well as allowing protocols to provide a flow generation function
for mbufs without flow identifiers (m2flow). Falls back on
NETISR_POLICY_SOURCE if no flow ID is available.
NETISR_POLICY_CPU: allow protocols to inspect and assign a CPU for
each packet handled by netisr (m2cpuid).
- Provide utility functions for querying the number of workstreams
being used, as well as a mapping function from workstream to CPU ID,
which protocols may use in work placement decisions.
- Add explicit interfaces to get and set per-protocol queue limits, and
get and clear drop counters, which query data or apply changes across
all workstreams.
- Add a more extensible netisr registration interface, in which
protocols declare 'struct netisr_handler' structures for each
registered NETISR_ type. These include name, handler function,
optional mbuf to flow ID function, optional mbuf to CPU ID function,
queue limit, and ordering policy. Padding is present to allow these
to be expanded in the future. If no queue limit is declared, then
a default is used.
- Queue limits are now per-workstream, and raised from the previous
IFQ_MAXLEN default of 50 to 256.
- All protocols are updated to use the new registration interface, and
with the exception of netnatm, default queue limits. Most protocols
register as NETISR_POLICY_SOURCE, except IPv4 and IPv6, which use
NETISR_POLICY_FLOW, and will therefore take advantage of driver-
generated flow IDs if present.
- Formalize a non-packet based interface between interface polling and
the netisr, rather than having polling pretend to be two protocols.
Provide two explicit hooks in the netisr worker for start and end
events for runs: netisr_poll() and netisr_pollmore(), as well as a
function, netisr_sched_poll(), to allow the polling code to schedule
netisr execution. DEVICE_POLLING still embeds single-netisr
assumptions in its implementation, so for now if it is compiled into
the kernel, a single and un-bound netisr thread is enforced
regardless of tunable configuration.
In the default configuration, the new netisr implementation maintains
the same basic assumptions as the previous implementation: a single,
un-bound worker thread processes all deferred work, and direct dispatch
is enabled by default wherever possible.
Performance measurement shows a marginal performance improvement over
the old implementation due to the use of batched dequeue.
An rmlock is used to synchronize use and registration/unregistration
using the framework; currently, synchronized use is disabled
(replicating current netisr policy) due to a measurable 3%-6% hit in
ping-pong micro-benchmarking. It will be enabled once further rmlock
optimization has taken place. However, in practice, netisrs are
rarely registered or unregistered at runtime.
A new man page for netisr will follow, but since one doesn't currently
exist, it hasn't been updated.
This change is not appropriate for MFC, although the polling shutdown
handler should be merged to 7-STABLE.
Bump __FreeBSD_version.
Reviewed by: bz
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/if_ether.c | 13 | ||||
-rw-r--r-- | sys/netinet/igmp.c | 20 | ||||
-rw-r--r-- | sys/netinet/ip_divert.c | 2 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 56 |
4 files changed, 63 insertions, 28 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index d24b459..734dd02 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -96,8 +96,6 @@ static int arp_proxyall; SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds"); -static struct ifqueue arpintrq; - SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, arp_maxtries, 0, "ARP resolution attempts before returning error"); @@ -118,6 +116,13 @@ static void arptimer(void *); static void in_arpinput(struct mbuf *); #endif +static const struct netisr_handler arp_nh = { + .nh_name = "arp", + .nh_handler = arpintr, + .nh_proto = NETISR_ARP, + .nh_policy = NETISR_POLICY_SOURCE, +}; + #ifndef VIMAGE_GLOBALS static const vnet_modinfo_t vnet_arp_modinfo = { .vmi_id = VNET_MOD_ARP, @@ -823,8 +828,6 @@ arp_init(void) arp_iattach(NULL); #endif - arpintrq.ifq_maxlen = 50; - mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF); - netisr_register(NETISR_ARP, arpintr, &arpintrq, 0); + netisr_register(&arp_nh); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); diff --git a/sys/netinet/igmp.c b/sys/netinet/igmp.c index 1a311f2..b5b6302 100644 --- a/sys/netinet/igmp.c +++ b/sys/netinet/igmp.c @@ -144,6 +144,13 @@ static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); static vnet_attach_fn vnet_igmp_iattach; static vnet_detach_fn vnet_igmp_idetach; +static const struct netisr_handler igmp_nh = { + .nh_name = "igmp", + .nh_handler = igmp_intr, + .nh_proto = NETISR_IGMP, + .nh_policy = NETISR_POLICY_SOURCE, +}; + /* * System-wide globals. * @@ -190,11 +197,6 @@ struct mbuf *m_raopt; /* Router Alert option */ MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); /* - * Global netisr output queue. - */ -struct ifqueue igmpoq; - -/* * VIMAGE-wide globals. 
* * The IGMPv3 timers themselves need to run per-image, however, @@ -3537,12 +3539,9 @@ igmp_sysinit(void) IGMP_LOCK_INIT(); - mtx_init(&igmpoq.ifq_mtx, "igmpoq_mtx", NULL, MTX_DEF); - IFQ_SET_MAXLEN(&igmpoq, IFQ_MAXLEN); - m_raopt = igmp_ra_alloc(); - netisr_register(NETISR_IGMP, igmp_intr, &igmpoq, 0); + netisr_register(&igmp_nh); } static void @@ -3551,8 +3550,7 @@ igmp_sysuninit(void) CTR1(KTR_IGMPV3, "%s: tearing down", __func__); - netisr_unregister(NETISR_IGMP); - mtx_destroy(&igmpoq.ifq_mtx); + netisr_unregister(&igmp_nh); m_free(m_raopt); m_raopt = NULL; diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 3bd3049..5e71d4d 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -472,7 +472,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, SOCK_UNLOCK(so); #endif /* Send packet to input processing via netisr */ - netisr_queue(NETISR_IP, m); + netisr_queue_src(NETISR_IP, (uintptr_t)so, m); } return error; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 48143d7..2859b8c 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -164,18 +164,17 @@ SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, struct pfil_head inet_pfil_hook; /* Packet filter hooks */ -static struct ifqueue ipintrq; -static int ipqmaxlen = IFQ_MAXLEN; +static struct netisr_handler ip_nh = { + .nh_name = "ip", + .nh_handler = ip_input, + .nh_proto = NETISR_IP, + .nh_policy = NETISR_POLICY_FLOW, +}; extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, - &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); -SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, - &ipintrq.ifq_drops, 0, - "Number of packets dropped from the IP input queue"); SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, ipstat, ipstat, "IP statistics (struct 
ipstat, netinet/ip_var.h)"); @@ -249,6 +248,44 @@ static void vnet_inet_register() SYSINIT(inet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, vnet_inet_register, 0); #endif +static int +sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) +{ + int error, qlimit; + + netisr_getqlimit(&ip_nh, &qlimit); + error = sysctl_handle_int(oidp, &qlimit, 0, req); + if (error || !req->newptr) + return (error); + if (qlimit < 1) + return (EINVAL); + return (netisr_setqlimit(&ip_nh, qlimit)); +} +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", + "Maximum size of the IP input queue"); + +static int +sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) +{ + u_int64_t qdrops_long; + int error, qdrops; + + netisr_getqdrops(&ip_nh, &qdrops_long); + qdrops = qdrops_long; + error = sysctl_handle_int(oidp, &qdrops, 0, req); + if (error || !req->newptr) + return (error); + if (qdrops != 0) + return (EINVAL); + netisr_clearqdrops(&ip_nh); + return (0); +} + +SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, + CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", + "Number of packets dropped from the IP input queue"); + /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. @@ -347,10 +384,7 @@ ip_init(void) /* Initialize various other remaining things. */ IPQ_LOCK_INIT(); - ipintrq.ifq_maxlen = ipqmaxlen; - mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); - netisr_register(NETISR_IP, ip_input, &ipintrq, 0); - + netisr_register(&ip_nh); ip_ft = flowtable_alloc(ip_output_flowtable_size, FL_PCPU); } |