diff options
author | oleg <oleg@FreeBSD.org> | 2007-11-06 23:01:42 +0000 |
---|---|---|
committer | oleg <oleg@FreeBSD.org> | 2007-11-06 23:01:42 +0000 |
commit | 7eef73ab3fe01565e1d4d1f9d1a070116ed41e13 (patch) | |
tree | 225c425cef7bdeda5b84e8391283c7c145b24798 /sys | |
parent | dd5717deccd91591695d9f8054e6c7d8dfadf358 (diff) | |
download | FreeBSD-src-7eef73ab3fe01565e1d4d1f9d1a070116ed41e13.zip FreeBSD-src-7eef73ab3fe01565e1d4d1f9d1a070116ed41e13.tar.gz |
1) dummynet_io() declaration has changed.
2) Alter the packet flow inside dummynet: allow certain packets to bypass
the dummynet scheduler. The benefits are:
- lower latency: if the packet flow does not exceed the pipe bandwidth, packets
will not be delayed by up to one tick (a delay caused by the granularity of dummynet's scheduler).
- lower overhead: if a packet avoids the dummynet scheduler, it shouldn't reenter the IP
stack later. Such packets can be fast-forwarded.
- recursion (which can lead to kernel stack exhaustion) is eliminated. This fixes
a long-standing panic, which can be triggered this way:
kldload dummynet
sysctl net.inet.ip.fw.one_pass=0
ipfw pipe 1 config bw 0
for i in `jot 30`; do ipfw add 1 pipe 1 icmp from any to any; done
ping -c 1 localhost
3) Three new sysctl nodes are added:
net.inet.ip.dummynet.io_pkt - packets passed to dummynet
net.inet.ip.dummynet.io_pkt_fast - packets that bypassed the dummynet scheduler
net.inet.ip.dummynet.io_pkt_drop - packets dropped by dummynet
P.S. The above comments apply only to layer 3 packets. The layer 2 packet flow
has not been changed yet.
MFC after: 3 month
Diffstat (limited to 'sys')
-rw-r--r-- | sys/net/if_bridge.c | 2 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 2 | ||||
-rw-r--r-- | sys/netinet/ip_dummynet.c | 44 | ||||
-rw-r--r-- | sys/netinet/ip_dummynet.h | 2 | ||||
-rw-r--r-- | sys/netinet/ip_fw_pfil.c | 54 |
5 files changed, 68 insertions, 36 deletions
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 2f64a04..4825483 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -3038,7 +3038,7 @@ bridge_pfil(struct mbuf **mp, struct ifnet *bifp, struct ifnet *ifp, int dir) * packet will return to us via bridge_dummynet(). */ args.oif = ifp; - ip_dn_io_ptr(*mp, DN_TO_IFB_FWD, &args); + ip_dn_io_ptr(mp, DN_TO_IFB_FWD, &args); return (error); } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index e3d3620..07a3fb9 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -491,7 +491,7 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst, */ *m0 = NULL ; } - ip_dn_io_ptr(m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); + ip_dn_io_ptr(&m, dst ? DN_TO_ETH_OUT: DN_TO_ETH_DEMUX, &args); return 0; } /* diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c index c33f90f..d6f1880 100644 --- a/sys/netinet/ip_dummynet.c +++ b/sys/netinet/ip_dummynet.c @@ -110,6 +110,10 @@ static long tick_lost; /* Lost(coalesced) ticks number. */ /* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ static long tick_diff; +static unsigned long io_pkt; +static unsigned long io_pkt_fast; +static unsigned long io_pkt_drop; + /* * Three heaps contain queues and pipes that the scheduler handles: * @@ -181,6 +185,15 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff, SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, CTLFLAG_RD, &tick_lost, 0, "Number of ticks coalesced by dummynet taskqueue."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, + CTLFLAG_RD, &io_pkt, 0, + "Number of packets passed to dummynet."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, + CTLFLAG_RD, &io_pkt_fast, 0, + "Number of packets bypassed dummynet scheduler."); +SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, + CTLFLAG_RD, &io_pkt_drop, 0, + "Number of packets dropped by dummynet."); #endif #ifdef DUMMYNET_DEBUG @@ -579,10 +592,9 @@ ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) * XXX Should check errors on heap_insert, and drain the whole * queue on error hoping next time we are luckier. */ - } else { /* RED needs to know when the queue becomes empty. */ + } else /* RED needs to know when the queue becomes empty. */ q->q_time = curr_time; - q->numbytes = 0; - } + /* * If the delay line was empty call transmit_event() now. * Otherwise, the scheduler will take care of it. @@ -955,6 +967,7 @@ create_queue(struct dn_flow_set *fs, int i) q->hash_slot = i; q->next = fs->rq[i]; q->S = q->F + 1; /* hack - mark timestamp as invalid. 
*/ + q->numbytes = fs->pipe->bandwidth; fs->rq[i] = q; fs->rq_elements++; return (q); @@ -1213,9 +1226,9 @@ locate_pipe(int pipe_nr) * rule matching rule, in case of multiple passes */ static int -dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) { - struct mbuf *head = NULL, *tail = NULL; + struct mbuf *m = *m0, *head = NULL, *tail = NULL; struct dn_pkt_tag *pkt; struct m_tag *mtag; struct dn_flow_set *fs = NULL; @@ -1237,6 +1250,7 @@ dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) is_pipe = (cmd->opcode == O_PIPE); DUMMYNET_LOCK(); + io_pkt++; /* * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. * @@ -1309,6 +1323,11 @@ dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) if (q->head != m) /* Flow was not idle, we are done. */ goto done; + + if (q->q_time < curr_time) + q->numbytes = fs->pipe->bandwidth; + q->q_time = curr_time; + /* * If we reach this point the flow was previously idle, so we need * to schedule it. This involves different actions for fixed-rate or @@ -1318,7 +1337,7 @@ dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) /* Fixed-rate queue: just insert into the ready_heap. */ dn_key t = 0; - if (pipe->bandwidth) + if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes) t = SET_TICKS(m, q, pipe); q->sched_time = curr_time; if (t == 0) /* Must process it now. */ @@ -1378,16 +1397,27 @@ dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa) } } done: + if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && + dir != DN_TO_ETH_OUT) { /* Fast io. */ + io_pkt_fast++; + if (m->m_nextpkt != NULL) + printf("dummynet: fast io: pkt chain detected!\n"); + head = m->m_nextpkt = NULL; + } else + *m0 = NULL; /* Normal io. 
*/ + DUMMYNET_UNLOCK(); if (head != NULL) dummynet_send(head); return (0); dropit: + io_pkt_drop++; if (q) q->drops++; DUMMYNET_UNLOCK(); m_freem(m); + *m0 = NULL; return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); } @@ -1706,7 +1736,7 @@ config_pipe(struct dn_pipe *p) /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) for (q = pipe->fs.rq[i]; q; q = q->next) - q->numbytes = 0; + q->numbytes = p->bandwidth; pipe->bandwidth = p->bandwidth; pipe->numbytes = 0; /* just in case... */ diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index 5215d932..ea4cb33 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -343,7 +343,7 @@ SLIST_HEAD(dn_pipe_head, dn_pipe); #ifdef _KERNEL typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ -typedef int ip_dn_io_t(struct mbuf *m, int dir, struct ip_fw_args *fwa); +typedef int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa); extern ip_dn_ctl_t *ip_dn_ctl_ptr; extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; diff --git a/sys/netinet/ip_fw_pfil.c b/sys/netinet/ip_fw_pfil.c index 3d7f2a7..d7ff400 100644 --- a/sys/netinet/ip_fw_pfil.c +++ b/sys/netinet/ip_fw_pfil.c @@ -104,16 +104,6 @@ ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, bzero(&args, sizeof(args)); - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL){ - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - - m_tag_delete(*m0, dn_tag); - } - ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { @@ -124,6 +114,16 @@ ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, } again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL){ + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + 
m_tag_delete(*m0, dn_tag); + } + args.m = *m0; args.inp = inp; ipfw = ipfw_chk(&args); @@ -160,10 +160,11 @@ again: if (!DUMMYNET_LOADED) goto drop; if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(*m0, DN_TO_IP_IN, &args); + ip_dn_io_ptr(m0, DN_TO_IP_IN, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(*m0, DN_TO_IP6_IN, &args); - *m0 = NULL; + ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args); + if (*m0 != NULL) + goto again; return 0; /* packet consumed */ case IP_FW_TEE: @@ -225,16 +226,6 @@ ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, bzero(&args, sizeof(args)); - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL) { - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - - m_tag_delete(*m0, dn_tag); - } - ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, NULL); if (ng_tag != NULL) { @@ -245,6 +236,16 @@ ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, } again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL) { + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + m_tag_delete(*m0, dn_tag); + } + args.m = *m0; args.oif = ifp; args.inp = inp; @@ -286,10 +287,11 @@ again: if (!DUMMYNET_LOADED) break; if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(*m0, DN_TO_IP_OUT, &args); + ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args); else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(*m0, DN_TO_IP6_OUT, &args); - *m0 = NULL; + ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args); + if (*m0 != NULL) + goto again; return 0; /* packet consumed */ break; |