summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author mlaier <mlaier@FreeBSD.org> 2004-02-13 19:14:16 +0000
committer mlaier <mlaier@FreeBSD.org> 2004-02-13 19:14:16 +0000
commit da4d773b129fb1339d7b5fc23b93388b18952d3b (patch)
tree 75fdee3c73dd1369e60e6348343749bdf159313a
parent 09ad0862e6acad72dd0217846a1c16d5bea5c454 (diff)
download FreeBSD-src-da4d773b129fb1339d7b5fc23b93388b18952d3b.zip
FreeBSD-src-da4d773b129fb1339d7b5fc23b93388b18952d3b.tar.gz
This set of changes eliminates the use of MT_TAG "pseudo mbufs", replacing
them mostly with packet tags (one case is handled by using an mbuf flag
since the linkage between "caller" and "callee" is direct and there's no
need to incur the overhead of a packet tag).

This is (mostly) work from: sam

Silence from: -arch
Approved by: bms(mentor), sam, rwatson
-rw-r--r-- sys/net/bridge.c 14
-rw-r--r-- sys/net/if_ethersubr.c 17
-rw-r--r-- sys/netinet/in_proto.c 1
-rw-r--r-- sys/netinet/ip_divert.c 69
-rw-r--r-- sys/netinet/ip_divert.h 83
-rw-r--r-- sys/netinet/ip_dummynet.c 216
-rw-r--r-- sys/netinet/ip_dummynet.h 35
-rw-r--r-- sys/netinet/ip_fastfwd.c 85
-rw-r--r-- sys/netinet/ip_fw.h 1
-rw-r--r-- sys/netinet/ip_fw2.c 30
-rw-r--r-- sys/netinet/ip_input.c 174
-rw-r--r-- sys/netinet/ip_output.c 113
-rw-r--r-- sys/netinet/ip_var.h 25
-rw-r--r-- sys/netinet/tcp_debug.c 1
-rw-r--r-- sys/netinet/tcp_input.c 8
-rw-r--r-- sys/netinet/tcp_reass.c 8
-rw-r--r-- sys/sys/mbuf.h 19
17 files changed, 461 insertions, 438 deletions
diff --git a/sys/net/bridge.c b/sys/net/bridge.c
index a444f1c..9ca1dd8 100644
--- a/sys/net/bridge.c
+++ b/sys/net/bridge.c
@@ -920,14 +920,11 @@ bdg_forward(struct mbuf *m0, struct ifnet *dst)
DDB(quad_t ticks; ticks = rdtsc();)
- args.rule = NULL; /* did we match a firewall rule ? */
- /* Fetch state from dummynet tag, ignore others */
- for (;m0->m_type == MT_TAG; m0 = m0->m_next)
- if (m0->_m_tag_id == PACKET_TAG_DUMMYNET) {
- args.rule = ((struct dn_pkt *)m0)->rule;
- shared = 0; /* For sure this is our own mbuf. */
- }
- if (args.rule == NULL)
+ /* did we match a firewall rule ? */
+ args.rule = ip_dn_find_rule(m0);
+ if (args.rule)
+ shared = 0; /* For sure this is our own mbuf. */
+ else
bdg_thru++; /* count 1st time through bdg_forward */
/*
@@ -1046,7 +1043,6 @@ bdg_forward(struct mbuf *m0, struct ifnet *dst)
args.m = m0; /* the packet we are looking at */
args.oif = NULL; /* this is an input packet */
- args.divert_rule = 0; /* we do not support divert yet */
args.next_hop = NULL; /* we do not support forward yet */
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
i = ip_fw_chk_ptr(&args);
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 70eb9f7..7af66b3 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -322,12 +322,7 @@ bad: if (m != NULL)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
- struct ip_fw *rule = NULL;
-
- /* Extract info from dummynet tag, ignore others */
- for (; m->m_type == MT_TAG; m = m->m_next)
- if (m->m_flags == PACKET_TAG_DUMMYNET)
- rule = ((struct dn_pkt *)m)->rule;
+ struct ip_fw *rule = ip_dn_find_rule(m);
if (rule == NULL && BDG_ACTIVE(ifp)) {
/*
@@ -397,7 +392,6 @@ ether_ipfw_chk(struct mbuf **m0, struct ifnet *dst,
args.m = m; /* the packet we are looking at */
args.oif = dst; /* destination, if any */
- args.divert_rule = 0; /* we do not support divert yet */
args.rule = *rule; /* matching rule to restart */
args.next_hop = NULL; /* we do not support forward yet */
args.eh = &save_eh; /* MAC header for bridged/MAC packets */
@@ -611,14 +605,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
#if defined(NETATALK)
struct llc *l;
#endif
- struct ip_fw *rule = NULL;
-
- /* Extract info from dummynet tag, ignore others */
- for (;m->m_type == MT_TAG; m = m->m_next)
- if (m->m_flags == PACKET_TAG_DUMMYNET) {
- rule = ((struct dn_pkt *)m)->rule;
- ifp = m->m_next->m_pkthdr.rcvif;
- }
+ struct ip_fw *rule = ip_dn_find_rule(m);
KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer"));
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index c57f878..6f2e8da 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -57,6 +57,7 @@
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_divert.h>
#include <netinet/igmp_var.h>
#ifdef PIM
#include <netinet/pim_var.h>
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index fe560a0..ee1f97c 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -68,6 +68,7 @@
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_var.h>
/*
@@ -150,17 +151,21 @@ div_input(struct mbuf *m, int off)
* then pass them along with mbuf chain.
*/
void
-divert_packet(struct mbuf *m, int incoming, int port, int rule)
+divert_packet(struct mbuf *m, int incoming)
{
struct ip *ip;
struct inpcb *inp;
struct socket *sa;
u_int16_t nport;
struct sockaddr_in divsrc;
+ struct m_tag *mtag;
- /* Sanity check */
- KASSERT(port != 0, ("%s: port=0", __func__));
-
+ mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+ if (mtag == NULL) {
+ printf("%s: no divert tag\n", __func__);
+ m_freem(m);
+ return;
+ }
/* Assure header */
if (m->m_len < sizeof(struct ip) &&
(m = m_pullup(m, sizeof(struct ip))) == 0)
@@ -174,7 +179,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule)
bzero(&divsrc, sizeof(divsrc));
divsrc.sin_len = sizeof(divsrc);
divsrc.sin_family = AF_INET;
- divsrc.sin_port = rule; /* record matching rule */
+ divsrc.sin_port = divert_cookie(mtag); /* record matching rule */
if (incoming) {
struct ifaddr *ifa;
@@ -234,7 +239,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule)
mtx_lock(&Giant);
/* Put packet on socket queue, if any */
sa = NULL;
- nport = htons((u_int16_t)port);
+ nport = htons((u_int16_t)divert_info(mtag));
INP_INFO_RLOCK(&divcbinfo);
LIST_FOREACH(inp, &divcb, inp_list) {
INP_LOCK(inp);
@@ -273,19 +278,8 @@ div_output(struct socket *so, struct mbuf *m,
struct sockaddr_in *sin, struct mbuf *control)
{
int error = 0;
- struct m_hdr divert_tag;
- /*
- * Prepare the tag for divert info. Note that a packet
- * with a 0 tag in mh_data is effectively untagged,
- * so we could optimize that case.
- */
- divert_tag.mh_type = MT_TAG;
- divert_tag.mh_flags = PACKET_TAG_DIVERT;
- divert_tag.mh_next = m;
- divert_tag.mh_data = 0; /* the matching rule # */
- divert_tag.mh_nextpkt = NULL;
- m->m_pkthdr.rcvif = NULL; /* XXX is it necessary ? */
+ KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null"));
#ifdef MAC
mac_create_mbuf_from_socket(so, m);
@@ -296,9 +290,21 @@ div_output(struct socket *so, struct mbuf *m,
/* Loopback avoidance and state recovery */
if (sin) {
+ struct m_tag *mtag;
+ struct divert_tag *dt;
int i;
- divert_tag.mh_data = (caddr_t)(uintptr_t)sin->sin_port;
+ mtag = m_tag_get(PACKET_TAG_DIVERT,
+ sizeof(struct divert_tag), M_NOWAIT);
+ if (mtag == NULL) {
+ error = ENOBUFS;
+ goto cantsend;
+ }
+ dt = (struct divert_tag *)(mtag+1);
+ dt->info = 0;
+ dt->cookie = sin->sin_port;
+ m_tag_prepend(m, mtag);
+
/*
* Find receive interface with the given name, stuffed
* (if it exists) in the sin_zero[] field.
@@ -335,7 +341,7 @@ div_output(struct socket *so, struct mbuf *m,
/* Send packet to output processing */
ipstat.ips_rawout++; /* XXX */
- error = ip_output((struct mbuf *)&divert_tag,
+ error = ip_output(m,
inp->inp_options, NULL,
(so->so_options & SO_DONTROUTE) |
IP_ALLOWBROADCAST | IP_RAWOUTPUT,
@@ -362,7 +368,7 @@ div_output(struct socket *so, struct mbuf *m,
m->m_pkthdr.rcvif = ifa->ifa_ifp;
}
/* Send packet to input processing */
- ip_input((struct mbuf *)&divert_tag);
+ ip_input(m);
}
return error;
@@ -372,6 +378,27 @@ cantsend:
return error;
}
+/*
+ * Return a copy of the specified packet, but without
+ * the divert tag. This is used when packets are "tee'd"
+ * and we want the cloned copy to not have divert processing.
+ * The copy is made with m_dup(m, M_DONTWAIT); whatever m_dup returns is
+ * handed back, so the result is NULL when the duplication fails.
+ */
+struct mbuf *
+divert_clone(struct mbuf *m)
+{
+ struct mbuf *clone;
+ struct m_tag *mtag;
+
+ clone = m_dup(m, M_DONTWAIT);
+ if (clone != NULL) {
+ /* strip divert tag from copy (a single find/delete, no loop) */
+ mtag = m_tag_find(clone, PACKET_TAG_DIVERT, NULL);
+ if (mtag != NULL)
+ m_tag_delete(clone, mtag);
+ }
+ return clone;
+}
+
static int
div_attach(struct socket *so, int proto, struct thread *td)
{
diff --git a/sys/netinet/ip_divert.h b/sys/netinet/ip_divert.h
new file mode 100644
index 0000000..d925918
--- /dev/null
+++ b/sys/netinet/ip_divert.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2003 Sam Leffler, Errno Consulting
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
+ * redistribution must be conditioned upon including a substantially
+ * similar Disclaimer requirement for further binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ * of any contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGES.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_IP_DIVERT_H_
+#define _NETINET_IP_DIVERT_H_
+
+/*
+ * Divert socket definitions.
+ *
+ * struct divert_tag is the payload of a PACKET_TAG_DIVERT mbuf tag; it is
+ * stored immediately after the struct m_tag header (see the mtag+1 casts).
+ */
+
+struct divert_tag {
+ u_int32_t info; /* divert port & flags (e.g. IP_FW_PORT_TEE_FLAG) */
+ u_int16_t cookie; /* ipfw rule number; ipfw_chk uses it as skipto point */
+};
+
+/*
+ * Return the divert cookie associated with the mbuf, if any.
+ * divert_cookie() reads it from an already-located divert tag and
+ * dereferences mtag unconditionally, so mtag must not be NULL.
+ * divert_find_cookie() looks the tag up on the mbuf itself and returns 0
+ * when no PACKET_TAG_DIVERT tag is found.
+ */
+static __inline u_int16_t
+divert_cookie(struct m_tag *mtag)
+{
+ return ((struct divert_tag *)(mtag+1))->cookie;
+}
+static __inline u_int16_t
+divert_find_cookie(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+ return mtag ? divert_cookie(mtag) : 0;
+}
+
+/*
+ * Return the divert info (port & flags) associated with the mbuf, if any.
+ * divert_info() reads it from an already-located divert tag and
+ * dereferences mtag unconditionally, so mtag must not be NULL.
+ * divert_find_info() looks the tag up on the mbuf itself and returns 0
+ * when no PACKET_TAG_DIVERT tag is found.
+ */
+static __inline u_int32_t
+divert_info(struct m_tag *mtag)
+{
+ return ((struct divert_tag *)(mtag+1))->info;
+}
+static __inline u_int32_t
+divert_find_info(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+ return mtag ? divert_info(mtag) : 0;
+}
+
+extern void div_init(void);
+extern void div_input(struct mbuf *, int);
+extern void div_ctlinput(int, struct sockaddr *, void *);
+extern void divert_packet(struct mbuf *m, int incoming);
+extern struct mbuf *divert_clone(struct mbuf *);
+extern struct pr_usrreqs div_usrreqs;
+#endif /* _NETINET_IP_DIVERT_H_ */
diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c
index 1e9ecb8..114ed44 100644
--- a/sys/netinet/ip_dummynet.c
+++ b/sys/netinet/ip_dummynet.c
@@ -405,6 +405,22 @@ heap_free(struct dn_heap *h)
*/
/*
+ * Return the mbuf tag holding the dummynet state. As an optimization
+ * this is assumed to be the first tag on the list. If this turns out
+ * wrong we'll need to search the list.
+ * What is actually returned is the struct dn_pkt_tag payload stored
+ * right after the struct m_tag header (mtag+1), not the m_tag itself.
+ * The "first tag" assumption is only checked by the KASSERT below.
+ */
+static struct dn_pkt_tag *
+dn_tag_get(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_first(m);
+ KASSERT(mtag != NULL &&
+ mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
+ mtag->m_tag_id == PACKET_TAG_DUMMYNET,
+ ("packet on dummynet queue w/o dummynet tag!"));
+ return (struct dn_pkt_tag *)(mtag+1);
+}
+
+/*
* Scheduler functions:
*
* transmit_event() is called when the delay-line needs to enter
@@ -425,87 +441,85 @@ heap_free(struct dn_heap *h)
static void
transmit_event(struct dn_pipe *pipe)
{
- struct dn_pkt *pkt ;
+ struct mbuf *m ;
+ struct dn_pkt_tag *pkt ;
DUMMYNET_LOCK_ASSERT();
- while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
+ while ( (m = pipe->head) ) {
+ pkt = dn_tag_get(m);
+ if ( !DN_KEY_LEQ(pkt->output_time, curr_time) )
+ break;
/*
* first unlink, then call procedures, since ip_input() can invoke
* ip_output() and viceversa, thus causing nested calls
*/
- pipe->head = DN_NEXT(pkt) ;
+ pipe->head = m->m_nextpkt ;
/* XXX: drop the lock for now to avoid LOR's */
DUMMYNET_UNLOCK();
- /*
- * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
- * (NOT A REAL one, just a small block of malloc'ed memory) with
- * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET
- * dn_m (m_next) = actual mbuf to be processed by ip_input/output
- * and some other fields.
- * The block IS FREED HERE because it contains parameters passed
- * to the called routine.
- */
switch (pkt->dn_dir) {
case DN_TO_IP_OUT:
- (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL);
- rt_unref (pkt->ro.ro_rt, __func__) ;
+ (void)ip_output(m, NULL, NULL, pkt->flags, NULL, NULL);
break ;
case DN_TO_IP_IN :
- ip_input((struct mbuf *)pkt) ;
+ ip_input(m) ;
break ;
case DN_TO_BDG_FWD :
- if (!BDG_LOADED) {
+ /*
+ * The bridge requires/assumes the Ethernet header is
+ * contiguous in the first mbuf header. Ensure this is true.
+ */
+ if (BDG_LOADED) {
+ if (m->m_len < ETHER_HDR_LEN &&
+ (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+ printf("dummynet/bridge: pullup fail, dropping pkt\n");
+ break;
+ }
+ m = bdg_forward_ptr(m, pkt->ifp);
+ } else {
/* somebody unloaded the bridge module. Drop pkt */
/* XXX rate limit */
printf("dummynet: dropping bridged packet trapped in pipe\n");
- m_freem(pkt->dn_m);
- break;
- } /* fallthrough */
- case DN_TO_ETH_DEMUX:
- {
- struct mbuf *m = (struct mbuf *)pkt ;
+ }
+ if (m)
+ m_freem(m);
+ break;
- if (pkt->dn_m->m_len < ETHER_HDR_LEN &&
- (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/bridge: pullup fail, dropping pkt\n");
- break;
- }
- /*
- * bdg_forward() wants a pointer to the pseudo-mbuf-header, but
- * on return it will supply the pointer to the actual packet
- * (originally pkt->dn_m, but could be something else now) if
- * it has not consumed it.
- */
- if (pkt->dn_dir == DN_TO_BDG_FWD) {
- m = bdg_forward_ptr(m, pkt->ifp);
- if (m)
- m_freem(m);
- } else
- ether_demux(NULL, m); /* which consumes the mbuf */
+ case DN_TO_ETH_DEMUX:
+ /*
+ * The Ethernet code assumes the Ethernet header is
+ * contiguous in the first mbuf header. Ensure this is true.
+ */
+ if (m->m_len < ETHER_HDR_LEN &&
+ (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+ printf("dummynet/ether: pullup fail, dropping pkt\n");
+ break;
}
+ ether_demux(m->m_pkthdr.rcvif, m); /* which consumes the mbuf */
break ;
+
case DN_TO_ETH_OUT:
- ether_output_frame(pkt->ifp, (struct mbuf *)pkt);
+ ether_output_frame(pkt->ifp, m);
break;
default:
printf("dummynet: bad switch %d!\n", pkt->dn_dir);
- m_freem(pkt->dn_m);
+ m_freem(m);
break ;
}
- free(pkt, M_DUMMYNET);
DUMMYNET_LOCK();
}
/* if there are leftover packets, put into the heap for next event */
- if ( (pkt = pipe->head) )
- heap_insert(&extract_heap, pkt->output_time, pipe ) ;
- /* XXX should check errors on heap_insert, by draining the
- * whole pipe p and hoping in the future we are more successful
- */
+ if ( (m = pipe->head) ) {
+ pkt = dn_tag_get(m) ;
+ /* XXX should check errors on heap_insert, by draining the
+ * whole pipe p and hoping in the future we are more successful
+ */
+ heap_insert(&extract_heap, pkt->output_time, pipe ) ;
+ }
}
/*
@@ -513,8 +527,8 @@ transmit_event(struct dn_pipe *pipe)
* before being able to transmit a packet. The credit is taken from
* either a pipe (WF2Q) or a flow_queue (per-flow queueing)
*/
-#define SET_TICKS(pkt, q, p) \
- (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
+#define SET_TICKS(_m, q, p) \
+ ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
p->bandwidth ;
/*
@@ -522,21 +536,23 @@ transmit_event(struct dn_pipe *pipe)
* and put into delay line (p_queue)
*/
static void
-move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
+move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
struct dn_pipe *p, int len)
{
- q->head = DN_NEXT(pkt) ;
+ struct dn_pkt_tag *dt = dn_tag_get(pkt);
+
+ q->head = pkt->m_nextpkt ;
q->len-- ;
q->len_bytes -= len ;
- pkt->output_time = curr_time + p->delay ;
+ dt->output_time = curr_time + p->delay ;
if (p->head == NULL)
p->head = pkt;
else
- DN_NEXT(p->tail) = pkt;
+ p->tail->m_nextpkt = pkt;
p->tail = pkt;
- DN_NEXT(p->tail) = NULL;
+ p->tail->m_nextpkt = NULL;
}
/*
@@ -549,7 +565,7 @@ move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q,
static void
ready_event(struct dn_flow_queue *q)
{
- struct dn_pkt *pkt;
+ struct mbuf *pkt;
struct dn_pipe *p = q->fs->pipe ;
int p_was_empty ;
@@ -571,7 +587,7 @@ ready_event(struct dn_flow_queue *q)
*/
q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
while ( (pkt = q->head) != NULL ) {
- int len = pkt->dn_m->m_pkthdr.len;
+ int len = pkt->m_pkthdr.len;
int len_scaled = p->bandwidth ? len*8*hz : 0 ;
if (len_scaled > q->numbytes )
break ;
@@ -639,9 +655,9 @@ ready_event_wfq(struct dn_pipe *p)
while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
if (sch->elements > 0) { /* have some eligible pkts to send out */
struct dn_flow_queue *q = sch->p[0].object ;
- struct dn_pkt *pkt = q->head;
+ struct mbuf *pkt = q->head;
struct dn_flow_set *fs = q->fs;
- u_int64_t len = pkt->dn_m->m_pkthdr.len;
+ u_int64_t len = pkt->m_pkthdr.len;
int len_scaled = p->bandwidth ? len*8*hz : 0 ;
heap_extract(sch, NULL); /* remove queue from heap */
@@ -658,7 +674,7 @@ ready_event_wfq(struct dn_pipe *p)
* update F and position in backlogged queue, then
* put flow in not_eligible_heap (we will fix this later).
*/
- len = (q->head)->dn_m->m_pkthdr.len;
+ len = (q->head)->m_pkthdr.len;
q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
if (DN_KEY_LEQ(q->S, p->V))
heap_insert(neh, q->S, q);
@@ -713,7 +729,7 @@ ready_event_wfq(struct dn_pipe *p)
if (p->bandwidth > 0)
t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
- p->tail->output_time += t ;
+ dn_tag_get(p->tail)->output_time += t ;
p->sched_time = curr_time ;
heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
/* XXX should check errors on heap_insert, and drain the whole
@@ -1116,7 +1132,8 @@ locate_flowset(int pipe_nr, struct ip_fw *rule)
static int
dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
{
- struct dn_pkt *pkt;
+ struct dn_pkt_tag *pkt;
+ struct m_tag *mtag;
struct dn_flow_set *fs;
struct dn_pipe *pipe ;
u_int64_t len = m->m_pkthdr.len ;
@@ -1124,7 +1141,9 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
int is_pipe;
#if IPFW2
ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs;
+#endif
+#if IPFW2
if (cmd->opcode == O_LOG)
cmd += F_LEN(cmd);
is_pipe = (cmd->opcode == O_PIPE);
@@ -1175,16 +1194,17 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
goto dropit ;
/* XXX expensive to zero, see if we can remove it*/
- pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO);
- if ( pkt == NULL )
+ mtag = m_tag_get(PACKET_TAG_DUMMYNET,
+ sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO);
+ if ( mtag == NULL )
goto dropit ; /* cannot allocate packet header */
+ m_tag_prepend(m, mtag); /* attach to mbuf chain */
+ m->m_nextpkt = NULL;
+
+ pkt = (struct dn_pkt_tag *)(mtag+1);
/* ok, i can handle the pkt now... */
/* build and enqueue packet + parameters */
- pkt->hdr.mh_type = MT_TAG;
- pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET;
pkt->rule = fwa->rule ;
- DN_NEXT(pkt) = NULL;
- pkt->dn_m = m;
pkt->dn_dir = dir ;
pkt->ifp = fwa->oif;
@@ -1206,14 +1226,14 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
pkt->flags = fwa->flags;
}
if (q->head == NULL)
- q->head = pkt;
+ q->head = m;
else
- DN_NEXT(q->tail) = pkt;
- q->tail = pkt;
+ q->tail->m_nextpkt = m;
+ q->tail = m;
q->len++;
q->len_bytes += len ;
- if ( q->head != pkt ) /* flow was not idle, we are done */
+ if ( q->head != m ) /* flow was not idle, we are done */
goto done;
/*
* If we reach this point the flow was previously idle, so we need
@@ -1226,7 +1246,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa)
*/
dn_key t = 0 ;
if (pipe->bandwidth)
- t = SET_TICKS(pkt, q, pipe);
+ t = SET_TICKS(m, q, pipe);
q->sched_time = curr_time ;
if (t == 0) /* must process it now */
ready_event( q );
@@ -1300,12 +1320,10 @@ dropit:
* Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
* Doing this would probably save us the initial bzero of dn_pkt
*/
-#define DN_FREE_PKT(pkt) { \
- struct dn_pkt *n = pkt ; \
- rt_unref ( n->ro.ro_rt, __func__ ) ; \
- m_freem(n->dn_m); \
- pkt = DN_NEXT(n) ; \
- free(n, M_DUMMYNET) ; }
+#define DN_FREE_PKT(_m) do { \
+ rt_unref(dn_tag_get(_m)->ro.ro_rt, __func__); \
+ m_freem(_m); \
+} while (0)
/*
* Dispose all packets and flow_queues on a flow_set.
@@ -1316,7 +1334,6 @@ dropit:
static void
purge_flow_set(struct dn_flow_set *fs, int all)
{
- struct dn_pkt *pkt ;
struct dn_flow_queue *q, *qn ;
int i ;
@@ -1324,8 +1341,13 @@ purge_flow_set(struct dn_flow_set *fs, int all)
for (i = 0 ; i <= fs->rq_size ; i++ ) {
for (q = fs->rq[i] ; q ; q = qn ) {
- for (pkt = q->head ; pkt ; )
- DN_FREE_PKT(pkt) ;
+ struct mbuf *m, *mnext;
+
+ mnext = q->head;
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ DN_FREE_PKT(m);
+ }
qn = q->next ;
free(q, M_DUMMYNET);
}
@@ -1352,12 +1374,15 @@ purge_flow_set(struct dn_flow_set *fs, int all)
static void
purge_pipe(struct dn_pipe *pipe)
{
- struct dn_pkt *pkt ;
+ struct mbuf *m, *mnext;
purge_flow_set( &(pipe->fs), 1 );
- for (pkt = pipe->head ; pkt ; )
- DN_FREE_PKT(pkt) ;
+ mnext = pipe->head;
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ DN_FREE_PKT(m);
+ }
heap_free( &(pipe->scheduler_heap) );
heap_free( &(pipe->not_eligible_heap) );
@@ -1412,13 +1437,15 @@ dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
{
int i ;
struct dn_flow_queue *q ;
- struct dn_pkt *pkt ;
+ struct mbuf *m ;
for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
for (q = fs->rq[i] ; q ; q = q->next )
- for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) )
+ for (m = q->head ; m ; m = m->m_nextpkt ) {
+ struct dn_pkt_tag *pkt = dn_tag_get(m) ;
if (pkt->rule == r)
pkt->rule = ip_fw_default_rule ;
+ }
}
/*
* when a firewall rule is deleted, scan all queues and remove the flow-id
@@ -1428,8 +1455,9 @@ void
dn_rule_delete(void *r)
{
struct dn_pipe *p ;
- struct dn_pkt *pkt ;
struct dn_flow_set *fs ;
+ struct dn_pkt_tag *pkt ;
+ struct mbuf *m ;
DUMMYNET_LOCK();
/*
@@ -1442,9 +1470,11 @@ dn_rule_delete(void *r)
for ( p = all_pipes ; p ; p = p->next ) {
fs = &(p->fs) ;
dn_rule_delete_fs(fs, r);
- for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) )
+ for (m = p->head ; m ; m = m->m_nextpkt ) {
+ pkt = dn_tag_get(m) ;
if (pkt->rule == r)
pkt->rule = ip_fw_default_rule ;
+ }
}
DUMMYNET_UNLOCK();
}
@@ -1718,7 +1748,7 @@ dummynet_drain()
{
struct dn_flow_set *fs;
struct dn_pipe *p;
- struct dn_pkt *pkt;
+ struct mbuf *m, *mnext;
DUMMYNET_LOCK_ASSERT();
@@ -1731,8 +1761,12 @@ dummynet_drain()
for (p = all_pipes; p; p= p->next ) {
purge_flow_set(&(p->fs), 0);
- for (pkt = p->head ; pkt ; )
- DN_FREE_PKT(pkt) ;
+
+ mnext = p->head;
+ while ((m = mnext) != NULL) {
+ mnext = m->m_nextpkt;
+ DN_FREE_PKT(m);
+ }
p->head = p->tail = NULL ;
}
}
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index a946256..085f6d3 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -111,24 +111,12 @@ struct dn_heap {
#ifdef _KERNEL
/*
- * struct dn_pkt identifies a packet in the dummynet queue, but
- * is also used to tag packets passed back to the various destinations
- * (ip_input(), ip_output(), bdg_forward() and so on).
- * As such the first part of the structure must be a struct m_hdr,
- * followed by dummynet-specific parameters. The m_hdr must be
- * initialized with
- * mh_type = MT_TAG;
- * mh_flags = PACKET_TYPE_DUMMYNET;
- * mh_next = <pointer to the actual mbuf>
- *
- * mh_nextpkt, mh_data are free for dummynet use (mh_nextpkt is used to
- * build a linked list of packets in a dummynet queue).
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state. This is used within
+ * the dummynet code as well as outside when checking for special
+ * processing requirements.
*/
-struct dn_pkt {
- struct m_hdr hdr ;
-#define DN_NEXT(x) (struct dn_pkt *)(x)->hdr.mh_nextpkt
-#define dn_m hdr.mh_next /* packet to be forwarded */
-
+struct dn_pkt_tag {
struct ip_fw *rule; /* matching rule */
int dn_dir; /* action when packet comes out. */
#define DN_TO_IP_OUT 1
@@ -217,7 +205,7 @@ struct dn_flow_queue {
struct dn_flow_queue *next ;
struct ipfw_flow_id id ;
- struct dn_pkt *head, *tail ; /* queue of packets */
+ struct mbuf *head, *tail ; /* queue of packets */
u_int len ;
u_int len_bytes ;
u_long numbytes ; /* credit for transmission (dynamic queues) */
@@ -330,7 +318,7 @@ struct dn_pipe { /* a pipe */
int bandwidth; /* really, bytes/tick. */
int delay ; /* really, ticks */
- struct dn_pkt *head, *tail ; /* packets in delay line */
+ struct mbuf *head, *tail ; /* packets in delay line */
/* WF2Q+ */
struct dn_heap scheduler_heap ; /* top extract - key Finish time*/
@@ -365,4 +353,13 @@ extern ip_dn_io_t *ip_dn_io_ptr;
#define DUMMYNET_LOADED (ip_dn_io_ptr != NULL)
#endif
+/*
+ * Return the IPFW rule recorded in the mbuf's dummynet tag, if any.
+ * The tag list is searched with m_tag_find(); NULL is returned when the
+ * mbuf carries no PACKET_TAG_DUMMYNET tag.
+ */
+static __inline struct ip_fw *
+ip_dn_find_rule(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
+ return mtag ? ((struct dn_pkt_tag *)(mtag+1))->rule : NULL;
+}
#endif /* _IP_DUMMYNET_H */
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 4041712..3db615d 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -110,6 +110,7 @@
#include <machine/in_cksum.h>
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
static int ipfastforward_active = 0;
@@ -132,7 +133,7 @@ ip_fastforward(struct mbuf *m)
struct ip *tip;
struct mbuf *teem = NULL;
#endif
- struct mbuf *tag = NULL;
+ struct m_tag *mtag;
struct route ro;
struct sockaddr_in *dst = NULL;
struct in_ifaddr *ia = NULL;
@@ -150,16 +151,6 @@ ip_fastforward(struct mbuf *m)
if (!ipfastforward_active || !ipforwarding)
return 0;
- /*
- * If there is any MT_TAG we fall back to ip_input because we can't
- * handle TAGs here. Should never happen as we get directly called
- * from the if_output routines.
- */
- if (m->m_type == MT_TAG) {
- KASSERT(0, ("%s: packet with MT_TAG not expected", __func__));
- return 0;
- }
-
M_ASSERTVALID(m);
M_ASSERTPKTHDR(m);
@@ -373,25 +364,13 @@ fallback:
/*
* See if this is a fragment
*/
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
- MGETHDR(tag, M_DONTWAIT, MT_TAG);
- if (tag == NULL)
- goto drop;
- tag->m_flags = PACKET_TAG_DIVERT;
- tag->m_data = (caddr_t)(intptr_t)args.divert_rule;
- tag->m_next = m;
- /* XXX: really bloody hack, see ip_input */
- tag->m_nextpkt = (struct mbuf *)1;
- m = tag;
- tag = NULL;
-
+ if (ip->ip_off & (IP_MF | IP_OFFMASK))
goto droptoours;
- }
/*
* Tee packet
*/
if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
- teem = m_dup(m, M_DONTWAIT);
+ teem = divert_clone(m);
else
teem = m;
if (teem == NULL)
@@ -413,7 +392,7 @@ fallback:
/*
* Deliver packet to divert input routine
*/
- divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
+ divert_packet(teem, 0);
/*
* If this was not tee, we are done
*/
@@ -560,27 +539,13 @@ passin:
/*
* See if this is a fragment
*/
- if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
- MGETHDR(tag, M_DONTWAIT, MT_TAG);
- if (tag == NULL) {
- RTFREE(ro.ro_rt);
- goto drop;
- }
- tag->m_flags = PACKET_TAG_DIVERT;
- tag->m_data = (caddr_t)(intptr_t)args.divert_rule;
- tag->m_next = m;
- /* XXX: really bloody hack, see ip_input */
- tag->m_nextpkt = (struct mbuf *)1;
- m = tag;
- tag = NULL;
-
+ if (ip->ip_off & (IP_MF | IP_OFFMASK))
goto droptoours;
- }
/*
* Tee packet
*/
if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
- teem = m_dup(m, M_DONTWAIT);
+ teem = divert_clone(m);
else
teem = m;
if (teem == NULL)
@@ -602,7 +567,7 @@ passin:
/*
* Deliver packet to divert input routine
*/
- divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
+ divert_packet(teem, 0);
/*
* If this was not tee, we are done
*/
@@ -638,38 +603,24 @@ passout:
if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) {
forwardlocal:
if (args.next_hop) {
- /* XXX leak */
- MGETHDR(tag, M_DONTWAIT, MT_TAG);
- if (tag == NULL) {
+ mtag = m_tag_get(PACKET_TAG_IPFORWARD,
+ sizeof(struct sockaddr_in *),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ /* XXX statistic */
if (ro.ro_rt)
RTFREE(ro.ro_rt);
goto drop;
}
- tag->m_flags = PACKET_TAG_IPFORWARD;
- tag->m_data = (caddr_t)args.next_hop;
- tag->m_next = m;
- /* XXX: really bloody hack,
- * see ip_input */
- tag->m_nextpkt = (struct mbuf *)1;
- m = tag;
- tag = NULL;
+ *(struct sockaddr_in **)(mtag+1) =
+ args.next_hop;
+ m_tag_prepend(m, mtag);
}
#ifdef IPDIVERT
droptoours: /* Used for DIVERT */
#endif
- MGETHDR(tag, M_DONTWAIT, MT_TAG);
- if (tag == NULL) {
- if (ro.ro_rt)
- RTFREE(ro.ro_rt);
- goto drop;
- }
- tag->m_flags = PACKET_TAG_IPFASTFWD_OURS;
- tag->m_data = NULL;
- tag->m_next = m;
- /* XXX: really bloody hack, see ip_input */
- tag->m_nextpkt = (struct mbuf *)1;
- m = tag;
- tag = NULL;
+ /* NB: ip_input understands this */
+ m->m_flags |= M_FASTFWD_OURS;
/* ip still points to the real packet */
ip->ip_len = htons(ip->ip_len);
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index e6eae2d..8e3047d 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -400,7 +400,6 @@ struct ip_fw_args {
int flags; /* for dummynet */
struct ipfw_flow_id f_id; /* grabbed from IP header */
- u_int16_t divert_rule; /* divert cookie */
u_int32_t retval;
};
diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c
index 836be9d..7097b20 100644
--- a/sys/netinet/ip_fw2.c
+++ b/sys/netinet/ip_fw2.c
@@ -66,6 +66,7 @@
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
@@ -1457,6 +1458,7 @@ ipfw_chk(struct ip_fw_args *args)
int dyn_dir = MATCH_UNKNOWN;
ipfw_dyn_rule *q = NULL;
struct ip_fw_chain *chain = &layer3_chain;
+ struct m_tag *mtag;
if (m->m_flags & M_SKIP_FIREWALL)
return 0; /* accept */
@@ -1545,6 +1547,7 @@ ipfw_chk(struct ip_fw_args *args)
after_ip_checks:
IPFW_LOCK(chain); /* XXX expensive? can we run lock free? */
+ mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
if (args->rule) {
/*
* Packet has already been tagged. Look for the next rule
@@ -1567,7 +1570,7 @@ after_ip_checks:
* Find the starting rule. It can be either the first
* one, or the one after divert_rule if asked so.
*/
- int skipto = args->divert_rule;
+ int skipto = mtag ? divert_cookie(mtag) : 0;
f = chain->rules;
if (args->eh == NULL && skipto != 0) {
@@ -1583,7 +1586,9 @@ after_ip_checks:
}
}
}
- args->divert_rule = 0; /* reset to avoid confusion later */
+ /* reset divert rule to avoid confusion later */
+ if (mtag)
+ m_tag_delete(m, mtag);
/*
* Now scan the rules, and parse microinstructions for each rule.
@@ -2018,14 +2023,29 @@ check_body:
goto done;
case O_DIVERT:
- case O_TEE:
+ case O_TEE: {
+ struct divert_tag *dt;
+
if (args->eh) /* not on layer 2 */
break;
- args->divert_rule = f->rulenum;
- retval = (cmd->opcode == O_DIVERT) ?
+ mtag = m_tag_get(PACKET_TAG_DIVERT,
+ sizeof(struct divert_tag),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ /* XXX statistic */
+ /* drop packet */
+ IPFW_UNLOCK(chain);
+ return IP_FW_PORT_DENY_FLAG;
+ }
+ dt = (struct divert_tag *)(mtag+1);
+ dt->cookie = f->rulenum;
+ dt->info = (cmd->opcode == O_DIVERT) ?
cmd->arg1 :
cmd->arg1 | IP_FW_PORT_TEE_FLAG;
+ m_tag_prepend(m, mtag);
+ retval = dt->info;
goto done;
+ }
case O_COUNT:
case O_SKIPTO:
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 0900202..3156b85 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -78,6 +78,7 @@
#include <sys/socketvar.h>
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
#ifdef IPSEC
@@ -239,8 +240,7 @@ static int ip_dooptions(struct mbuf *m, int,
static void ip_forward(struct mbuf *m, int srcrt,
struct sockaddr_in *next_hop);
static void ip_freef(struct ipqhead *, struct ipq *);
-static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *,
- struct ipq *, u_int32_t *, u_int16_t *);
+static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *);
/*
* IP initialization: fill in IP protocol switch table.
@@ -300,17 +300,18 @@ ip_input(struct mbuf *m)
struct in_ifaddr *ia = NULL;
struct ifaddr *ifa;
int i, checkif, hlen = 0;
- int ours = 0;
u_short sum;
struct in_addr pkt_dst;
- u_int32_t divert_info = 0; /* packet divert/tee info */
+ struct m_tag *mtag;
+#ifdef IPDIVERT
+ u_int32_t divert_info; /* packet divert/tee info */
+#endif
struct ip_fw_args args;
int dchg = 0; /* dest changed after fw */
#ifdef PFIL_HOOKS
struct in_addr odst; /* original dst address */
#endif
#ifdef FAST_IPSEC
- struct m_tag *mtag;
struct tdb_ident *tdbi;
struct secpolicy *sp;
int s, error;
@@ -318,57 +319,16 @@ ip_input(struct mbuf *m)
args.eh = NULL;
args.oif = NULL;
- args.rule = NULL;
- args.divert_rule = 0; /* divert cookie */
- args.next_hop = NULL;
-
- /*
- * Grab info from MT_TAG mbufs prepended to the chain.
- *
- * XXX: This is ugly. These pseudo mbuf prepend tags should really
- * be real m_tags. Before these have always been allocated on the
- * callers stack, so we didn't have to free them. Now with
- * ip_fastforward they are true mbufs and we have to free them
- * otherwise we have a leak. Must rewrite ipfw to use m_tags.
- */
- for (; m && m->m_type == MT_TAG;) {
- struct mbuf *m0;
-
- switch(m->_m_tag_id) {
- default:
- printf("ip_input: unrecognised MT_TAG tag %d\n",
- m->_m_tag_id);
- break;
-
- case PACKET_TAG_DUMMYNET:
- args.rule = ((struct dn_pkt *)m)->rule;
- break;
-
- case PACKET_TAG_DIVERT:
- args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
- break;
-
- case PACKET_TAG_IPFORWARD:
- args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
- break;
-
- case PACKET_TAG_IPFASTFWD_OURS:
- ours = 1;
- break;
- }
-
- m0 = m;
- m = m->m_next;
- /* XXX: This is set by ip_fastforward */
- if (m0->m_nextpkt == (struct mbuf *)1)
- m_free(m0);
- }
+ args.next_hop = ip_claim_next_hop(m);
+ args.rule = ip_dn_find_rule(m);
M_ASSERTPKTHDR(m);
- if (ours) /* ip_fastforward firewall changed dest to local */
+ if (m->m_flags & M_FASTFWD_OURS) {
+ /* ip_fastforward firewall changed dest to local */
+ m->m_flags &= ~M_FASTFWD_OURS; /* just in case... */
goto ours;
-
+ }
if (args.rule) { /* dummynet already filtered us */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
@@ -531,7 +491,6 @@ iphack:
#ifdef IPDIVERT
if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
/* Divert or tee packet */
- divert_info = i;
goto ours;
}
#endif
@@ -839,13 +798,11 @@ found:
/*
* Attempt reassembly; if it succeeds, proceed.
- * ip_reass() will return a different mbuf, and update
- * the divert info in divert_info and args.divert_rule.
+ * ip_reass() will return a different mbuf.
*/
ipstat.ips_fragments++;
m->m_pkthdr.header = ip;
- m = ip_reass(m,
- &ipq[sum], fp, &divert_info, &args.divert_rule);
+ m = ip_reass(m, &ipq[sum], fp);
IPQ_UNLOCK();
if (m == 0)
return;
@@ -855,7 +812,7 @@ found:
hlen = ip->ip_hl << 2;
#ifdef IPDIVERT
/* Restore original checksum before diverting packet */
- if (divert_info != 0) {
+ if (divert_find_info(m) != 0) {
ip->ip_len += hlen;
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
@@ -876,12 +833,15 @@ found:
/*
* Divert or tee packet to the divert protocol if required.
*/
+ divert_info = divert_find_info(m);
if (divert_info != 0) {
- struct mbuf *clone = NULL;
+ struct mbuf *clone;
/* Clone packet if we're doing a 'tee' */
if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
- clone = m_dup(m, M_DONTWAIT);
+ clone = divert_clone(m);
+ else
+ clone = NULL;
/* Restore packet header fields to original values */
ip->ip_len += hlen;
@@ -889,7 +849,7 @@ found:
ip->ip_off = htons(ip->ip_off);
/* Deliver packet to divert input routine */
- divert_packet(m, 1, divert_info & 0xffff, args.divert_rule);
+ divert_packet(m, 1);
ipstat.ips_delivered++;
/* If 'tee', continue with original packet */
@@ -900,12 +860,11 @@ found:
ip->ip_len += hlen;
/*
* Jump backwards to complete processing of the
- * packet. But first clear divert_info to avoid
- * entering this block again.
- * We do not need to clear args.divert_rule
- * or args.next_hop as they will not be used.
+ * packet. We do not need to clear args.next_hop
+ * as that will not be used again and the cloned packet
+ * doesn't contain a divert packet tag so we won't
+ * re-enter this block.
*/
- divert_info = 0;
goto pass;
}
#endif
@@ -968,19 +927,18 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
ipstat.ips_delivered++;
NET_PICKUP_GIANT();
if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
- /* TCP needs IPFORWARD info if available */
- struct m_hdr tag;
-
- tag.mh_type = MT_TAG;
- tag.mh_flags = PACKET_TAG_IPFORWARD;
- tag.mh_data = (caddr_t)args.next_hop;
- tag.mh_next = m;
- tag.mh_nextpkt = NULL;
-
- (*inetsw[ip_protox[ip->ip_p]].pr_input)(
- (struct mbuf *)&tag, hlen);
- } else
- (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
+ /* attach next hop info for TCP */
+ mtag = m_tag_get(PACKET_TAG_IPFORWARD,
+ sizeof(struct sockaddr_in *), M_NOWAIT);
+ if (mtag == NULL) {
+ /* XXX statistic */
+ NET_DROP_GIANT();
+ goto bad;
+ }
+ *(struct sockaddr_in **)(mtag+1) = args.next_hop;
+ m_tag_prepend(m, mtag);
+ }
+ (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
NET_DROP_GIANT();
return;
bad:
@@ -999,8 +957,7 @@ bad:
*/
static struct mbuf *
-ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
- u_int32_t *divinfo, u_int16_t *divert_rule)
+ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp)
{
struct ip *ip = mtod(m, struct ip *);
register struct mbuf *p, *q, *nq;
@@ -1042,10 +999,6 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
fp->ipq_dst = ip->ip_dst;
fp->ipq_frags = m;
m->m_nextpkt = NULL;
-#ifdef IPDIVERT
- fp->ipq_div_info = 0;
- fp->ipq_div_cookie = 0;
-#endif
goto inserted;
} else {
fp->ipq_nfrags++;
@@ -1129,16 +1082,15 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
inserted:
#ifdef IPDIVERT
- /*
- * Transfer firewall instructions to the fragment structure.
- * Only trust info in the fragment at offset 0.
- */
- if (ip->ip_off == 0) {
- fp->ipq_div_info = *divinfo;
- fp->ipq_div_cookie = *divert_rule;
+ if (ip->ip_off != 0) {
+ /*
+ * Strip any divert information; only the info
+ * on the first fragment is used/kept.
+ */
+ struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+ if (mtag)
+ m_tag_delete(m, mtag);
}
- *divinfo = 0;
- *divert_rule = 0;
#endif
/*
@@ -1204,14 +1156,6 @@ inserted:
mac_destroy_ipq(fp);
#endif
-#ifdef IPDIVERT
- /*
- * Extract firewall instructions from the fragment structure.
- */
- *divinfo = fp->ipq_div_info;
- *divert_rule = fp->ipq_div_cookie;
-#endif
-
/*
* Create header for new ip packet by
* modifying header of first packet;
@@ -1232,10 +1176,6 @@ inserted:
return (m);
dropfrag:
-#ifdef IPDIVERT
- *divinfo = 0;
- *divert_rule = 0;
-#endif
ipstat.ips_fragdropped++;
if (fp != NULL)
fp->ipq_nfrags--;
@@ -1781,6 +1721,7 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
struct in_ifaddr *ia;
int error, type = 0, code = 0;
struct mbuf *mcopy;
+ struct m_tag *mtag;
n_long dest;
struct in_addr pkt_dst;
struct ifnet *destifp;
@@ -1913,21 +1854,18 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
RTFREE(rt);
}
- {
- struct m_hdr tag;
-
if (next_hop) {
- /* Pass IPFORWARD info if available */
-
- tag.mh_type = MT_TAG;
- tag.mh_flags = PACKET_TAG_IPFORWARD;
- tag.mh_data = (caddr_t)next_hop;
- tag.mh_next = m;
- tag.mh_nextpkt = NULL;
- m = (struct mbuf *)&tag;
+ mtag = m_tag_get(PACKET_TAG_IPFORWARD,
+ sizeof(struct sockaddr_in *), M_NOWAIT);
+ if (mtag == NULL) {
+ /* XXX statistic */
+ m_freem(m);
+ return;
+ }
+ *(struct sockaddr_in **)(mtag+1) = next_hop;
+ m_tag_prepend(m, mtag);
}
error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL);
- }
if (error)
ipstat.ips_cantforward++;
else {
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index e567936..26d76cb 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -90,6 +90,7 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
#endif /*FAST_IPSEC*/
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
@@ -130,12 +131,11 @@ extern struct protosw inetsw[];
* inserted, so must have a NULL opt pointer.
*/
int
-ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
+ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro,
int flags, struct ip_moptions *imo, struct inpcb *inp)
{
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
- struct mbuf *m;
int hlen = sizeof (struct ip);
int len, off, error = 0;
struct sockaddr_in *dst = NULL; /* keep compiler happy */
@@ -143,11 +143,13 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
int isbroadcast, sw_csum;
struct in_addr pkt_dst;
struct route iproute;
+ struct m_tag *dummytag; /* dummynet packet tag */
+ struct m_tag *mtag;
+ struct mbuf *m0; /* XXX */
#ifdef IPSEC
struct secpolicy *sp = NULL;
#endif
#ifdef FAST_IPSEC
- struct m_tag *mtag;
struct secpolicy *sp = NULL;
struct tdb_ident *tdbi;
int s;
@@ -157,42 +159,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
args.eh = NULL;
args.rule = NULL;
- args.next_hop = NULL;
- args.divert_rule = 0; /* divert cookie */
-
- /* Grab info from MT_TAG mbufs prepended to the chain. */
- for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) {
- switch(m0->_m_tag_id) {
- default:
- printf("ip_output: unrecognised MT_TAG tag %d\n",
- m0->_m_tag_id);
- break;
-
- case PACKET_TAG_DUMMYNET:
- /*
- * the packet was already tagged, so part of the
- * processing was already done, and we need to go down.
- * Get parameters from the header.
- */
- args.rule = ((struct dn_pkt *)m0)->rule;
- opt = NULL ;
- ro = & ( ((struct dn_pkt *)m0)->ro ) ;
- imo = NULL ;
- dst = ((struct dn_pkt *)m0)->dn_dst ;
- ifp = ((struct dn_pkt *)m0)->ifp ;
- flags = ((struct dn_pkt *)m0)->flags ;
- break;
-
- case PACKET_TAG_DIVERT:
- args.divert_rule = (intptr_t)m0->m_data & 0xffff;
- break;
-
- case PACKET_TAG_IPFORWARD:
- args.next_hop = (struct sockaddr_in *)m0->m_data;
- break;
- }
- }
- m = m0;
+ args.next_hop = ip_claim_next_hop(m);
M_ASSERTPKTHDR(m);
@@ -204,7 +171,34 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
if (inp != NULL)
INP_LOCK_ASSERT(inp);
- if (args.rule != NULL) { /* dummynet already saw us */
+ /*
+ * When packet comes from dummynet restore state from
+ * previous processing instead of the header. Yech!
+ *
+ * XXX add conditional compilation?
+ */
+ dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL);
+ if (dummytag != NULL) {
+ struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1);
+
+ /*
+ * NB: the route in the tag is known to have a
+ * reference that must be free'd, but doing this
+ * before the storage is reclaimed is painful due
+ * to some of the contorted code in this routine.
+ * So instead unlink the tag from the mbuf so it
+ * doesn't get reclaimed and do the cleanup explicitly
+ * below. We should be able to do this automatically
+ * using a uma dtor method when m_tag's can be
+ * allocated from zones.
+ */
+ m_tag_unlink(m, dummytag);
+
+ args.rule = dt->rule;
+ ro = &dt->ro;
+ dst = dt->dn_dst;
+ ifp = dt->ifp;
+
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2 ;
if (ro->ro_rt)
@@ -557,7 +551,7 @@ sendit:
dst = (struct sockaddr_in *)state.dst;
if (error) {
/* mbuf is already reclaimed in ipsec4_output. */
- m0 = NULL;
+ m = NULL;
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
@@ -795,11 +789,13 @@ spd_done:
}
#ifdef IPDIVERT
if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
- struct mbuf *clone = NULL;
+ struct mbuf *clone;
/* Clone packet if we're doing a 'tee' */
if ((off & IP_FW_PORT_TEE_FLAG) != 0)
- clone = m_dup(m, M_DONTWAIT);
+ clone = divert_clone(m);
+ else
+ clone = NULL;
/*
* XXX
@@ -816,7 +812,7 @@ spd_done:
ip->ip_off = htons(ip->ip_off);
/* Deliver packet to divert input routine */
- divert_packet(m, 0, off & 0xffff, args.divert_rule);
+ divert_packet(m, 0);
/* If 'tee', continue with original packet */
if (clone != NULL) {
@@ -894,26 +890,31 @@ spd_done:
break;
}
if (ia) { /* tell ip_input "dont filter" */
- struct m_hdr tag;
-
- tag.mh_type = MT_TAG;
- tag.mh_flags = PACKET_TAG_IPFORWARD;
- tag.mh_data = (caddr_t)args.next_hop;
- tag.mh_next = m;
- tag.mh_nextpkt = NULL;
+ mtag = m_tag_get(PACKET_TAG_IPFORWARD,
+ sizeof(struct sockaddr_in *),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ /* XXX statistic */
+ error = ENOBUFS; /* XXX */
+ goto bad;
+ }
+ *(struct sockaddr_in **)(mtag+1) =
+ args.next_hop;
+ m_tag_prepend(m, mtag);
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = ifunit("lo0");
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
- m0->m_pkthdr.csum_data = 0xffff;
+ m->m_pkthdr.csum_data = 0xffff;
}
m->m_pkthdr.csum_flags |=
CSUM_IP_CHECKED | CSUM_IP_VALID;
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
- ip_input((struct mbuf *)&tag);
+ /* XXX netisr_queue(NETISR_IP, m); */
+ ip_input(m);
goto done;
}
/*
@@ -1070,6 +1071,12 @@ done:
RTFREE(ro->ro_rt);
ro->ro_rt = NULL;
}
+ if (dummytag) {
+ struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1);
+ if (dt->ro.ro_rt)
+ RTFREE(dt->ro.ro_rt);
+ m_tag_free(dummytag);
+ }
#ifdef IPSEC
if (sp != NULL) {
KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 4bd600d..23d1d00 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -65,8 +65,6 @@ struct ipq {
struct mbuf *ipq_frags; /* to ip headers of fragments */
struct in_addr ipq_src,ipq_dst;
u_char ipq_nfrags; /* # frags in this packet */
- u_int32_t ipq_div_info; /* ipfw divert port & flags */
- u_int16_t ipq_div_cookie; /* ipfw divert cookie */
struct label *ipq_label; /* MAC label */
};
#endif /* _KERNEL */
@@ -197,14 +195,23 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *);
extern void (*ip_rsvp_force_done)(struct socket *);
extern void (*rsvp_input_p)(struct mbuf *m, int off);
+#define M_FASTFWD_OURS M_PROTO1 /* sent by ip_fastforward to ip_input */
-#ifdef IPDIVERT
-void div_init(void);
-void div_input(struct mbuf *, int);
-void div_ctlinput(int, struct sockaddr *, void *);
-void divert_packet(struct mbuf *m, int incoming, int port, int rule);
-extern struct pr_usrreqs div_usrreqs;
-#endif
+/*
+ * Return the next hop address associated with the mbuf, if any.
+ * If a tag is present it is also removed.
+ */
+static __inline struct sockaddr_in *
+ip_claim_next_hop(struct mbuf *m)
+{
+ struct m_tag *mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
+ if (mtag) {
+ struct sockaddr_in *sin = *(struct sockaddr_in **)(mtag+1);
+ m_tag_delete(m, mtag);
+ return sin;
+ } else
+ return NULL;
+}
#ifdef PFIL_HOOKS
extern struct pfil_head inet_pfil_hook;
diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c
index 89e9d7c..421ecbe 100644
--- a/sys/netinet/tcp_debug.c
+++ b/sys/netinet/tcp_debug.c
@@ -54,6 +54,7 @@
#include <sys/systm.h>
#include <sys/protosw.h>
#include <sys/socket.h>
+#include <sys/mbuf.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 17011ca..a868f80 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -360,7 +360,7 @@ tcp_input(m, off0)
struct tcpopt to; /* options in this segment */
struct rmxp_tao tao; /* our TAO cache entry */
int headlocked = 0;
- struct sockaddr_in *next_hop = NULL;
+ struct sockaddr_in *next_hop;
int rstreason; /* For badport_bandlim accounting purposes */
struct ip6_hdr *ip6 = NULL;
@@ -380,11 +380,7 @@ tcp_input(m, off0)
short ostate = 0;
#endif
- /* Grab info from MT_TAG mbufs prepended to the chain. */
- for (;m && m->m_type == MT_TAG; m = m->m_next) {
- if (m->_m_tag_id == PACKET_TAG_IPFORWARD)
- next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
- }
+ next_hop = ip_claim_next_hop(m);
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 17011ca..a868f80 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -360,7 +360,7 @@ tcp_input(m, off0)
struct tcpopt to; /* options in this segment */
struct rmxp_tao tao; /* our TAO cache entry */
int headlocked = 0;
- struct sockaddr_in *next_hop = NULL;
+ struct sockaddr_in *next_hop;
int rstreason; /* For badport_bandlim accounting purposes */
struct ip6_hdr *ip6 = NULL;
@@ -380,11 +380,7 @@ tcp_input(m, off0)
short ostate = 0;
#endif
- /* Grab info from MT_TAG mbufs prepended to the chain. */
- for (;m && m->m_type == MT_TAG; m = m->m_next) {
- if (m->_m_tag_id == PACKET_TAG_IPFORWARD)
- next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
- }
+ next_hop = ip_claim_next_hop(m);
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 6ce9922..fa2cc40 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -221,8 +221,8 @@ struct mbuf {
#if 0
#define MT_RIGHTS 12 /* access rights */
#define MT_IFADDR 13 /* interface address */
+#define MT_TAG 13 /* deprecated: use m_tag's instead */
#endif
-#define MT_TAG 13 /* volatile metadata associated to pkts */
#define MT_CONTROL 14 /* extra-data protocol message */
#define MT_OOBDATA 15 /* expedited data */
#define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */
@@ -549,28 +549,11 @@ struct mbuf *
#define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */
#define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */
#define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */
-
-/*
- * As a temporary and low impact solution to replace the even uglier
- * approach used so far in some parts of the network stack (which relies
- * on global variables), packet tag-like annotations are stored in MT_TAG
- * mbufs (or lookalikes) prepended to the actual mbuf chain.
- *
- * m_type = MT_TAG
- * m_flags = m_tag_id
- * m_next = next buffer in chain.
- *
- * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines.
- */
-#define _m_tag_id m_hdr.mh_flags
-
-/* Packet tags used in the FreeBSD network stack. */
#define PACKET_TAG_DUMMYNET 15 /* dummynet info */
#define PACKET_TAG_IPFW 16 /* ipfw classification */
#define PACKET_TAG_DIVERT 17 /* divert info */
#define PACKET_TAG_IPFORWARD 18 /* ipforward info */
#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */
-#define PACKET_TAG_IPFASTFWD_OURS 20 /* IP fastforward dropback */
/* Packet tag routines. */
struct m_tag *m_tag_alloc(u_int32_t, int, int, int);
OpenPOWER on IntegriCloud