From 1504165dce30978b55fb5992d94ebc8914ced705 Mon Sep 17 00:00:00 2001 From: mlaier Date: Wed, 25 Feb 2004 19:55:29 +0000 Subject: Re-remove MT_TAGs. The problems with dummynet have been fixed now. Tested by: -current, bms(mentor), me Approved by: bms(mentor), sam --- sys/netinet/in_proto.c | 1 + sys/netinet/ip_divert.c | 69 ++++++++++----- sys/netinet/ip_divert.h | 83 ++++++++++++++++++ sys/netinet/ip_dummynet.c | 219 +++++++++++++++++++++++++++------------------- sys/netinet/ip_dummynet.h | 40 +++++---- sys/netinet/ip_fastfwd.c | 112 +++++++----------------- sys/netinet/ip_fw.h | 1 - sys/netinet/ip_fw2.c | 30 +++++-- sys/netinet/ip_input.c | 167 +++++++++++------------------------ sys/netinet/ip_output.c | 99 ++++++++++----------- sys/netinet/ip_var.h | 28 +++--- sys/netinet/tcp_debug.c | 1 + sys/netinet/tcp_input.c | 7 +- sys/netinet/tcp_reass.c | 7 +- 14 files changed, 461 insertions(+), 403 deletions(-) create mode 100644 sys/netinet/ip_divert.h (limited to 'sys/netinet') diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index c57f878..6f2e8da 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #ifdef PIM #include diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index fe560a0..ee1f97c 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -68,6 +68,7 @@ #include #include #include +#include #include /* @@ -150,17 +151,21 @@ div_input(struct mbuf *m, int off) * then pass them along with mbuf chain. 
*/ void -divert_packet(struct mbuf *m, int incoming, int port, int rule) +divert_packet(struct mbuf *m, int incoming) { struct ip *ip; struct inpcb *inp; struct socket *sa; u_int16_t nport; struct sockaddr_in divsrc; + struct m_tag *mtag; - /* Sanity check */ - KASSERT(port != 0, ("%s: port=0", __func__)); - + mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + if (mtag == NULL) { + printf("%s: no divert tag\n", __func__); + m_freem(m); + return; + } /* Assure header */ if (m->m_len < sizeof(struct ip) && (m = m_pullup(m, sizeof(struct ip))) == 0) @@ -174,7 +179,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) bzero(&divsrc, sizeof(divsrc)); divsrc.sin_len = sizeof(divsrc); divsrc.sin_family = AF_INET; - divsrc.sin_port = rule; /* record matching rule */ + divsrc.sin_port = divert_cookie(mtag); /* record matching rule */ if (incoming) { struct ifaddr *ifa; @@ -234,7 +239,7 @@ divert_packet(struct mbuf *m, int incoming, int port, int rule) mtx_lock(&Giant); /* Put packet on socket queue, if any */ sa = NULL; - nport = htons((u_int16_t)port); + nport = htons((u_int16_t)divert_info(mtag)); INP_INFO_RLOCK(&divcbinfo); LIST_FOREACH(inp, &divcb, inp_list) { INP_LOCK(inp); @@ -273,19 +278,8 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, struct mbuf *control) { int error = 0; - struct m_hdr divert_tag; - /* - * Prepare the tag for divert info. Note that a packet - * with a 0 tag in mh_data is effectively untagged, - * so we could optimize that case. - */ - divert_tag.mh_type = MT_TAG; - divert_tag.mh_flags = PACKET_TAG_DIVERT; - divert_tag.mh_next = m; - divert_tag.mh_data = 0; /* the matching rule # */ - divert_tag.mh_nextpkt = NULL; - m->m_pkthdr.rcvif = NULL; /* XXX is it necessary ? 
*/ + KASSERT(m->m_pkthdr.rcvif == NULL, ("rcvif not null")); #ifdef MAC mac_create_mbuf_from_socket(so, m); @@ -296,9 +290,21 @@ div_output(struct socket *so, struct mbuf *m, /* Loopback avoidance and state recovery */ if (sin) { + struct m_tag *mtag; + struct divert_tag *dt; int i; - divert_tag.mh_data = (caddr_t)(uintptr_t)sin->sin_port; + mtag = m_tag_get(PACKET_TAG_DIVERT, + sizeof(struct divert_tag), M_NOWAIT); + if (mtag == NULL) { + error = ENOBUFS; + goto cantsend; + } + dt = (struct divert_tag *)(mtag+1); + dt->info = 0; + dt->cookie = sin->sin_port; + m_tag_prepend(m, mtag); + /* * Find receive interface with the given name, stuffed * (if it exists) in the sin_zero[] field. @@ -335,7 +341,7 @@ div_output(struct socket *so, struct mbuf *m, /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ - error = ip_output((struct mbuf *)&divert_tag, + error = ip_output(m, inp->inp_options, NULL, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, @@ -362,7 +368,7 @@ div_output(struct socket *so, struct mbuf *m, m->m_pkthdr.rcvif = ifa->ifa_ifp; } /* Send packet to input processing */ - ip_input((struct mbuf *)&divert_tag); + ip_input(m); } return error; @@ -372,6 +378,27 @@ cantsend: return error; } +/* + * Return a copy of the specified packet, but without + * the divert tag. This is used when packets are ``tee'd'' + * and we want the cloned copy to not have divert processing. 
+ */ +struct mbuf * +divert_clone(struct mbuf *m) +{ + struct mbuf *clone; + struct m_tag *mtag; + + clone = m_dup(m, M_DONTWAIT); + if (clone != NULL) { + /* strip divert tag from copy */ + mtag = m_tag_find(clone, PACKET_TAG_DIVERT, NULL); + if (mtag != NULL) + m_tag_delete(clone, mtag); + } + return clone; +} + static int div_attach(struct socket *so, int proto, struct thread *td) { diff --git a/sys/netinet/ip_divert.h b/sys/netinet/ip_divert.h new file mode 100644 index 0000000..d925918 --- /dev/null +++ b/sys/netinet/ip_divert.h @@ -0,0 +1,83 @@ +/*- + * Copyright (c) 2003 Sam Leffler, Errno Consulting + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + * + * $FreeBSD$ + */ + +#ifndef _NETINET_IP_DIVERT_H_ +#define _NETINET_IP_DIVERT_H_ + +/* + * Divert socket definitions. + */ + +struct divert_tag { + u_int32_t info; /* port & flags */ + u_int16_t cookie; /* ipfw rule number */ +}; + +/* + * Return the divert cookie associated with the mbuf; if any. + */ +static __inline u_int16_t +divert_cookie(struct m_tag *mtag) +{ + return ((struct divert_tag *)(mtag+1))->cookie; +} +static __inline u_int16_t +divert_find_cookie(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + return mtag ? divert_cookie(mtag) : 0; +} + +/* + * Return the divert info associated with the mbuf; if any. + */ +static __inline u_int32_t +divert_info(struct m_tag *mtag) +{ + return ((struct divert_tag *)(mtag+1))->info; +} +static __inline u_int32_t +divert_find_info(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + return mtag ? 
divert_info(mtag) : 0; +} + +extern void div_init(void); +extern void div_input(struct mbuf *, int); +extern void div_ctlinput(int, struct sockaddr *, void *); +extern void divert_packet(struct mbuf *m, int incoming); +extern struct mbuf *divert_clone(struct mbuf *); +extern struct pr_usrreqs div_usrreqs; +#endif /* _NETINET_IP_DIVERT_H_ */ diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c index 1e9ecb8..821b69b 100644 --- a/sys/netinet/ip_dummynet.c +++ b/sys/netinet/ip_dummynet.c @@ -405,6 +405,22 @@ heap_free(struct dn_heap *h) */ /* + * Return the mbuf tag holding the dummynet state. As an optimization + * this is assumed to be the first tag on the list. If this turns out + * wrong we'll need to search the list. + */ +static struct dn_pkt_tag * +dn_tag_get(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_first(m); + KASSERT(mtag != NULL && + mtag->m_tag_cookie == MTAG_ABI_COMPAT && + mtag->m_tag_id == PACKET_TAG_DUMMYNET, + ("packet on dummynet queue w/o dummynet tag!")); + return (struct dn_pkt_tag *)(mtag+1); +} + +/* * Scheduler functions: * * transmit_event() is called when the delay-line needs to enter @@ -425,87 +441,86 @@ heap_free(struct dn_heap *h) static void transmit_event(struct dn_pipe *pipe) { - struct dn_pkt *pkt ; + struct mbuf *m ; + struct dn_pkt_tag *pkt ; DUMMYNET_LOCK_ASSERT(); - while ( (pkt = pipe->head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) { + while ( (m = pipe->head) ) { + pkt = dn_tag_get(m); + if ( !DN_KEY_LEQ(pkt->output_time, curr_time) ) + break; /* * first unlink, then call procedures, since ip_input() can invoke * ip_output() and viceversa, thus causing nested calls */ - pipe->head = DN_NEXT(pkt) ; + pipe->head = m->m_nextpkt ; + m->m_nextpkt = NULL; /* XXX: drop the lock for now to avoid LOR's */ DUMMYNET_UNLOCK(); - /* - * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf - * (NOT A REAL one, just a small block of malloc'ed memory) with - * m_type = MT_TAG, m_flags = PACKET_TAG_DUMMYNET 
- * dn_m (m_next) = actual mbuf to be processed by ip_input/output - * and some other fields. - * The block IS FREED HERE because it contains parameters passed - * to the called routine. - */ switch (pkt->dn_dir) { case DN_TO_IP_OUT: - (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL, NULL); - rt_unref (pkt->ro.ro_rt, __func__) ; + (void)ip_output(m, NULL, NULL, pkt->flags, NULL, NULL); break ; case DN_TO_IP_IN : - ip_input((struct mbuf *)pkt) ; + ip_input(m) ; break ; case DN_TO_BDG_FWD : - if (!BDG_LOADED) { + /* + * The bridge requires/assumes the Ethernet header is + * contiguous in the first mbuf header. Insure this is true. + */ + if (BDG_LOADED) { + if (m->m_len < ETHER_HDR_LEN && + (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { + printf("dummynet/bridge: pullup fail, dropping pkt\n"); + break; + } + m = bdg_forward_ptr(m, pkt->ifp); + } else { /* somebody unloaded the bridge module. Drop pkt */ /* XXX rate limit */ printf("dummynet: dropping bridged packet trapped in pipe\n"); - m_freem(pkt->dn_m); - break; - } /* fallthrough */ - case DN_TO_ETH_DEMUX: - { - struct mbuf *m = (struct mbuf *)pkt ; + } + if (m) + m_freem(m); + break; - if (pkt->dn_m->m_len < ETHER_HDR_LEN && - (pkt->dn_m = m_pullup(pkt->dn_m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/bridge: pullup fail, dropping pkt\n"); - break; - } - /* - * bdg_forward() wants a pointer to the pseudo-mbuf-header, but - * on return it will supply the pointer to the actual packet - * (originally pkt->dn_m, but could be something else now) if - * it has not consumed it. - */ - if (pkt->dn_dir == DN_TO_BDG_FWD) { - m = bdg_forward_ptr(m, pkt->ifp); - if (m) - m_freem(m); - } else - ether_demux(NULL, m); /* which consumes the mbuf */ + case DN_TO_ETH_DEMUX: + /* + * The Ethernet code assumes the Ethernet header is + * contiguous in the first mbuf header. Insure this is true. 
+ */ + if (m->m_len < ETHER_HDR_LEN && + (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { + printf("dummynet/ether: pullup fail, dropping pkt\n"); + break; } + ether_demux(m->m_pkthdr.rcvif, m); /* which consumes the mbuf */ break ; + case DN_TO_ETH_OUT: - ether_output_frame(pkt->ifp, (struct mbuf *)pkt); + ether_output_frame(pkt->ifp, m); break; default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(pkt->dn_m); + m_freem(m); break ; } - free(pkt, M_DUMMYNET); DUMMYNET_LOCK(); } /* if there are leftover packets, put into the heap for next event */ - if ( (pkt = pipe->head) ) - heap_insert(&extract_heap, pkt->output_time, pipe ) ; - /* XXX should check errors on heap_insert, by draining the - * whole pipe p and hoping in the future we are more successful - */ + if ( (m = pipe->head) ) { + pkt = dn_tag_get(m) ; + /* XXX should check errors on heap_insert, by draining the + * whole pipe p and hoping in the future we are more successful + */ + heap_insert(&extract_heap, pkt->output_time, pipe ) ; + } } /* @@ -513,8 +528,8 @@ transmit_event(struct dn_pipe *pipe) * before being able to transmit a packet. 
The credit is taken from * either a pipe (WF2Q) or a flow_queue (per-flow queueing) */ -#define SET_TICKS(pkt, q, p) \ - (pkt->dn_m->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ +#define SET_TICKS(_m, q, p) \ + ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \ p->bandwidth ; /* @@ -522,21 +537,23 @@ transmit_event(struct dn_pipe *pipe) * and put into delay line (p_queue) */ static void -move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, +move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p, int len) { - q->head = DN_NEXT(pkt) ; + struct dn_pkt_tag *dt = dn_tag_get(pkt); + + q->head = pkt->m_nextpkt ; q->len-- ; q->len_bytes -= len ; - pkt->output_time = curr_time + p->delay ; + dt->output_time = curr_time + p->delay ; if (p->head == NULL) p->head = pkt; else - DN_NEXT(p->tail) = pkt; + p->tail->m_nextpkt = pkt; p->tail = pkt; - DN_NEXT(p->tail) = NULL; + p->tail->m_nextpkt = NULL; } /* @@ -549,7 +566,7 @@ move_pkt(struct dn_pkt *pkt, struct dn_flow_queue *q, static void ready_event(struct dn_flow_queue *q) { - struct dn_pkt *pkt; + struct mbuf *pkt; struct dn_pipe *p = q->fs->pipe ; int p_was_empty ; @@ -571,7 +588,7 @@ ready_event(struct dn_flow_queue *q) */ q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth; while ( (pkt = q->head) != NULL ) { - int len = pkt->dn_m->m_pkthdr.len; + int len = pkt->m_pkthdr.len; int len_scaled = p->bandwidth ? len*8*hz : 0 ; if (len_scaled > q->numbytes ) break ; @@ -639,9 +656,9 @@ ready_event_wfq(struct dn_pipe *p) while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) { if (sch->elements > 0) { /* have some eligible pkts to send out */ struct dn_flow_queue *q = sch->p[0].object ; - struct dn_pkt *pkt = q->head; + struct mbuf *pkt = q->head; struct dn_flow_set *fs = q->fs; - u_int64_t len = pkt->dn_m->m_pkthdr.len; + u_int64_t len = pkt->m_pkthdr.len; int len_scaled = p->bandwidth ? 
len*8*hz : 0 ; heap_extract(sch, NULL); /* remove queue from heap */ @@ -658,7 +675,7 @@ ready_event_wfq(struct dn_pipe *p) * update F and position in backlogged queue, then * put flow in not_eligible_heap (we will fix this later). */ - len = (q->head)->dn_m->m_pkthdr.len; + len = (q->head)->m_pkthdr.len; q->F += (len<weight ; if (DN_KEY_LEQ(q->S, p->V)) heap_insert(neh, q->S, q); @@ -713,7 +730,7 @@ ready_event_wfq(struct dn_pipe *p) if (p->bandwidth > 0) t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ; - p->tail->output_time += t ; + dn_tag_get(p->tail)->output_time += t ; p->sched_time = curr_time ; heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); /* XXX should check errors on heap_insert, and drain the whole @@ -1116,7 +1133,8 @@ locate_flowset(int pipe_nr, struct ip_fw *rule) static int dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) { - struct dn_pkt *pkt; + struct dn_pkt_tag *pkt; + struct m_tag *mtag; struct dn_flow_set *fs; struct dn_pipe *pipe ; u_int64_t len = m->m_pkthdr.len ; @@ -1124,7 +1142,12 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) int is_pipe; #if IPFW2 ipfw_insn *cmd = fwa->rule->cmd + fwa->rule->act_ofs; +#endif + KASSERT(m->m_nextpkt == NULL, + ("dummynet_io: mbuf queue passed to dummynet")); + +#if IPFW2 if (cmd->opcode == O_LOG) cmd += F_LEN(cmd); is_pipe = (cmd->opcode == O_PIPE); @@ -1175,16 +1198,16 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) goto dropit ; /* XXX expensive to zero, see if we can remove it*/ - pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_DUMMYNET, M_NOWAIT|M_ZERO); - if ( pkt == NULL ) + mtag = m_tag_get(PACKET_TAG_DUMMYNET, + sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO); + if ( mtag == NULL ) goto dropit ; /* cannot allocate packet header */ + m_tag_prepend(m, mtag); /* attach to mbuf chain */ + + pkt = (struct dn_pkt_tag *)(mtag+1); /* ok, i can handle the pkt now... 
*/ /* build and enqueue packet + parameters */ - pkt->hdr.mh_type = MT_TAG; - pkt->hdr.mh_flags = PACKET_TAG_DUMMYNET; pkt->rule = fwa->rule ; - DN_NEXT(pkt) = NULL; - pkt->dn_m = m; pkt->dn_dir = dir ; pkt->ifp = fwa->oif; @@ -1206,14 +1229,14 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) pkt->flags = fwa->flags; } if (q->head == NULL) - q->head = pkt; + q->head = m; else - DN_NEXT(q->tail) = pkt; - q->tail = pkt; + q->tail->m_nextpkt = m; + q->tail = m; q->len++; q->len_bytes += len ; - if ( q->head != pkt ) /* flow was not idle, we are done */ + if ( q->head != m ) /* flow was not idle, we are done */ goto done; /* * If we reach this point the flow was previously idle, so we need @@ -1226,7 +1249,7 @@ dummynet_io(struct mbuf *m, int pipe_nr, int dir, struct ip_fw_args *fwa) */ dn_key t = 0 ; if (pipe->bandwidth) - t = SET_TICKS(pkt, q, pipe); + t = SET_TICKS(m, q, pipe); q->sched_time = curr_time ; if (t == 0) /* must process it now */ ready_event( q ); @@ -1300,12 +1323,10 @@ dropit: * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) * Doing this would probably save us the initial bzero of dn_pkt */ -#define DN_FREE_PKT(pkt) { \ - struct dn_pkt *n = pkt ; \ - rt_unref ( n->ro.ro_rt, __func__ ) ; \ - m_freem(n->dn_m); \ - pkt = DN_NEXT(n) ; \ - free(n, M_DUMMYNET) ; } +#define DN_FREE_PKT(_m) do { \ + rt_unref(dn_tag_get(_m)->ro.ro_rt, __func__); \ + m_freem(_m); \ +} while (0) /* * Dispose all packets and flow_queues on a flow_set. 
@@ -1316,7 +1337,6 @@ dropit: static void purge_flow_set(struct dn_flow_set *fs, int all) { - struct dn_pkt *pkt ; struct dn_flow_queue *q, *qn ; int i ; @@ -1324,8 +1344,13 @@ purge_flow_set(struct dn_flow_set *fs, int all) for (i = 0 ; i <= fs->rq_size ; i++ ) { for (q = fs->rq[i] ; q ; q = qn ) { - for (pkt = q->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + struct mbuf *m, *mnext; + + mnext = q->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } qn = q->next ; free(q, M_DUMMYNET); } @@ -1352,12 +1377,15 @@ purge_flow_set(struct dn_flow_set *fs, int all) static void purge_pipe(struct dn_pipe *pipe) { - struct dn_pkt *pkt ; + struct mbuf *m, *mnext; purge_flow_set( &(pipe->fs), 1 ); - for (pkt = pipe->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + mnext = pipe->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); @@ -1412,13 +1440,15 @@ dn_rule_delete_fs(struct dn_flow_set *fs, void *r) { int i ; struct dn_flow_queue *q ; - struct dn_pkt *pkt ; + struct mbuf *m ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ for (q = fs->rq[i] ; q ; q = q->next ) - for (pkt = q->head ; pkt ; pkt = DN_NEXT(pkt) ) + for (m = q->head ; m ; m = m->m_nextpkt ) { + struct dn_pkt_tag *pkt = dn_tag_get(m) ; if (pkt->rule == r) pkt->rule = ip_fw_default_rule ; + } } /* * when a firewall rule is deleted, scan all queues and remove the flow-id @@ -1428,8 +1458,9 @@ void dn_rule_delete(void *r) { struct dn_pipe *p ; - struct dn_pkt *pkt ; struct dn_flow_set *fs ; + struct dn_pkt_tag *pkt ; + struct mbuf *m ; DUMMYNET_LOCK(); /* @@ -1442,9 +1473,11 @@ dn_rule_delete(void *r) for ( p = all_pipes ; p ; p = p->next ) { fs = &(p->fs) ; dn_rule_delete_fs(fs, r); - for (pkt = p->head ; pkt ; pkt = DN_NEXT(pkt) ) + for (m = p->head ; m ; m = m->m_nextpkt ) { + pkt = dn_tag_get(m) ; if (pkt->rule == r) pkt->rule = ip_fw_default_rule ; + } } DUMMYNET_UNLOCK(); } 
@@ -1718,7 +1751,7 @@ dummynet_drain() { struct dn_flow_set *fs; struct dn_pipe *p; - struct dn_pkt *pkt; + struct mbuf *m, *mnext; DUMMYNET_LOCK_ASSERT(); @@ -1731,8 +1764,12 @@ dummynet_drain() for (p = all_pipes; p; p= p->next ) { purge_flow_set(&(p->fs), 0); - for (pkt = p->head ; pkt ; ) - DN_FREE_PKT(pkt) ; + + mnext = p->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } p->head = p->tail = NULL ; } } diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h index bc506af..28e85a9 100644 --- a/sys/netinet/ip_dummynet.h +++ b/sys/netinet/ip_dummynet.h @@ -111,24 +111,12 @@ struct dn_heap { #ifdef _KERNEL /* - * struct dn_pkt identifies a packet in the dummynet queue, but - * is also used to tag packets passed back to the various destinations - * (ip_input(), ip_output(), bdg_forward() and so on). - * As such the first part of the structure must be a struct m_hdr, - * followed by dummynet-specific parameters. The m_hdr must be - * initialized with - * mh_type = MT_TAG; - * mh_flags = PACKET_TYPE_DUMMYNET; - * mh_next = - * - * mh_nextpkt, mh_data are free for dummynet use (mh_nextpkt is used to - * build a linked list of packets in a dummynet queue). + * Packets processed by dummynet have an mbuf tag associated with + * them that carries their dummynet state. This is used within + * the dummynet code as well as outside when checking for special + * processing requirements. */ -struct dn_pkt { - struct m_hdr hdr ; -#define DN_NEXT(x) (struct dn_pkt *)(x)->hdr.mh_nextpkt -#define dn_m hdr.mh_next /* packet to be forwarded */ - +struct dn_pkt_tag { struct ip_fw *rule; /* matching rule */ int dn_dir; /* action when packet comes out. 
*/ #define DN_TO_IP_OUT 1 @@ -217,7 +205,7 @@ struct dn_flow_queue { struct dn_flow_queue *next ; struct ipfw_flow_id id ; - struct dn_pkt *head, *tail ; /* queue of packets */ + struct mbuf *head, *tail ; /* queue of packets */ u_int len ; u_int len_bytes ; u_long numbytes ; /* credit for transmission (dynamic queues) */ @@ -330,7 +318,7 @@ struct dn_pipe { /* a pipe */ int bandwidth; /* really, bytes/tick. */ int delay ; /* really, ticks */ - struct dn_pkt *head, *tail ; /* packets in delay line */ + struct mbuf *head, *tail ; /* packets in delay line */ /* WF2Q+ */ struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ @@ -364,5 +352,19 @@ extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; extern ip_dn_io_t *ip_dn_io_ptr; #define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) +/* + * Return the IPFW rule associated with the dummynet tag; if any. + * Make sure that the dummynet tag is not reused by lower layers. + */ +static __inline struct ip_fw * +ip_dn_claim_rule(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + if (mtag != NULL) { + mtag->m_tag_id = PACKET_TAG_NONE; + return (((struct dn_pkt_tag *)(mtag+1))->rule); + } else + return (NULL); +} #endif #endif /* _IP_DUMMYNET_H */ diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 4041712..3463a33 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -110,6 +110,7 @@ #include #include +#include #include static int ipfastforward_active = 0; @@ -130,9 +131,8 @@ ip_fastforward(struct mbuf *m) struct mbuf *m0 = NULL; #ifdef IPDIVERT struct ip *tip; - struct mbuf *teem = NULL; + struct mbuf *clone = NULL; #endif - struct mbuf *tag = NULL; struct route ro; struct sockaddr_in *dst = NULL; struct in_ifaddr *ia = NULL; @@ -150,16 +150,6 @@ ip_fastforward(struct mbuf *m) if (!ipfastforward_active || !ipforwarding) return 0; - /* - * If there is any MT_TAG we fall back to ip_input because we can't - * handle TAGs here. 
Should never happen as we get directly called - * from the if_output routines. - */ - if (m->m_type == MT_TAG) { - KASSERT(0, ("%s: packet with MT_TAG not expected", __func__)); - return 0; - } - M_ASSERTVALID(m); M_ASSERTPKTHDR(m); @@ -373,50 +363,39 @@ fallback: /* * See if this is a fragment */ - if (ip->ip_off & (IP_MF | IP_OFFMASK)) { - MGETHDR(tag, M_DONTWAIT, MT_TAG); - if (tag == NULL) - goto drop; - tag->m_flags = PACKET_TAG_DIVERT; - tag->m_data = (caddr_t)(intptr_t)args.divert_rule; - tag->m_next = m; - /* XXX: really bloody hack, see ip_input */ - tag->m_nextpkt = (struct mbuf *)1; - m = tag; - tag = NULL; - + if (ip->ip_off & (IP_MF | IP_OFFMASK)) goto droptoours; - } /* * Tee packet */ if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) - teem = m_dup(m, M_DONTWAIT); + clone = divert_clone(m); else - teem = m; - if (teem == NULL) + clone = m; + if (clone == NULL) goto passin; /* * Delayed checksums are not compatible */ - if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(teem); - teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* * Restore packet header fields to original values */ - tip = mtod(teem, struct ip *); + tip = mtod(m, struct ip *); tip->ip_len = htons(tip->ip_len); tip->ip_off = htons(tip->ip_off); /* * Deliver packet to divert input routine */ - divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule); + divert_packet(m, 0); /* * If this was not tee, we are done */ + m = clone; if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) return 1; /* Continue if it was tee */ @@ -560,52 +539,39 @@ passin: /* * See if this is a fragment */ - if (ip->ip_off & (IP_MF | IP_OFFMASK)) { - MGETHDR(tag, M_DONTWAIT, MT_TAG); - if (tag == NULL) { - RTFREE(ro.ro_rt); - goto drop; - } - tag->m_flags = PACKET_TAG_DIVERT; - tag->m_data = (caddr_t)(intptr_t)args.divert_rule; - tag->m_next = m; - /* XXX: really bloody hack, see ip_input */ - 
tag->m_nextpkt = (struct mbuf *)1; - m = tag; - tag = NULL; - + if (ip->ip_off & (IP_MF | IP_OFFMASK)) goto droptoours; - } /* * Tee packet */ if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0) - teem = m_dup(m, M_DONTWAIT); + clone = divert_clone(m); else - teem = m; - if (teem == NULL) + clone = m; + if (clone == NULL) goto passout; /* * Delayed checksums are not compatible with divert */ - if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(teem); - teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } /* * Restore packet header fields to original values */ - tip = mtod(teem, struct ip *); + tip = mtod(m, struct ip *); tip->ip_len = htons(tip->ip_len); tip->ip_off = htons(tip->ip_off); /* * Deliver packet to divert input routine */ - divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule); + divert_packet(m, 0); /* * If this was not tee, we are done */ + m = clone; if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) { RTFREE(ro.ro_rt); return 1; @@ -638,38 +604,24 @@ passout: if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) { forwardlocal: if (args.next_hop) { - /* XXX leak */ - MGETHDR(tag, M_DONTWAIT, MT_TAG); - if (tag == NULL) { + struct m_tag *mtag = m_tag_get( + PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in *), + M_NOWAIT); + if (mtag == NULL) { if (ro.ro_rt) RTFREE(ro.ro_rt); goto drop; } - tag->m_flags = PACKET_TAG_IPFORWARD; - tag->m_data = (caddr_t)args.next_hop; - tag->m_next = m; - /* XXX: really bloody hack, - * see ip_input */ - tag->m_nextpkt = (struct mbuf *)1; - m = tag; - tag = NULL; + *(struct sockaddr_in **)(mtag+1) = + args.next_hop; + m_tag_prepend(m, mtag); } #ifdef IPDIVERT droptoours: /* Used for DIVERT */ #endif - MGETHDR(tag, M_DONTWAIT, MT_TAG); - if (tag == NULL) { - if (ro.ro_rt) - RTFREE(ro.ro_rt); - goto drop; - } - tag->m_flags = PACKET_TAG_IPFASTFWD_OURS; - tag->m_data = NULL; - tag->m_next = m; - /* XXX: really 
bloody hack, see ip_input */ - tag->m_nextpkt = (struct mbuf *)1; - m = tag; - tag = NULL; + /* for ip_input */ + m->m_flags |= M_FASTFWD_OURS; /* ip still points to the real packet */ ip->ip_len = htons(ip->ip_len); diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h index e6eae2d..8e3047d 100644 --- a/sys/netinet/ip_fw.h +++ b/sys/netinet/ip_fw.h @@ -400,7 +400,6 @@ struct ip_fw_args { int flags; /* for dummynet */ struct ipfw_flow_id f_id; /* grabbed from IP header */ - u_int16_t divert_rule; /* divert cookie */ u_int32_t retval; }; diff --git a/sys/netinet/ip_fw2.c b/sys/netinet/ip_fw2.c index 836be9d..7097b20 100644 --- a/sys/netinet/ip_fw2.c +++ b/sys/netinet/ip_fw2.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -1457,6 +1458,7 @@ ipfw_chk(struct ip_fw_args *args) int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; struct ip_fw_chain *chain = &layer3_chain; + struct m_tag *mtag; if (m->m_flags & M_SKIP_FIREWALL) return 0; /* accept */ @@ -1545,6 +1547,7 @@ ipfw_chk(struct ip_fw_args *args) after_ip_checks: IPFW_LOCK(chain); /* XXX expensive? can we run lock free? */ + mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (args->rule) { /* * Packet has already been tagged. Look for the next rule @@ -1567,7 +1570,7 @@ after_ip_checks: * Find the starting rule. It can be either the first * one, or the one after divert_rule if asked so. */ - int skipto = args->divert_rule; + int skipto = mtag ? divert_cookie(mtag) : 0; f = chain->rules; if (args->eh == NULL && skipto != 0) { @@ -1583,7 +1586,9 @@ after_ip_checks: } } } - args->divert_rule = 0; /* reset to avoid confusion later */ + /* reset divert rule to avoid confusion later */ + if (mtag) + m_tag_delete(m, mtag); /* * Now scan the rules, and parse microinstructions for each rule. 
@@ -2018,14 +2023,29 @@ check_body: goto done; case O_DIVERT: - case O_TEE: + case O_TEE: { + struct divert_tag *dt; + if (args->eh) /* not on layer 2 */ break; - args->divert_rule = f->rulenum; - retval = (cmd->opcode == O_DIVERT) ? + mtag = m_tag_get(PACKET_TAG_DIVERT, + sizeof(struct divert_tag), + M_NOWAIT); + if (mtag == NULL) { + /* XXX statistic */ + /* drop packet */ + IPFW_UNLOCK(chain); + return IP_FW_PORT_DENY_FLAG; + } + dt = (struct divert_tag *)(mtag+1); + dt->cookie = f->rulenum; + dt->info = (cmd->opcode == O_DIVERT) ? cmd->arg1 : cmd->arg1 | IP_FW_PORT_TEE_FLAG; + m_tag_prepend(m, mtag); + retval = dt->info; goto done; + } case O_COUNT: case O_SKIPTO: diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index e82b1ad..5c9727d 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -78,6 +78,7 @@ #include #include +#include #include #ifdef IPSEC @@ -239,8 +240,7 @@ static int ip_dooptions(struct mbuf *m, int, static void ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop); static void ip_freef(struct ipqhead *, struct ipq *); -static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, - struct ipq *, u_int32_t *, u_int16_t *); +static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *); /* * IP initialization: fill in IP protocol switch table. @@ -300,10 +300,11 @@ ip_input(struct mbuf *m) struct in_ifaddr *ia = NULL; struct ifaddr *ifa; int i, checkif, hlen = 0; - int ours = 0; u_short sum; struct in_addr pkt_dst; - u_int32_t divert_info = 0; /* packet divert/tee info */ +#ifdef IPDIVERT + u_int32_t divert_info; /* packet divert/tee info */ +#endif struct ip_fw_args args; int dchg = 0; /* dest changed after fw */ #ifdef PFIL_HOOKS @@ -318,56 +319,17 @@ ip_input(struct mbuf *m) args.eh = NULL; args.oif = NULL; - args.rule = NULL; - args.divert_rule = 0; /* divert cookie */ - args.next_hop = NULL; - - /* - * Grab info from MT_TAG mbufs prepended to the chain. - * - * XXX: This is ugly. 
These pseudo mbuf prepend tags should really - * be real m_tags. Before these have always been allocated on the - * callers stack, so we didn't have to free them. Now with - * ip_fastforward they are true mbufs and we have to free them - * otherwise we have a leak. Must rewrite ipfw to use m_tags. - */ - for (; m && m->m_type == MT_TAG;) { - struct mbuf *m0; - - switch(m->_m_tag_id) { - default: - printf("ip_input: unrecognised MT_TAG tag %d\n", - m->_m_tag_id); - break; - - case PACKET_TAG_DUMMYNET: - args.rule = ((struct dn_pkt *)m)->rule; - break; - case PACKET_TAG_DIVERT: - args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff; - break; - - case PACKET_TAG_IPFORWARD: - args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; - break; - - case PACKET_TAG_IPFASTFWD_OURS: - ours = 1; - break; - } - - m0 = m; - m = m->m_next; - /* XXX: This is set by ip_fastforward */ - if (m0->m_nextpkt == (struct mbuf *)1) - m_free(m0); - } - M_ASSERTPKTHDR(m); + + args.next_hop = ip_claim_next_hop(m); + args.rule = ip_dn_claim_rule(m); - if (ours) /* ip_fastforward firewall changed dest to local */ + if (m->m_flags & M_FASTFWD_OURS) { + /* ip_fastforward firewall changed dest to local */ + m->m_flags &= ~M_FASTFWD_OURS; /* for reflected mbufs */ goto ours; + } if (args.rule) { /* dummynet already filtered us */ ip = mtod(m, struct ip *); @@ -531,7 +493,6 @@ iphack: #ifdef IPDIVERT if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) { /* Divert or tee packet */ - divert_info = i; goto ours; } #endif @@ -839,13 +800,11 @@ found: /* * Attempt reassembly; if it succeeds, proceed. - * ip_reass() will return a different mbuf, and update - * the divert info in divert_info and args.divert_rule. + * ip_reass() will return a different mbuf. 
*/ ipstat.ips_fragments++; m->m_pkthdr.header = ip; - m = ip_reass(m, - &ipq[sum], fp, &divert_info, &args.divert_rule); + m = ip_reass(m, &ipq[sum], fp); IPQ_UNLOCK(); if (m == 0) return; @@ -855,7 +814,7 @@ found: hlen = ip->ip_hl << 2; #ifdef IPDIVERT /* Restore original checksum before diverting packet */ - if (divert_info != 0) { + if (divert_find_info(m) != 0) { ip->ip_len += hlen; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); @@ -876,12 +835,15 @@ found: /* * Divert or tee packet to the divert protocol if required. */ + divert_info = divert_find_info(m); if (divert_info != 0) { - struct mbuf *clone = NULL; + struct mbuf *clone; /* Clone packet if we're doing a 'tee' */ if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0) - clone = m_dup(m, M_DONTWAIT); + clone = divert_clone(m); + else + clone = NULL; /* Restore packet header fields to original values */ ip->ip_len += hlen; @@ -889,7 +851,7 @@ found: ip->ip_off = htons(ip->ip_off); /* Deliver packet to divert input routine */ - divert_packet(m, 1, divert_info & 0xffff, args.divert_rule); + divert_packet(m, 1); ipstat.ips_delivered++; /* If 'tee', continue with original packet */ @@ -900,12 +862,11 @@ found: ip->ip_len += hlen; /* * Jump backwards to complete processing of the - * packet. But first clear divert_info to avoid - * entering this block again. - * We do not need to clear args.divert_rule - * or args.next_hop as they will not be used. + * packet. We do not need to clear args.next_hop + * as that will not be used again and the cloned packet + * doesn't contain a divert packet tag so we won't + * re-enter this block. */ - divert_info = 0; goto pass; } #endif @@ -966,21 +927,17 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ * Switch out to protocol's input routine. 
*/ ipstat.ips_delivered++; - NET_PICKUP_GIANT(); if (args.next_hop && ip->ip_p == IPPROTO_TCP) { - /* TCP needs IPFORWARD info if available */ - struct m_hdr tag; - - tag.mh_type = MT_TAG; - tag.mh_flags = PACKET_TAG_IPFORWARD; - tag.mh_data = (caddr_t)args.next_hop; - tag.mh_next = m; - tag.mh_nextpkt = NULL; - - (*inetsw[ip_protox[ip->ip_p]].pr_input)( - (struct mbuf *)&tag, hlen); - } else - (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); + /* attach next hop info for TCP */ + struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in *), M_NOWAIT); + if (mtag == NULL) + goto bad; + *(struct sockaddr_in **)(mtag+1) = args.next_hop; + m_tag_prepend(m, mtag); + } + NET_PICKUP_GIANT(); + (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); NET_DROP_GIANT(); return; bad: @@ -999,8 +956,7 @@ bad: */ static struct mbuf * -ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, - u_int32_t *divinfo, u_int16_t *divert_rule) +ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp) { struct ip *ip = mtod(m, struct ip *); register struct mbuf *p, *q, *nq; @@ -1042,10 +998,6 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, fp->ipq_dst = ip->ip_dst; fp->ipq_frags = m; m->m_nextpkt = NULL; -#ifdef IPDIVERT - fp->ipq_div_info = 0; - fp->ipq_div_cookie = 0; -#endif goto inserted; } else { fp->ipq_nfrags++; @@ -1129,16 +1081,15 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, inserted: #ifdef IPDIVERT - /* - * Transfer firewall instructions to the fragment structure. - * Only trust info in the fragment at offset 0. - */ - if (ip->ip_off == 0) { - fp->ipq_div_info = *divinfo; - fp->ipq_div_cookie = *divert_rule; + if (ip->ip_off != 0) { + /* + * Strip any divert information; only the info + * on the first fragment is used/kept. 
+ */ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + if (mtag) + m_tag_delete(m, mtag); } - *divinfo = 0; - *divert_rule = 0; #endif /* @@ -1204,14 +1155,6 @@ inserted: mac_destroy_ipq(fp); #endif -#ifdef IPDIVERT - /* - * Extract firewall instructions from the fragment structure. - */ - *divinfo = fp->ipq_div_info; - *divert_rule = fp->ipq_div_cookie; -#endif - /* * Create header for new ip packet by * modifying header of first packet; @@ -1232,10 +1175,6 @@ inserted: return (m); dropfrag: -#ifdef IPDIVERT - *divinfo = 0; - *divert_rule = 0; -#endif ipstat.ips_fragdropped++; if (fp != NULL) fp->ipq_nfrags--; @@ -1914,17 +1853,15 @@ ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop) } { - struct m_hdr tag; - if (next_hop) { - /* Pass IPFORWARD info if available */ - - tag.mh_type = MT_TAG; - tag.mh_flags = PACKET_TAG_IPFORWARD; - tag.mh_data = (caddr_t)next_hop; - tag.mh_next = m; - tag.mh_nextpkt = NULL; - m = (struct mbuf *)&tag; + struct m_tag *mtag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in *), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + return; + } + *(struct sockaddr_in **)(mtag+1) = next_hop; + m_tag_prepend(m, mtag); } error = ip_output(m, (struct mbuf *)0, NULL, IP_FORWARDING, 0, NULL); } diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 368d3c5..abf570f 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -90,6 +90,7 @@ static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); #endif /*FAST_IPSEC*/ #include +#include #include #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ @@ -130,12 +131,12 @@ extern struct protosw inetsw[]; * inserted, so must have a NULL opt pointer. 
*/ int -ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, +ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ - struct mbuf *m; + struct mbuf *m0; int hlen = sizeof (struct ip); int len, off, error = 0; struct sockaddr_in *dst = NULL; /* keep compiler happy */ @@ -143,6 +144,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, int isbroadcast, sw_csum; struct in_addr pkt_dst; struct route iproute; + struct m_tag *dummytag; #ifdef IPSEC struct secpolicy *sp = NULL; #endif @@ -157,44 +159,28 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro, args.eh = NULL; args.rule = NULL; - args.next_hop = NULL; - args.divert_rule = 0; /* divert cookie */ - - /* Grab info from MT_TAG mbufs prepended to the chain. */ - for (; m0 && m0->m_type == MT_TAG; m0 = m0->m_next) { - switch(m0->_m_tag_id) { - default: - printf("ip_output: unrecognised MT_TAG tag %d\n", - m0->_m_tag_id); - break; - - case PACKET_TAG_DUMMYNET: - /* - * the packet was already tagged, so part of the - * processing was already done, and we need to go down. - * Get parameters from the header. 
- */ - args.rule = ((struct dn_pkt *)m0)->rule; - opt = NULL ; - ro = & ( ((struct dn_pkt *)m0)->ro ) ; - imo = NULL ; - dst = ((struct dn_pkt *)m0)->dn_dst ; - ifp = ((struct dn_pkt *)m0)->ifp ; - flags = ((struct dn_pkt *)m0)->flags ; - break; - - case PACKET_TAG_DIVERT: - args.divert_rule = (intptr_t)m0->m_data & 0xffff; - break; - - case PACKET_TAG_IPFORWARD: - args.next_hop = (struct sockaddr_in *)m0->m_data; - break; - } - } - m = m0; M_ASSERTPKTHDR(m); + + args.next_hop = ip_claim_next_hop(m); + dummytag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + if (dummytag != NULL) { + struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); + /* + * Prevent lower layers from finding the tag + * Cleanup and free is done below + */ + m_tag_unlink(m, dummytag); + /* + * the packet was already tagged, so part of the + * processing was already done, and we need to go down. + * Get parameters from the header. + */ + args.rule = dt->rule; + ro = &(dt->ro); + dst = dt->dn_dst; + ifp = dt->ifp; + } if (ro == NULL) { ro = &iproute; @@ -557,7 +543,7 @@ sendit: dst = (struct sockaddr_in *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec4_output. 
*/ - m0 = NULL; + m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: @@ -797,11 +783,13 @@ spd_done: } #ifdef IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { - struct mbuf *clone = NULL; + struct mbuf *clone; /* Clone packet if we're doing a 'tee' */ if ((off & IP_FW_PORT_TEE_FLAG) != 0) - clone = m_dup(m, M_DONTWAIT); + clone = divert_clone(m); + else + clone = NULL; /* * XXX @@ -818,7 +806,7 @@ spd_done: ip->ip_off = htons(ip->ip_off); /* Deliver packet to divert input routine */ - divert_packet(m, 0, off & 0xffff, args.divert_rule); + divert_packet(m, 0); /* If 'tee', continue with original packet */ if (clone != NULL) { @@ -896,26 +884,29 @@ spd_done: break; } if (ia) { /* tell ip_input "dont filter" */ - struct m_hdr tag; - - tag.mh_type = MT_TAG; - tag.mh_flags = PACKET_TAG_IPFORWARD; - tag.mh_data = (caddr_t)args.next_hop; - tag.mh_next = m; - tag.mh_nextpkt = NULL; + struct m_tag *mtag = m_tag_get( + PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in *), M_NOWAIT); + if (mtag == NULL) { + error = ENOBUFS; + goto bad; + } + *(struct sockaddr_in **)(mtag+1) = + args.next_hop; + m_tag_prepend(m, mtag); if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = ifunit("lo0"); if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m0->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_data = 0xffff; } m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); - ip_input((struct mbuf *)&tag); + ip_input(m); goto done; } /* @@ -1072,6 +1063,12 @@ done: RTFREE(ro->ro_rt); ro->ro_rt = NULL; } + if (dummytag) { + struct dn_pkt_tag *dt = (struct dn_pkt_tag *)(dummytag+1); + if (dt->ro.ro_rt) + RTFREE(dt->ro.ro_rt); + m_tag_free(dummytag); + } #ifdef IPSEC if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 4bd600d..52ddffb 100644 --- a/sys/netinet/ip_var.h +++ 
b/sys/netinet/ip_var.h @@ -65,8 +65,6 @@ struct ipq { struct mbuf *ipq_frags; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; u_char ipq_nfrags; /* # frags in this packet */ - u_int32_t ipq_div_info; /* ipfw divert port & flags */ - u_int16_t ipq_div_cookie; /* ipfw divert cookie */ struct label *ipq_label; /* MAC label */ }; #endif /* _KERNEL */ @@ -139,6 +137,9 @@ struct ipstat { #define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ +/* mbuf flag used by ip_fastfwd */ +#define M_FASTFWD_OURS M_PROTO1 /* changed dst to local */ + struct ip; struct inpcb; struct route; @@ -197,14 +198,21 @@ extern int (*ip_rsvp_vif)(struct socket *, struct sockopt *); extern void (*ip_rsvp_force_done)(struct socket *); extern void (*rsvp_input_p)(struct mbuf *m, int off); - -#ifdef IPDIVERT -void div_init(void); -void div_input(struct mbuf *, int); -void div_ctlinput(int, struct sockaddr *, void *); -void divert_packet(struct mbuf *m, int incoming, int port, int rule); -extern struct pr_usrreqs div_usrreqs; -#endif +/* + * Obtain next_hop information associated with the mbuf, if any. + * If a tag is present, invalidate it as well. 
+ */ +static __inline struct sockaddr_in * +ip_claim_next_hop(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); + if (mtag) { + struct sockaddr_in *sin = *(struct sockaddr_in **)(mtag+1); + mtag->m_tag_id = PACKET_TAG_NONE; + return sin; + } else + return NULL; +} #ifdef PFIL_HOOKS extern struct pfil_head inet_pfil_hook; diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c index 89e9d7c..f6743fd 100644 --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -52,6 +52,7 @@ #include #include +#include #include #include diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 2b4c564..1fe6ef4 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -448,11 +448,8 @@ tcp_input(m, off0) short ostate = 0; #endif - /* Grab info from MT_TAG mbufs prepended to the chain. */ - for (;m && m->m_type == MT_TAG; m = m->m_next) { - if (m->_m_tag_id == PACKET_TAG_IPFORWARD) - next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; - } + /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ + next_hop = ip_claim_next_hop(m); #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 2b4c564..1fe6ef4 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -448,11 +448,8 @@ tcp_input(m, off0) short ostate = 0; #endif - /* Grab info from MT_TAG mbufs prepended to the chain. */ - for (;m && m->m_type == MT_TAG; m = m->m_next) { - if (m->_m_tag_id == PACKET_TAG_IPFORWARD) - next_hop = (struct sockaddr_in *)m->m_hdr.mh_data; - } + /* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */ + next_hop = ip_claim_next_hop(m); #ifdef INET6 isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0; #endif -- cgit v1.1