summaryrefslogtreecommitdiffstats
path: root/sys/netinet/ip_output.c
diff options
context:
space:
mode:
authorluigi <luigi@FreeBSD.org>2002-11-17 16:30:44 +0000
committerluigi <luigi@FreeBSD.org>2002-11-17 16:30:44 +0000
commitacc6f4edfb35c2d6a47d1c2462ce45cb86e11312 (patch)
tree9b3da047164eb37459949f87d8d1b3491813f37d /sys/netinet/ip_output.c
parenta98b300bf38c0cb695bbfd14a0f5389699880f00 (diff)
downloadFreeBSD-src-acc6f4edfb35c2d6a47d1c2462ce45cb86e11312.zip
FreeBSD-src-acc6f4edfb35c2d6a47d1c2462ce45cb86e11312.tar.gz
Move the ip_fragment code from ip_output() to a separate function,
so that it can be reused elsewhere (there is a number of places where it can be useful). This also trims some 200 lines from the body of ip_output(), which helps readability a bit. (This change was discussed a few weeks ago on the mailing lists, Julian agreed, silence from others. It is not a functional change, so i expect it to be ok to commit it now but i am happy to back it out if there are objections). While at it, fix some function headers and replace m_copy() with m_copypacket() where applicable. MFC after: 1 week
Diffstat (limited to 'sys/netinet/ip_output.c')
-rw-r--r--sys/netinet/ip_output.c311
1 files changed, 168 insertions, 143 deletions
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 4c19090..1ed5459 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -116,15 +116,10 @@ extern struct protosw inetsw[];
* The mbuf opt, if present, will not be freed.
*/
int
-ip_output(m0, opt, ro, flags, imo, inp)
- struct mbuf *m0;
- struct mbuf *opt;
- struct route *ro;
- int flags;
- struct ip_moptions *imo;
- struct inpcb *inp;
+ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
+ int flags, struct ip_moptions *imo, struct inpcb *inp)
{
- struct ip *ip, *mhip;
+ struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m;
int hlen = sizeof (struct ip);
@@ -458,7 +453,7 @@ ip_output(m0, opt, ro, flags, imo, inp)
goto bad;
}
/* don't allow broadcast messages to be fragmented */
- if ((u_short)ip->ip_len > ifp->if_mtu) {
+ if (ip->ip_len > ifp->if_mtu) {
error = EMSGSIZE;
goto bad;
}
@@ -985,8 +980,7 @@ pass:
* If small enough for interface, or the interface will take
* care of the fragmentation for us, can just send directly.
*/
- if ((u_short)ip->ip_len <= ifp->if_mtu ||
- ifp->if_hwassist & CSUM_FRAGMENT) {
+ if (ip->ip_len <= ifp->if_mtu || ifp->if_hwassist & CSUM_FRAGMENT) {
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
@@ -1008,10 +1002,6 @@ pass:
(struct sockaddr *)dst, ro->ro_rt);
goto done;
}
- /*
- * Too large for interface; fragment if possible.
- * Must be able to put at least 8 bytes per fragment.
- */
if (ip->ip_off & IP_DF) {
error = EMSGSIZE;
/*
@@ -1029,38 +1019,122 @@ pass:
ipstat.ips_cantfrag++;
goto bad;
}
- len = (ifp->if_mtu - hlen) &~ 7;
- if (len < 8) {
- error = EMSGSIZE;
+ /*
+ * Too large for interface; fragment if possible. If successful,
+ * on return m will point to a list of packets to be sent.
+ */
+ error = ip_fragment(ip, &m, ifp->if_mtu, ifp->if_hwassist, sw_csum);
+ if (error)
goto bad;
+ for (; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+#ifdef IPSEC
+ /* clean ipsec history once it goes out of the node */
+ ipsec_delaux(m);
+#endif
+ if (error == 0) {
+ /* Record statistics for this interface address. */
+ if (ia != NULL) {
+ ia->ia_ifa.if_opackets++;
+ ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+ }
+
+ error = (*ifp->if_output)(ifp, m,
+ (struct sockaddr *)dst, ro->ro_rt);
+ } else
+ m_freem(m);
+ }
+
+ if (error == 0)
+ ipstat.ips_fragmented++;
+
+done:
+#ifdef IPSEC
+ if (ro == &iproute && ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
}
+ if (sp != NULL) {
+ KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
+ printf("DP ip_output call free SP:%p\n", sp));
+ key_freesp(sp);
+ }
+#endif /* IPSEC */
+#ifdef FAST_IPSEC
+ if (ro == &iproute && ro->ro_rt) {
+ RTFREE(ro->ro_rt);
+ ro->ro_rt = NULL;
+ }
+ if (sp != NULL)
+ KEY_FREESP(&sp);
+#endif /* FAST_IPSEC */
+ return (error);
+bad:
+ m_freem(m);
+ goto done;
+}
+
+/*
+ * Create a chain of fragments which fit the given mtu. m_frag points to the
+ * mbuf to be fragmented; on return it points to the chain with the fragments.
+ * Return 0 if no error.
+ *
+ * if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
+ * sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
+ */
+int
+ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
+ u_long if_hwassist_flags, int sw_csum)
+{
+ int error = 0;
+ int hlen = ip->ip_hl << 2;
+ int len = (mtu - hlen) &~ 7; /* size of payload in each fragment */
+ int off;
+ struct mbuf *m0 = *m_frag; /* the original packet */
+ int firstlen;
+ struct mbuf **mnext;
+ int nfrags;
+
+ if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
+ ipstat.ips_cantfrag++;
+ return EMSGSIZE;
+ }
+
+ /*
+ * Must be able to put at least 8 bytes per fragment.
+ */
+ if (len < 8)
+ return EMSGSIZE;
/*
* if the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
- if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
- (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
- in_delayed_cksum(m);
- m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
+ (if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
+ in_delayed_cksum(m0);
+ m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if (len > PAGE_SIZE) {
/*
- * Fragement large datagrams such that each segment
+ * Fragment large datagrams such that each segment
* contains a multiple of PAGE_SIZE amount of data,
* plus headers. This enables a receiver to perform
* page-flipping zero-copy optimizations.
+ *
+ * XXX When does this help given that sender and receiver
+ * could have different page sizes, and also mtu could
+ * be less than the receiver's page size ?
*/
int newlen;
- struct mbuf *mtmp;
+ struct mbuf *m;
- for (mtmp = m, off = 0;
- mtmp && ((off + mtmp->m_len) <= ifp->if_mtu);
- mtmp = mtmp->m_next) {
- off += mtmp->m_len;
- }
+ for (m = m0, off = 0; m && ((off + m->m_len) <= mtu);
+ m = m->m_next)
+ off += m->m_len;
/*
* firstlen (off - hlen) must be aligned on an
* 8-byte boundary
@@ -1068,44 +1142,46 @@ pass:
if (off < hlen)
goto smart_frag_failure;
off = ((off - hlen) & ~7) + hlen;
- newlen = (~PAGE_MASK) & ifp->if_mtu;
- if ((newlen + sizeof (struct ip)) > ifp->if_mtu) {
+ newlen = (~PAGE_MASK) & mtu;
+ if ((newlen + sizeof (struct ip)) > mtu) {
/* we failed, go back the default */
smart_frag_failure:
newlen = len;
off = hlen + len;
}
-
-/* printf("ipfrag: len = %d, hlen = %d, mhlen = %d, newlen = %d, off = %d\n",
- len, hlen, sizeof (struct ip), newlen, off);*/
-
len = newlen;
} else {
off = hlen + len;
}
-
-
- {
- int mhlen, firstlen = off - hlen;
- struct mbuf **mnext = &m->m_nextpkt;
- int nfrags = 1;
+ firstlen = off - hlen;
+ mnext = &m0->m_nextpkt; /* pointer to next packet */
/*
* Loop through length of segment after first fragment,
* make new header and copy data of each part and link onto chain.
+ * Here, m0 is the original packet, m is the fragment being created.
+ * The fragments are linked off the m_nextpkt of the original
+ * packet, which after processing serves as the first fragment.
*/
- m0 = m;
- mhlen = sizeof (struct ip);
- for (; off < (u_short)ip->ip_len; off += len) {
+ for (nfrags=1; off < ip->ip_len; off += len, nfrags++) {
+ struct ip *mhip; /* ip header on the fragment */
+ struct mbuf *m;
+ int mhlen = sizeof (struct ip);
+
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == 0) {
error = ENOBUFS;
ipstat.ips_odropped++;
- goto sendorfree;
+ goto done;
}
m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
+ /*
+ * In the first mbuf, leave room for the link header, then
+ * copy the original IP header including options. The payload
+ * goes into an additional mbuf chain returned by m_copy().
+ */
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
@@ -1115,18 +1191,20 @@ smart_frag_failure:
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
+ /* XXX do we need to add ip->ip_off below ? */
mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
- if (off + len >= (u_short)ip->ip_len)
- len = (u_short)ip->ip_len - off;
- else
+ if (off + len >= ip->ip_len) { /* last fragment */
+ len = ip->ip_len - off;
+ m->m_flags |= M_LASTFRAG;
+ } else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
m->m_next = m_copy(m0, off, len);
- if (m->m_next == 0) {
- (void) m_free(m);
+ if (m->m_next == 0) { /* copy failed */
+ m_free(m);
error = ENOBUFS; /* ??? */
ipstat.ips_odropped++;
- goto sendorfree;
+ goto done;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = (struct ifnet *)0;
@@ -1140,76 +1218,42 @@ smart_frag_failure:
mhip->ip_sum = in_cksum(m, mhlen);
*mnext = m;
mnext = &m->m_nextpkt;
- nfrags++;
}
ipstat.ips_ofragments += nfrags;
- /* set first/last markers for fragment chain */
- m->m_flags |= M_LASTFRAG;
+ /* set first markers for fragment chain */
m0->m_flags |= M_FIRSTFRAG | M_FRAG;
m0->m_pkthdr.csum_data = nfrags;
/*
- * Update first fragment by trimming what's been copied out
- * and updating header, then send each fragment (in order).
+ * Update first fragment by trimming what has been copied out
+ * and updating header.
*/
- m = m0;
- m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
- m->m_pkthdr.len = hlen + firstlen;
- ip->ip_len = htons((u_short)m->m_pkthdr.len);
+ m_adj(m0, hlen + firstlen - ip->ip_len);
+ m0->m_pkthdr.len = hlen + firstlen;
+ ip->ip_len = htons((u_short)m0->m_pkthdr.len);
ip->ip_off |= IP_MF;
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
- ip->ip_sum = in_cksum(m, hlen);
-sendorfree:
- for (m = m0; m; m = m0) {
- m0 = m->m_nextpkt;
- m->m_nextpkt = 0;
-#ifdef IPSEC
- /* clean ipsec history once it goes out of the node */
- ipsec_delaux(m);
-#endif
- if (error == 0) {
- /* Record statistics for this interface address. */
- if (ia != NULL) {
- ia->ia_ifa.if_opackets++;
- ia->ia_ifa.if_obytes += m->m_pkthdr.len;
- }
-
- error = (*ifp->if_output)(ifp, m,
- (struct sockaddr *)dst, ro->ro_rt);
- } else
- m_freem(m);
- }
+ ip->ip_sum = in_cksum(m0, hlen);
+ *m_frag = m0;
- if (error == 0)
- ipstat.ips_fragmented++;
- }
done:
+ if (error) {
+ struct mbuf *m;
+
+ for (m = m0; m; m = m0) {
+ m0 = m->m_nextpkt;
+ m->m_nextpkt = 0;
#ifdef IPSEC
- if (ro == &iproute && ro->ro_rt) {
- RTFREE(ro->ro_rt);
- ro->ro_rt = NULL;
- }
- if (sp != NULL) {
- KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
- printf("DP ip_output call free SP:%p\n", sp));
- key_freesp(sp);
- }
-#endif /* IPSEC */
-#ifdef FAST_IPSEC
- if (ro == &iproute && ro->ro_rt) {
- RTFREE(ro->ro_rt);
- ro->ro_rt = NULL;
+ /* clean ipsec history */
+ ipsec_delaux(m);
+#endif
+ m_freem(m);
+ }
}
- if (sp != NULL)
- KEY_FREESP(&sp);
-#endif /* FAST_IPSEC */
- return (error);
-bad:
- m_freem(m);
- goto done;
+ return error;
}
void
@@ -1247,18 +1291,15 @@ in_delayed_cksum(struct mbuf *m)
* XXX This routine assumes that the packet has no options in place.
*/
static struct mbuf *
-ip_insertoptions(m, opt, phlen)
- register struct mbuf *m;
- struct mbuf *opt;
- int *phlen;
+ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
{
- register struct ipoption *p = mtod(opt, struct ipoption *);
+ struct ipoption *p = mtod(opt, struct ipoption *);
struct mbuf *n;
- register struct ip *ip = mtod(m, struct ip *);
+ struct ip *ip = mtod(m, struct ip *);
unsigned optlen;
optlen = opt->m_len - sizeof(p->ipopt_dst);
- if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) {
+ if (optlen + ip->ip_len > IP_MAXPACKET) {
*phlen = 0;
return (m); /* XXX should fail */
}
@@ -1302,10 +1343,9 @@ ip_insertoptions(m, opt, phlen)
* omitting those not copied during fragmentation.
*/
int
-ip_optcopy(ip, jp)
- struct ip *ip, *jp;
+ip_optcopy(struct ip *ip, struct ip *jp)
{
- register u_char *cp, *dp;
+ u_char *cp, *dp;
int opt, optlen, cnt;
cp = (u_char *)(ip + 1);
@@ -1345,9 +1385,7 @@ ip_optcopy(ip, jp)
* IP socket option processing.
*/
int
-ip_ctloutput(so, sopt)
- struct socket *so;
- struct sockopt *sopt;
+ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
@@ -1608,13 +1646,10 @@ ip_ctloutput(so, sopt)
* with destination address if source routed.
*/
static int
-ip_pcbopts(optname, pcbopt, m)
- int optname;
- struct mbuf **pcbopt;
- register struct mbuf *m;
+ip_pcbopts(int optname, struct mbuf **pcbopt, struct mbuf *m)
{
- register int cnt, optlen;
- register u_char *cp;
+ int cnt, optlen;
+ u_char *cp;
u_char opt;
/* turn off any old options */
@@ -1717,9 +1752,7 @@ bad:
* following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
*/
static struct ifnet *
-ip_multicast_if(a, ifindexp)
- struct in_addr *a;
- int *ifindexp;
+ip_multicast_if(struct in_addr *a, int *ifindexp)
{
int ifindex;
struct ifnet *ifp;
@@ -1743,9 +1776,7 @@ ip_multicast_if(a, ifindexp)
* Set the IP multicast options in response to user setsockopt().
*/
static int
-ip_setmoptions(sopt, imop)
- struct sockopt *sopt;
- struct ip_moptions **imop;
+ip_setmoptions(struct sockopt *sopt, struct ip_moptions **imop)
{
int error = 0;
int i;
@@ -2041,9 +2072,7 @@ ip_setmoptions(sopt, imop)
* Return the IP multicast options in response to user getsockopt().
*/
static int
-ip_getmoptions(sopt, imo)
- struct sockopt *sopt;
- register struct ip_moptions *imo;
+ip_getmoptions(struct sockopt *sopt, struct ip_moptions *imo)
{
struct in_addr addr;
struct in_ifaddr *ia;
@@ -2107,10 +2136,9 @@ ip_getmoptions(sopt, imo)
* Discard the IP multicast options.
*/
void
-ip_freemoptions(imo)
- register struct ip_moptions *imo;
+ip_freemoptions(struct ip_moptions *imo)
{
- register int i;
+ int i;
if (imo != NULL) {
for (i = 0; i < imo->imo_num_memberships; ++i)
@@ -2127,16 +2155,13 @@ ip_freemoptions(imo)
* replicating that code here.
*/
static void
-ip_mloopback(ifp, m, dst, hlen)
- struct ifnet *ifp;
- register struct mbuf *m;
- register struct sockaddr_in *dst;
- int hlen;
+ip_mloopback(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr_in *dst, int hlen)
{
- register struct ip *ip;
+ struct ip *ip;
struct mbuf *copym;
- copym = m_copy(m, 0, M_COPYALL);
+ copym = m_copypacket(m, M_DONTWAIT);
if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
copym = m_pullup(copym, hlen);
if (copym != NULL) {
OpenPOWER on IntegriCloud