summary | refs | log | tree | commit | diff | stats
path: root/sys
diff options
context:
space:
mode:
author: ume <ume@FreeBSD.org> 2003-10-29 15:07:04 +0000
committer: ume <ume@FreeBSD.org> 2003-10-29 15:07:04 +0000
commit: b9fecc82d3e55cefb5fd427307272fed377b780a (patch)
tree: ff2af6160ee3c3b0bf6e218dc2107ae003e82cde /sys
parent: f965698ed4683de29221f38b96189223a4cf0b2e (diff)
download: FreeBSD-src-b9fecc82d3e55cefb5fd427307272fed377b780a.zip
download: FreeBSD-src-b9fecc82d3e55cefb5fd427307272fed377b780a.tar.gz
add ECN support in layer-3.
- implement the tunnel egress rule in ip_ecn_egress() in ip_ecn.c.
  make ip{,6}_ecn_egress() return integer to tell the caller that
  this packet should be dropped.
- handle ECN at fragment reassembly in ip_input.c and frag6.c.

Obtained from: KAME
Diffstat (limited to 'sys')
-rw-r--r--  sys/netinet/in_gif.c      37
-rw-r--r--  sys/netinet/ip.h          19
-rw-r--r--  sys/netinet/ip_ecn.c      84
-rw-r--r--  sys/netinet/ip_ecn.h       8
-rw-r--r--  sys/netinet/ip_input.c    17
-rw-r--r--  sys/netinet6/ah_input.c   10
-rw-r--r--  sys/netinet6/esp_input.c  10
-rw-r--r--  sys/netinet6/frag6.c      23
-rw-r--r--  sys/netinet6/in6_gif.c    28
-rw-r--r--  sys/netinet6/ip6_ecn.h     4
10 files changed, 186 insertions, 54 deletions
diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c
index 3d2ff66..fd6397e 100644
--- a/sys/netinet/in_gif.c
+++ b/sys/netinet/in_gif.c
@@ -161,10 +161,8 @@ in_gif_output(ifp, family, m)
/* version will be set in ip_output() */
iphdr.ip_ttl = ip_gif_ttl;
iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip);
- if (ifp->if_flags & IFF_LINK1)
- ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos);
- else
- ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
+ &iphdr.ip_tos, &tos);
/* prepend new IP header */
M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
@@ -250,10 +248,12 @@ in_gif_input(m, off)
return;
}
ip = mtod(m, struct ip *);
- if (gifp->if_flags & IFF_LINK1)
- ip_ecn_egress(ECN_ALLOWED, &otos, &ip->ip_tos);
- else
- ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos);
+ if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos, &ip->ip_tos) == 0) {
+ m_freem(m);
+ return;
+ }
break;
}
#endif
@@ -261,7 +261,8 @@ in_gif_input(m, off)
case IPPROTO_IPV6:
{
struct ip6_hdr *ip6;
- u_int8_t itos;
+ u_int8_t itos, oitos;
+
af = AF_INET6;
if (m->m_len < sizeof(*ip6)) {
m = m_pullup(m, sizeof(*ip6));
@@ -269,13 +270,17 @@ in_gif_input(m, off)
return;
}
ip6 = mtod(m, struct ip6_hdr *);
- itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
- if (gifp->if_flags & IFF_LINK1)
- ip_ecn_egress(ECN_ALLOWED, &otos, &itos);
- else
- ip_ecn_egress(ECN_NOCARE, &otos, &itos);
- ip6->ip6_flow &= ~htonl(0xff << 20);
- ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+ itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos, &itos) == 0) {
+ m_freem(m);
+ return;
+ }
+ if (itos != oitos) {
+ ip6->ip6_flow &= ~htonl(0xff << 20);
+ ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+ }
break;
}
#endif /* INET6 */
diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h
index 24f66ce..025ad08 100644
--- a/sys/netinet/ip.h
+++ b/sys/netinet/ip.h
@@ -84,10 +84,11 @@ CTASSERT(sizeof (struct ip) == 20);
#define IPTOS_THROUGHPUT 0x08
#define IPTOS_RELIABILITY 0x04
#define IPTOS_MINCOST 0x02
-/* ECN bits proposed by Sally Floyd */
-#define IPTOS_CE 0x01 /* congestion experienced */
-#define IPTOS_ECT 0x02 /* ECN-capable transport */
-
+#if 1
+/* ECN RFC3168 obsoletes RFC2481, and these will be deprecated soon. */
+#define IPTOS_CE 0x01
+#define IPTOS_ECT 0x02
+#endif
/*
* Definitions for IP precedence (also in ip_tos) (hopefully unused)
@@ -102,6 +103,16 @@ CTASSERT(sizeof (struct ip) == 20);
#define IPTOS_PREC_ROUTINE 0x00
/*
+ * ECN (Explicit Congestion Notification) codepoints in RFC3168
+ * mapped to the lower 2 bits of the TOS field.
+ */
+#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */
+#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */
+#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */
+#define IPTOS_ECN_CE 0x03 /* congestion experienced */
+#define IPTOS_ECN_MASK 0x03 /* ECN field mask */
+
+/*
* Definitions for options.
*/
#define IPOPT_COPIED(o) ((o)&0x80)
diff --git a/sys/netinet/ip_ecn.c b/sys/netinet/ip_ecn.c
index de3d38e..9ea2f6b 100644
--- a/sys/netinet/ip_ecn.c
+++ b/sys/netinet/ip_ecn.c
@@ -1,5 +1,5 @@
/* $FreeBSD$ */
-/* $KAME: ip_ecn.c,v 1.11 2001/05/03 16:09:29 itojun Exp $ */
+/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */
/*
* Copyright (C) 1999 WIDE Project.
@@ -56,6 +56,37 @@
#endif
/*
+ * ECN and TOS (or TCLASS) processing rules at tunnel encapsulation and
+ * decapsulation from RFC3168:
+ *
+ * Outer Hdr at Inner Hdr at
+ * Encapsulator Decapsulator
+ * Header fields: -------------------- ------------
+ * DS Field copied from inner hdr no change
+ * ECN Field constructed by (I) constructed by (E)
+ *
+ * ECN_ALLOWED (full functionality):
+ * (I) if the ECN field in the inner header is set to CE, then set the
+ * ECN field in the outer header to ECT(0).
+ * otherwise, copy the ECN field to the outer header.
+ *
+ * (E) if the ECN field in the outer header is set to CE and the ECN
+ * field of the inner header is not-ECT, drop the packet.
+ * if the ECN field in the inner header is set to ECT(0) or ECT(1)
+ * and the ECN field in the outer header is set to CE, then copy CE to
+ * the inner header. otherwise, make no change to the inner header.
+ *
+ * ECN_FORBIDDEN (limited functionality):
+ * (I) set the ECN field to not-ECT in the outer header.
+ *
+ * (E) if the ECN field in the outer header is set to CE, drop the packet.
+ * otherwise, make no change to the ECN field in the inner header.
+ *
+ * the drop rule is for backward compatibility and protection against
+ * erasure of CE.
+ */
+
+/*
* modify outer ECN (TOS) field on ingress operation (tunnel encapsulation).
*/
void
@@ -70,10 +101,18 @@ ip_ecn_ingress(mode, outer, inner)
*outer = *inner;
switch (mode) {
case ECN_ALLOWED: /* ECN allowed */
- *outer &= ~IPTOS_CE;
+ /*
+ * full-functionality: if the inner is CE, set ECT(0)
+ * to the outer. otherwise, copy the ECN field.
+ */
+ if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ *outer &= ~IPTOS_ECN_ECT1;
break;
case ECN_FORBIDDEN: /* ECN forbidden */
- *outer &= ~(IPTOS_ECT | IPTOS_CE);
+ /*
+ * limited-functionality: set not-ECT to the outer
+ */
+ *outer &= ~IPTOS_ECN_MASK;
break;
case ECN_NOCARE: /* no consideration to ECN */
break;
@@ -82,8 +121,9 @@ ip_ecn_ingress(mode, outer, inner)
/*
* modify inner ECN (TOS) field on egress operation (tunnel decapsulation).
+ * the caller should drop the packet if the return value is 0.
*/
-void
+int
ip_ecn_egress(mode, outer, inner)
int mode;
const u_int8_t *outer;
@@ -94,13 +134,28 @@ ip_ecn_egress(mode, outer, inner)
switch (mode) {
case ECN_ALLOWED:
- if (*outer & IPTOS_CE)
- *inner |= IPTOS_CE;
+ /*
+ * full-functionality: if the outer is CE and the inner is
+ * not-ECT, should drop it. otherwise, copy CE.
+ */
+ if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) {
+ if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0);
+ *inner |= IPTOS_ECN_CE;
+ }
break;
case ECN_FORBIDDEN: /* ECN forbidden */
+ /*
+ * limited-functionality: if the outer is CE, should drop it.
+ * otherwise, leave the inner.
+ */
+ if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (0);
+ break;
case ECN_NOCARE: /* no consideration to ECN */
break;
}
+ return (1);
}
#ifdef INET6
@@ -115,28 +170,31 @@ ip6_ecn_ingress(mode, outer, inner)
if (!outer || !inner)
panic("NULL pointer passed to ip6_ecn_ingress");
- outer8 = (ntohl(*outer) >> 20) & 0xff;
inner8 = (ntohl(*inner) >> 20) & 0xff;
ip_ecn_ingress(mode, &outer8, &inner8);
*outer &= ~htonl(0xff << 20);
*outer |= htonl((u_int32_t)outer8 << 20);
}
-void
+int
ip6_ecn_egress(mode, outer, inner)
int mode;
const u_int32_t *outer;
u_int32_t *inner;
{
- u_int8_t outer8, inner8;
+ u_int8_t outer8, inner8, oinner8;
if (!outer || !inner)
panic("NULL pointer passed to ip6_ecn_egress");
outer8 = (ntohl(*outer) >> 20) & 0xff;
- inner8 = (ntohl(*inner) >> 20) & 0xff;
- ip_ecn_egress(mode, &outer8, &inner8);
- *inner &= ~htonl(0xff << 20);
- *inner |= htonl((u_int32_t)inner8 << 20);
+ inner8 = oinner8 = (ntohl(*inner) >> 20) & 0xff;
+ if (ip_ecn_egress(mode, &outer8, &inner8) == 0)
+ return (0);
+ if (inner8 != oinner8) {
+ *inner &= ~htonl(0xff << 20);
+ *inner |= htonl((u_int32_t)inner8 << 20);
+ }
+ return (1);
}
#endif
diff --git a/sys/netinet/ip_ecn.h b/sys/netinet/ip_ecn.h
index 1a38a48..01163ed 100644
--- a/sys/netinet/ip_ecn.h
+++ b/sys/netinet/ip_ecn.h
@@ -1,5 +1,5 @@
/* $FreeBSD$ */
-/* $KAME: ip_ecn.h,v 1.6 2001/05/03 14:51:48 itojun Exp $ */
+/* $KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $ */
/*
* Copyright (C) 1999 WIDE Project.
@@ -35,6 +35,9 @@
* http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt
*/
+#ifndef _NETINET_IP_ECN_H_
+#define _NETINET_IP_ECN_H_
+
#if defined(_KERNEL) && !defined(_LKM)
#include "opt_inet.h"
#endif
@@ -45,5 +48,6 @@
#ifdef _KERNEL
extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *);
-extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *);
+extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *);
+#endif
#endif
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 02bd132..30a5b75 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1037,6 +1037,7 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
struct mbuf *t;
int hlen = ip->ip_hl << 2;
int i, next;
+ u_int8_t ecn, ecn0;
IPQ_LOCK_ASSERT();
@@ -1086,6 +1087,22 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
#define GETIP(m) ((struct ip*)((m)->m_pkthdr.header))
/*
+ * Handle ECN by comparing this segment with the first one;
+ * if CE is set, do not lose CE.
+ * drop if CE and not-ECT are mixed for the same packet.
+ */
+ ecn = ip->ip_tos & IPTOS_ECN_MASK;
+ ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
+ if (ecn == IPTOS_ECN_CE) {
+ if (ecn0 == IPTOS_ECN_NOTECT)
+ goto dropfrag;
+ if (ecn0 != IPTOS_ECN_CE)
+ GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
+ }
+ if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
+ goto dropfrag;
+
+ /*
* Find a segment which begins after this one does.
*/
for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
diff --git a/sys/netinet6/ah_input.c b/sys/netinet6/ah_input.c
index 6fb8071..78076b6 100644
--- a/sys/netinet6/ah_input.c
+++ b/sys/netinet6/ah_input.c
@@ -405,7 +405,10 @@ ah4_input(m, off)
}
ip = mtod(m, struct ip *);
/* ECN consideration. */
- ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos);
+ if (!ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos)) {
+ ipsecstat.in_inval++;
+ goto fail;
+ }
if (!key_checktunnelsanity(sav, AF_INET,
(caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
@@ -812,7 +815,10 @@ ah6_input(mp, offp, proto)
}
ip6 = mtod(m, struct ip6_hdr *);
/* ECN consideration. */
- ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow);
+ if (!ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow)) {
+ ipsec6stat.in_inval++;
+ goto fail;
+ }
if (!key_checktunnelsanity(sav, AF_INET6,
(caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
ipseclog((LOG_NOTICE, "ipsec tunnel address mismatch "
diff --git a/sys/netinet6/esp_input.c b/sys/netinet6/esp_input.c
index f2d802c..f25d0f3 100644
--- a/sys/netinet6/esp_input.c
+++ b/sys/netinet6/esp_input.c
@@ -371,7 +371,10 @@ noreplaycheck:
}
ip = mtod(m, struct ip *);
/* ECN consideration. */
- ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos);
+ if (!ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos)) {
+ ipsecstat.in_inval++;
+ goto bad;
+ }
if (!key_checktunnelsanity(sav, AF_INET,
(caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) {
ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
@@ -723,7 +726,10 @@ noreplaycheck:
}
ip6 = mtod(m, struct ip6_hdr *);
/* ECN consideration. */
- ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow);
+ if (!ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow)) {
+ ipsec6stat.in_inval++;
+ goto bad;
+ }
if (!key_checktunnelsanity(sav, AF_INET6,
(caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) {
ipseclog((LOG_ERR, "ipsec tunnel address mismatch "
diff --git a/sys/netinet6/frag6.c b/sys/netinet6/frag6.c
index 3bb425f..f8a86a1 100644
--- a/sys/netinet6/frag6.c
+++ b/sys/netinet6/frag6.c
@@ -52,6 +52,8 @@
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
+#include <netinet/in_systm.h> /* for ECN definitions */
+#include <netinet/ip.h> /* for ECN definitions */
#include <net/net_osdep.h>
@@ -151,6 +153,7 @@ frag6_input(mp, offp, proto)
int first_frag = 0;
int fragoff, frgpartlen; /* must be larger than u_int16_t */
struct ifnet *dstifp;
+ u_int8_t ecn, ecn0;
ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
@@ -350,6 +353,26 @@ frag6_input(mp, offp, proto)
}
/*
+ * Handle ECN by comparing this segment with the first one;
+ * if CE is set, do not lose CE.
+ * drop if CE and not-ECT are mixed for the same packet.
+ */
+ ecn = (ntohl(ip6->ip6_flow) >> 20) & IPTOS_ECN_MASK;
+ ecn0 = (ntohl(q6->ip6q_down->ip6af_head) >> 20) & IPTOS_ECN_MASK;
+ if (ecn == IPTOS_ECN_CE) {
+ if (ecn0 == IPTOS_ECN_NOTECT) {
+ free(ip6af, M_FTABLE);
+ goto dropfrag;
+ }
+ if (ecn0 != IPTOS_ECN_CE)
+ q6->ip6q_down->ip6af_head |= htonl(IPTOS_ECN_CE << 20);
+ }
+ if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) {
+ free(ip6af, M_FTABLE);
+ goto dropfrag;
+ }
+
+ /*
* Find a segment which begins after this one does.
*/
for (af6 = q6->ip6q_down; af6 != (struct ip6asfrag *)q6;
diff --git a/sys/netinet6/in6_gif.c b/sys/netinet6/in6_gif.c
index fae8a98..e3cd124 100644
--- a/sys/netinet6/in6_gif.c
+++ b/sys/netinet6/in6_gif.c
@@ -168,11 +168,9 @@ in6_gif_output(ifp, family, m)
m_freem(m);
return ENETUNREACH;
}
- if (ifp->if_flags & IFF_LINK1)
- ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
- else
- ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
- ip6->ip6_flow &= ~ntohl(0xff00000);
+ ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE,
+ &otos, &itos);
+ ip6->ip6_flow &= ~htonl(0xff << 20);
ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
if (dst->sin6_family != sin6_dst->sin6_family ||
@@ -259,10 +257,12 @@ in6_gif_input(mp, offp, proto)
return IPPROTO_DONE;
}
ip = mtod(m, struct ip *);
- if (gifp->if_flags & IFF_LINK1)
- ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
- else
- ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
+ if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos8, &ip->ip_tos) == 0) {
+ m_freem(m);
+ return IPPROTO_DONE;
+ }
break;
}
#endif /* INET */
@@ -277,10 +277,12 @@ in6_gif_input(mp, offp, proto)
return IPPROTO_DONE;
}
ip6 = mtod(m, struct ip6_hdr *);
- if (gifp->if_flags & IFF_LINK1)
- ip6_ecn_egress(ECN_ALLOWED, &otos, &ip6->ip6_flow);
- else
- ip6_ecn_egress(ECN_NOCARE, &otos, &ip6->ip6_flow);
+ if (ip6_ecn_egress((gifp->if_flags & IFF_LINK1) ?
+ ECN_ALLOWED : ECN_NOCARE,
+ &otos, &ip6->ip6_flow) == 0) {
+ m_freem(m);
+ return IPPROTO_DONE;
+ }
break;
}
#endif
diff --git a/sys/netinet6/ip6_ecn.h b/sys/netinet6/ip6_ecn.h
index 4107cf0..6644b56 100644
--- a/sys/netinet6/ip6_ecn.h
+++ b/sys/netinet6/ip6_ecn.h
@@ -36,6 +36,6 @@
*/
#ifdef _KERNEL
-extern void ip6_ecn_ingress __P((int, u_int32_t *, const u_int32_t *));
-extern void ip6_ecn_egress __P((int, const u_int32_t *, u_int32_t *));
+extern void ip6_ecn_ingress(int, u_int32_t *, const u_int32_t *);
+extern int ip6_ecn_egress(int, const u_int32_t *, u_int32_t *);
#endif
OpenPOWER on IntegriCloud