diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/in_gif.c | 37 | ||||
-rw-r--r-- | sys/netinet/ip.h | 19 | ||||
-rw-r--r-- | sys/netinet/ip_ecn.c | 84 | ||||
-rw-r--r-- | sys/netinet/ip_ecn.h | 8 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 17 |
5 files changed, 130 insertions, 35 deletions
diff --git a/sys/netinet/in_gif.c b/sys/netinet/in_gif.c index 3d2ff66..fd6397e 100644 --- a/sys/netinet/in_gif.c +++ b/sys/netinet/in_gif.c @@ -161,10 +161,8 @@ in_gif_output(ifp, family, m) /* version will be set in ip_output() */ iphdr.ip_ttl = ip_gif_ttl; iphdr.ip_len = m->m_pkthdr.len + sizeof(struct ip); - if (ifp->if_flags & IFF_LINK1) - ip_ecn_ingress(ECN_ALLOWED, &iphdr.ip_tos, &tos); - else - ip_ecn_ingress(ECN_NOCARE, &iphdr.ip_tos, &tos); + ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED : ECN_NOCARE, + &iphdr.ip_tos, &tos); /* prepend new IP header */ M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); @@ -250,10 +248,12 @@ in_gif_input(m, off) return; } ip = mtod(m, struct ip *); - if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &ip->ip_tos); - else - ip_ecn_egress(ECN_NOCARE, &otos, &ip->ip_tos); + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos, &ip->ip_tos) == 0) { + m_freem(m); + return; + } break; } #endif @@ -261,7 +261,8 @@ in_gif_input(m, off) case IPPROTO_IPV6: { struct ip6_hdr *ip6; - u_int8_t itos; + u_int8_t itos, oitos; + af = AF_INET6; if (m->m_len < sizeof(*ip6)) { m = m_pullup(m, sizeof(*ip6)); @@ -269,13 +270,17 @@ in_gif_input(m, off) return; } ip6 = mtod(m, struct ip6_hdr *); - itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; - if (gifp->if_flags & IFF_LINK1) - ip_ecn_egress(ECN_ALLOWED, &otos, &itos); - else - ip_ecn_egress(ECN_NOCARE, &otos, &itos); - ip6->ip6_flow &= ~htonl(0xff << 20); - ip6->ip6_flow |= htonl((u_int32_t)itos << 20); + itos = oitos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + if (ip_ecn_egress((gifp->if_flags & IFF_LINK1) ? + ECN_ALLOWED : ECN_NOCARE, + &otos, &itos) == 0) { + m_freem(m); + return; + } + if (itos != oitos) { + ip6->ip6_flow &= ~htonl(0xff << 20); + ip6->ip6_flow |= htonl((u_int32_t)itos << 20); + } break; } #endif /* INET6 */ diff --git a/sys/netinet/ip.h b/sys/netinet/ip.h index 24f66ce..025ad08 100644 --- a/sys/netinet/ip.h +++ b/sys/netinet/ip.h @@ -84,10 +84,11 @@ CTASSERT(sizeof (struct ip) == 20); #define IPTOS_THROUGHPUT 0x08 #define IPTOS_RELIABILITY 0x04 #define IPTOS_MINCOST 0x02 -/* ECN bits proposed by Sally Floyd */ -#define IPTOS_CE 0x01 /* congestion experienced */ -#define IPTOS_ECT 0x02 /* ECN-capable transport */ - +#if 1 +/* ECN RFC3168 obsoletes RFC2481, and these will be deprecated soon. */ +#define IPTOS_CE 0x01 +#define IPTOS_ECT 0x02 +#endif /* * Definitions for IP precedence (also in ip_tos) (hopefully unused) @@ -102,6 +103,16 @@ CTASSERT(sizeof (struct ip) == 20); #define IPTOS_PREC_ROUTINE 0x00 /* + * ECN (Explicit Congestion Notification) codepoints in RFC3168 + * mapped to the lower 2 bits of the TOS field. + */ +#define IPTOS_ECN_NOTECT 0x00 /* not-ECT */ +#define IPTOS_ECN_ECT1 0x01 /* ECN-capable transport (1) */ +#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ +#define IPTOS_ECN_CE 0x03 /* congestion experienced */ +#define IPTOS_ECN_MASK 0x03 /* ECN field mask */ + +/* * Definitions for options. */ #define IPOPT_COPIED(o) ((o)&0x80) diff --git a/sys/netinet/ip_ecn.c b/sys/netinet/ip_ecn.c index de3d38e..9ea2f6b 100644 --- a/sys/netinet/ip_ecn.c +++ b/sys/netinet/ip_ecn.c @@ -1,5 +1,5 @@ /* $FreeBSD$ */ -/* $KAME: ip_ecn.c,v 1.11 2001/05/03 16:09:29 itojun Exp $ */ +/* $KAME: ip_ecn.c,v 1.12 2002/01/07 11:34:47 kjc Exp $ */ /* * Copyright (C) 1999 WIDE Project. @@ -56,6 +56,37 @@ #endif /* + * ECN and TOS (or TCLASS) processing rules at tunnel encapsulation and + * decapsulation from RFC3168: + * + * Outer Hdr at Inner Hdr at + * Encapsulator Decapsulator + * Header fields: -------------------- ------------ + * DS Field copied from inner hdr no change + * ECN Field constructed by (I) constructed by (E) + * + * ECN_ALLOWED (full functionality): + * (I) if the ECN field in the inner header is set to CE, then set the + * ECN field in the outer header to ECT(0). + * otherwise, copy the ECN field to the outer header. + * + * (E) if the ECN field in the outer header is set to CE and the ECN + * field of the inner header is not-ECT, drop the packet. + * if the ECN field in the inner header is set to ECT(0) or ECT(1) + * and the ECN field in the outer header is set to CE, then copy CE to + * the inner header. otherwise, make no change to the inner header. + * + * ECN_FORBIDDEN (limited functionality): + * (I) set the ECN field to not-ECT in the outer header. + * + * (E) if the ECN field in the outer header is set to CE, drop the packet. + * otherwise, make no change to the ECN field in the inner header. + * + * the drop rule is for backward compatibility and protection against + * erasure of CE. + */ + +/* * modify outer ECN (TOS) field on ingress operation (tunnel encapsulation). */ void @@ -70,10 +101,18 @@ ip_ecn_ingress(mode, outer, inner) *outer = *inner; switch (mode) { case ECN_ALLOWED: /* ECN allowed */ - *outer &= ~IPTOS_CE; + /* + * full-functionality: if the inner is CE, set ECT(0) + * to the outer. otherwise, copy the ECN field. + */ + if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + *outer &= ~IPTOS_ECN_ECT1; break; case ECN_FORBIDDEN: /* ECN forbidden */ - *outer &= ~(IPTOS_ECT | IPTOS_CE); + /* + * limited-functionality: set not-ECT to the outer + */ + *outer &= ~IPTOS_ECN_MASK; break; case ECN_NOCARE: /* no consideration to ECN */ break; @@ -82,8 +121,9 @@ ip_ecn_ingress(mode, outer, inner) /* * modify inner ECN (TOS) field on egress operation (tunnel decapsulation). + * the caller should drop the packet if the return value is 0. */ -void +int ip_ecn_egress(mode, outer, inner) int mode; const u_int8_t *outer; @@ -94,13 +134,28 @@ ip_ecn_egress(mode, outer, inner) switch (mode) { case ECN_ALLOWED: - if (*outer & IPTOS_CE) - *inner |= IPTOS_CE; + /* + * full-functionality: if the outer is CE and the inner is + * not-ECT, should drop it. otherwise, copy CE. + */ + if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) { + if ((*inner & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) + return (0); + *inner |= IPTOS_ECN_CE; + } break; case ECN_FORBIDDEN: /* ECN forbidden */ + /* + * limited-functionality: if the outer is CE, should drop it. + * otherwise, leave the inner. + */ + if ((*outer & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + return (0); + break; case ECN_NOCARE: /* no consideration to ECN */ break; } + return (1); } #ifdef INET6 @@ -115,28 +170,31 @@ ip6_ecn_ingress(mode, outer, inner) if (!outer || !inner) panic("NULL pointer passed to ip6_ecn_ingress"); - outer8 = (ntohl(*outer) >> 20) & 0xff; inner8 = (ntohl(*inner) >> 20) & 0xff; ip_ecn_ingress(mode, &outer8, &inner8); *outer &= ~htonl(0xff << 20); *outer |= htonl((u_int32_t)outer8 << 20); } -void +int ip6_ecn_egress(mode, outer, inner) int mode; const u_int32_t *outer; u_int32_t *inner; { - u_int8_t outer8, inner8; + u_int8_t outer8, inner8, oinner8; if (!outer || !inner) panic("NULL pointer passed to ip6_ecn_egress"); outer8 = (ntohl(*outer) >> 20) & 0xff; - inner8 = (ntohl(*inner) >> 20) & 0xff; - ip_ecn_egress(mode, &outer8, &inner8); - *inner &= ~htonl(0xff << 20); - *inner |= htonl((u_int32_t)inner8 << 20); + inner8 = oinner8 = (ntohl(*inner) >> 20) & 0xff; + if (ip_ecn_egress(mode, &outer8, &inner8) == 0) + return (0); + if (inner8 != oinner8) { + *inner &= ~htonl(0xff << 20); + *inner |= htonl((u_int32_t)inner8 << 20); + } + return (1); } #endif diff --git a/sys/netinet/ip_ecn.h b/sys/netinet/ip_ecn.h index 1a38a48..01163ed 100644 --- a/sys/netinet/ip_ecn.h +++ b/sys/netinet/ip_ecn.h @@ -1,5 +1,5 @@ /* $FreeBSD$ */ -/* $KAME: ip_ecn.h,v 1.6 2001/05/03 14:51:48 itojun Exp $ */ +/* $KAME: ip_ecn.h,v 1.8 2002/01/07 11:34:47 kjc Exp $ */ /* * Copyright (C) 1999 WIDE Project. @@ -35,6 +35,9 @@ * http://www.aciri.org/floyd/papers/draft-ipsec-ecn-00.txt */ +#ifndef _NETINET_IP_ECN_H_ +#define _NETINET_IP_ECN_H_ + #if defined(_KERNEL) && !defined(_LKM) #include "opt_inet.h" #endif @@ -45,5 +48,6 @@ #ifdef _KERNEL extern void ip_ecn_ingress(int, u_int8_t *, const u_int8_t *); -extern void ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +extern int ip_ecn_egress(int, const u_int8_t *, u_int8_t *); +#endif #endif diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 02bd132..30a5b75 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1037,6 +1037,7 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, struct mbuf *t; int hlen = ip->ip_hl << 2; int i, next; + u_int8_t ecn, ecn0; IPQ_LOCK_ASSERT(); @@ -1086,6 +1087,22 @@ ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp, #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) /* + * Handle ECN by comparing this segment with the first one; + * if CE is set, do not lose CE. + * drop if CE and not-ECT are mixed for the same packet. + */ + ecn = ip->ip_tos & IPTOS_ECN_MASK; + ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; + if (ecn == IPTOS_ECN_CE) { + if (ecn0 == IPTOS_ECN_NOTECT) + goto dropfrag; + if (ecn0 != IPTOS_ECN_CE) + GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; + } + if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) + goto dropfrag; + + /* * Find a segment which begins after this one does. */ for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) |