summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorhiren <hiren@FreeBSD.org>2014-06-01 07:28:24 +0000
committerhiren <hiren@FreeBSD.org>2014-06-01 07:28:24 +0000
commitcc47b6d9479ebc0469f422069b78a7e89bdbc128 (patch)
treec5c0b3ee7f9941654343d00b25f436ca8ca672d9
parent8db2b665686650f51c5873f5b48e5bdceaed6d52 (diff)
downloadFreeBSD-src-cc47b6d9479ebc0469f422069b78a7e89bdbc128.zip
FreeBSD-src-cc47b6d9479ebc0469f422069b78a7e89bdbc128.tar.gz
ECN marking implementation for dummynet.
Changes include both DCTCP and RFC 3168 ECN marking methodology. DCTCP draft: http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00 Submitted by: Midori Kato (aoimidori27@gmail.com) Worked with: Lars Eggert (lars@netapp.com) Reviewed by: luigi, hiren
-rw-r--r--sbin/ipfw/dummynet.c24
-rw-r--r--sbin/ipfw/ipfw.811
-rw-r--r--sbin/ipfw/ipfw2.h1
-rw-r--r--sys/netinet/ip_dummynet.h1
-rw-r--r--sys/netpfil/ipfw/ip_dn_glue.c3
-rw-r--r--sys/netpfil/ipfw/ip_dn_io.c78
-rw-r--r--sys/netpfil/ipfw/ip_dummynet.c5
7 files changed, 108 insertions, 15 deletions
diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c
index 28dc2c7..cb62853 100644
--- a/sbin/ipfw/dummynet.c
+++ b/sbin/ipfw/dummynet.c
@@ -56,6 +56,7 @@ static struct _s_x dummynet_params[] = {
{ "sched_mask", TOK_SCHED_MASK },
{ "flow_mask", TOK_FLOW_MASK },
{ "droptail", TOK_DROPTAIL },
+ { "ecn", TOK_ECN },
{ "red", TOK_RED },
{ "gred", TOK_GRED },
{ "bw", TOK_BW },
@@ -239,7 +240,7 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
else
plr[0] = '\0';
- if (fs->flags & DN_IS_RED) /* RED parameters */
+ if (fs->flags & DN_IS_RED) { /* RED parameters */
sprintf(red,
"\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
(fs->flags & DN_IS_GENTLE_RED) ? 'G' : ' ',
@@ -247,7 +248,9 @@ print_flowset_parms(struct dn_fs *fs, char *prefix)
fs->min_th,
fs->max_th,
1.0 * fs->max_p / (double)(1 << SCALE_RED));
- else
+ if (fs->flags & DN_IS_ECN)
+ strncat(red, " (ecn)", 6);
+ } else
sprintf(red, "droptail");
if (prefix[0]) {
@@ -1046,13 +1049,17 @@ end_mask:
}
if ((end = strsep(&av[0], "/"))) {
double max_p = strtod(end, NULL);
- if (max_p > 1 || max_p <= 0)
- errx(EX_DATAERR, "0 < max_p <= 1");
+ if (max_p > 1 || max_p < 0)
+ errx(EX_DATAERR, "0 <= max_p <= 1");
fs->max_p = (int)(max_p * (1 << SCALE_RED));
}
ac--; av++;
break;
+ case TOK_ECN:
+ fs->flags |= DN_IS_ECN;
+ break;
+
case TOK_DROPTAIL:
NEED(fs, "droptail is only for flowsets");
fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
@@ -1175,13 +1182,20 @@ end_mask:
errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
}
+ if ((fs->flags & DN_IS_ECN) && !(fs->flags & DN_IS_RED))
+ errx(EX_USAGE, "enable red/gred for ECN");
+
if (fs->flags & DN_IS_RED) {
size_t len;
int lookup_depth, avg_pkt_size;
- if (fs->min_th >= fs->max_th)
+ if (!(fs->flags & DN_IS_ECN) && (fs->min_th >= fs->max_th))
errx(EX_DATAERR, "min_th %d must be < than max_th %d",
fs->min_th, fs->max_th);
+ else if ((fs->flags & DN_IS_ECN) && (fs->min_th > fs->max_th))
+ errx(EX_DATAERR, "min_th %d must be =< than max_th %d",
+ fs->min_th, fs->max_th);
+
if (fs->max_th == 0)
errx(EX_DATAERR, "max_th must be > 0");
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index bc8d819..b419371 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 25, 2012
+.Dd May 31, 2014
.Dt IPFW 8
.Os
.Sh NAME
@@ -2441,22 +2441,23 @@ and
control the maximum lengths that can be specified.
.Pp
.It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
+[ecn]
Make use of the RED (Random Early Detection) queue management algorithm.
.Ar w_q
and
.Ar max_p
are floating
-point numbers between 0 and 1 (0 not included), while
+point numbers between 0 and 1 (inclusive), while
.Ar min_th
and
.Ar max_th
are integer numbers specifying thresholds for queue management
(thresholds are computed in bytes if the queue has been defined
in bytes, in slots otherwise).
-The
+The two thresholds may be equal only when ecn is enabled. The
.Nm dummynet
-also supports the gentle RED variant (gred).
-Three
+also supports the gentle RED variant (gred) and, optionally, ECN (Explicit
+Congestion Notification). Three
.Xr sysctl 8
variables can be used to control the RED behaviour:
.Bl -tag -width indent
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index 6e895b8..2301c40 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -165,6 +165,7 @@ enum tokens {
TOK_BURST,
TOK_RED,
TOK_GRED,
+ TOK_ECN,
TOK_DROPTAIL,
TOK_PROTO,
/* dummynet tokens */
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index 1c09197..202f1e2 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -104,6 +104,7 @@ enum { /* user flags */
DN_HAS_PROFILE = 0x0010, /* a link has a profile */
DN_IS_RED = 0x0020,
DN_IS_GENTLE_RED= 0x0040,
+ DN_IS_ECN = 0x0080,
DN_PIPE_CMD = 0x1000, /* pipe config... */
};
diff --git a/sys/netpfil/ipfw/ip_dn_glue.c b/sys/netpfil/ipfw/ip_dn_glue.c
index 7d7e695..095758f 100644
--- a/sys/netpfil/ipfw/ip_dn_glue.c
+++ b/sys/netpfil/ipfw/ip_dn_glue.c
@@ -83,6 +83,7 @@ struct dn_flow_set {
#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
+#define DNOLD_IS_ECN 0x0040
#define DNOLD_IS_PIPE 0x4000
#define DNOLD_IS_QUEUE 0x8000
@@ -338,6 +339,8 @@ convertflags2new(int src)
dst |= DN_IS_RED;
if (src & DNOLD_IS_GENTLE_RED)
dst |= DN_IS_GENTLE_RED;
+ if (src & DNOLD_IS_ECN)
+ dst |= DN_IS_ECN;
if (src & DNOLD_HAS_PROFILE)
dst |= DN_HAS_PROFILE;
diff --git a/sys/netpfil/ipfw/ip_dn_io.c b/sys/netpfil/ipfw/ip_dn_io.c
index 1748194..df298ae 100644
--- a/sys/netpfil/ipfw/ip_dn_io.c
+++ b/sys/netpfil/ipfw/ip_dn_io.c
@@ -367,6 +367,8 @@ red_drops (struct dn_queue *q, int len)
return (0); /* accept packet */
}
if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->fs.flags & DN_IS_ECN)
+ return (1);
if (fs->fs.flags & DN_IS_GENTLE_RED) {
/*
* According to Gentle-RED, if avg is greater than
@@ -382,6 +384,8 @@ red_drops (struct dn_queue *q, int len)
return (1);
}
} else if (q->avg > fs->min_th) {
+ if (fs->fs.flags & DN_IS_ECN)
+ return (1);
/*
* We compute p_b using the linear dropping function
* p_b = c_1 * avg - c_2
@@ -414,6 +418,70 @@ red_drops (struct dn_queue *q, int len)
}
/*
+ * ECN/ECT processing (partially adopted from altq): set the CE codepoint on an ECN-capable IPv4/IPv6 packet. Returns 1 if the packet carries CE on exit (newly or already marked), 0 if it is not-ECT or not a handled IP version.
+ */
+static int
+ecn_mark(struct mbuf* m)
+{
+ struct ip *ip;
+ ip = mtod(m, struct ip *);
+
+ switch (ip->ip_v) {
+ case IPVERSION:
+ {
+ u_int8_t otos;
+ int sum;
+
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
+ return (0); /* not-ECT: cannot be marked */
+ if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ return (1); /* already CE-marked, nothing to change */
+
+ /*
+ * ecn-capable (ECT) but not yet marked:
+ * set CE in the TOS byte and fix up the header checksum
+ */
+ otos = ip->ip_tos;
+ ip->ip_tos |= IPTOS_ECN_CE;
+ /*
+ * incremental checksum update (RFC 1624, eqn. 3), with m = old TOS, m' = new TOS:
+ * HC' = ~(~HC + ~m + m')
+ */
+ sum = ~ntohs(ip->ip_sum) & 0xffff;
+ sum += (~otos & 0xffff) + ip->ip_tos; /* ~m + m' */
+ sum = (sum >> 16) + (sum & 0xffff);
+ sum += (sum >> 16); /* add carry */
+ ip->ip_sum = htons(~sum & 0xffff);
+ return (1);
+ }
+#ifdef INET6
+ case (IPV6_VERSION >> 4):
+ {
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ u_int32_t flowlabel;
+
+ flowlabel = ntohl(ip6->ip6_flow);
+ if ((flowlabel >> 28) != 6)
+ return (0); /* version mismatch! */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_NOTECT << 20))
+ return (0); /* not-ECT */
+ if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
+ (IPTOS_ECN_CE << 20))
+ return (1); /* already marked */
+ /*
+ * ecn-capable but not marked, mark CE (the ECN bits are tested and set at bit offset 20 of the host-order ip6_flow word; no checksum is updated here)
+ */
+ flowlabel |= (IPTOS_ECN_CE << 20);
+ ip6->ip6_flow = htonl(flowlabel);
+ return (1);
+ }
+#endif
+ }
+ return (0);
+}
+
+/*
+
+/*
* Enqueue a packet in q, subject to space and queue management policy
* (whose parameters are in q->fs).
* Update stats for the queue and the scheduler.
@@ -444,8 +512,10 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
goto drop;
if (f->plr && random() < f->plr)
goto drop;
- if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len))
- goto drop;
+ if (f->flags & DN_IS_RED && red_drops(q, m->m_pkthdr.len)) {
+ if (!(f->flags & DN_IS_ECN) || !ecn_mark(m))
+ goto drop;
+ }
if (f->flags & DN_QSIZE_BYTES) {
if (q->ni.len_bytes > f->qsize)
goto drop;
@@ -457,14 +527,14 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
q->ni.len_bytes += len;
ni->length++;
ni->len_bytes += len;
- return 0;
+ return (0);
drop:
io_pkt_drop++;
q->ni.drops++;
ni->drops++;
FREE_PKT(m);
- return 1;
+ return (1);
}
/*
diff --git a/sys/netpfil/ipfw/ip_dummynet.c b/sys/netpfil/ipfw/ip_dummynet.c
index 453c0f1..5268686 100644
--- a/sys/netpfil/ipfw/ip_dummynet.c
+++ b/sys/netpfil/ipfw/ip_dummynet.c
@@ -1071,7 +1071,10 @@ config_red(struct dn_fsk *fs)
fs->min_th = SCALE(fs->fs.min_th);
fs->max_th = SCALE(fs->fs.max_th);
- fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+ if (fs->fs.max_th == fs->fs.min_th)
+ fs->c_1 = fs->max_p;
+ else
+ fs->c_1 = SCALE((int64_t)(fs->max_p)) / (fs->fs.max_th - fs->fs.min_th);
fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
if (fs->fs.flags & DN_IS_GENTLE_RED) {
OpenPOWER on IntegriCloud