5 files changed, 549 insertions, 22 deletions
diff --git a/sbin/ipfw/dummynet.c b/sbin/ipfw/dummynet.c
index 5c83db9..c6e6b37 100644
--- a/sbin/ipfw/dummynet.c
+++ b/sbin/ipfw/dummynet.c
@@ -69,6 +69,7 @@ static struct _s_x dummynet_params[] = {
 	{ "dst-ip6",		TOK_DSTIP6},
 	{ "src-ipv6",		TOK_SRCIP6},
 	{ "src-ip6",		TOK_SRCIP6},
+	{ "profile",		TOK_PIPE_PROFILE},
 	{ "dummynet-params",	TOK_NULL },
 	{ NULL, 0 }	/* terminator */
 };
@@ -248,6 +249,19 @@ print_flowset_parms(struct dn_flow_set *fs, char *prefix)
 	    prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
 }
 
+static void
+print_extra_delay_parms(struct dn_pipe *p, char *prefix)
+{	/* Print a one-line summary of the delay profile of pipe p, if any. */
+	double loss;	/* loss_level as a fraction of samples_no */
+	if (p->samples_no <= 0)	/* no samples, nothing to report */
+		return;
+
+	loss = p->loss_level;
+	loss /= p->samples_no;	/* loss is a double, so this is not truncated */
+	printf("%s profile: name \"%s\" loss %f samples %d\n",
+		prefix, p->name, loss, p->samples_no);
+}
+
 void
 ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
 {
@@ -296,6 +310,9 @@ ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
 
 		sprintf(prefix, "%05d: %s %4d ms ",
 		    p->pipe_nr, buf, p->delay);
+
+		print_extra_delay_parms(p, prefix);
+
 		print_flowset_parms(&(p->fs), prefix);
 		if (co.verbose)
 			printf("   V %20llu\n", align_uint64(&p->V) >> MY_M);
@@ -346,9 +363,284 @@ ipfw_delete_pipe(int pipe_or_queue, int i)
 	return i;
 }
 
+/*
+ * Code to parse delay profiles.
+ *
+ * Some link types introduce extra delays in the transmission
+ * of a packet, e.g. because of MAC level framing, contention on
+ * the use of the channel, MAC level retransmissions and so on.
+ * From our point of view, the channel is effectively unavailable
+ * for this extra time, which is constant or variable depending
+ * on the link type. Additionally, packets may be dropped after this
+ * time (e.g. on a wireless link after too many retransmissions).
+ * We can model the additional delay with an empirical curve
+ * that represents its distribution.
+ *
+ *	cumulative probability
+ *	1.0 ^
+ *	    |
+ *	L   +-- loss-level          x
+ *	    |                 ******
+ *	    |                *
+ *	    |           *****
+ *	    |          *
+ *	    |        **
+ *	    |       *                         
+ *	    +-------*------------------->
+ *			delay
+ *
+ * The empirical curve may have both vertical and horizontal lines.
+ * Vertical lines represent constant delay for a range of
+ * probabilities; horizontal lines correspond to a discontinuity
+ * in the delay distribution: the pipe will use the largest delay
+ * for a given probability.
+ * 
+ * To pass the curve to dummynet, we must store the parameters
+ * in a file as described below, and issue the command
+ *
+ *      ipfw pipe <n> config ... bw XXX profile <filename> ...
+ *
+ * The file format is the following, with whitespace acting as
+ * a separator and '#' indicating the beginning of a comment:
+ *
+ *	samples N
+ *		the number of samples used in the internal
+ *		representation (2..1024; default 100);
+ *
+ *	loss-level L 
+ *		The probability above which packets are lost.
+ *               (0.0 <= L <= 1.0, default 1.0 i.e. no loss);
+ *
+ *	name identifier
+ *		An optional name (listed by "ipfw pipe show")
+ *		to identify the distribution;
+ *
+ *	"delay prob" | "prob delay"
+ *		One of these two lines is mandatory and defines
+ *		the format of the following lines with data points.
+ *
+ *	XXX YYY
+ *		2 or more lines representing points in the curve,
+ *		with either delay or probability first, according
+ *		to the chosen format.
+ *		The unit for delay is milliseconds.
+ *
+ * Data points do not need to be ordered or equal to the number
+ * specified in the "samples" line. ipfw will sort and interpolate
+ * the curve as needed.
+ *
+ * Example of a profile file:
+ 
+        name    bla_bla_bla
+        samples 100
+        loss-level    0.86
+        prob    delay
+        0       200	# minimum overhead is 200ms
+        0.5     200
+        0.5     300
+        0.8     1000
+        0.9     1300
+        1       1300
+ 
+ * Internally, we will convert the curve to a fixed number of
+ * samples, and when it is time to transmit a packet we will
+ * model the extra delay as extra bits in the packet.
+ *
+ */
+
+#define ED_MAX_LINE_LEN	(256 + ED_MAX_NAME_LEN)	/* line buffer size; parenthesized so it expands safely */
+#define ED_TOK_SAMPLES	"samples"	/* keyword: number of samples */
+#define ED_TOK_LOSS	"loss-level"	/* keyword: probability above which packets are lost */
+#define ED_TOK_NAME	"name"	/* keyword: name of the profile */
+#define ED_TOK_DELAY	"delay"	/* keyword: data points are listed delay first */
+#define ED_TOK_PROB	"prob"	/* keyword: data points are listed probability first */
+#define ED_SEPARATORS	" \t\n"	/* characters that separate tokens on a line */
+#define ED_MIN_SAMPLES_NO	2	/* minimum number of data points in a profile */
+
+/*
+ * returns 1 if s has only digits and at most one '.' (no sign), else 0
+ */
+static int
+is_valid_number(const char *s)
+{
+	int i, dots_found = 0;
+	int len = strlen(s);
+
+	for (i = 0; i<len; ++i)
+		if (!isdigit(s[i]) && (s[i] !='.' || ++dots_found > 1))
+			return 0;	/* neither a digit nor the first '.' */
+	return 1;	/* note: also reached when s is empty */
+}
+
+struct point {	/* one data point of the empirical delay curve */
+	double prob;	/* cumulative probability; load_extra_delays rejects values > 1.0 */
+	double delay;	/* extra delay; the file format gives it in milliseconds */
+};
+
+int
+compare_points(const void *vp1, const void *vp2)
+{	/* qsort() comparison function: order by prob, ties broken by delay */
+	const struct point *p1 = vp1;
+	const struct point *p2 = vp2;
+	double res = 0;
+
+	res = p1->prob - p2->prob;
+	if (res == 0)	/* equal probabilities: compare the delays instead */
+		res = p1->delay - p2->delay;
+	if (res < 0)
+		return -1;	/* p1 sorts before p2 */
+	else if (res > 0)
+		return 1;	/* p1 sorts after p2 */
+	else
+		return 0;	/* same prob and same delay */
+}
+
+#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
+
+/*
+ * Load the delay profile in 'filename' into pipe p (p->samples must have
+ * room for ED_MAX_SAMPLES_NO ints); errors are fatal via err()/errx().
+ */
+static void
+load_extra_delays(const char *filename, struct dn_pipe *p)
+{
+	char    line[ED_MAX_LINE_LEN];
+	FILE    *f;
+	int     lineno = 0;
+	int     i;
+	int     samples = -1;
+	double  loss = -1.0;
+	char    profile_name[ED_MAX_NAME_LEN];
+	int     delay_first = -1;
+	int     do_points = 0;
+	struct point    points[ED_MAX_SAMPLES_NO];
+	int     points_no = 0;
+
+	profile_name[0] = '\0';
+	f = fopen(filename, "r");
+	if (f == NULL)
+		err(EX_UNAVAILABLE, "fopen: %s", filename);
+
+	while (fgets(line, ED_MAX_LINE_LEN, f)) {         /* read commands */
+		char *s, *cur = line, *name = NULL, *arg = NULL;
+
+		++lineno;
+		/* parse the line */
+		while (cur) {
+			s = strsep(&cur, ED_SEPARATORS);
+			if (s == NULL || *s == '#')
+				break;
+			if (*s == '\0')
+				continue;
+			if (arg)
+				errx(ED_EFMT("too many arguments"));
+			if (name == NULL)
+				name = s;
+			else
+				arg = s;
+		}
+		if (name == NULL)	/* empty line */
+			continue;
+		if (arg == NULL)
+			errx(ED_EFMT("missing arg for %s"), name);
+
+		if (!strcasecmp(name, ED_TOK_SAMPLES)) {
+		    if (samples > 0)
+			errx(ED_EFMT("duplicate ``samples'' line"));
+		    if (atoi(arg) <=0)
+			errx(ED_EFMT("invalid number of samples"));
+		    samples = atoi(arg);
+		    if (samples>ED_MAX_SAMPLES_NO)
+			    errx(ED_EFMT("too many samples, maximum is %d"),
+				ED_MAX_SAMPLES_NO);
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_LOSS)) {
+		    if (loss != -1.0)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    if (!is_valid_number(arg))
+			errx(ED_EFMT("invalid %s"), arg);
+		    loss = atof(arg);
+		    if (loss > 1)
+			errx(ED_EFMT("%s greater than 1.0"), name);
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_NAME)) {
+		    if (profile_name[0] != '\0')
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    strncpy(profile_name, arg, sizeof(profile_name) - 1);
+		    profile_name[sizeof(profile_name)-1] = '\0';
+		    do_points = 0;
+		} else if (!strcasecmp(name, ED_TOK_DELAY)) {
+		    if (do_points)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    delay_first = 1;
+		    do_points = 1;
+		} else if (!strcasecmp(name, ED_TOK_PROB)) {
+		    if (do_points)
+			errx(ED_EFMT("duplicated token: %s"), name);
+		    delay_first = 0;
+		    do_points = 1;
+		} else if (do_points) {
+		    if (!is_valid_number(name) || !is_valid_number(arg))
+			errx(ED_EFMT("invalid point found"));
+		    if (points_no >= ED_MAX_SAMPLES_NO)	/* points[] is full */
+			errx(ED_EFMT("too many points, maximum is %d"),
+			    ED_MAX_SAMPLES_NO);
+		    points[points_no].delay = atof(delay_first ? name : arg);
+		    points[points_no].prob = atof(delay_first ? arg : name);
+		    if (points[points_no].prob > 1.0)
+			errx(ED_EFMT("probability greater than 1.0"));
+		    ++points_no;
+		} else {
+		    errx(ED_EFMT("unrecognised command '%s'"), name);
+		}
+	}
+	fclose(f);	/* done with the file */
+
+	if (samples == -1) {
+	    warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES);
+	    samples = 100;
+	}
+	if (loss == -1.0) {
+	    warnx("'%s' not found, assuming no loss", ED_TOK_LOSS);
+	    loss = 1;
+	}
+
+	/* make sure that there are enough points. */
+	if (points_no < ED_MIN_SAMPLES_NO)
+	    errx(ED_EFMT("too few samples, need at least %d"),
+		ED_MIN_SAMPLES_NO);
+
+	qsort(points, points_no, sizeof(struct point), compare_points);
+	/* slots the curve does not cover get no extra delay, not garbage */
+	memset(p->samples, 0, samples * sizeof(p->samples[0]));
+
+	/* interpolation */
+	for (i = 0; i<points_no-1; ++i) {
+	    double y1 = points[i].prob * samples;
+	    double x1 = points[i].delay;
+	    double y2 = points[i+1].prob * samples;
+	    double x2 = points[i+1].delay;
+	    int index = y1;
+	    int stop = y2;
+	    if (x1 == x2) {
+		for (; index<stop; ++index)
+		    p->samples[index] = x1;
+	    } else {
+		double m = (y2-y1)/(x2-x1);
+		double c = y1 - m*x1;
+		for (; index<stop ; ++index)
+		    p->samples[index] = (index - c)/m;
+	    }
+	}
+	p->samples_no = samples;
+	p->loss_level = loss * samples;
+	strncpy(p->name, profile_name, sizeof(p->name));
+}
+
 void
 ipfw_config_pipe(int ac, char **av)
 {
+	int samples[ED_MAX_SAMPLES_NO];
 	struct dn_pipe p;
 	int i;
 	char *end;
@@ -611,6 +903,15 @@ end_mask:
 			ac--; av++;
 			break;
 
+		case TOK_PIPE_PROFILE:
+			if (co.do_pipe != 1)
+			    errx(EX_DATAERR, "extra delay only valid for pipes");
+			NEED1("extra delay needs the file name\n");
+			p.samples = &samples[0];
+			load_extra_delays(av[0], &p);
+			--ac; ++av;
+			break;
+
 		default:
 			errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
 		}
@@ -620,6 +921,9 @@ end_mask:
 			errx(EX_DATAERR, "pipe_nr must be > 0");
 		if (p.delay > 10000)
 			errx(EX_DATAERR, "delay must be < 10000");
+		if (p.samples_no > 0 && p.bandwidth == 0)
+			errx(EX_DATAERR,
+				"profile requires a bandwidth limit");
 	} else { /* co.do_pipe == 2, queue */
 		if (p.fs.parent_nr == 0)
 			errx(EX_DATAERR, "pipe must be > 0");
@@ -713,7 +1017,18 @@ end_mask:
 			weight *= 1 - w_q;
 		p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
 	}
-	i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
+	if (p.samples_no <= 0) {
+		i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
+	} else {
+		struct dn_pipe_max pm;
+		int len = sizeof(pm);
+
+		memcpy(&pm.pipe, &p, sizeof(pm.pipe));
+		memcpy(&pm.samples, samples, sizeof(pm.samples));
+
+		i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
+	}
+
 	if (i)
 		err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
 }
diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 44eb05c..4a6d203 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -1,7 +1,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 5, 2009
+.Dd April 9, 2009
 .Dt IPFW 8
 .Os
 .Sh NAME
@@ -1942,6 +1942,80 @@ with
 to reduce
 the granularity to 1ms or less).
 Default value is 0, meaning no delay.
+.Pp
+.It Cm profile Ar filename
+A file specifying the additional overhead incurred in the transmission
+of a packet on the link.
+.Pp
+Some link types introduce extra delays in the transmission
+of a packet, e.g. because of MAC level framing, contention on
+the use of the channel, MAC level retransmissions and so on.
+From our point of view, the channel is effectively unavailable
+for this extra time, which is constant or variable depending
+on the link type. Additionally, packets may be dropped after this
+time (e.g. on a wireless link after too many retransmissions).
+We can model the additional delay with an empirical curve
+that represents its distribution.
+.Bd -literal -offset indent
+      cumulative probability
+      1.0 ^
+          |
+      L   +-- loss-level          x
+          |                 ******
+          |                *
+          |           *****
+          |          *
+          |        **
+          |       *
+          +-------*------------------->
+                      delay
+.Ed
+The empirical curve may have both vertical and horizontal lines.
+Vertical lines represent constant delay for a range of
+probabilities.
+Horizontal lines correspond to a discontinuity in the delay
+distribution: the pipe will use the largest delay for a
+given probability.
+.Pp
+The file format is the following, with whitespace acting as
+a separator and '#' indicating the beginning of a comment:
+.Bl -tag -width indent
+.It Cm samples Ar N
+the number of samples used in the internal
+representation (2..1024; default 100);
+.It Cm loss-level Ar L
+The probability above which packets are lost.
+(0.0 <= L <= 1.0, default 1.0 i.e. no loss);
+.It Cm name Ar identifier
+An optional name (listed by "ipfw pipe show")
+to identify the distribution;
+.It Cm "delay prob" | "prob delay"
+One of these two lines is mandatory and defines
+the format of the following lines with data points.
+.It Ar XXX Ar YYY
+2 or more lines representing points in the curve,
+with either delay or probability first, according
+to the chosen format.
+The unit for delay is milliseconds.
+Data points do not need to be ordered or equal to the number
+specified in the "samples" line. ipfw will sort and interpolate
+the curve as needed.
+.El
+.Pp
+Example of a profile file:
+.Bd -literal -offset indent
+name    bla_bla_bla
+samples 100
+loss-level    0.86
+prob    delay
+0       200	# minimum overhead is 200ms
+0.5     200
+0.5     300
+0.8     1000
+0.9     1300
+1       1300
+#configuration file end
+.Ed
 .El
 .Pp
 The following parameters can be configured for a queue:
@@ -2917,10 +2991,17 @@ API based upon code written by
 .An Daniel Boulet
 for BSDI.
 .Pp
+Dummynet has been introduced by Luigi Rizzo in 1997-1998.
+.Pp
 Some early work (1999-2000) on the
 .Nm dummynet
 traffic shaper supported by Akamba Corp.
 .Pp
+The ipfw core (ipfw2) has been completely redesigned and
+reimplemented by Luigi Rizzo in summer 2002. Further
+actions and
+options have been added by various developers over the years.
+.Pp
 .An -nosplit
 In-kernel NAT support written by
 .An Paolo Pisati Aq piso@FreeBSD.org
@@ -2933,6 +3014,10 @@ support has been developed by
 The primary developers and maintainers are David Hayes and Jason But.
 For further information visit:
 .Aq http://www.caia.swin.edu.au/urp/SONATA
+.Pp
+Delay profiles have been developed by Alessandro Cerri and
+Luigi Rizzo, supported by the
+European Commission within Projects Onelab and Onelab2.
 .Sh BUGS
 The syntax has grown over the years and sometimes it might be confusing.
 Unfortunately, backward compatibility prevents cleaning up mistakes
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index 508bb6c..5b49f55 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -153,6 +153,7 @@ enum tokens {
 	TOK_MASK,
 	TOK_BW,
 	TOK_DELAY,
+	TOK_PIPE_PROFILE,
 	TOK_RED,
 	TOK_GRED,
 	TOK_DROPTAIL,
diff --git a/sys/netinet/ip_dummynet.c b/sys/netinet/ip_dummynet.c
index 4c7f60a..5816be9 100644
--- a/sys/netinet/ip_dummynet.c
+++ b/sys/netinet/ip_dummynet.c
@@ -519,14 +519,64 @@ transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
 	}
 }
 
+#define div64(a, b)	((int64_t)(a) / (int64_t)(b))
+#define DN_TO_DROP	0xffff
 /*
- * the following macro computes how many ticks we have to wait
- * before being able to transmit a packet. The credit is taken from
- * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
+ * Compute how many ticks we have to wait before being able to send
+ * a packet. This is computed as the "wire time" for the packet
+ * (length + extra bits), minus the credit available, scaled to ticks.
+ * Check that the result is not negative (it could be if we have
+ * too much leftover credit in q->numbytes).
  */
-#define SET_TICKS(_m, q, p)	\
-    ((_m)->m_pkthdr.len * 8 * hz - (q)->numbytes + p->bandwidth - 1) / \
-    p->bandwidth;
+static inline dn_key
+set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p)
+{
+	int64_t ret;	/* signed: the clamp below relies on ret being able to go negative */
+
+	ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz
+		- q->numbytes + p->bandwidth - 1 , p->bandwidth);	/* "+ bandwidth - 1": presumably rounds the quotient up */
+#if 0
+	printf("%s %d extra_bits %d numb %d ret %d\n",
+		__FUNCTION__, __LINE__,
+		(int)(q->extra_bits & 0xffffffff),
+		(int)(q->numbytes & 0xffffffff),
+		(int)(ret & 0xffffffff));
+#endif
+	if (ret < 0)
+		ret = 0;	/* never return a negative number of ticks */
+	return ret;
+}
+
+/*
+ * Convert the additional MAC overheads/delays into an equivalent
+ * number of bits for the given data rate. The samples are in milliseconds
+ * so we need to divide by 1000.
+ */
+static dn_key
+compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p)
+{
+	int index;
+	dn_key extra_bits;
+
+	if (!p->samples || p->samples_no == 0)	/* no delay profile on this pipe */
+		return 0;
+	index  = random() % p->samples_no;	/* choose one of the samples at random */
+	extra_bits = ((dn_key)p->samples[index] * p->bandwidth) / 1000;
+	if (index >= p->loss_level) {	/* at or above the loss level: mark the packet for dropping */
+		struct dn_pkt_tag *dt = dn_tag_get(pkt);
+		if (dt)	/* mark only when a tag was returned */
+			dt->dn_dir = DN_TO_DROP;
+	}
+	return extra_bits;	/* returned even when the packet was marked DN_TO_DROP */
+}
+
+static void
+free_pipe(struct dn_pipe *p)
+{	/* Release pipe p together with its delay-profile samples, if any. */
+	if (p->samples)
+		free(p->samples, M_DUMMYNET);
+	free(p, M_DUMMYNET);	/* p must not be used after this call */
+}
 
 /*
  * extract pkt from queue, compute output time (could be now)
@@ -585,12 +635,16 @@ ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
 	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
 	while ((pkt = q->head) != NULL) {
 		int len = pkt->m_pkthdr.len;
-		int len_scaled = p->bandwidth ? len * 8 * hz : 0;
+		dn_key len_scaled = p->bandwidth ? len*8*hz
+			+ q->extra_bits*hz
+			: 0;
 
-		if (len_scaled > q->numbytes)
+		if (DN_KEY_GT(len_scaled, q->numbytes))
 			break;
 		q->numbytes -= len_scaled;
 		move_pkt(pkt, q, p, len);
+		if (q->head)
+			q->extra_bits = compute_extra_bits(q->head, p);
 	}
 	/*
 	 * If we have more packets queued, schedule next ready event
@@ -600,7 +654,7 @@ ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
 	 * ticks to go for the finish time of the packet.
 	 */
 	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
-		dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
+		dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */
 
 		q->sched_time = curr_time;
 		heap_insert(&ready_heap, curr_time + t, (void *)q);
@@ -933,6 +987,12 @@ dummynet_send(struct mbuf *m)
 		case DN_TO_ETH_OUT:
 			ether_output_frame(pkt->ifp, m);
 			break;
+
+		case DN_TO_DROP:
+			/* drop the packet after some time */
+			m_freem(m);
+			break;
+
 		default:
 			printf("dummynet: bad switch %d!\n", pkt->dn_dir);
 			m_freem(m);
@@ -1367,8 +1427,10 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
 		/* Fixed-rate queue: just insert into the ready_heap. */
 		dn_key t = 0;
 
-		if (pipe->bandwidth && m->m_pkthdr.len * 8 * hz > q->numbytes)
-			t = SET_TICKS(m, q, pipe);
+		if (pipe->bandwidth) {
+			q->extra_bits = compute_extra_bits(m, pipe);
+			t = set_ticks(m, q, pipe);
+		}
 		q->sched_time = curr_time;
 		if (t == 0)		/* Must process it now. */
 			ready_event(q, &head, &tail);
@@ -1555,7 +1617,7 @@ dummynet_flush(void)
 		SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) {
 			SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next);
 			purge_pipe(pipe);
-			free(pipe, M_DUMMYNET);
+			free_pipe(pipe);
 		}
 	DUMMYNET_UNLOCK();
 }
@@ -1775,11 +1837,38 @@ config_pipe(struct dn_pipe *p)
 		pipe->delay = p->delay;
 		set_fs_parms(&(pipe->fs), pfs);
 
+		/* Handle changes in the delay profile. */
+		if (p->samples_no > 0) {
+			if (pipe->samples_no != p->samples_no) {
+				if (pipe->samples != NULL)
+					free(pipe->samples, M_DUMMYNET);
+				pipe->samples =
+				    malloc(p->samples_no*sizeof(dn_key),
+					M_DUMMYNET, M_NOWAIT | M_ZERO);
+				if (pipe->samples == NULL) {
+					DUMMYNET_UNLOCK();
+					printf("dummynet: no memory "
+						"for new samples\n");
+					return (ENOMEM);
+				}
+				pipe->samples_no = p->samples_no;
+			}
+
+			strncpy(pipe->name,p->name,sizeof(pipe->name));
+			pipe->loss_level = p->loss_level;
+			for (i = 0; i<pipe->samples_no; ++i)
+				pipe->samples[i] = p->samples[i];
+		} else if (pipe->samples != NULL) {
+			free(pipe->samples, M_DUMMYNET);
+			pipe->samples = NULL;
+			pipe->samples_no = 0;
+		}
+
 		if (pipe->fs.rq == NULL) {	/* a new pipe */
 			error = alloc_hash(&(pipe->fs), pfs);
 			if (error) {
 				DUMMYNET_UNLOCK();
-				free(pipe, M_DUMMYNET);
+				free_pipe(pipe);
 				return (error);
 			}
 			SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
@@ -1957,7 +2046,7 @@ delete_pipe(struct dn_pipe *p)
 	pipe_remove_from_heap(&wfq_ready_heap, pipe);
 	DUMMYNET_UNLOCK();
 
-	free(pipe, M_DUMMYNET);
+	free_pipe(pipe);
     } else { /* this is a WF2Q queue (dn_flow_set) */
 	struct dn_flow_set *fs;
 
@@ -2095,6 +2184,7 @@ dummynet_get(struct sockopt *sopt)
 		pipe_bp->fs.next.sle_next = NULL;
 		pipe_bp->fs.pipe = NULL;
 		pipe_bp->fs.rq = NULL;
+		pipe_bp->samples = NULL;
 
 		bp += sizeof(*pipe) ;
 		bp = dn_copy_set(&(pipe->fs), bp);
@@ -2127,7 +2217,8 @@ static int
 ip_dn_ctl(struct sockopt *sopt)
 {
     int error = 0 ;
-    struct dn_pipe *p, tmp_pipe;
+    struct dn_pipe *p;
+    struct dn_pipe_max tmp_pipe;	/* pipe + large buffer */
 
     error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
     if (error)
@@ -2159,15 +2250,18 @@ ip_dn_ctl(struct sockopt *sopt)
 	break ;
 
     case IP_DUMMYNET_CONFIGURE :
-	p = &tmp_pipe ;
-	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
+	p = (struct dn_pipe *)&tmp_pipe ;
+	error = sooptcopyin(sopt, p, sizeof(tmp_pipe), sizeof *p);
 	if (error)
 	    break ;
+	if (p->samples_no > 0)
+	    p->samples = &tmp_pipe.samples[0];
+
 	error = config_pipe(p);
 	break ;
 
     case IP_DUMMYNET_DEL :	/* remove a pipe or queue */
-	p = &tmp_pipe ;
+	p = (struct dn_pipe *)&tmp_pipe ;
 	error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
 	if (error)
 	    break ;
diff --git a/sys/netinet/ip_dummynet.h b/sys/netinet/ip_dummynet.h
index 0a37e6c..6e102f4 100644
--- a/sys/netinet/ip_dummynet.h
+++ b/sys/netinet/ip_dummynet.h
@@ -204,7 +204,18 @@ struct dn_flow_queue {
     struct mbuf *head, *tail ;	/* queue of packets */
     u_int len ;
     u_int len_bytes ;
-    u_long numbytes ;		/* credit for transmission (dynamic queues) */
+
+    /*
+     * When we emulate MAC overheads, or channel unavailability due
+     * to other traffic on a shared medium, we augment the packet at
+     * the head of the queue with an 'extra_bits' field representing
+     * the additional delay the packet will be subject to:
+     *		extra_bits = bw*unavailable_time.
+     * With large bandwidth and large delays, extra_bits (and also numbytes)
+     * can become very large, so better play safe and use 64 bit
+     */
+    dn_key numbytes ;		/* credit for transmission (dynamic queues) */
+    dn_key extra_bits;		/* extra bits simulating unavailable channel */
 
     u_int64_t tot_pkts ;	/* statistics counters	*/
     u_int64_t tot_bytes ;
@@ -252,6 +263,7 @@ struct dn_flow_set {
 #define DN_IS_GENTLE_RED	0x0004
 #define DN_QSIZE_IS_BYTES	0x0008	/* queue size is measured in bytes */
 #define DN_NOERROR		0x0010	/* do not report ENOBUFS on drops  */
+#define	DN_HAS_PROFILE		0x0020	/* the pipe has a delay profile. */
 #define DN_IS_PIPE		0x4000
 #define DN_IS_QUEUE		0x8000
 
@@ -324,7 +336,9 @@ struct dn_pipe {		/* a pipe */
 
     dn_key V ;			/* virtual time */
     int sum;			/* sum of weights of all active sessions */
-    int numbytes;		/* bits I can transmit (more or less). */
+
+    /* Same as in dn_flow_queue, numbytes can become large */
+    dn_key numbytes;		/* bits I can transmit (more or less). */
 
     dn_key sched_time ;		/* time pipe was scheduled in ready_heap */
 
@@ -337,7 +351,25 @@ struct dn_pipe {		/* a pipe */
     int ready ; /* set if ifp != NULL and we got a signal from it */
 
     struct dn_flow_set fs ; /* used with fixed-rate flows */
+
+    /* fields to simulate a delay profile */
+
+#define ED_MAX_NAME_LEN		32
+    char name[ED_MAX_NAME_LEN];
+    int loss_level;
+    int samples_no;
+    int *samples;
 };
+
+/* dn_pipe_max is used to pass pipe configuration from userland onto
+ * kernel space and back
+ */
+#define ED_MAX_SAMPLES_NO	1024	/* largest number of samples in a delay profile */
+struct dn_pipe_max {
+	struct dn_pipe pipe;	/* the pipe configuration itself */
+	int samples[ED_MAX_SAMPLES_NO];	/* room for the largest possible profile */
+};
+
 SLIST_HEAD(dn_pipe_head, dn_pipe);
 
 #ifdef _KERNEL