7 files changed, 143 insertions, 56 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7bac249..59d3e71 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1,6 +1,43 @@
 #
 # Traffic control configuration.
 # 
+
+menuconfig NET_SCHED
+	bool "QoS and/or fair queueing"
+	---help---
+	  When the kernel has several packets to send out over a network
+	  device, it has to decide which ones to send first, which ones to
+	  delay, and which ones to drop. This is the job of the packet
+	  scheduler, and several different algorithms for how to do this
+	  "fairly" have been proposed.
+
+	  If you say N here, you will get the standard packet scheduler, which
+	  is a FIFO (first come, first served). If you say Y here, you will be
+	  able to choose from among several alternative algorithms which can
+	  then be attached to different network devices. This is useful for
+	  example if some of your network devices are real time devices that
+	  need a certain minimum data flow rate, or if you need to limit the
+	  maximum data flow rate for traffic which matches specified criteria.
+	  This code is considered to be experimental.
+
+	  To administer these schedulers, you'll need the user-level utilities
+	  from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
+	  That package also contains some documentation; for more, check out
+	  <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
+
+	  This Quality of Service (QoS) support will enable you to use
+	  Differentiated Services (diffserv) and Resource Reservation Protocol
+	  (RSVP) on your Linux router if you also say Y to "QoS support",
+	  "Packet classifier API" and to some classifiers below. Documentation
+	  and software are at <http://diffserv.sourceforge.net/>.
+
+	  If you say Y here and to "/proc file system" below, you will be able
+	  to read status information about packet schedulers from the file
+	  /proc/net/psched.
+
+	  The available schedulers are listed in the following questions; you
+	  can say Y to as many as you like. If unsure, say N now.
+
 choice
 	prompt "Packet scheduler clock source"
 	depends on NET_SCHED
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8f58cec..e48d0d4 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -4,7 +4,7 @@
 
 obj-y	:= sch_generic.o
 
-obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o
+obj-$(CONFIG_NET_SCHED)		+= sch_api.o sch_fifo.o sch_blackhole.o
 obj-$(CONFIG_NET_CLS)		+= cls_api.o
 obj-$(CONFIG_NET_CLS_ACT)	+= act_api.o
 obj-$(CONFIG_NET_ACT_POLICE)	+= police.o
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 48bb23c..53d98f8 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol)
 	dst->value = skb->protocol;
 }
 
-META_COLLECTOR(int_security)
-{
-	dst->value = skb->security;
-}
-
 META_COLLECTOR(int_pkttype)
 {
 	dst->value = skb->pkt_type;
@@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
 		[META_ID(REALDEV)]		= META_FUNC(int_realdev),
 		[META_ID(PRIORITY)]		= META_FUNC(int_priority),
 		[META_ID(PROTOCOL)]		= META_FUNC(int_protocol),
-		[META_ID(SECURITY)]		= META_FUNC(int_security),
 		[META_ID(PKTTYPE)]		= META_FUNC(int_pkttype),
 		[META_ID(PKTLEN)]		= META_FUNC(int_pktlen),
 		[META_ID(DATALEN)]		= META_FUNC(int_datalen),
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 05e6e0a..b9a069a 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -399,10 +399,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 {
 	int err;
 	struct rtattr *kind = tca[TCA_KIND-1];
-	void *p = NULL;
 	struct Qdisc *sch;
 	struct Qdisc_ops *ops;
-	int size;
 
 	ops = qdisc_lookup_ops(kind);
 #ifdef CONFIG_KMOD
@@ -437,64 +435,55 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
 	if (ops == NULL)
 		goto err_out;
 
-	/* ensure that the Qdisc and the private data are 32-byte aligned */
-	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
-	size += ops->priv_size + QDISC_ALIGN_CONST;
-
-	p = kmalloc(size, GFP_KERNEL);
-	err = -ENOBUFS;
-	if (!p)
+	sch = qdisc_alloc(dev, ops);
+	if (IS_ERR(sch)) {
+		err = PTR_ERR(sch);
 		goto err_out2;
-	memset(p, 0, size);
-	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
-	                       & ~QDISC_ALIGN_CONST);
-	sch->padded = (char *)sch - (char *)p;
-
-	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->q);
+	}
 
-	if (handle == TC_H_INGRESS)
+	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
-
-	sch->ops = ops;
-	sch->enqueue = ops->enqueue;
-	sch->dequeue = ops->dequeue;
-	sch->dev = dev;
-	dev_hold(dev);
-	atomic_set(&sch->refcnt, 1);
-	sch->stats_lock = &dev->queue_lock;
-	if (handle == 0) {
+		handle = TC_H_MAKE(TC_H_INGRESS, 0);
+	} else if (handle == 0) {
 		handle = qdisc_alloc_handle(dev);
 		err = -ENOMEM;
 		if (handle == 0)
 			goto err_out3;
 	}
 
-	if (handle == TC_H_INGRESS)
-                sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
-        else
-                sch->handle = handle;
+	sch->handle = handle;
 
 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
+#ifdef CONFIG_NET_ESTIMATOR
+		if (tca[TCA_RATE-1]) {
+			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
+						sch->stats_lock,
+						tca[TCA_RATE-1]);
+			if (err) {
+				/*
+				 * Any broken qdiscs that would require
+				 * a ops->reset() here? The qdisc was never
+				 * in action so it shouldn't be necessary.
+				 */
+				if (ops->destroy)
+					ops->destroy(sch);
+				goto err_out3;
+			}
+		}
+#endif
 		qdisc_lock_tree(dev);
 		list_add_tail(&sch->list, &dev->qdisc_list);
 		qdisc_unlock_tree(dev);
 
-#ifdef CONFIG_NET_ESTIMATOR
-		if (tca[TCA_RATE-1])
-			gen_new_estimator(&sch->bstats, &sch->rate_est,
-				sch->stats_lock, tca[TCA_RATE-1]);
-#endif
 		return sch;
 	}
 err_out3:
 	dev_put(dev);
+	kfree((char *) sch - sch->padded);
 err_out2:
 	module_put(ops->owner);
 err_out:
 	*errp = err;
-	if (p)
-		kfree(p);
 	return NULL;
 }
 
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
new file mode 100644
index 0000000..81f0b83
--- /dev/null
+++ b/net/sched/sch_blackhole.c
@@ -0,0 +1,54 @@
+/*
+ * net/sched/sch_blackhole.c	Black hole queue
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * Note: Quantum tunneling is not supported.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+
+static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	qdisc_drop(skb, sch);
+	return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
+{
+	return NULL;
+}
+
+static struct Qdisc_ops blackhole_qdisc_ops = {
+	.id		= "blackhole",
+	.priv_size	= 0,
+	.enqueue	= blackhole_enqueue,
+	.dequeue	= blackhole_dequeue,
+	.owner		= THIS_MODULE,
+};
+
+static int __init blackhole_module_init(void)
+{
+	return register_qdisc(&blackhole_qdisc_ops);
+}
+
+static void __exit blackhole_module_exit(void)
+{
+	unregister_qdisc(&blackhole_qdisc_ops);
+}
+
+module_init(blackhole_module_init)
+module_exit(blackhole_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7683b34..73e218e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -395,24 +395,23 @@ static struct Qdisc_ops pfifo_fast_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
 {
 	void *p;
 	struct Qdisc *sch;
-	int size;
+	unsigned int size;
+	int err = -ENOBUFS;
 
 	/* ensure that the Qdisc and the private data are 32-byte aligned */
-	size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST);
-	size += ops->priv_size + QDISC_ALIGN_CONST;
+	size = QDISC_ALIGN(sizeof(*sch));
+	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
 	p = kmalloc(size, GFP_KERNEL);
 	if (!p)
-		return NULL;
+		goto errout;
 	memset(p, 0, size);
-
-	sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) 
-			       & ~QDISC_ALIGN_CONST);
-	sch->padded = (char *)sch - (char *)p;
+	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
+	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
@@ -423,11 +422,24 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
 	dev_hold(dev);
 	sch->stats_lock = &dev->queue_lock;
 	atomic_set(&sch->refcnt, 1);
+	return sch;
+errout:
+	/* err already holds a negative errno, which is what IS_ERR() detects */
+	return ERR_PTR(err);
+}
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
+{
+	struct Qdisc *sch = qdisc_alloc(dev, ops);
+
+	if (IS_ERR(sch))
+		return NULL;
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
-	dev_put(dev);
-	kfree(p);
+	/* init failed: undo qdisc_alloc()'s dev_hold() and kmalloc() */
+	dev_put(dev);
+	kfree((char *) sch - sch->padded);
 	return NULL;
 }
 
@@ -591,6 +603,7 @@ EXPORT_SYMBOL(__netdev_watchdog_up);
 EXPORT_SYMBOL(noop_qdisc);
 EXPORT_SYMBOL(noop_qdisc_ops);
 EXPORT_SYMBOL(qdisc_create_dflt);
+EXPORT_SYMBOL(qdisc_alloc);
 EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
 EXPORT_SYMBOL(qdisc_restart);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 664d0e4..7845d04 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -385,7 +385,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
 	memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
 
 	q->qcount = -1;
-	if (skb_queue_len(&sch->q) == 0)
+	if (skb_queue_empty(&sch->q))
 		PSCHED_SET_PASTPERFECT(q->qidlestart);
 	sch_tree_unlock(sch);
 	return 0;