From 48a8f519e0fe22a5c98523286b2a120841a11dd5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 31 Oct 2008 00:44:18 -0700
Subject: pkt_sched: Add ->peek() methods for fifo, prio and SFQ qdiscs.

From: Patrick McHardy <kaber@trash.net>

Just as a demonstration of how easy adding a peek operation to the
work-conserving qdiscs actually is. It doesn't need to keep or change
any internal state in many cases thanks to the guarantee that the
packet will either be dequeued or, if another packet arrives, the
upper qdisc will immediately ->peek again to reevaluate the state.

(This is only a slightly modified version of Patrick's patch.)

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fifo.c |  2 ++
 net/sched/sch_prio.c | 14 ++++++++++++++
 net/sched/sch_sfq.c  | 12 ++++++++++++
 3 files changed, 28 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 23d258b..8825e88 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -83,6 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
@@ -98,6 +99,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 504a78c..3651da3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -120,6 +120,19 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	return ret;
 }
 
+static struct sk_buff *prio_peek(struct Qdisc *sch)
+{
+	struct prio_sched_data *q = qdisc_priv(sch);
+	int prio;
+
+	for (prio = 0; prio < q->bands; prio++) {
+		struct Qdisc *qdisc = q->queues[prio];
+		struct sk_buff *skb = qdisc->ops->peek(qdisc);
+		if (skb)
+			return skb;
+	}
+	return NULL;
+}
 
 static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 {
@@ -421,6 +434,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct prio_sched_data),
 	.enqueue	=	prio_enqueue,
 	.dequeue	=	prio_dequeue,
+	.peek		=	prio_peek,
 	.requeue	=	prio_requeue,
 	.drop		=	prio_drop,
 	.init		=	prio_init,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fe1508e..198b83d 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -391,8 +391,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_CN;
 }
 
+static struct sk_buff *
+sfq_peek(struct Qdisc *sch)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	sfq_index a;
 
+	/* No active slots */
+	if (q->tail == SFQ_DEPTH)
+		return NULL;
 
+	a = q->next[q->tail];
+	return skb_peek(&q->qs[a]);
+}
 
 static struct sk_buff *
 sfq_dequeue(struct Qdisc *sch)
@@ -624,6 +635,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
+	.peek		=	sfq_peek,
 	.requeue	=	sfq_requeue,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
-- 
cgit v1.1


From 99c0db26797edb39cf83c8c5f8972067f5426b4e Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:45:27 -0700
Subject: pkt_sched: sch_generic: Add generic qdisc->ops->peek()
 implementation.

With feedback from Patrick McHardy.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c     | 12 ++++++++++++
 net/sched/sch_generic.c | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index b16ad29..e564661 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -102,6 +102,10 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
    requeues once dequeued packet. It is used for non-standard or
    just buggy devices, which can defer output even if netif_queue_stopped()=0.
 
+   ---peek
+
+   like dequeue but without removing a packet from the queue
+
    ---reset
 
    returns qdisc to initial state: purge all buffers, clear all
@@ -149,6 +153,14 @@ int register_qdisc(struct Qdisc_ops *qops)
 		qops->enqueue = noop_qdisc_ops.enqueue;
 	if (qops->requeue == NULL)
 		qops->requeue = noop_qdisc_ops.requeue;
+	if (qops->peek == NULL) {
+		if (qops->dequeue == NULL) {
+			qops->peek = noop_qdisc_ops.peek;
+		} else {
+			rc = -EINVAL;
+			goto out;
+		}
+	}
 	if (qops->dequeue == NULL)
 		qops->dequeue = noop_qdisc_ops.dequeue;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 93cd30c..318c9f6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -320,6 +320,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
+	.peek		=	noop_dequeue,
 	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
@@ -346,6 +347,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
+	.peek		=	noop_dequeue,
 	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
@@ -411,6 +413,19 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
 	return NULL;
 }
 
+static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
+{
+	int prio;
+	struct sk_buff_head *list = qdisc_priv(qdisc);
+
+	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+		if (!skb_queue_empty(list + prio))
+			return skb_peek(list + prio);
+	}
+
+	return NULL;
+}
+
 static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
 {
 	qdisc->q.qlen++;
@@ -457,6 +472,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
+	.peek		=	pfifo_fast_peek,
 	.requeue	=	pfifo_fast_requeue,
 	.init		=	pfifo_fast_init,
 	.reset		=	pfifo_fast_reset,
-- 
cgit v1.1


From 8e3af97899db433111287e07d5105189f56fe191 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:45:55 -0700
Subject: pkt_sched: Add qdisc->ops->peek() implementation.

Add qdisc->ops->peek() implementation for work-conserving qdiscs.
With feedback from Patrick McHardy.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c       | 10 ++++++++++
 net/sched/sch_blackhole.c |  1 +
 net/sched/sch_dsmark.c    | 10 ++++++++++
 net/sched/sch_gred.c      |  1 +
 net/sched/sch_multiq.c    | 29 +++++++++++++++++++++++++++++
 net/sched/sch_netem.c     |  1 +
 net/sched/sch_red.c       |  9 +++++++++
 net/sched/sch_teql.c      |  8 ++++++++
 8 files changed, 69 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 43d3725..f9eac08 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -522,6 +522,15 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 	return skb;
 }
 
+static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
+{
+	struct atm_qdisc_data *p = qdisc_priv(sch);
+
+	pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
+
+	return p->link.q->ops->peek(p->link.q);
+}
+
 static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -694,6 +703,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
 	.priv_size	= sizeof(struct atm_qdisc_data),
 	.enqueue	= atm_tc_enqueue,
 	.dequeue	= atm_tc_dequeue,
+	.peek		= atm_tc_peek,
 	.requeue	= atm_tc_requeue,
 	.drop		= atm_tc_drop,
 	.init		= atm_tc_init,
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 507fb48..094a874 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
 	.priv_size	= 0,
 	.enqueue	= blackhole_enqueue,
 	.dequeue	= blackhole_dequeue,
+	.peek		= blackhole_dequeue,
 	.owner		= THIS_MODULE,
 };
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index ba43aab..3e49147 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -313,6 +313,15 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 	return skb;
 }
 
+static struct sk_buff *dsmark_peek(struct Qdisc *sch)
+{
+	struct dsmark_qdisc_data *p = qdisc_priv(sch);
+
+	pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
+
+	return p->q->ops->peek(p->q);
+}
+
 static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -496,6 +505,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct dsmark_qdisc_data),
 	.enqueue	=	dsmark_enqueue,
 	.dequeue	=	dsmark_dequeue,
+	.peek		=	dsmark_peek,
 	.requeue	=	dsmark_requeue,
 	.drop		=	dsmark_drop,
 	.init		=	dsmark_init,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c1ad6b8..cb20ee3 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -602,6 +602,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct gred_sched),
 	.enqueue	=	gred_enqueue,
 	.dequeue	=	gred_dequeue,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	gred_requeue,
 	.drop		=	gred_drop,
 	.init		=	gred_init,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 915f314..155648d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -155,6 +155,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 
 }
 
+static struct sk_buff *multiq_peek(struct Qdisc *sch)
+{
+	struct multiq_sched_data *q = qdisc_priv(sch);
+	unsigned int curband = q->curband;
+	struct Qdisc *qdisc;
+	struct sk_buff *skb;
+	int band;
+
+	for (band = 0; band < q->bands; band++) {
+		/* cycle through bands to ensure fairness */
+		curband++;
+		if (curband >= q->bands)
+			curband = 0;
+
+		/* Check that target subqueue is available before
+		 * pulling an skb to avoid excessive requeues
+		 */
+		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+			qdisc = q->queues[curband];
+			skb = qdisc->ops->peek(qdisc);
+			if (skb)
+				return skb;
+		}
+	}
+	return NULL;
+
+}
+
 static unsigned int multiq_drop(struct Qdisc *sch)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
@@ -451,6 +479,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct multiq_sched_data),
 	.enqueue	=	multiq_enqueue,
 	.dequeue	=	multiq_dequeue,
+	.peek		=	multiq_peek,
 	.requeue	=	multiq_requeue,
 	.drop		=	multiq_drop,
 	.init		=	multiq_init,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a119599..2898d9d 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -541,6 +541,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct fifo_sched_data),
 	.enqueue	=	tfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
+	.peek		=	qdisc_peek_head,
 	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	tfifo_init,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5da0583..7abc514 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -140,6 +140,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
 	return skb;
 }
 
+static struct sk_buff * red_peek(struct Qdisc* sch)
+{
+	struct red_sched_data *q = qdisc_priv(sch);
+	struct Qdisc *child = q->qdisc;
+
+	return child->ops->peek(child);
+}
+
 static unsigned int red_drop(struct Qdisc* sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
@@ -361,6 +369,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 	.cl_ops		=	&red_class_ops,
 	.enqueue	=	red_enqueue,
 	.dequeue	=	red_dequeue,
+	.peek		=	red_peek,
 	.requeue	=	red_requeue,
 	.drop		=	red_drop,
 	.init		=	red_init,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d35ef05..bf03e7f 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -123,6 +123,13 @@ teql_dequeue(struct Qdisc* sch)
 	return skb;
 }
 
+static struct sk_buff *
+teql_peek(struct Qdisc* sch)
+{
+	/* teql is meant to be used as root qdisc */
+	return NULL;
+}
+
 static __inline__ void
 teql_neigh_release(struct neighbour *n)
 {
@@ -433,6 +440,7 @@ static __init void teql_master_setup(struct net_device *dev)
 
 	ops->enqueue	=	teql_enqueue;
 	ops->dequeue	=	teql_dequeue;
+	ops->peek	=	teql_peek;
 	ops->requeue	=	teql_requeue;
 	ops->init	=	teql_qdisc_init;
 	ops->reset	=	teql_reset;
-- 
cgit v1.1


From 03c05f0d4bb0c267edf12d614025a40e33c5a6f9 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:46:19 -0700
Subject: pkt_sched: Use qdisc->ops->peek() instead of ->dequeue() &
 ->requeue()

Use qdisc->ops->peek() instead of ->dequeue() & ->requeue() pair.
After this patch the only remaining user of qdisc->ops->requeue() is
netem_enqueue(). Based on ideas of Herbert Xu, Patrick McHardy and
David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c   | 11 +++++++----
 net/sched/sch_hfsc.c  | 12 ++----------
 net/sched/sch_netem.c | 13 +++++--------
 net/sched/sch_tbf.c   | 12 +++++-------
 4 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index f9eac08..2ee0c1a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -480,11 +480,14 @@ static void sch_atm_dequeue(unsigned long data)
 		 * If traffic is properly shaped, this won't generate nasty
 		 * little bursts. Otherwise, it may ... (but that's okay)
 		 */
-		while ((skb = flow->q->dequeue(flow->q))) {
-			if (!atm_may_send(flow->vcc, skb->truesize)) {
-				(void)flow->q->ops->requeue(skb, flow->q);
+		while ((skb = flow->q->ops->peek(flow->q))) {
+			if (!atm_may_send(flow->vcc, skb->truesize))
 				break;
-			}
+
+			skb = flow->q->dequeue(flow->q);
+			if (unlikely(!skb))
+				break;
+
 			pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
 			/* remove any LL header somebody else has attached */
 			skb_pull(skb, skb_network_offset(skb));
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c1e77da..ddfc408 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -880,28 +880,20 @@ set_passive(struct hfsc_class *cl)
 	 */
 }
 
-/*
- * hack to get length of first packet in queue.
- */
 static unsigned int
 qdisc_peek_len(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
 	unsigned int len;
 
-	skb = sch->dequeue(sch);
+	skb = sch->ops->peek(sch);
 	if (skb == NULL) {
 		if (net_ratelimit())
 			printk("qdisc_peek_len: non work-conserving qdisc ?\n");
 		return 0;
 	}
 	len = qdisc_pkt_len(skb);
-	if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
-		if (net_ratelimit())
-			printk("qdisc_peek_len: failed to requeue\n");
-		qdisc_tree_decrease_qlen(sch, 1);
-		return 0;
-	}
+
 	return len;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2898d9d..74fbdb5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -283,25 +283,22 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	if (sch->flags & TCQ_F_THROTTLED)
 		return NULL;
 
-	skb = q->qdisc->dequeue(q->qdisc);
+	skb = q->qdisc->ops->peek(q->qdisc);
 	if (skb) {
 		const struct netem_skb_cb *cb = netem_skb_cb(skb);
 		psched_time_t now = psched_get_time();
 
 		/* if more time remaining? */
 		if (cb->time_to_send <= now) {
+			skb = q->qdisc->dequeue(q->qdisc);
+			if (!skb)
+				return NULL;
+
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
 			return skb;
 		}
 
-		if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
-			qdisc_tree_decrease_qlen(q->qdisc, 1);
-			sch->qstats.drops++;
-			printk(KERN_ERR "netem: %s could not requeue\n",
-			       q->qdisc->ops->id);
-		}
-
 		qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
 	}
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 94c6159..61fdc77 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -169,7 +169,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	skb = q->qdisc->dequeue(q->qdisc);
+	skb = q->qdisc->ops->peek(q->qdisc);
 
 	if (skb) {
 		psched_time_t now;
@@ -192,6 +192,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		toks -= L2T(q, len);
 
 		if ((toks|ptoks) >= 0) {
+			skb = q->qdisc->dequeue(q->qdisc);
+			if (unlikely(!skb))
+				return NULL;
+
 			q->t_c = now;
 			q->tokens = toks;
 			q->ptokens = ptoks;
@@ -214,12 +218,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		   (cf. CSZ, HPFQ, HFSC)
 		 */
 
-		if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
-			/* When requeue fails skb is dropped */
-			qdisc_tree_decrease_qlen(q->qdisc, 1);
-			sch->qstats.drops++;
-		}
-
 		sch->qstats.overlimits++;
 	}
 	return NULL;
-- 
cgit v1.1


From 77be155cba4e163e8bba9fd27222a8b6189ec4f7 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 31 Oct 2008 00:47:01 -0700
Subject: pkt_sched: Add peek emulation for non-work-conserving qdiscs.

This patch adds a qdisc_peek_dequeued() wrapper that emulates the peek
method by calling qdisc->dequeue() and storing the "peeked" skb in
qdisc->gso_skb until it is dequeued. This is mainly for compatibility,
so as not to break some strange configs, because peeking is meant for
non-work-conserving parent qdiscs querying work-conserving child qdiscs.

This implementation requires using the qdisc_dequeue_peeked() wrapper
instead of directly calling qdisc->dequeue() for all qdiscs ever
queried with qdisc->ops->peek() or qdisc_peek_dequeued().

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c   | 4 ++--
 net/sched/sch_cbq.c   | 1 +
 net/sched/sch_hfsc.c  | 3 ++-
 net/sched/sch_htb.c   | 1 +
 net/sched/sch_netem.c | 5 +++--
 net/sched/sch_tbf.c   | 3 ++-
 6 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 2ee0c1a..6eb9a65 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -484,7 +484,7 @@ static void sch_atm_dequeue(unsigned long data)
 			if (!atm_may_send(flow->vcc, skb->truesize))
 				break;
 
-			skb = flow->q->dequeue(flow->q);
+			skb = qdisc_dequeue_peeked(flow->q);
 			if (unlikely(!skb))
 				break;
 
@@ -519,7 +519,7 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
 
 	pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
 	tasklet_schedule(&p->task);
-	skb = p->link.q->dequeue(p->link.q);
+	skb = qdisc_dequeue_peeked(p->link.q);
 	if (skb)
 		sch->q.qlen--;
 	return skb;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 03e389e..63efa70 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -2066,6 +2066,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct cbq_sched_data),
 	.enqueue	=	cbq_enqueue,
 	.dequeue	=	cbq_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	cbq_requeue,
 	.drop		=	cbq_drop,
 	.init		=	cbq_init,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index ddfc408..d90b165 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1634,7 +1634,7 @@ hfsc_dequeue(struct Qdisc *sch)
 		}
 	}
 
-	skb = cl->qdisc->dequeue(cl->qdisc);
+	skb = qdisc_dequeue_peeked(cl->qdisc);
 	if (skb == NULL) {
 		if (net_ratelimit())
 			printk("HFSC: Non-work-conserving qdisc ?\n");
@@ -1727,6 +1727,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.dump		= hfsc_dump_qdisc,
 	.enqueue	= hfsc_enqueue,
 	.dequeue	= hfsc_dequeue,
+	.peek		= qdisc_peek_dequeued,
 	.requeue	= hfsc_requeue,
 	.drop		= hfsc_drop,
 	.cl_ops		= &hfsc_class_ops,
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d14f020..3fda819 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1565,6 +1565,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct htb_sched),
 	.enqueue	=	htb_enqueue,
 	.dequeue	=	htb_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	htb_requeue,
 	.drop		=	htb_drop,
 	.init		=	htb_init,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 74fbdb5..3080bd6 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -290,8 +290,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 
 		/* if more time remaining? */
 		if (cb->time_to_send <= now) {
-			skb = q->qdisc->dequeue(q->qdisc);
-			if (!skb)
+			skb = qdisc_dequeue_peeked(q->qdisc);
+			if (unlikely(!skb))
 				return NULL;
 
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
@@ -714,6 +714,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct netem_sched_data),
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	netem_requeue,
 	.drop		=	netem_drop,
 	.init		=	netem_init,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 61fdc77..435076c 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -192,7 +192,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
 		toks -= L2T(q, len);
 
 		if ((toks|ptoks) >= 0) {
-			skb = q->qdisc->dequeue(q->qdisc);
+			skb = qdisc_dequeue_peeked(q->qdisc);
 			if (unlikely(!skb))
 				return NULL;
 
@@ -467,6 +467,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.priv_size	=	sizeof(struct tbf_sched_data),
 	.enqueue	=	tbf_enqueue,
 	.dequeue	=	tbf_dequeue,
+	.peek		=	qdisc_peek_dequeued,
 	.requeue	=	tbf_requeue,
 	.drop		=	tbf_drop,
 	.init		=	tbf_init,
-- 
cgit v1.1


From 02201464119334690fe209849843881b8e9cfa9f Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 2 Nov 2008 00:35:24 -0700
Subject: sch_netem: Remove classful functionality

Patrick McHardy noticed that: "a lot of the functionality of netem
requires the inner tfifo anyways and rate-limiting is usually done
on top of netem. So I would suggest so either hard-wire the tfifo
qdisc or at least make the assumption that inner qdiscs are
work-conserving.", and later: "- a lot of other qdiscs still don't
work as inner qdiscs of netem [...]".

So, following his suggestion, this patch removes the classful options
of netem. The main reason for this change is to remove the
ops->requeue() method, which is currently used only by netem.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 87 ---------------------------------------------------
 1 file changed, 87 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3080bd6..2ad0959 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -622,95 +622,8 @@ nla_put_failure:
 	return -1;
 }
 
-static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
-			  struct sk_buff *skb, struct tcmsg *tcm)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-
-	if (cl != 1) 	/* only one class */
-		return -ENOENT;
-
-	tcm->tcm_handle |= TC_H_MIN(1);
-	tcm->tcm_info = q->qdisc->handle;
-
-	return 0;
-}
-
-static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
-		     struct Qdisc **old)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-
-	if (new == NULL)
-		new = &noop_qdisc;
-
-	sch_tree_lock(sch);
-	*old = xchg(&q->qdisc, new);
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
-	return 0;
-}
-
-static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	return q->qdisc;
-}
-
-static unsigned long netem_get(struct Qdisc *sch, u32 classid)
-{
-	return 1;
-}
-
-static void netem_put(struct Qdisc *sch, unsigned long arg)
-{
-}
-
-static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
-			    struct nlattr **tca, unsigned long *arg)
-{
-	return -ENOSYS;
-}
-
-static int netem_delete(struct Qdisc *sch, unsigned long arg)
-{
-	return -ENOSYS;
-}
-
-static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
-	if (!walker->stop) {
-		if (walker->count >= walker->skip)
-			if (walker->fn(sch, 1, walker) < 0) {
-				walker->stop = 1;
-				return;
-			}
-		walker->count++;
-	}
-}
-
-static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
-{
-	return NULL;
-}
-
-static const struct Qdisc_class_ops netem_class_ops = {
-	.graft		=	netem_graft,
-	.leaf		=	netem_leaf,
-	.get		=	netem_get,
-	.put		=	netem_put,
-	.change		=	netem_change_class,
-	.delete		=	netem_delete,
-	.walk		=	netem_walk,
-	.tcf_chain	=	netem_find_tcf,
-	.dump		=	netem_dump_class,
-};
-
 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.id		=	"netem",
-	.cl_ops		=	&netem_class_ops,
 	.priv_size	=	sizeof(struct netem_sched_data),
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
-- 
cgit v1.1


From 8ba25dad0ac78850cd46d91186a27d60f7314752 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 2 Nov 2008 00:36:03 -0700
Subject: sch_netem: Replace ->requeue() method with open code

After removing netem classful functionality we are sure its inner
qdisc is tfifo, so we can replace qdisc->ops->requeue() method with
open code. After this patch there are no more ops->requeue() users.

The idea of this patch is by Patrick McHardy.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2ad0959..1aa4345 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -233,7 +233,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		 */
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
-		ret = q->qdisc->ops->requeue(skb, q->qdisc);
+
+		__skb_queue_head(&q->qdisc->q, skb);
+		q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
+		q->qdisc->qstats.requeues++;
+		ret = NET_XMIT_SUCCESS;
 	}
 
 	if (likely(ret == NET_XMIT_SUCCESS)) {
-- 
cgit v1.1


From 67305ebc992abf2121fb2149fd8a707cd7cfcbd2 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 3 Nov 2008 02:52:50 -0800
Subject: pkt_sched: sch_generic: Kfree gso_skb in qdisc_reset()

Since gso_skb is re-used for qdisc_peek_dequeued(), and this skb is
counted in the qdisc->q.qlen, it has to be kfreed during qdisc_reset()
when qlen is zeroed.

With help from David S. Miller <davem@davemloft.net>

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 318c9f6..1192da2 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -542,6 +542,9 @@ void qdisc_reset(struct Qdisc *qdisc)
 
 	if (ops->reset)
 		ops->reset(qdisc);
+
+	kfree_skb(qdisc->gso_skb);
+	qdisc->gso_skb = NULL;
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-- 
cgit v1.1


From 265eb67fb4e16be8e46a51e1e4e2ecd99fb15219 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 3 Nov 2008 21:13:26 -0800
Subject: netem: eliminate unneeded return values

All these individual parsing functions never return an error,
so they can be void.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 30 +++++++++---------------------
 1 file changed, 9 insertions(+), 21 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1aa4345..f69698f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -352,7 +352,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	return 0;
 }
 
-static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corr *c = nla_data(attr);
@@ -360,27 +360,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->delay_cor, c->delay_corr);
 	init_crandom(&q->loss_cor, c->loss_corr);
 	init_crandom(&q->dup_cor, c->dup_corr);
-	return 0;
 }
 
-static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_reorder *r = nla_data(attr);
 
 	q->reorder = r->probability;
 	init_crandom(&q->reorder_cor, r->correlation);
-	return 0;
 }
 
-static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 	const struct tc_netem_corrupt *r = nla_data(attr);
 
 	q->corrupt = r->probability;
 	init_crandom(&q->corrupt_cor, r->correlation);
-	return 0;
 }
 
 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
@@ -439,11 +436,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (q->gap)
 		q->reorder = ~0;
 
-	if (tb[TCA_NETEM_CORR]) {
-		ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_CORR])
+		get_correlation(sch, tb[TCA_NETEM_CORR]);
 
 	if (tb[TCA_NETEM_DELAY_DIST]) {
 		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
@@ -451,17 +445,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 			return ret;
 	}
 
-	if (tb[TCA_NETEM_REORDER]) {
-		ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_REORDER])
+		get_reorder(sch, tb[TCA_NETEM_REORDER]);
 
-	if (tb[TCA_NETEM_CORRUPT]) {
-		ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
-		if (ret)
-			return ret;
-	}
+	if (tb[TCA_NETEM_CORRUPT])
+		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
 	return 0;
 }
-- 
cgit v1.1


From f400923735ecbb67cbe4a3606c9479f694754f51 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 7 Nov 2008 22:56:00 -0800
Subject: pkt_sched: Control group classifier

The classifier should cover the most common use case and will work
without any special configuration.

The principle of the classifier is to directly access the
task_struct via get_current(). In order for this to work,
classification requests from softirqs must be ignored. This is
not a problem because the vast majority of packets in softirq
context are not assigned to a task anyway. For this to work, a
mechanism is needed to trace softirq context.

This repost goes back to the method of relying on the number of
nested bh disable calls for the sake of not adding too much
complexity and the option to come up with something more reliable
if actually needed.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig      |  11 ++
 net/sched/Makefile     |   1 +
 net/sched/cls_cgroup.c | 290 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 302 insertions(+)
 create mode 100644 net/sched/cls_cgroup.c

(limited to 'net/sched')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54..36543b6 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -316,6 +316,17 @@ config NET_CLS_FLOW
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_flow.
 
+config NET_CLS_CGROUP
+	bool "Control Group Classifier"
+	select NET_CLS
+	depends on CGROUPS
+	---help---
+	  Say Y here if you want to classify packets based on the control
+	  group (cgroup) of their process.
+
+	  This is a boolean option: the classifier can only be built into
+	  the kernel, it cannot be compiled as a module.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c992..70b35f8 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
+obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 0000000..53ada2c
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,290 @@
+/*
+ * net/sched/cls_cgroup.c	Control Group Classifier
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+
+struct cgroup_cls_state
+{
+	struct cgroup_subsys_state css;
+	u32 classid;
+};
+
+static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
+{
+	return (struct cgroup_cls_state *)
+		cgroup_subsys_state(cgrp, net_cls_subsys_id);
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+						 struct cgroup *cgrp)
+{
+	struct cgroup_cls_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (state == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* A cgroup with a parent starts out with the parent's classid. */
+	if (cgrp->parent != NULL)
+		state->classid = net_cls_state(cgrp->parent)->classid;
+	return &state->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	kfree(net_cls_state(cgrp));	/* state from cgrp_create() */
+}
+
+static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
+{
+	return net_cls_state(cgrp)->classid;
+}
+
+static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
+{
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	net_cls_state(cgrp)->classid = (u32) value;
+
+	cgroup_unlock();
+
+	return 0;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "classid",
+		.read_u64 = read_classid,
+		.write_u64 = write_classid,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+struct cgroup_subsys net_cls_subsys = {
+	.name		= "net_cls",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+	.subsys_id	= net_cls_subsys_id,
+};
+
+struct cls_cgroup_head
+{
+	u32			handle;
+	struct tcf_exts		exts;
+	struct tcf_ematch_tree	ematches;
+};
+
+static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			       struct tcf_result *res)
+{
+	struct cls_cgroup_head *head = tp->root;
+	struct cgroup_cls_state *cs;
+	int ret = 0;
+
+	/*
+	 * Due to the nature of the classifier it is required to ignore all
+	 * packets originating from softirq context as accessing `current'
+	 * would lead to false results.
+	 *
+	 * This test assumes that all callers of dev_queue_xmit() explicitly
+	 * disable bh. Knowing this, it is possible to detect softirq based
+	 * calls by looking at the number of nested bh disable calls because
+	 * softirqs always disable bh.
+	 */
+	if (softirq_count() != SOFTIRQ_OFFSET)
+		return -1;
+
+	rcu_read_lock();
+	cs = (struct cgroup_cls_state *) task_subsys_state(current,
+							   net_cls_subsys_id);
+	if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
+		res->classid = cs->classid;
+		res->class = 0;
+		ret = tcf_exts_exec(skb, &head->exts, res);
+	} else
+		ret = -1;
+
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
+{
+	return 0UL;
+}
+
+static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int cls_cgroup_init(struct tcf_proto *tp)
+{
+	return 0;
+}
+
+static const struct tcf_ext_map cgroup_ext_map = {
+	.action = TCA_CGROUP_ACT,
+	.police = TCA_CGROUP_POLICE,
+};
+
+static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
+	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
+};
+
+static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
+			     u32 handle, struct nlattr **tca,
+			     unsigned long *arg)
+{
+	struct nlattr *tb[TCA_CGROUP_MAX+1];
+	struct cls_cgroup_head *head = tp->root;
+	struct tcf_ematch_tree t;
+	struct tcf_exts e;
+	int err;
+
+	if (tca[TCA_OPTIONS] == NULL)	/* nla_parse_nested() would oops */
+		return -EINVAL;
+
+	if (head == NULL) {
+		if (!handle)
+			return -EINVAL;
+		head = kzalloc(sizeof(*head), GFP_KERNEL);
+		if (head == NULL)
+			return -ENOBUFS;
+		head->handle = handle;
+		tcf_tree_lock(tp);
+		tp->root = head;
+		tcf_tree_unlock(tp);
+	} else if (handle != head->handle) {
+		return -ENOENT;
+	}
+
+	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
+			       cgroup_policy);
+	if (err < 0)
+		return err;
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
+	if (err < 0)
+		return err;
+
+	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
+	if (err < 0) {
+		tcf_exts_destroy(tp, &e);	/* undo tcf_exts_validate() */
+		return err;
+	}
+
+	tcf_exts_change(tp, &head->exts, &e);
+	tcf_em_tree_change(tp, &head->ematches, &t);
+	return 0;
+}
+
+static void cls_cgroup_destroy(struct tcf_proto *tp)
+{
+	struct cls_cgroup_head *head;
+
+	head = (struct cls_cgroup_head *)xchg(&tp->root, NULL);
+
+	if (head) {
+		tcf_exts_destroy(tp, &head->exts);
+		tcf_em_tree_destroy(tp, &head->ematches);
+		kfree(head);
+	}
+}
+
+static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_cgroup_head *head = tp->root;
+
+	/* There is a single entry (the head): offer it to the callback unless
+	 * it is still being skipped, and stop the walk if the callback fails. */
+	if (arg->count >= arg->skip &&
+	    arg->fn(tp, (unsigned long) head, arg) < 0) {
+		arg->stop = 1;
+		return;
+	}
+
+	arg->count++;
+}
+
+static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
+			   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_cgroup_head *head = tp->root;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	t->tcm_handle = head->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
+	    tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
+	.kind		=	"cgroup",
+	.init		=	cls_cgroup_init,
+	.change		=	cls_cgroup_change,
+	.classify	=	cls_cgroup_classify,
+	.destroy	=	cls_cgroup_destroy,
+	.get		=	cls_cgroup_get,
+	.put		=	cls_cgroup_put,
+	.delete		=	cls_cgroup_delete,
+	.walk		=	cls_cgroup_walk,
+	.dump		=	cls_cgroup_dump,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init init_cgroup_cls(void)
+{
+	return register_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+static void __exit exit_cgroup_cls(void)
+{
+	unregister_tcf_proto_ops(&cls_cgroup_ops);
+}
+
+module_init(init_cgroup_cls);
+module_exit(exit_cgroup_cls);
+MODULE_LICENSE("GPL");
-- 
cgit v1.1


From f30ab418a1d3c5a8b83493e7d70d6876a74aa0ce Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 13 Nov 2008 22:56:30 -0800
Subject: pkt_sched: Remove qdisc->ops->requeue() etc.

After implementing qdisc->ops->peek() and changing sch_netem into a
classless qdisc there are no more qdisc->ops->requeue() users. This
patch removes this method with its wrappers (qdisc_requeue()), and
also the unused qdisc->requeue structure. There are a few minor fixes
of warnings (htb_enqueue()) and comments btw.

The idea to kill ->requeue() and a similar patch were first developed
by David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c     |  7 ------
 net/sched/sch_atm.c     | 20 +---------------
 net/sched/sch_cbq.c     | 35 ---------------------------
 net/sched/sch_dsmark.c  | 21 -----------------
 net/sched/sch_fifo.c    |  2 --
 net/sched/sch_generic.c | 23 ------------------
 net/sched/sch_gred.c    | 21 -----------------
 net/sched/sch_hfsc.c    | 19 ---------------
 net/sched/sch_htb.c     | 44 +---------------------------------
 net/sched/sch_multiq.c  | 39 ++----------------------------
 net/sched/sch_netem.c   | 16 -------------
 net/sched/sch_prio.c    | 28 ----------------------
 net/sched/sch_red.c     | 18 --------------
 net/sched/sch_sfq.c     | 63 -------------------------------------------------
 net/sched/sch_tbf.c     | 14 -----------
 net/sched/sch_teql.c    | 11 ---------
 16 files changed, 4 insertions(+), 377 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e564661..5bcef13 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,11 +97,6 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 
    Auxiliary routines:
 
-   ---requeue
-
-   requeues once dequeued packet. It is used for non-standard or
-   just buggy devices, which can defer output even if netif_queue_stopped()=0.
-
    ---peek
 
    like dequeue but without removing a packet from the queue
@@ -151,8 +146,6 @@ int register_qdisc(struct Qdisc_ops *qops)
 
 	if (qops->enqueue == NULL)
 		qops->enqueue = noop_qdisc_ops.enqueue;
-	if (qops->requeue == NULL)
-		qops->requeue = noop_qdisc_ops.requeue;
 	if (qops->peek == NULL) {
 		if (qops->dequeue == NULL) {
 			qops->peek = noop_qdisc_ops.peek;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6eb9a65..ca90f6e 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -62,7 +62,7 @@ struct atm_qdisc_data {
 	struct atm_flow_data	link;		/* unclassified skbs go here */
 	struct atm_flow_data	*flows;		/* NB: "link" is also on this
 						   list */
-	struct tasklet_struct	task;		/* requeue tasklet */
+	struct tasklet_struct	task;		/* dequeue tasklet */
 };
 
 /* ------------------------- Class/flow operations ------------------------- */
@@ -534,23 +534,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
 	return p->link.q->ops->peek(p->link.q);
 }
 
-static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct atm_qdisc_data *p = qdisc_priv(sch);
-	int ret;
-
-	pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-	ret = p->link.q->ops->requeue(skb, p->link.q);
-	if (!ret) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	} else if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		p->link.qstats.drops++;
-	}
-	return ret;
-}
-
 static unsigned int atm_tc_drop(struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
@@ -707,7 +690,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
 	.enqueue	= atm_tc_enqueue,
 	.dequeue	= atm_tc_dequeue,
 	.peek		= atm_tc_peek,
-	.requeue	= atm_tc_requeue,
 	.drop		= atm_tc_drop,
 	.init		= atm_tc_init,
 	.reset		= atm_tc_reset,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 63efa70..a99e37e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-static int
-cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct cbq_sched_data *q = qdisc_priv(sch);
-	struct cbq_class *cl;
-	int ret;
-
-	if ((cl = q->tx_class) == NULL) {
-		kfree_skb(skb);
-		sch->qstats.drops++;
-		return NET_XMIT_CN;
-	}
-	q->tx_class = NULL;
-
-	cbq_mark_toplevel(q, cl);
-
-#ifdef CONFIG_NET_CLS_ACT
-	q->rx_class = cl;
-	cl->q->__parent = sch;
-#endif
-	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (!cl->next_alive)
-			cbq_activate_class(cl);
-		return 0;
-	}
-	if (net_xmit_drop_count(ret)) {
-		sch->qstats.drops++;
-		cl->qstats.drops++;
-	}
-	return ret;
-}
-
 /* Overlimit actions */
 
 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
@@ -2067,7 +2033,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
 	.enqueue	=	cbq_enqueue,
 	.dequeue	=	cbq_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	cbq_requeue,
 	.drop		=	cbq_drop,
 	.init		=	cbq_init,
 	.reset		=	cbq_reset,
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3e49147..3f9427a 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -322,26 +322,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch)
 	return p->q->ops->peek(p->q);
 }
 
-static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct dsmark_qdisc_data *p = qdisc_priv(sch);
-	int err;
-
-	pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
-
-	err = p->q->ops->requeue(skb, p->q);
-	if (err != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(err))
-			sch->qstats.drops++;
-		return err;
-	}
-
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-
-	return NET_XMIT_SUCCESS;
-}
-
 static unsigned int dsmark_drop(struct Qdisc *sch)
 {
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
@@ -506,7 +486,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
 	.enqueue	=	dsmark_enqueue,
 	.dequeue	=	dsmark_dequeue,
 	.peek		=	dsmark_peek,
-	.requeue	=	dsmark_requeue,
 	.drop		=	dsmark_drop,
 	.init		=	dsmark_init,
 	.reset		=	dsmark_reset,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 8825e88..92cfc9d 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -84,7 +84,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -100,7 +99,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	fifo_init,
 	.reset		=	qdisc_reset_queue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1192da2..80c8f3d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -306,22 +306,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
 	return NULL;
 }
 
-static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
-	if (net_ratelimit())
-		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
-		       skb->dev->name);
-	kfree_skb(skb);
-	return NET_XMIT_CN;
-}
-
 struct Qdisc_ops noop_qdisc_ops __read_mostly = {
 	.id		=	"noop",
 	.priv_size	=	0,
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.peek		=	noop_dequeue,
-	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -336,7 +326,6 @@ struct Qdisc noop_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 };
@@ -348,7 +337,6 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
 	.enqueue	=	noop_enqueue,
 	.dequeue	=	noop_dequeue,
 	.peek		=	noop_dequeue,
-	.requeue	=	noop_requeue,
 	.owner		=	THIS_MODULE,
 };
 
@@ -364,7 +352,6 @@ static struct Qdisc noqueue_qdisc = {
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noqueue_qdisc_ops,
 	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
-	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
 	.dev_queue	=	&noqueue_netdev_queue,
 };
@@ -426,12 +413,6 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
 	return NULL;
 }
 
-static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
-{
-	qdisc->q.qlen++;
-	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
-}
-
 static void pfifo_fast_reset(struct Qdisc* qdisc)
 {
 	int prio;
@@ -473,7 +454,6 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
 	.peek		=	pfifo_fast_peek,
-	.requeue	=	pfifo_fast_requeue,
 	.init		=	pfifo_fast_init,
 	.reset		=	pfifo_fast_reset,
 	.dump		=	pfifo_fast_dump,
@@ -499,7 +479,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->padded = (char *) sch - (char *) p;
 
 	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->requeue);
 	skb_queue_head_init(&sch->q);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
@@ -571,8 +550,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc_dev(qdisc));
 
 	kfree_skb(qdisc->gso_skb);
-	__skb_queue_purge(&qdisc->requeue);
-
 	kfree((char *) qdisc - qdisc->padded);
 }
 EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index cb20ee3..40408d5 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -240,26 +240,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct gred_sched *t = qdisc_priv(sch);
-	struct gred_sched_data *q;
-	u16 dp = tc_index_to_dp(skb);
-
-	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
-		if (net_ratelimit())
-			printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
-			       "for requeue, screwing up backlog.\n",
-			       tc_index_to_dp(skb));
-	} else {
-		if (red_is_idling(&q->parms))
-			red_end_of_idle_period(&q->parms);
-		q->backlog += qdisc_pkt_len(skb);
-	}
-
-	return qdisc_requeue(skb, sch);
-}
-
 static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -603,7 +583,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
 	.enqueue	=	gred_enqueue,
 	.dequeue	=	gred_dequeue,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	gred_requeue,
 	.drop		=	gred_drop,
 	.init		=	gred_init,
 	.reset		=	gred_reset,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d90b165..071c474 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -184,7 +184,6 @@ struct hfsc_sched
 	struct rb_root eligible;		/* eligible tree */
 	struct list_head droplist;		/* active leaf class list (for
 						   dropping) */
-	struct sk_buff_head requeue;		/* requeued packet */
 	struct qdisc_watchdog watchdog;		/* watchdog timer */
 };
 
@@ -1432,7 +1431,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
-	skb_queue_head_init(&q->requeue);
 
 	q->root.cl_common.classid = sch->handle;
 	q->root.refcnt  = 1;
@@ -1517,7 +1515,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
 		hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
 			hfsc_reset_class(cl);
 	}
-	__skb_queue_purge(&q->requeue);
 	q->eligible = RB_ROOT;
 	INIT_LIST_HEAD(&q->droplist);
 	qdisc_watchdog_cancel(&q->watchdog);
@@ -1542,7 +1539,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
 			hfsc_destroy_class(sch, cl);
 	}
 	qdisc_class_hash_destroy(&q->clhash);
-	__skb_queue_purge(&q->requeue);
 	qdisc_watchdog_cancel(&q->watchdog);
 }
 
@@ -1609,8 +1605,6 @@ hfsc_dequeue(struct Qdisc *sch)
 
 	if (sch->q.qlen == 0)
 		return NULL;
-	if ((skb = __skb_dequeue(&q->requeue)))
-		goto out;
 
 	cur_time = psched_get_time();
 
@@ -1659,24 +1653,12 @@ hfsc_dequeue(struct Qdisc *sch)
 		set_passive(cl);
 	}
 
- out:
 	sch->flags &= ~TCQ_F_THROTTLED;
 	sch->q.qlen--;
 
 	return skb;
 }
 
-static int
-hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct hfsc_sched *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->requeue, skb);
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 static unsigned int
 hfsc_drop(struct Qdisc *sch)
 {
@@ -1728,7 +1710,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
 	.enqueue	= hfsc_enqueue,
 	.dequeue	= hfsc_dequeue,
 	.peek		= qdisc_peek_dequeued,
-	.requeue	= hfsc_requeue,
 	.drop		= hfsc_drop,
 	.cl_ops		= &hfsc_class_ops,
 	.priv_size	= sizeof(struct hfsc_sched),
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3fda819..83f5e69 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-	int ret;
+	int uninitialized_var(ret);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = htb_classify(skb, sch, &ret);
 
@@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
-/* TODO: requeuing packet charges it to policers again !! */
-static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	int ret;
-	struct htb_sched *q = qdisc_priv(sch);
-	struct htb_class *cl = htb_classify(skb, sch, &ret);
-	struct sk_buff *tskb;
-
-	if (cl == HTB_DIRECT) {
-		/* enqueue to helper queue */
-		if (q->direct_queue.qlen < q->direct_qlen) {
-			__skb_queue_head(&q->direct_queue, skb);
-		} else {
-			__skb_queue_head(&q->direct_queue, skb);
-			tskb = __skb_dequeue_tail(&q->direct_queue);
-			kfree_skb(tskb);
-			sch->qstats.drops++;
-			return NET_XMIT_CN;
-		}
-#ifdef CONFIG_NET_CLS_ACT
-	} else if (!cl) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-#endif
-	} else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
-		   NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(ret)) {
-			sch->qstats.drops++;
-			cl->qstats.drops++;
-		}
-		return ret;
-	} else
-		htb_activate(q, cl);
-
-	sch->q.qlen++;
-	sch->qstats.requeues++;
-	return NET_XMIT_SUCCESS;
-}
-
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -1566,7 +1525,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.enqueue	=	htb_enqueue,
 	.dequeue	=	htb_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	htb_requeue,
 	.drop		=	htb_drop,
 	.init		=	htb_init,
 	.reset		=	htb_reset,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 155648d..f645ac5 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct Qdisc *qdisc;
-	struct multiq_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	qdisc = multiq_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
-
-	ret = qdisc->ops->requeue(skb, qdisc);
-	if (ret == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		if (q->curband)
-			q->curband--;
-		else
-			q->curband = q->bands - 1;
-		return NET_XMIT_SUCCESS;
-	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
-}
-
-
 static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 {
 	struct multiq_sched_data *q = qdisc_priv(sch);
@@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 			q->curband = 0;
 
 		/* Check that target subqueue is available before
-		 * pulling an skb to avoid excessive requeues
+		 * pulling an skb to avoid head-of-line blocking.
 		 */
 		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
 			qdisc = q->queues[q->curband];
@@ -170,7 +136,7 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 			curband = 0;
 
 		/* Check that target subqueue is available before
-		 * pulling an skb to avoid excessive requeues
+		 * pulling an skb to avoid head-of-line blocking.
 		 */
 		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
 			qdisc = q->queues[curband];
@@ -480,7 +446,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
 	.enqueue	=	multiq_enqueue,
 	.dequeue	=	multiq_dequeue,
 	.peek		=	multiq_peek,
-	.requeue	=	multiq_requeue,
 	.drop		=	multiq_drop,
 	.init		=	multiq_init,
 	.reset		=	multiq_reset,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f69698f..3cbc3ff 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -252,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-/* Requeue packets but don't change time stamp */
-static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct netem_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int netem_drop(struct Qdisc* sch)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -531,7 +517,6 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
 	.enqueue	=	tfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
-	.requeue	=	qdisc_requeue,
 	.drop		=	qdisc_queue_drop,
 	.init		=	tfifo_init,
 	.reset		=	qdisc_reset_queue,
@@ -620,7 +605,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
 	.enqueue	=	netem_enqueue,
 	.dequeue	=	netem_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	netem_requeue,
 	.drop		=	netem_drop,
 	.init		=	netem_init,
 	.reset		=	netem_reset,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 3651da3..ea65a87 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -93,33 +93,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return ret;
 }
 
-
-static int
-prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct Qdisc *qdisc;
-	int ret;
-
-	qdisc = prio_classify(skb, sch, &ret);
-#ifdef CONFIG_NET_CLS_ACT
-	if (qdisc == NULL) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-#endif
-
-	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-		return NET_XMIT_SUCCESS;
-	}
-	if (net_xmit_drop_count(ret))
-		sch->qstats.drops++;
-	return ret;
-}
-
 static struct sk_buff *prio_peek(struct Qdisc *sch)
 {
 	struct prio_sched_data *q = qdisc_priv(sch);
@@ -435,7 +408,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
 	.enqueue	=	prio_enqueue,
 	.dequeue	=	prio_dequeue,
 	.peek		=	prio_peek,
-	.requeue	=	prio_requeue,
 	.drop		=	prio_drop,
 	.init		=	prio_init,
 	.reset		=	prio_reset,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 7abc514..6a0371c 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -108,23 +108,6 @@ congestion_drop:
 	return NET_XMIT_CN;
 }
 
-static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct red_sched_data *q = qdisc_priv(sch);
-	struct Qdisc *child = q->qdisc;
-	int ret;
-
-	if (red_is_idling(&q->parms))
-		red_end_of_idle_period(&q->parms);
-
-	ret = child->ops->requeue(skb, child);
-	if (likely(ret == NET_XMIT_SUCCESS)) {
-		sch->qstats.requeues++;
-		sch->q.qlen++;
-	}
-	return ret;
-}
-
 static struct sk_buff * red_dequeue(struct Qdisc* sch)
 {
 	struct sk_buff *skb;
@@ -370,7 +353,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
 	.enqueue	=	red_enqueue,
 	.dequeue	=	red_dequeue,
 	.peek		=	red_peek,
-	.requeue	=	red_requeue,
 	.drop		=	red_drop,
 	.init		=	red_init,
 	.reset		=	red_reset,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 198b83d..ab8cfee 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -329,68 +329,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_CN;
 }
 
-static int
-sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
-{
-	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned int hash;
-	sfq_index x;
-	int ret;
-
-	hash = sfq_classify(skb, sch, &ret);
-	if (hash == 0) {
-		if (ret & __NET_XMIT_BYPASS)
-			sch->qstats.drops++;
-		kfree_skb(skb);
-		return ret;
-	}
-	hash--;
-
-	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
-	}
-
-	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_head(&q->qs[x], skb);
-	/* If selected queue has length q->limit+1, this means that
-	 * all another queues are empty and we do simple tail drop.
-	 * This packet is still requeued at head of queue, tail packet
-	 * is dropped.
-	 */
-	if (q->qs[x].qlen > q->limit) {
-		skb = q->qs[x].prev;
-		__skb_unlink(skb, &q->qs[x]);
-		sch->qstats.drops++;
-		sch->qstats.backlog -= qdisc_pkt_len(skb);
-		kfree_skb(skb);
-		return NET_XMIT_CN;
-	}
-
-	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
-			q->next[x] = x;
-			q->allot[x] = q->quantum;
-		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
-			q->tail = x;
-		}
-	}
-
-	if (++sch->q.qlen <= q->limit) {
-		sch->qstats.requeues++;
-		return 0;
-	}
-
-	sch->qstats.drops++;
-	sfq_drop(sch);
-	return NET_XMIT_CN;
-}
-
 static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
@@ -636,7 +574,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
 	.peek		=	sfq_peek,
-	.requeue	=	sfq_requeue,
 	.drop		=	sfq_drop,
 	.init		=	sfq_init,
 	.reset		=	sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 435076c..bb7783d 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return 0;
 }
 
-static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct tbf_sched_data *q = qdisc_priv(sch);
-	int ret;
-
-	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
-		sch->q.qlen++;
-		sch->qstats.requeues++;
-	}
-
-	return ret;
-}
-
 static unsigned int tbf_drop(struct Qdisc* sch)
 {
 	struct tbf_sched_data *q = qdisc_priv(sch);
@@ -468,7 +455,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
 	.enqueue	=	tbf_enqueue,
 	.dequeue	=	tbf_dequeue,
 	.peek		=	qdisc_peek_dequeued,
-	.requeue	=	tbf_requeue,
 	.drop		=	tbf_drop,
 	.init		=	tbf_init,
 	.reset		=	tbf_reset,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index bf03e7f..cfc8e7c 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	return NET_XMIT_DROP;
 }
 
-static int
-teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
-{
-	struct teql_sched_data *q = qdisc_priv(sch);
-
-	__skb_queue_head(&q->q, skb);
-	sch->qstats.requeues++;
-	return 0;
-}
-
 static struct sk_buff *
 teql_dequeue(struct Qdisc* sch)
 {
@@ -441,7 +431,6 @@ static __init void teql_master_setup(struct net_device *dev)
 	ops->enqueue	=	teql_enqueue;
 	ops->dequeue	=	teql_dequeue;
 	ops->peek	=	teql_peek;
-	ops->requeue	=	teql_requeue;
 	ops->init	=	teql_qdisc_init;
 	ops->reset	=	teql_reset;
 	ops->destroy	=	teql_destroy;
-- 
cgit v1.1


From 4d24b52ac5085ef8a264d044f1b302b7c029887a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sun, 16 Nov 2008 23:01:49 -0800
Subject: ematch: simpler tcf_em_unregister()

Simply delete ops from list and let list debugging do the job.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/ematch.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e..aab5940 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
  *
  *      static void __exit exit_my_ematch(void)
  *      {
- *      	return tcf_em_unregister(&my_ops);
+ *      	tcf_em_unregister(&my_ops);
  *      }
  *
  *      module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
  *
  * Returns -ENOENT if no matching ematch was found.
  */
-int tcf_em_unregister(struct tcf_ematch_ops *ops)
+void tcf_em_unregister(struct tcf_ematch_ops *ops)
 {
-	int err = 0;
-	struct tcf_ematch_ops *e;
-
 	write_lock(&ematch_mod_lock);
-	list_for_each_entry(e, &ematch_ops, link) {
-		if (e == ops) {
-			list_del(&e->link);
-			goto out;
-		}
-	}
-
-	err = -ENOENT;
-out:
+	list_del(&ops->link);
 	write_unlock(&ematch_mod_lock);
-	return err;
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
-- 
cgit v1.1


From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Wed, 19 Nov 2008 21:32:24 -0800
Subject: netdev: network device operations infrastructure

This patch changes the network device internal API to move administrative
operations out of the network device structure and into a separate structure.

This patch involves some hackery to maintain compatibility between the
new and old model, so all 300+ drivers don't have to be changed at once.
For drivers that aren't converted yet, the netdevice_ops virt function list
still resides in the net_device structure. For old protocols, the new
net_device_ops are copied out to the old net_device pointers.

After the transition is completed, the nag message can be changed to
a WARN_ON, and the compatibility code can be made configurable.

Some function pointers aren't moved:
* destructor can't be in net_device_ops because
  it may need to be referenced after the module is unloaded.
* neighbor setup is manipulated in a couple of places that need special
  consideration
* hard_start_xmit is in the fast path for transmit.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 80c8f3d..95ab55c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg)
 				char drivername[64];
 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 				       dev->name, netdev_drivername(dev, drivername, 64));
-				dev->tx_timeout(dev);
+				dev->netdev_ops->ndo_tx_timeout(dev);
 			}
 			if (!mod_timer(&dev->watchdog_timer,
 				       round_jiffies(jiffies +
@@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg)
 
 void __netdev_watchdog_up(struct net_device *dev)
 {
-	if (dev->tx_timeout) {
+	if (dev->netdev_ops->ndo_tx_timeout) {
 		if (dev->watchdog_timeo <= 0)
 			dev->watchdog_timeo = 5*HZ;
 		if (!mod_timer(&dev->watchdog_timer,
-- 
cgit v1.1


From 13d2a1d2b032de08d7dcab6a1edcd47802681f96 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 20 Nov 2008 04:10:00 -0800
Subject: pkt_sched: add DRR scheduler

Add classful DRR scheduler as a more flexible replacement for SFQ.

The main difference to the algorithm described in "Efficient Fair Queueing
using Deficit Round Robin" is that this implementation doesn't drop packets
from the longest queue on overrun because it's classful and limits are
handled by each individual child qdisc.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/Kconfig   |  11 ++
 net/sched/Makefile  |   1 +
 net/sched/sch_drr.c | 505 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 517 insertions(+)
 create mode 100644 net/sched/sch_drr.c

(limited to 'net/sched')

diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 36543b6..4f7ef0d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
 
 	  If unsure, say N.
 
+config NET_SCH_DRR
+	tristate "Deficit Round Robin scheduler (DRR)"
+	help
+	  Say Y here if you want to use the Deficit Round Robin (DRR) packet
+	  scheduling algorithm.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called sch_drr.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 70b35f8..54d950c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO)	+= sch_prio.o
 obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
+obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
new file mode 100644
index 0000000..e71a5de
--- /dev/null
+++ b/net/sched/sch_drr.c
@@ -0,0 +1,505 @@
+/*
+ * net/sched/sch_drr.c         Deficit Round Robin scheduler
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/pkt_sched.h>
+#include <net/sch_generic.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+struct drr_class {
+	struct Qdisc_class_common	common;
+	unsigned int			refcnt;
+	unsigned int			filter_cnt;
+
+	struct gnet_stats_basic		bstats;
+	struct gnet_stats_queue		qstats;
+	struct gnet_stats_rate_est	rate_est;
+	struct list_head		alist;
+	struct Qdisc			*qdisc;
+
+	u32				quantum;
+	u32				deficit;
+};
+
+struct drr_sched {
+	struct list_head		active;
+	struct tcf_proto		*filter_list;
+	struct Qdisc_class_hash		clhash;
+};
+
+static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct Qdisc_class_common *clc;
+
+	clc = qdisc_class_find(&q->clhash, classid);
+	if (clc == NULL)
+		return NULL;
+	return container_of(clc, struct drr_class, common);
+}
+
+static void drr_purge_queue(struct drr_class *cl)
+{
+	unsigned int len = cl->qdisc->q.qlen;
+
+	qdisc_reset(cl->qdisc);
+	qdisc_tree_decrease_qlen(cl->qdisc, len);
+}
+
+static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
+	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
+};
+
+static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			    struct nlattr **tca, unsigned long *arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl = (struct drr_class *)*arg;
+	struct nlattr *tb[TCA_DRR_MAX + 1];
+	u32 quantum;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_DRR_QUANTUM]) {
+		quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
+		if (quantum == 0)
+			return -EINVAL;
+	} else
+		quantum = psched_mtu(qdisc_dev(sch));
+
+	if (cl != NULL) {
+		sch_tree_lock(sch);
+		if (tb[TCA_DRR_QUANTUM])
+			cl->quantum = quantum;
+		sch_tree_unlock(sch);
+
+		if (tca[TCA_RATE])
+			gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					      qdisc_root_sleeping_lock(sch),
+					      tca[TCA_RATE]);
+		return 0;
+	}
+
+	cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
+	if (cl == NULL)
+		return -ENOBUFS;
+
+	cl->refcnt	   = 1;
+	cl->common.classid = classid;
+	cl->quantum	   = quantum;
+	cl->qdisc	   = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					       &pfifo_qdisc_ops, classid);
+	if (cl->qdisc == NULL)
+		cl->qdisc = &noop_qdisc;
+
+	if (tca[TCA_RATE])
+		gen_replace_estimator(&cl->bstats, &cl->rate_est,
+				      qdisc_root_sleeping_lock(sch),
+				      tca[TCA_RATE]);
+
+	sch_tree_lock(sch);
+	qdisc_class_hash_insert(&q->clhash, &cl->common);
+	sch_tree_unlock(sch);
+
+	qdisc_class_hash_grow(sch, &q->clhash);
+
+	*arg = (unsigned long)cl;
+	return 0;
+}
+
+static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
+{
+	gen_kill_estimator(&cl->bstats, &cl->rate_est);
+	qdisc_destroy(cl->qdisc);
+	kfree(cl);
+}
+
+static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (cl->filter_cnt > 0)
+		return -EBUSY;
+
+	sch_tree_lock(sch);
+
+	drr_purge_queue(cl);
+	qdisc_class_hash_remove(&q->clhash, &cl->common);
+
+	if (--cl->refcnt == 0)
+		drr_destroy_class(sch, cl);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
+{
+	struct drr_class *cl = drr_find_class(sch, classid);
+
+	if (cl != NULL)
+		cl->refcnt++;
+
+	return (unsigned long)cl;
+}
+
+static void drr_put_class(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (--cl->refcnt == 0)
+		drr_destroy_class(sch, cl);
+}
+
+static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+
+	return &q->filter_list;
+}
+
+static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
+				  u32 classid)
+{
+	struct drr_class *cl = drr_find_class(sch, classid);
+
+	if (cl != NULL)
+		cl->filter_cnt++;
+
+	return (unsigned long)cl;
+}
+
+static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	cl->filter_cnt--;
+}
+
+static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
+			   struct Qdisc *new, struct Qdisc **old)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (new == NULL) {
+		new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+					&pfifo_qdisc_ops, cl->common.classid);
+		if (new == NULL)
+			new = &noop_qdisc;
+	}
+
+	sch_tree_lock(sch);
+	drr_purge_queue(cl);
+	*old = xchg(&cl->qdisc, new);
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	return cl->qdisc;
+}
+
+static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+
+	if (cl->qdisc->q.qlen == 0)
+		list_del(&cl->alist);
+}
+
+static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct nlattr *nest;
+
+	tcm->tcm_parent	= TC_H_ROOT;
+	tcm->tcm_handle	= cl->common.classid;
+	tcm->tcm_info	= cl->qdisc->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
+}
+
+static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+				struct gnet_dump *d)
+{
+	struct drr_class *cl = (struct drr_class *)arg;
+	struct tc_drr_stats xstats;
+
+	memset(&xstats, 0, sizeof(xstats));
+	if (cl->qdisc->q.qlen)
+		xstats.deficit = cl->deficit;
+
+	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+	    gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
+		return -1;
+
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
+
+	if (arg->stop)
+		return;
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+			if (arg->count < arg->skip) {
+				arg->count++;
+				continue;
+			}
+			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+				arg->stop = 1;
+				return;
+			}
+			arg->count++;
+		}
+	}
+}
+
+static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct tcf_result res;
+	int result;
+
+	if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
+		cl = drr_find_class(sch, skb->priority);
+		if (cl != NULL)
+			return cl;
+	}
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_QUEUED:
+		case TC_ACT_STOLEN:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return NULL;
+		}
+#endif
+		cl = (struct drr_class *)res.class;
+		if (cl == NULL)
+			cl = drr_find_class(sch, res.classid);
+		return cl;
+	}
+	return NULL;
+}
+
+static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int len;
+	int err;
+
+	cl = drr_classify(skb, sch, &err);
+	if (cl == NULL) {
+		if (err & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return err;
+	}
+
+	len = qdisc_pkt_len(skb);
+	err = qdisc_enqueue(skb, cl->qdisc);
+	if (unlikely(err != NET_XMIT_SUCCESS)) {
+		if (net_xmit_drop_count(err)) {
+			cl->qstats.drops++;
+			sch->qstats.drops++;
+		}
+		return err;
+	}
+
+	if (cl->qdisc->q.qlen == 1) {
+		list_add_tail(&cl->alist, &q->active);
+		cl->deficit = cl->quantum;
+	}
+
+	cl->bstats.packets++;
+	cl->bstats.bytes += len;
+	sch->bstats.packets++;
+	sch->bstats.bytes += len;
+
+	sch->q.qlen++;
+	return err;
+}
+
+static struct sk_buff *drr_dequeue(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	while (!list_empty(&q->active)) {
+		cl = list_first_entry(&q->active, struct drr_class, alist);
+		skb = cl->qdisc->ops->peek(cl->qdisc);
+		if (skb == NULL)
+			goto skip;
+
+		len = qdisc_pkt_len(skb);
+		if (len <= cl->deficit) {
+			cl->deficit -= len;
+			skb = qdisc_dequeue_peeked(cl->qdisc);
+			if (cl->qdisc->q.qlen == 0)
+				list_del(&cl->alist);
+			sch->q.qlen--;
+			return skb;
+		}
+
+		cl->deficit += cl->quantum;
+skip:
+		list_move_tail(&cl->alist, &q->active);
+	}
+	return NULL;
+}
+
+static unsigned int drr_drop(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	unsigned int len;
+
+	list_for_each_entry(cl, &q->active, alist) {
+		if (cl->qdisc->ops->drop) {
+			len = cl->qdisc->ops->drop(cl->qdisc);
+			if (len > 0) {
+				if (cl->qdisc->q.qlen == 0)
+					list_del(&cl->alist);
+				return len;
+			}
+		}
+	}
+	return 0;
+}
+
+static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	int err;
+
+	err = qdisc_class_hash_init(&q->clhash);
+	if (err < 0)
+		return err;
+	INIT_LIST_HEAD(&q->active);
+	return 0;
+}
+
+static void drr_reset_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n;
+	unsigned int i;
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+			if (cl->qdisc->q.qlen)
+				list_del(&cl->alist);
+			qdisc_reset(cl->qdisc);
+		}
+	}
+	sch->q.qlen = 0;
+}
+
+static void drr_destroy_qdisc(struct Qdisc *sch)
+{
+	struct drr_sched *q = qdisc_priv(sch);
+	struct drr_class *cl;
+	struct hlist_node *n, *next;
+	unsigned int i;
+
+	tcf_destroy_chain(&q->filter_list);
+
+	for (i = 0; i < q->clhash.hashsize; i++) {
+		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+					  common.hnode)
+			drr_destroy_class(sch, cl);
+	}
+	qdisc_class_hash_destroy(&q->clhash);
+}
+
+static const struct Qdisc_class_ops drr_class_ops = {
+	.change		= drr_change_class,
+	.delete		= drr_delete_class,
+	.get		= drr_get_class,
+	.put		= drr_put_class,
+	.tcf_chain	= drr_tcf_chain,
+	.bind_tcf	= drr_bind_tcf,
+	.unbind_tcf	= drr_unbind_tcf,
+	.graft		= drr_graft_class,
+	.leaf		= drr_class_leaf,
+	.qlen_notify	= drr_qlen_notify,
+	.dump		= drr_dump_class,
+	.dump_stats	= drr_dump_class_stats,
+	.walk		= drr_walk,
+};
+
+static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
+	.cl_ops		= &drr_class_ops,
+	.id		= "drr",
+	.priv_size	= sizeof(struct drr_sched),
+	.enqueue	= drr_enqueue,
+	.dequeue	= drr_dequeue,
+	.peek		= qdisc_peek_dequeued,
+	.drop		= drr_drop,
+	.init		= drr_init_qdisc,
+	.reset		= drr_reset_qdisc,
+	.destroy	= drr_destroy_qdisc,
+	.owner		= THIS_MODULE,
+};
+
+static int __init drr_init(void)
+{
+	return register_qdisc(&drr_qdisc_ops);
+}
+
+static void __exit drr_exit(void)
+{
+	unregister_qdisc(&drr_qdisc_ops);
+}
+
+module_init(drr_init);
+module_exit(drr_exit);
+MODULE_LICENSE("GPL");
-- 
cgit v1.1


From b94c8afcba3ae6584653b98e315446ea83be6ea5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 20 Nov 2008 04:11:36 -0800
Subject: pkt_sched: remove unnecessary xchg() in packet schedulers

The use of xchg() hasn't been necessary since 2.2.something when proper
locking was added to packet schedulers.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c    |  3 ++-
 net/sched/sch_cbq.c    |  7 ++++---
 net/sched/sch_drr.c    |  3 ++-
 net/sched/sch_dsmark.c |  3 ++-
 net/sched/sch_hfsc.c   |  3 ++-
 net/sched/sch_htb.c    |  4 +++-
 net/sched/sch_multiq.c | 16 +++++++++-------
 net/sched/sch_netem.c  |  5 ++---
 net/sched/sch_prio.c   | 16 +++++++++-------
 net/sched/sch_red.c    |  6 ++++--
 net/sched/sch_tbf.c    | 17 +++++++++++++----
 11 files changed, 52 insertions(+), 31 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ca90f6e..2a8b83a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
 		return -EINVAL;
 	if (!new)
 		new = &noop_qdisc;
-	*old = xchg(&flow->q, new);
+	*old = flow->q;
+	flow->q = new;
 	if (*old)
 		qdisc_reset(*old);
 	return 0;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index a99e37e..3a9569a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1635,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 #endif
 		}
 		sch_tree_lock(sch);
-		*old = xchg(&cl->q, new);
+		*old = cl->q;
+		cl->q = new;
 		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 		qdisc_reset(*old);
 		sch_tree_unlock(sch);
@@ -1776,8 +1777,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 			cbq_deactivate_class(cl);
 
 		if (rtab) {
-			rtab = xchg(&cl->R_tab, rtab);
-			qdisc_put_rtab(rtab);
+			qdisc_put_rtab(cl->R_tab);
+			cl->R_tab = rtab;
 		}
 
 		if (tb[TCA_CBQ_LSSOPT])
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e71a5de..8d523d9 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -208,7 +208,8 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
 
 	sch_tree_lock(sch);
 	drr_purge_queue(cl);
-	*old = xchg(&cl->qdisc, new);
+	*old = cl->qdisc;
+	cl->qdisc = new;
 	sch_tree_unlock(sch);
 	return 0;
 }
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 3f9427a..d303daa 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 	}
 
 	sch_tree_lock(sch);
-	*old = xchg(&p->q, new);
+	*old = p->q;
+	p->q = new;
 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 	qdisc_reset(*old);
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 071c474..51dd3f4 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1202,7 +1202,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 
 	sch_tree_lock(sch);
 	hfsc_purge_queue(sch, cl);
-	*old = xchg(&cl->qdisc, new);
+	*old = cl->qdisc;
+	cl->qdisc = new;
 	sch_tree_unlock(sch);
 	return 0;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 83f5e69..3a119f5 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1100,7 +1100,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		    == NULL)
 			return -ENOBUFS;
 		sch_tree_lock(sch);
-		if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+		*old = cl->un.leaf.q;
+		cl->un.leaf.q = new;
+		if (*old != NULL) {
 			qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 			qdisc_reset(*old);
 		}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index f645ac5..7e15186 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -214,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 	q->bands = qopt->bands;
 	for (i = q->bands; i < q->max_bands; i++) {
 		if (q->queues[i] != &noop_qdisc) {
-			struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+			struct Qdisc *child = q->queues[i];
+			q->queues[i] = &noop_qdisc;
 			qdisc_tree_decrease_qlen(child, child->q.qlen);
 			qdisc_destroy(child);
 		}
@@ -224,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 
 	for (i = 0; i < q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
-			struct Qdisc *child;
+			struct Qdisc *child, *old;
 			child = qdisc_create_dflt(qdisc_dev(sch),
 						  sch->dev_queue,
 						  &pfifo_qdisc_ops,
@@ -232,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 							    i + 1));
 			if (child) {
 				sch_tree_lock(sch);
-				child = xchg(&q->queues[i], child);
+				old = q->queues[i];
+				q->queues[i] = child;
 
-				if (child != &noop_qdisc) {
-					qdisc_tree_decrease_qlen(child,
-								 child->q.qlen);
-					qdisc_destroy(child);
+				if (old != &noop_qdisc) {
+					qdisc_tree_decrease_qlen(old,
+								 old->q.qlen);
+					qdisc_destroy(old);
 				}
 				sch_tree_unlock(sch);
 			}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3cbc3ff..f840d6b 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -331,10 +331,9 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
 	root_lock = qdisc_root_sleeping_lock(sch);
 
 	spin_lock_bh(root_lock);
-	d = xchg(&q->delay_dist, d);
+	kfree(q->delay_dist);
+	q->delay_dist = d;
 	spin_unlock_bh(root_lock);
-
-	kfree(d);
 	return 0;
 }
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ea65a87..94cecef 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -187,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
 	for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
-		struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+		struct Qdisc *child = q->queues[i];
+		q->queues[i] = &noop_qdisc;
 		if (child != &noop_qdisc) {
 			qdisc_tree_decrease_qlen(child, child->q.qlen);
 			qdisc_destroy(child);
@@ -197,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 
 	for (i=0; i<q->bands; i++) {
 		if (q->queues[i] == &noop_qdisc) {
-			struct Qdisc *child;
+			struct Qdisc *child, *old;
 			child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 						  &pfifo_qdisc_ops,
 						  TC_H_MAKE(sch->handle, i + 1));
 			if (child) {
 				sch_tree_lock(sch);
-				child = xchg(&q->queues[i], child);
+				old = q->queues[i];
+				q->queues[i] = child;
 
-				if (child != &noop_qdisc) {
-					qdisc_tree_decrease_qlen(child,
-								 child->q.qlen);
-					qdisc_destroy(child);
+				if (old != &noop_qdisc) {
+					qdisc_tree_decrease_qlen(old,
+								 old->q.qlen);
+					qdisc_destroy(old);
 				}
 				sch_tree_unlock(sch);
 			}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6a0371c..2bdf241 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -202,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	q->limit = ctl->limit;
 	if (child) {
 		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
-		qdisc_destroy(xchg(&q->qdisc, child));
+		qdisc_destroy(q->qdisc);
+		q->qdisc = child;
 	}
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
@@ -283,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		new = &noop_qdisc;
 
 	sch_tree_lock(sch);
-	*old = xchg(&q->qdisc, new);
+	*old = q->qdisc;
+	q->qdisc = new;
 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 	qdisc_reset(*old);
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index bb7783d..a2f93c0 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -236,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	struct tc_tbf_qopt *qopt;
 	struct qdisc_rate_table *rtab = NULL;
 	struct qdisc_rate_table *ptab = NULL;
+	struct qdisc_rate_table *tmp;
 	struct Qdisc *child = NULL;
 	int max_size,n;
 
@@ -284,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	sch_tree_lock(sch);
 	if (child) {
 		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
-		qdisc_destroy(xchg(&q->qdisc, child));
+		qdisc_destroy(q->qdisc);
+		q->qdisc = child;
 	}
 	q->limit = qopt->limit;
 	q->mtu = qopt->mtu;
@@ -292,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	q->buffer = qopt->buffer;
 	q->tokens = q->buffer;
 	q->ptokens = q->mtu;
-	rtab = xchg(&q->R_tab, rtab);
-	ptab = xchg(&q->P_tab, ptab);
+
+	tmp = q->R_tab;
+	q->R_tab = rtab;
+	rtab = tmp;
+
+	tmp = q->P_tab;
+	q->P_tab = ptab;
+	ptab = tmp;
 	sch_tree_unlock(sch);
 	err = 0;
 done:
@@ -383,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 		new = &noop_qdisc;
 
 	sch_tree_lock(sch);
-	*old = xchg(&q->qdisc, new);
+	*old = q->qdisc;
+	q->qdisc = new;
 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
 	qdisc_reset(*old);
 	sch_tree_unlock(sch);
-- 
cgit v1.1


From 47a1a1d4be2910b13a8e90f75c17e253c39531ff Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 19 Nov 2008 08:03:09 +0000
Subject: pkt_sched: remove unnecessary xchg() in packet classifiers

The use of xchg() hasn't been necessary since 2.2.something when proper
locking was added to packet schedulers. In the case of classifiers they
mostly weren't even necessary before that since they're mainly used
to assign a NULL pointer to the filter root in the ->destroy path;
the root is destroyed immediately after that.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c     |  3 ++-
 net/sched/cls_basic.c   |  2 +-
 net/sched/cls_cgroup.c  |  4 +---
 net/sched/cls_fw.c      |  2 +-
 net/sched/cls_route.c   |  2 +-
 net/sched/cls_tcindex.c |  6 ------
 net/sched/cls_u32.c     | 11 ++++++-----
 7 files changed, 12 insertions(+), 18 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 16e7ac9..173fcc4 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
 	if (src->action) {
 		struct tc_action *act;
 		tcf_tree_lock(tp);
-		act = xchg(&dst->action, src->action);
+		act = dst->action;
+		dst->action = src->action;
 		tcf_tree_unlock(tp);
 		if (act)
 			tcf_action_destroy(act, TCA_ACT_UNBIND);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 956915c..4e2bda8 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp,
 
 static void basic_destroy(struct tcf_proto *tp)
 {
-	struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+	struct basic_head *head = tp->root;
 	struct basic_filter *f, *n;
 
 	list_for_each_entry_safe(f, n, &head->flist, link) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 53ada2c..0d68b19 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -201,9 +201,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
 
 static void cls_cgroup_destroy(struct tcf_proto *tp)
 {
-	struct cls_cgroup_head *head;
-
-	head = (struct cls_cgroup_head *)xchg(&tp->root, NULL);
+	struct cls_cgroup_head *head = tp->root;
 
 	if (head) {
 		tcf_exts_destroy(tp, &head->exts);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b0f90e5..6d6e875 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
 
 static void fw_destroy(struct tcf_proto *tp)
 {
-	struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+	struct fw_head *head = tp->root;
 	struct fw_filter *f;
 	int h;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e3d8455..bdf1f41 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
 
 static void route4_destroy(struct tcf_proto *tp)
 {
-	struct route4_head *head = xchg(&tp->root, NULL);
+	struct route4_head *head = tp->root;
 	int h1, h2;
 
 	if (head == NULL)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7a7bff5..e806f23 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,12 +13,6 @@
 #include <net/netlink.h>
 #include <net/pkt_cls.h>
 
-
-/*
- * Not quite sure if we need all the xchgs Alexey uses when accessing things.
- * Can always add them later ... :)
- */
-
 /*
  * Passing parameters to the root seems to be done more awkwardly than really
  * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 246f906..05d1780 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 static void u32_destroy(struct tcf_proto *tp)
 {
 	struct tc_u_common *tp_c = tp->data;
-	struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+	struct tc_u_hnode *root_ht = tp->root;
 
 	WARN_ON(root_ht == NULL);
 
@@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 	err = -EINVAL;
 	if (tb[TCA_U32_LINK]) {
 		u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
-		struct tc_u_hnode *ht_down = NULL;
+		struct tc_u_hnode *ht_down = NULL, *ht_old;
 
 		if (TC_U32_KEY(handle))
 			goto errout;
@@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
 		}
 
 		tcf_tree_lock(tp);
-		ht_down = xchg(&n->ht_down, ht_down);
+		ht_old = n->ht_down;
+		n->ht_down = ht_down;
 		tcf_tree_unlock(tp);
 
-		if (ht_down)
-			ht_down->refcnt--;
+		if (ht_old)
+			ht_old->refcnt--;
 	}
 	if (tb[TCA_U32_CLASSID]) {
 		n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
-- 
cgit v1.1


From 98aa9c80f1fee01e98dfdc484ab7316af45f8f17 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Fri, 21 Nov 2008 04:37:27 -0800
Subject: pkt_sched: sch_drr: Fix qlen in drr_drop()

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_drr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/sched')

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8d523d9..37e6ab9 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -406,6 +406,7 @@ static unsigned int drr_drop(struct Qdisc *sch)
 		if (cl->qdisc->ops->drop) {
 			len = cl->qdisc->ops->drop(cl->qdisc);
 			if (len > 0) {
+				sch->q.qlen--;
 				if (cl->qdisc->q.qlen == 0)
 					list_del(&cl->alist);
 				return len;
-- 
cgit v1.1


From 3f0947c3ffaed33c1c38b79e4b17f75ba072d3e9 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 24 Nov 2008 15:46:08 -0800
Subject: pkt_sched: sch_drr: fix drr_dequeue loop()

Jarek Poplawski points out:

If all child qdiscs of sch_drr are non-work-conserving (e.g. sch_tbf)
drr_dequeue() will busy-loop waiting for skbs instead of leaving the
job for a watchdog. Checking for list_empty() in each loop isn't
necessary either, because this can never be true except the first time.

Using non-work-conserving qdiscs as children of DRR makes no sense,
simply bail out in that case.

Reported-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_drr.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 37e6ab9..e7a7e87 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -373,11 +373,13 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
 	struct sk_buff *skb;
 	unsigned int len;
 
-	while (!list_empty(&q->active)) {
+	if (list_empty(&q->active))
+		goto out;
+	while (1) {
 		cl = list_first_entry(&q->active, struct drr_class, alist);
 		skb = cl->qdisc->ops->peek(cl->qdisc);
 		if (skb == NULL)
-			goto skip;
+			goto out;
 
 		len = qdisc_pkt_len(skb);
 		if (len <= cl->deficit) {
@@ -390,9 +392,9 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
 		}
 
 		cl->deficit += cl->quantum;
-skip:
 		list_move_tail(&cl->alist, &q->active);
 	}
+out:
 	return NULL;
 }
 
-- 
cgit v1.1


From f6486d40b33d1ac2c44c7c55db7edf022d9f4329 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Tue, 25 Nov 2008 13:56:06 -0800
Subject: pkt_sched: sch_api: Remove qdisc_list_lock

Now that qdisc->ops->peek() is implemented, nothing calls
qdisc_tree_decrease_qlen() without rtnl_lock() anymore, so the
qdisc_list_lock added by commit f6e0b239a2657ea8cb67f0d83d0bfdbfd19a481b
("pkt_sched: Fix qdisc list locking") can be removed.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 1ef25e6..3fcfd4e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -204,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 	return NULL;
 }
 
-/*
- * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
- * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
- */
-static DEFINE_SPINLOCK(qdisc_list_lock);
-
 static void qdisc_list_add(struct Qdisc *q)
 {
-	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
-		spin_lock_bh(&qdisc_list_lock);
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
 		list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
-		spin_unlock_bh(&qdisc_list_lock);
-	}
 }
 
 void qdisc_list_del(struct Qdisc *q)
 {
-	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
-		spin_lock_bh(&qdisc_list_lock);
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
 		list_del(&q->list);
-		spin_unlock_bh(&qdisc_list_lock);
-	}
 }
 EXPORT_SYMBOL(qdisc_list_del);
 
@@ -234,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 	unsigned int i;
 	struct Qdisc *q;
 
-	spin_lock_bh(&qdisc_list_lock);
-
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 		struct Qdisc *txq_root = txq->qdisc_sleeping;
 
 		q = qdisc_match_from_root(txq_root, handle);
 		if (q)
-			goto unlock;
+			goto out;
 	}
 
 	q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
-
-unlock:
-	spin_unlock_bh(&qdisc_list_lock);
-
+out:
 	return q;
 }
 
-- 
cgit v1.1


From dc0a0011cfa8ff01b86859006f4db3656c9a896b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 25 Nov 2008 16:50:02 -0800
Subject: pkt_sched: fix warning in net/sched/sch_hfsc.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

this warning:

  net/sched/sch_hfsc.c: In function ‘hfsc_enqueue’:
  net/sched/sch_hfsc.c:1577: warning: ‘err’ may be used uninitialized in this function

triggers because GCC does not recognize the (correct) error flow
between hfsc_classify(), 'cl' and 'err'.

Annotate it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 51dd3f4..613179c9 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1563,7 +1563,7 @@ static int
 hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct hfsc_class *cl;
-	int err;
+	int uninitialized_var(err);
 
 	cl = hfsc_classify(skb, sch, &err);
 	if (cl == NULL) {
-- 
cgit v1.1


From 0e991ec6a0340916d3f29bd5dcb35299069e7226 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 25 Nov 2008 21:12:32 -0800
Subject: tc: propogate errors from tcf_hash_create

Allow tcf_hash_create to return different errors on estimator failure.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c     | 18 +++++++++++++-----
 net/sched/act_gact.c    |  4 ++--
 net/sched/act_ipt.c     |  4 ++--
 net/sched/act_mirred.c  |  4 ++--
 net/sched/act_nat.c     |  4 ++--
 net/sched/act_pedit.c   |  4 ++--
 net/sched/act_simple.c  |  4 ++--
 net/sched/act_skbedit.c |  4 ++--
 8 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8f457f1..9d03cc3 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
 }
 EXPORT_SYMBOL(tcf_hash_check);
 
-struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
+				   struct tc_action *a, int size, int bind,
+				   u32 *idx_gen, struct tcf_hashinfo *hinfo)
 {
 	struct tcf_common *p = kzalloc(size, GFP_KERNEL);
 
 	if (unlikely(!p))
-		return p;
+		return ERR_PTR(-ENOMEM);
 	p->tcfc_refcnt = 1;
 	if (bind)
 		p->tcfc_bindcnt = 1;
@@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti
 	p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
 	p->tcfc_tm.install = jiffies;
 	p->tcfc_tm.lastuse = jiffies;
-	if (est)
-		gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
-				  &p->tcfc_lock, est);
+	if (est) {
+		int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+					    &p->tcfc_lock, est);
+		if (err) {
+			kfree(p);
+			return ERR_PTR(err);
+		}
+	}
+
 	a->priv = (void *) p;
 	return p;
 }
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ac04289..e7f796a 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
 	if (!pc) {
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
 				     bind, &gact_idx_gen, &gact_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0453d79..082c520 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
 	if (!pc) {
 		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
 				     &ipt_idx_gen, &ipt_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 70341c0..b9aaab4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
 			return -EINVAL;
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
 				     &mirred_idx_gen, &mirred_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 		ret = ACT_P_CREATED;
 	} else {
 		if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b39ed4..d885ba3 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
 	if (!pc) {
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &nat_idx_gen, &nat_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 		p = to_tcf_nat(pc);
 		ret = ACT_P_CREATED;
 	} else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d5f4e34..96c0ed1 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
 			return -EINVAL;
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
 				     &pedit_idx_gen, &pedit_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 		p = to_pedit(pc);
 		keys = kmalloc(ksize, GFP_KERNEL);
 		if (keys == NULL) {
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7851ce..8daa1eb 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
 	if (!pc) {
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &simp_idx_gen, &simp_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 
 		d = to_defact(pc);
 		ret = alloc_defdata(d, defdata);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fe9777e..4ab916b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
 	if (!pc) {
 		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
 				     &skbedit_idx_gen, &skbedit_hash_info);
-		if (unlikely(!pc))
-			return -ENOMEM;
+		if (IS_ERR(pc))
+		    return PTR_ERR(pc);
 
 		d = to_skbedit(pc);
 		ret = ACT_P_CREATED;
-- 
cgit v1.1


From 71bcb09a57894fa35591ce93dd972065eeecb63a Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 25 Nov 2008 21:13:31 -0800
Subject: tc: check for errors in gen_rate_estimator creation

The functions gen_new_estimator and gen_replace_estimator can return
errors, but they were being ignored.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 25 +++++++++++++++++--------
 net/sched/sch_api.c    |  7 +++++--
 net/sched/sch_cbq.c    | 33 ++++++++++++++++++++++++---------
 net/sched/sch_drr.c    | 26 ++++++++++++++++++--------
 net/sched/sch_hfsc.c   | 25 ++++++++++++++++++-------
 net/sched/sch_htb.c    | 22 +++++++++++++++-------
 6 files changed, 97 insertions(+), 41 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 38015b4..e19a026 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -185,14 +185,21 @@ override:
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE]);
-			if (P_tab == NULL) {
-				qdisc_put_rtab(R_tab);
+			if (P_tab == NULL)
 				goto failure;
-			}
 		}
 	}
-	/* No failure allowed after this point */
+
 	spin_lock_bh(&police->tcf_lock);
+	if (est) {
+		err = gen_replace_estimator(&police->tcf_bstats,
+					    &police->tcf_rate_est,
+					    &police->tcf_lock, est);
+		if (err)
+			goto failure_unlock;
+	}
+
+	/* No failure allowed after this point */
 	if (R_tab != NULL) {
 		qdisc_put_rtab(police->tcfp_R_tab);
 		police->tcfp_R_tab = R_tab;
@@ -217,10 +224,6 @@ override:
 
 	if (tb[TCA_POLICE_AVRATE])
 		police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
-	if (est)
-		gen_replace_estimator(&police->tcf_bstats,
-				      &police->tcf_rate_est,
-				      &police->tcf_lock, est);
 
 	spin_unlock_bh(&police->tcf_lock);
 	if (ret != ACT_P_CREATED)
@@ -238,7 +241,13 @@ override:
 	a->priv = police;
 	return ret;
 
+failure_unlock:
+	spin_unlock_bh(&police->tcf_lock);
 failure:
+	if (P_tab)
+		qdisc_put_rtab(P_tab);
+	if (R_tab)
+		qdisc_put_rtab(R_tab);
 	if (ret == ACT_P_CREATED)
 		kfree(police);
 	return err;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 3fcfd4e..f859dd5 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -880,9 +880,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 	sch->stab = stab;
 
 	if (tca[TCA_RATE])
+		/* NB: ignores errors from replace_estimator
+		   because change can't be undone. */
 		gen_replace_estimator(&sch->bstats, &sch->rate_est,
-				      qdisc_root_sleeping_lock(sch),
-				      tca[TCA_RATE]);
+					    qdisc_root_sleeping_lock(sch),
+					    tca[TCA_RATE]);
+
 	return 0;
 }
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 3a9569a..9e43ed9 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1765,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 		}
 
 		if (tb[TCA_CBQ_RATE]) {
-			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
+			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
+					      tb[TCA_CBQ_RTAB]);
 			if (rtab == NULL)
 				return -EINVAL;
 		}
 
+		if (tca[TCA_RATE]) {
+			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+						    qdisc_root_sleeping_lock(sch),
+						    tca[TCA_RATE]);
+			if (err) {
+				if (rtab)
+					qdisc_put_rtab(rtab);
+				return err;
+			}
+		}
+
 		/* Change class parameters */
 		sch_tree_lock(sch);
 
@@ -1805,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 		sch_tree_unlock(sch);
 
-		if (tca[TCA_RATE])
-			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_sleeping_lock(sch),
-					      tca[TCA_RATE]);
 		return 0;
 	}
 
@@ -1855,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
 	if (cl == NULL)
 		goto failure;
+
+	if (tca[TCA_RATE]) {
+		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+					qdisc_root_sleeping_lock(sch),
+					tca[TCA_RATE]);
+		if (err) {
+			kfree(cl);
+			goto failure;
+		}
+	}
+
 	cl->R_tab = rtab;
 	rtab = NULL;
 	cl->refcnt = 1;
@@ -1896,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	qdisc_class_hash_grow(sch, &q->clhash);
 
-	if (tca[TCA_RATE])
-		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
-
 	*arg = (unsigned long)cl;
 	return 0;
 
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e7a7e87..f6b4fa9 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -82,15 +82,19 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		quantum = psched_mtu(qdisc_dev(sch));
 
 	if (cl != NULL) {
+		if (tca[TCA_RATE]) {
+			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+						    qdisc_root_sleeping_lock(sch),
+						    tca[TCA_RATE]);
+			if (err)
+				return err;
+		}
+
 		sch_tree_lock(sch);
 		if (tb[TCA_DRR_QUANTUM])
 			cl->quantum = quantum;
 		sch_tree_unlock(sch);
 
-		if (tca[TCA_RATE])
-			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_sleeping_lock(sch),
-					      tca[TCA_RATE]);
 		return 0;
 	}
 
@@ -106,10 +110,16 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl->qdisc == NULL)
 		cl->qdisc = &noop_qdisc;
 
-	if (tca[TCA_RATE])
-		gen_replace_estimator(&cl->bstats, &cl->rate_est,
-				      qdisc_root_sleeping_lock(sch),
-				      tca[TCA_RATE]);
+	if (tca[TCA_RATE]) {
+		err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					    qdisc_root_sleeping_lock(sch),
+					    tca[TCA_RATE]);
+		if (err) {
+			qdisc_destroy(cl->qdisc);
+			kfree(cl);
+			return err;
+		}
+	}
 
 	sch_tree_lock(sch);
 	qdisc_class_hash_insert(&q->clhash, &cl->common);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 613179c9..45c31b1 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1018,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 		cur_time = psched_get_time();
 
+		if (tca[TCA_RATE]) {
+			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+					      qdisc_root_sleeping_lock(sch),
+					      tca[TCA_RATE]);
+			if (err)
+				return err;
+		}
+
 		sch_tree_lock(sch);
 		if (rsc != NULL)
 			hfsc_change_rsc(cl, rsc, cur_time);
@@ -1034,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		}
 		sch_tree_unlock(sch);
 
-		if (tca[TCA_RATE])
-			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_sleeping_lock(sch),
-					      tca[TCA_RATE]);
 		return 0;
 	}
 
@@ -1063,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	if (cl == NULL)
 		return -ENOBUFS;
 
+	if (tca[TCA_RATE]) {
+		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+					qdisc_root_sleeping_lock(sch),
+					tca[TCA_RATE]);
+		if (err) {
+			kfree(cl);
+			return err;
+		}
+	}
+
 	if (rsc != NULL)
 		hfsc_change_rsc(cl, rsc, 0);
 	if (fsc != NULL)
@@ -1093,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	qdisc_class_hash_grow(sch, &q->clhash);
 
-	if (tca[TCA_RATE])
-		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
 	*arg = (unsigned long)cl;
 	return 0;
 }
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 3a119f5..8a45199 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1332,9 +1332,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
 			goto failure;
 
-		gen_new_estimator(&cl->bstats, &cl->rate_est,
-				  qdisc_root_sleeping_lock(sch),
-				  tca[TCA_RATE] ? : &est.nla);
+		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
+					qdisc_root_sleeping_lock(sch),
+					tca[TCA_RATE] ? : &est.nla);
+		if (err) {
+			kfree(cl);
+			goto failure;
+		}
+
 		cl->refcnt = 1;
 		cl->children = 0;
 		INIT_LIST_HEAD(&cl->un.leaf.drop_list);
@@ -1386,10 +1391,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		if (parent)
 			parent->children++;
 	} else {
-		if (tca[TCA_RATE])
-			gen_replace_estimator(&cl->bstats, &cl->rate_est,
-					      qdisc_root_sleeping_lock(sch),
-					      tca[TCA_RATE]);
+		if (tca[TCA_RATE]) {
+			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
+						    qdisc_root_sleeping_lock(sch),
+						    tca[TCA_RATE]);
+			if (err)
+				return err;
+		}
 		sch_tree_lock(sch);
 	}
 
-- 
cgit v1.1


From c1b56878fb68e9c14070939ea4537ad4db79ffae Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Tue, 25 Nov 2008 21:14:06 -0800
Subject: tc: policing requires a rate estimator

Found that while trying average rate policing, it was possible to
request average rate policing without a rate estimator. This results
in no policing which is harmless but incorrect.

Since policing could be set up in two steps, the check has to be done
in the kernel.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net/sched')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index e19a026..c39f60c 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,6 +182,12 @@ override:
 		R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
 		if (R_tab == NULL)
 			goto failure;
+
+		if (!est && !gen_estimator_active(&police->tcf_rate_est)) {
+			err = -EINVAL;
+			goto failure;
+		}
+
 		if (parm->peakrate.rate) {
 			P_tab = qdisc_get_rtab(&parm->peakrate,
 					       tb[TCA_POLICE_PEAKRATE]);
-- 
cgit v1.1


From 244e6c2d0724bc4908a1995804704bdee3b31528 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 26 Nov 2008 15:24:32 -0800
Subject: pkt_sched: gen_estimator: Optimize gen_estimator_active()

Since all other gen_estimator functions use bstats and rate_est params
together, and searching for them is optimized now, let's use this also
in gen_estimator_active(). The return type of gen_estimator_active()
is changed to bool, and gen_find_node() parameters to const, btw.

In tcf_act_police_locate() a check for ACT_P_CREATED is added before
calling gen_estimator_active().

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/sched')

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c39f60c..5c72a11 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -183,7 +183,9 @@ override:
 		if (R_tab == NULL)
 			goto failure;
 
-		if (!est && !gen_estimator_active(&police->tcf_rate_est)) {
+		if (!est && (ret == ACT_P_CREATED ||
+			     !gen_estimator_active(&police->tcf_bstats,
+						   &police->tcf_rate_est))) {
 			err = -EINVAL;
 			goto failure;
 		}
-- 
cgit v1.1


From 6113b748fb9935399ec2bbca3a3dc82008f6167f Mon Sep 17 00:00:00 2001
From: Hannes Eder <hannes@hanneseder.net>
Date: Fri, 28 Nov 2008 03:06:46 -0800
Subject: pkt_sched: fix sparse warning

Impact: make global function static

Fix the following sparse warning:

  net/sched/sch_api.c:192:14: warning: symbol 'qdisc_match_from_root' was not declared. Should it be static?

Signed-off-by: Hannes Eder <hannes@hanneseder.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f859dd5..6bc29e8 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc);
    (root qdisc, all its children, children of children etc.)
  */
 
-struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
+static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 {
 	struct Qdisc *q;
 
-- 
cgit v1.1


From 4164d661b8c9602fbbf651a33377d2c51f68c451 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 3 Dec 2008 21:08:44 -0800
Subject: pkt_sched: sch_htb: Remove htb_class aprio field

Remove practically unused struct htb_class aprio field.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8a45199..ce8b1ad 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -88,7 +88,6 @@ struct htb_class {
 		struct htb_class_leaf {
 			struct Qdisc *q;
 			int prio;
-			int aprio;
 			int quantum;
 			int deficit[TC_HTB_MAXDEPTH];
 			struct list_head drop_list;
@@ -527,10 +526,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 
 	if (!cl->prio_activity) {
-		cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+		cl->prio_activity = 1 << cl->un.leaf.prio;
 		htb_activate_prios(q, cl);
 		list_add_tail(&cl->un.leaf.drop_list,
-			      q->drops + cl->un.leaf.aprio);
+			      q->drops + cl->un.leaf.prio);
 	}
 }
 
-- 
cgit v1.1


From 633fe66ed8385ccf8b4a74a00a4c6eb40850d65f Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 3 Dec 2008 21:09:10 -0800
Subject: pkt_sched: sch_htb: Remove htb_sched nwc_hit field

Remove practically unused struct htb_sched nwc_hit field.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ce8b1ad..7a71e94 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -151,9 +151,6 @@ struct htb_sched {
 	/* time of nearest event per level (row) */
 	psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
 
-	/* whether we hit non-work conserving class during this dequeue; we use */
-	int nwc_hit;		/* this to disable mindelay complaint in dequeue */
-
 	int defcls;		/* class where unclassified flows go to */
 
 	/* filters for qdisc itself */
@@ -807,7 +804,7 @@ next:
 			       cl->common.classid);
 			cl->warned = 1;
 		}
-		q->nwc_hit++;
+
 		htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 				  ptr[0]) + prio);
 		cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -852,7 +849,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 	q->now = psched_get_time();
 
 	next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
-	q->nwc_hit = 0;
+
 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 		/* common case optimization - skip event handler quickly */
 		int m;
-- 
cgit v1.1


From c19f7a34f7cc7543c62ad065952e146dc77d1a38 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 3 Dec 2008 21:09:45 -0800
Subject: pkt_sched: sch_htb: Clean htb_class prio and quantum fields

While implementing htb_parent_to_leaf(), backup prio and quantum fields
were added to struct htb_class to preserve these values for inner
classes in case of their return to leaf. This patch cleans this up a bit
by removing the duplicates in union leaf.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 41 ++++++++++++++++-------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7a71e94..80cb94d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -84,11 +84,12 @@ struct htb_class {
 	unsigned int children;
 	struct htb_class *parent;	/* parent class */
 
+	int prio;		/* these two are used only by leaves... */
+	int quantum;		/* but stored for parent-to-leaf return */
+
 	union {
 		struct htb_class_leaf {
 			struct Qdisc *q;
-			int prio;
-			int quantum;
 			int deficit[TC_HTB_MAXDEPTH];
 			struct list_head drop_list;
 		} leaf;
@@ -122,10 +123,6 @@ struct htb_class {
 	psched_tdiff_t mbuffer;	/* max wait time */
 	long tokens, ctokens;	/* current number of tokens */
 	psched_time_t t_c;	/* checkpoint time */
-
-	int prio;		/* For parent to leaf return possible here */
-	int quantum;		/* we do backup. Finally full replacement  */
-				/* of un.leaf originals should be done. */
 };
 
 static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
@@ -523,10 +520,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 	WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
 
 	if (!cl->prio_activity) {
-		cl->prio_activity = 1 << cl->un.leaf.prio;
+		cl->prio_activity = 1 << cl->prio;
 		htb_activate_prios(q, cl);
 		list_add_tail(&cl->un.leaf.drop_list,
-			      q->drops + cl->un.leaf.prio);
+			      q->drops + cl->prio);
 	}
 }
 
@@ -816,7 +813,7 @@ next:
 	if (likely(skb != NULL)) {
 		cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
 		if (cl->un.leaf.deficit[level] < 0) {
-			cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+			cl->un.leaf.deficit[level] += cl->quantum;
 			htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
 					  ptr[0]) + prio);
 		}
@@ -1050,8 +1047,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	opt.buffer = cl->buffer;
 	opt.ceil = cl->ceil->rate;
 	opt.cbuffer = cl->cbuffer;
-	opt.quantum = cl->un.leaf.quantum;
-	opt.prio = cl->un.leaf.prio;
+	opt.quantum = cl->quantum;
+	opt.prio = cl->prio;
 	opt.level = cl->level;
 	NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
 
@@ -1155,8 +1152,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
 	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
 	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
 	parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
-	parent->un.leaf.quantum = parent->quantum;
-	parent->un.leaf.prio = parent->prio;
 	parent->tokens = parent->buffer;
 	parent->ctokens = parent->cbuffer;
 	parent->t_c = psched_get_time();
@@ -1400,27 +1395,23 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	/* it used to be a nasty bug here, we have to check that node
 	   is really leaf before changing cl->un.leaf ! */
 	if (!cl->level) {
-		cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
-		if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+		cl->quantum = rtab->rate.rate / q->rate2quantum;
+		if (!hopt->quantum && cl->quantum < 1000) {
 			printk(KERN_WARNING
 			       "HTB: quantum of class %X is small. Consider r2q change.\n",
 			       cl->common.classid);
-			cl->un.leaf.quantum = 1000;
+			cl->quantum = 1000;
 		}
-		if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+		if (!hopt->quantum && cl->quantum > 200000) {
 			printk(KERN_WARNING
 			       "HTB: quantum of class %X is big. Consider r2q change.\n",
 			       cl->common.classid);
-			cl->un.leaf.quantum = 200000;
+			cl->quantum = 200000;
 		}
 		if (hopt->quantum)
-			cl->un.leaf.quantum = hopt->quantum;
-		if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
-			cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
-
-		/* backup for htb_parent_to_leaf */
-		cl->quantum = cl->un.leaf.quantum;
-		cl->prio = cl->un.leaf.prio;
+			cl->quantum = hopt->quantum;
+		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
+			cl->prio = TC_HTB_NUMPRIO - 1;
 	}
 
 	cl->buffer = hopt->buffer;
-- 
cgit v1.1


From 23cb913d25b20ed88b36a26f337cfdb4605e63f4 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 3 Dec 2008 21:16:58 -0800
Subject: pkt_sched: sch_htb: Remove L2T()

L2T() is currently used only in one place (and has one spurious
parameter, btw), so let's: 'get rid of L2T completely, and just
use "qdisc_l2t(rate, size)" directly.' - quote & feedback from
David S. Miller.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 80cb94d..fcd06e2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -125,13 +125,6 @@ struct htb_class {
 	psched_time_t t_c;	/* checkpoint time */
 };
 
-static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
-			   int size)
-{
-	long result = qdisc_l2t(rate, size);
-	return result;
-}
-
 struct htb_sched {
 	struct Qdisc_class_hash clhash;
 	struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
@@ -604,7 +597,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 
 #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
 	if (toks > cl->B) toks = cl->B; \
-	toks -= L2T(cl, cl->R, bytes); \
+	toks -= (long) qdisc_l2t(cl->R, bytes); \
 	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
 	cl->T = toks
 
-- 
cgit v1.1


From 59e4220a1112bf65924bc2e47b5757911b6f349b Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Wed, 3 Dec 2008 21:17:27 -0800
Subject: pkt_sched: sch_htb: Replace HTB_ACCNT() macro with inlines

Replace HTB_ACCNT() macro with inlines to make it more readable.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index fcd06e2..f89fd71 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -577,6 +577,32 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	return NET_XMIT_SUCCESS;
 }
 
+static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
+{
+	long toks = diff + cl->tokens;
+
+	if (toks > cl->buffer)
+		toks = cl->buffer;
+	toks -= (long) qdisc_l2t(cl->rate, bytes);
+	if (toks <= -cl->mbuffer)
+		toks = 1 - cl->mbuffer;
+
+	cl->tokens = toks;
+}
+
+static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
+{
+	long toks = diff + cl->ctokens;
+
+	if (toks > cl->cbuffer)
+		toks = cl->cbuffer;
+	toks -= (long) qdisc_l2t(cl->ceil, bytes);
+	if (toks <= -cl->mbuffer)
+		toks = 1 - cl->mbuffer;
+
+	cl->ctokens = toks;
+}
+
 /**
  * htb_charge_class - charges amount "bytes" to leaf and ancestors
  *
@@ -592,26 +618,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 			     int level, struct sk_buff *skb)
 {
 	int bytes = qdisc_pkt_len(skb);
-	long toks, diff;
 	enum htb_cmode old_mode;
-
-#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
-	if (toks > cl->B) toks = cl->B; \
-	toks -= (long) qdisc_l2t(cl->R, bytes); \
-	if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
-	cl->T = toks
+	long diff;
 
 	while (cl) {
 		diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
 		if (cl->level >= level) {
 			if (cl->level == level)
 				cl->xstats.lends++;
-			HTB_ACCNT(tokens, buffer, rate);
+			htb_accnt_tokens(cl, bytes, diff);
 		} else {
 			cl->xstats.borrows++;
 			cl->tokens += diff;	/* we moved t_c; update tokens */
 		}
-		HTB_ACCNT(ctokens, cbuffer, ceil);
+		htb_accnt_ctokens(cl, bytes, diff);
 		cl->t_c = q->now;
 
 		old_mode = cl->cmode;
-- 
cgit v1.1


From 1b5c0077e1615bb16e777a10ec1fc1195ba059ac Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Tue, 9 Dec 2008 22:34:40 -0800
Subject: pkt_sched: sch_htb: Optimize htb_find_next_upper()

htb_id_find_next_upper() is usually called to find a class with next
id after some previously removed class, so let's move a check for
equality to the end: it's the least likely here.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f89fd71..b820a0a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -698,14 +698,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 	while (n) {
 		struct htb_class *cl =
 		    rb_entry(n, struct htb_class, node[prio]);
-		if (id == cl->common.classid)
-			return n;
 
 		if (id > cl->common.classid) {
 			n = n->rb_right;
-		} else {
+		} else if (id < cl->common.classid) {
 			r = n;
 			n = n->rb_left;
+		} else {
+			return n;
 		}
 	}
 	return r;
-- 
cgit v1.1


From 512bb43eb5422ee69a1be05ea0d89dc074fac9a2 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Tue, 9 Dec 2008 22:35:02 -0800
Subject: pkt_sched: sch_htb: Optimize WARN_ONs in htb_dequeue_tree() etc.

We can skip WARN_ON() in htb_dequeue_tree() because there should
always be a similar warning from htb_lookup_leaf() earlier.

The first WARN_ON() in htb_lookup_leaf() is changed to BUG_ON()
because most likely this would end with an oops anyway.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index b820a0a..5070643 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -726,7 +726,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 		u32 *pid;
 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
 
-	WARN_ON(!tree->rb_node);
+	BUG_ON(!tree->rb_node);
 	sp->root = tree->rb_node;
 	sp->pptr = pptr;
 	sp->pid = pid;
@@ -746,9 +746,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 				*sp->pptr = (*sp->pptr)->rb_left;
 			if (sp > stk) {
 				sp--;
-				WARN_ON(!*sp->pptr);
-				if (!*sp->pptr)
+				if (!*sp->pptr) {
+					WARN_ON(1);
 					return NULL;
+				}
 				htb_next_rb_node(sp->pptr);
 			}
 		} else {
@@ -779,8 +780,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
 
 	do {
 next:
-		WARN_ON(!cl);
-		if (!cl)
+		if (unlikely(!cl))
 			return NULL;
 
 		/* class can be empty - it is unlikely but can be true if leaf
-- 
cgit v1.1


From 7f3ff4f63f76c2702da6041d2da5eb30fac407f6 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Sun, 21 Dec 2008 20:14:48 -0800
Subject: pkt_sched: Annotate uninitialized var in sfq_enqueue()

Some gcc versions warn that ret may be used uninitialized in
sfq_enqueue(). It's a false positive, so let's annotate this.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_sfq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index ab8cfee..f3965df 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,7 +281,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
-	int ret;
+	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
-- 
cgit v1.1


From 05a8c1cbfe368df8c0d4eff710c370d2aa10245a Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@gmail.com>
Date: Mon, 22 Dec 2008 19:44:13 -0800
Subject: pkt_sched: Remove smp_wmb() in qdisc_watchdog()

When the TCQ_F_THROTTLED flag was implemented, an smp_wmb() was added
to qdisc_watchdog(), but since this flag is practically used only in
sch_netem(), and since it's not even clear what reordering is avoided
here (TCQ_F_THROTTLED vs. __QDISC_STATE_SCHED?), it seems the barrier
(and the smp_mb() in netem_dequeue()) can be safely removed.

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c   | 1 -
 net/sched/sch_netem.c | 1 -
 2 files changed, 2 deletions(-)

(limited to 'net/sched')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6bc29e8..0fc4a18 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -450,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 						 timer);
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
-	smp_wmb();
 	__netif_schedule(qdisc_root(wd->qdisc));
 
 	return HRTIMER_NORESTART;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 7e78f1c0..d876b87 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -266,7 +266,6 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 	struct netem_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 
-	smp_mb();
 	if (sch->flags & TCQ_F_THROTTLED)
 		return NULL;
 
-- 
cgit v1.1