Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/Kconfig | 510
-rw-r--r--  net/sched/Makefile | 46
-rw-r--r--  net/sched/act_api.c | 1110
-rw-r--r--  net/sched/act_gact.c | 218
-rw-r--r--  net/sched/act_ipt.c | 314
-rw-r--r--  net/sched/act_mirred.c | 259
-rw-r--r--  net/sched/act_nat.c | 329
-rw-r--r--  net/sched/act_pedit.c | 260
-rw-r--r--  net/sched/act_police.c | 382
-rw-r--r--  net/sched/act_simple.c | 217
-rw-r--r--  net/sched/act_skbedit.c | 203
-rw-r--r--  net/sched/cls_api.c | 603
-rw-r--r--  net/sched/cls_basic.c | 304
-rw-r--r--  net/sched/cls_flow.c | 707
-rw-r--r--  net/sched/cls_fw.c | 400
-rw-r--r--  net/sched/cls_route.c | 620
-rw-r--r--  net/sched/cls_rsvp.c | 28
-rw-r--r--  net/sched/cls_rsvp.h | 660
-rw-r--r--  net/sched/cls_rsvp6.c | 28
-rw-r--r--  net/sched/cls_tcindex.c | 512
-rw-r--r--  net/sched/cls_u32.c | 789
-rw-r--r--  net/sched/em_cmp.c | 98
-rw-r--r--  net/sched/em_meta.c | 875
-rw-r--r--  net/sched/em_nbyte.c | 80
-rw-r--r--  net/sched/em_text.c | 157
-rw-r--r--  net/sched/em_u32.c | 64
-rw-r--r--  net/sched/ematch.c | 544
-rw-r--r--  net/sched/sch_api.c | 1719
-rw-r--r--  net/sched/sch_atm.c | 718
-rw-r--r--  net/sched/sch_blackhole.c | 52
-rw-r--r--  net/sched/sch_cbq.c | 2090
-rw-r--r--  net/sched/sch_dsmark.c | 522
-rw-r--r--  net/sched/sch_fifo.c | 152
-rw-r--r--  net/sched/sch_generic.c | 741
-rw-r--r--  net/sched/sch_gred.c | 628
-rw-r--r--  net/sched/sch_hfsc.c | 1759
-rw-r--r--  net/sched/sch_htb.c | 1590
-rw-r--r--  net/sched/sch_ingress.c | 159
-rw-r--r--  net/sched/sch_multiq.c | 477
-rw-r--r--  net/sched/sch_netem.c | 738
-rw-r--r--  net/sched/sch_prio.c | 447
-rw-r--r--  net/sched/sch_red.c | 388
-rw-r--r--  net/sched/sch_sfq.c | 647
-rw-r--r--  net/sched/sch_tbf.c | 493
-rw-r--r--  net/sched/sch_teql.c | 513
45 files changed, 24150 insertions, 0 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
new file mode 100644
index 0000000..6767e54
--- /dev/null
+++ b/net/sched/Kconfig
@@ -0,0 +1,510 @@
+#
+# Traffic control configuration.
+#
+
+menuconfig NET_SCHED
+ bool "QoS and/or fair queueing"
+ select NET_SCH_FIFO
+ ---help---
+ When the kernel has several packets to send out over a network
+ device, it has to decide which ones to send first, which ones to
+ delay, and which ones to drop. This is the job of the queueing
+ disciplines, several different algorithms for how to do this
+ "fairly" have been proposed.
+
+ If you say N here, you will get the standard packet scheduler, which
+ is a FIFO (first come, first served). If you say Y here, you will be
+ able to choose from among several alternative algorithms which can
+ then be attached to different network devices. This is useful for
+ example if some of your network devices are real time devices that
+ need a certain minimum data flow rate, or if you need to limit the
+ maximum data flow rate for traffic which matches specified criteria.
+ This code is considered to be experimental.
+
+ To administer these schedulers, you'll need the user-level utilities
+ from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
+ That package also contains some documentation; for more, check out
+ <http://linux-net.osdl.org/index.php/Iproute2>.
+
+ This Quality of Service (QoS) support will enable you to use
+ Differentiated Services (diffserv) and Resource Reservation Protocol
+ (RSVP) on your Linux router if you also say Y to the corresponding
+ classifiers below. Documentation and software is at
+ <http://diffserv.sourceforge.net/>.
+
+ If you say Y here and to "/proc file system" below, you will be able
+ to read status information about packet schedulers from the file
+ /proc/net/psched.
+
+ The available schedulers are listed in the following questions; you
+ can say Y to as many as you like. If unsure, say N now.
+
+if NET_SCHED
+
+comment "Queueing/Scheduling"
+
+config NET_SCH_CBQ
+ tristate "Class Based Queueing (CBQ)"
+ ---help---
+ Say Y here if you want to use the Class-Based Queueing (CBQ) packet
+ scheduling algorithm. This algorithm classifies the waiting packets
+ into a tree-like hierarchy of classes; the leaves of this tree are
+ in turn scheduled by separate algorithms.
+
+ See the top of <file:net/sched/sch_cbq.c> for more details.
+
+ CBQ is a commonly used scheduler, so if you're unsure, you should
+ say Y here. Then say Y to all the queueing algorithms below that you
+ want to use as leaf disciplines.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_cbq.
+
+config NET_SCH_HTB
+ tristate "Hierarchical Token Bucket (HTB)"
+ ---help---
+ Say Y here if you want to use the Hierarchical Token Buckets (HTB)
+ packet scheduling algorithm. See
+ <http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
+ in-depth articles.
+
+ HTB is very similar to CBQ regarding its goals, however it has
+ different properties and a different algorithm.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_htb.
+
+config NET_SCH_HFSC
+ tristate "Hierarchical Fair Service Curve (HFSC)"
+ ---help---
+ Say Y here if you want to use the Hierarchical Fair Service Curve
+ (HFSC) packet scheduling algorithm.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_hfsc.
+
+config NET_SCH_ATM
+ tristate "ATM Virtual Circuits (ATM)"
+ depends on ATM
+ ---help---
+ Say Y here if you want to use the ATM pseudo-scheduler. This
+ provides a framework for invoking classifiers, which in turn
+ select classes of this queuing discipline. Each class maps
+ the flow(s) it is handling to a given virtual circuit.
+
+ See the top of <file:net/sched/sch_atm.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_atm.
+
+config NET_SCH_PRIO
+ tristate "Multi Band Priority Queueing (PRIO)"
+ ---help---
+ Say Y here if you want to use an n-band priority queue packet
+ scheduler.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_prio.
+
+config NET_SCH_MULTIQ
+ tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
+ ---help---
+ Say Y here if you want to use an n-band queue packet scheduler
+ to support devices that have multiple hardware transmit queues.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_multiq.
+
+config NET_SCH_RED
+ tristate "Random Early Detection (RED)"
+ ---help---
+ Say Y here if you want to use the Random Early Detection (RED)
+ packet scheduling algorithm.
+
+ See the top of <file:net/sched/sch_red.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_red.
+
+config NET_SCH_SFQ
+ tristate "Stochastic Fairness Queueing (SFQ)"
+ ---help---
+ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
+ packet scheduling algorithm.
+
+ See the top of <file:net/sched/sch_sfq.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_sfq.
+
+config NET_SCH_TEQL
+ tristate "True Link Equalizer (TEQL)"
+ ---help---
+ Say Y here if you want to use the True Link Equalizer (TEQL) packet
+ scheduling algorithm. This queueing discipline allows the combination
+ of several physical devices into one virtual device.
+
+ See the top of <file:net/sched/sch_teql.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_teql.
+
+config NET_SCH_TBF
+ tristate "Token Bucket Filter (TBF)"
+ ---help---
+ Say Y here if you want to use the Token Bucket Filter (TBF) packet
+ scheduling algorithm.
+
+ See the top of <file:net/sched/sch_tbf.c> for more details.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_tbf.
+
+config NET_SCH_GRED
+ tristate "Generic Random Early Detection (GRED)"
+ ---help---
+ Say Y here if you want to use the Generic Random Early Detection
+ (GRED) packet scheduling algorithm for some of your network devices
+ (see the top of <file:net/sched/sch_red.c> for details and
+ references about the algorithm).
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_gred.
+
+config NET_SCH_DSMARK
+ tristate "Differentiated Services marker (DSMARK)"
+ ---help---
+ Say Y if you want to schedule packets according to the
+ Differentiated Services architecture proposed in RFC 2475.
+ Technical information on this method, with pointers to associated
+ RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_dsmark.
+
+config NET_SCH_NETEM
+ tristate "Network emulator (NETEM)"
+ ---help---
+ Say Y if you want to emulate network delay, loss, and packet
+ re-ordering. This is often useful to simulate networks when
+ testing applications or protocols.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_netem.
+
+ If unsure, say N.
+
+config NET_SCH_INGRESS
+ tristate "Ingress Qdisc"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here if you want to use classifiers for incoming packets.
+ If unsure, say Y.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_ingress.
+
+comment "Classification"
+
+config NET_CLS
+ boolean
+
+config NET_CLS_BASIC
+ tristate "Elementary classification (BASIC)"
+ select NET_CLS
+ ---help---
+ Say Y here if you want to be able to classify packets using
+ only extended matches and actions.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_basic.
+
+config NET_CLS_TCINDEX
+ tristate "Traffic-Control Index (TCINDEX)"
+ select NET_CLS
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ traffic control indices. You will want this feature if you want
+ to implement Differentiated Services together with DSMARK.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_tcindex.
+
+config NET_CLS_ROUTE4
+ tristate "Routing decision (ROUTE)"
+ select NET_CLS_ROUTE
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets
+ according to the route table entry they matched.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_route.
+
+config NET_CLS_ROUTE
+ bool
+
+config NET_CLS_FW
+ tristate "Netfilter mark (FW)"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets
+ according to netfilter/firewall marks.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_fw.
+
+config NET_CLS_U32
+ tristate "Universal 32bit comparisons w/ hashing (U32)"
+ select NET_CLS
+ ---help---
+ Say Y here to be able to classify packets using a universal
+ comparison scheme based on arbitrary 32-bit pieces of the packet.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_u32.
+
+config CLS_U32_PERF
+ bool "Performance counters support"
+ depends on NET_CLS_U32
+ ---help---
+ Say Y here to make u32 gather additional statistics useful for
+ fine tuning u32 classifiers.
+
+config CLS_U32_MARK
+ bool "Netfilter marks support"
+ depends on NET_CLS_U32
+ ---help---
+ Say Y here to be able to use netfilter marks as u32 key.
+
+config NET_CLS_RSVP
+ tristate "IPv4 Resource Reservation Protocol (RSVP)"
+ select NET_CLS
+ ---help---
+ The Resource Reservation Protocol (RSVP) permits end systems to
+ request a minimum and maximum data flow rate for a connection; this
+ is important for real time data such as streaming sound or video.
+
+ Say Y here if you want to be able to classify outgoing packets based
+ on their RSVP requests.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_rsvp.
+
+config NET_CLS_RSVP6
+ tristate "IPv6 Resource Reservation Protocol (RSVP6)"
+ select NET_CLS
+ ---help---
+ The Resource Reservation Protocol (RSVP) permits end systems to
+ request a minimum and maximum data flow rate for a connection; this
+ is important for real time data such as streaming sound or video.
+
+ Say Y here if you want to be able to classify outgoing packets based
+ on their RSVP requests and you are using the IPv6 protocol.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_rsvp6.
+
+config NET_CLS_FLOW
+ tristate "Flow classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ a configurable combination of packet keys. This is mostly useful
+ in combination with SFQ.
+
+ To compile this code as a module, choose M here: the
+ module will be called cls_flow.
+
+config NET_EMATCH
+ bool "Extended Matches"
+ select NET_CLS
+ ---help---
+ Say Y here if you want to use extended matches on top of classifiers
+ and select the extended matches below.
+
+ Extended matches are small classification helpers not worth writing
+ a separate classifier for.
+
+ A recent version of the iproute2 package is required to use
+ extended matches.
+
+config NET_EMATCH_STACK
+ int "Stack size"
+ depends on NET_EMATCH
+ default "32"
+ ---help---
+ Size of the local stack variable used while evaluating the tree of
+ ematches. Limits the depth of the tree, i.e. the number of
+ encapsulated precedences. Every level requires 4 bytes of additional
+ stack space.
+
+config NET_EMATCH_CMP
+ tristate "Simple packet data comparison"
+ depends on NET_EMATCH
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ simple packet data comparisons for 8, 16, and 32bit values.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_cmp.
+
+config NET_EMATCH_NBYTE
+ tristate "Multi byte comparison"
+ depends on NET_EMATCH
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ multiple byte comparisons mainly useful for IPv6 address comparisons.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_nbyte.
+
+config NET_EMATCH_U32
+ tristate "U32 key"
+ depends on NET_EMATCH
+ ---help---
+ Say Y here if you want to be able to classify packets using
+ the famous u32 key in combination with logic relations.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_u32.
+
+config NET_EMATCH_META
+ tristate "Metadata"
+ depends on NET_EMATCH
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ metadata such as load average, netfilter attributes, socket
+ attributes and routing decisions.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_meta.
+
+config NET_EMATCH_TEXT
+ tristate "Textsearch"
+ depends on NET_EMATCH
+ select TEXTSEARCH
+ select TEXTSEARCH_KMP
+ select TEXTSEARCH_BM
+ select TEXTSEARCH_FSM
+ ---help---
+ Say Y here if you want to be able to classify packets based on
+ textsearch comparisons.
+
+ To compile this code as a module, choose M here: the
+ module will be called em_text.
+
+config NET_CLS_ACT
+ bool "Actions"
+ ---help---
+ Say Y here if you want to use traffic control actions. Actions
+ get attached to classifiers and are invoked after a successful
+ classification. They are used to overwrite the classification
+ result, instantly drop or redirect packets, etc.
+
+ A recent version of the iproute2 package is required to use
+ actions.
+
+config NET_ACT_POLICE
+ tristate "Traffic Policing"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here if you want to do traffic policing, i.e. strict
+ bandwidth limiting. This action replaces the existing policing
+ module.
+
+ To compile this code as a module, choose M here: the
+ module will be called police.
+
+config NET_ACT_GACT
+ tristate "Generic actions"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to take generic actions such as dropping and
+ accepting packets.
+
+ To compile this code as a module, choose M here: the
+ module will be called gact.
+
+config GACT_PROB
+ bool "Probability support"
+ depends on NET_ACT_GACT
+ ---help---
+ Say Y here to use the generic action randomly or deterministically.
+
+config NET_ACT_MIRRED
+ tristate "Redirecting and Mirroring"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to allow packets to be mirrored or redirected to
+ other devices.
+
+ To compile this code as a module, choose M here: the
+ module will be called mirred.
+
+config NET_ACT_IPT
+ tristate "IPtables targets"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ ---help---
+ Say Y here to be able to invoke iptables targets after successful
+ classification.
+
+ To compile this code as a module, choose M here: the
+ module will be called ipt.
+
+config NET_ACT_NAT
+ tristate "Stateless NAT"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to do stateless NAT on IPv4 packets. You should use
+ netfilter for NAT unless you know what you are doing.
+
+ To compile this code as a module, choose M here: the
+ module will be called nat.
+
+config NET_ACT_PEDIT
+ tristate "Packet Editing"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here if you want to mangle the content of packets.
+
+ To compile this code as a module, choose M here: the
+ module will be called pedit.
+
+config NET_ACT_SIMP
+ tristate "Simple Example (Debug)"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to add a simple action for demonstration purposes.
+ It is meant as an example and for debugging purposes. It will
+ print a configured policy string followed by the packet count
+ to the console for every packet that passes by.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called simple.
+
+config NET_ACT_SKBEDIT
+ tristate "SKB Editing"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to change skb priority or queue_mapping settings.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called skbedit.
+
+config NET_CLS_IND
+ bool "Incoming device classification"
+ depends on NET_CLS_U32 || NET_CLS_FW
+ ---help---
+ Say Y here to extend the u32 and fw classifier to support
+ classification based on the incoming device. This option is
+ likely to disappear in favour of the metadata ematch.
+
+endif # NET_SCHED
+
+config NET_SCH_FIFO
+ bool
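
Note that bool options above such as CLS_U32_PERF, CLS_U32_MARK and GACT_PROB do not build objects of their own (there are no Makefile entries for them below); they only gate optional code behind CONFIG_* preprocessor conditionals, as CONFIG_GACT_PROB does in act_gact.c later in this patch. The following stand-alone sketch is illustrative only and not part of the patch; CONFIG_EXAMPLE_FEATURE and pick_path() are made-up names. Build it with and without -DCONFIG_EXAMPLE_FEATURE to see both branches.

/*
 * Illustrative sketch only: a Kconfig bool becomes a CONFIG_* symbol
 * that selects code at compile time.
 */
#include <stdio.h>

static const char *pick_path(void)
{
#ifdef CONFIG_EXAMPLE_FEATURE
        return "optional code compiled in (option set to y)";
#else
        return "optional code compiled out (option not set)";
#endif
}

int main(void)
{
        printf("%s\n", pick_path());
        return 0;
}
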
diff --git a/net/sched/Makefile b/net/sched/Makefile
new file mode 100644
index 0000000..e60c992
--- /dev/null
+++ b/net/sched/Makefile
@@ -0,0 +1,46 @@
+#
+# Makefile for the Linux Traffic Control Unit.
+#
+
+obj-y := sch_generic.o
+
+obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
+obj-$(CONFIG_NET_CLS) += cls_api.o
+obj-$(CONFIG_NET_CLS_ACT) += act_api.o
+obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
+obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
+obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
+obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
+obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
+obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
+obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
+obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
+obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
+obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
+obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
+obj-$(CONFIG_NET_SCH_RED) += sch_red.o
+obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
+obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
+obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
+obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
+obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
+obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
+obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
+obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
+obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
+obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
+obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
+obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
+obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
+obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
+obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
+obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
+obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
+obj-$(CONFIG_NET_EMATCH) += ematch.o
+obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
+obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
+obj-$(CONFIG_NET_EMATCH_U32) += em_u32.o
+obj-$(CONFIG_NET_EMATCH_META) += em_meta.o
+obj-$(CONFIG_NET_EMATCH_TEXT) += em_text.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
new file mode 100644
index 0000000..8f457f1
--- /dev/null
+++ b/net/sched/act_api.c
@@ -0,0 +1,1110 @@
+/*
+ * net/sched/act_api.c Packet action API.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Author: Jamal Hadi Salim
+ *
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/err.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/sch_generic.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+
+void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+ unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+ struct tcf_common **p1p;
+
+ for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+ if (*p1p == p) {
+ write_lock_bh(hinfo->lock);
+ *p1p = p->tcfc_next;
+ write_unlock_bh(hinfo->lock);
+ gen_kill_estimator(&p->tcfc_bstats,
+ &p->tcfc_rate_est);
+ kfree(p);
+ return;
+ }
+ }
+ WARN_ON(1);
+}
+EXPORT_SYMBOL(tcf_hash_destroy);
+
+int tcf_hash_release(struct tcf_common *p, int bind,
+ struct tcf_hashinfo *hinfo)
+{
+ int ret = 0;
+
+ if (p) {
+ if (bind)
+ p->tcfc_bindcnt--;
+
+ p->tcfc_refcnt--;
+ if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
+ tcf_hash_destroy(p, hinfo);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+EXPORT_SYMBOL(tcf_hash_release);
+
+static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ struct tc_action *a, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p;
+ int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ struct nlattr *nest;
+
+ read_lock_bh(hinfo->lock);
+
+ s_i = cb->args[0];
+
+ for (i = 0; i < (hinfo->hmask + 1); i++) {
+ p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+ for (; p; p = p->tcfc_next) {
+ index++;
+ if (index < s_i)
+ continue;
+ a->priv = p;
+ a->order = n_i;
+
+ nest = nla_nest_start(skb, a->order);
+ if (nest == NULL)
+ goto nla_put_failure;
+ err = tcf_action_dump_1(skb, a, 0, 0);
+ if (err < 0) {
+ index--;
+ nlmsg_trim(skb, nest);
+ goto done;
+ }
+ nla_nest_end(skb, nest);
+ n_i++;
+ if (n_i >= TCA_ACT_MAX_PRIO)
+ goto done;
+ }
+ }
+done:
+ read_unlock_bh(hinfo->lock);
+ if (n_i)
+ cb->args[0] += n_i;
+ return n_i;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ goto done;
+}
+
+static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p, *s_p;
+ struct nlattr *nest;
+ int i = 0, n_i = 0;
+
+ nest = nla_nest_start(skb, a->order);
+ if (nest == NULL)
+ goto nla_put_failure;
+ NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
+ for (i = 0; i < (hinfo->hmask + 1); i++) {
+ p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
+
+ while (p != NULL) {
+ s_p = p->tcfc_next;
+ if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
+ module_put(a->ops->owner);
+ n_i++;
+ p = s_p;
+ }
+ }
+ NLA_PUT_U32(skb, TCA_FCNT, n_i);
+ nla_nest_end(skb, nest);
+
+ return n_i;
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EINVAL;
+}
+
+int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
+{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+
+ if (type == RTM_DELACTION) {
+ return tcf_del_walker(skb, a, hinfo);
+ } else if (type == RTM_GETACTION) {
+ return tcf_dump_walker(skb, cb, a, hinfo);
+ } else {
+ printk("tcf_generic_walker: unknown action %d\n", type);
+ return -EINVAL;
+ }
+}
+EXPORT_SYMBOL(tcf_generic_walker);
+
+struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p;
+
+ read_lock_bh(hinfo->lock);
+ for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
+ p = p->tcfc_next) {
+ if (p->tcfc_index == index)
+ break;
+ }
+ read_unlock_bh(hinfo->lock);
+
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_lookup);
+
+u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+ u32 val = *idx_gen;
+
+ do {
+ if (++val == 0)
+ val = 1;
+ } while (tcf_hash_lookup(val, hinfo));
+
+ return (*idx_gen = val);
+}
+EXPORT_SYMBOL(tcf_hash_new_index);
+
+int tcf_hash_search(struct tc_action *a, u32 index)
+{
+ struct tcf_hashinfo *hinfo = a->ops->hinfo;
+ struct tcf_common *p = tcf_hash_lookup(index, hinfo);
+
+ if (p) {
+ a->priv = p;
+ return 1;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(tcf_hash_search);
+
+struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
+ struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p = NULL;
+ if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
+ if (bind)
+ p->tcfc_bindcnt++;
+ p->tcfc_refcnt++;
+ a->priv = p;
+ }
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_check);
+
+struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo)
+{
+ struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+
+ if (unlikely(!p))
+ return p;
+ p->tcfc_refcnt = 1;
+ if (bind)
+ p->tcfc_bindcnt = 1;
+
+ spin_lock_init(&p->tcfc_lock);
+ p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
+ p->tcfc_tm.install = jiffies;
+ p->tcfc_tm.lastuse = jiffies;
+ if (est)
+ gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
+ a->priv = (void *) p;
+ return p;
+}
+EXPORT_SYMBOL(tcf_hash_create);
+
+void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo)
+{
+ unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
+
+ write_lock_bh(hinfo->lock);
+ p->tcfc_next = hinfo->htab[h];
+ hinfo->htab[h] = p;
+ write_unlock_bh(hinfo->lock);
+}
+EXPORT_SYMBOL(tcf_hash_insert);
+
+static struct tc_action_ops *act_base = NULL;
+static DEFINE_RWLOCK(act_mod_lock);
+
+int tcf_register_action(struct tc_action_ops *act)
+{
+ struct tc_action_ops *a, **ap;
+
+ write_lock(&act_mod_lock);
+ for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) {
+ if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+ write_unlock(&act_mod_lock);
+ return -EEXIST;
+ }
+ }
+ act->next = NULL;
+ *ap = act;
+ write_unlock(&act_mod_lock);
+ return 0;
+}
+EXPORT_SYMBOL(tcf_register_action);
+
+int tcf_unregister_action(struct tc_action_ops *act)
+{
+ struct tc_action_ops *a, **ap;
+ int err = -ENOENT;
+
+ write_lock(&act_mod_lock);
+ for (ap = &act_base; (a = *ap) != NULL; ap = &a->next)
+ if (a == act)
+ break;
+ if (a) {
+ *ap = a->next;
+ a->next = NULL;
+ err = 0;
+ }
+ write_unlock(&act_mod_lock);
+ return err;
+}
+EXPORT_SYMBOL(tcf_unregister_action);
+
+/* lookup by name */
+static struct tc_action_ops *tc_lookup_action_n(char *kind)
+{
+ struct tc_action_ops *a = NULL;
+
+ if (kind) {
+ read_lock(&act_mod_lock);
+ for (a = act_base; a; a = a->next) {
+ if (strcmp(kind, a->kind) == 0) {
+ if (!try_module_get(a->owner)) {
+ read_unlock(&act_mod_lock);
+ return NULL;
+ }
+ break;
+ }
+ }
+ read_unlock(&act_mod_lock);
+ }
+ return a;
+}
+
+/* lookup by nlattr */
+static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
+{
+ struct tc_action_ops *a = NULL;
+
+ if (kind) {
+ read_lock(&act_mod_lock);
+ for (a = act_base; a; a = a->next) {
+ if (nla_strcmp(kind, a->kind) == 0) {
+ if (!try_module_get(a->owner)) {
+ read_unlock(&act_mod_lock);
+ return NULL;
+ }
+ break;
+ }
+ }
+ read_unlock(&act_mod_lock);
+ }
+ return a;
+}
+
+#if 0
+/* lookup by id */
+static struct tc_action_ops *tc_lookup_action_id(u32 type)
+{
+ struct tc_action_ops *a = NULL;
+
+ if (type) {
+ read_lock(&act_mod_lock);
+ for (a = act_base; a; a = a->next) {
+ if (a->type == type) {
+ if (!try_module_get(a->owner)) {
+ read_unlock(&act_mod_lock);
+ return NULL;
+ }
+ break;
+ }
+ }
+ read_unlock(&act_mod_lock);
+ }
+ return a;
+}
+#endif
+
+int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
+ struct tcf_result *res)
+{
+ struct tc_action *a;
+ int ret = -1;
+
+ if (skb->tc_verd & TC_NCLS) {
+ skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
+ ret = TC_ACT_OK;
+ goto exec_done;
+ }
+ while ((a = act) != NULL) {
+repeat:
+ if (a->ops && a->ops->act) {
+ ret = a->ops->act(skb, a, res);
+ if (TC_MUNGED & skb->tc_verd) {
+ /* copied already, allow trampling */
+ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
+ skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
+ }
+ if (ret == TC_ACT_REPEAT)
+ goto repeat; /* we need a ttl - JHS */
+ if (ret != TC_ACT_PIPE)
+ goto exec_done;
+ }
+ act = a->next;
+ }
+exec_done:
+ return ret;
+}
+EXPORT_SYMBOL(tcf_action_exec);
+
+void tcf_action_destroy(struct tc_action *act, int bind)
+{
+ struct tc_action *a;
+
+ for (a = act; a; a = act) {
+ if (a->ops && a->ops->cleanup) {
+ if (a->ops->cleanup(a, bind) == ACT_P_DELETED)
+ module_put(a->ops->owner);
+ act = act->next;
+ kfree(a);
+ } else { /*FIXME: Remove later - catch insertion bugs*/
+ printk("tcf_action_destroy: BUG? destroying NULL ops\n");
+ act = act->next;
+ kfree(a);
+ }
+ }
+}
+
+int
+tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ int err = -EINVAL;
+
+ if (a->ops == NULL || a->ops->dump == NULL)
+ return err;
+ return a->ops->dump(skb, a, bind, ref);
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ int err = -EINVAL;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ if (a->ops == NULL || a->ops->dump == NULL)
+ return err;
+
+ NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
+ if (tcf_action_copy_stats(skb, a, 0))
+ goto nla_put_failure;
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+ nla_nest_end(skb, nest);
+ return err;
+ }
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+EXPORT_SYMBOL(tcf_action_dump_1);
+
+int
+tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
+{
+ struct tc_action *a;
+ int err = -EINVAL;
+ struct nlattr *nest;
+
+ while ((a = act) != NULL) {
+ act = a->next;
+ nest = nla_nest_start(skb, a->order);
+ if (nest == NULL)
+ goto nla_put_failure;
+ err = tcf_action_dump_1(skb, a, bind, ref);
+ if (err < 0)
+ goto errout;
+ nla_nest_end(skb, nest);
+ }
+
+ return 0;
+
+nla_put_failure:
+ err = -EINVAL;
+errout:
+ nla_nest_cancel(skb, nest);
+ return err;
+}
+
+struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind)
+{
+ struct tc_action *a;
+ struct tc_action_ops *a_o;
+ char act_name[IFNAMSIZ];
+ struct nlattr *tb[TCA_ACT_MAX+1];
+ struct nlattr *kind;
+ int err;
+
+ if (name == NULL) {
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ if (err < 0)
+ goto err_out;
+ err = -EINVAL;
+ kind = tb[TCA_ACT_KIND];
+ if (kind == NULL)
+ goto err_out;
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
+ goto err_out;
+ } else {
+ err = -EINVAL;
+ if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
+ goto err_out;
+ }
+
+ a_o = tc_lookup_action_n(act_name);
+ if (a_o == NULL) {
+#ifdef CONFIG_MODULES
+ rtnl_unlock();
+ request_module("act_%s", act_name);
+ rtnl_lock();
+
+ a_o = tc_lookup_action_n(act_name);
+
+ /* We dropped the RTNL semaphore in order to
+ * perform the module load. So, even if we
+ * succeeded in loading the module we have to
+ * tell the caller to replay the request. We
+ * indicate this using -EAGAIN.
+ */
+ if (a_o != NULL) {
+ err = -EAGAIN;
+ goto err_mod;
+ }
+#endif
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ err = -ENOMEM;
+ a = kzalloc(sizeof(*a), GFP_KERNEL);
+ if (a == NULL)
+ goto err_mod;
+
+ /* backward compatibility for policer */
+ if (name == NULL)
+ err = a_o->init(tb[TCA_ACT_OPTIONS], est, a, ovr, bind);
+ else
+ err = a_o->init(nla, est, a, ovr, bind);
+ if (err < 0)
+ goto err_free;
+
+ /* The module count goes up only when a brand new policy is created;
+ * if it already exists and is only bound to in a_o->init(), then
+ * ACT_P_CREATED is not returned (zero is).
+ */
+ if (err != ACT_P_CREATED)
+ module_put(a_o->owner);
+ a->ops = a_o;
+
+ return a;
+
+err_free:
+ kfree(a);
+err_mod:
+ module_put(a_o->owner);
+err_out:
+ return ERR_PTR(err);
+}
+
+struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
+ char *name, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+ struct tc_action *head = NULL, *act, *act_prev = NULL;
+ int err;
+ int i;
+
+ err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
+ act = tcf_action_init_1(tb[i], est, name, ovr, bind);
+ if (IS_ERR(act))
+ goto err;
+ act->order = i;
+
+ if (head == NULL)
+ head = act;
+ else
+ act_prev->next = act;
+ act_prev = act;
+ }
+ return head;
+
+err:
+ if (head != NULL)
+ tcf_action_destroy(head, bind);
+ return act;
+}
+
+int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
+ int compat_mode)
+{
+ int err = 0;
+ struct gnet_dump d;
+ struct tcf_act_hdr *h = a->priv;
+
+ if (h == NULL)
+ goto errout;
+
+ /* compat_mode being true specifies a call that is supposed
+ * to add additional backward compatibility statistic TLVs.
+ */
+ if (compat_mode) {
+ if (a->type == TCA_OLD_COMPAT)
+ err = gnet_stats_start_copy_compat(skb, 0,
+ TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d);
+ else
+ return 0;
+ } else
+ err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
+ &h->tcf_lock, &d);
+
+ if (err < 0)
+ goto errout;
+
+ if (a->ops != NULL && a->ops->get_stats != NULL)
+ if (a->ops->get_stats(skb, a) < 0)
+ goto errout;
+
+ if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 ||
+ gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0)
+ goto errout;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto errout;
+
+ return 0;
+
+errout:
+ return -1;
+}
+
+static int
+tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
+ u16 flags, int event, int bind, int ref)
+{
+ struct tcamsg *t;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
+
+ t = NLMSG_DATA(nlh);
+ t->tca_family = AF_UNSPEC;
+ t->tca__pad1 = 0;
+ t->tca__pad2 = 0;
+
+ nest = nla_nest_start(skb, TCA_ACT_TAB);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (tcf_action_dump(skb, a, bind, ref) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int
+act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
+{
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+ if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnl_unicast(skb, &init_net, pid);
+}
+
+static struct tc_action *
+tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
+{
+ struct nlattr *tb[TCA_ACT_MAX+1];
+ struct tc_action *a;
+ int index;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ if (err < 0)
+ goto err_out;
+
+ err = -EINVAL;
+ if (tb[TCA_ACT_INDEX] == NULL ||
+ nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
+ goto err_out;
+ index = nla_get_u32(tb[TCA_ACT_INDEX]);
+
+ err = -ENOMEM;
+ a = kzalloc(sizeof(struct tc_action), GFP_KERNEL);
+ if (a == NULL)
+ goto err_out;
+
+ err = -EINVAL;
+ a->ops = tc_lookup_action(tb[TCA_ACT_KIND]);
+ if (a->ops == NULL)
+ goto err_free;
+ if (a->ops->lookup == NULL)
+ goto err_mod;
+ err = -ENOENT;
+ if (a->ops->lookup(a, index) == 0)
+ goto err_mod;
+
+ module_put(a->ops->owner);
+ return a;
+
+err_mod:
+ module_put(a->ops->owner);
+err_free:
+ kfree(a);
+err_out:
+ return ERR_PTR(err);
+}
+
+static void cleanup_a(struct tc_action *act)
+{
+ struct tc_action *a;
+
+ for (a = act; a; a = act) {
+ act = a->next;
+ kfree(a);
+ }
+}
+
+static struct tc_action *create_a(int i)
+{
+ struct tc_action *act;
+
+ act = kzalloc(sizeof(*act), GFP_KERNEL);
+ if (act == NULL) {
+ printk("create_a: failed to alloc!\n");
+ return NULL;
+ }
+ act->order = i;
+ return act;
+}
+
+static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
+{
+ struct sk_buff *skb;
+ unsigned char *b;
+ struct nlmsghdr *nlh;
+ struct tcamsg *t;
+ struct netlink_callback dcb;
+ struct nlattr *nest;
+ struct nlattr *tb[TCA_ACT_MAX+1];
+ struct nlattr *kind;
+ struct tc_action *a = create_a(0);
+ int err = -ENOMEM;
+
+ if (a == NULL) {
+ printk("tca_action_flush: couldnt create tc_action\n");
+ return err;
+ }
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb) {
+ printk("tca_action_flush: failed skb alloc\n");
+ kfree(a);
+ return err;
+ }
+
+ b = skb_tail_pointer(skb);
+
+ err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
+ if (err < 0)
+ goto err_out;
+
+ err = -EINVAL;
+ kind = tb[TCA_ACT_KIND];
+ a->ops = tc_lookup_action(kind);
+ if (a->ops == NULL)
+ goto err_out;
+
+ nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t));
+ t = NLMSG_DATA(nlh);
+ t->tca_family = AF_UNSPEC;
+ t->tca__pad1 = 0;
+ t->tca__pad2 = 0;
+
+ nest = nla_nest_start(skb, TCA_ACT_TAB);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ err = a->ops->walk(skb, &dcb, RTM_DELACTION, a);
+ if (err < 0)
+ goto nla_put_failure;
+ if (err == 0)
+ goto noflush_out;
+
+ nla_nest_end(skb, nest);
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ nlh->nlmsg_flags |= NLM_F_ROOT;
+ module_put(a->ops->owner);
+ kfree(a);
+ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ if (err > 0)
+ return 0;
+
+ return err;
+
+nla_put_failure:
+nlmsg_failure:
+ module_put(a->ops->owner);
+err_out:
+noflush_out:
+ kfree_skb(skb);
+ kfree(a);
+ return err;
+}
+
+static int
+tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
+{
+ int i, ret;
+ struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+ struct tc_action *head = NULL, *act, *act_prev = NULL;
+
+ ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
+ if (ret < 0)
+ return ret;
+
+ if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+ if (tb[1] != NULL)
+ return tca_action_flush(tb[1], n, pid);
+ else
+ return -EINVAL;
+ }
+
+ for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
+ act = tcf_action_get_1(tb[i], n, pid);
+ if (IS_ERR(act)) {
+ ret = PTR_ERR(act);
+ goto err;
+ }
+ act->order = i;
+
+ if (head == NULL)
+ head = act;
+ else
+ act_prev->next = act;
+ act_prev = act;
+ }
+
+ if (event == RTM_GETACTION)
+ ret = act_get_notify(pid, n, head, event);
+ else { /* delete */
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb) {
+ ret = -ENOBUFS;
+ goto err;
+ }
+
+ if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event,
+ 0, 1) <= 0) {
+ kfree_skb(skb);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* now do the delete */
+ tcf_action_destroy(head, 0);
+ ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+ n->nlmsg_flags&NLM_F_ECHO);
+ if (ret > 0)
+ return 0;
+ return ret;
+ }
+err:
+ cleanup_a(head);
+ return ret;
+}
+
+static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
+ u16 flags)
+{
+ struct tcamsg *t;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ struct nlattr *nest;
+ unsigned char *b;
+ int err = 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ b = skb_tail_pointer(skb);
+
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
+ t = NLMSG_DATA(nlh);
+ t->tca_family = AF_UNSPEC;
+ t->tca__pad1 = 0;
+ t->tca__pad2 = 0;
+
+ nest = nla_nest_start(skb, TCA_ACT_TAB);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (tcf_action_dump(skb, a, 0, 0) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ NETLINK_CB(skb).dst_group = RTNLGRP_TC;
+
+ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
+
+nla_put_failure:
+nlmsg_failure:
+ kfree_skb(skb);
+ return -1;
+}
+
+
+static int
+tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
+{
+ int ret = 0;
+ struct tc_action *act;
+ struct tc_action *a;
+ u32 seq = n->nlmsg_seq;
+
+ act = tcf_action_init(nla, NULL, NULL, ovr, 0);
+ if (act == NULL)
+ goto done;
+ if (IS_ERR(act)) {
+ ret = PTR_ERR(act);
+ goto done;
+ }
+
+ /* dump then free all the actions after update; inserted policy
+ * stays intact
+ */
+ ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+ for (a = act; a; a = act) {
+ act = a->next;
+ kfree(a);
+ }
+done:
+ return ret;
+}
+
+static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_ACT_MAX + 1];
+ u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ int ret = 0, ovr = 0;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
+ if (ret < 0)
+ return ret;
+
+ if (tca[TCA_ACT_TAB] == NULL) {
+ printk("tc_ctl_action: received NO action attribs\n");
+ return -EINVAL;
+ }
+
+ /* n->nlmsg_flags & NLM_F_CREATE
+ */
+ switch (n->nlmsg_type) {
+ case RTM_NEWACTION:
+ /* we are going to assume all other flags
+ * imply create only if it doesn't exist.
+ * Note that CREATE | EXCL implies that,
+ * but since we want to avoid ambiguity (e.g. when flags
+ * is zero) then just set this.
+ */
+ if (n->nlmsg_flags&NLM_F_REPLACE)
+ ovr = 1;
+replay:
+ ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr);
+ if (ret == -EAGAIN)
+ goto replay;
+ break;
+ case RTM_DELACTION:
+ ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION);
+ break;
+ case RTM_GETACTION:
+ ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION);
+ break;
+ default:
+ BUG();
+ }
+
+ return ret;
+}
+
+static struct nlattr *
+find_dump_kind(struct nlmsghdr *n)
+{
+ struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+ struct nlattr *nla[TCAA_MAX + 1];
+ struct nlattr *kind;
+
+ if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
+ return NULL;
+ tb1 = nla[TCA_ACT_TAB];
+ if (tb1 == NULL)
+ return NULL;
+
+ if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
+ NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
+ return NULL;
+
+ if (tb[1] == NULL)
+ return NULL;
+ if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
+ nla_len(tb[1]), NULL) < 0)
+ return NULL;
+ kind = tb2[TCA_ACT_KIND];
+
+ return kind;
+}
+
+static int
+tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+ struct tc_action_ops *a_o;
+ struct tc_action a;
+ int ret = 0;
+ struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+ struct nlattr *kind = find_dump_kind(cb->nlh);
+
+ if (net != &init_net)
+ return 0;
+
+ if (kind == NULL) {
+ printk("tc_dump_action: action bad kind\n");
+ return 0;
+ }
+
+ a_o = tc_lookup_action(kind);
+ if (a_o == NULL) {
+ return 0;
+ }
+
+ memset(&a, 0, sizeof(struct tc_action));
+ a.ops = a_o;
+
+ if (a_o->walk == NULL) {
+ printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
+ goto nla_put_failure;
+ }
+
+ nlh = NLMSG_PUT(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, sizeof(*t));
+ t = NLMSG_DATA(nlh);
+ t->tca_family = AF_UNSPEC;
+ t->tca__pad1 = 0;
+ t->tca__pad2 = 0;
+
+ nest = nla_nest_start(skb, TCA_ACT_TAB);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ ret = a_o->walk(skb, cb, RTM_GETACTION, &a);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ if (ret > 0) {
+ nla_nest_end(skb, nest);
+ ret = skb->len;
+ } else
+ nla_nest_cancel(skb, nest);
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ if (NETLINK_CB(cb->skb).pid && ret)
+ nlh->nlmsg_flags |= NLM_F_MULTI;
+ module_put(a_o->owner);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ module_put(a_o->owner);
+ nlmsg_trim(skb, b);
+ return skb->len;
+}
+
+static int __init tc_action_init(void)
+{
+ rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action);
+
+ return 0;
+}
+
+subsys_initcall(tc_action_init);
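
act_api.c above is only the framework; concrete actions plug into it by filling a struct tc_action_ops and calling tcf_register_action(), exactly as act_gact.c does next. The fragment below is a hedged sketch of that minimal shape, not code from this patch: the "example" kind, the file-scope names and the do-nothing ->act() handler are invented, and a loadable module would still have to supply .init, .dump and .cleanup.

/*
 * Sketch only, not part of this patch: the minimal shape of an action
 * module built on act_api.c.  See act_gact.c below for the real pattern.
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/act_api.h>

#define EXAMPLE_TAB_MASK 15
static struct tcf_common *tcf_example_ht[EXAMPLE_TAB_MASK + 1];
static DEFINE_RWLOCK(example_lock);

static struct tcf_hashinfo example_hash_info = {
        .htab   = tcf_example_ht,
        .hmask  = EXAMPLE_TAB_MASK,
        .lock   = &example_lock,
};

/* ->act() is what tcf_action_exec() calls for every matched packet. */
static int tcf_example(struct sk_buff *skb, struct tc_action *a,
                       struct tcf_result *res)
{
        return TC_ACT_OK;               /* let the packet pass unchanged */
}

static struct tc_action_ops act_example_ops = {
        .kind   = "example",
        .hinfo  = &example_hash_info,
        .owner  = THIS_MODULE,
        .act    = tcf_example,
        .lookup = tcf_hash_search,
        .walk   = tcf_generic_walker,
        /* a real module also sets .type, .capab, .init, .dump, .cleanup */
};

static int __init example_init_module(void)
{
        /* duplicate kinds are rejected with -EEXIST by tcf_register_action() */
        return tcf_register_action(&act_example_ops);
}

static void __exit example_cleanup_module(void)
{
        tcf_unregister_action(&act_example_ops);
}

module_init(example_init_module);
module_exit(example_cleanup_module);
MODULE_LICENSE("GPL");
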
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
new file mode 100644
index 0000000..ac04289
--- /dev/null
+++ b/net/sched/act_gact.c
@@ -0,0 +1,218 @@
+/*
+ * net/sched/gact.c Generic actions
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * copyright Jamal Hadi Salim (2002-4)
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_gact.h>
+#include <net/tc_act/tc_gact.h>
+
+#define GACT_TAB_MASK 15
+static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1];
+static u32 gact_idx_gen;
+static DEFINE_RWLOCK(gact_lock);
+
+static struct tcf_hashinfo gact_hash_info = {
+ .htab = tcf_gact_ht,
+ .hmask = GACT_TAB_MASK,
+ .lock = &gact_lock,
+};
+
+#ifdef CONFIG_GACT_PROB
+static int gact_net_rand(struct tcf_gact *gact)
+{
+ if (!gact->tcfg_pval || net_random() % gact->tcfg_pval)
+ return gact->tcf_action;
+ return gact->tcfg_paction;
+}
+
+static int gact_determ(struct tcf_gact *gact)
+{
+ if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+ return gact->tcf_action;
+ return gact->tcfg_paction;
+}
+
+typedef int (*g_rand)(struct tcf_gact *gact);
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
+#endif /* CONFIG_GACT_PROB */
+
+static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
+ [TCA_GACT_PARMS] = { .len = sizeof(struct tc_gact) },
+ [TCA_GACT_PROB] = { .len = sizeof(struct tc_gact_p) },
+};
+
+static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_GACT_MAX + 1];
+ struct tc_gact *parm;
+ struct tcf_gact *gact;
+ struct tcf_common *pc;
+ int ret = 0;
+ int err;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_GACT_MAX, nla, gact_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_GACT_PARMS] == NULL)
+ return -EINVAL;
+ parm = nla_data(tb[TCA_GACT_PARMS]);
+
+#ifndef CONFIG_GACT_PROB
+ if (tb[TCA_GACT_PROB] != NULL)
+ return -EOPNOTSUPP;
+#endif
+
+ pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, &gact_idx_gen, &gact_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ ret = ACT_P_CREATED;
+ } else {
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &gact_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ gact = to_gact(pc);
+
+ spin_lock_bh(&gact->tcf_lock);
+ gact->tcf_action = parm->action;
+#ifdef CONFIG_GACT_PROB
+ if (tb[TCA_GACT_PROB] != NULL) {
+ struct tc_gact_p *p_parm = nla_data(tb[TCA_GACT_PROB]);
+ gact->tcfg_paction = p_parm->paction;
+ gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_ptype = p_parm->ptype;
+ }
+#endif
+ spin_unlock_bh(&gact->tcf_lock);
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &gact_hash_info);
+ return ret;
+}
+
+static int tcf_gact_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_gact *gact = a->priv;
+
+ if (gact)
+ return tcf_hash_release(&gact->common, bind, &gact_hash_info);
+ return 0;
+}
+
+static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+{
+ struct tcf_gact *gact = a->priv;
+ int action = TC_ACT_SHOT;
+
+ spin_lock(&gact->tcf_lock);
+#ifdef CONFIG_GACT_PROB
+ if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
+ action = gact_rand[gact->tcfg_ptype](gact);
+ else
+ action = gact->tcf_action;
+#else
+ action = gact->tcf_action;
+#endif
+ gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ gact->tcf_bstats.packets++;
+ if (action == TC_ACT_SHOT)
+ gact->tcf_qstats.drops++;
+ gact->tcf_tm.lastuse = jiffies;
+ spin_unlock(&gact->tcf_lock);
+
+ return action;
+}
+
+static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_gact opt;
+ struct tcf_gact *gact = a->priv;
+ struct tcf_t t;
+
+ opt.index = gact->tcf_index;
+ opt.refcnt = gact->tcf_refcnt - ref;
+ opt.bindcnt = gact->tcf_bindcnt - bind;
+ opt.action = gact->tcf_action;
+ NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
+#ifdef CONFIG_GACT_PROB
+ if (gact->tcfg_ptype) {
+ struct tc_gact_p p_opt;
+ p_opt.paction = gact->tcfg_paction;
+ p_opt.pval = gact->tcfg_pval;
+ p_opt.ptype = gact->tcfg_ptype;
+ NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
+ }
+#endif
+ t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
+ NLA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_gact_ops = {
+ .kind = "gact",
+ .hinfo = &gact_hash_info,
+ .type = TCA_ACT_GACT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_gact,
+ .dump = tcf_gact_dump,
+ .cleanup = tcf_gact_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_gact_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Classifier actions");
+MODULE_LICENSE("GPL");
+
+static int __init gact_init_module(void)
+{
+#ifdef CONFIG_GACT_PROB
+ printk("GACT probability on\n");
+#else
+ printk("GACT probability NOT on\n");
+#endif
+ return tcf_register_action(&act_gact_ops);
+}
+
+static void __exit gact_cleanup_module(void)
+{
+ tcf_unregister_action(&act_gact_ops);
+}
+
+module_init(gact_init_module);
+module_exit(gact_cleanup_module);
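
A note on the GACT_PROB path above: gact_net_rand() and gact_determ() return the alternate action tcfg_paction only when the modulo result is zero, so with pval = N the alternate action fires roughly once in every N packets in random mode, or whenever the action's own packet counter is a multiple of N in deterministic mode. The user-space sketch below is illustrative only: rand() stands in for net_random() and all names are invented.

#include <stdio.h>
#include <stdlib.h>

enum { BASIC_ACTION, ALT_ACTION };

/* mirrors gact_net_rand(): basic action unless random % pval == 0 */
static int pick_random(unsigned int pval)
{
        if (!pval || (unsigned int)rand() % pval)
                return BASIC_ACTION;
        return ALT_ACTION;
}

/* mirrors gact_determ(): basic action unless the packet count so far
 * is a multiple of pval */
static int pick_deterministic(unsigned long long packets, unsigned int pval)
{
        if (!pval || packets % pval)
                return BASIC_ACTION;
        return ALT_ACTION;
}

int main(void)
{
        unsigned long long pkt;
        unsigned int hits = 0;

        for (pkt = 0; pkt < 1000; pkt++)
                hits += pick_deterministic(pkt, 10) == ALT_ACTION;
        printf("deterministic, pval=10: %u of 1000 packets hit\n", hits);

        hits = 0;
        for (pkt = 0; pkt < 1000; pkt++)
                hits += pick_random(10) == ALT_ACTION;
        printf("random, pval=10: about %u of 1000 packets hit\n", hits);
        return 0;
}
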
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
new file mode 100644
index 0000000..0453d79
--- /dev/null
+++ b/net/sched/act_ipt.c
@@ -0,0 +1,314 @@
+/*
+ * net/sched/ipt.c iptables target interface
+ *
+ *TODO: Add other tables. For now we only support the ipv4 table targets
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright: Jamal Hadi Salim (2002-4)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_ipt.h>
+#include <net/tc_act/tc_ipt.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+
+#define IPT_TAB_MASK 15
+static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1];
+static u32 ipt_idx_gen;
+static DEFINE_RWLOCK(ipt_lock);
+
+static struct tcf_hashinfo ipt_hash_info = {
+ .htab = tcf_ipt_ht,
+ .hmask = IPT_TAB_MASK,
+ .lock = &ipt_lock,
+};
+
+static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
+{
+ struct xt_tgchk_param par;
+ struct xt_target *target;
+ int ret = 0;
+
+ target = xt_request_find_target(AF_INET, t->u.user.name,
+ t->u.user.revision);
+ if (!target)
+ return -ENOENT;
+
+ t->u.kernel.target = target;
+ par.table = table;
+ par.entryinfo = NULL;
+ par.target = target;
+ par.targinfo = t->data;
+ par.hook_mask = hook;
+ par.family = NFPROTO_IPV4;
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
+ module_put(t->u.kernel.target->me);
+ return ret;
+ }
+ return 0;
+}
+
+static void ipt_destroy_target(struct ipt_entry_target *t)
+{
+ struct xt_tgdtor_param par = {
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ };
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
+}
+
+static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
+{
+ int ret = 0;
+ if (ipt) {
+ if (bind)
+ ipt->tcf_bindcnt--;
+ ipt->tcf_refcnt--;
+ if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
+ tcf_hash_destroy(&ipt->common, &ipt_hash_info);
+ ret = ACT_P_DELETED;
+ }
+ }
+ return ret;
+}
+
+static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
+ [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
+ [TCA_IPT_HOOK] = { .type = NLA_U32 },
+ [TCA_IPT_INDEX] = { .type = NLA_U32 },
+ [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) },
+};
+
+static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_IPT_MAX + 1];
+ struct tcf_ipt *ipt;
+ struct tcf_common *pc;
+ struct ipt_entry_target *td, *t;
+ char *tname;
+ int ret = 0, err;
+ u32 hook = 0;
+ u32 index = 0;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_IPT_MAX, nla, ipt_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_IPT_HOOK] == NULL)
+ return -EINVAL;
+ if (tb[TCA_IPT_TARG] == NULL)
+ return -EINVAL;
+
+ td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
+ if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
+ return -EINVAL;
+
+ if (tb[TCA_IPT_INDEX] != NULL)
+ index = nla_get_u32(tb[TCA_IPT_INDEX]);
+
+ pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
+ &ipt_idx_gen, &ipt_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ ret = ACT_P_CREATED;
+ } else {
+ if (!ovr) {
+ tcf_ipt_release(to_ipt(pc), bind);
+ return -EEXIST;
+ }
+ }
+ ipt = to_ipt(pc);
+
+ hook = nla_get_u32(tb[TCA_IPT_HOOK]);
+
+ err = -ENOMEM;
+ tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
+ if (unlikely(!tname))
+ goto err1;
+ if (tb[TCA_IPT_TABLE] == NULL ||
+ nla_strlcpy(tname, tb[TCA_IPT_TABLE], IFNAMSIZ) >= IFNAMSIZ)
+ strcpy(tname, "mangle");
+
+ t = kmemdup(td, td->u.target_size, GFP_KERNEL);
+ if (unlikely(!t))
+ goto err2;
+
+ if ((err = ipt_init_target(t, tname, hook)) < 0)
+ goto err3;
+
+ spin_lock_bh(&ipt->tcf_lock);
+ if (ret != ACT_P_CREATED) {
+ ipt_destroy_target(ipt->tcfi_t);
+ kfree(ipt->tcfi_tname);
+ kfree(ipt->tcfi_t);
+ }
+ ipt->tcfi_tname = tname;
+ ipt->tcfi_t = t;
+ ipt->tcfi_hook = hook;
+ spin_unlock_bh(&ipt->tcf_lock);
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &ipt_hash_info);
+ return ret;
+
+err3:
+ kfree(t);
+err2:
+ kfree(tname);
+err1:
+ kfree(pc);
+ return err;
+}
+
+static int tcf_ipt_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_ipt *ipt = a->priv;
+ return tcf_ipt_release(ipt, bind);
+}
+
+static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ int ret = 0, result = 0;
+ struct tcf_ipt *ipt = a->priv;
+ struct xt_target_param par;
+
+ if (skb_cloned(skb)) {
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ return TC_ACT_UNSPEC;
+ }
+
+ spin_lock(&ipt->tcf_lock);
+
+ ipt->tcf_tm.lastuse = jiffies;
+ ipt->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ ipt->tcf_bstats.packets++;
+
+ /* yes, we have to worry about both in and out dev;
+ * worry later - danger - this API seems to have changed
+ * from earlier kernels */
+ par.in = skb->dev;
+ par.out = NULL;
+ par.hooknum = ipt->tcfi_hook;
+ par.target = ipt->tcfi_t->u.kernel.target;
+ par.targinfo = ipt->tcfi_t->data;
+ ret = par.target->target(skb, &par);
+
+ switch (ret) {
+ case NF_ACCEPT:
+ result = TC_ACT_OK;
+ break;
+ case NF_DROP:
+ result = TC_ACT_SHOT;
+ ipt->tcf_qstats.drops++;
+ break;
+ case IPT_CONTINUE:
+ result = TC_ACT_PIPE;
+ break;
+ default:
+ if (net_ratelimit())
+ printk("Bogus netfilter code %d assume ACCEPT\n", ret);
+ result = TC_POLICE_OK;
+ break;
+ }
+ spin_unlock(&ipt->tcf_lock);
+ return result;
+
+}
+
+static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_ipt *ipt = a->priv;
+ struct ipt_entry_target *t;
+ struct tcf_t tm;
+ struct tc_cnt c;
+
+ /* for simple targets kernel size == user size
+ * and user name == target name;
+ * to be foolproof you must not assume this
+ */
+
+ t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
+ if (unlikely(!t))
+ goto nla_put_failure;
+
+ c.bindcnt = ipt->tcf_bindcnt - bind;
+ c.refcnt = ipt->tcf_refcnt - ref;
+ strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
+
+ NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
+ NLA_PUT_U32(skb, TCA_IPT_INDEX, ipt->tcf_index);
+ NLA_PUT_U32(skb, TCA_IPT_HOOK, ipt->tcfi_hook);
+ NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
+ NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
+ tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
+ tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
+ tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
+ NLA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
+ kfree(t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ kfree(t);
+ return -1;
+}
+
+static struct tc_action_ops act_ipt_ops = {
+ .kind = "ipt",
+ .hinfo = &ipt_hash_info,
+ .type = TCA_ACT_IPT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_ipt,
+ .dump = tcf_ipt_dump,
+ .cleanup = tcf_ipt_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_ipt_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Iptables target actions");
+MODULE_LICENSE("GPL");
+
+static int __init ipt_init_module(void)
+{
+ return tcf_register_action(&act_ipt_ops);
+}
+
+static void __exit ipt_cleanup_module(void)
+{
+ tcf_unregister_action(&act_ipt_ops);
+}
+
+module_init(ipt_init_module);
+module_exit(ipt_cleanup_module);
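+
+/* Illustrative userspace usage of this action (not part of this patch; the
+ * device name, handles and mark value are placeholders, and the exact tc
+ * syntax may differ between iproute2 versions):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+ *           match ip dst 0.0.0.0/0 action ipt -j MARK --set-mark 2
+ *
+ * The target specification after "-j" is serialized by tc and handed to
+ * tcf_ipt_init() as the TCA_IPT_TARG attribute.
+ */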
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
new file mode 100644
index 0000000..70341c0
--- /dev/null
+++ b/net/sched/act_mirred.c
@@ -0,0 +1,259 @@
+/*
+ * net/sched/mirred.c packet mirroring and redirect actions
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jamal Hadi Salim (2002-4)
+ *
+ * TODO: Add ingress support (and socket redirect support)
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/net_namespace.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include <linux/if_arp.h>
+
+#define MIRRED_TAB_MASK 7
+static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
+static u32 mirred_idx_gen;
+static DEFINE_RWLOCK(mirred_lock);
+
+static struct tcf_hashinfo mirred_hash_info = {
+ .htab = tcf_mirred_ht,
+ .hmask = MIRRED_TAB_MASK,
+ .lock = &mirred_lock,
+};
+
+static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+{
+ if (m) {
+ if (bind)
+ m->tcf_bindcnt--;
+ m->tcf_refcnt--;
+		if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+ dev_put(m->tcfm_dev);
+ tcf_hash_destroy(&m->common, &mirred_hash_info);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
+ [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
+};
+
+static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ struct tc_mirred *parm;
+ struct tcf_mirred *m;
+ struct tcf_common *pc;
+ struct net_device *dev = NULL;
+ int ret = 0, err;
+ int ok_push = 0;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_MIRRED_MAX, nla, mirred_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_MIRRED_PARMS] == NULL)
+ return -EINVAL;
+ parm = nla_data(tb[TCA_MIRRED_PARMS]);
+
+ if (parm->ifindex) {
+ dev = __dev_get_by_index(&init_net, parm->ifindex);
+ if (dev == NULL)
+ return -ENODEV;
+ switch (dev->type) {
+ case ARPHRD_TUNNEL:
+ case ARPHRD_TUNNEL6:
+ case ARPHRD_SIT:
+ case ARPHRD_IPGRE:
+ case ARPHRD_VOID:
+ case ARPHRD_NONE:
+ ok_push = 0;
+ break;
+ default:
+ ok_push = 1;
+ break;
+ }
+ }
+
+ pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info);
+ if (!pc) {
+ if (!parm->ifindex)
+ return -EINVAL;
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
+ &mirred_idx_gen, &mirred_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ ret = ACT_P_CREATED;
+ } else {
+ if (!ovr) {
+ tcf_mirred_release(to_mirred(pc), bind);
+ return -EEXIST;
+ }
+ }
+ m = to_mirred(pc);
+
+ spin_lock_bh(&m->tcf_lock);
+ m->tcf_action = parm->action;
+ m->tcfm_eaction = parm->eaction;
+ if (parm->ifindex) {
+ m->tcfm_ifindex = parm->ifindex;
+ if (ret != ACT_P_CREATED)
+ dev_put(m->tcfm_dev);
+ m->tcfm_dev = dev;
+ dev_hold(dev);
+ m->tcfm_ok_push = ok_push;
+ }
+ spin_unlock_bh(&m->tcf_lock);
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &mirred_hash_info);
+
+ return ret;
+}
+
+static int tcf_mirred_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_mirred *m = a->priv;
+
+ if (m)
+ return tcf_mirred_release(m, bind);
+ return 0;
+}
+
+static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_mirred *m = a->priv;
+ struct net_device *dev;
+ struct sk_buff *skb2 = NULL;
+ u32 at = G_TC_AT(skb->tc_verd);
+
+ spin_lock(&m->tcf_lock);
+
+ dev = m->tcfm_dev;
+ m->tcf_tm.lastuse = jiffies;
+
+	if (!(dev->flags & IFF_UP)) {
+ if (net_ratelimit())
+ printk("mirred to Houston: device %s is gone!\n",
+ dev->name);
+bad_mirred:
+ if (skb2 != NULL)
+ kfree_skb(skb2);
+ m->tcf_qstats.overlimits++;
+ m->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ m->tcf_bstats.packets++;
+ spin_unlock(&m->tcf_lock);
+		/* Should we be asking for the packet to be dropped?
+		 * That may make sense for the redirect case only.
+		 */
+ return TC_ACT_SHOT;
+ }
+
+ skb2 = skb_act_clone(skb, GFP_ATOMIC);
+ if (skb2 == NULL)
+ goto bad_mirred;
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR &&
+ m->tcfm_eaction != TCA_EGRESS_REDIR) {
+ if (net_ratelimit())
+ printk("tcf_mirred unknown action %d\n",
+ m->tcfm_eaction);
+ goto bad_mirred;
+ }
+
+ m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
+ m->tcf_bstats.packets++;
+ if (!(at & AT_EGRESS))
+ if (m->tcfm_ok_push)
+ skb_push(skb2, skb2->dev->hard_header_len);
+
+ /* mirror is always swallowed */
+ if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
+ skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
+
+ skb2->dev = dev;
+ skb2->iif = skb->dev->ifindex;
+ dev_queue_xmit(skb2);
+ spin_unlock(&m->tcf_lock);
+ return m->tcf_action;
+}
+
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_mirred *m = a->priv;
+ struct tc_mirred opt;
+ struct tcf_t t;
+
+ opt.index = m->tcf_index;
+ opt.action = m->tcf_action;
+ opt.refcnt = m->tcf_refcnt - ref;
+ opt.bindcnt = m->tcf_bindcnt - bind;
+ opt.eaction = m->tcfm_eaction;
+ opt.ifindex = m->tcfm_ifindex;
+ NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
+ t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
+ NLA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_mirred_ops = {
+ .kind = "mirred",
+ .hinfo = &mirred_hash_info,
+ .type = TCA_ACT_MIRRED,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_mirred,
+ .dump = tcf_mirred_dump,
+ .cleanup = tcf_mirred_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_mirred_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002)");
+MODULE_DESCRIPTION("Device Mirror/redirect actions");
+MODULE_LICENSE("GPL");
+
+static int __init mirred_init_module(void)
+{
+ printk("Mirror/redirect action on\n");
+ return tcf_register_action(&act_mirred_ops);
+}
+
+static void __exit mirred_cleanup_module(void)
+{
+ tcf_unregister_action(&act_mirred_ops);
+}
+
+module_init(mirred_init_module);
+module_exit(mirred_cleanup_module);
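+
+/* Illustrative userspace usage (not part of this patch; device names are
+ * placeholders):
+ *
+ *   tc qdisc add dev eth0 ingress
+ *   tc filter add dev eth0 parent ffff: protocol ip u32 match u32 0 0 \
+ *           action mirred egress mirror dev eth1
+ *
+ * This mirrors every packet seen on eth0 out of eth1; using "redirect"
+ * instead of "mirror" sends the packet to eth1 rather than copying it.
+ */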
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
new file mode 100644
index 0000000..7b39ed4
--- /dev/null
+++ b/net/sched/act_nat.c
@@ -0,0 +1,329 @@
+/*
+ * Stateless NAT actions
+ *
+ * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tc_act/tc_nat.h>
+#include <net/act_api.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <net/tc_act/tc_nat.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+
+#define NAT_TAB_MASK 15
+static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
+static u32 nat_idx_gen;
+static DEFINE_RWLOCK(nat_lock);
+
+static struct tcf_hashinfo nat_hash_info = {
+ .htab = tcf_nat_ht,
+ .hmask = NAT_TAB_MASK,
+ .lock = &nat_lock,
+};
+
+static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
+ [TCA_NAT_PARMS] = { .len = sizeof(struct tc_nat) },
+};
+
+static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_NAT_MAX + 1];
+ struct tc_nat *parm;
+ int ret = 0, err;
+ struct tcf_nat *p;
+ struct tcf_common *pc;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_NAT_PARMS] == NULL)
+ return -EINVAL;
+ parm = nla_data(tb[TCA_NAT_PARMS]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+ &nat_idx_gen, &nat_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ p = to_tcf_nat(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ p = to_tcf_nat(pc);
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &nat_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ spin_lock_bh(&p->tcf_lock);
+ p->old_addr = parm->old_addr;
+ p->new_addr = parm->new_addr;
+ p->mask = parm->mask;
+ p->flags = parm->flags;
+
+ p->tcf_action = parm->action;
+ spin_unlock_bh(&p->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &nat_hash_info);
+
+ return ret;
+}
+
+static int tcf_nat_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_nat *p = a->priv;
+
+ return tcf_hash_release(&p->common, bind, &nat_hash_info);
+}
+
+static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_nat *p = a->priv;
+ struct iphdr *iph;
+ __be32 old_addr;
+ __be32 new_addr;
+ __be32 mask;
+ __be32 addr;
+ int egress;
+ int action;
+ int ihl;
+
+ spin_lock(&p->tcf_lock);
+
+ p->tcf_tm.lastuse = jiffies;
+ old_addr = p->old_addr;
+ new_addr = p->new_addr;
+ mask = p->mask;
+ egress = p->flags & TCA_NAT_FLAG_EGRESS;
+ action = p->tcf_action;
+
+ p->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ p->tcf_bstats.packets++;
+
+ spin_unlock(&p->tcf_lock);
+
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ goto drop;
+
+ iph = ip_hdr(skb);
+
+ if (egress)
+ addr = iph->saddr;
+ else
+ addr = iph->daddr;
+
+ if (!((old_addr ^ addr) & mask)) {
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb, sizeof(*iph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto drop;
+
+ new_addr &= mask;
+ new_addr |= addr & ~mask;
+
+ /* Rewrite IP header */
+ iph = ip_hdr(skb);
+ if (egress)
+ iph->saddr = new_addr;
+ else
+ iph->daddr = new_addr;
+
+ csum_replace4(&iph->check, addr, new_addr);
+ }
+
+ ihl = iph->ihl * 4;
+
+ /* It would be nice to share code with stateful NAT. */
+ switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
+ case IPPROTO_TCP:
+ {
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) ||
+ (skb_cloned(skb) &&
+ !skb_clone_writable(skb, ihl + sizeof(*tcph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto drop;
+
+ tcph = (void *)(skb_network_header(skb) + ihl);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ break;
+ }
+ case IPPROTO_UDP:
+ {
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*udph)) ||
+ (skb_cloned(skb) &&
+ !skb_clone_writable(skb, ihl + sizeof(*udph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+ goto drop;
+
+ udph = (void *)(skb_network_header(skb) + ihl);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace4(&udph->check, skb, addr,
+ new_addr, 1);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+ break;
+ }
+ case IPPROTO_ICMP:
+ {
+ struct icmphdr *icmph;
+
+ if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+ goto drop;
+
+ icmph = (void *)(skb_network_header(skb) + ihl);
+
+ if ((icmph->type != ICMP_DEST_UNREACH) &&
+ (icmph->type != ICMP_TIME_EXCEEDED) &&
+ (icmph->type != ICMP_PARAMETERPROB))
+ break;
+
+ iph = (void *)(icmph + 1);
+ if (egress)
+ addr = iph->daddr;
+ else
+ addr = iph->saddr;
+
+ if ((old_addr ^ addr) & mask)
+ break;
+
+ if (skb_cloned(skb) &&
+ !skb_clone_writable(skb,
+ ihl + sizeof(*icmph) + sizeof(*iph)) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto drop;
+
+ icmph = (void *)(skb_network_header(skb) + ihl);
+ iph = (void *)(icmph + 1);
+
+ new_addr &= mask;
+ new_addr |= addr & ~mask;
+
+ /* XXX Fix up the inner checksums. */
+ if (egress)
+ iph->daddr = new_addr;
+ else
+ iph->saddr = new_addr;
+
+ inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
+ 1);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return action;
+
+drop:
+ spin_lock(&p->tcf_lock);
+ p->tcf_qstats.drops++;
+ spin_unlock(&p->tcf_lock);
+ return TC_ACT_SHOT;
+}
+
+static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_nat *p = a->priv;
+ struct tc_nat *opt;
+ struct tcf_t t;
+ int s;
+
+ s = sizeof(*opt);
+
+ /* netlink spinlocks held above us - must use ATOMIC */
+ opt = kzalloc(s, GFP_ATOMIC);
+ if (unlikely(!opt))
+ return -ENOBUFS;
+
+ opt->old_addr = p->old_addr;
+ opt->new_addr = p->new_addr;
+ opt->mask = p->mask;
+ opt->flags = p->flags;
+
+ opt->index = p->tcf_index;
+ opt->action = p->tcf_action;
+ opt->refcnt = p->tcf_refcnt - ref;
+ opt->bindcnt = p->tcf_bindcnt - bind;
+
+ NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+ NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
+
+ kfree(opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ kfree(opt);
+ return -1;
+}
+
+static struct tc_action_ops act_nat_ops = {
+ .kind = "nat",
+ .hinfo = &nat_hash_info,
+ .type = TCA_ACT_NAT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_nat,
+ .dump = tcf_nat_dump,
+ .cleanup = tcf_nat_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_nat_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_DESCRIPTION("Stateless NAT actions");
+MODULE_LICENSE("GPL");
+
+static int __init nat_init_module(void)
+{
+ return tcf_register_action(&act_nat_ops);
+}
+
+static void __exit nat_cleanup_module(void)
+{
+ tcf_unregister_action(&act_nat_ops);
+}
+
+module_init(nat_init_module);
+module_exit(nat_cleanup_module);
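+
+/* Illustrative userspace usage (not part of this patch; addresses and the
+ * device name are placeholders):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+ *           match ip src 10.0.0.1/32 \
+ *           action nat egress 10.0.0.1/32 192.0.2.1
+ *
+ * "egress" sets TCA_NAT_FLAG_EGRESS, so tcf_nat() rewrites the source
+ * address; without it the destination address is rewritten instead.
+ */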
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
new file mode 100644
index 0000000..d5f4e34
--- /dev/null
+++ b/net/sched/act_pedit.c
@@ -0,0 +1,260 @@
+/*
+ * net/sched/pedit.c Generic packet editor
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jamal Hadi Salim (2002-4)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <linux/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_pedit.h>
+
+#define PEDIT_TAB_MASK 15
+static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1];
+static u32 pedit_idx_gen;
+static DEFINE_RWLOCK(pedit_lock);
+
+static struct tcf_hashinfo pedit_hash_info = {
+ .htab = tcf_pedit_ht,
+ .hmask = PEDIT_TAB_MASK,
+ .lock = &pedit_lock,
+};
+
+static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
+ [TCA_PEDIT_PARMS] = { .len = sizeof(struct tcf_pedit) },
+};
+
+static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tc_pedit *parm;
+ int ret = 0, err;
+ struct tcf_pedit *p;
+ struct tcf_common *pc;
+ struct tc_pedit_key *keys = NULL;
+ int ksize;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_PEDIT_MAX, nla, pedit_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_PEDIT_PARMS] == NULL)
+ return -EINVAL;
+ parm = nla_data(tb[TCA_PEDIT_PARMS]);
+ ksize = parm->nkeys * sizeof(struct tc_pedit_key);
+ if (nla_len(tb[TCA_PEDIT_PARMS]) < sizeof(*parm) + ksize)
+ return -EINVAL;
+
+ pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info);
+ if (!pc) {
+ if (!parm->nkeys)
+ return -EINVAL;
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
+ &pedit_idx_gen, &pedit_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+ p = to_pedit(pc);
+ keys = kmalloc(ksize, GFP_KERNEL);
+ if (keys == NULL) {
+ kfree(pc);
+ return -ENOMEM;
+ }
+ ret = ACT_P_CREATED;
+ } else {
+ p = to_pedit(pc);
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &pedit_hash_info);
+ return -EEXIST;
+ }
+ if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
+ keys = kmalloc(ksize, GFP_KERNEL);
+ if (keys == NULL)
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock_bh(&p->tcf_lock);
+ p->tcfp_flags = parm->flags;
+ p->tcf_action = parm->action;
+ if (keys) {
+ kfree(p->tcfp_keys);
+ p->tcfp_keys = keys;
+ p->tcfp_nkeys = parm->nkeys;
+ }
+ memcpy(p->tcfp_keys, parm->keys, ksize);
+ spin_unlock_bh(&p->tcf_lock);
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &pedit_hash_info);
+ return ret;
+}
+
+static int tcf_pedit_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_pedit *p = a->priv;
+
+ if (p) {
+ struct tc_pedit_key *keys = p->tcfp_keys;
+ if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) {
+ kfree(keys);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_pedit *p = a->priv;
+ int i, munged = 0;
+ u8 *pptr;
+
+ if (!(skb->tc_verd & TC_OK2MUNGE)) {
+ /* should we set skb->cloned? */
+ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+ return p->tcf_action;
+ }
+ }
+
+ pptr = skb_network_header(skb);
+
+ spin_lock(&p->tcf_lock);
+
+ p->tcf_tm.lastuse = jiffies;
+
+ if (p->tcfp_nkeys > 0) {
+ struct tc_pedit_key *tkey = p->tcfp_keys;
+
+ for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
+ u32 *ptr;
+ int offset = tkey->off;
+
+ if (tkey->offmask) {
+ if (skb->len > tkey->at) {
+ char *j = pptr + tkey->at;
+ offset += ((*j & tkey->offmask) >>
+ tkey->shift);
+ } else {
+ goto bad;
+ }
+ }
+
+ if (offset % 4) {
+ printk("offset must be on 32 bit boundaries\n");
+ goto bad;
+ }
+ if (offset > 0 && offset > skb->len) {
+				printk("offset %d can't exceed pkt length %d\n",
+ offset, skb->len);
+ goto bad;
+ }
+
+ ptr = (u32 *)(pptr+offset);
+ /* just do it, baby */
+ *ptr = ((*ptr & tkey->mask) ^ tkey->val);
+ munged++;
+ }
+
+ if (munged)
+ skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
+ goto done;
+ } else {
+ printk("pedit BUG: index %d\n", p->tcf_index);
+ }
+
+bad:
+ p->tcf_qstats.overlimits++;
+done:
+ p->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ p->tcf_bstats.packets++;
+ spin_unlock(&p->tcf_lock);
+ return p->tcf_action;
+}
+
+static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_pedit *p = a->priv;
+ struct tc_pedit *opt;
+ struct tcf_t t;
+ int s;
+
+ s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+
+ /* netlink spinlocks held above us - must use ATOMIC */
+ opt = kzalloc(s, GFP_ATOMIC);
+ if (unlikely(!opt))
+ return -ENOBUFS;
+
+ memcpy(opt->keys, p->tcfp_keys,
+ p->tcfp_nkeys * sizeof(struct tc_pedit_key));
+ opt->index = p->tcf_index;
+ opt->nkeys = p->tcfp_nkeys;
+ opt->flags = p->tcfp_flags;
+ opt->action = p->tcf_action;
+ opt->refcnt = p->tcf_refcnt - ref;
+ opt->bindcnt = p->tcf_bindcnt - bind;
+
+ NLA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
+ t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
+ NLA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+ kfree(opt);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ kfree(opt);
+ return -1;
+}
+
+static struct tc_action_ops act_pedit_ops = {
+ .kind = "pedit",
+ .hinfo = &pedit_hash_info,
+ .type = TCA_ACT_PEDIT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_pedit,
+ .dump = tcf_pedit_dump,
+ .cleanup = tcf_pedit_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_pedit_init,
+ .walk = tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+MODULE_DESCRIPTION("Generic Packet Editor actions");
+MODULE_LICENSE("GPL");
+
+static int __init pedit_init_module(void)
+{
+ return tcf_register_action(&act_pedit_ops);
+}
+
+static void __exit pedit_cleanup_module(void)
+{
+ tcf_unregister_action(&act_pedit_ops);
+}
+
+module_init(pedit_init_module);
+module_exit(pedit_cleanup_module);
+
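+/* Illustrative userspace usage (not part of this patch; the offset and
+ * value are placeholders):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+ *           match ip dst 0.0.0.0/0 \
+ *           action pedit munge offset 16 u32 set 0x0a000001
+ *
+ * This rewrites the 32-bit word at offset 16 of the network header (the
+ * IPv4 destination address) as done in tcf_pedit() above; note that only
+ * 32-bit aligned offsets are accepted and checksums are not fixed up.
+ */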
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
new file mode 100644
index 0000000..38015b4
--- /dev/null
+++ b/net/sched/act_police.c
@@ -0,0 +1,382 @@
+/*
+ * net/sched/police.c Input police filter.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * J Hadi Salim (action changes)
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+
+#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+
+#define POL_TAB_MASK 15
+static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
+static u32 police_idx_gen;
+static DEFINE_RWLOCK(police_lock);
+
+static struct tcf_hashinfo police_hash_info = {
+ .htab = tcf_police_ht,
+ .hmask = POL_TAB_MASK,
+ .lock = &police_lock,
+};
+
+/* old policer structure from before tc actions */
+struct tc_police_compat
+{
+ u32 index;
+ int action;
+ u32 limit;
+ u32 burst;
+ u32 mtu;
+ struct tc_ratespec rate;
+ struct tc_ratespec peakrate;
+};
+
+/* Each policer is serialized by its individual spinlock */
+
+static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
+{
+ struct tcf_common *p;
+ int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ struct nlattr *nest;
+
+ read_lock_bh(&police_lock);
+
+ s_i = cb->args[0];
+
+ for (i = 0; i < (POL_TAB_MASK + 1); i++) {
+ p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)];
+
+ for (; p; p = p->tcfc_next) {
+ index++;
+ if (index < s_i)
+ continue;
+ a->priv = p;
+ a->order = index;
+ nest = nla_nest_start(skb, a->order);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (type == RTM_DELACTION)
+ err = tcf_action_dump_1(skb, a, 0, 1);
+ else
+ err = tcf_action_dump_1(skb, a, 0, 0);
+ if (err < 0) {
+ index--;
+ nla_nest_cancel(skb, nest);
+ goto done;
+ }
+ nla_nest_end(skb, nest);
+ n_i++;
+ }
+ }
+done:
+ read_unlock_bh(&police_lock);
+ if (n_i)
+ cb->args[0] += n_i;
+ return n_i;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ goto done;
+}
+
+static void tcf_police_destroy(struct tcf_police *p)
+{
+ unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
+ struct tcf_common **p1p;
+
+ for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) {
+ if (*p1p == &p->common) {
+ write_lock_bh(&police_lock);
+ *p1p = p->tcf_next;
+ write_unlock_bh(&police_lock);
+ gen_kill_estimator(&p->tcf_bstats,
+ &p->tcf_rate_est);
+ if (p->tcfp_R_tab)
+ qdisc_put_rtab(p->tcfp_R_tab);
+ if (p->tcfp_P_tab)
+ qdisc_put_rtab(p->tcfp_P_tab);
+ kfree(p);
+ return;
+ }
+ }
+ WARN_ON(1);
+}
+
+static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
+ [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE },
+ [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE },
+ [TCA_POLICE_AVRATE] = { .type = NLA_U32 },
+ [TCA_POLICE_RESULT] = { .type = NLA_U32 },
+};
+
+static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ unsigned h;
+ int ret = 0, err;
+ struct nlattr *tb[TCA_POLICE_MAX + 1];
+ struct tc_police *parm;
+ struct tcf_police *police;
+ struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+ int size;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_POLICE_MAX, nla, police_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_POLICE_TBF] == NULL)
+ return -EINVAL;
+ size = nla_len(tb[TCA_POLICE_TBF]);
+ if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
+ return -EINVAL;
+ parm = nla_data(tb[TCA_POLICE_TBF]);
+
+ if (parm->index) {
+ struct tcf_common *pc;
+
+ pc = tcf_hash_lookup(parm->index, &police_hash_info);
+ if (pc != NULL) {
+ a->priv = pc;
+ police = to_police(pc);
+ if (bind) {
+ police->tcf_bindcnt += 1;
+ police->tcf_refcnt += 1;
+ }
+ if (ovr)
+ goto override;
+ return ret;
+ }
+ }
+
+ police = kzalloc(sizeof(*police), GFP_KERNEL);
+ if (police == NULL)
+ return -ENOMEM;
+ ret = ACT_P_CREATED;
+ police->tcf_refcnt = 1;
+ spin_lock_init(&police->tcf_lock);
+ if (bind)
+ police->tcf_bindcnt = 1;
+override:
+ if (parm->rate.rate) {
+ err = -ENOMEM;
+ R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
+ if (R_tab == NULL)
+ goto failure;
+ if (parm->peakrate.rate) {
+ P_tab = qdisc_get_rtab(&parm->peakrate,
+ tb[TCA_POLICE_PEAKRATE]);
+ if (P_tab == NULL) {
+ qdisc_put_rtab(R_tab);
+ goto failure;
+ }
+ }
+ }
+ /* No failure allowed after this point */
+ spin_lock_bh(&police->tcf_lock);
+ if (R_tab != NULL) {
+ qdisc_put_rtab(police->tcfp_R_tab);
+ police->tcfp_R_tab = R_tab;
+ }
+ if (P_tab != NULL) {
+ qdisc_put_rtab(police->tcfp_P_tab);
+ police->tcfp_P_tab = P_tab;
+ }
+
+ if (tb[TCA_POLICE_RESULT])
+ police->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+ police->tcfp_toks = police->tcfp_burst = parm->burst;
+ police->tcfp_mtu = parm->mtu;
+ if (police->tcfp_mtu == 0) {
+ police->tcfp_mtu = ~0;
+ if (police->tcfp_R_tab)
+ police->tcfp_mtu = 255<<police->tcfp_R_tab->rate.cell_log;
+ }
+ if (police->tcfp_P_tab)
+ police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu);
+ police->tcf_action = parm->action;
+
+ if (tb[TCA_POLICE_AVRATE])
+ police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
+ if (est)
+ gen_replace_estimator(&police->tcf_bstats,
+ &police->tcf_rate_est,
+ &police->tcf_lock, est);
+
+ spin_unlock_bh(&police->tcf_lock);
+ if (ret != ACT_P_CREATED)
+ return ret;
+
+ police->tcfp_t_c = psched_get_time();
+ police->tcf_index = parm->index ? parm->index :
+ tcf_hash_new_index(&police_idx_gen, &police_hash_info);
+ h = tcf_hash(police->tcf_index, POL_TAB_MASK);
+ write_lock_bh(&police_lock);
+ police->tcf_next = tcf_police_ht[h];
+ tcf_police_ht[h] = &police->common;
+ write_unlock_bh(&police_lock);
+
+ a->priv = police;
+ return ret;
+
+failure:
+ if (ret == ACT_P_CREATED)
+ kfree(police);
+ return err;
+}
+
+static int tcf_act_police_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_police *p = a->priv;
+ int ret = 0;
+
+ if (p != NULL) {
+ if (bind)
+ p->tcf_bindcnt--;
+
+ p->tcf_refcnt--;
+ if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) {
+ tcf_police_destroy(p);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_police *police = a->priv;
+ psched_time_t now;
+ long toks;
+ long ptoks = 0;
+
+ spin_lock(&police->tcf_lock);
+
+ police->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ police->tcf_bstats.packets++;
+
+ if (police->tcfp_ewma_rate &&
+ police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
+ }
+
+ if (qdisc_pkt_len(skb) <= police->tcfp_mtu) {
+ if (police->tcfp_R_tab == NULL) {
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
+ }
+
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, police->tcfp_t_c,
+ police->tcfp_burst);
+ if (police->tcfp_P_tab) {
+ ptoks = toks + police->tcfp_ptoks;
+ if (ptoks > (long)L2T_P(police, police->tcfp_mtu))
+ ptoks = (long)L2T_P(police, police->tcfp_mtu);
+ ptoks -= L2T_P(police, qdisc_pkt_len(skb));
+ }
+ toks += police->tcfp_toks;
+ if (toks > (long)police->tcfp_burst)
+ toks = police->tcfp_burst;
+ toks -= L2T(police, qdisc_pkt_len(skb));
+ if ((toks|ptoks) >= 0) {
+ police->tcfp_t_c = now;
+ police->tcfp_toks = toks;
+ police->tcfp_ptoks = ptoks;
+ spin_unlock(&police->tcf_lock);
+ return police->tcfp_result;
+ }
+ }
+
+ police->tcf_qstats.overlimits++;
+ spin_unlock(&police->tcf_lock);
+ return police->tcf_action;
+}
+
+static int
+tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_police *police = a->priv;
+ struct tc_police opt;
+
+ opt.index = police->tcf_index;
+ opt.action = police->tcf_action;
+ opt.mtu = police->tcfp_mtu;
+ opt.burst = police->tcfp_burst;
+ opt.refcnt = police->tcf_refcnt - ref;
+ opt.bindcnt = police->tcf_bindcnt - bind;
+ if (police->tcfp_R_tab)
+ opt.rate = police->tcfp_R_tab->rate;
+ else
+ memset(&opt.rate, 0, sizeof(opt.rate));
+ if (police->tcfp_P_tab)
+ opt.peakrate = police->tcfp_P_tab->rate;
+ else
+ memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+ NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+ if (police->tcfp_result)
+ NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
+ if (police->tcfp_ewma_rate)
+ NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+MODULE_AUTHOR("Alexey Kuznetsov");
+MODULE_DESCRIPTION("Policing actions");
+MODULE_LICENSE("GPL");
+
+static struct tc_action_ops act_police_ops = {
+ .kind = "police",
+ .hinfo = &police_hash_info,
+ .type = TCA_ID_POLICE,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_act_police,
+ .dump = tcf_act_police_dump,
+ .cleanup = tcf_act_police_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_act_police_locate,
+ .walk = tcf_act_police_walker
+};
+
+static int __init
+police_init_module(void)
+{
+ return tcf_register_action(&act_police_ops);
+}
+
+static void __exit
+police_cleanup_module(void)
+{
+ tcf_unregister_action(&act_police_ops);
+}
+
+module_init(police_init_module);
+module_exit(police_cleanup_module);
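+
+/* Illustrative userspace usage (not part of this patch; the rate, burst
+ * and device name are placeholders):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
+ *           match ip src 0.0.0.0/0 \
+ *           police rate 1mbit burst 10k drop
+ *
+ * tcf_act_police() implements a token bucket: a conforming packet returns
+ * tcfp_result (TC_ACT_OK unless TCA_POLICE_RESULT is set), while an
+ * out-of-profile packet returns tcf_action ("drop" maps to TC_ACT_SHOT).
+ */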
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
new file mode 100644
index 0000000..e7851ce
--- /dev/null
+++ b/net/sched/act_simple.c
@@ -0,0 +1,217 @@
+/*
+ * net/sched/simp.c Simple example of an action
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jamal Hadi Salim (2005-8)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#define TCA_ACT_SIMP 22
+
+#include <linux/tc_act/tc_defact.h>
+#include <net/tc_act/tc_defact.h>
+
+#define SIMP_TAB_MASK 7
+static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1];
+static u32 simp_idx_gen;
+static DEFINE_RWLOCK(simp_lock);
+
+static struct tcf_hashinfo simp_hash_info = {
+ .htab = tcf_simp_ht,
+ .hmask = SIMP_TAB_MASK,
+ .lock = &simp_lock,
+};
+
+#define SIMP_MAX_DATA 32
+static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+{
+ struct tcf_defact *d = a->priv;
+
+ spin_lock(&d->tcf_lock);
+ d->tcf_tm.lastuse = jiffies;
+ d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ d->tcf_bstats.packets++;
+
+	/* Print the policy string followed by '_' and the packet count.
+	 * For example, if this is the 3rd packet and the string is "hello",
+	 * the output is "hello_3" (without the quotes).
+	 */
+ printk("simple: %s_%d\n",
+ (char *)d->tcfd_defdata, d->tcf_bstats.packets);
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+}
+
+static int tcf_simp_release(struct tcf_defact *d, int bind)
+{
+ int ret = 0;
+ if (d) {
+ if (bind)
+ d->tcf_bindcnt--;
+ d->tcf_refcnt--;
+ if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
+ kfree(d->tcfd_defdata);
+ tcf_hash_destroy(&d->common, &simp_hash_info);
+ ret = 1;
+ }
+ }
+ return ret;
+}
+
+static int alloc_defdata(struct tcf_defact *d, char *defdata)
+{
+ d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL);
+ if (unlikely(!d->tcfd_defdata))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void reset_policy(struct tcf_defact *d, char *defdata,
+ struct tc_defact *p)
+{
+ spin_lock_bh(&d->tcf_lock);
+ d->tcf_action = p->action;
+ memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
+ strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
+ spin_unlock_bh(&d->tcf_lock);
+}
+
+static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
+ [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) },
+ [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA },
+};
+
+static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tc_defact *parm;
+ struct tcf_defact *d;
+ struct tcf_common *pc;
+ char *defdata;
+ int ret = 0, err;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_DEF_PARMS] == NULL)
+ return -EINVAL;
+
+ if (tb[TCA_DEF_DATA] == NULL)
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_DEF_PARMS]);
+ defdata = nla_data(tb[TCA_DEF_DATA]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+ &simp_idx_gen, &simp_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+
+ d = to_defact(pc);
+ ret = alloc_defdata(d, defdata);
+ if (ret < 0) {
+ kfree(pc);
+ return ret;
+ }
+ d->tcf_action = parm->action;
+ ret = ACT_P_CREATED;
+ } else {
+ d = to_defact(pc);
+ if (!ovr) {
+ tcf_simp_release(d, bind);
+ return -EEXIST;
+ }
+ reset_policy(d, defdata, parm);
+ }
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &simp_hash_info);
+ return ret;
+}
+
+static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_defact *d = a->priv;
+
+ if (d)
+ return tcf_simp_release(d, bind);
+ return 0;
+}
+
+static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_defact *d = a->priv;
+ struct tc_defact opt;
+ struct tcf_t t;
+
+ opt.index = d->tcf_index;
+ opt.refcnt = d->tcf_refcnt - ref;
+ opt.bindcnt = d->tcf_bindcnt - bind;
+ opt.action = d->tcf_action;
+ NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+ NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
+ t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+ NLA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_simp_ops = {
+ .kind = "simple",
+ .hinfo = &simp_hash_info,
+ .type = TCA_ACT_SIMP,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_simp,
+ .dump = tcf_simp_dump,
+ .cleanup = tcf_simp_cleanup,
+ .init = tcf_simp_init,
+ .walk = tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2005)");
+MODULE_DESCRIPTION("Simple example action");
+MODULE_LICENSE("GPL");
+
+static int __init simp_init_module(void)
+{
+ int ret = tcf_register_action(&act_simp_ops);
+ if (!ret)
+ printk("Simple TC action Loaded\n");
+ return ret;
+}
+
+static void __exit simp_cleanup_module(void)
+{
+ tcf_unregister_action(&act_simp_ops);
+}
+
+module_init(simp_init_module);
+module_exit(simp_cleanup_module);
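+
+/* Illustrative userspace usage (not part of this patch; the string and
+ * device name are placeholders):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+ *           match ip dst 0.0.0.0/0 action simple sdata "example"
+ *
+ * Each matching packet then logs "simple: example_<packet count>" via
+ * tcf_simp() above.
+ */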
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 0000000..fe9777e
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+
+#define SKBEDIT_TAB_MASK 15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+ .htab = tcf_skbedit_ht,
+ .hmask = SKBEDIT_TAB_MASK,
+ .lock = &skbedit_lock,
+};
+
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ spin_lock(&d->tcf_lock);
+ d->tcf_tm.lastuse = jiffies;
+ d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ d->tcf_bstats.packets++;
+
+ if (d->flags & SKBEDIT_F_PRIORITY)
+ skb->priority = d->priority;
+ if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
+ skb->dev->real_num_tx_queues > d->queue_mapping)
+ skb_set_queue_mapping(skb, d->queue_mapping);
+
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+}
+
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+ [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) },
+ [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
+ [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
+};
+
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tc_skbedit *parm;
+ struct tcf_skbedit *d;
+ struct tcf_common *pc;
+ u32 flags = 0, *priority = NULL;
+ u16 *queue_mapping = NULL;
+ int ret = 0, err;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_SKBEDIT_PARMS] == NULL)
+ return -EINVAL;
+
+ if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+ flags |= SKBEDIT_F_PRIORITY;
+ priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+ }
+
+ if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+ flags |= SKBEDIT_F_QUEUE_MAPPING;
+ queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+ }
+ if (!flags)
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+ &skbedit_idx_gen, &skbedit_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+
+ d = to_skbedit(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ d = to_skbedit(pc);
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &skbedit_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ spin_lock_bh(&d->tcf_lock);
+
+ d->flags = flags;
+ if (flags & SKBEDIT_F_PRIORITY)
+ d->priority = *priority;
+ if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ d->queue_mapping = *queue_mapping;
+ d->tcf_action = parm->action;
+
+ spin_unlock_bh(&d->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &skbedit_hash_info);
+ return ret;
+}
+
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ if (d)
+ return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+ return 0;
+}
+
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_skbedit *d = a->priv;
+ struct tc_skbedit opt;
+ struct tcf_t t;
+
+ opt.index = d->tcf_index;
+ opt.refcnt = d->tcf_refcnt - ref;
+ opt.bindcnt = d->tcf_bindcnt - bind;
+ opt.action = d->tcf_action;
+ NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+ if (d->flags & SKBEDIT_F_PRIORITY)
+ NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+ &d->priority);
+ if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+ NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+ sizeof(d->queue_mapping), &d->queue_mapping);
+ t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+ NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_skbedit_ops = {
+ .kind = "skbedit",
+ .hinfo = &skbedit_hash_info,
+ .type = TCA_ACT_SKBEDIT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_skbedit,
+ .dump = tcf_skbedit_dump,
+ .cleanup = tcf_skbedit_cleanup,
+ .init = tcf_skbedit_init,
+ .walk = tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+
+static int __init skbedit_init_module(void)
+{
+ return tcf_register_action(&act_skbedit_ops);
+}
+
+static void __exit skbedit_cleanup_module(void)
+{
+ tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);
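+
+/* Illustrative userspace usage (not part of this patch; queue and priority
+ * values are placeholders):
+ *
+ *   tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
+ *           match ip dst 0.0.0.0/0 \
+ *           action skbedit queue_mapping 3 priority 1
+ *
+ * tcf_skbedit() then overrides skb->priority and/or the transmit queue
+ * chosen for matching packets.
+ */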
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
new file mode 100644
index 0000000..16e7ac9
--- /dev/null
+++ b/net/sched/cls_api.c
@@ -0,0 +1,603 @@
+/*
+ * net/sched/cls_api.c Packet classifier API.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ *
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/netlink.h>
+#include <linux/err.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+
+/* The list of all installed classifier types */
+
+static struct tcf_proto_ops *tcf_proto_base __read_mostly;
+
+/* Protects the list of registered TC modules.  It is a pure SMP lock. */
+static DEFINE_RWLOCK(cls_mod_lock);
+
+/* Find classifier type by string name */
+
+static struct tcf_proto_ops *tcf_proto_lookup_ops(struct nlattr *kind)
+{
+ struct tcf_proto_ops *t = NULL;
+
+ if (kind) {
+ read_lock(&cls_mod_lock);
+ for (t = tcf_proto_base; t; t = t->next) {
+ if (nla_strcmp(kind, t->kind) == 0) {
+ if (!try_module_get(t->owner))
+ t = NULL;
+ break;
+ }
+ }
+ read_unlock(&cls_mod_lock);
+ }
+ return t;
+}
+
+/* Register(unregister) new classifier type */
+
+int register_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+ struct tcf_proto_ops *t, **tp;
+ int rc = -EEXIST;
+
+ write_lock(&cls_mod_lock);
+ for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
+ if (!strcmp(ops->kind, t->kind))
+ goto out;
+
+ ops->next = NULL;
+ *tp = ops;
+ rc = 0;
+out:
+ write_unlock(&cls_mod_lock);
+ return rc;
+}
+EXPORT_SYMBOL(register_tcf_proto_ops);
+
+int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
+{
+ struct tcf_proto_ops *t, **tp;
+ int rc = -ENOENT;
+
+ write_lock(&cls_mod_lock);
+ for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+ if (t == ops)
+ break;
+
+ if (!t)
+ goto out;
+ *tp = t->next;
+ rc = 0;
+out:
+ write_unlock(&cls_mod_lock);
+ return rc;
+}
+EXPORT_SYMBOL(unregister_tcf_proto_ops);
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+ struct tcf_proto *tp, unsigned long fh, int event);
+
+
+/* Select a new prio value from the range managed by the kernel. */
+
+static inline u32 tcf_auto_prio(struct tcf_proto *tp)
+{
+ u32 first = TC_H_MAKE(0xC0000000U, 0U);
+
+ if (tp)
+ first = tp->prio-1;
+
+ return first;
+}
+
+/* Add/change/delete/get a filter node */
+
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ spinlock_t *root_lock;
+ struct tcmsg *t;
+ u32 protocol;
+ u32 prio;
+ u32 nprio;
+ u32 parent;
+ struct net_device *dev;
+ struct Qdisc *q;
+ struct tcf_proto **back, **chain;
+ struct tcf_proto *tp;
+ struct tcf_proto_ops *tp_ops;
+ const struct Qdisc_class_ops *cops;
+ unsigned long cl;
+ unsigned long fh;
+ int err;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+replay:
+ t = NLMSG_DATA(n);
+ protocol = TC_H_MIN(t->tcm_info);
+ prio = TC_H_MAJ(t->tcm_info);
+ nprio = prio;
+ parent = t->tcm_parent;
+ cl = 0;
+
+ if (prio == 0) {
+		/* If no priority is given, the user wants us to allocate it. */
+ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+ return -ENOENT;
+ prio = TC_H_MAKE(0x80000000U, 0U);
+ }
+
+ /* Find head of filter chain. */
+
+ /* Find link */
+ dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
+ if (dev == NULL)
+ return -ENODEV;
+
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
+ /* Find qdisc */
+ if (!parent) {
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
+ parent = q->handle;
+ } else {
+ q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
+ if (q == NULL)
+ return -EINVAL;
+ }
+
+ /* Is it classful? */
+ if ((cops = q->ops->cl_ops) == NULL)
+ return -EINVAL;
+
+	/* Are we searching for a filter attached to a class? */
+ if (TC_H_MIN(parent)) {
+ cl = cops->get(q, parent);
+ if (cl == 0)
+ return -ENOENT;
+ }
+
+ /* And the last stroke */
+ chain = cops->tcf_chain(q, cl);
+ err = -EINVAL;
+ if (chain == NULL)
+ goto errout;
+
+ /* Check the chain for existence of proto-tcf with this priority */
+ for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+ if (tp->prio >= prio) {
+ if (tp->prio == prio) {
+ if (!nprio || (tp->protocol != protocol && protocol))
+ goto errout;
+ } else
+ tp = NULL;
+ break;
+ }
+ }
+
+ root_lock = qdisc_root_sleeping_lock(q);
+
+ if (tp == NULL) {
+ /* Proto-tcf does not exist, create new one */
+
+ if (tca[TCA_KIND] == NULL || !protocol)
+ goto errout;
+
+ err = -ENOENT;
+ if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+ goto errout;
+
+
+ /* Create new proto tcf */
+
+ err = -ENOBUFS;
+ tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+ if (tp == NULL)
+ goto errout;
+ err = -ENOENT;
+ tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
+ if (tp_ops == NULL) {
+#ifdef CONFIG_MODULES
+ struct nlattr *kind = tca[TCA_KIND];
+ char name[IFNAMSIZ];
+
+ if (kind != NULL &&
+ nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+ rtnl_unlock();
+ request_module("cls_%s", name);
+ rtnl_lock();
+ tp_ops = tcf_proto_lookup_ops(kind);
+ /* We dropped the RTNL semaphore in order to
+ * perform the module load. So, even if we
+ * succeeded in loading the module we have to
+ * replay the request. We indicate this using
+ * -EAGAIN.
+ */
+ if (tp_ops != NULL) {
+ module_put(tp_ops->owner);
+ err = -EAGAIN;
+ }
+ }
+#endif
+ kfree(tp);
+ goto errout;
+ }
+ tp->ops = tp_ops;
+ tp->protocol = protocol;
+ tp->prio = nprio ? : tcf_auto_prio(*back);
+ tp->q = q;
+ tp->classify = tp_ops->classify;
+ tp->classid = parent;
+
+ err = tp_ops->init(tp);
+ if (err != 0) {
+ module_put(tp_ops->owner);
+ kfree(tp);
+ goto errout;
+ }
+
+ spin_lock_bh(root_lock);
+ tp->next = *back;
+ *back = tp;
+ spin_unlock_bh(root_lock);
+
+ } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind))
+ goto errout;
+
+ fh = tp->ops->get(tp, t->tcm_handle);
+
+ if (fh == 0) {
+ if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
+ spin_lock_bh(root_lock);
+ *back = tp->next;
+ spin_unlock_bh(root_lock);
+
+ tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+ tcf_destroy(tp);
+ err = 0;
+ goto errout;
+ }
+
+ err = -ENOENT;
+ if (n->nlmsg_type != RTM_NEWTFILTER ||
+ !(n->nlmsg_flags & NLM_F_CREATE))
+ goto errout;
+ } else {
+ switch (n->nlmsg_type) {
+ case RTM_NEWTFILTER:
+ err = -EEXIST;
+ if (n->nlmsg_flags & NLM_F_EXCL)
+ goto errout;
+ break;
+ case RTM_DELTFILTER:
+ err = tp->ops->delete(tp, fh);
+ if (err == 0)
+ tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+ goto errout;
+ case RTM_GETTFILTER:
+ err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+ goto errout;
+ default:
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+
+ err = tp->ops->change(tp, cl, t->tcm_handle, tca, &fh);
+ if (err == 0)
+ tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+
+errout:
+ if (cl)
+ cops->put(q, cl);
+ if (err == -EAGAIN)
+ /* Replay the request. */
+ goto replay;
+ return err;
+}
+
+static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
+ unsigned long fh, u32 pid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
+ tcm = NLMSG_DATA(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+	tcm->tcm__pad1 = 0;
+	tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
+ tcm->tcm_parent = tp->classid;
+ tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
+ NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
+ tcm->tcm_handle = fh;
+ if (RTM_DELTFILTER != event) {
+ tcm->tcm_handle = 0;
+ if (tp->ops->dump && tp->ops->dump(tp, fh, skb, tcm) < 0)
+ goto nla_put_failure;
+ }
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+ struct tcf_proto *tp, unsigned long fh, int event)
+{
+ struct sk_buff *skb;
+ u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
+struct tcf_dump_args {
+ struct tcf_walker w;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+};
+
+static int tcf_node_dump(struct tcf_proto *tp, unsigned long n,
+ struct tcf_walker *arg)
+{
+ struct tcf_dump_args *a = (void *)arg;
+
+ return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid,
+ a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
+}
+
+static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct netdev_queue *dev_queue;
+ int t;
+ int s_t;
+ struct net_device *dev;
+ struct Qdisc *q;
+ struct tcf_proto *tp, **chain;
+ struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
+ unsigned long cl = 0;
+ const struct Qdisc_class_ops *cops;
+ struct tcf_dump_args arg;
+
+ if (net != &init_net)
+ return 0;
+
+ if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ return skb->len;
+ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return skb->len;
+
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (!tcm->tcm_parent)
+ q = dev_queue->qdisc_sleeping;
+ else
+ q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ if (!q)
+ goto out;
+ if ((cops = q->ops->cl_ops) == NULL)
+ goto errout;
+ if (TC_H_MIN(tcm->tcm_parent)) {
+ cl = cops->get(q, tcm->tcm_parent);
+ if (cl == 0)
+ goto errout;
+ }
+ chain = cops->tcf_chain(q, cl);
+ if (chain == NULL)
+ goto errout;
+
+ s_t = cb->args[0];
+
+ for (tp=*chain, t=0; tp; tp = tp->next, t++) {
+ if (t < s_t) continue;
+ if (TC_H_MAJ(tcm->tcm_info) &&
+ TC_H_MAJ(tcm->tcm_info) != tp->prio)
+ continue;
+ if (TC_H_MIN(tcm->tcm_info) &&
+ TC_H_MIN(tcm->tcm_info) != tp->protocol)
+ continue;
+ if (t > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+ if (cb->args[1] == 0) {
+ if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ RTM_NEWTFILTER) <= 0)
+ break;
+
+ cb->args[1] = 1;
+ }
+ if (tp->ops->walk == NULL)
+ continue;
+ arg.w.fn = tcf_node_dump;
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.w.stop = 0;
+ arg.w.skip = cb->args[1]-1;
+ arg.w.count = 0;
+ tp->ops->walk(tp, &arg.w);
+ cb->args[1] = arg.w.count+1;
+ if (arg.w.stop)
+ break;
+ }
+
+ cb->args[0] = t;
+
+errout:
+ if (cl)
+ cops->put(q, cl);
+out:
+ dev_put(dev);
+ return skb->len;
+}
+
+void tcf_exts_destroy(struct tcf_proto *tp, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (exts->action) {
+ tcf_action_destroy(exts->action, TCA_ACT_UNBIND);
+ exts->action = NULL;
+ }
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_destroy);
+
+int tcf_exts_validate(struct tcf_proto *tp, struct nlattr **tb,
+ struct nlattr *rate_tlv, struct tcf_exts *exts,
+ const struct tcf_ext_map *map)
+{
+ memset(exts, 0, sizeof(*exts));
+
+#ifdef CONFIG_NET_CLS_ACT
+ {
+ struct tc_action *act;
+
+ if (map->police && tb[map->police]) {
+ act = tcf_action_init_1(tb[map->police], rate_tlv,
+ "police", TCA_ACT_NOREPLACE,
+ TCA_ACT_BIND);
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+
+ act->type = TCA_OLD_COMPAT;
+ exts->action = act;
+ } else if (map->action && tb[map->action]) {
+ act = tcf_action_init(tb[map->action], rate_tlv, NULL,
+ TCA_ACT_NOREPLACE, TCA_ACT_BIND);
+ if (IS_ERR(act))
+ return PTR_ERR(act);
+
+ exts->action = act;
+ }
+ }
+#else
+ if ((map->action && tb[map->action]) ||
+ (map->police && tb[map->police]))
+ return -EOPNOTSUPP;
+#endif
+
+ return 0;
+}
+EXPORT_SYMBOL(tcf_exts_validate);
+
+void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
+ struct tcf_exts *src)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (src->action) {
+ struct tc_action *act;
+ tcf_tree_lock(tp);
+ act = xchg(&dst->action, src->action);
+ tcf_tree_unlock(tp);
+ if (act)
+ tcf_action_destroy(act, TCA_ACT_UNBIND);
+ }
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_change);
+
+int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts,
+ const struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (map->action && exts->action) {
+		/*
+		 * Again for backward-compatible mode: we want to work with
+		 * both the old and the new way of passing tc data, even if
+		 * iproute2 is newer. - jhs
+		 */
+ struct nlattr *nest;
+
+ if (exts->action->type != TCA_OLD_COMPAT) {
+ nest = nla_nest_start(skb, map->action);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (tcf_action_dump(skb, exts->action, 0, 0) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ } else if (map->police) {
+ nest = nla_nest_start(skb, map->police);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (tcf_action_dump_old(skb, exts->action, 0, 0) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ }
+ }
+#endif
+ return 0;
+nla_put_failure: __attribute__ ((unused))
+ return -1;
+}
+EXPORT_SYMBOL(tcf_exts_dump);
+
+
+int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts,
+ const struct tcf_ext_map *map)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (exts->action)
+ if (tcf_action_copy_stats(skb, exts->action, 1) < 0)
+ goto nla_put_failure;
+#endif
+ return 0;
+nla_put_failure: __attribute__ ((unused))
+ return -1;
+}
+EXPORT_SYMBOL(tcf_exts_dump_stats);
+
+static int __init tc_filter_init(void)
+{
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+ tc_dump_tfilter);
+
+ return 0;
+}
+
+subsys_initcall(tc_filter_init);
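+
+/* Illustrative userspace usage (not part of this patch; device, handles
+ * and addresses are placeholders):
+ *
+ *   tc qdisc add dev eth0 root handle 1: prio
+ *   tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+ *           match ip dst 192.0.2.0/24 flowid 1:1
+ *
+ * The RTM_NEWTFILTER request generated by the second command is handled
+ * by tc_ctl_tfilter() registered above; dumps go through tc_dump_tfilter().
+ */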
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
new file mode 100644
index 0000000..956915c
--- /dev/null
+++ b/net/sched/cls_basic.c
@@ -0,0 +1,304 @@
+/*
+ * net/sched/cls_basic.c Basic Packet Classifier.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct basic_head
+{
+ u32 hgenerator;
+ struct list_head flist;
+};
+
+struct basic_filter
+{
+ u32 handle;
+ struct tcf_exts exts;
+ struct tcf_ematch_tree ematches;
+ struct tcf_result res;
+ struct list_head link;
+};
+
+static const struct tcf_ext_map basic_ext_map = {
+ .action = TCA_BASIC_ACT,
+ .police = TCA_BASIC_POLICE
+};
+
+static int basic_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ int r;
+ struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_filter *f;
+
+ list_for_each_entry(f, &head->flist, link) {
+ if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+ continue;
+ *res = f->res;
+ r = tcf_exts_exec(skb, &f->exts, res);
+ if (r < 0)
+ continue;
+ return r;
+ }
+ return -1;
+}
+
+static unsigned long basic_get(struct tcf_proto *tp, u32 handle)
+{
+ unsigned long l = 0UL;
+ struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_filter *f;
+
+ if (head == NULL)
+ return 0UL;
+
+ list_for_each_entry(f, &head->flist, link)
+ if (f->handle == handle)
+ l = (unsigned long) f;
+
+ return l;
+}
+
+static void basic_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int basic_init(struct tcf_proto *tp)
+{
+ struct basic_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ INIT_LIST_HEAD(&head->flist);
+ tp->root = head;
+ return 0;
+}
+
+static inline void basic_delete_filter(struct tcf_proto *tp,
+ struct basic_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ tcf_exts_destroy(tp, &f->exts);
+ tcf_em_tree_destroy(tp, &f->ematches);
+ kfree(f);
+}
+
+static void basic_destroy(struct tcf_proto *tp)
+{
+ struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL);
+ struct basic_filter *f, *n;
+
+ list_for_each_entry_safe(f, n, &head->flist, link) {
+ list_del(&f->link);
+ basic_delete_filter(tp, f);
+ }
+ kfree(head);
+}
+
+static int basic_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_filter *t, *f = (struct basic_filter *) arg;
+
+ list_for_each_entry(t, &head->flist, link)
+ if (t == f) {
+ tcf_tree_lock(tp);
+ list_del(&t->link);
+ tcf_tree_unlock(tp);
+ basic_delete_filter(tp, t);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
+ [TCA_BASIC_CLASSID] = { .type = NLA_U32 },
+ [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
+};
+
+static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est)
+{
+ int err = -EINVAL;
+ struct tcf_exts e;
+ struct tcf_ematch_tree t;
+
+ err = tcf_exts_validate(tp, tb, est, &e, &basic_ext_map);
+ if (err < 0)
+ return err;
+
+ err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
+ if (err < 0)
+ goto errout;
+
+ if (tb[TCA_BASIC_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ tcf_exts_change(tp, &f->exts, &e);
+ tcf_em_tree_change(tp, &f->ematches, &t);
+
+ return 0;
+errout:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+ struct nlattr **tca, unsigned long *arg)
+{
+ int err;
+ struct basic_head *head = (struct basic_head *) tp->root;
+ struct nlattr *tb[TCA_BASIC_MAX + 1];
+ struct basic_filter *f = (struct basic_filter *) *arg;
+
+ if (tca[TCA_OPTIONS] == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS],
+ basic_policy);
+ if (err < 0)
+ return err;
+
+ if (f != NULL) {
+ if (handle && f->handle != handle)
+ return -EINVAL;
+ return basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
+ }
+
+ err = -ENOBUFS;
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
+ if (f == NULL)
+ goto errout;
+
+ err = -EINVAL;
+ if (handle)
+ f->handle = handle;
+ else {
+ unsigned int i = 0x80000000;
+ do {
+ if (++head->hgenerator == 0x7FFFFFFF)
+ head->hgenerator = 1;
+ } while (--i > 0 && basic_get(tp, head->hgenerator));
+
+ if (i <= 0) {
+ printk(KERN_ERR "Insufficient number of handles\n");
+ goto errout;
+ }
+
+ f->handle = head->hgenerator;
+ }
+
+ err = basic_set_parms(tp, f, base, tb, tca[TCA_RATE]);
+ if (err < 0)
+ goto errout;
+
+ tcf_tree_lock(tp);
+ list_add(&f->link, &head->flist);
+ tcf_tree_unlock(tp);
+ *arg = (unsigned long) f;
+
+ return 0;
+errout:
+ if (*arg == 0UL && f)
+ kfree(f);
+
+ return err;
+}
+
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct basic_head *head = (struct basic_head *) tp->root;
+ struct basic_filter *f;
+
+ list_for_each_entry(f, &head->flist, link) {
+ if (arg->count < arg->skip)
+ goto skip;
+
+ if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static int basic_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct basic_filter *f = (struct basic_filter *) fh;
+ struct nlattr *nest;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (f->res.classid)
+ NLA_PUT_U32(skb, TCA_BASIC_CLASSID, f->res.classid);
+
+ if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
+ tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_basic_ops __read_mostly = {
+ .kind = "basic",
+ .classify = basic_classify,
+ .init = basic_init,
+ .destroy = basic_destroy,
+ .get = basic_get,
+ .put = basic_put,
+ .change = basic_change,
+ .delete = basic_delete,
+ .walk = basic_walk,
+ .dump = basic_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_basic(void)
+{
+ return register_tcf_proto_ops(&cls_basic_ops);
+}
+
+static void __exit exit_basic(void)
+{
+ unregister_tcf_proto_ops(&cls_basic_ops);
+}
+
+module_init(init_basic)
+module_exit(exit_basic)
+MODULE_LICENSE("GPL");
+
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
new file mode 100644
index 0000000..0ebaff6
--- /dev/null
+++ b/net/sched/cls_flow.c
@@ -0,0 +1,707 @@
+/*
+ * net/sched/cls_flow.c Generic flow classifier
+ *
+ * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+#include <linux/pkt_cls.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/route.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+struct flow_head {
+ struct list_head filters;
+};
+
+struct flow_filter {
+ struct list_head list;
+ struct tcf_exts exts;
+ struct tcf_ematch_tree ematches;
+ struct timer_list perturb_timer;
+ u32 perturb_period;
+ u32 handle;
+
+ u32 nkeys;
+ u32 keymask;
+ u32 mode;
+ u32 mask;
+ u32 xor;
+ u32 rshift;
+ u32 addend;
+ u32 divisor;
+ u32 baseclass;
+ u32 hashrnd;
+};
+
+static const struct tcf_ext_map flow_ext_map = {
+ .action = TCA_FLOW_ACT,
+ .police = TCA_FLOW_POLICE,
+};
+
+static inline u32 addr_fold(void *addr)
+{
+ unsigned long a = (unsigned long)addr;
+
+ return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
+}
+
+static u32 flow_get_src(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ntohl(ip_hdr(skb)->saddr);
+ case htons(ETH_P_IPV6):
+ return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
+ default:
+ return addr_fold(skb->sk);
+ }
+}
+
+static u32 flow_get_dst(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ntohl(ip_hdr(skb)->daddr);
+ case htons(ETH_P_IPV6):
+ return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
+ default:
+ return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
+ }
+}
+
+static u32 flow_get_proto(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ip_hdr(skb)->protocol;
+ case htons(ETH_P_IPV6):
+ return ipv6_hdr(skb)->nexthdr;
+ default:
+ return 0;
+ }
+}
+
+static int has_ports(u8 protocol)
+{
+ switch (protocol) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_SCTP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static u32 flow_get_proto_src(const struct sk_buff *skb)
+{
+ u32 res = 0;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ has_ports(iph->protocol))
+ res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
+ break;
+ }
+ case htons(ETH_P_IPV6): {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (has_ports(iph->nexthdr))
+ res = ntohs(*(__be16 *)&iph[1]);
+ break;
+ }
+ default:
+ res = addr_fold(skb->sk);
+ }
+
+ return res;
+}
+
+static u32 flow_get_proto_dst(const struct sk_buff *skb)
+{
+ u32 res = 0;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ has_ports(iph->protocol))
+ res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
+ break;
+ }
+ case htons(ETH_P_IPV6): {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (has_ports(iph->nexthdr))
+ res = ntohs(*(__be16 *)((void *)&iph[1] + 2));
+ break;
+ }
+ default:
+ res = addr_fold(skb->dst) ^ (__force u16)skb->protocol;
+ }
+
+ return res;
+}
+
+static u32 flow_get_iif(const struct sk_buff *skb)
+{
+ return skb->iif;
+}
+
+static u32 flow_get_priority(const struct sk_buff *skb)
+{
+ return skb->priority;
+}
+
+static u32 flow_get_mark(const struct sk_buff *skb)
+{
+ return skb->mark;
+}
+
+static u32 flow_get_nfct(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ return addr_fold(skb->nfct);
+#else
+ return 0;
+#endif
+}
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#define CTTUPLE(skb, member) \
+({ \
+ enum ip_conntrack_info ctinfo; \
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo); \
+ if (ct == NULL) \
+ goto fallback; \
+ ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member; \
+})
+#else
+#define CTTUPLE(skb, member) \
+({ \
+ goto fallback; \
+ 0; \
+})
+#endif
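+
+/*
+ * CTTUPLE() evaluates to the given member of the conntrack tuple for the
+ * packet's direction; when conntrack is disabled or the skb carries no
+ * conntrack entry it jumps to the caller's local "fallback" label, so the
+ * flow_get_nfct_*() helpers below fall back to their plain header-based
+ * counterparts.
+ */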
+
+static u32 flow_get_nfct_src(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ntohl(CTTUPLE(skb, src.u3.ip));
+ case htons(ETH_P_IPV6):
+ return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
+ }
+fallback:
+ return flow_get_src(skb);
+}
+
+static u32 flow_get_nfct_dst(const struct sk_buff *skb)
+{
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return ntohl(CTTUPLE(skb, dst.u3.ip));
+ case htons(ETH_P_IPV6):
+ return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
+ }
+fallback:
+ return flow_get_dst(skb);
+}
+
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
+{
+ return ntohs(CTTUPLE(skb, src.u.all));
+fallback:
+ return flow_get_proto_src(skb);
+}
+
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
+{
+ return ntohs(CTTUPLE(skb, dst.u.all));
+fallback:
+ return flow_get_proto_dst(skb);
+}
+
+static u32 flow_get_rtclassid(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ROUTE
+ if (skb->dst)
+ return skb->dst->tclassid;
+#endif
+ return 0;
+}
+
+static u32 flow_get_skuid(const struct sk_buff *skb)
+{
+ if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
+ return skb->sk->sk_socket->file->f_uid;
+ return 0;
+}
+
+static u32 flow_get_skgid(const struct sk_buff *skb)
+{
+ if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
+ return skb->sk->sk_socket->file->f_gid;
+ return 0;
+}
+
+static u32 flow_get_vlan_tag(const struct sk_buff *skb)
+{
+ u16 uninitialized_var(tag);
+
+ if (vlan_get_tag(skb, &tag) < 0)
+ return 0;
+ return tag & VLAN_VID_MASK;
+}
+
+static u32 flow_key_get(const struct sk_buff *skb, int key)
+{
+ switch (key) {
+ case FLOW_KEY_SRC:
+ return flow_get_src(skb);
+ case FLOW_KEY_DST:
+ return flow_get_dst(skb);
+ case FLOW_KEY_PROTO:
+ return flow_get_proto(skb);
+ case FLOW_KEY_PROTO_SRC:
+ return flow_get_proto_src(skb);
+ case FLOW_KEY_PROTO_DST:
+ return flow_get_proto_dst(skb);
+ case FLOW_KEY_IIF:
+ return flow_get_iif(skb);
+ case FLOW_KEY_PRIORITY:
+ return flow_get_priority(skb);
+ case FLOW_KEY_MARK:
+ return flow_get_mark(skb);
+ case FLOW_KEY_NFCT:
+ return flow_get_nfct(skb);
+ case FLOW_KEY_NFCT_SRC:
+ return flow_get_nfct_src(skb);
+ case FLOW_KEY_NFCT_DST:
+ return flow_get_nfct_dst(skb);
+ case FLOW_KEY_NFCT_PROTO_SRC:
+ return flow_get_nfct_proto_src(skb);
+ case FLOW_KEY_NFCT_PROTO_DST:
+ return flow_get_nfct_proto_dst(skb);
+ case FLOW_KEY_RTCLASSID:
+ return flow_get_rtclassid(skb);
+ case FLOW_KEY_SKUID:
+ return flow_get_skuid(skb);
+ case FLOW_KEY_SKGID:
+ return flow_get_skgid(skb);
+ case FLOW_KEY_VLAN_TAG:
+ return flow_get_vlan_tag(skb);
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+}
+
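+/*
+ * For each matching filter: gather the configured keys, then either hash
+ * them (FLOW_MODE_HASH, jhash2 with a periodically perturbed seed) or map
+ * the single key arithmetically (FLOW_MODE_MAP: mask, xor, rshift,
+ * addend).  The result is optionally reduced modulo the divisor and
+ * offset from baseclass to form the class ID.
+ */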
+static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct flow_head *head = tp->root;
+ struct flow_filter *f;
+ u32 keymask;
+ u32 classid;
+ unsigned int n, key;
+ int r;
+
+ list_for_each_entry(f, &head->filters, list) {
+ u32 keys[f->nkeys];
+
+ if (!tcf_em_tree_match(skb, &f->ematches, NULL))
+ continue;
+
+ keymask = f->keymask;
+
+ for (n = 0; n < f->nkeys; n++) {
+ key = ffs(keymask) - 1;
+ keymask &= ~(1 << key);
+ keys[n] = flow_key_get(skb, key);
+ }
+
+ if (f->mode == FLOW_MODE_HASH)
+ classid = jhash2(keys, f->nkeys, f->hashrnd);
+ else {
+ classid = keys[0];
+ classid = (classid & f->mask) ^ f->xor;
+ classid = (classid >> f->rshift) + f->addend;
+ }
+
+ if (f->divisor)
+ classid %= f->divisor;
+
+ res->class = 0;
+ res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
+
+ r = tcf_exts_exec(skb, &f->exts, res);
+ if (r < 0)
+ continue;
+ return r;
+ }
+ return -1;
+}
+
+static void flow_perturbation(unsigned long arg)
+{
+ struct flow_filter *f = (struct flow_filter *)arg;
+
+ get_random_bytes(&f->hashrnd, 4);
+ if (f->perturb_period)
+ mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
+}
+
+static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
+ [TCA_FLOW_KEYS] = { .type = NLA_U32 },
+ [TCA_FLOW_MODE] = { .type = NLA_U32 },
+ [TCA_FLOW_BASECLASS] = { .type = NLA_U32 },
+ [TCA_FLOW_RSHIFT] = { .type = NLA_U32 },
+ [TCA_FLOW_ADDEND] = { .type = NLA_U32 },
+ [TCA_FLOW_MASK] = { .type = NLA_U32 },
+ [TCA_FLOW_XOR] = { .type = NLA_U32 },
+ [TCA_FLOW_DIVISOR] = { .type = NLA_U32 },
+ [TCA_FLOW_ACT] = { .type = NLA_NESTED },
+ [TCA_FLOW_POLICE] = { .type = NLA_NESTED },
+ [TCA_FLOW_EMATCHES] = { .type = NLA_NESTED },
+ [TCA_FLOW_PERTURB] = { .type = NLA_U32 },
+};
+
+static int flow_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct flow_head *head = tp->root;
+ struct flow_filter *f;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_FLOW_MAX + 1];
+ struct tcf_exts e;
+ struct tcf_ematch_tree t;
+ unsigned int nkeys = 0;
+ unsigned int perturb_period = 0;
+ u32 baseclass = 0;
+ u32 keymask = 0;
+ u32 mode;
+ int err;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_FLOW_BASECLASS]) {
+ baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
+ if (TC_H_MIN(baseclass) == 0)
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOW_KEYS]) {
+ keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
+
+ nkeys = hweight32(keymask);
+ if (nkeys == 0)
+ return -EINVAL;
+
+ if (fls(keymask) - 1 > FLOW_KEY_MAX)
+ return -EOPNOTSUPP;
+ }
+
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
+ if (err < 0)
+ return err;
+
+ err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
+ if (err < 0)
+ goto err1;
+
+ f = (struct flow_filter *)*arg;
+ if (f != NULL) {
+ err = -EINVAL;
+ if (f->handle != handle && handle)
+ goto err2;
+
+ mode = f->mode;
+ if (tb[TCA_FLOW_MODE])
+ mode = nla_get_u32(tb[TCA_FLOW_MODE]);
+ if (mode != FLOW_MODE_HASH && nkeys > 1)
+ goto err2;
+
+ if (mode == FLOW_MODE_HASH)
+ perturb_period = f->perturb_period;
+ if (tb[TCA_FLOW_PERTURB]) {
+ if (mode != FLOW_MODE_HASH)
+ goto err2;
+ perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+ }
+ } else {
+ err = -EINVAL;
+ if (!handle)
+ goto err2;
+ if (!tb[TCA_FLOW_KEYS])
+ goto err2;
+
+ mode = FLOW_MODE_MAP;
+ if (tb[TCA_FLOW_MODE])
+ mode = nla_get_u32(tb[TCA_FLOW_MODE]);
+ if (mode != FLOW_MODE_HASH && nkeys > 1)
+ goto err2;
+
+ if (tb[TCA_FLOW_PERTURB]) {
+ if (mode != FLOW_MODE_HASH)
+ goto err2;
+ perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
+ }
+
+ if (TC_H_MAJ(baseclass) == 0)
+ baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+ if (TC_H_MIN(baseclass) == 0)
+ baseclass = TC_H_MAKE(baseclass, 1);
+
+ err = -ENOBUFS;
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
+ if (f == NULL)
+ goto err2;
+
+ f->handle = handle;
+ f->mask = ~0U;
+
+ get_random_bytes(&f->hashrnd, 4);
+ f->perturb_timer.function = flow_perturbation;
+ f->perturb_timer.data = (unsigned long)f;
+ init_timer_deferrable(&f->perturb_timer);
+ }
+
+ tcf_exts_change(tp, &f->exts, &e);
+ tcf_em_tree_change(tp, &f->ematches, &t);
+
+ tcf_tree_lock(tp);
+
+ if (tb[TCA_FLOW_KEYS]) {
+ f->keymask = keymask;
+ f->nkeys = nkeys;
+ }
+
+ f->mode = mode;
+
+ if (tb[TCA_FLOW_MASK])
+ f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
+ if (tb[TCA_FLOW_XOR])
+ f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
+ if (tb[TCA_FLOW_RSHIFT])
+ f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
+ if (tb[TCA_FLOW_ADDEND])
+ f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
+
+ if (tb[TCA_FLOW_DIVISOR])
+ f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
+ if (baseclass)
+ f->baseclass = baseclass;
+
+ f->perturb_period = perturb_period;
+ del_timer(&f->perturb_timer);
+ if (perturb_period)
+ mod_timer(&f->perturb_timer, jiffies + perturb_period);
+
+ if (*arg == 0)
+ list_add_tail(&f->list, &head->filters);
+
+ tcf_tree_unlock(tp);
+
+ *arg = (unsigned long)f;
+ return 0;
+
+err2:
+ tcf_em_tree_destroy(tp, &t);
+err1:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
+{
+ del_timer_sync(&f->perturb_timer);
+ tcf_exts_destroy(tp, &f->exts);
+ tcf_em_tree_destroy(tp, &f->ematches);
+ kfree(f);
+}
+
+static int flow_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct flow_filter *f = (struct flow_filter *)arg;
+
+ tcf_tree_lock(tp);
+ list_del(&f->list);
+ tcf_tree_unlock(tp);
+ flow_destroy_filter(tp, f);
+ return 0;
+}
+
+static int flow_init(struct tcf_proto *tp)
+{
+ struct flow_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ INIT_LIST_HEAD(&head->filters);
+ tp->root = head;
+ return 0;
+}
+
+static void flow_destroy(struct tcf_proto *tp)
+{
+ struct flow_head *head = tp->root;
+ struct flow_filter *f, *next;
+
+ list_for_each_entry_safe(f, next, &head->filters, list) {
+ list_del(&f->list);
+ flow_destroy_filter(tp, f);
+ }
+ kfree(head);
+}
+
+static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+{
+ struct flow_head *head = tp->root;
+ struct flow_filter *f;
+
+ list_for_each_entry(f, &head->filters, list)
+ if (f->handle == handle)
+ return (unsigned long)f;
+ return 0;
+}
+
+static void flow_put(struct tcf_proto *tp, unsigned long f)
+{
+ return;
+}
+
+static int flow_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct flow_filter *f = (struct flow_filter *)fh;
+ struct nlattr *nest;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
+ NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);
+
+ if (f->mask != ~0 || f->xor != 0) {
+ NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
+ NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
+ }
+ if (f->rshift)
+ NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
+ if (f->addend)
+ NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);
+
+ if (f->divisor)
+ NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
+ if (f->baseclass)
+ NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
+
+ if (f->perturb_period)
+ NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ);
+
+ if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
+ goto nla_put_failure;
+#ifdef CONFIG_NET_EMATCH
+ if (f->ematches.hdr.nmatches &&
+ tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
+ goto nla_put_failure;
+#endif
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, nest);
+ return -1;
+}
+
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct flow_head *head = tp->root;
+ struct flow_filter *f;
+
+ list_for_each_entry(f, &head->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static struct tcf_proto_ops cls_flow_ops __read_mostly = {
+ .kind = "flow",
+ .classify = flow_classify,
+ .init = flow_init,
+ .destroy = flow_destroy,
+ .change = flow_change,
+ .delete = flow_delete,
+ .get = flow_get,
+ .put = flow_put,
+ .dump = flow_dump,
+ .walk = flow_walk,
+ .owner = THIS_MODULE,
+};
+
+static int __init cls_flow_init(void)
+{
+ return register_tcf_proto_ops(&cls_flow_ops);
+}
+
+static void __exit cls_flow_exit(void)
+{
+ unregister_tcf_proto_ops(&cls_flow_ops);
+}
+
+module_init(cls_flow_init);
+module_exit(cls_flow_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("TC flow classifier");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
new file mode 100644
index 0000000..b0f90e5
--- /dev/null
+++ b/net/sched/cls_fw.c
@@ -0,0 +1,400 @@
+/*
+ * net/sched/cls_fw.c Classifier mapping ipchains' fwmark to traffic class.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_walk off by one
+ * Karlis Peisenieks <karlis@mt.lv> : 990415 : fw_delete killed all the filters (and the kernel).
+ * Alex <alex@pilotsoft.com> : 2004xxyy: Added Action extension
+ *
+ * JHS: We should remove the CONFIG_NET_CLS_IND from here
+ * eventually when the meta match extension is made available
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
+
+struct fw_head
+{
+ struct fw_filter *ht[HTSIZE];
+ u32 mask;
+};
+
+struct fw_filter
+{
+ struct fw_filter *next;
+ u32 id;
+ struct tcf_result res;
+#ifdef CONFIG_NET_CLS_IND
+ char indev[IFNAMSIZ];
+#endif /* CONFIG_NET_CLS_IND */
+ struct tcf_exts exts;
+};
+
+static const struct tcf_ext_map fw_ext_map = {
+ .action = TCA_FW_ACT,
+ .police = TCA_FW_POLICE
+};
+
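+/*
+ * Fold the 32-bit fwmark into an index for the HTSIZE-entry hash table.
+ * HTSIZE depends on PAGE_SIZE and pointer size, so the common table sizes
+ * get dedicated XOR folds of equal-width chunks of the handle; any other
+ * size falls back to simple masking.
+ */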
+static __inline__ int fw_hash(u32 handle)
+{
+ if (HTSIZE == 4096)
+ return ((handle >> 24) & 0xFFF) ^
+ ((handle >> 12) & 0xFFF) ^
+ (handle & 0xFFF);
+ else if (HTSIZE == 2048)
+ return ((handle >> 22) & 0x7FF) ^
+ ((handle >> 11) & 0x7FF) ^
+ (handle & 0x7FF);
+ else if (HTSIZE == 1024)
+ return ((handle >> 20) & 0x3FF) ^
+ ((handle >> 10) & 0x3FF) ^
+ (handle & 0x3FF);
+ else if (HTSIZE == 512)
+ return (handle >> 27) ^
+ ((handle >> 18) & 0x1FF) ^
+ ((handle >> 9) & 0x1FF) ^
+ (handle & 0x1FF);
+ else if (HTSIZE == 256) {
+ u8 *t = (u8 *) &handle;
+ return t[0] ^ t[1] ^ t[2] ^ t[3];
+ } else
+ return handle & (HTSIZE - 1);
+}
+
+static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f;
+ int r;
+ u32 id = skb->mark;
+
+ if (head != NULL) {
+ id &= head->mask;
+ for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+ if (f->id == id) {
+ *res = f->res;
+#ifdef CONFIG_NET_CLS_IND
+ if (!tcf_match_indev(skb, f->indev))
+ continue;
+#endif /* CONFIG_NET_CLS_IND */
+ r = tcf_exts_exec(skb, &f->exts, res);
+ if (r < 0)
+ continue;
+
+ return r;
+ }
+ }
+ } else {
+ /* old method */
+ if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+ res->classid = id;
+ res->class = 0;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f;
+
+ if (head == NULL)
+ return 0;
+
+ for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+ if (f->id == handle)
+ return (unsigned long)f;
+ }
+ return 0;
+}
+
+static void fw_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int fw_init(struct tcf_proto *tp)
+{
+ return 0;
+}
+
+static inline void
+fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ tcf_exts_destroy(tp, &f->exts);
+ kfree(f);
+}
+
+static void fw_destroy(struct tcf_proto *tp)
+{
+ struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL);
+ struct fw_filter *f;
+ int h;
+
+ if (head == NULL)
+ return;
+
+ for (h=0; h<HTSIZE; h++) {
+ while ((f=head->ht[h]) != NULL) {
+ head->ht[h] = f->next;
+ fw_delete_filter(tp, f);
+ }
+ }
+ kfree(head);
+}
+
+static int fw_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f = (struct fw_filter*)arg;
+ struct fw_filter **fp;
+
+ if (head == NULL || f == NULL)
+ goto out;
+
+ for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ tcf_tree_lock(tp);
+ *fp = f->next;
+ tcf_tree_unlock(tp);
+ fw_delete_filter(tp, f);
+ return 0;
+ }
+ }
+out:
+ return -EINVAL;
+}
+
+static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
+ [TCA_FW_CLASSID] = { .type = NLA_U32 },
+ [TCA_FW_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
+ [TCA_FW_MASK] = { .type = NLA_U32 },
+};
+
+static int
+fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f,
+ struct nlattr **tb, struct nlattr **tca, unsigned long base)
+{
+ struct fw_head *head = (struct fw_head *)tp->root;
+ struct tcf_exts e;
+ u32 mask;
+ int err;
+
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &fw_ext_map);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_FW_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_FW_INDEV]) {
+ err = tcf_change_indev(tp, f->indev, tb[TCA_FW_INDEV]);
+ if (err < 0)
+ goto errout;
+ }
+#endif /* CONFIG_NET_CLS_IND */
+
+ if (tb[TCA_FW_MASK]) {
+ mask = nla_get_u32(tb[TCA_FW_MASK]);
+ if (mask != head->mask)
+ goto errout;
+ } else if (head->mask != 0xFFFFFFFF)
+ goto errout;
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static int fw_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
+ struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ struct fw_filter *f = (struct fw_filter *) *arg;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_FW_MAX + 1];
+ int err;
+
+ if (!opt)
+ return handle ? -EINVAL : 0;
+
+ err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
+ if (err < 0)
+ return err;
+
+ if (f != NULL) {
+ if (f->id != handle && handle)
+ return -EINVAL;
+ return fw_change_attrs(tp, f, tb, tca, base);
+ }
+
+ if (!handle)
+ return -EINVAL;
+
+ if (head == NULL) {
+ u32 mask = 0xFFFFFFFF;
+ if (tb[TCA_FW_MASK])
+ mask = nla_get_u32(tb[TCA_FW_MASK]);
+
+ head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
+ if (head == NULL)
+ return -ENOBUFS;
+ head->mask = mask;
+
+ tcf_tree_lock(tp);
+ tp->root = head;
+ tcf_tree_unlock(tp);
+ }
+
+ f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
+ if (f == NULL)
+ return -ENOBUFS;
+
+ f->id = handle;
+
+ err = fw_change_attrs(tp, f, tb, tca, base);
+ if (err < 0)
+ goto errout;
+
+ f->next = head->ht[fw_hash(handle)];
+ tcf_tree_lock(tp);
+ head->ht[fw_hash(handle)] = f;
+ tcf_tree_unlock(tp);
+
+ *arg = (unsigned long)f;
+ return 0;
+
+errout:
+ kfree(f);
+ return err;
+}
+
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct fw_head *head = (struct fw_head*)tp->root;
+ int h;
+
+ if (head == NULL)
+ arg->stop = 1;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h < HTSIZE; h++) {
+ struct fw_filter *f;
+
+ for (f = head->ht[h]; f; f = f->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+}
+
+static int fw_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct fw_head *head = (struct fw_head *)tp->root;
+ struct fw_filter *f = (struct fw_filter*)fh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->id;
+
+ if (!f->res.classid && !tcf_exts_is_available(&f->exts))
+ return skb->len;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (f->res.classid)
+ NLA_PUT_U32(skb, TCA_FW_CLASSID, f->res.classid);
+#ifdef CONFIG_NET_CLS_IND
+ if (strlen(f->indev))
+ NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
+#endif /* CONFIG_NET_CLS_IND */
+ if (head->mask != 0xFFFFFFFF)
+ NLA_PUT_U32(skb, TCA_FW_MASK, head->mask);
+
+ if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &fw_ext_map) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_fw_ops __read_mostly = {
+ .kind = "fw",
+ .classify = fw_classify,
+ .init = fw_init,
+ .destroy = fw_destroy,
+ .get = fw_get,
+ .put = fw_put,
+ .change = fw_change,
+ .delete = fw_delete,
+ .walk = fw_walk,
+ .dump = fw_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_fw(void)
+{
+ return register_tcf_proto_ops(&cls_fw_ops);
+}
+
+static void __exit exit_fw(void)
+{
+ unregister_tcf_proto_ops(&cls_fw_ops);
+}
+
+module_init(init_fw)
+module_exit(exit_fw)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
new file mode 100644
index 0000000..e3d8455
--- /dev/null
+++ b/net/sched/cls_route.c
@@ -0,0 +1,620 @@
+/*
+ * net/sched/cls_route.c ROUTE4 classifier.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/netlink.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+/*
+ 1. For now we assume that route tags < 256.
+      This allows us to use direct table lookups instead of hash tables.
+ 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ are mutually exclusive.
+ 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ */
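+
+/*
+ * A rough sketch of the filter handle layout implied by route4_set_parms()
+ * and to_hash()/from_hash() below:
+ *
+ *	bits  0..7  : the "to" route tag; bit 15 is set when no "to" is given
+ *	bits 16..31 : the "from" tag, or the iif with bit 31 set for
+ *	              "fromdev"; all ones (0xFFFF) when neither is given
+ */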
+
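+/*
+ * Small direct-mapped cache of recent lookups: route4_classify() consults
+ * it before walking the buckets, route4_set_fastmap() records both hits
+ * and failures (ROUTE4_FAILURE), and route4_reset_fastmap() wipes it under
+ * the qdisc root lock whenever filters change.
+ */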
+struct route4_fastmap
+{
+ struct route4_filter *filter;
+ u32 id;
+ int iif;
+};
+
+struct route4_head
+{
+ struct route4_fastmap fastmap[16];
+ struct route4_bucket *table[256+1];
+};
+
+struct route4_bucket
+{
+ /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
+ struct route4_filter *ht[16+16+1];
+};
+
+struct route4_filter
+{
+ struct route4_filter *next;
+ u32 id;
+ int iif;
+
+ struct tcf_result res;
+ struct tcf_exts exts;
+ u32 handle;
+ struct route4_bucket *bkt;
+};
+
+#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+
+static const struct tcf_ext_map route_ext_map = {
+ .police = TCA_ROUTE4_POLICE,
+ .action = TCA_ROUTE4_ACT
+};
+
+static __inline__ int route4_fastmap_hash(u32 id, int iif)
+{
+ return id&0xF;
+}
+
+static inline
+void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+{
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
+
+ spin_lock_bh(root_lock);
+ memset(head->fastmap, 0, sizeof(head->fastmap));
+ spin_unlock_bh(root_lock);
+}
+
+static inline void
+route4_set_fastmap(struct route4_head *head, u32 id, int iif,
+ struct route4_filter *f)
+{
+ int h = route4_fastmap_hash(id, iif);
+ head->fastmap[h].id = id;
+ head->fastmap[h].iif = iif;
+ head->fastmap[h].filter = f;
+}
+
+static __inline__ int route4_hash_to(u32 id)
+{
+ return id&0xFF;
+}
+
+static __inline__ int route4_hash_from(u32 id)
+{
+ return (id>>16)&0xF;
+}
+
+static __inline__ int route4_hash_iif(int iif)
+{
+ return 16 + ((iif>>16)&0xF);
+}
+
+static __inline__ int route4_hash_wild(void)
+{
+ return 32;
+}
+
+#define ROUTE4_APPLY_RESULT() \
+{ \
+ *res = f->res; \
+ if (tcf_exts_is_available(&f->exts)) { \
+ int r = tcf_exts_exec(skb, &f->exts, res); \
+ if (r < 0) { \
+ dont_cache = 1; \
+ continue; \
+ } \
+ return r; \
+ } else if (!dont_cache) \
+ route4_set_fastmap(head, id, iif, f); \
+ return 0; \
+}
+
+static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct dst_entry *dst;
+ struct route4_bucket *b;
+ struct route4_filter *f;
+ u32 id, h;
+ int iif, dont_cache = 0;
+
+ if ((dst = skb->dst) == NULL)
+ goto failure;
+
+ id = dst->tclassid;
+ if (head == NULL)
+ goto old_method;
+
+ iif = ((struct rtable*)dst)->fl.iif;
+
+ h = route4_fastmap_hash(id, iif);
+ if (id == head->fastmap[h].id &&
+ iif == head->fastmap[h].iif &&
+ (f = head->fastmap[h].filter) != NULL) {
+ if (f == ROUTE4_FAILURE)
+ goto failure;
+
+ *res = f->res;
+ return 0;
+ }
+
+ h = route4_hash_to(id);
+
+restart:
+ if ((b = head->table[h]) != NULL) {
+ for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
+ if (f->id == id)
+ ROUTE4_APPLY_RESULT();
+
+ for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next)
+ if (f->iif == iif)
+ ROUTE4_APPLY_RESULT();
+
+ for (f = b->ht[route4_hash_wild()]; f; f = f->next)
+ ROUTE4_APPLY_RESULT();
+
+ }
+ if (h < 256) {
+ h = 256;
+ id &= ~0xFFFF;
+ goto restart;
+ }
+
+ if (!dont_cache)
+ route4_set_fastmap(head, id, iif, ROUTE4_FAILURE);
+failure:
+ return -1;
+
+old_method:
+ if (id && (TC_H_MAJ(id) == 0 ||
+ !(TC_H_MAJ(id^tp->q->handle)))) {
+ res->classid = id;
+ res->class = 0;
+ return 0;
+ }
+ return -1;
+}
+
+static inline u32 to_hash(u32 id)
+{
+ u32 h = id&0xFF;
+ if (id&0x8000)
+ h += 256;
+ return h;
+}
+
+static inline u32 from_hash(u32 id)
+{
+ id &= 0xFFFF;
+ if (id == 0xFFFF)
+ return 32;
+ if (!(id & 0x8000)) {
+ if (id > 255)
+ return 256;
+ return id&0xF;
+ }
+ return 16 + (id&0xF);
+}
+
+static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
+{
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_bucket *b;
+ struct route4_filter *f;
+ unsigned h1, h2;
+
+ if (!head)
+ return 0;
+
+ h1 = to_hash(handle);
+ if (h1 > 256)
+ return 0;
+
+ h2 = from_hash(handle>>16);
+ if (h2 > 32)
+ return 0;
+
+ if ((b = head->table[h1]) != NULL) {
+ for (f = b->ht[h2]; f; f = f->next)
+ if (f->handle == handle)
+ return (unsigned long)f;
+ }
+ return 0;
+}
+
+static void route4_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int route4_init(struct tcf_proto *tp)
+{
+ return 0;
+}
+
+static inline void
+route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ tcf_exts_destroy(tp, &f->exts);
+ kfree(f);
+}
+
+static void route4_destroy(struct tcf_proto *tp)
+{
+ struct route4_head *head = xchg(&tp->root, NULL);
+ int h1, h2;
+
+ if (head == NULL)
+ return;
+
+ for (h1=0; h1<=256; h1++) {
+ struct route4_bucket *b;
+
+ if ((b = head->table[h1]) != NULL) {
+ for (h2=0; h2<=32; h2++) {
+ struct route4_filter *f;
+
+ while ((f = b->ht[h2]) != NULL) {
+ b->ht[h2] = f->next;
+ route4_delete_filter(tp, f);
+ }
+ }
+ kfree(b);
+ }
+ }
+ kfree(head);
+}
+
+static int route4_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct route4_head *head = (struct route4_head*)tp->root;
+ struct route4_filter **fp, *f = (struct route4_filter*)arg;
+ unsigned h = 0;
+ struct route4_bucket *b;
+ int i;
+
+ if (!head || !f)
+ return -EINVAL;
+
+ h = f->handle;
+ b = f->bkt;
+
+ for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ tcf_tree_lock(tp);
+ *fp = f->next;
+ tcf_tree_unlock(tp);
+
+ route4_reset_fastmap(tp->q, head, f->id);
+ route4_delete_filter(tp, f);
+
+ /* Strip tree */
+
+ for (i=0; i<=32; i++)
+ if (b->ht[i])
+ return 0;
+
+ /* OK, session has no flows */
+ tcf_tree_lock(tp);
+ head->table[to_hash(h)] = NULL;
+ tcf_tree_unlock(tp);
+
+ kfree(b);
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = {
+ [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 },
+ [TCA_ROUTE4_TO] = { .type = NLA_U32 },
+ [TCA_ROUTE4_FROM] = { .type = NLA_U32 },
+ [TCA_ROUTE4_IIF] = { .type = NLA_U32 },
+};
+
+static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
+ struct route4_filter *f, u32 handle, struct route4_head *head,
+ struct nlattr **tb, struct nlattr *est, int new)
+{
+ int err;
+ u32 id = 0, to = 0, nhandle = 0x8000;
+ struct route4_filter *fp;
+ unsigned int h1;
+ struct route4_bucket *b;
+ struct tcf_exts e;
+
+ err = tcf_exts_validate(tp, tb, est, &e, &route_ext_map);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_ROUTE4_TO]) {
+ if (new && handle & 0x8000)
+ goto errout;
+ to = nla_get_u32(tb[TCA_ROUTE4_TO]);
+ if (to > 0xFF)
+ goto errout;
+ nhandle = to;
+ }
+
+ if (tb[TCA_ROUTE4_FROM]) {
+ if (tb[TCA_ROUTE4_IIF])
+ goto errout;
+ id = nla_get_u32(tb[TCA_ROUTE4_FROM]);
+ if (id > 0xFF)
+ goto errout;
+ nhandle |= id << 16;
+ } else if (tb[TCA_ROUTE4_IIF]) {
+ id = nla_get_u32(tb[TCA_ROUTE4_IIF]);
+ if (id > 0x7FFF)
+ goto errout;
+ nhandle |= (id | 0x8000) << 16;
+ } else
+ nhandle |= 0xFFFF << 16;
+
+ if (handle && new) {
+ nhandle |= handle & 0x7F00;
+ if (nhandle != handle)
+ goto errout;
+ }
+
+ h1 = to_hash(nhandle);
+ if ((b = head->table[h1]) == NULL) {
+ err = -ENOBUFS;
+ b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
+ if (b == NULL)
+ goto errout;
+
+ tcf_tree_lock(tp);
+ head->table[h1] = b;
+ tcf_tree_unlock(tp);
+ } else {
+ unsigned int h2 = from_hash(nhandle >> 16);
+ err = -EEXIST;
+ for (fp = b->ht[h2]; fp; fp = fp->next)
+ if (fp->handle == f->handle)
+ goto errout;
+ }
+
+ tcf_tree_lock(tp);
+ if (tb[TCA_ROUTE4_TO])
+ f->id = to;
+
+ if (tb[TCA_ROUTE4_FROM])
+ f->id = to | id<<16;
+ else if (tb[TCA_ROUTE4_IIF])
+ f->iif = id;
+
+ f->handle = nhandle;
+ f->bkt = b;
+ tcf_tree_unlock(tp);
+
+ if (tb[TCA_ROUTE4_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static int route4_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
+ struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct route4_head *head = tp->root;
+ struct route4_filter *f, *f1, **fp;
+ struct route4_bucket *b;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_ROUTE4_MAX + 1];
+ unsigned int h, th;
+ u32 old_handle = 0;
+ int err;
+
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
+ err = nla_parse_nested(tb, TCA_ROUTE4_MAX, opt, route4_policy);
+ if (err < 0)
+ return err;
+
+ if ((f = (struct route4_filter*)*arg) != NULL) {
+ if (f->handle != handle && handle)
+ return -EINVAL;
+
+ if (f->bkt)
+ old_handle = f->handle;
+
+ err = route4_set_parms(tp, base, f, handle, head, tb,
+ tca[TCA_RATE], 0);
+ if (err < 0)
+ return err;
+
+ goto reinsert;
+ }
+
+ err = -ENOBUFS;
+ if (head == NULL) {
+ head = kzalloc(sizeof(struct route4_head), GFP_KERNEL);
+ if (head == NULL)
+ goto errout;
+
+ tcf_tree_lock(tp);
+ tp->root = head;
+ tcf_tree_unlock(tp);
+ }
+
+ f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL);
+ if (f == NULL)
+ goto errout;
+
+ err = route4_set_parms(tp, base, f, handle, head, tb,
+ tca[TCA_RATE], 1);
+ if (err < 0)
+ goto errout;
+
+reinsert:
+ h = from_hash(f->handle >> 16);
+ for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+ if (f->handle < f1->handle)
+ break;
+
+ f->next = f1;
+ tcf_tree_lock(tp);
+ *fp = f;
+
+ if (old_handle && f->handle != old_handle) {
+ th = to_hash(old_handle);
+ h = from_hash(old_handle >> 16);
+ if ((b = head->table[th]) != NULL) {
+ for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ *fp = f->next;
+ break;
+ }
+ }
+ }
+ }
+ tcf_tree_unlock(tp);
+
+ route4_reset_fastmap(tp->q, head, f->id);
+ *arg = (unsigned long)f;
+ return 0;
+
+errout:
+ kfree(f);
+ return err;
+}
+
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct route4_head *head = tp->root;
+ unsigned h, h1;
+
+ if (head == NULL)
+ arg->stop = 1;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h <= 256; h++) {
+ struct route4_bucket *b = head->table[h];
+
+ if (b) {
+ for (h1 = 0; h1 <= 32; h1++) {
+ struct route4_filter *f;
+
+ for (f = b->ht[h1]; f; f = f->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+ }
+ }
+}
+
+static int route4_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct route4_filter *f = (struct route4_filter*)fh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+ u32 id;
+
+ if (f == NULL)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (!(f->handle&0x8000)) {
+ id = f->id&0xFF;
+ NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
+ }
+ if (f->handle&0x80000000) {
+ if ((f->handle>>16) != 0xFFFF)
+ NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
+ } else {
+ id = f->id>>16;
+ NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
+ }
+ if (f->res.classid)
+ NLA_PUT_U32(skb, TCA_ROUTE4_CLASSID, f->res.classid);
+
+ if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &route_ext_map) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_route4_ops __read_mostly = {
+ .kind = "route",
+ .classify = route4_classify,
+ .init = route4_init,
+ .destroy = route4_destroy,
+ .get = route4_get,
+ .put = route4_put,
+ .change = route4_change,
+ .delete = route4_delete,
+ .walk = route4_walk,
+ .dump = route4_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_route4(void)
+{
+ return register_tcf_proto_ops(&cls_route4_ops);
+}
+
+static void __exit exit_route4(void)
+{
+ unregister_tcf_proto_ops(&cls_route4_ops);
+}
+
+module_init(init_route4)
+module_exit(exit_route4)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
new file mode 100644
index 0000000..cbb5e0d
--- /dev/null
+++ b/net/sched/cls_rsvp.c
@@ -0,0 +1,28 @@
+/*
+ * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+#define RSVP_DST_LEN 1
+#define RSVP_ID "rsvp"
+#define RSVP_OPS cls_rsvp_ops
+
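+/*
+ * This file only sets the template parameters for the IPv4 flavour; the
+ * classifier itself lives in cls_rsvp.h, which uses RSVP_DST_LEN to pick
+ * between one IPv4 address word and four IPv6 address words (see the
+ * RSVP_DST_LEN == 4 branches there).  cls_rsvp6.c instantiates the IPv6
+ * variant the same way.
+ */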
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
new file mode 100644
index 0000000..7034ea4
--- /dev/null
+++ b/net/sched/cls_rsvp.h
@@ -0,0 +1,660 @@
+/*
+ * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+/*
+   Compared to the general packet classification problem,
+   RSVP needs only several relatively simple rules:
+
+ * (dst, protocol) are always specified,
+ so that we are able to hash them.
+ * src may be exact, or may be wildcard, so that
+ we can keep a hash table plus one wildcard entry.
+ * source port (or flow label) is important only if src is given.
+
+ IMPLEMENTATION.
+
+   We use a two-level hash table: the top level is keyed by
+   destination address and protocol ID; every bucket contains a list
+ of "rsvp sessions", identified by destination address, protocol and
+ DPI(="Destination Port ID"): triple (key, mask, offset).
+
+ Every bucket has a smaller hash table keyed by source address
+ (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
+ Every bucket is again a list of "RSVP flows", selected by
+ source address and SPI(="Source Port ID" here rather than
+ "security parameter index"): triple (key, mask, offset).
+
+
+   NOTE 1. All packets with IPv6 extension headers (except AH and ESP)
+ and all fragmented packets go to the best-effort traffic class.
+
+
+ NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
+ only one "Generalized Port Identifier". So that for classic
+ ah, esp (and udp,tcp) both *pi should coincide or one of them
+ should be wildcard.
+
+ At first sight, this redundancy is just a waste of CPU
+   resources. But DPI and SPI make it possible to assign different
+   priorities to GPIs. See also note 4 about tunnels below.
+
+
+ NOTE 3. One complication is the case of tunneled packets.
+   We implement it as follows: if the first lookup
+   matches a special session with a non-zero "tunnelhdr" value,
+   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
+   In this case, we pull tunnelhdr bytes and restart the lookup
+ with tunnel ID added to the list of keys. Simple and stupid 8)8)
+ It's enough for PIMREG and IPIP.
+
+
+ NOTE 4. Two GPIs make it possible to parse even GRE packets.
+   E.g. DPI can select ETH_P_IP (and the necessary flags to make
+   tunnelhdr correct) in the GRE protocol field, and SPI matches the
+   GRE key. Is it not nice? 8)8)
+
+
+   Well, as a result, despite its simplicity, we get a pretty
+ powerful classification engine. */
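+
+/*
+ * A rough sketch of how filter handles encode the two hash levels, as
+ * used by gen_handle() and rsvp_get() below:
+ *
+ *	bits  0..7  : destination/session bucket (h1)
+ *	bits  8..15 : source bucket (h2); 16 denotes the wildcard-source slot
+ *	bits 16..31 : per-head handle generator, making handles unique
+ */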
+
+
+struct rsvp_head
+{
+ u32 tmap[256/32];
+ u32 hgenerator;
+ u8 tgenerator;
+ struct rsvp_session *ht[256];
+};
+
+struct rsvp_session
+{
+ struct rsvp_session *next;
+ __be32 dst[RSVP_DST_LEN];
+ struct tc_rsvp_gpi dpi;
+ u8 protocol;
+ u8 tunnelid;
+ /* 16 (src,sport) hash slots, and one wildcard source slot */
+ struct rsvp_filter *ht[16+1];
+};
+
+
+struct rsvp_filter
+{
+ struct rsvp_filter *next;
+ __be32 src[RSVP_DST_LEN];
+ struct tc_rsvp_gpi spi;
+ u8 tunnelhdr;
+
+ struct tcf_result res;
+ struct tcf_exts exts;
+
+ u32 handle;
+ struct rsvp_session *sess;
+};
+
+static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+{
+ unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+ h ^= h>>16;
+ h ^= h>>8;
+ return (h ^ protocol ^ tunnelid) & 0xFF;
+}
+
+static __inline__ unsigned hash_src(__be32 *src)
+{
+ unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+ h ^= h>>16;
+ h ^= h>>8;
+ h ^= h>>4;
+ return h & 0xF;
+}
+
+static struct tcf_ext_map rsvp_ext_map = {
+ .police = TCA_RSVP_POLICE,
+ .action = TCA_RSVP_ACT
+};
+
+#define RSVP_APPLY_RESULT() \
+{ \
+ int r = tcf_exts_exec(skb, &f->exts, res); \
+ if (r < 0) \
+ continue; \
+ else if (r > 0) \
+ return r; \
+}
+
+static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+ struct rsvp_session *s;
+ struct rsvp_filter *f;
+ unsigned h1, h2;
+ __be32 *dst, *src;
+ u8 protocol;
+ u8 tunnelid = 0;
+ u8 *xprt;
+#if RSVP_DST_LEN == 4
+ struct ipv6hdr *nhptr = ipv6_hdr(skb);
+#else
+ struct iphdr *nhptr = ip_hdr(skb);
+#endif
+
+restart:
+
+#if RSVP_DST_LEN == 4
+ src = &nhptr->saddr.s6_addr32[0];
+ dst = &nhptr->daddr.s6_addr32[0];
+ protocol = nhptr->nexthdr;
+ xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+#else
+ src = &nhptr->saddr;
+ dst = &nhptr->daddr;
+ protocol = nhptr->protocol;
+ xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
+ if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+ return -1;
+#endif
+
+ h1 = hash_dst(dst, protocol, tunnelid);
+ h2 = hash_src(src);
+
+ for (s = sht[h1]; s; s = s->next) {
+ if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+ protocol == s->protocol &&
+ !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
+#if RSVP_DST_LEN == 4
+ && dst[0] == s->dst[0]
+ && dst[1] == s->dst[1]
+ && dst[2] == s->dst[2]
+#endif
+ && tunnelid == s->tunnelid) {
+
+ for (f = s->ht[h2]; f; f = f->next) {
+ if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
+ !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+#if RSVP_DST_LEN == 4
+ && src[0] == f->src[0]
+ && src[1] == f->src[1]
+ && src[2] == f->src[2]
+#endif
+ ) {
+ *res = f->res;
+ RSVP_APPLY_RESULT();
+
+matched:
+ if (f->tunnelhdr == 0)
+ return 0;
+
+ tunnelid = f->res.classid;
+ nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+ goto restart;
+ }
+ }
+
+ /* And wildcard bucket... */
+ for (f = s->ht[16]; f; f = f->next) {
+ *res = f->res;
+ RSVP_APPLY_RESULT();
+ goto matched;
+ }
+ return -1;
+ }
+ }
+ return -1;
+}
+
+static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
+{
+ struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+ struct rsvp_session *s;
+ struct rsvp_filter *f;
+ unsigned h1 = handle&0xFF;
+ unsigned h2 = (handle>>8)&0xFF;
+
+ if (h2 > 16)
+ return 0;
+
+ for (s = sht[h1]; s; s = s->next) {
+ for (f = s->ht[h2]; f; f = f->next) {
+ if (f->handle == handle)
+ return (unsigned long)f;
+ }
+ }
+ return 0;
+}
+
+static void rsvp_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static int rsvp_init(struct tcf_proto *tp)
+{
+ struct rsvp_head *data;
+
+ data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
+ if (data) {
+ tp->root = data;
+ return 0;
+ }
+ return -ENOBUFS;
+}
+
+static inline void
+rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ tcf_exts_destroy(tp, &f->exts);
+ kfree(f);
+}
+
+static void rsvp_destroy(struct tcf_proto *tp)
+{
+ struct rsvp_head *data = xchg(&tp->root, NULL);
+ struct rsvp_session **sht;
+ int h1, h2;
+
+ if (data == NULL)
+ return;
+
+ sht = data->ht;
+
+ for (h1=0; h1<256; h1++) {
+ struct rsvp_session *s;
+
+ while ((s = sht[h1]) != NULL) {
+ sht[h1] = s->next;
+
+ for (h2=0; h2<=16; h2++) {
+ struct rsvp_filter *f;
+
+ while ((f = s->ht[h2]) != NULL) {
+ s->ht[h2] = f->next;
+ rsvp_delete_filter(tp, f);
+ }
+ }
+ kfree(s);
+ }
+ }
+ kfree(data);
+}
+
+static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
+ unsigned h = f->handle;
+ struct rsvp_session **sp;
+ struct rsvp_session *s = f->sess;
+ int i;
+
+ for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+ if (*fp == f) {
+ tcf_tree_lock(tp);
+ *fp = f->next;
+ tcf_tree_unlock(tp);
+ rsvp_delete_filter(tp, f);
+
+ /* Strip tree */
+
+ for (i=0; i<=16; i++)
+ if (s->ht[i])
+ return 0;
+
+ /* OK, session has no flows */
+ for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+ *sp; sp = &(*sp)->next) {
+ if (*sp == s) {
+ tcf_tree_lock(tp);
+ *sp = s->next;
+ tcf_tree_unlock(tp);
+
+ kfree(s);
+ return 0;
+ }
+ }
+
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+{
+ struct rsvp_head *data = tp->root;
+ int i = 0xFFFF;
+
+ while (i-- > 0) {
+ u32 h;
+ if ((data->hgenerator += 0x10000) == 0)
+ data->hgenerator = 0x10000;
+ h = data->hgenerator|salt;
+ if (rsvp_get(tp, h) == 0)
+ return h;
+ }
+ return 0;
+}
+
+static int tunnel_bts(struct rsvp_head *data)
+{
+ int n = data->tgenerator>>5;
+ u32 b = 1<<(data->tgenerator&0x1F);
+
+ if (data->tmap[n]&b)
+ return 0;
+ data->tmap[n] |= b;
+ return 1;
+}
+
+static void tunnel_recycle(struct rsvp_head *data)
+{
+ struct rsvp_session **sht = data->ht;
+ u32 tmap[256/32];
+ int h1, h2;
+
+ memset(tmap, 0, sizeof(tmap));
+
+ for (h1=0; h1<256; h1++) {
+ struct rsvp_session *s;
+ for (s = sht[h1]; s; s = s->next) {
+ for (h2=0; h2<=16; h2++) {
+ struct rsvp_filter *f;
+
+ for (f = s->ht[h2]; f; f = f->next) {
+ if (f->tunnelhdr == 0)
+ continue;
+ data->tgenerator = f->res.classid;
+ tunnel_bts(data);
+ }
+ }
+ }
+ }
+
+ memcpy(data->tmap, tmap, sizeof(tmap));
+}
+
+static u32 gen_tunnel(struct rsvp_head *data)
+{
+ int i, k;
+
+ for (k=0; k<2; k++) {
+ for (i=255; i>0; i--) {
+ if (++data->tgenerator == 0)
+ data->tgenerator = 1;
+ if (tunnel_bts(data))
+ return data->tgenerator;
+ }
+ tunnel_recycle(data);
+ }
+ return 0;
+}
+
+static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
+ [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
+ [TCA_RSVP_DST] = { .type = NLA_BINARY,
+ .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_SRC] = { .type = NLA_BINARY,
+ .len = RSVP_DST_LEN * sizeof(u32) },
+ [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
+};
+
+static int rsvp_change(struct tcf_proto *tp, unsigned long base,
+ u32 handle,
+ struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct rsvp_head *data = tp->root;
+ struct rsvp_filter *f, **fp;
+ struct rsvp_session *s, **sp;
+ struct tc_rsvp_pinfo *pinfo = NULL;
+ struct nlattr *opt = tca[TCA_OPTIONS-1];
+ struct nlattr *tb[TCA_RSVP_MAX + 1];
+ struct tcf_exts e;
+ unsigned h1, h2;
+ __be32 *dst;
+ int err;
+
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
+ err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
+ if (err < 0)
+ return err;
+
+ err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
+ if (err < 0)
+ return err;
+
+ if ((f = (struct rsvp_filter*)*arg) != NULL) {
+ /* Node exists: adjust only classid */
+
+ if (f->handle != handle && handle)
+ goto errout2;
+ if (tb[TCA_RSVP_CLASSID-1]) {
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ tcf_exts_change(tp, &f->exts, &e);
+ return 0;
+ }
+
+ /* Now more serious part... */
+ err = -EINVAL;
+ if (handle)
+ goto errout2;
+ if (tb[TCA_RSVP_DST-1] == NULL)
+ goto errout2;
+
+ err = -ENOBUFS;
+ f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
+ if (f == NULL)
+ goto errout2;
+
+ h2 = 16;
+ if (tb[TCA_RSVP_SRC-1]) {
+ memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
+ h2 = hash_src(f->src);
+ }
+ if (tb[TCA_RSVP_PINFO-1]) {
+ pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
+ f->spi = pinfo->spi;
+ f->tunnelhdr = pinfo->tunnelhdr;
+ }
+ if (tb[TCA_RSVP_CLASSID-1])
+ f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
+
+ dst = nla_data(tb[TCA_RSVP_DST-1]);
+ h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
+
+ err = -ENOMEM;
+ if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
+ goto errout;
+
+ if (f->tunnelhdr) {
+ err = -EINVAL;
+ if (f->res.classid > 255)
+ goto errout;
+
+ err = -ENOMEM;
+ if (f->res.classid == 0 &&
+ (f->res.classid = gen_tunnel(data)) == 0)
+ goto errout;
+ }
+
+ for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+ if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+ pinfo && pinfo->protocol == s->protocol &&
+ memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
+#if RSVP_DST_LEN == 4
+ && dst[0] == s->dst[0]
+ && dst[1] == s->dst[1]
+ && dst[2] == s->dst[2]
+#endif
+ && pinfo->tunnelid == s->tunnelid) {
+
+insert:
+ /* OK, we found appropriate session */
+
+ fp = &s->ht[h2];
+
+ f->sess = s;
+ if (f->tunnelhdr == 0)
+ tcf_bind_filter(tp, &f->res, base);
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
+ if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+ break;
+ f->next = *fp;
+ wmb();
+ *fp = f;
+
+ *arg = (unsigned long)f;
+ return 0;
+ }
+ }
+
+ /* No session found. Create new one. */
+
+ err = -ENOBUFS;
+ s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
+ if (s == NULL)
+ goto errout;
+ memcpy(s->dst, dst, sizeof(s->dst));
+
+ if (pinfo) {
+ s->dpi = pinfo->dpi;
+ s->protocol = pinfo->protocol;
+ s->tunnelid = pinfo->tunnelid;
+ }
+ for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
+ if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
+ break;
+ }
+ s->next = *sp;
+ wmb();
+ *sp = s;
+
+ goto insert;
+
+errout:
+ kfree(f);
+errout2:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct rsvp_head *head = tp->root;
+ unsigned h, h1;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h < 256; h++) {
+ struct rsvp_session *s;
+
+ for (s = head->ht[h]; s; s = s->next) {
+ for (h1 = 0; h1 <= 16; h1++) {
+ struct rsvp_filter *f;
+
+ for (f = s->ht[h1]; f; f = f->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+ }
+ }
+}
+
+static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct rsvp_filter *f = (struct rsvp_filter*)fh;
+ struct rsvp_session *s;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+ struct tc_rsvp_pinfo pinfo;
+
+ if (f == NULL)
+ return skb->len;
+ s = f->sess;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
+ pinfo.dpi = s->dpi;
+ pinfo.spi = f->spi;
+ pinfo.protocol = s->protocol;
+ pinfo.tunnelid = s->tunnelid;
+ pinfo.tunnelhdr = f->tunnelhdr;
+ pinfo.pad = 0;
+ NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
+ if (f->res.classid)
+ NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
+ if (((f->handle>>8)&0xFF) != 16)
+ NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
+
+ if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
+ goto nla_put_failure;
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tcf_proto_ops RSVP_OPS = {
+ .next = NULL,
+ .kind = RSVP_ID,
+ .classify = rsvp_classify,
+ .init = rsvp_init,
+ .destroy = rsvp_destroy,
+ .get = rsvp_get,
+ .put = rsvp_put,
+ .change = rsvp_change,
+ .delete = rsvp_delete,
+ .walk = rsvp_walk,
+ .dump = rsvp_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_rsvp(void)
+{
+ return register_tcf_proto_ops(&RSVP_OPS);
+}
+
+static void __exit exit_rsvp(void)
+{
+ unregister_tcf_proto_ops(&RSVP_OPS);
+}
+
+module_init(init_rsvp)
+module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
new file mode 100644
index 0000000..dd08aea
--- /dev/null
+++ b/net/sched/cls_rsvp6.c
@@ -0,0 +1,28 @@
+/*
+ * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <net/netlink.h>
+
+#define RSVP_DST_LEN 4
+#define RSVP_ID "rsvp6"
+#define RSVP_OPS cls_rsvp6_ops
+
+#include "cls_rsvp.h"
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
new file mode 100644
index 0000000..7a7bff5
--- /dev/null
+++ b/net/sched/cls_tcindex.c
@@ -0,0 +1,512 @@
+/*
+ * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
+ *
+ * Written 1998,1999 by Werner Almesberger, EPFL ICA
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+
+
+/*
+ * Not quite sure if we need all the xchgs Alexey uses when accessing things.
+ * Can always add them later ... :)
+ */
+
+/*
+ * Passing parameters to the root seems to be done more awkwardly than really
+ * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
+ * verified. FIXME.
+ */
+
+#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
+#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
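+
+/* How the two constants interact (a sketch, assuming no explicit hash
+ * size is supplied; see tcindex_set_parms() below): with e.g. mask
+ * 0x003f and shift 0, mask >> shift = 63 < PERFECT_HASH_THRESHOLD, so a
+ * perfect hash of (mask >> shift) + 1 = 64 slots is allocated; otherwise
+ * an imperfect chained hash of DEFAULT_HASH_SIZE buckets is used.
+ */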
+
+
+#define PRIV(tp) ((struct tcindex_data *) (tp)->root)
+
+
+struct tcindex_filter_result {
+ struct tcf_exts exts;
+ struct tcf_result res;
+};
+
+struct tcindex_filter {
+ u16 key;
+ struct tcindex_filter_result result;
+ struct tcindex_filter *next;
+};
+
+
+struct tcindex_data {
+ struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
+ struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
+ NULL if unused */
+ u16 mask; /* AND key with mask */
+ int shift; /* shift ANDed key to the right */
+ int hash; /* hash table size; 0 if undefined */
+ int alloc_hash; /* allocated size */
+ int fall_through; /* 0: only classify if explicit match */
+};
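+
+/* Lookup sketch (the numbers are illustrative only): with mask 0x00fc
+ * and shift 2, an skb->tc_index of 0x2e is reduced to
+ *
+ *   key = (0x2e & 0xfc) >> 2 = 0x2c >> 2 = 0x0b
+ *
+ * which tcindex_classify() looks up in the perfect or imperfect hash.
+ * If nothing matches and fall_through is set, the key itself becomes the
+ * minor number of the class id returned to the qdisc.
+ */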
+
+static const struct tcf_ext_map tcindex_ext_map = {
+ .police = TCA_TCINDEX_POLICE,
+ .action = TCA_TCINDEX_ACT
+};
+
+static inline int
+tcindex_filter_is_set(struct tcindex_filter_result *r)
+{
+ return tcf_exts_is_predicative(&r->exts) || r->res.classid;
+}
+
+static struct tcindex_filter_result *
+tcindex_lookup(struct tcindex_data *p, u16 key)
+{
+ struct tcindex_filter *f;
+
+ if (p->perfect)
+ return tcindex_filter_is_set(p->perfect + key) ?
+ p->perfect + key : NULL;
+ else if (p->h) {
+ for (f = p->h[key % p->hash]; f; f = f->next)
+ if (f->key == key)
+ return &f->result;
+ }
+
+ return NULL;
+}
+
+
+static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter_result *f;
+ int key = (skb->tc_index & p->mask) >> p->shift;
+
+ pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
+ skb, tp, res, p);
+
+ f = tcindex_lookup(p, key);
+ if (!f) {
+ if (!p->fall_through)
+ return -1;
+ res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+ res->class = 0;
+ pr_debug("alg 0x%x\n", res->classid);
+ return 0;
+ }
+ *res = f->res;
+ pr_debug("map 0x%x\n", res->classid);
+
+ return tcf_exts_exec(skb, &f->exts, res);
+}
+
+
+static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter_result *r;
+
+ pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
+ if (p->perfect && handle >= p->alloc_hash)
+ return 0;
+ r = tcindex_lookup(p, handle);
+ return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL;
+}
+
+
+static void tcindex_put(struct tcf_proto *tp, unsigned long f)
+{
+ pr_debug("tcindex_put(tp %p,f 0x%lx)\n", tp, f);
+}
+
+
+static int tcindex_init(struct tcf_proto *tp)
+{
+ struct tcindex_data *p;
+
+ pr_debug("tcindex_init(tp %p)\n", tp);
+ p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ p->mask = 0xffff;
+ p->hash = DEFAULT_HASH_SIZE;
+ p->fall_through = 1;
+
+ tp->root = p;
+ return 0;
+}
+
+
+static int
+__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
+ struct tcindex_filter *f = NULL;
+
+ pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f);
+ if (p->perfect) {
+ if (!r->res.class)
+ return -ENOENT;
+ } else {
+ int i;
+ struct tcindex_filter **walk = NULL;
+
+ for (i = 0; i < p->hash; i++)
+ for (walk = p->h+i; *walk; walk = &(*walk)->next)
+ if (&(*walk)->result == r)
+ goto found;
+ return -ENOENT;
+
+found:
+ f = *walk;
+ if (lock)
+ tcf_tree_lock(tp);
+ *walk = f->next;
+ if (lock)
+ tcf_tree_unlock(tp);
+ }
+ tcf_unbind_filter(tp, &r->res);
+ tcf_exts_destroy(tp, &r->exts);
+ kfree(f);
+ return 0;
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ return __tcindex_delete(tp, arg, 1);
+}
+
+static inline int
+valid_perfect_hash(struct tcindex_data *p)
+{
+ return p->hash > (p->mask >> p->shift);
+}
+
+static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
+ [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
+ [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
+ [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
+ [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
+ [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
+};
+
+static int
+tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
+ struct tcindex_data *p, struct tcindex_filter_result *r,
+ struct nlattr **tb, struct nlattr *est)
+{
+ int err, balloc = 0;
+ struct tcindex_filter_result new_filter_result, *old_r = r;
+ struct tcindex_filter_result cr;
+ struct tcindex_data cp;
+ struct tcindex_filter *f = NULL; /* make gcc behave */
+ struct tcf_exts e;
+
+ err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map);
+ if (err < 0)
+ return err;
+
+ memcpy(&cp, p, sizeof(cp));
+ memset(&new_filter_result, 0, sizeof(new_filter_result));
+
+ if (old_r)
+ memcpy(&cr, r, sizeof(cr));
+ else
+ memset(&cr, 0, sizeof(cr));
+
+ if (tb[TCA_TCINDEX_HASH])
+ cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
+
+ if (tb[TCA_TCINDEX_MASK])
+ cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
+
+ if (tb[TCA_TCINDEX_SHIFT])
+ cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
+
+ err = -EBUSY;
+ /* Hash already allocated, make sure that we still meet the
+ * requirements for the allocated hash.
+ */
+ if (cp.perfect) {
+ if (!valid_perfect_hash(&cp) ||
+ cp.hash > cp.alloc_hash)
+ goto errout;
+ } else if (cp.h && cp.hash != cp.alloc_hash)
+ goto errout;
+
+ err = -EINVAL;
+ if (tb[TCA_TCINDEX_FALL_THROUGH])
+ cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
+
+ if (!cp.hash) {
+ /* Hash not specified, use perfect hash if the upper limit
+ * of the hashing index is below the threshold.
+ */
+ if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
+ cp.hash = (cp.mask >> cp.shift)+1;
+ else
+ cp.hash = DEFAULT_HASH_SIZE;
+ }
+
+ if (!cp.perfect && !cp.h)
+ cp.alloc_hash = cp.hash;
+
+ /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
+ * but then, we'd fail handles that may become valid after some future
+ * mask change. While this is extremely unlikely to ever matter,
+ * the check below is safer (and also more backwards-compatible).
+ */
+ if (cp.perfect || valid_perfect_hash(&cp))
+ if (handle >= cp.alloc_hash)
+ goto errout;
+
+
+ err = -ENOMEM;
+ if (!cp.perfect && !cp.h) {
+ if (valid_perfect_hash(&cp)) {
+ cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL);
+ if (!cp.perfect)
+ goto errout;
+ balloc = 1;
+ } else {
+ cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL);
+ if (!cp.h)
+ goto errout;
+ balloc = 2;
+ }
+ }
+
+ if (cp.perfect)
+ r = cp.perfect + handle;
+ else
+ r = tcindex_lookup(&cp, handle) ? : &new_filter_result;
+
+ if (r == &new_filter_result) {
+ f = kzalloc(sizeof(*f), GFP_KERNEL);
+ if (!f)
+ goto errout_alloc;
+ }
+
+ if (tb[TCA_TCINDEX_CLASSID]) {
+ cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
+ tcf_bind_filter(tp, &cr.res, base);
+ }
+
+ tcf_exts_change(tp, &cr.exts, &e);
+
+ tcf_tree_lock(tp);
+ if (old_r && old_r != r)
+ memset(old_r, 0, sizeof(*old_r));
+
+ memcpy(p, &cp, sizeof(cp));
+ memcpy(r, &cr, sizeof(cr));
+
+ if (r == &new_filter_result) {
+ struct tcindex_filter **fp;
+
+ f->key = handle;
+ f->result = new_filter_result;
+ f->next = NULL;
+ for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next)
+ /* nothing */;
+ *fp = f;
+ }
+ tcf_tree_unlock(tp);
+
+ return 0;
+
+errout_alloc:
+ if (balloc == 1)
+ kfree(cp.perfect);
+ else if (balloc == 2)
+ kfree(cp.h);
+errout:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static int
+tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+ struct nlattr **tca, unsigned long *arg)
+{
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_TCINDEX_MAX + 1];
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
+ int err;
+
+ pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
+ "p %p,r %p,*arg 0x%lx\n",
+ tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L);
+
+ if (!opt)
+ return 0;
+
+ err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy);
+ if (err < 0)
+ return err;
+
+ return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE]);
+}
+
+
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter *f, *next;
+ int i;
+
+ pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
+ if (p->perfect) {
+ for (i = 0; i < p->hash; i++) {
+ if (!p->perfect[i].res.class)
+ continue;
+ if (walker->count >= walker->skip) {
+ if (walker->fn(tp,
+ (unsigned long) (p->perfect+i), walker)
+ < 0) {
+ walker->stop = 1;
+ return;
+ }
+ }
+ walker->count++;
+ }
+ }
+ if (!p->h)
+ return;
+ for (i = 0; i < p->hash; i++) {
+ for (f = p->h[i]; f; f = next) {
+ next = f->next;
+ if (walker->count >= walker->skip) {
+ if (walker->fn(tp, (unsigned long) &f->result,
+ walker) < 0) {
+ walker->stop = 1;
+ return;
+ }
+ }
+ walker->count++;
+ }
+ }
+}
+
+
+static int tcindex_destroy_element(struct tcf_proto *tp,
+ unsigned long arg, struct tcf_walker *walker)
+{
+ return __tcindex_delete(tp, arg, 0);
+}
+
+
+static void tcindex_destroy(struct tcf_proto *tp)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcf_walker walker;
+
+ pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
+ walker.count = 0;
+ walker.skip = 0;
+ walker.fn = &tcindex_destroy_element;
+ tcindex_walk(tp, &walker);
+ kfree(p->perfect);
+ kfree(p->h);
+ kfree(p);
+ tp->root = NULL;
+}
+
+
+static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct tcindex_data *p = PRIV(tp);
+ struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
+ tp, fh, skb, t, p, r, b);
+ pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (!fh) {
+ t->tcm_handle = ~0; /* whatever ... */
+ NLA_PUT_U32(skb, TCA_TCINDEX_HASH, p->hash);
+ NLA_PUT_U16(skb, TCA_TCINDEX_MASK, p->mask);
+ NLA_PUT_U32(skb, TCA_TCINDEX_SHIFT, p->shift);
+ NLA_PUT_U32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through);
+ nla_nest_end(skb, nest);
+ } else {
+ if (p->perfect) {
+ t->tcm_handle = r-p->perfect;
+ } else {
+ struct tcindex_filter *f;
+ int i;
+
+ t->tcm_handle = 0;
+ for (i = 0; !t->tcm_handle && i < p->hash; i++) {
+ for (f = p->h[i]; !t->tcm_handle && f;
+ f = f->next) {
+ if (&f->result == r)
+ t->tcm_handle = f->key;
+ }
+ }
+ }
+ pr_debug("handle = %d\n", t->tcm_handle);
+ if (r->res.class)
+ NLA_PUT_U32(skb, TCA_TCINDEX_CLASSID, r->res.classid);
+
+ if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0)
+ goto nla_put_failure;
+ }
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
+ .kind = "tcindex",
+ .classify = tcindex_classify,
+ .init = tcindex_init,
+ .destroy = tcindex_destroy,
+ .get = tcindex_get,
+ .put = tcindex_put,
+ .change = tcindex_change,
+ .delete = tcindex_delete,
+ .walk = tcindex_walk,
+ .dump = tcindex_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_tcindex(void)
+{
+ return register_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+static void __exit exit_tcindex(void)
+{
+ unregister_tcf_proto_ops(&cls_tcindex_ops);
+}
+
+module_init(init_tcindex)
+module_exit(exit_tcindex)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
new file mode 100644
index 0000000..ea51fcd
--- /dev/null
+++ b/net/sched/cls_u32.c
@@ -0,0 +1,789 @@
+/*
+ * net/sched/cls_u32.c Ugly (or Universal) 32bit key Packet Classifier.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * The filters are packed to hash tables of key nodes
+ * with a set of 32bit key/mask pairs at every node.
+ * Nodes reference next level hash tables etc.
+ *
+ * This scheme is the best universal classifier I managed to
+ * invent; it is not super-fast, but it is not slow (provided you
+ * program it correctly), and general enough. And its relative
+ * speed grows as the number of rules becomes larger.
+ *
+ * It seems that it represents the best middle point between
+ * speed and manageability both by human and by machine.
+ *
+ * It is especially useful for link sharing combined with QoS;
+ * pure RSVP doesn't need such a general approach and can use
+ * much simpler (and faster) schemes, sort of cls_rsvp.c.
+ *
+ * JHS: We should remove the CONFIG_NET_CLS_IND from here
+ * eventually when the meta match extension is made available
+ *
+ * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
+ */
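+
+/*
+ * Matching sketch (illustrative; see u32_classify() below): each key of
+ * a node is a masked comparison against one 32-bit word of the packet,
+ * conceptually
+ *
+ *   matched = ((*(__be32 *)(ptr + key->off) ^ key->val) & key->mask) == 0;
+ *
+ * and only when every key of a node matches is its result used, or the
+ * walk continued into the linked lower-level hash table (ht_down).
+ */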
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+
+struct tc_u_knode
+{
+ struct tc_u_knode *next;
+ u32 handle;
+ struct tc_u_hnode *ht_up;
+ struct tcf_exts exts;
+#ifdef CONFIG_NET_CLS_IND
+ char indev[IFNAMSIZ];
+#endif
+ u8 fshift;
+ struct tcf_result res;
+ struct tc_u_hnode *ht_down;
+#ifdef CONFIG_CLS_U32_PERF
+ struct tc_u32_pcnt *pf;
+#endif
+#ifdef CONFIG_CLS_U32_MARK
+ struct tc_u32_mark mark;
+#endif
+ struct tc_u32_sel sel;
+};
+
+struct tc_u_hnode
+{
+ struct tc_u_hnode *next;
+ u32 handle;
+ u32 prio;
+ struct tc_u_common *tp_c;
+ int refcnt;
+ unsigned divisor;
+ struct tc_u_knode *ht[1];
+};
+
+struct tc_u_common
+{
+ struct tc_u_hnode *hlist;
+ struct Qdisc *q;
+ int refcnt;
+ u32 hgenerator;
+};
+
+static const struct tcf_ext_map u32_ext_map = {
+ .action = TCA_U32_ACT,
+ .police = TCA_U32_POLICE
+};
+
+static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+{
+ unsigned h = ntohl(key & sel->hmask)>>fshift;
+
+ return h;
+}
+
+static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
+{
+ struct {
+ struct tc_u_knode *knode;
+ u8 *ptr;
+ } stack[TC_U32_MAXDEPTH];
+
+ struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+ u8 *ptr = skb_network_header(skb);
+ struct tc_u_knode *n;
+ int sdepth = 0;
+ int off2 = 0;
+ int sel = 0;
+#ifdef CONFIG_CLS_U32_PERF
+ int j;
+#endif
+ int i, r;
+
+next_ht:
+ n = ht->ht[sel];
+
+next_knode:
+ if (n) {
+ struct tc_u32_key *key = n->sel.keys;
+
+#ifdef CONFIG_CLS_U32_PERF
+ n->pf->rcnt +=1;
+ j = 0;
+#endif
+
+#ifdef CONFIG_CLS_U32_MARK
+ if ((skb->mark & n->mark.mask) != n->mark.val) {
+ n = n->next;
+ goto next_knode;
+ } else {
+ n->mark.success++;
+ }
+#endif
+
+ for (i = n->sel.nkeys; i>0; i--, key++) {
+
+ if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
+ n = n->next;
+ goto next_knode;
+ }
+#ifdef CONFIG_CLS_U32_PERF
+ n->pf->kcnts[j] +=1;
+ j++;
+#endif
+ }
+ if (n->ht_down == NULL) {
+check_terminal:
+ if (n->sel.flags&TC_U32_TERMINAL) {
+
+ *res = n->res;
+#ifdef CONFIG_NET_CLS_IND
+ if (!tcf_match_indev(skb, n->indev)) {
+ n = n->next;
+ goto next_knode;
+ }
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+ n->pf->rhit +=1;
+#endif
+ r = tcf_exts_exec(skb, &n->exts, res);
+ if (r < 0) {
+ n = n->next;
+ goto next_knode;
+ }
+
+ return r;
+ }
+ n = n->next;
+ goto next_knode;
+ }
+
+ /* PUSH */
+ if (sdepth >= TC_U32_MAXDEPTH)
+ goto deadloop;
+ stack[sdepth].knode = n;
+ stack[sdepth].ptr = ptr;
+ sdepth++;
+
+ ht = n->ht_down;
+ sel = 0;
+ if (ht->divisor)
+ sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
+
+ if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+ goto next_ht;
+
+ if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+ off2 = n->sel.off + 3;
+ if (n->sel.flags&TC_U32_VAROFFSET)
+ off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
+ off2 &= ~3;
+ }
+ if (n->sel.flags&TC_U32_EAT) {
+ ptr += off2;
+ off2 = 0;
+ }
+
+ if (ptr < skb_tail_pointer(skb))
+ goto next_ht;
+ }
+
+ /* POP */
+ if (sdepth--) {
+ n = stack[sdepth].knode;
+ ht = n->ht_up;
+ ptr = stack[sdepth].ptr;
+ goto check_terminal;
+ }
+ return -1;
+
+deadloop:
+ if (net_ratelimit())
+ printk("cls_u32: dead loop\n");
+ return -1;
+}
+
+static __inline__ struct tc_u_hnode *
+u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
+{
+ struct tc_u_hnode *ht;
+
+ for (ht = tp_c->hlist; ht; ht = ht->next)
+ if (ht->handle == handle)
+ break;
+
+ return ht;
+}
+
+static __inline__ struct tc_u_knode *
+u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
+{
+ unsigned sel;
+ struct tc_u_knode *n = NULL;
+
+ sel = TC_U32_HASH(handle);
+ if (sel > ht->divisor)
+ goto out;
+
+ for (n = ht->ht[sel]; n; n = n->next)
+ if (n->handle == handle)
+ break;
+out:
+ return n;
+}
+
+
+static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
+{
+ struct tc_u_hnode *ht;
+ struct tc_u_common *tp_c = tp->data;
+
+ if (TC_U32_HTID(handle) == TC_U32_ROOT)
+ ht = tp->root;
+ else
+ ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
+
+ if (!ht)
+ return 0;
+
+ if (TC_U32_KEY(handle) == 0)
+ return (unsigned long)ht;
+
+ return (unsigned long)u32_lookup_key(ht, handle);
+}
+
+static void u32_put(struct tcf_proto *tp, unsigned long f)
+{
+}
+
+static u32 gen_new_htid(struct tc_u_common *tp_c)
+{
+ int i = 0x800;
+
+ do {
+ if (++tp_c->hgenerator == 0x7FF)
+ tp_c->hgenerator = 1;
+ } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+
+ return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+}
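+
+/* Handle layout, as encoded by the TC_U32_* macros in linux/pkt_cls.h:
+ * bits 20-31 carry the hash table id (hence the << 20 in gen_new_htid()
+ * above), bits 12-19 the bucket within that table and bits 0-11 the key
+ * node id; TC_U32_KEY() covers the low 20 bits.
+ */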
+
+static int u32_init(struct tcf_proto *tp)
+{
+ struct tc_u_hnode *root_ht;
+ struct tc_u_common *tp_c;
+
+ tp_c = tp->q->u32_node;
+
+ root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL);
+ if (root_ht == NULL)
+ return -ENOBUFS;
+
+ root_ht->divisor = 0;
+ root_ht->refcnt++;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+ root_ht->prio = tp->prio;
+
+ if (tp_c == NULL) {
+ tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
+ if (tp_c == NULL) {
+ kfree(root_ht);
+ return -ENOBUFS;
+ }
+ tp_c->q = tp->q;
+ tp->q->u32_node = tp_c;
+ }
+
+ tp_c->refcnt++;
+ root_ht->next = tp_c->hlist;
+ tp_c->hlist = root_ht;
+ root_ht->tp_c = tp_c;
+
+ tp->root = root_ht;
+ tp->data = tp_c;
+ return 0;
+}
+
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
+{
+ tcf_unbind_filter(tp, &n->res);
+ tcf_exts_destroy(tp, &n->exts);
+ if (n->ht_down)
+ n->ht_down->refcnt--;
+#ifdef CONFIG_CLS_U32_PERF
+ kfree(n->pf);
+#endif
+ kfree(n);
+ return 0;
+}
+
+static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
+{
+ struct tc_u_knode **kp;
+ struct tc_u_hnode *ht = key->ht_up;
+
+ if (ht) {
+ for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
+ if (*kp == key) {
+ tcf_tree_lock(tp);
+ *kp = key->next;
+ tcf_tree_unlock(tp);
+
+ u32_destroy_key(tp, key);
+ return 0;
+ }
+ }
+ }
+ WARN_ON(1);
+ return 0;
+}
+
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+ struct tc_u_knode *n;
+ unsigned h;
+
+ for (h=0; h<=ht->divisor; h++) {
+ while ((n = ht->ht[h]) != NULL) {
+ ht->ht[h] = n->next;
+
+ u32_destroy_key(tp, n);
+ }
+ }
+}
+
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+{
+ struct tc_u_common *tp_c = tp->data;
+ struct tc_u_hnode **hn;
+
+ WARN_ON(ht->refcnt);
+
+ u32_clear_hnode(tp, ht);
+
+ for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
+ if (*hn == ht) {
+ *hn = ht->next;
+ kfree(ht);
+ return 0;
+ }
+ }
+
+ WARN_ON(1);
+ return -ENOENT;
+}
+
+static void u32_destroy(struct tcf_proto *tp)
+{
+ struct tc_u_common *tp_c = tp->data;
+ struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
+
+ WARN_ON(root_ht == NULL);
+
+ if (root_ht && --root_ht->refcnt == 0)
+ u32_destroy_hnode(tp, root_ht);
+
+ if (--tp_c->refcnt == 0) {
+ struct tc_u_hnode *ht;
+
+ tp->q->u32_node = NULL;
+
+ for (ht = tp_c->hlist; ht; ht = ht->next) {
+ ht->refcnt--;
+ u32_clear_hnode(tp, ht);
+ }
+
+ while ((ht = tp_c->hlist) != NULL) {
+ tp_c->hlist = ht->next;
+
+ WARN_ON(ht->refcnt != 0);
+
+ kfree(ht);
+ }
+
+ kfree(tp_c);
+ }
+
+ tp->data = NULL;
+}
+
+static int u32_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+
+ if (ht == NULL)
+ return 0;
+
+ if (TC_U32_KEY(ht->handle))
+ return u32_delete_key(tp, (struct tc_u_knode*)ht);
+
+ if (tp->root == ht)
+ return -EINVAL;
+
+ if (ht->refcnt == 1) {
+ ht->refcnt--;
+ u32_destroy_hnode(tp, ht);
+ } else {
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+{
+ struct tc_u_knode *n;
+ unsigned i = 0x7FF;
+
+ for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+ if (i < TC_U32_NODE(n->handle))
+ i = TC_U32_NODE(n->handle);
+ i++;
+
+ return handle|(i>0xFFF ? 0xFFF : i);
+}
+
+static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
+ [TCA_U32_CLASSID] = { .type = NLA_U32 },
+ [TCA_U32_HASH] = { .type = NLA_U32 },
+ [TCA_U32_LINK] = { .type = NLA_U32 },
+ [TCA_U32_DIVISOR] = { .type = NLA_U32 },
+ [TCA_U32_SEL] = { .len = sizeof(struct tc_u32_sel) },
+ [TCA_U32_INDEV] = { .type = NLA_STRING, .len = IFNAMSIZ },
+ [TCA_U32_MARK] = { .len = sizeof(struct tc_u32_mark) },
+};
+
+static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
+ struct tc_u_hnode *ht,
+ struct tc_u_knode *n, struct nlattr **tb,
+ struct nlattr *est)
+{
+ int err;
+ struct tcf_exts e;
+
+ err = tcf_exts_validate(tp, tb, est, &e, &u32_ext_map);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_U32_LINK]) {
+ u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
+ struct tc_u_hnode *ht_down = NULL;
+
+ if (TC_U32_KEY(handle))
+ goto errout;
+
+ if (handle) {
+ ht_down = u32_lookup_ht(ht->tp_c, handle);
+
+ if (ht_down == NULL)
+ goto errout;
+ ht_down->refcnt++;
+ }
+
+ tcf_tree_lock(tp);
+ ht_down = xchg(&n->ht_down, ht_down);
+ tcf_tree_unlock(tp);
+
+ if (ht_down)
+ ht_down->refcnt--;
+ }
+ if (tb[TCA_U32_CLASSID]) {
+ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+ tcf_bind_filter(tp, &n->res, base);
+ }
+
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_U32_INDEV]) {
+ err = tcf_change_indev(tp, n->indev, tb[TCA_U32_INDEV]);
+ if (err < 0)
+ goto errout;
+ }
+#endif
+ tcf_exts_change(tp, &n->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(tp, &e);
+ return err;
+}
+
+static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
+ struct nlattr **tca,
+ unsigned long *arg)
+{
+ struct tc_u_common *tp_c = tp->data;
+ struct tc_u_hnode *ht;
+ struct tc_u_knode *n;
+ struct tc_u32_sel *s;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_U32_MAX + 1];
+ u32 htid;
+ int err;
+
+ if (opt == NULL)
+ return handle ? -EINVAL : 0;
+
+ err = nla_parse_nested(tb, TCA_U32_MAX, opt, u32_policy);
+ if (err < 0)
+ return err;
+
+ if ((n = (struct tc_u_knode*)*arg) != NULL) {
+ if (TC_U32_KEY(n->handle) == 0)
+ return -EINVAL;
+
+ return u32_set_parms(tp, base, n->ht_up, n, tb, tca[TCA_RATE]);
+ }
+
+ if (tb[TCA_U32_DIVISOR]) {
+ unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+
+ if (--divisor > 0x100)
+ return -EINVAL;
+ if (TC_U32_KEY(handle))
+ return -EINVAL;
+ if (handle == 0) {
+ handle = gen_new_htid(tp->data);
+ if (handle == 0)
+ return -ENOMEM;
+ }
+ ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+ if (ht == NULL)
+ return -ENOBUFS;
+ ht->tp_c = tp_c;
+ ht->refcnt = 1;
+ ht->divisor = divisor;
+ ht->handle = handle;
+ ht->prio = tp->prio;
+ ht->next = tp_c->hlist;
+ tp_c->hlist = ht;
+ *arg = (unsigned long)ht;
+ return 0;
+ }
+
+ if (tb[TCA_U32_HASH]) {
+ htid = nla_get_u32(tb[TCA_U32_HASH]);
+ if (TC_U32_HTID(htid) == TC_U32_ROOT) {
+ ht = tp->root;
+ htid = ht->handle;
+ } else {
+ ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
+ if (ht == NULL)
+ return -EINVAL;
+ }
+ } else {
+ ht = tp->root;
+ htid = ht->handle;
+ }
+
+ if (ht->divisor < TC_U32_HASH(htid))
+ return -EINVAL;
+
+ if (handle) {
+ if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
+ return -EINVAL;
+ handle = htid | TC_U32_NODE(handle);
+ } else
+ handle = gen_new_kid(ht, htid);
+
+ if (tb[TCA_U32_SEL] == NULL)
+ return -EINVAL;
+
+ s = nla_data(tb[TCA_U32_SEL]);
+
+ n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
+ if (n == NULL)
+ return -ENOBUFS;
+
+#ifdef CONFIG_CLS_U32_PERF
+ n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL);
+ if (n->pf == NULL) {
+ kfree(n);
+ return -ENOBUFS;
+ }
+#endif
+
+ memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
+ n->ht_up = ht;
+ n->handle = handle;
+ n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
+
+#ifdef CONFIG_CLS_U32_MARK
+ if (tb[TCA_U32_MARK]) {
+ struct tc_u32_mark *mark;
+
+ mark = nla_data(tb[TCA_U32_MARK]);
+ memcpy(&n->mark, mark, sizeof(struct tc_u32_mark));
+ n->mark.success = 0;
+ }
+#endif
+
+ err = u32_set_parms(tp, base, ht, n, tb, tca[TCA_RATE]);
+ if (err == 0) {
+ struct tc_u_knode **ins;
+ for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
+ if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
+ break;
+
+ n->next = *ins;
+ tcf_tree_lock(tp);
+ *ins = n;
+ tcf_tree_unlock(tp);
+
+ *arg = (unsigned long)n;
+ return 0;
+ }
+#ifdef CONFIG_CLS_U32_PERF
+ kfree(n->pf);
+#endif
+ kfree(n);
+ return err;
+}
+
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct tc_u_common *tp_c = tp->data;
+ struct tc_u_hnode *ht;
+ struct tc_u_knode *n;
+ unsigned h;
+
+ if (arg->stop)
+ return;
+
+ for (ht = tp_c->hlist; ht; ht = ht->next) {
+ if (ht->prio != tp->prio)
+ continue;
+ if (arg->count >= arg->skip) {
+ if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ }
+ arg->count++;
+ for (h = 0; h <= ht->divisor; h++) {
+ for (n = ht->ht[h]; n; n = n->next) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(tp, (unsigned long)n, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+ }
+}
+
+static int u32_dump(struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct tc_u_knode *n = (struct tc_u_knode*)fh;
+ struct nlattr *nest;
+
+ if (n == NULL)
+ return skb->len;
+
+ t->tcm_handle = n->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ if (TC_U32_KEY(n->handle) == 0) {
+ struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
+ u32 divisor = ht->divisor+1;
+ NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
+ } else {
+ NLA_PUT(skb, TCA_U32_SEL,
+ sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
+ &n->sel);
+ if (n->ht_up) {
+ u32 htid = n->handle & 0xFFFFF000;
+ NLA_PUT_U32(skb, TCA_U32_HASH, htid);
+ }
+ if (n->res.classid)
+ NLA_PUT_U32(skb, TCA_U32_CLASSID, n->res.classid);
+ if (n->ht_down)
+ NLA_PUT_U32(skb, TCA_U32_LINK, n->ht_down->handle);
+
+#ifdef CONFIG_CLS_U32_MARK
+ if (n->mark.val || n->mark.mask)
+ NLA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
+#endif
+
+ if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
+ goto nla_put_failure;
+
+#ifdef CONFIG_NET_CLS_IND
+ if(strlen(n->indev))
+ NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
+#endif
+#ifdef CONFIG_CLS_U32_PERF
+ NLA_PUT(skb, TCA_U32_PCNT,
+ sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
+ n->pf);
+#endif
+ }
+
+ nla_nest_end(skb, nest);
+
+ if (TC_U32_KEY(n->handle))
+ if (tcf_exts_dump_stats(skb, &n->exts, &u32_ext_map) < 0)
+ goto nla_put_failure;
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_u32_ops __read_mostly = {
+ .kind = "u32",
+ .classify = u32_classify,
+ .init = u32_init,
+ .destroy = u32_destroy,
+ .get = u32_get,
+ .put = u32_put,
+ .change = u32_change,
+ .delete = u32_delete,
+ .walk = u32_walk,
+ .dump = u32_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init init_u32(void)
+{
+ printk("u32 classifier\n");
+#ifdef CONFIG_CLS_U32_PERF
+ printk(" Performance counters on\n");
+#endif
+#ifdef CONFIG_NET_CLS_IND
+ printk(" input device check on \n");
+#endif
+#ifdef CONFIG_NET_CLS_ACT
+ printk(" Actions configured \n");
+#endif
+ return register_tcf_proto_ops(&cls_u32_ops);
+}
+
+static void __exit exit_u32(void)
+{
+ unregister_tcf_proto_ops(&cls_u32_ops);
+}
+
+module_init(init_u32)
+module_exit(exit_u32)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
new file mode 100644
index 0000000..bc45039
--- /dev/null
+++ b/net/sched/em_cmp.c
@@ -0,0 +1,98 @@
+/*
+ * net/sched/em_cmp.c Simple packet data comparison ematch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_cmp.h>
+#include <asm/unaligned.h>
+#include <net/pkt_cls.h>
+
+static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
+{
+ return unlikely(cmp->flags & TCF_EM_CMP_TRANS);
+}
+
+static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ struct tcf_em_cmp *cmp = (struct tcf_em_cmp *) em->data;
+ unsigned char *ptr = tcf_get_base_ptr(skb, cmp->layer) + cmp->off;
+ u32 val = 0;
+
+ if (!tcf_valid_offset(skb, ptr, cmp->align))
+ return 0;
+
+ switch (cmp->align) {
+ case TCF_EM_ALIGN_U8:
+ val = *ptr;
+ break;
+
+ case TCF_EM_ALIGN_U16:
+ val = get_unaligned_be16(ptr);
+
+ if (cmp_needs_transformation(cmp))
+ val = be16_to_cpu(val);
+ break;
+
+ case TCF_EM_ALIGN_U32:
+ /* Worth checking boundaries? The branching seems
+ * to get worse. Visit again. */
+ val = get_unaligned_be32(ptr);
+
+ if (cmp_needs_transformation(cmp))
+ val = be32_to_cpu(val);
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (cmp->mask)
+ val &= cmp->mask;
+
+ switch (cmp->opnd) {
+ case TCF_EM_OPND_EQ:
+ return val == cmp->val;
+ case TCF_EM_OPND_LT:
+ return val < cmp->val;
+ case TCF_EM_OPND_GT:
+ return val > cmp->val;
+ }
+
+ return 0;
+}
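+
+/* Worked example (the values are illustrative only): layer
+ * TCF_LAYER_NETWORK, off 1, align TCF_EM_ALIGN_U8, mask 0xfc, opnd
+ * TCF_EM_OPND_EQ and val 0xb8 picks the IPv4 TOS byte, keeps only the
+ * DSCP bits and matches packets carrying DSCP 46 (EF), since
+ * 46 << 2 == 0xb8.
+ */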
+
+static struct tcf_ematch_ops em_cmp_ops = {
+ .kind = TCF_EM_CMP,
+ .datalen = sizeof(struct tcf_em_cmp),
+ .match = em_cmp_match,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_cmp_ops.link)
+};
+
+static int __init init_em_cmp(void)
+{
+ return tcf_em_register(&em_cmp_ops);
+}
+
+static void __exit exit_em_cmp(void)
+{
+ tcf_em_unregister(&em_cmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_cmp);
+module_exit(exit_em_cmp);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
new file mode 100644
index 0000000..72cf86e
--- /dev/null
+++ b/net/sched/em_meta.c
@@ -0,0 +1,875 @@
+/*
+ * net/sched/em_meta.c Metadata ematch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * The metadata ematch compares two meta objects where each object
+ * represents either a meta value stored in the kernel or a static
+ * value provided by userspace. The objects are not provided by
+ * userspace itself but rather a definition providing the information
+ * to build them. Every object is of a certain type, which must
+ * match the type of the object it is being compared to.
+ *
+ * The definition of an object consists of the type (meta type), an
+ * identifier (meta id) and additional type specific information.
+ * The meta id is either TCF_META_ID_VALUE for values provided by
+ * userspace or an index into the meta operations table consisting of
+ * function pointers to type specific meta data collectors returning
+ * the value of the requested meta value.
+ *
+ * lvalue rvalue
+ * +-----------+ +-----------+
+ * | type: INT | | type: INT |
+ * def | id: DEV | | id: VALUE |
+ * | data: | | data: 3 |
+ * +-----------+ +-----------+
+ * | |
+ * ---> meta_ops[INT][DEV](...) |
+ * | |
+ * ----------- |
+ * V V
+ * +-----------+ +-----------+
+ * | type: INT | | type: INT |
+ * obj | id: DEV | | id: VALUE |
+ * | data: 2 |<--data got filled out | data: 3 |
+ * +-----------+ +-----------+
+ * | |
+ * --------------> 2 equals 3 <--------------
+ *
+ * This is a simplified schema; the complexity varies depending
+ * on the meta type. Obviously, the length of the data must also
+ * be provided for non-numeric types.
+ *
+ * Additionally, type dependent modifiers such as shift operators
+ * or masks may be applied to extend the functionality. As of now,
+ * the variable length type supports shifting the byte string to
+ * the right, eating up any number of octets and thus supporting
+ * wildcard interface name comparisons such as "ppp%" matching
+ * ppp0..9.
+ *
+ * NOTE: Certain meta values depend on other subsystems and are
+ * only available if that subsystem is enabled in the kernel.
+ */
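+
+/*
+ * Runtime sketch (mirroring em_meta_match() further down): both value
+ * definitions are resolved into meta objects by meta_get(), which either
+ * copies the userspace supplied constant or calls the collector from
+ * __meta_ops; then the type's compare() is applied and the configured
+ * operand (eq/lt/gt) decides the verdict.  In the diagram above this
+ * evaluates "DEV eq 3" as meta_int_dev() == 3, which is false for the
+ * collected value 2.
+ */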
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/if_vlan.h>
+#include <linux/tc_ematch/tc_em_meta.h>
+#include <net/dst.h>
+#include <net/route.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+
+struct meta_obj
+{
+ unsigned long value;
+ unsigned int len;
+};
+
+struct meta_value
+{
+ struct tcf_meta_val hdr;
+ unsigned long val;
+ unsigned int len;
+};
+
+struct meta_match
+{
+ struct meta_value lvalue;
+ struct meta_value rvalue;
+};
+
+static inline int meta_id(struct meta_value *v)
+{
+ return TCF_META_ID(v->hdr.kind);
+}
+
+static inline int meta_type(struct meta_value *v)
+{
+ return TCF_META_TYPE(v->hdr.kind);
+}
+
+#define META_COLLECTOR(FUNC) static void meta_##FUNC(struct sk_buff *skb, \
+ struct tcf_pkt_info *info, struct meta_value *v, \
+ struct meta_obj *dst, int *err)
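+
+/* For reference, META_COLLECTOR(int_priority) expands to
+ *
+ *   static void meta_int_priority(struct sk_buff *skb,
+ *                                 struct tcf_pkt_info *info,
+ *                                 struct meta_value *v,
+ *                                 struct meta_obj *dst, int *err)
+ *
+ * so every collector below shares this signature and reports its value
+ * through *dst (or a failure through *err).
+ */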
+
+/**************************************************************************
+ * System status & misc
+ **************************************************************************/
+
+META_COLLECTOR(int_random)
+{
+ get_random_bytes(&dst->value, sizeof(dst->value));
+}
+
+static inline unsigned long fixed_loadavg(int load)
+{
+ int rnd_load = load + (FIXED_1/200);
+ int rnd_frac = ((rnd_load & (FIXED_1-1)) * 100) >> FSHIFT;
+
+ return ((rnd_load >> FSHIFT) * 100) + rnd_frac;
+}
+
+META_COLLECTOR(int_loadavg_0)
+{
+ dst->value = fixed_loadavg(avenrun[0]);
+}
+
+META_COLLECTOR(int_loadavg_1)
+{
+ dst->value = fixed_loadavg(avenrun[1]);
+}
+
+META_COLLECTOR(int_loadavg_2)
+{
+ dst->value = fixed_loadavg(avenrun[2]);
+}
+
+/**************************************************************************
+ * Device names & indices
+ **************************************************************************/
+
+static inline int int_dev(struct net_device *dev, struct meta_obj *dst)
+{
+ if (unlikely(dev == NULL))
+ return -1;
+
+ dst->value = dev->ifindex;
+ return 0;
+}
+
+static inline int var_dev(struct net_device *dev, struct meta_obj *dst)
+{
+ if (unlikely(dev == NULL))
+ return -1;
+
+ dst->value = (unsigned long) dev->name;
+ dst->len = strlen(dev->name);
+ return 0;
+}
+
+META_COLLECTOR(int_dev)
+{
+ *err = int_dev(skb->dev, dst);
+}
+
+META_COLLECTOR(var_dev)
+{
+ *err = var_dev(skb->dev, dst);
+}
+
+/**************************************************************************
+ * vlan tag
+ **************************************************************************/
+
+META_COLLECTOR(int_vlan_tag)
+{
+ unsigned short uninitialized_var(tag);
+ if (vlan_get_tag(skb, &tag) < 0)
+ *err = -1;
+ else
+ dst->value = tag;
+}
+
+
+
+/**************************************************************************
+ * skb attributes
+ **************************************************************************/
+
+META_COLLECTOR(int_priority)
+{
+ dst->value = skb->priority;
+}
+
+META_COLLECTOR(int_protocol)
+{
+ /* Let userspace take care of the byte ordering */
+ dst->value = skb->protocol;
+}
+
+META_COLLECTOR(int_pkttype)
+{
+ dst->value = skb->pkt_type;
+}
+
+META_COLLECTOR(int_pktlen)
+{
+ dst->value = skb->len;
+}
+
+META_COLLECTOR(int_datalen)
+{
+ dst->value = skb->data_len;
+}
+
+META_COLLECTOR(int_maclen)
+{
+ dst->value = skb->mac_len;
+}
+
+/**************************************************************************
+ * Netfilter
+ **************************************************************************/
+
+META_COLLECTOR(int_mark)
+{
+ dst->value = skb->mark;
+}
+
+/**************************************************************************
+ * Traffic Control
+ **************************************************************************/
+
+META_COLLECTOR(int_tcindex)
+{
+ dst->value = skb->tc_index;
+}
+
+/**************************************************************************
+ * Routing
+ **************************************************************************/
+
+META_COLLECTOR(int_rtclassid)
+{
+ if (unlikely(skb->dst == NULL))
+ *err = -1;
+ else
+#ifdef CONFIG_NET_CLS_ROUTE
+ dst->value = skb->dst->tclassid;
+#else
+ dst->value = 0;
+#endif
+}
+
+META_COLLECTOR(int_rtiif)
+{
+ if (unlikely(skb->rtable == NULL))
+ *err = -1;
+ else
+ dst->value = skb->rtable->fl.iif;
+}
+
+/**************************************************************************
+ * Socket Attributes
+ **************************************************************************/
+
+#define SKIP_NONLOCAL(skb) \
+ if (unlikely(skb->sk == NULL)) { \
+ *err = -1; \
+ return; \
+ }
+
+META_COLLECTOR(int_sk_family)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_family;
+}
+
+META_COLLECTOR(int_sk_state)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_state;
+}
+
+META_COLLECTOR(int_sk_reuse)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_reuse;
+}
+
+META_COLLECTOR(int_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+ /* No error if bound_dev_if is 0, legal userspace check */
+ dst->value = skb->sk->sk_bound_dev_if;
+}
+
+META_COLLECTOR(var_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+
+ if (skb->sk->sk_bound_dev_if == 0) {
+ dst->value = (unsigned long) "any";
+ dst->len = 3;
+ } else {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if);
+ *err = var_dev(dev, dst);
+ if (dev)
+ dev_put(dev);
+ }
+}
+
+META_COLLECTOR(int_sk_refcnt)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_refcnt);
+}
+
+META_COLLECTOR(int_sk_rcvbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvbuf;
+}
+
+META_COLLECTOR(int_sk_shutdown)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_shutdown;
+}
+
+META_COLLECTOR(int_sk_proto)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_protocol;
+}
+
+META_COLLECTOR(int_sk_type)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_type;
+}
+
+META_COLLECTOR(int_sk_rmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
+}
+
+META_COLLECTOR(int_sk_wmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
+}
+
+META_COLLECTOR(int_sk_omem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_omem_alloc);
+}
+
+META_COLLECTOR(int_sk_rcv_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_receive_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_snd_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_wmem_queued)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_wmem_queued;
+}
+
+META_COLLECTOR(int_sk_fwd_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_forward_alloc;
+}
+
+META_COLLECTOR(int_sk_sndbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndbuf;
+}
+
+META_COLLECTOR(int_sk_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_allocation;
+}
+
+META_COLLECTOR(int_sk_route_caps)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_route_caps;
+}
+
+META_COLLECTOR(int_sk_hash)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_hash;
+}
+
+META_COLLECTOR(int_sk_lingertime)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_lingertime / HZ;
+}
+
+META_COLLECTOR(int_sk_err_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_error_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_max_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_max_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_prio)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_priority;
+}
+
+META_COLLECTOR(int_sk_rcvlowat)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvlowat;
+}
+
+META_COLLECTOR(int_sk_rcvtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sndtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sendmsg_off)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndmsg_off;
+}
+
+META_COLLECTOR(int_sk_write_pend)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_pending;
+}
+
+/**************************************************************************
+ * Meta value collectors assignment table
+ **************************************************************************/
+
+struct meta_ops
+{
+ void (*get)(struct sk_buff *, struct tcf_pkt_info *,
+ struct meta_value *, struct meta_obj *, int *);
+};
+
+#define META_ID(name) TCF_META_ID_##name
+#define META_FUNC(name) { .get = meta_##name }
+
+/* Meta value operations table listing all meta value collectors and
+ * assigning them to a type and meta id. */
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+ [TCF_META_TYPE_VAR] = {
+ [META_ID(DEV)] = META_FUNC(var_dev),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
+ },
+ [TCF_META_TYPE_INT] = {
+ [META_ID(RANDOM)] = META_FUNC(int_random),
+ [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
+ [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
+ [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
+ [META_ID(DEV)] = META_FUNC(int_dev),
+ [META_ID(PRIORITY)] = META_FUNC(int_priority),
+ [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
+ [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
+ [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
+ [META_ID(DATALEN)] = META_FUNC(int_datalen),
+ [META_ID(MACLEN)] = META_FUNC(int_maclen),
+ [META_ID(NFMARK)] = META_FUNC(int_mark),
+ [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
+ [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
+ [META_ID(RTIIF)] = META_FUNC(int_rtiif),
+ [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
+ [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
+ [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
+ [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
+ [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
+ [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
+ [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
+ [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
+ [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
+ [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
+ [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
+ [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
+ [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
+ [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
+ [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
+ [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
+ [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
+ [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
+ [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
+ [META_ID(SK_HASH)] = META_FUNC(int_sk_hash),
+ [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
+ [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
+ [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
+ [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
+ [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
+ [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
+ [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
+ [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
+ [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
+ [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
+ }
+};
+
+static inline struct meta_ops * meta_ops(struct meta_value *val)
+{
+ return &__meta_ops[meta_type(val)][meta_id(val)];
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_VAR
+ **************************************************************************/
+
+static int meta_var_compare(struct meta_obj *a, struct meta_obj *b)
+{
+ int r = a->len - b->len;
+
+ if (r == 0)
+ r = memcmp((void *) a->value, (void *) b->value, a->len);
+
+ return r;
+}
+
+static int meta_var_change(struct meta_value *dst, struct nlattr *nla)
+{
+ int len = nla_len(nla);
+
+ dst->val = (unsigned long)kmemdup(nla_data(nla), len, GFP_KERNEL);
+ if (dst->val == 0UL)
+ return -ENOMEM;
+ dst->len = len;
+ return 0;
+}
+
+static void meta_var_destroy(struct meta_value *v)
+{
+ kfree((void *) v->val);
+}
+
+static void meta_var_apply_extras(struct meta_value *v,
+ struct meta_obj *dst)
+{
+ int shift = v->hdr.shift;
+
+ if (shift && shift < dst->len)
+ dst->len -= shift;
+}
+
+static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+ if (v->val && v->len)
+ NLA_PUT(skb, tlv, v->len, (void *) v->val);
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/**************************************************************************
+ * Type specific operations for TCF_META_TYPE_INT
+ **************************************************************************/
+
+static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
+{
+ /* Let gcc optimize it; the unlikely() is not really based on
+ * any measurements, but jump-free code for mismatches seems
+ * more logical. */
+ if (unlikely(a->value == b->value))
+ return 0;
+ else if (a->value < b->value)
+ return -1;
+ else
+ return 1;
+}
+
+static int meta_int_change(struct meta_value *dst, struct nlattr *nla)
+{
+ if (nla_len(nla) >= sizeof(unsigned long)) {
+ dst->val = *(unsigned long *) nla_data(nla);
+ dst->len = sizeof(unsigned long);
+ } else if (nla_len(nla) == sizeof(u32)) {
+ dst->val = nla_get_u32(nla);
+ dst->len = sizeof(u32);
+ } else
+ return -EINVAL;
+
+ return 0;
+}
+
+static void meta_int_apply_extras(struct meta_value *v,
+ struct meta_obj *dst)
+{
+ if (v->hdr.shift)
+ dst->value >>= v->hdr.shift;
+
+ if (v->val)
+ dst->value &= v->val;
+}
+
+static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
+{
+ if (v->len == sizeof(unsigned long))
+ NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
+ else if (v->len == sizeof(u32)) {
+ NLA_PUT_U32(skb, tlv, v->val);
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+/**************************************************************************
+ * Type specific operations table
+ **************************************************************************/
+
+struct meta_type_ops
+{
+ void (*destroy)(struct meta_value *);
+ int (*compare)(struct meta_obj *, struct meta_obj *);
+ int (*change)(struct meta_value *, struct nlattr *);
+ void (*apply_extras)(struct meta_value *, struct meta_obj *);
+ int (*dump)(struct sk_buff *, struct meta_value *, int);
+};
+
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+ [TCF_META_TYPE_VAR] = {
+ .destroy = meta_var_destroy,
+ .compare = meta_var_compare,
+ .change = meta_var_change,
+ .apply_extras = meta_var_apply_extras,
+ .dump = meta_var_dump
+ },
+ [TCF_META_TYPE_INT] = {
+ .compare = meta_int_compare,
+ .change = meta_int_change,
+ .apply_extras = meta_int_apply_extras,
+ .dump = meta_int_dump
+ }
+};
+
+static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+{
+ return &__meta_type_ops[meta_type(v)];
+}
+
+/**************************************************************************
+ * Core
+ **************************************************************************/
+
+static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
+ struct meta_value *v, struct meta_obj *dst)
+{
+ int err = 0;
+
+ if (meta_id(v) == TCF_META_ID_VALUE) {
+ dst->value = v->val;
+ dst->len = v->len;
+ return 0;
+ }
+
+ meta_ops(v)->get(skb, info, v, dst, &err);
+ if (err < 0)
+ return err;
+
+ if (meta_type_ops(v)->apply_extras)
+ meta_type_ops(v)->apply_extras(v, dst);
+
+ return 0;
+}
+
+static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
+ struct tcf_pkt_info *info)
+{
+ int r;
+ struct meta_match *meta = (struct meta_match *) m->data;
+ struct meta_obj l_value, r_value;
+
+ if (meta_get(skb, info, &meta->lvalue, &l_value) < 0 ||
+ meta_get(skb, info, &meta->rvalue, &r_value) < 0)
+ return 0;
+
+ r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
+
+ switch (meta->lvalue.hdr.op) {
+ case TCF_EM_OPND_EQ:
+ return !r;
+ case TCF_EM_OPND_LT:
+ return r < 0;
+ case TCF_EM_OPND_GT:
+ return r > 0;
+ }
+
+ return 0;
+}
+
+static void meta_delete(struct meta_match *meta)
+{
+ if (meta) {
+ struct meta_type_ops *ops = meta_type_ops(&meta->lvalue);
+
+ if (ops && ops->destroy) {
+ ops->destroy(&meta->lvalue);
+ ops->destroy(&meta->rvalue);
+ }
+ }
+
+ kfree(meta);
+}
+
+static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
+{
+ if (nla) {
+ if (nla_len(nla) == 0)
+ return -EINVAL;
+
+ return meta_type_ops(dst)->change(dst, nla);
+ }
+
+ return 0;
+}
+
+static inline int meta_is_supported(struct meta_value *val)
+{
+ return (!meta_id(val) || meta_ops(val)->get);
+}
+
+static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
+ [TCA_EM_META_HDR] = { .len = sizeof(struct tcf_meta_hdr) },
+};
+
+static int em_meta_change(struct tcf_proto *tp, void *data, int len,
+ struct tcf_ematch *m)
+{
+ int err;
+ struct nlattr *tb[TCA_EM_META_MAX + 1];
+ struct tcf_meta_hdr *hdr;
+ struct meta_match *meta = NULL;
+
+ err = nla_parse(tb, TCA_EM_META_MAX, data, len, meta_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ if (tb[TCA_EM_META_HDR] == NULL)
+ goto errout;
+ hdr = nla_data(tb[TCA_EM_META_HDR]);
+
+ if (TCF_META_TYPE(hdr->left.kind) != TCF_META_TYPE(hdr->right.kind) ||
+ TCF_META_TYPE(hdr->left.kind) > TCF_META_TYPE_MAX ||
+ TCF_META_ID(hdr->left.kind) > TCF_META_ID_MAX ||
+ TCF_META_ID(hdr->right.kind) > TCF_META_ID_MAX)
+ goto errout;
+
+ meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+ if (meta == NULL)
+ goto errout;
+
+ memcpy(&meta->lvalue.hdr, &hdr->left, sizeof(hdr->left));
+ memcpy(&meta->rvalue.hdr, &hdr->right, sizeof(hdr->right));
+
+ if (!meta_is_supported(&meta->lvalue) ||
+ !meta_is_supported(&meta->rvalue)) {
+ err = -EOPNOTSUPP;
+ goto errout;
+ }
+
+ if (meta_change_data(&meta->lvalue, tb[TCA_EM_META_LVALUE]) < 0 ||
+ meta_change_data(&meta->rvalue, tb[TCA_EM_META_RVALUE]) < 0)
+ goto errout;
+
+ m->datalen = sizeof(*meta);
+ m->data = (unsigned long) meta;
+
+ err = 0;
+errout:
+ if (err && meta)
+ meta_delete(meta);
+ return err;
+}
+
+static void em_meta_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+ if (m)
+ meta_delete((struct meta_match *) m->data);
+}
+
+static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
+{
+ struct meta_match *meta = (struct meta_match *) em->data;
+ struct tcf_meta_hdr hdr;
+ struct meta_type_ops *ops;
+
+ memset(&hdr, 0, sizeof(hdr));
+ memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
+ memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
+
+ NLA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+
+ ops = meta_type_ops(&meta->lvalue);
+ if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
+ ops->dump(skb, &meta->rvalue, TCA_EM_META_RVALUE) < 0)
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct tcf_ematch_ops em_meta_ops = {
+ .kind = TCF_EM_META,
+ .change = em_meta_change,
+ .match = em_meta_match,
+ .destroy = em_meta_destroy,
+ .dump = em_meta_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_meta_ops.link)
+};
+
+static int __init init_em_meta(void)
+{
+ return tcf_em_register(&em_meta_ops);
+}
+
+static void __exit exit_em_meta(void)
+{
+ tcf_em_unregister(&em_meta_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_meta);
+module_exit(exit_em_meta);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_META);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
new file mode 100644
index 0000000..370a1b2
--- /dev/null
+++ b/net/sched/em_nbyte.c
@@ -0,0 +1,80 @@
+/*
+ * net/sched/em_nbyte.c N-Byte ematch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/tc_ematch/tc_em_nbyte.h>
+#include <net/pkt_cls.h>
+
+struct nbyte_data
+{
+ struct tcf_em_nbyte hdr;
+ char pattern[0];
+};
+
+static int em_nbyte_change(struct tcf_proto *tp, void *data, int data_len,
+ struct tcf_ematch *em)
+{
+ struct tcf_em_nbyte *nbyte = data;
+
+ if (data_len < sizeof(*nbyte) ||
+ data_len < (sizeof(*nbyte) + nbyte->len))
+ return -EINVAL;
+
+ em->datalen = sizeof(*nbyte) + nbyte->len;
+ em->data = (unsigned long)kmemdup(data, em->datalen, GFP_KERNEL);
+ if (em->data == 0UL)
+ return -ENOBUFS;
+
+ return 0;
+}
+
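+/* em_nbyte_match - advance hdr.off bytes past the base of the configured
+ * layer and report a match when hdr.len bytes at that position are equal
+ * to the pattern stored behind the header. Offsets falling outside the
+ * valid packet data never match.
+ */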
+static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ struct nbyte_data *nbyte = (struct nbyte_data *) em->data;
+ unsigned char *ptr = tcf_get_base_ptr(skb, nbyte->hdr.layer);
+
+ ptr += nbyte->hdr.off;
+
+ if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
+ return 0;
+
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
+}
+
+static struct tcf_ematch_ops em_nbyte_ops = {
+ .kind = TCF_EM_NBYTE,
+ .change = em_nbyte_change,
+ .match = em_nbyte_match,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_nbyte_ops.link)
+};
+
+static int __init init_em_nbyte(void)
+{
+ return tcf_em_register(&em_nbyte_ops);
+}
+
+static void __exit exit_em_nbyte(void)
+{
+ tcf_em_unregister(&em_nbyte_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_nbyte);
+module_exit(exit_em_nbyte);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
new file mode 100644
index 0000000..853c5ea
--- /dev/null
+++ b/net/sched/em_text.c
@@ -0,0 +1,157 @@
+/*
+ * net/sched/em_text.c Textsearch ematch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/textsearch.h>
+#include <linux/tc_ematch/tc_em_text.h>
+#include <net/pkt_cls.h>
+
+struct text_match
+{
+ u16 from_offset;
+ u16 to_offset;
+ u8 from_layer;
+ u8 to_layer;
+ struct ts_config *config;
+};
+
+#define EM_TEXT_PRIV(m) ((struct text_match *) (m)->data)
+
+static int em_text_match(struct sk_buff *skb, struct tcf_ematch *m,
+ struct tcf_pkt_info *info)
+{
+ struct text_match *tm = EM_TEXT_PRIV(m);
+ int from, to;
+ struct ts_state state;
+
+ from = tcf_get_base_ptr(skb, tm->from_layer) - skb->data;
+ from += tm->from_offset;
+
+ to = tcf_get_base_ptr(skb, tm->to_layer) - skb->data;
+ to += tm->to_offset;
+
+ return skb_find_text(skb, from, to, tm->config, &state) != UINT_MAX;
+}
+
+static int em_text_change(struct tcf_proto *tp, void *data, int len,
+ struct tcf_ematch *m)
+{
+ struct text_match *tm;
+ struct tcf_em_text *conf = data;
+ struct ts_config *ts_conf;
+ int flags = 0;
+
+ if (len < sizeof(*conf) || len < (sizeof(*conf) + conf->pattern_len))
+ return -EINVAL;
+
+ if (conf->from_layer > conf->to_layer)
+ return -EINVAL;
+
+ if (conf->from_layer == conf->to_layer &&
+ conf->from_offset > conf->to_offset)
+ return -EINVAL;
+
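+ /* The first pass runs without TS_AUTOLOAD. If the search algorithm is
+ * unknown (-ENOENT), drop the RTNL mutex, set TS_AUTOLOAD and retry so
+ * that textsearch_prepare() may load the algorithm module. The mutex is
+ * re-taken afterwards and -EAGAIN is returned so that the whole request
+ * is replayed against the freshly loaded algorithm.
+ */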
+retry:
+ ts_conf = textsearch_prepare(conf->algo, (u8 *) conf + sizeof(*conf),
+ conf->pattern_len, GFP_KERNEL, flags);
+
+ if (flags & TS_AUTOLOAD)
+ rtnl_lock();
+
+ if (IS_ERR(ts_conf)) {
+ if (PTR_ERR(ts_conf) == -ENOENT && !(flags & TS_AUTOLOAD)) {
+ rtnl_unlock();
+ flags |= TS_AUTOLOAD;
+ goto retry;
+ } else
+ return PTR_ERR(ts_conf);
+ } else if (flags & TS_AUTOLOAD) {
+ textsearch_destroy(ts_conf);
+ return -EAGAIN;
+ }
+
+ tm = kmalloc(sizeof(*tm), GFP_KERNEL);
+ if (tm == NULL) {
+ textsearch_destroy(ts_conf);
+ return -ENOBUFS;
+ }
+
+ tm->from_offset = conf->from_offset;
+ tm->to_offset = conf->to_offset;
+ tm->from_layer = conf->from_layer;
+ tm->to_layer = conf->to_layer;
+ tm->config = ts_conf;
+
+ m->datalen = sizeof(*tm);
+ m->data = (unsigned long) tm;
+
+ return 0;
+}
+
+static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
+{
+ textsearch_destroy(EM_TEXT_PRIV(m)->config);
+}
+
+static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
+{
+ struct text_match *tm = EM_TEXT_PRIV(m);
+ struct tcf_em_text conf;
+
+ memset(&conf, 0, sizeof(conf));
+ strncpy(conf.algo, tm->config->ops->name, sizeof(conf.algo) - 1);
+ conf.from_offset = tm->from_offset;
+ conf.to_offset = tm->to_offset;
+ conf.from_layer = tm->from_layer;
+ conf.to_layer = tm->to_layer;
+ conf.pattern_len = textsearch_get_pattern_len(tm->config);
+ conf.pad = 0;
+
+ if (nla_put_nohdr(skb, sizeof(conf), &conf) < 0)
+ goto nla_put_failure;
+ if (nla_append(skb, conf.pattern_len,
+ textsearch_get_pattern(tm->config)) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct tcf_ematch_ops em_text_ops = {
+ .kind = TCF_EM_TEXT,
+ .change = em_text_change,
+ .match = em_text_match,
+ .destroy = em_text_destroy,
+ .dump = em_text_dump,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_text_ops.link)
+};
+
+static int __init init_em_text(void)
+{
+ return tcf_em_register(&em_text_ops);
+}
+
+static void __exit exit_em_text(void)
+{
+ tcf_em_unregister(&em_text_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_text);
+module_exit(exit_em_text);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
new file mode 100644
index 0000000..953f147
--- /dev/null
+++ b/net/sched/em_u32.c
@@ -0,0 +1,64 @@
+/*
+ * net/sched/em_u32.c U32 Ematch
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Based on net/sched/cls_u32.c
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ struct tc_u32_key *key = (struct tc_u32_key *) em->data;
+ const unsigned char *ptr = skb_network_header(skb);
+
+ if (info) {
+ if (info->ptr)
+ ptr = info->ptr;
+ ptr += (info->nexthdr & key->offmask);
+ }
+
+ ptr += key->off;
+
+ if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
+ return 0;
+
+ return !(((*(__be32*) ptr) ^ key->val) & key->mask);
+}
+
+static struct tcf_ematch_ops em_u32_ops = {
+ .kind = TCF_EM_U32,
+ .datalen = sizeof(struct tc_u32_key),
+ .match = em_u32_match,
+ .owner = THIS_MODULE,
+ .link = LIST_HEAD_INIT(em_u32_ops.link)
+};
+
+static int __init init_em_u32(void)
+{
+ return tcf_em_register(&em_u32_ops);
+}
+
+static void __exit exit_em_u32(void)
+{
+ tcf_em_unregister(&em_u32_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_em_u32);
+module_exit(exit_em_u32);
+
+MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
new file mode 100644
index 0000000..e82519e
--- /dev/null
+++ b/net/sched/ematch.c
@@ -0,0 +1,544 @@
+/*
+ * net/sched/ematch.c Extended Match API
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * An extended match (ematch) is a small classification tool not worth
+ * writing a full classifier for. Ematches can be interconnected to form
+ * a logic expression and get attached to classifiers to extend their
+ * functionality.
+ *
+ * The userspace part transforms the logic expressions into an array
+ * consisting of multiple sequences of interconnected ematches separated
+ * by markers. Precedence is implemented by a special ematch kind
+ * referencing a sequence beyond the marker of the current sequence:
+ * the current position in the sequence is pushed onto a stack and
+ * overwritten by the position referenced in the special ematch.
+ * Matching continues in the new sequence until a marker is reached,
+ * at which point the position is restored from the stack.
+ *
+ * Example:
+ * A AND (B1 OR B2) AND C AND D
+ *
+ * ------->-PUSH-------
+ * -->-- / -->-- \ -->--
+ * / \ / / \ \ / \
+ * +-------+-------+-------+-------+-------+--------+
+ * | A AND | B AND | C AND | D END | B1 OR | B2 END |
+ * +-------+-------+-------+-------+-------+--------+
+ * \ /
+ * --------<-POP---------
+ *
+ * where B is a virtual ematch referencing the sequence starting with B1.
+ *
+ * ==========================================================================
+ *
+ * How to write an ematch in 60 seconds
+ * ------------------------------------
+ *
+ * 1) Provide a matcher function:
+ * static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
+ * struct tcf_pkt_info *info)
+ * {
+ * struct mydata *d = (struct mydata *) m->data;
+ *
+ * if (...matching goes here...)
+ * return 1;
+ * else
+ * return 0;
+ * }
+ *
+ * 2) Fill out a struct tcf_ematch_ops:
+ * static struct tcf_ematch_ops my_ops = {
+ * .kind = unique id,
+ * .datalen = sizeof(struct mydata),
+ * .match = my_match,
+ * .owner = THIS_MODULE,
+ * };
+ *
+ * 3) Register/Unregister your ematch:
+ * static int __init init_my_ematch(void)
+ * {
+ * return tcf_em_register(&my_ops);
+ * }
+ *
+ * static void __exit exit_my_ematch(void)
+ * {
+ * return tcf_em_unregister(&my_ops);
+ * }
+ *
+ * module_init(init_my_ematch);
+ * module_exit(exit_my_ematch);
+ *
+ * 4) By now you should have two more seconds left, barely enough to
+ * open up a beer to watch the compilation going.
+ */
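+
+/*
+ * Typical consumer side (illustrative sketch only, loosely modelled on how
+ * classifiers use this API; locking and error handling omitted):
+ *
+ *	struct tcf_ematch_tree tree;
+ *
+ *	err = tcf_em_tree_validate(tp, nla, &tree);	(configuration path)
+ *	...copy into the private data with tcf_em_tree_change()...
+ *	if (tcf_em_tree_match(skb, &tree, &info))	(per packet fast path)
+ *		the logic expression matched;
+ *	tcf_em_tree_destroy(tp, &tree);			(teardown)
+ */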
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/pkt_cls.h>
+
+static LIST_HEAD(ematch_ops);
+static DEFINE_RWLOCK(ematch_mod_lock);
+
+static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+{
+ struct tcf_ematch_ops *e = NULL;
+
+ read_lock(&ematch_mod_lock);
+ list_for_each_entry(e, &ematch_ops, link) {
+ if (kind == e->kind) {
+ if (!try_module_get(e->owner))
+ e = NULL;
+ read_unlock(&ematch_mod_lock);
+ return e;
+ }
+ }
+ read_unlock(&ematch_mod_lock);
+
+ return NULL;
+}
+
+/**
+ * tcf_em_register - register an extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their presence.
+ * The given @ops must have kind set to a unique identifier and the
+ * callback match() must be implemented. All other callbacks are optional
+ * and a fallback implementation is used instead.
+ *
+ * Returns -EEXIST if an ematch of the same kind has already been registered.
+ */
+int tcf_em_register(struct tcf_ematch_ops *ops)
+{
+ int err = -EEXIST;
+ struct tcf_ematch_ops *e;
+
+ if (ops->match == NULL)
+ return -EINVAL;
+
+ write_lock(&ematch_mod_lock);
+ list_for_each_entry(e, &ematch_ops, link)
+ if (ops->kind == e->kind)
+ goto errout;
+
+ list_add_tail(&ops->link, &ematch_ops);
+ err = 0;
+errout:
+ write_unlock(&ematch_mod_lock);
+ return err;
+}
+EXPORT_SYMBOL(tcf_em_register);
+
+/**
+ * tcf_em_unregister - unregister an extended match
+ *
+ * @ops: ematch operations lookup table
+ *
+ * This function must be called by ematches to announce their disappearance,
+ * for example when the module gets unloaded. The @ops parameter must be
+ * the same as the one used for registration.
+ *
+ * Returns -ENOENT if no matching ematch was found.
+ */
+int tcf_em_unregister(struct tcf_ematch_ops *ops)
+{
+ int err = 0;
+ struct tcf_ematch_ops *e;
+
+ write_lock(&ematch_mod_lock);
+ list_for_each_entry(e, &ematch_ops, link) {
+ if (e == ops) {
+ list_del(&e->link);
+ goto out;
+ }
+ }
+
+ err = -ENOENT;
+out:
+ write_unlock(&ematch_mod_lock);
+ return err;
+}
+EXPORT_SYMBOL(tcf_em_unregister);
+
+static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
+ int index)
+{
+ return &tree->matches[index];
+}
+
+
+static int tcf_em_validate(struct tcf_proto *tp,
+ struct tcf_ematch_tree_hdr *tree_hdr,
+ struct tcf_ematch *em, struct nlattr *nla, int idx)
+{
+ int err = -EINVAL;
+ struct tcf_ematch_hdr *em_hdr = nla_data(nla);
+ int data_len = nla_len(nla) - sizeof(*em_hdr);
+ void *data = (void *) em_hdr + sizeof(*em_hdr);
+
+ if (!TCF_EM_REL_VALID(em_hdr->flags))
+ goto errout;
+
+ if (em_hdr->kind == TCF_EM_CONTAINER) {
+ /* Special ematch called "container", carries an index
+ * referencing an external ematch sequence. */
+ u32 ref;
+
+ if (data_len < sizeof(ref))
+ goto errout;
+ ref = *(u32 *) data;
+
+ if (ref >= tree_hdr->nmatches)
+ goto errout;
+
+ /* We do not allow backward jumps to avoid loops; jumps to
+ * our own position are of course illegal. */
+ if (ref <= idx)
+ goto errout;
+
+
+ em->data = ref;
+ } else {
+ /* Note: This lookup will increase the module refcnt
+ * of the ematch module referenced. In case of a failure,
+ * a destroy function is called by the underlying layer
+ * which automatically releases the reference again, therefore
+ * the module MUST not be given back under any circumstances
+ * here. Be aware, the destroy function assumes that the
+ * module is held if the ops field is non-zero. */
+ em->ops = tcf_em_lookup(em_hdr->kind);
+
+ if (em->ops == NULL) {
+ err = -ENOENT;
+#ifdef CONFIG_MODULES
+ __rtnl_unlock();
+ request_module("ematch-kind-%u", em_hdr->kind);
+ rtnl_lock();
+ em->ops = tcf_em_lookup(em_hdr->kind);
+ if (em->ops) {
+ /* We dropped the RTNL mutex in order to
+ * perform the module load. Tell the caller
+ * to replay the request. */
+ module_put(em->ops->owner);
+ err = -EAGAIN;
+ }
+#endif
+ goto errout;
+ }
+
+ /* ematch module provides expected length of data, so we
+ * can do a basic sanity check. */
+ if (em->ops->datalen && data_len < em->ops->datalen)
+ goto errout;
+
+ if (em->ops->change) {
+ err = em->ops->change(tp, data, data_len, em);
+ if (err < 0)
+ goto errout;
+ } else if (data_len > 0) {
+ /* ematch module doesn't provide its own change
+ * procedure and expects us to allocate and copy
+ * the ematch data.
+ *
+ * TCF_EM_SIMPLE may be specified stating that the
+ * data only consists of a u32 integer and the module
+ * does not expect a memory reference but rather
+ * the value carried. */
+ if (em_hdr->flags & TCF_EM_SIMPLE) {
+ if (data_len < sizeof(u32))
+ goto errout;
+ em->data = *(u32 *) data;
+ } else {
+ void *v = kmemdup(data, data_len, GFP_KERNEL);
+ if (v == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
+ em->data = (unsigned long) v;
+ }
+ }
+ }
+
+ em->matchid = em_hdr->matchid;
+ em->flags = em_hdr->flags;
+ em->datalen = data_len;
+
+ err = 0;
+errout:
+ return err;
+}
+
+static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
+ [TCA_EMATCH_TREE_HDR] = { .len = sizeof(struct tcf_ematch_tree_hdr) },
+ [TCA_EMATCH_TREE_LIST] = { .type = NLA_NESTED },
+};
+
+/**
+ * tcf_em_tree_validate - validate ematch config TLV and build ematch tree
+ *
+ * @tp: classifier kind handle
+ * @nla: ematch tree configuration TLV
+ * @tree: destination ematch tree variable to store the resulting
+ * ematch tree.
+ *
+ * This function validates the given configuration TLV @nla and builds an
+ * ematch tree in @tree. The resulting tree must later be copied into
+ * the private classifier data using tcf_em_tree_change(). You MUST NOT
+ * provide the ematch tree variable of the private classifier data directly,
+ * the changes would not be locked properly.
+ *
+ * Returns a negative error code if the configuration TLV contains errors.
+ */
+int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
+ struct tcf_ematch_tree *tree)
+{
+ int idx, list_len, matches_len, err;
+ struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
+ struct nlattr *rt_match, *rt_hdr, *rt_list;
+ struct tcf_ematch_tree_hdr *tree_hdr;
+ struct tcf_ematch *em;
+
+ memset(tree, 0, sizeof(*tree));
+ if (!nla)
+ return 0;
+
+ err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ rt_hdr = tb[TCA_EMATCH_TREE_HDR];
+ rt_list = tb[TCA_EMATCH_TREE_LIST];
+
+ if (rt_hdr == NULL || rt_list == NULL)
+ goto errout;
+
+ tree_hdr = nla_data(rt_hdr);
+ memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
+
+ rt_match = nla_data(rt_list);
+ list_len = nla_len(rt_list);
+ matches_len = tree_hdr->nmatches * sizeof(*em);
+
+ tree->matches = kzalloc(matches_len, GFP_KERNEL);
+ if (tree->matches == NULL)
+ goto errout;
+
+ /* We do not use nla_parse_nested here because the maximum
+ * number of attributes is unknown. This saves us the allocation
+ * for a tb buffer which would serve no purpose at all.
+ *
+ * The array of rt attributes is parsed in the order as they are
+ * provided, their type must be incremental from 1 to n. Even
+ * though it does not serve any real purpose, failing to stick
+ * to this policy will result in a parsing failure. */
+ for (idx = 0; nla_ok(rt_match, list_len); idx++) {
+ err = -EINVAL;
+
+ if (rt_match->nla_type != (idx + 1))
+ goto errout_abort;
+
+ if (idx >= tree_hdr->nmatches)
+ goto errout_abort;
+
+ if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
+ goto errout_abort;
+
+ em = tcf_em_get_match(tree, idx);
+
+ err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
+ if (err < 0)
+ goto errout_abort;
+
+ rt_match = nla_next(rt_match, &list_len);
+ }
+
+ /* Check if the number of matches provided by userspace actually
+ * complies with the array of matches. The number was used for
+ * the validation of references and a mismatch could lead to
+ * undefined references during the matching process. */
+ if (idx != tree_hdr->nmatches) {
+ err = -EINVAL;
+ goto errout_abort;
+ }
+
+ err = 0;
+errout:
+ return err;
+
+errout_abort:
+ tcf_em_tree_destroy(tp, tree);
+ return err;
+}
+EXPORT_SYMBOL(tcf_em_tree_validate);
+
+/**
+ * tcf_em_tree_destroy - destroy an ematch tree
+ *
+ * @tp: classifier kind handle
+ * @tree: ematch tree to be deleted
+ *
+ * This function destroys an ematch tree previously created by
+ * tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
+ * the ematch tree is not in use before calling this function.
+ */
+void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
+{
+ int i;
+
+ if (tree->matches == NULL)
+ return;
+
+ for (i = 0; i < tree->hdr.nmatches; i++) {
+ struct tcf_ematch *em = tcf_em_get_match(tree, i);
+
+ if (em->ops) {
+ if (em->ops->destroy)
+ em->ops->destroy(tp, em);
+ else if (!tcf_em_is_simple(em))
+ kfree((void *) em->data);
+ module_put(em->ops->owner);
+ }
+ }
+
+ tree->hdr.nmatches = 0;
+ kfree(tree->matches);
+ tree->matches = NULL;
+}
+EXPORT_SYMBOL(tcf_em_tree_destroy);
+
+/**
+ * tcf_em_tree_dump - dump ematch tree into a rtnl message
+ *
+ * @skb: skb holding the rtnl message
+ * @tree: ematch tree to be dumped
+ * @tlv: TLV type to be used to encapsulate the tree
+ *
+ * This function dumps an ematch tree into an rtnl message. It is valid to
+ * call this function while the ematch tree is in use.
+ *
+ * Returns -1 if the skb tailroom is insufficient.
+ */
+int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
+{
+ int i;
+ u8 *tail;
+ struct nlattr *top_start;
+ struct nlattr *list_start;
+
+ top_start = nla_nest_start(skb, tlv);
+ if (top_start == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+
+ list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
+ if (list_start == NULL)
+ goto nla_put_failure;
+
+ tail = skb_tail_pointer(skb);
+ for (i = 0; i < tree->hdr.nmatches; i++) {
+ struct nlattr *match_start = (struct nlattr *)tail;
+ struct tcf_ematch *em = tcf_em_get_match(tree, i);
+ struct tcf_ematch_hdr em_hdr = {
+ .kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
+ .matchid = em->matchid,
+ .flags = em->flags
+ };
+
+ NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+
+ if (em->ops && em->ops->dump) {
+ if (em->ops->dump(skb, em) < 0)
+ goto nla_put_failure;
+ } else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
+ u32 u = em->data;
+ if (nla_put_nohdr(skb, sizeof(u), &u) < 0)
+ goto nla_put_failure;
+ } else if (em->datalen > 0) {
+ if (nla_put_nohdr(skb, em->datalen, (void *) em->data) < 0)
+ goto nla_put_failure;
+ }
+
+ tail = skb_tail_pointer(skb);
+ match_start->nla_len = tail - (u8 *)match_start;
+ }
+
+ nla_nest_end(skb, list_start);
+ nla_nest_end(skb, top_start);
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+EXPORT_SYMBOL(tcf_em_tree_dump);
+
+static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
+ struct tcf_pkt_info *info)
+{
+ int r = em->ops->match(skb, em, info);
+ return tcf_em_is_inverted(em) ? !r : r;
+}
+
+/* Do not use this function directly, use tcf_em_tree_match instead */
+int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
+ struct tcf_pkt_info *info)
+{
+ int stackp = 0, match_idx = 0, res = 0;
+ struct tcf_ematch *cur_match;
+ int stack[CONFIG_NET_EMATCH_STACK];
+
+proceed:
+ while (match_idx < tree->hdr.nmatches) {
+ cur_match = tcf_em_get_match(tree, match_idx);
+
+ if (tcf_em_is_container(cur_match)) {
+ if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
+ goto stack_overflow;
+
+ stack[stackp++] = match_idx;
+ match_idx = cur_match->data;
+ goto proceed;
+ }
+
+ res = tcf_em_match(skb, cur_match, info);
+
+ if (tcf_em_early_end(cur_match, res))
+ break;
+
+ match_idx++;
+ }
+
+pop_stack:
+ if (stackp > 0) {
+ match_idx = stack[--stackp];
+ cur_match = tcf_em_get_match(tree, match_idx);
+
+ if (tcf_em_early_end(cur_match, res))
+ goto pop_stack;
+ else {
+ match_idx++;
+ goto proceed;
+ }
+ }
+
+ return res;
+
+stack_overflow:
+ if (net_ratelimit())
+ printk(KERN_WARNING "Local stack overflow, increase NET_EMATCH_STACK\n");
+ return -1;
+}
+EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
new file mode 100644
index 0000000..6ab4a2f
--- /dev/null
+++ b/net/sched/sch_api.c
@@ -0,0 +1,1719 @@
+/*
+ * net/sched/sch_api.c Packet scheduler API.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Fixes:
+ *
+ * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
+ * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
+ * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kmod.h>
+#include <linux/list.h>
+#include <linux/hrtimer.h>
+#include <linux/lockdep.h>
+
+#include <net/net_namespace.h>
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
+ struct Qdisc *old, struct Qdisc *new);
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+ struct Qdisc *q, unsigned long cl, int event);
+
+/*
+
+ Short review.
+ -------------
+
+ This file consists of two interrelated parts:
+
+ 1. queueing disciplines manager frontend.
+ 2. traffic classes manager frontend.
+
+ Generally, a queueing discipline ("qdisc") is a black box
+ which is able to enqueue packets and to dequeue them (when
+ the device is ready to send something) in the order and at the
+ times determined by the algorithm hidden inside it.
+
+ qdiscs are divided into two categories:
+ - "queues", which have no internal structure visible from outside.
+ - "schedulers", which split all the packets into "traffic classes",
+ using "packet classifiers" (see cls_api.c).
+
+ In turn, classes may have child qdiscs (as a rule, queues)
+ attached to them, and so on.
+
+ The goal of the routines in this file is to translate
+ the information supplied by the user in the form of handles
+ into a form more intelligible to the kernel, to perform some
+ sanity checks and the parts of the work common to all qdiscs,
+ and to provide rtnetlink notifications.
+
+ All real intelligent work is done inside qdisc modules.
+
+
+
+ Every discipline has two major routines: enqueue and dequeue.
+
+ ---dequeue
+
+ dequeue usually returns an skb to send. It is allowed to return NULL,
+ but that does not mean the queue is empty; it just means that the
+ discipline does not want to send anything this time.
+ The queue is really empty only if q->q.qlen == 0.
+ For complicated disciplines with multiple queues, q->q is not the
+ real packet queue, but q->q.qlen must nevertheless be valid.
+
+ ---enqueue
+
+ enqueue returns 0 if the packet was enqueued successfully.
+ If a packet (this one or another one) was dropped, it returns
+ a non-zero error code:
+ NET_XMIT_DROP - this packet was dropped.
+ Expected action: do not back off, but wait until the queue clears.
+ NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
+ Expected action: back off or ignore.
+ NET_XMIT_POLICED - dropped by policing.
+ Expected action: back off or report an error to real-time applications.
+
+ Auxiliary routines:
+
+ ---requeue
+
+ requeues a packet that was already dequeued. It is used for non-standard
+ or just buggy devices, which can defer output even if netif_queue_stopped()=0.
+
+ ---reset
+
+ returns the qdisc to its initial state: purges all buffers, clears all
+ timers and counters (except statistics), etc.
+
+ ---init
+
+ initializes newly created qdisc.
+
+ ---destroy
+
+ destroys resources allocated by init and during lifetime of qdisc.
+
+ ---change
+
+ changes qdisc parameters.
+ */
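+
+/*
+ * Illustrative only: seen from an enqueue caller, the contract above
+ * reads roughly as
+ *
+ *	ret = q->enqueue(skb, q);
+ *	if (ret == NET_XMIT_SUCCESS)
+ *		the skb is now owned by the qdisc;
+ *	else if (ret == NET_XMIT_CN)
+ *		some packet (possibly another one) was dropped: back off;
+ *	else if (ret == NET_XMIT_DROP)
+ *		this packet was dropped: wait for the queue to clear;
+ *	else (NET_XMIT_POLICED)
+ *		dropped by policing: back off or report an error.
+ */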
+
+/* Protects the list of registered TC modules. It is a pure SMP lock. */
+static DEFINE_RWLOCK(qdisc_mod_lock);
+
+
+/************************************************
+ * Queueing disciplines manipulation. *
+ ************************************************/
+
+
+/* The list of all installed queueing disciplines. */
+
+static struct Qdisc_ops *qdisc_base;
+
+/* Register/unregister a queueing discipline */
+
+int register_qdisc(struct Qdisc_ops *qops)
+{
+ struct Qdisc_ops *q, **qp;
+ int rc = -EEXIST;
+
+ write_lock(&qdisc_mod_lock);
+ for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
+ if (!strcmp(qops->id, q->id))
+ goto out;
+
+ if (qops->enqueue == NULL)
+ qops->enqueue = noop_qdisc_ops.enqueue;
+ if (qops->requeue == NULL)
+ qops->requeue = noop_qdisc_ops.requeue;
+ if (qops->dequeue == NULL)
+ qops->dequeue = noop_qdisc_ops.dequeue;
+
+ qops->next = NULL;
+ *qp = qops;
+ rc = 0;
+out:
+ write_unlock(&qdisc_mod_lock);
+ return rc;
+}
+EXPORT_SYMBOL(register_qdisc);
+
+int unregister_qdisc(struct Qdisc_ops *qops)
+{
+ struct Qdisc_ops *q, **qp;
+ int err = -ENOENT;
+
+ write_lock(&qdisc_mod_lock);
+ for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+ if (q == qops)
+ break;
+ if (q) {
+ *qp = q->next;
+ q->next = NULL;
+ err = 0;
+ }
+ write_unlock(&qdisc_mod_lock);
+ return err;
+}
+EXPORT_SYMBOL(unregister_qdisc);
+
+/* We know the handle. Find the qdisc among all qdiscs attached to the device
+ (root qdisc, all its children, children of children etc.)
+ */
+
+struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
+{
+ struct Qdisc *q;
+
+ if (!(root->flags & TCQ_F_BUILTIN) &&
+ root->handle == handle)
+ return root;
+
+ list_for_each_entry(q, &root->list, list) {
+ if (q->handle == handle)
+ return q;
+ }
+ return NULL;
+}
+
+/*
+ * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
+ * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
+ */
+static DEFINE_SPINLOCK(qdisc_list_lock);
+
+static void qdisc_list_add(struct Qdisc *q)
+{
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ spin_lock_bh(&qdisc_list_lock);
+ list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
+ spin_unlock_bh(&qdisc_list_lock);
+ }
+}
+
+void qdisc_list_del(struct Qdisc *q)
+{
+ if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+ spin_lock_bh(&qdisc_list_lock);
+ list_del(&q->list);
+ spin_unlock_bh(&qdisc_list_lock);
+ }
+}
+EXPORT_SYMBOL(qdisc_list_del);
+
+struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
+{
+ unsigned int i;
+ struct Qdisc *q;
+
+ spin_lock_bh(&qdisc_list_lock);
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+ struct Qdisc *txq_root = txq->qdisc_sleeping;
+
+ q = qdisc_match_from_root(txq_root, handle);
+ if (q)
+ goto unlock;
+ }
+
+ q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
+
+unlock:
+ spin_unlock_bh(&qdisc_list_lock);
+
+ return q;
+}
+
+static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
+{
+ unsigned long cl;
+ struct Qdisc *leaf;
+ const struct Qdisc_class_ops *cops = p->ops->cl_ops;
+
+ if (cops == NULL)
+ return NULL;
+ cl = cops->get(p, classid);
+
+ if (cl == 0)
+ return NULL;
+ leaf = cops->leaf(p, cl);
+ cops->put(p, cl);
+ return leaf;
+}
+
+/* Find queueing discipline by name */
+
+static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
+{
+ struct Qdisc_ops *q = NULL;
+
+ if (kind) {
+ read_lock(&qdisc_mod_lock);
+ for (q = qdisc_base; q; q = q->next) {
+ if (nla_strcmp(kind, q->id) == 0) {
+ if (!try_module_get(q->owner))
+ q = NULL;
+ break;
+ }
+ }
+ read_unlock(&qdisc_mod_lock);
+ }
+ return q;
+}
+
+static struct qdisc_rate_table *qdisc_rtab_list;
+
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
+{
+ struct qdisc_rate_table *rtab;
+
+ for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
+ if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
+ rtab->refcnt++;
+ return rtab;
+ }
+ }
+
+ if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ nla_len(tab) != TC_RTAB_SIZE)
+ return NULL;
+
+ rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
+ if (rtab) {
+ rtab->rate = *r;
+ rtab->refcnt = 1;
+ memcpy(rtab->data, nla_data(tab), TC_RTAB_SIZE);
+ rtab->next = qdisc_rtab_list;
+ qdisc_rtab_list = rtab;
+ }
+ return rtab;
+}
+EXPORT_SYMBOL(qdisc_get_rtab);
+
+void qdisc_put_rtab(struct qdisc_rate_table *tab)
+{
+ struct qdisc_rate_table *rtab, **rtabp;
+
+ if (!tab || --tab->refcnt)
+ return;
+
+ for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+ if (rtab == tab) {
+ *rtabp = rtab->next;
+ kfree(rtab);
+ return;
+ }
+ }
+}
+EXPORT_SYMBOL(qdisc_put_rtab);
+
+static LIST_HEAD(qdisc_stab_list);
+static DEFINE_SPINLOCK(qdisc_stab_lock);
+
+static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
+ [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
+ [TCA_STAB_DATA] = { .type = NLA_BINARY },
+};
+
+static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
+{
+ struct nlattr *tb[TCA_STAB_MAX + 1];
+ struct qdisc_size_table *stab;
+ struct tc_sizespec *s;
+ unsigned int tsize = 0;
+ u16 *tab = NULL;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
+ if (err < 0)
+ return ERR_PTR(err);
+ if (!tb[TCA_STAB_BASE])
+ return ERR_PTR(-EINVAL);
+
+ s = nla_data(tb[TCA_STAB_BASE]);
+
+ if (s->tsize > 0) {
+ if (!tb[TCA_STAB_DATA])
+ return ERR_PTR(-EINVAL);
+ tab = nla_data(tb[TCA_STAB_DATA]);
+ tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
+ }
+
+ if (!s || tsize != s->tsize || (!tab && tsize > 0))
+ return ERR_PTR(-EINVAL);
+
+ spin_lock(&qdisc_stab_lock);
+
+ list_for_each_entry(stab, &qdisc_stab_list, list) {
+ if (memcmp(&stab->szopts, s, sizeof(*s)))
+ continue;
+ if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
+ continue;
+ stab->refcnt++;
+ spin_unlock(&qdisc_stab_lock);
+ return stab;
+ }
+
+ spin_unlock(&qdisc_stab_lock);
+
+ stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
+ if (!stab)
+ return ERR_PTR(-ENOMEM);
+
+ stab->refcnt = 1;
+ stab->szopts = *s;
+ if (tsize > 0)
+ memcpy(stab->data, tab, tsize * sizeof(u16));
+
+ spin_lock(&qdisc_stab_lock);
+ list_add_tail(&stab->list, &qdisc_stab_list);
+ spin_unlock(&qdisc_stab_lock);
+
+ return stab;
+}
+
+void qdisc_put_stab(struct qdisc_size_table *tab)
+{
+ if (!tab)
+ return;
+
+ spin_lock(&qdisc_stab_lock);
+
+ if (--tab->refcnt == 0) {
+ list_del(&tab->list);
+ kfree(tab);
+ }
+
+ spin_unlock(&qdisc_stab_lock);
+}
+EXPORT_SYMBOL(qdisc_put_stab);
+
+static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_STAB);
+ if (nest == NULL)
+ goto nla_put_failure;
+ NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+ nla_nest_end(skb, nest);
+
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
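+/* Map the real skb length to the length used for scheduling: add the
+ * configured overhead and cell_align, shift by cell_log to find the slot
+ * in stab->data and scale the table entry by size_log. Slots beyond the
+ * table are extrapolated as
+ * data[tsize - 1] * (slot / tsize) + data[slot % tsize].
+ */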
+void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+{
+ int pkt_len, slot;
+
+ pkt_len = skb->len + stab->szopts.overhead;
+ if (unlikely(!stab->szopts.tsize))
+ goto out;
+
+ slot = pkt_len + stab->szopts.cell_align;
+ if (unlikely(slot < 0))
+ slot = 0;
+
+ slot >>= stab->szopts.cell_log;
+ if (likely(slot < stab->szopts.tsize))
+ pkt_len = stab->data[slot];
+ else
+ pkt_len = stab->data[stab->szopts.tsize - 1] *
+ (slot / stab->szopts.tsize) +
+ stab->data[slot % stab->szopts.tsize];
+
+ pkt_len <<= stab->szopts.size_log;
+out:
+ if (unlikely(pkt_len < 1))
+ pkt_len = 1;
+ qdisc_skb_cb(skb)->pkt_len = pkt_len;
+}
+EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+
+static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
+{
+ struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
+ timer);
+
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+ smp_wmb();
+ __netif_schedule(qdisc_root(wd->qdisc));
+
+ return HRTIMER_NORESTART;
+}
+
+void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
+{
+ hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ wd->timer.function = qdisc_watchdog;
+ wd->qdisc = qdisc;
+}
+EXPORT_SYMBOL(qdisc_watchdog_init);
+
+void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
+{
+ ktime_t time;
+
+ if (test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(wd->qdisc)->state))
+ return;
+
+ wd->qdisc->flags |= TCQ_F_THROTTLED;
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(expires));
+ hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
+}
+EXPORT_SYMBOL(qdisc_watchdog_schedule);
+
+void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
+{
+ hrtimer_cancel(&wd->timer);
+ wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+}
+EXPORT_SYMBOL(qdisc_watchdog_cancel);
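+
+/*
+ * Illustrative usage sketch (how rate limiting qdiscs typically use the
+ * watchdog, not part of this file): qdisc_watchdog_init() is called from
+ * the qdisc's ->init(), ->dequeue() calls qdisc_watchdog_schedule() with
+ * the time the next packet becomes eligible and returns NULL, and the
+ * hrtimer expiry above clears TCQ_F_THROTTLED and reschedules the root
+ * qdisc. ->reset()/->destroy() call qdisc_watchdog_cancel().
+ */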
+
+static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
+{
+ unsigned int size = n * sizeof(struct hlist_head), i;
+ struct hlist_head *h;
+
+ if (size <= PAGE_SIZE)
+ h = kmalloc(size, GFP_KERNEL);
+ else
+ h = (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL, get_order(size));
+
+ if (h != NULL) {
+ for (i = 0; i < n; i++)
+ INIT_HLIST_HEAD(&h[i]);
+ }
+ return h;
+}
+
+static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
+{
+ unsigned int size = n * sizeof(struct hlist_head);
+
+ if (size <= PAGE_SIZE)
+ kfree(h);
+ else
+ free_pages((unsigned long)h, get_order(size));
+}
+
+void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
+{
+ struct Qdisc_class_common *cl;
+ struct hlist_node *n, *next;
+ struct hlist_head *nhash, *ohash;
+ unsigned int nsize, nmask, osize;
+ unsigned int i, h;
+
+ /* Rehash when load factor exceeds 0.75 */
+ if (clhash->hashelems * 4 <= clhash->hashsize * 3)
+ return;
+ nsize = clhash->hashsize * 2;
+ nmask = nsize - 1;
+ nhash = qdisc_class_hash_alloc(nsize);
+ if (nhash == NULL)
+ return;
+
+ ohash = clhash->hash;
+ osize = clhash->hashsize;
+
+ sch_tree_lock(sch);
+ for (i = 0; i < osize; i++) {
+ hlist_for_each_entry_safe(cl, n, next, &ohash[i], hnode) {
+ h = qdisc_class_hash(cl->classid, nmask);
+ hlist_add_head(&cl->hnode, &nhash[h]);
+ }
+ }
+ clhash->hash = nhash;
+ clhash->hashsize = nsize;
+ clhash->hashmask = nmask;
+ sch_tree_unlock(sch);
+
+ qdisc_class_hash_free(ohash, osize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_grow);
+
+int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
+{
+ unsigned int size = 4;
+
+ clhash->hash = qdisc_class_hash_alloc(size);
+ if (clhash->hash == NULL)
+ return -ENOMEM;
+ clhash->hashsize = size;
+ clhash->hashmask = size - 1;
+ clhash->hashelems = 0;
+ return 0;
+}
+EXPORT_SYMBOL(qdisc_class_hash_init);
+
+void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
+{
+ qdisc_class_hash_free(clhash->hash, clhash->hashsize);
+}
+EXPORT_SYMBOL(qdisc_class_hash_destroy);
+
+void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
+ struct Qdisc_class_common *cl)
+{
+ unsigned int h;
+
+ INIT_HLIST_NODE(&cl->hnode);
+ h = qdisc_class_hash(cl->classid, clhash->hashmask);
+ hlist_add_head(&cl->hnode, &clhash->hash[h]);
+ clhash->hashelems++;
+}
+EXPORT_SYMBOL(qdisc_class_hash_insert);
+
+void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
+ struct Qdisc_class_common *cl)
+{
+ hlist_del(&cl->hnode);
+ clhash->hashelems--;
+}
+EXPORT_SYMBOL(qdisc_class_hash_remove);
+
+/* Allocate a unique handle from the space managed by the kernel */
+
+static u32 qdisc_alloc_handle(struct net_device *dev)
+{
+ int i = 0x10000;
+ static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
+
+ do {
+ autohandle += TC_H_MAKE(0x10000U, 0);
+ if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
+ autohandle = TC_H_MAKE(0x80000000U, 0);
+ } while (qdisc_lookup(dev, autohandle) && --i > 0);
+
+ return i>0 ? autohandle : 0;
+}
+
+/* Attach toplevel qdisc to device queue. */
+
+static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
+ struct Qdisc *qdisc)
+{
+ struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
+ spinlock_t *root_lock;
+
+ root_lock = qdisc_lock(oqdisc);
+ spin_lock_bh(root_lock);
+
+ /* Prune old scheduler */
+ if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
+ qdisc_reset(oqdisc);
+
+ /* ... and graft new one */
+ if (qdisc == NULL)
+ qdisc = &noop_qdisc;
+ dev_queue->qdisc_sleeping = qdisc;
+ rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
+
+ spin_unlock_bh(root_lock);
+
+ return oqdisc;
+}
+
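+/* Propagate a queue length decrease of n packets up the qdisc hierarchy
+ * so that every ancestor's q.qlen stays consistent. Classful parents that
+ * implement ->qlen_notify() are told about the class that may have become
+ * empty so that they can deactivate it.
+ */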
+void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+{
+ const struct Qdisc_class_ops *cops;
+ unsigned long cl;
+ u32 parentid;
+
+ if (n == 0)
+ return;
+ while ((parentid = sch->parent)) {
+ if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
+ return;
+
+ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+ if (sch == NULL) {
+ WARN_ON(parentid != TC_H_ROOT);
+ return;
+ }
+ cops = sch->ops->cl_ops;
+ if (cops->qlen_notify) {
+ cl = cops->get(sch, parentid);
+ cops->qlen_notify(sch, cl);
+ cops->put(sch, cl);
+ }
+ sch->q.qlen -= n;
+ }
+}
+EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+
+static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+ struct Qdisc *old, struct Qdisc *new)
+{
+ if (new || old)
+ qdisc_notify(skb, n, clid, old, new);
+
+ if (old)
+ qdisc_destroy(old);
+}
+
+/* Graft qdisc "new" to class "classid" of qdisc "parent" or
+ * to device "dev".
+ *
+ * When appropriate send a netlink notification using 'skb'
+ * and "n".
+ *
+ * On success, destroy old qdisc.
+ */
+
+static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
+ struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
+ struct Qdisc *new, struct Qdisc *old)
+{
+ struct Qdisc *q = old;
+ int err = 0;
+
+ if (parent == NULL) {
+ unsigned int i, num_q, ingress;
+
+ ingress = 0;
+ num_q = dev->num_tx_queues;
+ if ((q && q->flags & TCQ_F_INGRESS) ||
+ (new && new->flags & TCQ_F_INGRESS)) {
+ num_q = 1;
+ ingress = 1;
+ }
+
+ if (dev->flags & IFF_UP)
+ dev_deactivate(dev);
+
+ for (i = 0; i < num_q; i++) {
+ struct netdev_queue *dev_queue = &dev->rx_queue;
+
+ if (!ingress)
+ dev_queue = netdev_get_tx_queue(dev, i);
+
+ old = dev_graft_qdisc(dev_queue, new);
+ if (new && i > 0)
+ atomic_inc(&new->refcnt);
+
+ notify_and_destroy(skb, n, classid, old, new);
+ }
+
+ if (dev->flags & IFF_UP)
+ dev_activate(dev);
+ } else {
+ const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
+
+ err = -EINVAL;
+
+ if (cops) {
+ unsigned long cl = cops->get(parent, classid);
+ if (cl) {
+ err = cops->graft(parent, cl, new, &old);
+ cops->put(parent, cl);
+ }
+ }
+ if (!err)
+ notify_and_destroy(skb, n, classid, old, new);
+ }
+ return err;
+}
+
+/* lockdep annotation is needed for ingress; egress gets it only for name */
+static struct lock_class_key qdisc_tx_lock;
+static struct lock_class_key qdisc_rx_lock;
+
+/*
+ Allocate and initialize new qdisc.
+
+ Parameters are passed via opt.
+ */
+
+static struct Qdisc *
+qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
+ u32 parent, u32 handle, struct nlattr **tca, int *errp)
+{
+ int err;
+ struct nlattr *kind = tca[TCA_KIND];
+ struct Qdisc *sch;
+ struct Qdisc_ops *ops;
+ struct qdisc_size_table *stab;
+
+ ops = qdisc_lookup_ops(kind);
+#ifdef CONFIG_MODULES
+ if (ops == NULL && kind != NULL) {
+ char name[IFNAMSIZ];
+ if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
+ /* We dropped the RTNL semaphore in order to
+ * perform the module load. So, even if we
+ * succeeded in loading the module we have to
+ * tell the caller to replay the request. We
+ * indicate this using -EAGAIN.
+ * We replay the request because the device may
+ * go away in the mean time.
+ */
+ rtnl_unlock();
+ request_module("sch_%s", name);
+ rtnl_lock();
+ ops = qdisc_lookup_ops(kind);
+ if (ops != NULL) {
+ /* We will try again qdisc_lookup_ops,
+ * so don't keep a reference.
+ */
+ module_put(ops->owner);
+ err = -EAGAIN;
+ goto err_out;
+ }
+ }
+ }
+#endif
+
+ err = -ENOENT;
+ if (ops == NULL)
+ goto err_out;
+
+ sch = qdisc_alloc(dev_queue, ops);
+ if (IS_ERR(sch)) {
+ err = PTR_ERR(sch);
+ goto err_out2;
+ }
+
+ sch->parent = parent;
+
+ if (handle == TC_H_INGRESS) {
+ sch->flags |= TCQ_F_INGRESS;
+ handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
+ } else {
+ if (handle == 0) {
+ handle = qdisc_alloc_handle(dev);
+ err = -ENOMEM;
+ if (handle == 0)
+ goto err_out3;
+ }
+ lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
+ }
+
+ sch->handle = handle;
+
+ if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB]);
+ if (IS_ERR(stab)) {
+ err = PTR_ERR(stab);
+ goto err_out3;
+ }
+ sch->stab = stab;
+ }
+ if (tca[TCA_RATE]) {
+ spinlock_t *root_lock;
+
+ if ((sch->parent != TC_H_ROOT) &&
+ !(sch->flags & TCQ_F_INGRESS))
+ root_lock = qdisc_root_sleeping_lock(sch);
+ else
+ root_lock = qdisc_lock(sch);
+
+ err = gen_new_estimator(&sch->bstats, &sch->rate_est,
+ root_lock, tca[TCA_RATE]);
+ if (err) {
+ /*
+ * Any broken qdiscs that would require
+ * an ops->reset() here? The qdisc was never
+ * in action so it shouldn't be necessary.
+ */
+ if (ops->destroy)
+ ops->destroy(sch);
+ goto err_out3;
+ }
+ }
+
+ qdisc_list_add(sch);
+
+ return sch;
+ }
+err_out3:
+ qdisc_put_stab(sch->stab);
+ dev_put(dev);
+ kfree((char *) sch - sch->padded);
+err_out2:
+ module_put(ops->owner);
+err_out:
+ *errp = err;
+ return NULL;
+}
+
+static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
+{
+ struct qdisc_size_table *stab = NULL;
+ int err = 0;
+
+ if (tca[TCA_OPTIONS]) {
+ if (sch->ops->change == NULL)
+ return -EINVAL;
+ err = sch->ops->change(sch, tca[TCA_OPTIONS]);
+ if (err)
+ return err;
+ }
+
+ if (tca[TCA_STAB]) {
+ stab = qdisc_get_stab(tca[TCA_STAB]);
+ if (IS_ERR(stab))
+ return PTR_ERR(stab);
+ }
+
+ qdisc_put_stab(sch->stab);
+ sch->stab = stab;
+
+ if (tca[TCA_RATE])
+ gen_replace_estimator(&sch->bstats, &sch->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ return 0;
+}
+
+struct check_loop_arg
+{
+ struct qdisc_walker w;
+ struct Qdisc *p;
+ int depth;
+};
+
+static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
+
+static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
+{
+ struct check_loop_arg arg;
+
+ if (q->ops->cl_ops == NULL)
+ return 0;
+
+ arg.w.stop = arg.w.skip = arg.w.count = 0;
+ arg.w.fn = check_loop_fn;
+ arg.depth = depth;
+ arg.p = p;
+ q->ops->cl_ops->walk(q, &arg.w);
+ return arg.w.stop ? -ELOOP : 0;
+}
+
+static int
+check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
+{
+ struct Qdisc *leaf;
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+ struct check_loop_arg *arg = (struct check_loop_arg *)w;
+
+ leaf = cops->leaf(q, cl);
+ if (leaf) {
+ if (leaf == arg->p || arg->depth > 7)
+ return -ELOOP;
+ return check_loop(leaf, arg->p, arg->depth + 1);
+ }
+ return 0;
+}
+
+/*
+ * Delete/get qdisc.
+ */
+
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm = NLMSG_DATA(n);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ u32 clid = tcm->tcm_parent;
+ struct Qdisc *q = NULL;
+ struct Qdisc *p = NULL;
+ int err;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
+ if (clid) {
+ if (clid != TC_H_ROOT) {
+ if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
+ if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+ return -ENOENT;
+ q = qdisc_leaf(p, clid);
+ } else { /* ingress */
+ q = dev->rx_queue.qdisc_sleeping;
+ }
+ } else {
+ struct netdev_queue *dev_queue;
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
+ }
+ if (!q)
+ return -ENOENT;
+
+ if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
+ return -EINVAL;
+ } else {
+ if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ return -ENOENT;
+ }
+
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ return -EINVAL;
+
+ if (n->nlmsg_type == RTM_DELQDISC) {
+ if (!clid)
+ return -EINVAL;
+ if (q->handle == 0)
+ return -ENOENT;
+ if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+ return err;
+ } else {
+ qdisc_notify(skb, n, clid, NULL, q);
+ }
+ return 0;
+}
+
+/*
+ Create/change qdisc.
+ */
+
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tcmsg *tcm;
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ u32 clid;
+ struct Qdisc *q, *p;
+ int err;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+replay:
+ /* Reinit, just in case something touches this. */
+ tcm = NLMSG_DATA(n);
+ clid = tcm->tcm_parent;
+ q = p = NULL;
+
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
+ if (clid) {
+ if (clid != TC_H_ROOT) {
+ if (clid != TC_H_INGRESS) {
+ if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+ return -ENOENT;
+ q = qdisc_leaf(p, clid);
+ } else { /* ingress */
+ q = dev->rx_queue.qdisc_sleeping;
+ }
+ } else {
+ struct netdev_queue *dev_queue;
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ q = dev_queue->qdisc_sleeping;
+ }
+
+ /* It may be default qdisc, ignore it */
+ if (q && q->handle == 0)
+ q = NULL;
+
+ if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
+ if (tcm->tcm_handle) {
+ if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+ return -EEXIST;
+ if (TC_H_MIN(tcm->tcm_handle))
+ return -EINVAL;
+ if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+ goto create_n_graft;
+ if (n->nlmsg_flags&NLM_F_EXCL)
+ return -EEXIST;
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ return -EINVAL;
+ if (q == p ||
+ (p && check_loop(q, p, 0)))
+ return -ELOOP;
+ atomic_inc(&q->refcnt);
+ goto graft;
+ } else {
+ if (q == NULL)
+ goto create_n_graft;
+
+ /* This magic test requires explanation.
+ *
+ * We know that some child q is already
+ * attached to this parent and we have a choice:
+ * either to change it or to create/graft a new one.
+ *
+ * 1. We are allowed to create/graft only
+ * if CREATE and REPLACE flags are set.
+ *
+ * 2. If EXCL is set, the requestor wanted to say
+ * that the qdisc tcm_handle is not expected
+ * to exist, so we choose create/graft too.
+ *
+ * 3. The last case is when no flags are set.
+ * Alas, it is sort of a hole in the API; we
+ * cannot decide what to do unambiguously.
+ * For now we select create/graft if the
+ * user gave a KIND which does not match the existing one.
+ */
+ if ((n->nlmsg_flags&NLM_F_CREATE) &&
+ (n->nlmsg_flags&NLM_F_REPLACE) &&
+ ((n->nlmsg_flags&NLM_F_EXCL) ||
+ (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id))))
+ goto create_n_graft;
+ }
+ }
+ } else {
+ if (!tcm->tcm_handle)
+ return -EINVAL;
+ q = qdisc_lookup(dev, tcm->tcm_handle);
+ }
+
+ /* Change qdisc parameters */
+ if (q == NULL)
+ return -ENOENT;
+ if (n->nlmsg_flags&NLM_F_EXCL)
+ return -EEXIST;
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
+ return -EINVAL;
+ err = qdisc_change(q, tca);
+ if (err == 0)
+ qdisc_notify(skb, n, clid, NULL, q);
+ return err;
+
+create_n_graft:
+ if (!(n->nlmsg_flags&NLM_F_CREATE))
+ return -ENOENT;
+ if (clid == TC_H_INGRESS)
+ q = qdisc_create(dev, &dev->rx_queue,
+ tcm->tcm_parent, tcm->tcm_parent,
+ tca, &err);
+ else
+ q = qdisc_create(dev, netdev_get_tx_queue(dev, 0),
+ tcm->tcm_parent, tcm->tcm_handle,
+ tca, &err);
+ if (q == NULL) {
+ if (err == -EAGAIN)
+ goto replay;
+ return err;
+ }
+
+graft:
+ err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
+ if (err) {
+ if (q)
+ qdisc_destroy(q);
+ return err;
+ }
+
+ return 0;
+}
+
+static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
+ u32 pid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
+ tcm = NLMSG_DATA(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm__pad1 = 0;
+ tcm->tcm__pad2 = 0;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = clid;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = atomic_read(&q->refcnt);
+ NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
+ if (q->ops->dump && q->ops->dump(q, skb) < 0)
+ goto nla_put_failure;
+ q->qstats.qlen = q->q.qlen;
+
+ if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ qdisc_root_sleeping_lock(q), &d) < 0)
+ goto nla_put_failure;
+
+ if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, &q->qstats) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+ u32 clid, struct Qdisc *old, struct Qdisc *new)
+{
+ struct sk_buff *skb;
+ u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (old && old->handle) {
+ if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+ goto err_out;
+ }
+ if (new) {
+ if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+ return (q->flags & TCQ_F_BUILTIN) ? true : false;
+}
+
+static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ int *q_idx_p, int s_q_idx)
+{
+ int ret = 0, q_idx = *q_idx_p;
+ struct Qdisc *q;
+
+ if (!root)
+ return 0;
+
+ q = root;
+ if (q_idx < s_q_idx) {
+ q_idx++;
+ } else {
+ if (!tc_qdisc_dump_ignore(q) &&
+ tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+ goto done;
+ q_idx++;
+ }
+ list_for_each_entry(q, &root->list, list) {
+ if (q_idx < s_q_idx) {
+ q_idx++;
+ continue;
+ }
+ if (!tc_qdisc_dump_ignore(q) &&
+ tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+ goto done;
+ q_idx++;
+ }
+
+out:
+ *q_idx_p = q_idx;
+ return ret;
+done:
+ ret = -1;
+ goto out;
+}
+
+static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ int idx, q_idx;
+ int s_idx, s_q_idx;
+ struct net_device *dev;
+
+ if (net != &init_net)
+ return 0;
+
+ s_idx = cb->args[0];
+ s_q_idx = q_idx = cb->args[1];
+ read_lock(&dev_base_lock);
+ idx = 0;
+ for_each_netdev(&init_net, dev) {
+ struct netdev_queue *dev_queue;
+
+ if (idx < s_idx)
+ goto cont;
+ if (idx > s_idx)
+ s_q_idx = 0;
+ q_idx = 0;
+
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+ goto done;
+
+ dev_queue = &dev->rx_queue;
+ if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0)
+ goto done;
+
+cont:
+ idx++;
+ }
+
+done:
+ read_unlock(&dev_base_lock);
+
+ cb->args[0] = idx;
+ cb->args[1] = q_idx;
+
+ return skb->len;
+}
+
+
+
+/************************************************
+ * Traffic classes manipulation. *
+ ************************************************/
+
+
+
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+{
+ struct net *net = sock_net(skb->sk);
+ struct netdev_queue *dev_queue;
+ struct tcmsg *tcm = NLMSG_DATA(n);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct net_device *dev;
+ struct Qdisc *q = NULL;
+ const struct Qdisc_class_ops *cops;
+ unsigned long cl = 0;
+ unsigned long new_cl;
+ u32 pid = tcm->tcm_parent;
+ u32 clid = tcm->tcm_handle;
+ u32 qid = TC_H_MAJ(clid);
+ int err;
+
+ if (net != &init_net)
+ return -EINVAL;
+
+ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return -ENODEV;
+
+ err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+ if (err < 0)
+ return err;
+
+ /*
+ parent == TC_H_UNSPEC - unspecified parent.
+ parent == TC_H_ROOT - class is root, which has no parent.
+ parent == X:0 - parent is root class.
+ parent == X:Y - parent is a node in hierarchy.
+ parent == 0:Y - parent is X:Y, where X:0 is qdisc.
+
+ handle == 0:0 - generate handle from kernel pool.
+ handle == 0:Y - class is X:Y, where X:0 is qdisc.
+ handle == X:Y - class is X:Y (fully specified).
+ handle == X:0 - root class.
+ */
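+
+ /*
+ Concrete example (illustrative): "tc class add dev eth0 parent 1:1
+ classid 1:10 ..." arrives with tcm_parent == 0x00010001 and
+ tcm_handle == 0x00010010; TC_H_MAJ() of either yields the qdisc handle
+ 1:0 looked up below. A handle of the form 0:Y is completed to X:Y
+ using that qdisc's major number.
+ */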
+
+ /* Step 1. Determine qdisc handle X:0 */
+
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (pid != TC_H_ROOT) {
+ u32 qid1 = TC_H_MAJ(pid);
+
+ if (qid && qid1) {
+ /* If both majors are known, they must be identical. */
+ if (qid != qid1)
+ return -EINVAL;
+ } else if (qid1) {
+ qid = qid1;
+ } else if (qid == 0)
+ qid = dev_queue->qdisc_sleeping->handle;
+
+	/* Now qid is a genuine qdisc handle, consistent with both
+	   the parent and the child.
+
+	   TC_H_MAJ(pid) may still be unspecified; complete it now.
+ */
+ if (pid)
+ pid = TC_H_MAKE(qid, pid);
+ } else {
+ if (qid == 0)
+ qid = dev_queue->qdisc_sleeping->handle;
+ }
+
+ /* OK. Locate qdisc */
+ if ((q = qdisc_lookup(dev, qid)) == NULL)
+ return -ENOENT;
+
+	/* And check that it supports classes */
+ cops = q->ops->cl_ops;
+ if (cops == NULL)
+ return -EINVAL;
+
+ /* Now try to get class */
+ if (clid == 0) {
+ if (pid == TC_H_ROOT)
+ clid = qid;
+ } else
+ clid = TC_H_MAKE(qid, clid);
+
+ if (clid)
+ cl = cops->get(q, clid);
+
+ if (cl == 0) {
+ err = -ENOENT;
+ if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+ goto out;
+ } else {
+ switch (n->nlmsg_type) {
+ case RTM_NEWTCLASS:
+ err = -EEXIST;
+ if (n->nlmsg_flags&NLM_F_EXCL)
+ goto out;
+ break;
+ case RTM_DELTCLASS:
+ err = cops->delete(q, cl);
+ if (err == 0)
+ tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
+ goto out;
+ case RTM_GETTCLASS:
+ err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
+ goto out;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ new_cl = cl;
+ err = cops->change(q, clid, pid, tca, &new_cl);
+ if (err == 0)
+ tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
+
+out:
+ if (cl)
+ cops->put(q, cl);
+
+ return err;
+}
+
+
+static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
+ unsigned long cl,
+ u32 pid, u32 seq, u16 flags, int event)
+{
+ struct tcmsg *tcm;
+ struct nlmsghdr *nlh;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct gnet_dump d;
+ const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
+
+ nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
+ tcm = NLMSG_DATA(nlh);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = q->handle;
+ tcm->tcm_handle = q->handle;
+ tcm->tcm_info = 0;
+ NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
+ if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
+ qdisc_root_sleeping_lock(q), &d) < 0)
+ goto nla_put_failure;
+
+ if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
+ goto nla_put_failure;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto nla_put_failure;
+
+ nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
+ struct Qdisc *q, unsigned long cl, int event)
+{
+ struct sk_buff *skb;
+ u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+}
+
+struct qdisc_dump_args
+{
+ struct qdisc_walker w;
+ struct sk_buff *skb;
+ struct netlink_callback *cb;
+};
+
+static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
+{
+ struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
+
+ return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
+ a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
+}
+
+static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
+ struct tcmsg *tcm, struct netlink_callback *cb,
+ int *t_p, int s_t)
+{
+ struct qdisc_dump_args arg;
+
+ if (tc_qdisc_dump_ignore(q) ||
+ *t_p < s_t || !q->ops->cl_ops ||
+ (tcm->tcm_parent &&
+ TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
+ (*t_p)++;
+ return 0;
+ }
+ if (*t_p > s_t)
+ memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
+ arg.w.fn = qdisc_class_dump;
+ arg.skb = skb;
+ arg.cb = cb;
+ arg.w.stop = 0;
+ arg.w.skip = cb->args[1];
+ arg.w.count = 0;
+ q->ops->cl_ops->walk(q, &arg.w);
+ cb->args[1] = arg.w.count;
+ if (arg.w.stop)
+ return -1;
+ (*t_p)++;
+ return 0;
+}
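+
+/* Editorial note: the per-qdisc class dump goes through the generic walker
+ * interface: cl_ops->walk() invokes arg.w.fn (qdisc_class_dump() above) once
+ * per class, while w.skip/w.count are seeded from and written back into
+ * cb->args[1] so that an interrupted netlink dump resumes at the right
+ * class.
+ */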
+
+static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
+ struct tcmsg *tcm, struct netlink_callback *cb,
+ int *t_p, int s_t)
+{
+ struct Qdisc *q;
+
+ if (!root)
+ return 0;
+
+ if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
+ return -1;
+
+ list_for_each_entry(q, &root->list, list) {
+ if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+ struct net *net = sock_net(skb->sk);
+ struct netdev_queue *dev_queue;
+ struct net_device *dev;
+ int t, s_t;
+
+ if (net != &init_net)
+ return 0;
+
+ if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ return 0;
+ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ return 0;
+
+ s_t = cb->args[0];
+ t = 0;
+
+ dev_queue = netdev_get_tx_queue(dev, 0);
+ if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+ goto done;
+
+ dev_queue = &dev->rx_queue;
+ if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0)
+ goto done;
+
+done:
+ cb->args[0] = t;
+
+ dev_put(dev);
+ return skb->len;
+}
+
+/* Main classifier routine: scans the classifier chain attached
+   to this qdisc, (optionally) tests for the protocol, and asks the
+   specific classifiers.
+ */
+int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ __be16 protocol = skb->protocol;
+ int err = 0;
+
+ for (; tp; tp = tp->next) {
+ if ((tp->protocol == protocol ||
+ tp->protocol == htons(ETH_P_ALL)) &&
+ (err = tp->classify(skb, tp, res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
+#endif
+ return err;
+ }
+ }
+ return -1;
+}
+EXPORT_SYMBOL(tc_classify_compat);
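+
+/* Editorial usage sketch (identifiers other than the exported helpers are
+ * illustrative): a classful qdisc typically calls this from its enqueue
+ * path, roughly
+ *
+ *	struct tcf_result res;
+ *	int verdict = tc_classify_compat(skb, my_filter_list, &res);
+ *
+ * A negative verdict means no filter matched and the qdisc falls back to a
+ * default class; otherwise res.class / res.classid identify the selected
+ * class, and under CONFIG_NET_CLS_ACT the verdict may also be a TC_ACT_*
+ * code (compare atm_tc_enqueue() and cbq_classify() later in this series).
+ */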
+
+int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ int err = 0;
+ __be16 protocol;
+#ifdef CONFIG_NET_CLS_ACT
+ struct tcf_proto *otp = tp;
+reclassify:
+#endif
+ protocol = skb->protocol;
+
+ err = tc_classify_compat(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+ if (err == TC_ACT_RECLASSIFY) {
+ u32 verd = G_TC_VERD(skb->tc_verd);
+ tp = otp;
+
+ if (verd++ >= MAX_REC_LOOP) {
+ printk("rule prio %u protocol %02x reclassify loop, "
+ "packet dropped\n",
+ tp->prio&0xffff, ntohs(tp->protocol));
+ return TC_ACT_SHOT;
+ }
+ skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
+ goto reclassify;
+ }
+#endif
+ return err;
+}
+EXPORT_SYMBOL(tc_classify);
+
+void tcf_destroy(struct tcf_proto *tp)
+{
+ tp->ops->destroy(tp);
+ module_put(tp->ops->owner);
+ kfree(tp);
+}
+
+void tcf_destroy_chain(struct tcf_proto **fl)
+{
+ struct tcf_proto *tp;
+
+ while ((tp = *fl) != NULL) {
+ *fl = tp->next;
+ tcf_destroy(tp);
+ }
+}
+EXPORT_SYMBOL(tcf_destroy_chain);
+
+#ifdef CONFIG_PROC_FS
+static int psched_show(struct seq_file *seq, void *v)
+{
+ struct timespec ts;
+
+ hrtimer_get_res(CLOCK_MONOTONIC, &ts);
+ seq_printf(seq, "%08x %08x %08x %08x\n",
+ (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+ 1000000,
+ (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
+
+ return 0;
+}
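+
+/* Editorial note: the four hex words above form /proc/net/psched, which the
+ * iproute2 "tc" utility (as far as this editor knows) parses to learn the
+ * kernel's time units: nanoseconds per microsecond, nanoseconds per psched
+ * tick (PSCHED_US2NS(1)), the historical constant 1000000, and the
+ * resolution of the monotonic hrtimer clock expressed as ticks per second.
+ */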
+
+static int psched_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, psched_show, PDE(inode)->data);
+}
+
+static const struct file_operations psched_fops = {
+ .owner = THIS_MODULE,
+ .open = psched_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+static int __init pktsched_init(void)
+{
+ register_qdisc(&pfifo_qdisc_ops);
+ register_qdisc(&bfifo_qdisc_ops);
+ proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
+
+ rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
+ rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
+
+ return 0;
+}
+
+subsys_initcall(pktsched_init);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
new file mode 100644
index 0000000..43d3725
--- /dev/null
+++ b/net/sched/sch_atm.c
@@ -0,0 +1,718 @@
+/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/atmdev.h>
+#include <linux/atmclip.h>
+#include <linux/rtnetlink.h>
+#include <linux/file.h> /* for fput */
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */
+
+/*
+ * The ATM queuing discipline provides a framework for invoking classifiers
+ * (aka "filters"), which in turn select classes of this queuing discipline.
+ * Each class maps the flow(s) it is handling to a given VC. Multiple classes
+ * may share the same VC.
+ *
+ * When creating a class, VCs are specified by passing the number of the open
+ * socket descriptor by which the calling process references the VC. The kernel
+ * keeps the VC open at least until all classes using it are removed.
+ *
+ * In this file, most functions are named atm_tc_* to avoid confusion with all
+ * the atm_* in net/atm. This naming convention differs from what's used in the
+ * rest of net/sched.
+ *
+ * Known bugs:
+ * - sometimes messes up the IP stack
+ * - any manipulations besides the few operations described in the README are
+ * untested and likely to crash the system
+ * - should lock the flow while there is data in the queue (?)
+ */
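+
+/*
+ * Editorial sketch of the control flow implied above (attribute names taken
+ * from atm_policy and atm_tc_change() below; the rest is illustrative):
+ * userspace opens an ATM socket and sets up the VC, then creates a class,
+ * passing the socket descriptor as TCA_ATM_FD, optionally together with an
+ * encapsulation header in TCA_ATM_HDR and an excess-traffic class in
+ * TCA_ATM_EXCESS.  atm_tc_change() resolves the descriptor with
+ * sockfd_lookup(), which is what keeps the VC open while the class exists.
+ */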
+
+#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
+
+struct atm_flow_data {
+ struct Qdisc *q; /* FIFO, TBF, etc. */
+ struct tcf_proto *filter_list;
+ struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
+ void (*old_pop)(struct atm_vcc *vcc,
+ struct sk_buff *skb); /* chaining */
+ struct atm_qdisc_data *parent; /* parent qdisc */
+ struct socket *sock; /* for closing */
+ u32 classid; /* x:y type ID */
+ int ref; /* reference count */
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ struct atm_flow_data *next;
+ struct atm_flow_data *excess; /* flow for excess traffic;
+ NULL to set CLP instead */
+ int hdr_len;
+ unsigned char hdr[0]; /* header data; MUST BE LAST */
+};
+
+struct atm_qdisc_data {
+ struct atm_flow_data link; /* unclassified skbs go here */
+ struct atm_flow_data *flows; /* NB: "link" is also on this
+ list */
+ struct tasklet_struct task; /* requeue tasklet */
+};
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
+{
+ struct atm_flow_data *walk;
+
+ pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
+ for (walk = qdisc->flows; walk; walk = walk->next)
+ if (walk == flow)
+ return 1;
+ pr_debug("find_flow: not found\n");
+ return 0;
+}
+
+static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ for (flow = p->flows; flow; flow = flow->next)
+ if (flow->classid == classid)
+ break;
+ return flow;
+}
+
+static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+
+ pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
+ sch, p, flow, new, old);
+ if (!find_flow(p, flow))
+ return -EINVAL;
+ if (!new)
+ new = &noop_qdisc;
+ *old = xchg(&flow->q, new);
+ if (*old)
+ qdisc_reset(*old);
+ return 0;
+}
+
+static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
+{
+ struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+
+ pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
+ return flow ? flow->q : NULL;
+}
+
+static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid)
+{
+ struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid);
+ flow = lookup_flow(sch, classid);
+ if (flow)
+ flow->ref++;
+ pr_debug("atm_tc_get: flow %p\n", flow);
+ return (unsigned long)flow;
+}
+
+static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
+ unsigned long parent, u32 classid)
+{
+ return atm_tc_get(sch, classid);
+}
+
+/*
+ * atm_tc_put handles all destructions, including the ones that are explicitly
+ * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
+ * anything that still seems to be in use.
+ */
+static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+ struct atm_flow_data **prev;
+
+ pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+ if (--flow->ref)
+ return;
+ pr_debug("atm_tc_put: destroying\n");
+ for (prev = &p->flows; *prev; prev = &(*prev)->next)
+ if (*prev == flow)
+ break;
+ if (!*prev) {
+ printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow);
+ return;
+ }
+ *prev = flow->next;
+ pr_debug("atm_tc_put: qdisc %p\n", flow->q);
+ qdisc_destroy(flow->q);
+ tcf_destroy_chain(&flow->filter_list);
+ if (flow->sock) {
+ pr_debug("atm_tc_put: f_count %ld\n",
+ file_count(flow->sock->file));
+ flow->vcc->pop = flow->old_pop;
+ sockfd_put(flow->sock);
+ }
+ if (flow->excess)
+ atm_tc_put(sch, (unsigned long)flow->excess);
+ if (flow != &p->link)
+ kfree(flow);
+ /*
+ * If flow == &p->link, the qdisc no longer works at this point and
+ * needs to be removed. (By the caller of atm_tc_put.)
+ */
+}
+
+static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+ struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
+
+ pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
+ VCC2FLOW(vcc)->old_pop(vcc, skb);
+ tasklet_schedule(&p->task);
+}
+
+static const u8 llc_oui_ip[] = {
+ 0xaa, /* DSAP: non-ISO */
+ 0xaa, /* SSAP: non-ISO */
+ 0x03, /* Ctrl: Unnumbered Information Command PDU */
+ 0x00, /* OUI: EtherType */
+ 0x00, 0x00,
+ 0x08, 0x00
+}; /* Ethertype IP (0800) */
+
+static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
+ [TCA_ATM_FD] = { .type = NLA_U32 },
+ [TCA_ATM_EXCESS] = { .type = NLA_U32 },
+};
+
+static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
+ struct nlattr **tca, unsigned long *arg)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
+ struct atm_flow_data *excess = NULL;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_ATM_MAX + 1];
+ struct socket *sock;
+ int fd, error, hdr_len;
+ void *hdr;
+
+ pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
+ "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
+ /*
+ * The concept of parents doesn't apply for this qdisc.
+ */
+ if (parent && parent != TC_H_ROOT && parent != sch->handle)
+ return -EINVAL;
+ /*
+ * ATM classes cannot be changed. In order to change properties of the
+ * ATM connection, that socket needs to be modified directly (via the
+	 * native ATM API). In order to send a flow to a different VC, the old
+ * class needs to be removed and a new one added. (This may be changed
+ * later.)
+ */
+ if (flow)
+ return -EBUSY;
+ if (opt == NULL)
+ return -EINVAL;
+
+ error = nla_parse_nested(tb, TCA_ATM_MAX, opt, atm_policy);
+ if (error < 0)
+ return error;
+
+ if (!tb[TCA_ATM_FD])
+ return -EINVAL;
+ fd = nla_get_u32(tb[TCA_ATM_FD]);
+ pr_debug("atm_tc_change: fd %d\n", fd);
+ if (tb[TCA_ATM_HDR]) {
+ hdr_len = nla_len(tb[TCA_ATM_HDR]);
+ hdr = nla_data(tb[TCA_ATM_HDR]);
+ } else {
+ hdr_len = RFC1483LLC_LEN;
+ hdr = NULL; /* default LLC/SNAP for IP */
+ }
+ if (!tb[TCA_ATM_EXCESS])
+ excess = NULL;
+ else {
+ excess = (struct atm_flow_data *)
+ atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
+ if (!excess)
+ return -ENOENT;
+ }
+ pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
+ opt->nla_type, nla_len(opt), hdr_len);
+ sock = sockfd_lookup(fd, &error);
+ if (!sock)
+ return error; /* f_count++ */
+ pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
+ if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
+ error = -EPROTOTYPE;
+ goto err_out;
+ }
+ /* @@@ should check if the socket is really operational or we'll crash
+ on vcc->send */
+ if (classid) {
+ if (TC_H_MAJ(classid ^ sch->handle)) {
+ pr_debug("atm_tc_change: classid mismatch\n");
+ error = -EINVAL;
+ goto err_out;
+ }
+ if (find_flow(p, flow)) {
+ error = -EEXIST;
+ goto err_out;
+ }
+ } else {
+ int i;
+ unsigned long cl;
+
+ for (i = 1; i < 0x8000; i++) {
+ classid = TC_H_MAKE(sch->handle, 0x8000 | i);
+ cl = atm_tc_get(sch, classid);
+ if (!cl)
+ break;
+ atm_tc_put(sch, cl);
+ }
+ }
+ pr_debug("atm_tc_change: new id %x\n", classid);
+ flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
+ pr_debug("atm_tc_change: flow %p\n", flow);
+ if (!flow) {
+ error = -ENOBUFS;
+ goto err_out;
+ }
+ flow->filter_list = NULL;
+ flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
+ if (!flow->q)
+ flow->q = &noop_qdisc;
+ pr_debug("atm_tc_change: qdisc %p\n", flow->q);
+ flow->sock = sock;
+ flow->vcc = ATM_SD(sock); /* speedup */
+ flow->vcc->user_back = flow;
+ pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
+ flow->old_pop = flow->vcc->pop;
+ flow->parent = p;
+ flow->vcc->pop = sch_atm_pop;
+ flow->classid = classid;
+ flow->ref = 1;
+ flow->excess = excess;
+ flow->next = p->link.next;
+ p->link.next = flow;
+ flow->hdr_len = hdr_len;
+ if (hdr)
+ memcpy(flow->hdr, hdr, hdr_len);
+ else
+ memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
+ *arg = (unsigned long)flow;
+ return 0;
+err_out:
+ if (excess)
+ atm_tc_put(sch, (unsigned long)excess);
+ sockfd_put(sock);
+ return error;
+}
+
+static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+
+ pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+ if (!find_flow(qdisc_priv(sch), flow))
+ return -EINVAL;
+ if (flow->filter_list || flow == &p->link)
+ return -EBUSY;
+ /*
+ * Reference count must be 2: one for "keepalive" (set at class
+ * creation), and one for the reference held when calling delete.
+ */
+ if (flow->ref < 2) {
+ printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+ return -EINVAL;
+ }
+ if (flow->ref > 2)
+ return -EBUSY; /* catch references via excess, etc. */
+ atm_tc_put(sch, arg);
+ return 0;
+}
+
+static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+ if (walker->stop)
+ return;
+ for (flow = p->flows; flow; flow = flow->next) {
+ if (walker->count >= walker->skip)
+ if (walker->fn(sch, (unsigned long)flow, walker) < 0) {
+ walker->stop = 1;
+ break;
+ }
+ walker->count++;
+ }
+}
+
+static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+
+ pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
+ return flow ? &flow->filter_list : &p->link.filter_list;
+}
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = NULL; /* @@@ */
+ struct tcf_result res;
+ int result;
+ int ret = NET_XMIT_POLICED;
+
+ pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+ result = TC_POLICE_OK; /* be nice to gcc */
+ if (TC_H_MAJ(skb->priority) != sch->handle ||
+ !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
+ for (flow = p->flows; flow; flow = flow->next)
+ if (flow->filter_list) {
+ result = tc_classify_compat(skb,
+ flow->filter_list,
+ &res);
+ if (result < 0)
+ continue;
+ flow = (struct atm_flow_data *)res.class;
+ if (!flow)
+ flow = lookup_flow(sch, res.classid);
+ break;
+ }
+ if (!flow)
+ flow = &p->link;
+ else {
+ if (flow->vcc)
+ ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
+ /*@@@ looks good ... but it's not supposed to work :-) */
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_QUEUED:
+ case TC_ACT_STOLEN:
+ kfree_skb(skb);
+ return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ kfree_skb(skb);
+ goto drop;
+ case TC_POLICE_RECLASSIFY:
+ if (flow->excess)
+ flow = flow->excess;
+ else
+ ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
+ break;
+ }
+#endif
+ }
+
+ ret = qdisc_enqueue(skb, flow->q);
+ if (ret != 0) {
+drop: __maybe_unused
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ if (flow)
+ flow->qstats.drops++;
+ }
+ return ret;
+ }
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ flow->bstats.bytes += qdisc_pkt_len(skb);
+ flow->bstats.packets++;
+ /*
+ * Okay, this may seem weird. We pretend we've dropped the packet if
+ * it goes via ATM. The reason for this is that the outer qdisc
+ * expects to be able to q->dequeue the packet later on if we return
+	 * success at this place. Also, sch->q.qlen needs to reflect whether
+	 * there is a packet eligible for dequeuing or not. Note that the
+ * statistics of the outer qdisc are necessarily wrong because of all
+ * this. There's currently no correct solution for this.
+ */
+ if (flow == &p->link) {
+ sch->q.qlen++;
+ return 0;
+ }
+ tasklet_schedule(&p->task);
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+}
+
+/*
+ * Dequeue packets and send them over ATM. Note that we quite deliberately
+ * avoid checking net_device's flow control here, simply because sch_atm
+ * uses its own channels, which have nothing to do with any CLIP/LANE/or
+ * non-ATM interfaces.
+ */
+
+static void sch_atm_dequeue(unsigned long data)
+{
+ struct Qdisc *sch = (struct Qdisc *)data;
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+ struct sk_buff *skb;
+
+ pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
+ for (flow = p->link.next; flow; flow = flow->next)
+ /*
+ * If traffic is properly shaped, this won't generate nasty
+ * little bursts. Otherwise, it may ... (but that's okay)
+ */
+ while ((skb = flow->q->dequeue(flow->q))) {
+ if (!atm_may_send(flow->vcc, skb->truesize)) {
+ (void)flow->q->ops->requeue(skb, flow->q);
+ break;
+ }
+ pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
+ /* remove any LL header somebody else has attached */
+ skb_pull(skb, skb_network_offset(skb));
+ if (skb_headroom(skb) < flow->hdr_len) {
+ struct sk_buff *new;
+
+ new = skb_realloc_headroom(skb, flow->hdr_len);
+ dev_kfree_skb(skb);
+ if (!new)
+ continue;
+ skb = new;
+ }
+ pr_debug("sch_atm_dequeue: ip %p, data %p\n",
+ skb_network_header(skb), skb->data);
+ ATM_SKB(skb)->vcc = flow->vcc;
+ memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
+ flow->hdr_len);
+ atomic_add(skb->truesize,
+ &sk_atm(flow->vcc)->sk_wmem_alloc);
+ /* atm.atm_options are already set by atm_tc_enqueue */
+ flow->vcc->send(flow->vcc, skb);
+ }
+}
+
+static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct sk_buff *skb;
+
+ pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
+ tasklet_schedule(&p->task);
+ skb = p->link.q->dequeue(p->link.q);
+ if (skb)
+ sch->q.qlen--;
+ return skb;
+}
+
+static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ int ret;
+
+ pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+ ret = p->link.q->ops->requeue(skb, p->link.q);
+ if (!ret) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ } else if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ p->link.qstats.drops++;
+ }
+ return ret;
+}
+
+static unsigned int atm_tc_drop(struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+ unsigned int len;
+
+ pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
+ for (flow = p->flows; flow; flow = flow->next)
+ if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
+ return len;
+ return 0;
+}
+
+static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+
+ pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+ p->flows = &p->link;
+ p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle);
+ if (!p->link.q)
+ p->link.q = &noop_qdisc;
+ pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
+ p->link.filter_list = NULL;
+ p->link.vcc = NULL;
+ p->link.sock = NULL;
+ p->link.classid = sch->handle;
+ p->link.ref = 1;
+ p->link.next = NULL;
+ tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
+ return 0;
+}
+
+static void atm_tc_reset(struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
+ for (flow = p->flows; flow; flow = flow->next)
+ qdisc_reset(flow->q);
+ sch->q.qlen = 0;
+}
+
+static void atm_tc_destroy(struct Qdisc *sch)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow;
+
+ pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
+ for (flow = p->flows; flow; flow = flow->next)
+ tcf_destroy_chain(&flow->filter_list);
+
+ /* races ? */
+ while ((flow = p->flows)) {
+ if (flow->ref > 1)
+ printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
+ flow->ref);
+ atm_tc_put(sch, (unsigned long)flow);
+ if (p->flows == flow) {
+ printk(KERN_ERR "atm_destroy: putting flow %p didn't "
+ "kill it\n", flow);
+ p->flows = flow->next; /* brute force */
+ break;
+ }
+ }
+ tasklet_kill(&p->task);
+}
+
+static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct atm_qdisc_data *p = qdisc_priv(sch);
+ struct atm_flow_data *flow = (struct atm_flow_data *)cl;
+ struct nlattr *nest;
+
+ pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
+ sch, p, flow, skb, tcm);
+ if (!find_flow(p, flow))
+ return -EINVAL;
+ tcm->tcm_handle = flow->classid;
+ tcm->tcm_info = flow->q->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
+ if (flow->vcc) {
+ struct sockaddr_atmpvc pvc;
+ int state;
+
+ pvc.sap_family = AF_ATMPVC;
+ pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
+ pvc.sap_addr.vpi = flow->vcc->vpi;
+ pvc.sap_addr.vci = flow->vcc->vci;
+ NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
+ state = ATM_VF2VS(flow->vcc->flags);
+ NLA_PUT_U32(skb, TCA_ATM_STATE, state);
+ }
+ if (flow->excess)
+ NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
+ else {
+ NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
+ }
+
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int
+atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ struct gnet_dump *d)
+{
+ struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+
+ flow->qstats.qlen = flow->q->q.qlen;
+
+ if (gnet_stats_copy_basic(d, &flow->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &flow->qstats) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ return 0;
+}
+
+static const struct Qdisc_class_ops atm_class_ops = {
+ .graft = atm_tc_graft,
+ .leaf = atm_tc_leaf,
+ .get = atm_tc_get,
+ .put = atm_tc_put,
+ .change = atm_tc_change,
+ .delete = atm_tc_delete,
+ .walk = atm_tc_walk,
+ .tcf_chain = atm_tc_find_tcf,
+ .bind_tcf = atm_tc_bind_filter,
+ .unbind_tcf = atm_tc_put,
+ .dump = atm_tc_dump_class,
+ .dump_stats = atm_tc_dump_class_stats,
+};
+
+static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
+ .cl_ops = &atm_class_ops,
+ .id = "atm",
+ .priv_size = sizeof(struct atm_qdisc_data),
+ .enqueue = atm_tc_enqueue,
+ .dequeue = atm_tc_dequeue,
+ .requeue = atm_tc_requeue,
+ .drop = atm_tc_drop,
+ .init = atm_tc_init,
+ .reset = atm_tc_reset,
+ .destroy = atm_tc_destroy,
+ .dump = atm_tc_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init atm_init(void)
+{
+ return register_qdisc(&atm_qdisc_ops);
+}
+
+static void __exit atm_exit(void)
+{
+ unregister_qdisc(&atm_qdisc_ops);
+}
+
+module_init(atm_init)
+module_exit(atm_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
new file mode 100644
index 0000000..507fb48
--- /dev/null
+++ b/net/sched/sch_blackhole.c
@@ -0,0 +1,52 @@
+/*
+ * net/sched/sch_blackhole.c Black hole queue
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * Note: Quantum tunneling is not supported.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+
+static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ qdisc_drop(skb, sch);
+ return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
+{
+ return NULL;
+}
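+
+/* Editorial note: with this pair of handlers every packet handed to the
+ * qdisc is freed by qdisc_drop() on enqueue and nothing is ever produced by
+ * dequeue, so attaching it as the root qdisc (with iproute2, presumably
+ * "tc qdisc add dev <dev> root blackhole") silently discards all egress
+ * traffic on that device.
+ */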
+
+static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
+ .id = "blackhole",
+ .priv_size = 0,
+ .enqueue = blackhole_enqueue,
+ .dequeue = blackhole_dequeue,
+ .owner = THIS_MODULE,
+};
+
+static int __init blackhole_module_init(void)
+{
+ return register_qdisc(&blackhole_qdisc_ops);
+}
+
+static void __exit blackhole_module_exit(void)
+{
+ unregister_qdisc(&blackhole_qdisc_ops);
+}
+
+module_init(blackhole_module_init)
+module_exit(blackhole_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
new file mode 100644
index 0000000..03e389e
--- /dev/null
+++ b/net/sched/sch_cbq.c
@@ -0,0 +1,2090 @@
+/*
+ * net/sched/sch_cbq.c Class-Based Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+/* Class-Based Queueing (CBQ) algorithm.
+ =======================================
+
+ Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
+ Management Models for Packet Networks",
+ IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
+
+ [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
+
+ [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
+ Parameters", 1996
+
+ [4] Sally Floyd and Michael Speer, "Experimental Results
+ for Class-Based Queueing", 1998, not published.
+
+ -----------------------------------------------------------------------
+
+ Algorithm skeleton was taken from NS simulator cbq.cc.
+ If someone wants to check this code against the LBL version,
+   he should take into account that ONLY the skeleton was borrowed;
+   the implementation is different. In particular:
+
+ --- The WRR algorithm is different. Our version looks more
+ reasonable (I hope) and works when quanta are allowed to be
+ less than MTU, which is always the case when real time classes
+   have small rates. Note that the statement of [3] is
+   incomplete: delay may actually be estimated even if the class
+   per-round allotment is less than MTU. Namely, if the per-round
+ allotment is W*r_i, and r_1+...+r_k = r < 1
+
+ delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
+
+ In the worst case we have IntServ estimate with D = W*r+k*MTU
+ and C = MTU*r. The proof (if correct at all) is trivial.
+
+
+ --- It seems that cbq-2.0 is not very accurate. At least, I cannot
+ interpret some places, which look like wrong translations
+ from NS. Anyone is advised to find these differences
+   and explain to me why I am wrong 8).
+
+   --- Linux has no EOI event, so we cannot estimate the true class
+   idle time. The workaround is to consider the next dequeue event
+   as a sign that the previous packet is finished. This is wrong because of
+   internal device queueing, but on a permanently loaded link it is true.
+ Moreover, combined with clock integrator, this scheme looks
+ very close to an ideal solution. */
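+
+/* Editorial worked example for the delay bound above (illustrative numbers
+   only): with MTU = 1500 bytes, k = 4 real-time classes of r_i = 0.1 each
+   (so r = 0.4), and W chosen so that the per-round allotment W*r_i is
+   500 bytes (W = 5000), the bracketed term is 1500/500 = 3 and the bound
+   evaluates to (3*2000 + 2000 + 4*1500) bytes = 14000 bytes, i.e. about
+   11.2 ms on a 10 Mbit/s link. */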
+
+struct cbq_sched_data;
+
+
+struct cbq_class
+{
+ struct Qdisc_class_common common;
+ struct cbq_class *next_alive; /* next class with backlog in this priority band */
+
+/* Parameters */
+ unsigned char priority; /* class priority */
+ unsigned char priority2; /* priority to be used after overlimit */
+ unsigned char ewma_log; /* time constant for idle time calculation */
+ unsigned char ovl_strategy;
+#ifdef CONFIG_NET_CLS_ACT
+ unsigned char police;
+#endif
+
+ u32 defmap;
+
+ /* Link-sharing scheduler parameters */
+ long maxidle; /* Class parameters: see below. */
+ long offtime;
+ long minidle;
+ u32 avpkt;
+ struct qdisc_rate_table *R_tab;
+
+ /* Overlimit strategy parameters */
+ void (*overlimit)(struct cbq_class *cl);
+ psched_tdiff_t penalty;
+
+ /* General scheduler (WRR) parameters */
+ long allot;
+ long quantum; /* Allotment per WRR round */
+ long weight; /* Relative allotment: see below */
+
+ struct Qdisc *qdisc; /* Ptr to CBQ discipline */
+ struct cbq_class *split; /* Ptr to split node */
+ struct cbq_class *share; /* Ptr to LS parent in the class tree */
+ struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
+ struct cbq_class *borrow; /* NULL if class is bandwidth limited;
+ parent otherwise */
+ struct cbq_class *sibling; /* Sibling chain */
+ struct cbq_class *children; /* Pointer to children chain */
+
+ struct Qdisc *q; /* Elementary queueing discipline */
+
+
+/* Variables */
+ unsigned char cpriority; /* Effective priority */
+ unsigned char delayed;
+ unsigned char level; /* level of the class in hierarchy:
+ 0 for leaf classes, and maximal
+ level of children + 1 for nodes.
+ */
+
+ psched_time_t last; /* Last end of service */
+ psched_time_t undertime;
+ long avgidle;
+ long deficit; /* Saved deficit for WRR */
+ psched_time_t penalized;
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ struct gnet_stats_rate_est rate_est;
+ struct tc_cbq_xstats xstats;
+
+ struct tcf_proto *filter_list;
+
+ int refcnt;
+ int filters;
+
+ struct cbq_class *defaults[TC_PRIO_MAX+1];
+};
+
+struct cbq_sched_data
+{
+ struct Qdisc_class_hash clhash; /* Hash table of all classes */
+ int nclasses[TC_CBQ_MAXPRIO+1];
+ unsigned quanta[TC_CBQ_MAXPRIO+1];
+
+ struct cbq_class link;
+
+ unsigned activemask;
+ struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
+ with backlog */
+
+#ifdef CONFIG_NET_CLS_ACT
+ struct cbq_class *rx_class;
+#endif
+ struct cbq_class *tx_class;
+ struct cbq_class *tx_borrowed;
+ int tx_len;
+ psched_time_t now; /* Cached timestamp */
+ psched_time_t now_rt; /* Cached real time */
+ unsigned pmask;
+
+ struct hrtimer delay_timer;
+ struct qdisc_watchdog watchdog; /* Watchdog timer,
+ started when CBQ has
+ backlog, but cannot
+ transmit just now */
+ psched_tdiff_t wd_expires;
+ int toplevel;
+ u32 hgenerator;
+};
+
+
+#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
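+
+/* Editorial note: L2T ("length to time") converts a packet length into its
+   transmission time in psched ticks using the rate table attached to the
+   class (cl->R_tab); qdisc_l2t() is the generic table-lookup helper. */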
+
+static __inline__ struct cbq_class *
+cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
+{
+ struct Qdisc_class_common *clc;
+
+ clc = qdisc_class_find(&q->clhash, classid);
+ if (clc == NULL)
+ return NULL;
+ return container_of(clc, struct cbq_class, common);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+
+static struct cbq_class *
+cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
+{
+ struct cbq_class *cl, *new;
+
+ for (cl = this->tparent; cl; cl = cl->tparent)
+ if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
+ return new;
+
+ return NULL;
+}
+
+#endif
+
+/* Classify packet. The procedure is pretty complicated, but
+ it allows us to combine link sharing and priority scheduling
+ transparently.
+
+   Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
+ so that it resolves to split nodes. Then packets are classified
+ by logical priority, or a more specific classifier may be attached
+ to the split node.
+ */
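+
+/* Editorial summary of the resolution order implemented below: if
+   skb->priority already carries one of our classids, step 1 is a direct
+   hash lookup and nothing else runs. Otherwise the filters attached to the
+   current head class either select a concrete class (via res.classid) or,
+   on a split node, an index into defaults[] keyed by the TC_PRIO_* band,
+   with defaults[TC_PRIO_BESTEFFORT] as the fallback when that slot is
+   empty. */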
+
+static struct cbq_class *
+cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *head = &q->link;
+ struct cbq_class **defmap;
+ struct cbq_class *cl = NULL;
+ u32 prio = skb->priority;
+ struct tcf_result res;
+
+ /*
+ * Step 1. If skb->priority points to one of our classes, use it.
+ */
+ if (TC_H_MAJ(prio^sch->handle) == 0 &&
+ (cl = cbq_class_lookup(q, prio)) != NULL)
+ return cl;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ for (;;) {
+ int result = 0;
+ defmap = head->defaults;
+
+ /*
+ * Step 2+n. Apply classifier.
+ */
+ if (!head->filter_list ||
+ (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
+ goto fallback;
+
+ if ((cl = (void*)res.class) == NULL) {
+ if (TC_H_MAJ(res.classid))
+ cl = cbq_class_lookup(q, res.classid);
+ else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+ cl = defmap[TC_PRIO_BESTEFFORT];
+
+ if (cl == NULL || cl->level >= head->level)
+ goto fallback;
+ }
+
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_QUEUED:
+ case TC_ACT_STOLEN:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return NULL;
+ case TC_ACT_RECLASSIFY:
+ return cbq_reclassify(skb, cl);
+ }
+#endif
+ if (cl->level == 0)
+ return cl;
+
+ /*
+ * Step 3+n. If classifier selected a link sharing class,
+ * apply agency specific classifier.
+		 * Repeat this procedure until we hit a leaf node.
+ */
+ head = cl;
+ }
+
+fallback:
+ cl = head;
+
+ /*
+ * Step 4. No success...
+ */
+ if (TC_H_MAJ(prio) == 0 &&
+ !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+ !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
+ return head;
+
+ return cl;
+}
+
+/*
+ A packet has just been enqueued on the empty class.
+ cbq_activate_class adds it to the tail of active class list
+ of its priority band.
+ */
+
+static __inline__ void cbq_activate_class(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ int prio = cl->cpriority;
+ struct cbq_class *cl_tail;
+
+ cl_tail = q->active[prio];
+ q->active[prio] = cl;
+
+ if (cl_tail != NULL) {
+ cl->next_alive = cl_tail->next_alive;
+ cl_tail->next_alive = cl;
+ } else {
+ cl->next_alive = cl;
+ q->activemask |= (1<<prio);
+ }
+}
+
+/*
+ Unlink class from active chain.
+ Note that this same procedure is done directly in cbq_dequeue*
+ during round-robin procedure.
+ */
+
+static void cbq_deactivate_class(struct cbq_class *this)
+{
+ struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+ int prio = this->cpriority;
+ struct cbq_class *cl;
+ struct cbq_class *cl_prev = q->active[prio];
+
+ do {
+ cl = cl_prev->next_alive;
+ if (cl == this) {
+ cl_prev->next_alive = cl->next_alive;
+ cl->next_alive = NULL;
+
+ if (cl == q->active[prio]) {
+ q->active[prio] = cl_prev;
+ if (cl == q->active[prio]) {
+ q->active[prio] = NULL;
+ q->activemask &= ~(1<<prio);
+ return;
+ }
+ }
+ return;
+ }
+ } while ((cl_prev = cl) != q->active[prio]);
+}
+
+static void
+cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+ int toplevel = q->toplevel;
+
+ if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+ psched_time_t now;
+ psched_tdiff_t incr;
+
+ now = psched_get_time();
+ incr = now - q->now_rt;
+ now = q->now + incr;
+
+ do {
+ if (cl->undertime < now) {
+ q->toplevel = cl->level;
+ return;
+ }
+ } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+ }
+}
+
+static int
+cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ int uninitialized_var(ret);
+ struct cbq_class *cl = cbq_classify(skb, sch, &ret);
+
+#ifdef CONFIG_NET_CLS_ACT
+ q->rx_class = cl;
+#endif
+ if (cl == NULL) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+
+#ifdef CONFIG_NET_CLS_ACT
+ cl->q->__parent = sch;
+#endif
+ ret = qdisc_enqueue(skb, cl->q);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->bstats.packets++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ cbq_mark_toplevel(q, cl);
+ if (!cl->next_alive)
+ cbq_activate_class(cl);
+ return ret;
+ }
+
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cbq_mark_toplevel(q, cl);
+ cl->qstats.drops++;
+ }
+ return ret;
+}
+
+static int
+cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl;
+ int ret;
+
+ if ((cl = q->tx_class) == NULL) {
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_CN;
+ }
+ q->tx_class = NULL;
+
+ cbq_mark_toplevel(q, cl);
+
+#ifdef CONFIG_NET_CLS_ACT
+ q->rx_class = cl;
+ cl->q->__parent = sch;
+#endif
+ if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ if (!cl->next_alive)
+ cbq_activate_class(cl);
+ return 0;
+ }
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
+}
+
+/* Overlimit actions */
+
+/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
+
+static void cbq_ovl_classic(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ psched_tdiff_t delay = cl->undertime - q->now;
+
+ if (!cl->delayed) {
+ delay += cl->offtime;
+
+ /*
+ Class goes to sleep, so that it will have no
+ chance to work avgidle. Let's forgive it 8)
+
+		   BTW cbq-2.0 has a bug in this
+		   place: apparently they forgot to shift it by cl->ewma_log.
+ */
+ if (cl->avgidle < 0)
+ delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
+ if (cl->avgidle < cl->minidle)
+ cl->avgidle = cl->minidle;
+ if (delay <= 0)
+ delay = 1;
+ cl->undertime = q->now + delay;
+
+ cl->xstats.overactions++;
+ cl->delayed = 1;
+ }
+ if (q->wd_expires == 0 || q->wd_expires > delay)
+ q->wd_expires = delay;
+
+ /* Dirty work! We must schedule wakeups based on
+ real available rate, rather than leaf rate,
+ which may be tiny (even zero).
+ */
+ if (q->toplevel == TC_CBQ_MAXLEVEL) {
+ struct cbq_class *b;
+ psched_tdiff_t base_delay = q->wd_expires;
+
+ for (b = cl->borrow; b; b = b->borrow) {
+ delay = b->undertime - q->now;
+ if (delay < base_delay) {
+ if (delay <= 0)
+ delay = 1;
+ base_delay = delay;
+ }
+ }
+
+ q->wd_expires = base_delay;
+ }
+}
+
+/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
+ they go overlimit
+ */
+
+static void cbq_ovl_rclassic(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ struct cbq_class *this = cl;
+
+ do {
+ if (cl->level > q->toplevel) {
+ cl = NULL;
+ break;
+ }
+ } while ((cl = cl->borrow) != NULL);
+
+ if (cl == NULL)
+ cl = this;
+ cbq_ovl_classic(cl);
+}
+
+/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
+
+static void cbq_ovl_delay(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ psched_tdiff_t delay = cl->undertime - q->now;
+
+ if (test_bit(__QDISC_STATE_DEACTIVATED,
+ &qdisc_root_sleeping(cl->qdisc)->state))
+ return;
+
+ if (!cl->delayed) {
+ psched_time_t sched = q->now;
+ ktime_t expires;
+
+ delay += cl->offtime;
+ if (cl->avgidle < 0)
+ delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
+ if (cl->avgidle < cl->minidle)
+ cl->avgidle = cl->minidle;
+ cl->undertime = q->now + delay;
+
+ if (delay > 0) {
+ sched += delay + cl->penalty;
+ cl->penalized = sched;
+ cl->cpriority = TC_CBQ_MAXPRIO;
+ q->pmask |= (1<<TC_CBQ_MAXPRIO);
+
+ expires = ktime_set(0, 0);
+ expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
+ if (hrtimer_try_to_cancel(&q->delay_timer) &&
+ ktime_to_ns(ktime_sub(
+ hrtimer_get_expires(&q->delay_timer),
+ expires)) > 0)
+ hrtimer_set_expires(&q->delay_timer, expires);
+ hrtimer_restart(&q->delay_timer);
+ cl->delayed = 1;
+ cl->xstats.overactions++;
+ return;
+ }
+ delay = 1;
+ }
+ if (q->wd_expires == 0 || q->wd_expires > delay)
+ q->wd_expires = delay;
+}
+
+/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
+
+static void cbq_ovl_lowprio(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+
+ cl->penalized = q->now + cl->penalty;
+
+ if (cl->cpriority != cl->priority2) {
+ cl->cpriority = cl->priority2;
+ q->pmask |= (1<<cl->cpriority);
+ cl->xstats.overactions++;
+ }
+ cbq_ovl_classic(cl);
+}
+
+/* TC_CBQ_OVL_DROP: penalize class by dropping */
+
+static void cbq_ovl_drop(struct cbq_class *cl)
+{
+ if (cl->q->ops->drop)
+ if (cl->q->ops->drop(cl->q))
+ cl->qdisc->q.qlen--;
+ cl->xstats.overactions++;
+ cbq_ovl_classic(cl);
+}
+
+static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
+ psched_time_t now)
+{
+ struct cbq_class *cl;
+ struct cbq_class *cl_prev = q->active[prio];
+ psched_time_t sched = now;
+
+ if (cl_prev == NULL)
+ return 0;
+
+ do {
+ cl = cl_prev->next_alive;
+ if (now - cl->penalized > 0) {
+ cl_prev->next_alive = cl->next_alive;
+ cl->next_alive = NULL;
+ cl->cpriority = cl->priority;
+ cl->delayed = 0;
+ cbq_activate_class(cl);
+
+ if (cl == q->active[prio]) {
+ q->active[prio] = cl_prev;
+ if (cl == q->active[prio]) {
+ q->active[prio] = NULL;
+ return 0;
+ }
+ }
+
+ cl = cl_prev->next_alive;
+ } else if (sched - cl->penalized > 0)
+ sched = cl->penalized;
+ } while ((cl_prev = cl) != q->active[prio]);
+
+ return sched - now;
+}
+
+static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
+{
+ struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
+ delay_timer);
+ struct Qdisc *sch = q->watchdog.qdisc;
+ psched_time_t now;
+ psched_tdiff_t delay = 0;
+ unsigned pmask;
+
+ now = psched_get_time();
+
+ pmask = q->pmask;
+ q->pmask = 0;
+
+ while (pmask) {
+ int prio = ffz(~pmask);
+ psched_tdiff_t tmp;
+
+ pmask &= ~(1<<prio);
+
+ tmp = cbq_undelay_prio(q, prio, now);
+ if (tmp > 0) {
+ q->pmask |= 1<<prio;
+ if (tmp < delay || delay == 0)
+ delay = tmp;
+ }
+ }
+
+ if (delay) {
+ ktime_t time;
+
+ time = ktime_set(0, 0);
+ time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
+ hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
+ }
+
+ sch->flags &= ~TCQ_F_THROTTLED;
+ __netif_schedule(qdisc_root(sch));
+ return HRTIMER_NORESTART;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
+{
+ struct Qdisc *sch = child->__parent;
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = q->rx_class;
+
+ q->rx_class = NULL;
+
+ if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
+ int ret;
+
+ cbq_mark_toplevel(q, cl);
+
+ q->rx_class = cl;
+ cl->q->__parent = sch;
+
+ ret = qdisc_enqueue(skb, cl->q);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->bstats.packets++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ if (!cl->next_alive)
+ cbq_activate_class(cl);
+ return 0;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return 0;
+ }
+
+ sch->qstats.drops++;
+ return -1;
+}
+#endif
+
+/*
+   This is a mission-critical procedure.
+
+   We "regenerate" the toplevel cutoff if the transmitting class
+   has backlog and is not regulated. This is not part of the
+   original CBQ description, but looks more reasonable.
+   Probably it is wrong; this question needs further investigation.
+*/
+
+static __inline__ void
+cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
+ struct cbq_class *borrowed)
+{
+ if (cl && q->toplevel >= borrowed->level) {
+ if (cl->q->q.qlen > 1) {
+ do {
+ if (borrowed->undertime == PSCHED_PASTPERFECT) {
+ q->toplevel = borrowed->level;
+ return;
+ }
+ } while ((borrowed=borrowed->borrow) != NULL);
+ }
+#if 0
+ /* It is not necessary now. Uncommenting it
+ will save CPU cycles, but decrease fairness.
+ */
+ q->toplevel = TC_CBQ_MAXLEVEL;
+#endif
+ }
+}
+
+static void
+cbq_update(struct cbq_sched_data *q)
+{
+ struct cbq_class *this = q->tx_class;
+ struct cbq_class *cl = this;
+ int len = q->tx_len;
+
+ q->tx_class = NULL;
+
+ for ( ; cl; cl = cl->share) {
+ long avgidle = cl->avgidle;
+ long idle;
+
+ cl->bstats.packets++;
+ cl->bstats.bytes += len;
+
+ /*
+ (now - last) is total time between packet right edges.
+ (last_pktlen/rate) is "virtual" busy time, so that
+
+ idle = (now - last) - last_pktlen/rate
+ */
+
+ idle = q->now - cl->last;
+ if ((unsigned long)idle > 128*1024*1024) {
+ avgidle = cl->maxidle;
+ } else {
+ idle -= L2T(cl, len);
+
+ /* true_avgidle := (1-W)*true_avgidle + W*idle,
+ where W=2^{-ewma_log}. But cl->avgidle is scaled:
+ cl->avgidle == true_avgidle/W,
+ hence:
+ */
+ avgidle += idle - (avgidle>>cl->ewma_log);
+ }
+
+ if (avgidle <= 0) {
+ /* Overlimit or at-limit */
+
+ if (avgidle < cl->minidle)
+ avgidle = cl->minidle;
+
+ cl->avgidle = avgidle;
+
+ /* Calculate expected time, when this class
+ will be allowed to send.
+ It will occur, when:
+ (1-W)*true_avgidle + W*delay = 0, i.e.
+ idle = (1/W - 1)*(-true_avgidle)
+ or
+ idle = (1 - W)*(-cl->avgidle);
+ */
+ idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
+
+ /*
+ That is not all.
+ To maintain the rate allocated to the class,
+ we add to undertime virtual clock,
+ necessary to complete transmitted packet.
+ (len/phys_bandwidth has been already passed
+ to the moment of cbq_update)
+ */
+
+ idle -= L2T(&q->link, len);
+ idle += L2T(cl, len);
+
+ cl->undertime = q->now + idle;
+ } else {
+ /* Underlimit */
+
+ cl->undertime = PSCHED_PASTPERFECT;
+ if (avgidle > cl->maxidle)
+ cl->avgidle = cl->maxidle;
+ else
+ cl->avgidle = avgidle;
+ }
+ cl->last = q->now;
+ }
+
+ cbq_update_toplevel(q, this, q->tx_borrowed);
+}
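+
+/* Editorial note on the scaling used in cbq_update(): with W = 2^-ewma_log
+   the class stores avgidle == true_avgidle / W, so the update
+
+	avgidle += idle - (avgidle >> cl->ewma_log);
+
+   is the fixed-point form of true_avgidle = (1-W)*true_avgidle + W*idle.
+   For example, ewma_log = 5 (W = 1/32) averages over roughly the last 32
+   packets. */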
+
+static __inline__ struct cbq_class *
+cbq_under_limit(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ struct cbq_class *this_cl = cl;
+
+ if (cl->tparent == NULL)
+ return cl;
+
+ if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
+ cl->delayed = 0;
+ return cl;
+ }
+
+ do {
+		/* This is a very suspicious place. Currently the overlimit
+		   action is generated for unbounded classes only if the
+		   link is completely congested. Though this agrees with
+		   the ancestor-only paradigm, it looks very odd. In
+		   particular, it means that this chunk of code will either
+		   never be called or will strongly amplify burstiness.
+		   Dangerous, silly, but no other solution exists.
+ */
+ if ((cl = cl->borrow) == NULL) {
+ this_cl->qstats.overlimits++;
+ this_cl->overlimit(this_cl);
+ return NULL;
+ }
+ if (cl->level > q->toplevel)
+ return NULL;
+ } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
+
+ cl->delayed = 0;
+ return cl;
+}
+
+static __inline__ struct sk_buff *
+cbq_dequeue_prio(struct Qdisc *sch, int prio)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl_tail, *cl_prev, *cl;
+ struct sk_buff *skb;
+ int deficit;
+
+ cl_tail = cl_prev = q->active[prio];
+ cl = cl_prev->next_alive;
+
+ do {
+ deficit = 0;
+
+ /* Start round */
+ do {
+ struct cbq_class *borrow = cl;
+
+ if (cl->q->q.qlen &&
+ (borrow = cbq_under_limit(cl)) == NULL)
+ goto skip_class;
+
+ if (cl->deficit <= 0) {
+ /* Class exhausted its allotment per
+ this round. Switch to the next one.
+ */
+ deficit = 1;
+ cl->deficit += cl->quantum;
+ goto next_class;
+ }
+
+ skb = cl->q->dequeue(cl->q);
+
+ /* Class did not give us any skb :-(
+			   It could occur even if cl->q->q.qlen != 0,
+			   e.g. if cl->q == "tbf".
+ */
+ if (skb == NULL)
+ goto skip_class;
+
+ cl->deficit -= qdisc_pkt_len(skb);
+ q->tx_class = cl;
+ q->tx_borrowed = borrow;
+ if (borrow != cl) {
+#ifndef CBQ_XSTATS_BORROWS_BYTES
+ borrow->xstats.borrows++;
+ cl->xstats.borrows++;
+#else
+ borrow->xstats.borrows += qdisc_pkt_len(skb);
+ cl->xstats.borrows += qdisc_pkt_len(skb);
+#endif
+ }
+ q->tx_len = qdisc_pkt_len(skb);
+
+ if (cl->deficit <= 0) {
+ q->active[prio] = cl;
+ cl = cl->next_alive;
+ cl->deficit += cl->quantum;
+ }
+ return skb;
+
+skip_class:
+ if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
+ /* Class is empty or penalized.
+ Unlink it from active chain.
+ */
+ cl_prev->next_alive = cl->next_alive;
+ cl->next_alive = NULL;
+
+ /* Did cl_tail point to it? */
+ if (cl == cl_tail) {
+ /* Repair it! */
+ cl_tail = cl_prev;
+
+ /* Was it the last class in this band? */
+ if (cl == cl_tail) {
+ /* Kill the band! */
+ q->active[prio] = NULL;
+ q->activemask &= ~(1<<prio);
+ if (cl->q->q.qlen)
+ cbq_activate_class(cl);
+ return NULL;
+ }
+
+ q->active[prio] = cl_tail;
+ }
+ if (cl->q->q.qlen)
+ cbq_activate_class(cl);
+
+ cl = cl_prev;
+ }
+
+next_class:
+ cl_prev = cl;
+ cl = cl->next_alive;
+ } while (cl_prev != cl_tail);
+ } while (deficit);
+
+ q->active[prio] = cl_prev;
+
+ return NULL;
+}
+
+static __inline__ struct sk_buff *
+cbq_dequeue_1(struct Qdisc *sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+ unsigned activemask;
+
+ activemask = q->activemask&0xFF;
+ while (activemask) {
+ int prio = ffz(~activemask);
+ activemask &= ~(1<<prio);
+ skb = cbq_dequeue_prio(sch, prio);
+ if (skb)
+ return skb;
+ }
+ return NULL;
+}
+
+static struct sk_buff *
+cbq_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ psched_time_t now;
+ psched_tdiff_t incr;
+
+ now = psched_get_time();
+ incr = now - q->now_rt;
+
+ if (q->tx_class) {
+ psched_tdiff_t incr2;
+ /* Time integrator. We calculate EOS time
+ by adding expected packet transmission time.
+ If real time is greater, we warp artificial clock,
+ so that:
+
+ cbq_time = max(real_time, work);
+ */
+ incr2 = L2T(&q->link, q->tx_len);
+ q->now += incr2;
+ cbq_update(q);
+ if ((incr -= incr2) < 0)
+ incr = 0;
+ }
+ q->now += incr;
+ q->now_rt = now;
+
+ for (;;) {
+ q->wd_expires = 0;
+
+ skb = cbq_dequeue_1(sch);
+ if (skb) {
+ sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ return skb;
+ }
+
+ /* All the classes are overlimit.
+
+ It is possible, if:
+
+ 1. Scheduler is empty.
+ 2. Toplevel cutoff inhibited borrowing.
+ 3. Root class is overlimit.
+
+		   Reset the 2nd and 3rd conditions and retry.
+
+		   Note that NS and cbq-2.0 are buggy: peeking
+		   an arbitrary class is appropriate for ancestor-only
+		   sharing, but not for the toplevel algorithm.
+
+		   Our version is better but slower, because it requires
+		   two passes; this is unavoidable with top-level sharing.
+ */
+
+ if (q->toplevel == TC_CBQ_MAXLEVEL &&
+ q->link.undertime == PSCHED_PASTPERFECT)
+ break;
+
+ q->toplevel = TC_CBQ_MAXLEVEL;
+ q->link.undertime = PSCHED_PASTPERFECT;
+ }
+
+ /* No packets in scheduler or nobody wants to give them to us :-(
+ Sigh... start watchdog timer in the last case. */
+
+ if (sch->q.qlen) {
+ sch->qstats.overlimits++;
+ if (q->wd_expires)
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + q->wd_expires);
+ }
+ return NULL;
+}
+
+/* CBQ class maintenance routines */
+
+static void cbq_adjust_levels(struct cbq_class *this)
+{
+ if (this == NULL)
+ return;
+
+ do {
+ int level = 0;
+ struct cbq_class *cl;
+
+ if ((cl = this->children) != NULL) {
+ do {
+ if (cl->level > level)
+ level = cl->level;
+ } while ((cl = cl->sibling) != this->children);
+ }
+ this->level = level+1;
+ } while ((this = this->tparent) != NULL);
+}
+
+static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
+{
+ struct cbq_class *cl;
+ struct hlist_node *n;
+ unsigned int h;
+
+ if (q->quanta[prio] == 0)
+ return;
+
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+			/* BUGGGG... Beware! This expression suffers from
+ arithmetic overflows!
+ */
+ if (cl->priority == prio) {
+ cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
+ q->quanta[prio];
+ }
+ if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
+ printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+ cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
+ }
+ }
+ }
+}
+
+static void cbq_sync_defmap(struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+ struct cbq_class *split = cl->split;
+ unsigned h;
+ int i;
+
+ if (split == NULL)
+ return;
+
+ for (i=0; i<=TC_PRIO_MAX; i++) {
+ if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+ split->defaults[i] = NULL;
+ }
+
+ for (i=0; i<=TC_PRIO_MAX; i++) {
+ int level = split->level;
+
+ if (split->defaults[i])
+ continue;
+
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ struct hlist_node *n;
+ struct cbq_class *c;
+
+ hlist_for_each_entry(c, n, &q->clhash.hash[h],
+ common.hnode) {
+ if (c->split == split && c->level < level &&
+ c->defmap&(1<<i)) {
+ split->defaults[i] = c;
+ level = c->level;
+ }
+ }
+ }
+ }
+}
+
+static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
+{
+ struct cbq_class *split = NULL;
+
+ if (splitid == 0) {
+ if ((split = cl->split) == NULL)
+ return;
+ splitid = split->common.classid;
+ }
+
+ if (split == NULL || split->common.classid != splitid) {
+ for (split = cl->tparent; split; split = split->tparent)
+ if (split->common.classid == splitid)
+ break;
+ }
+
+ if (split == NULL)
+ return;
+
+ if (cl->split != split) {
+ cl->defmap = 0;
+ cbq_sync_defmap(cl);
+ cl->split = split;
+ cl->defmap = def&mask;
+ } else
+ cl->defmap = (cl->defmap&~mask)|(def&mask);
+
+ cbq_sync_defmap(cl);
+}
+
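+/* Children of a parent class are kept on a circular singly linked list
+ * threaded through ->sibling; unlinking splices the class out of that
+ * ring and fixes up the parent's ->children pointer if it pointed here.
+ */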
+static void cbq_unlink_class(struct cbq_class *this)
+{
+ struct cbq_class *cl, **clp;
+ struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+
+ qdisc_class_hash_remove(&q->clhash, &this->common);
+
+ if (this->tparent) {
+ clp=&this->sibling;
+ cl = *clp;
+ do {
+ if (cl == this) {
+ *clp = cl->sibling;
+ break;
+ }
+ clp = &cl->sibling;
+ } while ((cl = *clp) != this->sibling);
+
+ if (this->tparent->children == this) {
+ this->tparent->children = this->sibling;
+ if (this->sibling == this)
+ this->tparent->children = NULL;
+ }
+ } else {
+ WARN_ON(this->sibling != this);
+ }
+}
+
+static void cbq_link_class(struct cbq_class *this)
+{
+ struct cbq_sched_data *q = qdisc_priv(this->qdisc);
+ struct cbq_class *parent = this->tparent;
+
+ this->sibling = this;
+ qdisc_class_hash_insert(&q->clhash, &this->common);
+
+ if (parent == NULL)
+ return;
+
+ if (parent->children == NULL) {
+ parent->children = this;
+ } else {
+ this->sibling = parent->children->sibling;
+ parent->children->sibling = this;
+ }
+}
+
+static unsigned int cbq_drop(struct Qdisc* sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl, *cl_head;
+ int prio;
+ unsigned int len;
+
+ for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
+ if ((cl_head = q->active[prio]) == NULL)
+ continue;
+
+ cl = cl_head;
+ do {
+ if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
+ sch->q.qlen--;
+ if (!cl->q->q.qlen)
+ cbq_deactivate_class(cl);
+ return len;
+ }
+ } while ((cl = cl->next_alive) != cl_head);
+ }
+ return 0;
+}
+
+static void
+cbq_reset(struct Qdisc* sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl;
+ struct hlist_node *n;
+ int prio;
+ unsigned h;
+
+ q->activemask = 0;
+ q->pmask = 0;
+ q->tx_class = NULL;
+ q->tx_borrowed = NULL;
+ qdisc_watchdog_cancel(&q->watchdog);
+ hrtimer_cancel(&q->delay_timer);
+ q->toplevel = TC_CBQ_MAXLEVEL;
+ q->now = psched_get_time();
+ q->now_rt = q->now;
+
+ for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
+ q->active[prio] = NULL;
+
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+ qdisc_reset(cl->q);
+
+ cl->next_alive = NULL;
+ cl->undertime = PSCHED_PASTPERFECT;
+ cl->avgidle = cl->maxidle;
+ cl->deficit = cl->quantum;
+ cl->cpriority = cl->priority;
+ }
+ }
+ sch->q.qlen = 0;
+}
+
+
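+/* Apply the link-sharing (LSS) parameters: ISOLATED means the class does
+ * not lend spare bandwidth to siblings (share == NULL), BOUNDED means it
+ * may not borrow from its parent (borrow == NULL); the remaining fields
+ * tune the avgidle estimator and the overlimit penalty.
+ */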
+static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
+{
+ if (lss->change&TCF_CBQ_LSS_FLAGS) {
+ cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+ cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+ }
+ if (lss->change&TCF_CBQ_LSS_EWMA)
+ cl->ewma_log = lss->ewma_log;
+ if (lss->change&TCF_CBQ_LSS_AVPKT)
+ cl->avpkt = lss->avpkt;
+ if (lss->change&TCF_CBQ_LSS_MINIDLE)
+ cl->minidle = -(long)lss->minidle;
+ if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+ cl->maxidle = lss->maxidle;
+ cl->avgidle = lss->maxidle;
+ }
+ if (lss->change&TCF_CBQ_LSS_OFFTIME)
+ cl->offtime = lss->offtime;
+ return 0;
+}
+
+static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+ q->nclasses[cl->priority]--;
+ q->quanta[cl->priority] -= cl->weight;
+ cbq_normalize_quanta(q, cl->priority);
+}
+
+static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
+{
+ q->nclasses[cl->priority]++;
+ q->quanta[cl->priority] += cl->weight;
+ cbq_normalize_quanta(q, cl->priority);
+}
+
+static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
+{
+ struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
+
+ if (wrr->allot)
+ cl->allot = wrr->allot;
+ if (wrr->weight)
+ cl->weight = wrr->weight;
+ if (wrr->priority) {
+ cl->priority = wrr->priority-1;
+ cl->cpriority = cl->priority;
+ if (cl->priority >= cl->priority2)
+ cl->priority2 = TC_CBQ_MAXPRIO-1;
+ }
+
+ cbq_addprio(q, cl);
+ return 0;
+}
+
+static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
+{
+ switch (ovl->strategy) {
+ case TC_CBQ_OVL_CLASSIC:
+ cl->overlimit = cbq_ovl_classic;
+ break;
+ case TC_CBQ_OVL_DELAY:
+ cl->overlimit = cbq_ovl_delay;
+ break;
+ case TC_CBQ_OVL_LOWPRIO:
+ if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
+ ovl->priority2-1 <= cl->priority)
+ return -EINVAL;
+ cl->priority2 = ovl->priority2-1;
+ cl->overlimit = cbq_ovl_lowprio;
+ break;
+ case TC_CBQ_OVL_DROP:
+ cl->overlimit = cbq_ovl_drop;
+ break;
+ case TC_CBQ_OVL_RCLASSIC:
+ cl->overlimit = cbq_ovl_rclassic;
+ break;
+ default:
+ return -EINVAL;
+ }
+ cl->penalty = ovl->penalty;
+ return 0;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
+{
+ cl->police = p->police;
+
+ if (cl->q->handle) {
+ if (p->police == TC_POLICE_RECLASSIFY)
+ cl->q->reshape_fail = cbq_reshape_fail;
+ else
+ cl->q->reshape_fail = NULL;
+ }
+ return 0;
+}
+#endif
+
+static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
+{
+ cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
+ return 0;
+}
+
+static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
+ [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
+ [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
+ [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
+ [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
+ [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
+ [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
+};
+
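+/* Set up the root "link" class; TCA_CBQ_RATE and TCA_CBQ_RTAB are
+ * mandatory. Illustrative userspace setup (exact syntax depends on the
+ * iproute2/tc version):
+ *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
+ */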
+static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_CBQ_MAX + 1];
+ struct tc_ratespec *r;
+ int err;
+
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
+ return -EINVAL;
+
+ r = nla_data(tb[TCA_CBQ_RATE]);
+
+ if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
+ return -EINVAL;
+
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ goto put_rtab;
+
+ q->link.refcnt = 1;
+ q->link.sibling = &q->link;
+ q->link.common.classid = sch->handle;
+ q->link.qdisc = sch;
+ if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ sch->handle)))
+ q->link.q = &noop_qdisc;
+
+ q->link.priority = TC_CBQ_MAXPRIO-1;
+ q->link.priority2 = TC_CBQ_MAXPRIO-1;
+ q->link.cpriority = TC_CBQ_MAXPRIO-1;
+ q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
+ q->link.overlimit = cbq_ovl_classic;
+ q->link.allot = psched_mtu(qdisc_dev(sch));
+ q->link.quantum = q->link.allot;
+ q->link.weight = q->link.R_tab->rate.rate;
+
+ q->link.ewma_log = TC_CBQ_DEF_EWMA;
+ q->link.avpkt = q->link.allot/2;
+ q->link.minidle = -0x7FFFFFFF;
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ q->delay_timer.function = cbq_undelay;
+ q->toplevel = TC_CBQ_MAXLEVEL;
+ q->now = psched_get_time();
+ q->now_rt = q->now;
+
+ cbq_link_class(&q->link);
+
+ if (tb[TCA_CBQ_LSSOPT])
+ cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
+
+ cbq_addprio(q, &q->link);
+ return 0;
+
+put_rtab:
+ qdisc_put_rtab(q->link.R_tab);
+ return err;
+}
+
+static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+
+ NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_cbq_lssopt opt;
+
+ opt.flags = 0;
+ if (cl->borrow == NULL)
+ opt.flags |= TCF_CBQ_LSS_BOUNDED;
+ if (cl->share == NULL)
+ opt.flags |= TCF_CBQ_LSS_ISOLATED;
+ opt.ewma_log = cl->ewma_log;
+ opt.level = cl->level;
+ opt.avpkt = cl->avpkt;
+ opt.maxidle = cl->maxidle;
+ opt.minidle = (u32)(-cl->minidle);
+ opt.offtime = cl->offtime;
+ opt.change = ~0;
+ NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_cbq_wrropt opt;
+
+ opt.flags = 0;
+ opt.allot = cl->allot;
+ opt.priority = cl->priority+1;
+ opt.cpriority = cl->cpriority+1;
+ opt.weight = cl->weight;
+ NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_cbq_ovl opt;
+
+ opt.strategy = cl->ovl_strategy;
+ opt.priority2 = cl->priority2+1;
+ opt.pad = 0;
+ opt.penalty = cl->penalty;
+ NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_cbq_fopt opt;
+
+ if (cl->split || cl->defmap) {
+ opt.split = cl->split ? cl->split->common.classid : 0;
+ opt.defmap = cl->defmap;
+ opt.defchange = ~0;
+ NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
+ }
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_cbq_police opt;
+
+ if (cl->police) {
+ opt.police = cl->police;
+ opt.__res1 = 0;
+ opt.__res2 = 0;
+ NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
+ }
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+#endif
+
+static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
+{
+ if (cbq_dump_lss(skb, cl) < 0 ||
+ cbq_dump_rate(skb, cl) < 0 ||
+ cbq_dump_wrr(skb, cl) < 0 ||
+ cbq_dump_ovl(skb, cl) < 0 ||
+#ifdef CONFIG_NET_CLS_ACT
+ cbq_dump_police(skb, cl) < 0 ||
+#endif
+ cbq_dump_fopt(skb, cl) < 0)
+ return -1;
+ return 0;
+}
+
+static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (cbq_dump_attr(skb, &q->link) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int
+cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+
+ q->link.xstats.avgidle = q->link.avgidle;
+ return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
+}
+
+static int
+cbq_dump_class(struct Qdisc *sch, unsigned long arg,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+ struct nlattr *nest;
+
+ if (cl->tparent)
+ tcm->tcm_parent = cl->tparent->common.classid;
+ else
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_handle = cl->common.classid;
+ tcm->tcm_info = cl->q->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (cbq_dump_attr(skb, cl) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int
+cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ struct gnet_dump *d)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ cl->qstats.qlen = cl->q->q.qlen;
+ cl->xstats.avgidle = cl->avgidle;
+ cl->xstats.undertime = 0;
+
+ if (cl->undertime != PSCHED_PASTPERFECT)
+ cl->xstats.undertime = cl->undertime - q->now;
+
+ if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ return -1;
+
+ return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
+}
+
+static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ if (cl) {
+ if (new == NULL) {
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->common.classid);
+ if (new == NULL)
+ return -ENOBUFS;
+ } else {
+#ifdef CONFIG_NET_CLS_ACT
+ if (cl->police == TC_POLICE_RECLASSIFY)
+ new->reshape_fail = cbq_reshape_fail;
+#endif
+ }
+ sch_tree_lock(sch);
+ *old = xchg(&cl->q, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static struct Qdisc *
+cbq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ return cl ? cl->q : NULL;
+}
+
+static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class *)arg;
+
+ if (cl->q->q.qlen == 0)
+ cbq_deactivate_class(cl);
+}
+
+static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = cbq_class_lookup(q, classid);
+
+ if (cl) {
+ cl->refcnt++;
+ return (unsigned long)cl;
+ }
+ return 0;
+}
+
+static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+
+ WARN_ON(cl->filters);
+
+ tcf_destroy_chain(&cl->filter_list);
+ qdisc_destroy(cl->q);
+ qdisc_put_rtab(cl->R_tab);
+ gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ if (cl != &q->link)
+ kfree(cl);
+}
+
+static void
+cbq_destroy(struct Qdisc* sch)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct hlist_node *n, *next;
+ struct cbq_class *cl;
+ unsigned h;
+
+#ifdef CONFIG_NET_CLS_ACT
+ q->rx_class = NULL;
+#endif
+ /*
+ * Filters must be destroyed first because we don't destroy the
+ * classes from root to leaves, which means that filters can still
+ * be bound to classes which have already been destroyed. --TGR '04
+ */
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
+ }
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
+ common.hnode)
+ cbq_destroy_class(sch, cl);
+ }
+ qdisc_class_hash_destroy(&q->clhash);
+}
+
+static void cbq_put(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ if (--cl->refcnt == 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
+ struct cbq_sched_data *q = qdisc_priv(sch);
+
+ spin_lock_bh(root_lock);
+ if (q->rx_class == cl)
+ q->rx_class = NULL;
+ spin_unlock_bh(root_lock);
+#endif
+
+ cbq_destroy_class(sch, cl);
+ }
+}
+
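+/* Create a new class or change an existing one. When *arg already points
+ * at a class, only the supplied attributes are updated under the tree
+ * lock; otherwise the classid is validated (or auto-generated from
+ * hgenerator) and a new class is linked below the requested parent.
+ */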
+static int
+cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
+ unsigned long *arg)
+{
+ int err;
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = (struct cbq_class*)*arg;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_CBQ_MAX + 1];
+ struct cbq_class *parent;
+ struct qdisc_rate_table *rtab = NULL;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
+ if (err < 0)
+ return err;
+
+ if (cl) {
+ /* Check parent */
+ if (parentid) {
+ if (cl->tparent &&
+ cl->tparent->common.classid != parentid)
+ return -EINVAL;
+ if (!cl->tparent && parentid != TC_H_ROOT)
+ return -EINVAL;
+ }
+
+ if (tb[TCA_CBQ_RATE]) {
+ rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
+ if (rtab == NULL)
+ return -EINVAL;
+ }
+
+ /* Change class parameters */
+ sch_tree_lock(sch);
+
+ if (cl->next_alive != NULL)
+ cbq_deactivate_class(cl);
+
+ if (rtab) {
+ rtab = xchg(&cl->R_tab, rtab);
+ qdisc_put_rtab(rtab);
+ }
+
+ if (tb[TCA_CBQ_LSSOPT])
+ cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
+
+ if (tb[TCA_CBQ_WRROPT]) {
+ cbq_rmprio(q, cl);
+ cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
+ }
+
+ if (tb[TCA_CBQ_OVL_STRATEGY])
+ cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
+
+#ifdef CONFIG_NET_CLS_ACT
+ if (tb[TCA_CBQ_POLICE])
+ cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
+#endif
+
+ if (tb[TCA_CBQ_FOPT])
+ cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
+
+ if (cl->q->q.qlen)
+ cbq_activate_class(cl);
+
+ sch_tree_unlock(sch);
+
+ if (tca[TCA_RATE])
+ gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ return 0;
+ }
+
+ if (parentid == TC_H_ROOT)
+ return -EINVAL;
+
+ if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
+ tb[TCA_CBQ_LSSOPT] == NULL)
+ return -EINVAL;
+
+ rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
+ if (rtab == NULL)
+ return -EINVAL;
+
+ if (classid) {
+ err = -EINVAL;
+ if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+ goto failure;
+ } else {
+ int i;
+ classid = TC_H_MAKE(sch->handle,0x8000);
+
+ for (i=0; i<0x8000; i++) {
+ if (++q->hgenerator >= 0x8000)
+ q->hgenerator = 1;
+ if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
+ break;
+ }
+ err = -ENOSR;
+ if (i >= 0x8000)
+ goto failure;
+ classid = classid|q->hgenerator;
+ }
+
+ parent = &q->link;
+ if (parentid) {
+ parent = cbq_class_lookup(q, parentid);
+ err = -EINVAL;
+ if (parent == NULL)
+ goto failure;
+ }
+
+ err = -ENOBUFS;
+ cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+ if (cl == NULL)
+ goto failure;
+ cl->R_tab = rtab;
+ rtab = NULL;
+ cl->refcnt = 1;
+ if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid)))
+ cl->q = &noop_qdisc;
+ cl->common.classid = classid;
+ cl->tparent = parent;
+ cl->qdisc = sch;
+ cl->allot = parent->allot;
+ cl->quantum = cl->allot;
+ cl->weight = cl->R_tab->rate.rate;
+
+ sch_tree_lock(sch);
+ cbq_link_class(cl);
+ cl->borrow = cl->tparent;
+ if (cl->tparent != &q->link)
+ cl->share = cl->tparent;
+ cbq_adjust_levels(parent);
+ cl->minidle = -0x7FFFFFFF;
+ cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
+ cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
+ if (cl->ewma_log==0)
+ cl->ewma_log = q->link.ewma_log;
+ if (cl->maxidle==0)
+ cl->maxidle = q->link.maxidle;
+ if (cl->avpkt==0)
+ cl->avpkt = q->link.avpkt;
+ cl->overlimit = cbq_ovl_classic;
+ if (tb[TCA_CBQ_OVL_STRATEGY])
+ cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
+#ifdef CONFIG_NET_CLS_ACT
+ if (tb[TCA_CBQ_POLICE])
+ cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
+#endif
+ if (tb[TCA_CBQ_FOPT])
+ cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
+ sch_tree_unlock(sch);
+
+ qdisc_class_hash_grow(sch, &q->clhash);
+
+ if (tca[TCA_RATE])
+ gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
+
+ *arg = (unsigned long)cl;
+ return 0;
+
+failure:
+ qdisc_put_rtab(rtab);
+ return err;
+}
+
+static int cbq_delete(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = (struct cbq_class*)arg;
+ unsigned int qlen;
+
+ if (cl->filters || cl->children || cl == &q->link)
+ return -EBUSY;
+
+ sch_tree_lock(sch);
+
+ qlen = cl->q->q.qlen;
+ qdisc_reset(cl->q);
+ qdisc_tree_decrease_qlen(cl->q, qlen);
+
+ if (cl->next_alive)
+ cbq_deactivate_class(cl);
+
+ if (q->tx_borrowed == cl)
+ q->tx_borrowed = q->tx_class;
+ if (q->tx_class == cl) {
+ q->tx_class = NULL;
+ q->tx_borrowed = NULL;
+ }
+#ifdef CONFIG_NET_CLS_ACT
+ if (q->rx_class == cl)
+ q->rx_class = NULL;
+#endif
+
+ cbq_unlink_class(cl);
+ cbq_adjust_levels(cl->tparent);
+ cl->defmap = 0;
+ cbq_sync_defmap(cl);
+
+ cbq_rmprio(q, cl);
+ sch_tree_unlock(sch);
+
+ if (--cl->refcnt == 0)
+ cbq_destroy_class(sch, cl);
+
+ return 0;
+}
+
+static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl = (struct cbq_class *)arg;
+
+ if (cl == NULL)
+ cl = &q->link;
+
+ return &cl->filter_list;
+}
+
+static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *p = (struct cbq_class*)parent;
+ struct cbq_class *cl = cbq_class_lookup(q, classid);
+
+ if (cl) {
+ if (p && p->level <= cl->level)
+ return 0;
+ cl->filters++;
+ return (unsigned long)cl;
+ }
+ return 0;
+}
+
+static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
+{
+ struct cbq_class *cl = (struct cbq_class*)arg;
+
+ cl->filters--;
+}
+
+static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct cbq_sched_data *q = qdisc_priv(sch);
+ struct cbq_class *cl;
+ struct hlist_node *n;
+ unsigned h;
+
+ if (arg->stop)
+ return;
+
+ for (h = 0; h < q->clhash.hashsize; h++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+}
+
+static const struct Qdisc_class_ops cbq_class_ops = {
+ .graft = cbq_graft,
+ .leaf = cbq_leaf,
+ .qlen_notify = cbq_qlen_notify,
+ .get = cbq_get,
+ .put = cbq_put,
+ .change = cbq_change_class,
+ .delete = cbq_delete,
+ .walk = cbq_walk,
+ .tcf_chain = cbq_find_tcf,
+ .bind_tcf = cbq_bind_filter,
+ .unbind_tcf = cbq_unbind_filter,
+ .dump = cbq_dump_class,
+ .dump_stats = cbq_dump_class_stats,
+};
+
+static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &cbq_class_ops,
+ .id = "cbq",
+ .priv_size = sizeof(struct cbq_sched_data),
+ .enqueue = cbq_enqueue,
+ .dequeue = cbq_dequeue,
+ .requeue = cbq_requeue,
+ .drop = cbq_drop,
+ .init = cbq_init,
+ .reset = cbq_reset,
+ .destroy = cbq_destroy,
+ .change = NULL,
+ .dump = cbq_dump,
+ .dump_stats = cbq_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init cbq_module_init(void)
+{
+ return register_qdisc(&cbq_qdisc_ops);
+}
+static void __exit cbq_module_exit(void)
+{
+ unregister_qdisc(&cbq_qdisc_ops);
+}
+module_init(cbq_module_init)
+module_exit(cbq_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
new file mode 100644
index 0000000..ba43aab
--- /dev/null
+++ b/net/sched/sch_dsmark.c
@@ -0,0 +1,522 @@
+/* net/sched/sch_dsmark.c - Differentiated Services field marker */
+
+/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
+
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/bitops.h>
+#include <net/pkt_sched.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+#include <asm/byteorder.h>
+
+/*
+ * classid        class      marking
+ * -------        -----      -------
+ *   n/a            0        n/a
+ *   x:0            1        use entry [0]
+ *   ...           ...       ...
+ *   x:y, y>0      y+1       use entry [y]
+ *   ...           ...       ...
+ *   x:indices-1  indices    use entry [indices-1]
+ *   ...           ...       ...
+ *   x:y           y+1       use entry [y & (indices-1)]
+ *   ...           ...       ...
+ *   0xffff       0x10000    use entry [indices-1]
+ */
+
+
+#define NO_DEFAULT_INDEX (1 << 16)
+
+struct dsmark_qdisc_data {
+ struct Qdisc *q;
+ struct tcf_proto *filter_list;
+ u8 *mask; /* "owns" the array */
+ u8 *value;
+ u16 indices;
+ u32 default_index; /* index range is 0...0xffff */
+ int set_tc_index;
+};
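+
+/* Each table entry describes how the DS field is rewritten on dequeue:
+ * new_dsfield = (old_dsfield & mask[index]) | value[index], where index
+ * is skb->tc_index masked to the table size.
+ */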
+
+static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
+{
+ return (index <= p->indices && index > 0);
+}
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+
+ pr_debug("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",
+ sch, p, new, old);
+
+ if (new == NULL) {
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ sch->handle);
+ if (new == NULL)
+ new = &noop_qdisc;
+ }
+
+ sch_tree_lock(sch);
+ *old = xchg(&p->q, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ return p->q;
+}
+
+static unsigned long dsmark_get(struct Qdisc *sch, u32 classid)
+{
+ pr_debug("dsmark_get(sch %p,[qdisc %p],classid %x)\n",
+ sch, qdisc_priv(sch), classid);
+
+ return TC_H_MIN(classid) + 1;
+}
+
+static unsigned long dsmark_bind_filter(struct Qdisc *sch,
+ unsigned long parent, u32 classid)
+{
+ return dsmark_get(sch, classid);
+}
+
+static void dsmark_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
+ [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
+ [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
+ [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
+ [TCA_DSMARK_MASK] = { .type = NLA_U8 },
+ [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
+};
+
+static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
+ struct nlattr **tca, unsigned long *arg)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_DSMARK_MAX + 1];
+ int err = -EINVAL;
+ u8 mask = 0;
+
+ pr_debug("dsmark_change(sch %p,[qdisc %p],classid %x,parent %x),"
+ "arg 0x%lx\n", sch, p, classid, parent, *arg);
+
+ if (!dsmark_valid_index(p, *arg)) {
+ err = -ENOENT;
+ goto errout;
+ }
+
+ if (!opt)
+ goto errout;
+
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ if (err < 0)
+ goto errout;
+
+ if (tb[TCA_DSMARK_MASK])
+ mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
+
+ if (tb[TCA_DSMARK_VALUE])
+ p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+
+ if (tb[TCA_DSMARK_MASK])
+ p->mask[*arg-1] = mask;
+
+ err = 0;
+
+errout:
+ return err;
+}
+
+static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+
+ if (!dsmark_valid_index(p, arg))
+ return -EINVAL;
+
+ p->mask[arg-1] = 0xff;
+ p->value[arg-1] = 0;
+
+ return 0;
+}
+
+static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ int i;
+
+ pr_debug("dsmark_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
+
+ if (walker->stop)
+ return;
+
+ for (i = 0; i < p->indices; i++) {
+ if (p->mask[i] == 0xff && !p->value[i])
+ goto ignore;
+ if (walker->count >= walker->skip) {
+ if (walker->fn(sch, i+1, walker) < 0) {
+ walker->stop = 1;
+ break;
+ }
+ }
+ignore:
+ walker->count++;
+ }
+}
+
+static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch,
+ unsigned long cl)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ return &p->filter_list;
+}
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ int err;
+
+ pr_debug("dsmark_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+
+ if (p->set_tc_index) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (skb_cow_head(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
+ & ~INET_ECN_MASK;
+ break;
+
+ case htons(ETH_P_IPV6):
+ if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
+ goto drop;
+
+ skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
+ & ~INET_ECN_MASK;
+ break;
+ default:
+ skb->tc_index = 0;
+ break;
+ }
+ }
+
+ if (TC_H_MAJ(skb->priority) == sch->handle)
+ skb->tc_index = TC_H_MIN(skb->priority);
+ else {
+ struct tcf_result res;
+ int result = tc_classify(skb, p->filter_list, &res);
+
+ pr_debug("result %d class 0x%04x\n", result, res.classid);
+
+ switch (result) {
+#ifdef CONFIG_NET_CLS_ACT
+ case TC_ACT_QUEUED:
+ case TC_ACT_STOLEN:
+ kfree_skb(skb);
+ return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+
+ case TC_ACT_SHOT:
+ goto drop;
+#endif
+ case TC_ACT_OK:
+ skb->tc_index = TC_H_MIN(res.classid);
+ break;
+
+ default:
+ if (p->default_index != NO_DEFAULT_INDEX)
+ skb->tc_index = p->default_index;
+ break;
+ }
+ }
+
+ err = qdisc_enqueue(skb, p->q);
+ if (err != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(err))
+ sch->qstats.drops++;
+ return err;
+ }
+
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->q.qlen++;
+
+ return NET_XMIT_SUCCESS;
+
+drop:
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+}
+
+static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ struct sk_buff *skb;
+ u32 index;
+
+ pr_debug("dsmark_dequeue(sch %p,[qdisc %p])\n", sch, p);
+
+ skb = p->q->ops->dequeue(p->q);
+ if (skb == NULL)
+ return NULL;
+
+ sch->q.qlen--;
+
+ index = skb->tc_index & (p->indices - 1);
+ pr_debug("index %d->%d\n", skb->tc_index, index);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
+ p->value[index]);
+ break;
+ case htons(ETH_P_IPV6):
+ ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
+ p->value[index]);
+ break;
+ default:
+ /*
+ * Only complain if a change was actually attempted.
+ * This way, we can send non-IP traffic through dsmark
+ * and don't need yet another qdisc as a bypass.
+ */
+ if (p->mask[index] != 0xff || p->value[index])
+ printk(KERN_WARNING
+ "dsmark_dequeue: unsupported protocol %d\n",
+ ntohs(skb->protocol));
+ break;
+ }
+
+ return skb;
+}
+
+static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ int err;
+
+ pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
+
+ err = p->q->ops->requeue(skb, p->q);
+ if (err != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(err))
+ sch->qstats.drops++;
+ return err;
+ }
+
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+
+ return NET_XMIT_SUCCESS;
+}
+
+static unsigned int dsmark_drop(struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ unsigned int len;
+
+ pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+
+ if (p->q->ops->drop == NULL)
+ return 0;
+
+ len = p->q->ops->drop(p->q);
+ if (len)
+ sch->q.qlen--;
+
+ return len;
+}
+
+static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ struct nlattr *tb[TCA_DSMARK_MAX + 1];
+ int err = -EINVAL;
+ u32 default_index = NO_DEFAULT_INDEX;
+ u16 indices;
+ u8 *mask;
+
+ pr_debug("dsmark_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
+
+ if (!opt)
+ goto errout;
+
+ err = nla_parse_nested(tb, TCA_DSMARK_MAX, opt, dsmark_policy);
+ if (err < 0)
+ goto errout;
+
+ err = -EINVAL;
+ indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
+
+ if (hweight32(indices) != 1)
+ goto errout;
+
+ if (tb[TCA_DSMARK_DEFAULT_INDEX])
+ default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
+
+ mask = kmalloc(indices * 2, GFP_KERNEL);
+ if (mask == NULL) {
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ p->mask = mask;
+ memset(p->mask, 0xff, indices);
+
+ p->value = p->mask + indices;
+ memset(p->value, 0, indices);
+
+ p->indices = indices;
+ p->default_index = default_index;
+ p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
+
+ p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, sch->handle);
+ if (p->q == NULL)
+ p->q = &noop_qdisc;
+
+ pr_debug("dsmark_init: qdisc %p\n", p->q);
+
+ err = 0;
+errout:
+ return err;
+}
+
+static void dsmark_reset(struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+
+ pr_debug("dsmark_reset(sch %p,[qdisc %p])\n", sch, p);
+ qdisc_reset(p->q);
+ sch->q.qlen = 0;
+}
+
+static void dsmark_destroy(struct Qdisc *sch)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+
+ pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p);
+
+ tcf_destroy_chain(&p->filter_list);
+ qdisc_destroy(p->q);
+ kfree(p->mask);
+}
+
+static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ struct nlattr *opts = NULL;
+
+ pr_debug("dsmark_dump_class(sch %p,[qdisc %p],class %ld\n", sch, p, cl);
+
+ if (!dsmark_valid_index(p, cl))
+ return -EINVAL;
+
+ tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+ tcm->tcm_info = p->q->handle;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+ NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
+ NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct dsmark_qdisc_data *p = qdisc_priv(sch);
+ struct nlattr *opts = NULL;
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+ NLA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
+
+ if (p->default_index != NO_DEFAULT_INDEX)
+ NLA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
+
+ if (p->set_tc_index)
+ NLA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static const struct Qdisc_class_ops dsmark_class_ops = {
+ .graft = dsmark_graft,
+ .leaf = dsmark_leaf,
+ .get = dsmark_get,
+ .put = dsmark_put,
+ .change = dsmark_change,
+ .delete = dsmark_delete,
+ .walk = dsmark_walk,
+ .tcf_chain = dsmark_find_tcf,
+ .bind_tcf = dsmark_bind_filter,
+ .unbind_tcf = dsmark_put,
+ .dump = dsmark_dump_class,
+};
+
+static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &dsmark_class_ops,
+ .id = "dsmark",
+ .priv_size = sizeof(struct dsmark_qdisc_data),
+ .enqueue = dsmark_enqueue,
+ .dequeue = dsmark_dequeue,
+ .requeue = dsmark_requeue,
+ .drop = dsmark_drop,
+ .init = dsmark_init,
+ .reset = dsmark_reset,
+ .destroy = dsmark_destroy,
+ .change = NULL,
+ .dump = dsmark_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init dsmark_module_init(void)
+{
+ return register_qdisc(&dsmark_qdisc_ops);
+}
+
+static void __exit dsmark_module_exit(void)
+{
+ unregister_qdisc(&dsmark_qdisc_ops);
+}
+
+module_init(dsmark_module_init)
+module_exit(dsmark_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
new file mode 100644
index 0000000..23d258b
--- /dev/null
+++ b/net/sched/sch_fifo.c
@@ -0,0 +1,152 @@
+/*
+ * net/sched/sch_fifo.c The simplest FIFO queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+
+/* 1 band FIFO pseudo-"scheduler" */
+
+struct fifo_sched_data
+{
+ u32 limit;
+};
+
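+/* bfifo limits the queue by bytes of backlog, pfifo by number of packets;
+ * once over the limit, qdisc_reshape_fail() drops the packet (or lets a
+ * parent with ->reshape_fail reclassify it).
+ */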
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
+ return qdisc_enqueue_tail(skb, sch);
+
+ return qdisc_reshape_fail(skb, sch);
+}
+
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+
+ if (likely(skb_queue_len(&sch->q) < q->limit))
+ return qdisc_enqueue_tail(skb, sch);
+
+ return qdisc_reshape_fail(skb, sch);
+}
+
+static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+
+ if (opt == NULL) {
+ u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
+
+ if (sch->ops == &bfifo_qdisc_ops)
+ limit *= qdisc_dev(sch)->mtu;
+
+ q->limit = limit;
+ } else {
+ struct tc_fifo_qopt *ctl = nla_data(opt);
+
+ if (nla_len(opt) < sizeof(*ctl))
+ return -EINVAL;
+
+ q->limit = ctl->limit;
+ }
+
+ return 0;
+}
+
+static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+ struct tc_fifo_qopt opt = { .limit = q->limit };
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
+ .id = "pfifo",
+ .priv_size = sizeof(struct fifo_sched_data),
+ .enqueue = pfifo_enqueue,
+ .dequeue = qdisc_dequeue_head,
+ .requeue = qdisc_requeue,
+ .drop = qdisc_queue_drop,
+ .init = fifo_init,
+ .reset = qdisc_reset_queue,
+ .change = fifo_init,
+ .dump = fifo_dump,
+ .owner = THIS_MODULE,
+};
+EXPORT_SYMBOL(pfifo_qdisc_ops);
+
+struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
+ .id = "bfifo",
+ .priv_size = sizeof(struct fifo_sched_data),
+ .enqueue = bfifo_enqueue,
+ .dequeue = qdisc_dequeue_head,
+ .requeue = qdisc_requeue,
+ .drop = qdisc_queue_drop,
+ .init = fifo_init,
+ .reset = qdisc_reset_queue,
+ .change = fifo_init,
+ .dump = fifo_dump,
+ .owner = THIS_MODULE,
+};
+EXPORT_SYMBOL(bfifo_qdisc_ops);
+
+/* Pass size change message down to embedded FIFO */
+int fifo_set_limit(struct Qdisc *q, unsigned int limit)
+{
+ struct nlattr *nla;
+ int ret = -ENOMEM;
+
+ /* Hack to avoid sending change message to non-FIFO */
+ if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
+ return 0;
+
+ nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
+ if (nla) {
+ nla->nla_type = RTM_NEWQDISC;
+ nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
+ ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
+
+ ret = q->ops->change(q, nla);
+ kfree(nla);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(fifo_set_limit);
+
+struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
+ unsigned int limit)
+{
+ struct Qdisc *q;
+ int err = -ENOMEM;
+
+ q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ ops, TC_H_MAKE(sch->handle, 1));
+ if (q) {
+ err = fifo_set_limit(q, limit);
+ if (err < 0) {
+ qdisc_destroy(q);
+ q = NULL;
+ }
+ }
+
+ return q ? : ERR_PTR(err);
+}
+EXPORT_SYMBOL(fifo_create_dflt);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
new file mode 100644
index 0000000..cdcd16f
--- /dev/null
+++ b/net/sched/sch_generic.c
@@ -0,0 +1,741 @@
+/*
+ * net/sched/sch_generic.c Generic packet scheduler routines.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Jamal Hadi Salim, <hadi@cyberus.ca> 990601
+ * - Ingress support
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+#include <net/pkt_sched.h>
+
+/* Main transmission queue. */
+
+/* Modifications to data participating in scheduling must be protected with
+ * qdisc_lock(qdisc) spinlock.
+ *
+ * The idea is the following:
+ * - enqueue, dequeue are serialized via qdisc root lock
+ * - ingress filtering is also serialized via qdisc root lock
+ * - updates to tree and tree walking are only done under the rtnl mutex.
+ */
+
+static inline int qdisc_qlen(struct Qdisc *q)
+{
+ return q->q.qlen;
+}
+
+static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
+{
+ q->gso_skb = skb;
+ q->qstats.requeues++;
+ __netif_schedule(q);
+
+ return 0;
+}
+
+static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
+{
+ struct sk_buff *skb = q->gso_skb;
+
+ if (unlikely(skb)) {
+ struct net_device *dev = qdisc_dev(q);
+ struct netdev_queue *txq;
+
+ /* check the reason for requeuing first, without taking the tx lock */
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+ q->gso_skb = NULL;
+ else
+ skb = NULL;
+ } else {
+ skb = q->dequeue(q);
+ }
+
+ return skb;
+}
+
+static inline int handle_dev_cpu_collision(struct sk_buff *skb,
+ struct netdev_queue *dev_queue,
+ struct Qdisc *q)
+{
+ int ret;
+
+ if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
+ /*
+ * The same CPU is already holding the lock. This may be a transient
+ * configuration error, when hard_start_xmit() recurses. We detect
+ * it by checking the xmit owner and drop the packet when a dead
+ * loop is detected. Return OK to try the next skb.
+ */
+ kfree_skb(skb);
+ if (net_ratelimit())
+ printk(KERN_WARNING "Dead loop on netdevice %s, "
+ "fix it urgently!\n", dev_queue->dev->name);
+ ret = qdisc_qlen(q);
+ } else {
+ /*
+ * Another CPU is holding the lock; requeue and delay xmits
+ * for some time.
+ */
+ __get_cpu_var(netdev_rx_stat).cpu_collision++;
+ ret = dev_requeue_skb(skb, q);
+ }
+
+ return ret;
+}
+
+/*
+ * NOTE: Called under qdisc_lock(q) with locally disabled BH.
+ *
+ * __QDISC_STATE_RUNNING guarantees only one CPU can process
+ * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
+ * this queue.
+ *
+ * netif_tx_lock serializes accesses to device driver.
+ *
+ * qdisc_lock(q) and netif_tx_lock are mutually exclusive:
+ * if one is grabbed, the other must be free.
+ *
+ * Note that this procedure can be called by a watchdog timer.
+ *
+ * Returns to the caller:
+ * 0 - queue is empty or throttled.
+ * >0 - queue is not empty.
+ *
+ */
+static inline int qdisc_restart(struct Qdisc *q)
+{
+ struct netdev_queue *txq;
+ int ret = NETDEV_TX_BUSY;
+ struct net_device *dev;
+ spinlock_t *root_lock;
+ struct sk_buff *skb;
+
+ /* Dequeue packet */
+ if (unlikely((skb = dequeue_skb(q)) == NULL))
+ return 0;
+
+ root_lock = qdisc_lock(q);
+
+ /* And release qdisc */
+ spin_unlock(root_lock);
+
+ dev = qdisc_dev(q);
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+ HARD_TX_LOCK(dev, txq, smp_processor_id());
+ if (!netif_tx_queue_stopped(txq) &&
+ !netif_tx_queue_frozen(txq))
+ ret = dev_hard_start_xmit(skb, dev, txq);
+ HARD_TX_UNLOCK(dev, txq);
+
+ spin_lock(root_lock);
+
+ switch (ret) {
+ case NETDEV_TX_OK:
+ /* Driver sent out skb successfully */
+ ret = qdisc_qlen(q);
+ break;
+
+ case NETDEV_TX_LOCKED:
+ /* Driver try lock failed */
+ ret = handle_dev_cpu_collision(skb, txq, q);
+ break;
+
+ default:
+ /* Driver returned NETDEV_TX_BUSY - requeue skb */
+ if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
+ printk(KERN_WARNING "BUG %s code %d qlen %d\n",
+ dev->name, ret, q->q.qlen);
+
+ ret = dev_requeue_skb(skb, q);
+ break;
+ }
+
+ if (ret && (netif_tx_queue_stopped(txq) ||
+ netif_tx_queue_frozen(txq)))
+ ret = 0;
+
+ return ret;
+}
+
+void __qdisc_run(struct Qdisc *q)
+{
+ unsigned long start_time = jiffies;
+
+ while (qdisc_restart(q)) {
+ /*
+ * Postpone processing if
+ * 1. another process needs the CPU;
+ * 2. we've been doing it for too long.
+ */
+ if (need_resched() || jiffies != start_time) {
+ __netif_schedule(q);
+ break;
+ }
+ }
+
+ clear_bit(__QDISC_STATE_RUNNING, &q->state);
+}
+
+static void dev_watchdog(unsigned long arg)
+{
+ struct net_device *dev = (struct net_device *)arg;
+
+ netif_tx_lock(dev);
+ if (!qdisc_tx_is_noop(dev)) {
+ if (netif_device_present(dev) &&
+ netif_running(dev) &&
+ netif_carrier_ok(dev)) {
+ int some_queue_stopped = 0;
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq;
+
+ txq = netdev_get_tx_queue(dev, i);
+ if (netif_tx_queue_stopped(txq)) {
+ some_queue_stopped = 1;
+ break;
+ }
+ }
+
+ if (some_queue_stopped &&
+ time_after(jiffies, (dev->trans_start +
+ dev->watchdog_timeo))) {
+ char drivername[64];
+ WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+ dev->name, netdev_drivername(dev, drivername, 64));
+ dev->tx_timeout(dev);
+ }
+ if (!mod_timer(&dev->watchdog_timer,
+ round_jiffies(jiffies +
+ dev->watchdog_timeo)))
+ dev_hold(dev);
+ }
+ }
+ netif_tx_unlock(dev);
+
+ dev_put(dev);
+}
+
+void __netdev_watchdog_up(struct net_device *dev)
+{
+ if (dev->tx_timeout) {
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = 5*HZ;
+ if (!mod_timer(&dev->watchdog_timer,
+ round_jiffies(jiffies + dev->watchdog_timeo)))
+ dev_hold(dev);
+ }
+}
+
+static void dev_watchdog_up(struct net_device *dev)
+{
+ __netdev_watchdog_up(dev);
+}
+
+static void dev_watchdog_down(struct net_device *dev)
+{
+ netif_tx_lock_bh(dev);
+ if (del_timer(&dev->watchdog_timer))
+ dev_put(dev);
+ netif_tx_unlock_bh(dev);
+}
+
+/**
+ * netif_carrier_on - set carrier
+ * @dev: network device
+ *
+ * Device has detected acquisition of carrier.
+ */
+void netif_carrier_on(struct net_device *dev)
+{
+ if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ return;
+ linkwatch_fire_event(dev);
+ if (netif_running(dev))
+ __netdev_watchdog_up(dev);
+ }
+}
+EXPORT_SYMBOL(netif_carrier_on);
+
+/**
+ * netif_carrier_off - clear carrier
+ * @dev: network device
+ *
+ * Device has detected loss of carrier.
+ */
+void netif_carrier_off(struct net_device *dev)
+{
+ if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ return;
+ linkwatch_fire_event(dev);
+ }
+}
+EXPORT_SYMBOL(netif_carrier_off);
+
+/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
+ under all circumstances. It is difficult to invent anything faster or
+ cheaper.
+ */
+
+static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
+{
+ kfree_skb(skb);
+ return NET_XMIT_CN;
+}
+
+static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
+{
+ return NULL;
+}
+
+static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ if (net_ratelimit())
+ printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
+ skb->dev->name);
+ kfree_skb(skb);
+ return NET_XMIT_CN;
+}
+
+struct Qdisc_ops noop_qdisc_ops __read_mostly = {
+ .id = "noop",
+ .priv_size = 0,
+ .enqueue = noop_enqueue,
+ .dequeue = noop_dequeue,
+ .requeue = noop_requeue,
+ .owner = THIS_MODULE,
+};
+
+static struct netdev_queue noop_netdev_queue = {
+ .qdisc = &noop_qdisc,
+ .qdisc_sleeping = &noop_qdisc,
+};
+
+struct Qdisc noop_qdisc = {
+ .enqueue = noop_enqueue,
+ .dequeue = noop_dequeue,
+ .flags = TCQ_F_BUILTIN,
+ .ops = &noop_qdisc_ops,
+ .list = LIST_HEAD_INIT(noop_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .dev_queue = &noop_netdev_queue,
+};
+EXPORT_SYMBOL(noop_qdisc);
+
+static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
+ .id = "noqueue",
+ .priv_size = 0,
+ .enqueue = noop_enqueue,
+ .dequeue = noop_dequeue,
+ .requeue = noop_requeue,
+ .owner = THIS_MODULE,
+};
+
+static struct Qdisc noqueue_qdisc;
+static struct netdev_queue noqueue_netdev_queue = {
+ .qdisc = &noqueue_qdisc,
+ .qdisc_sleeping = &noqueue_qdisc,
+};
+
+static struct Qdisc noqueue_qdisc = {
+ .enqueue = NULL,
+ .dequeue = noop_dequeue,
+ .flags = TCQ_F_BUILTIN,
+ .ops = &noqueue_qdisc_ops,
+ .list = LIST_HEAD_INIT(noqueue_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
+ .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
+ .dev_queue = &noqueue_netdev_queue,
+};
+
+
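+/* Map the four-bit TC_PRIO_* value in skb->priority to one of the three
+ * pfifo_fast bands: band 0 is served first, band 2 last.
+ */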
+static const u8 prio2band[TC_PRIO_MAX+1] =
+ { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+
+/* 3-band FIFO queue: old style, but should be a bit faster than
+ the generic prio+fifo combination.
+ */
+
+#define PFIFO_FAST_BANDS 3
+
+static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
+ struct Qdisc *qdisc)
+{
+ struct sk_buff_head *list = qdisc_priv(qdisc);
+ return list + prio2band[skb->priority & TC_PRIO_MAX];
+}
+
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ struct sk_buff_head *list = prio2list(skb, qdisc);
+
+ if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
+ qdisc->q.qlen++;
+ return __qdisc_enqueue_tail(skb, qdisc, list);
+ }
+
+ return qdisc_drop(skb, qdisc);
+}
+
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+{
+ int prio;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
+ if (!skb_queue_empty(list + prio)) {
+ qdisc->q.qlen--;
+ return __qdisc_dequeue_head(qdisc, list + prio);
+ }
+ }
+
+ return NULL;
+}
+
+static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
+{
+ qdisc->q.qlen++;
+ return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
+}
+
+static void pfifo_fast_reset(struct Qdisc* qdisc)
+{
+ int prio;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+ __qdisc_reset_queue(qdisc, list + prio);
+
+ qdisc->qstats.backlog = 0;
+ qdisc->q.qlen = 0;
+}
+
+static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
+{
+ struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
+
+ memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+ int prio;
+ struct sk_buff_head *list = qdisc_priv(qdisc);
+
+ for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
+ skb_queue_head_init(list + prio);
+
+ return 0;
+}
+
+static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
+ .id = "pfifo_fast",
+ .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
+ .enqueue = pfifo_fast_enqueue,
+ .dequeue = pfifo_fast_dequeue,
+ .requeue = pfifo_fast_requeue,
+ .init = pfifo_fast_init,
+ .reset = pfifo_fast_reset,
+ .dump = pfifo_fast_dump,
+ .owner = THIS_MODULE,
+};
+
+struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ struct Qdisc_ops *ops)
+{
+ void *p;
+ struct Qdisc *sch;
+ unsigned int size;
+ int err = -ENOBUFS;
+
+ /* ensure that the Qdisc and the private data are 32-byte aligned */
+ size = QDISC_ALIGN(sizeof(*sch));
+ size += ops->priv_size + (QDISC_ALIGNTO - 1);
+
+ p = kzalloc(size, GFP_KERNEL);
+ if (!p)
+ goto errout;
+ sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
+ sch->padded = (char *) sch - (char *) p;
+
+ INIT_LIST_HEAD(&sch->list);
+ skb_queue_head_init(&sch->requeue);
+ skb_queue_head_init(&sch->q);
+ sch->ops = ops;
+ sch->enqueue = ops->enqueue;
+ sch->dequeue = ops->dequeue;
+ sch->dev_queue = dev_queue;
+ dev_hold(qdisc_dev(sch));
+ atomic_set(&sch->refcnt, 1);
+
+ return sch;
+errout:
+ return ERR_PTR(err);
+}
+
+struct Qdisc * qdisc_create_dflt(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ struct Qdisc_ops *ops,
+ unsigned int parentid)
+{
+ struct Qdisc *sch;
+
+ sch = qdisc_alloc(dev_queue, ops);
+ if (IS_ERR(sch))
+ goto errout;
+ sch->parent = parentid;
+
+ if (!ops->init || ops->init(sch, NULL) == 0)
+ return sch;
+
+ qdisc_destroy(sch);
+errout:
+ return NULL;
+}
+EXPORT_SYMBOL(qdisc_create_dflt);
+
+/* Under qdisc_lock(qdisc) and BH! */
+
+void qdisc_reset(struct Qdisc *qdisc)
+{
+ const struct Qdisc_ops *ops = qdisc->ops;
+
+ if (ops->reset)
+ ops->reset(qdisc);
+}
+EXPORT_SYMBOL(qdisc_reset);
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+ const struct Qdisc_ops *ops = qdisc->ops;
+
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !atomic_dec_and_test(&qdisc->refcnt))
+ return;
+
+#ifdef CONFIG_NET_SCHED
+ qdisc_list_del(qdisc);
+
+ qdisc_put_stab(qdisc->stab);
+#endif
+ gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
+ if (ops->reset)
+ ops->reset(qdisc);
+ if (ops->destroy)
+ ops->destroy(qdisc);
+
+ module_put(ops->owner);
+ dev_put(qdisc_dev(qdisc));
+
+ kfree_skb(qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->requeue);
+
+ kfree((char *) qdisc - qdisc->padded);
+}
+EXPORT_SYMBOL(qdisc_destroy);
+
+static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ if (txq->qdisc_sleeping != &noop_qdisc)
+ return false;
+ }
+ return true;
+}
+
+static void attach_one_default_qdisc(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ struct Qdisc *qdisc;
+
+ if (dev->tx_queue_len) {
+ qdisc = qdisc_create_dflt(dev, dev_queue,
+ &pfifo_fast_ops, TC_H_ROOT);
+ if (!qdisc) {
+ printk(KERN_INFO "%s: activation failed\n", dev->name);
+ return;
+ }
+ } else {
+ qdisc = &noqueue_qdisc;
+ }
+ dev_queue->qdisc_sleeping = qdisc;
+}
+
+static void transition_one_qdisc(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_need_watchdog)
+{
+ struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
+ int *need_watchdog_p = _need_watchdog;
+
+ if (!(new_qdisc->flags & TCQ_F_BUILTIN))
+ clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
+
+ rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
+ if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
+ *need_watchdog_p = 1;
+}
+
+void dev_activate(struct net_device *dev)
+{
+ int need_watchdog;
+
+ /* If no queueing discipline is attached to the device,
+ create a default one: pfifo_fast for devices that
+ need queueing, and noqueue_qdisc for
+ virtual interfaces.
+ */
+
+ if (dev_all_qdisc_sleeping_noop(dev))
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+
+ if (!netif_carrier_ok(dev))
+ /* Delay activation until next carrier-on event */
+ return;
+
+ need_watchdog = 0;
+ netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
+ transition_one_qdisc(dev, &dev->rx_queue, NULL);
+
+ if (need_watchdog) {
+ dev->trans_start = jiffies;
+ dev_watchdog_up(dev);
+ }
+}
+
+static void dev_deactivate_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
+{
+ struct Qdisc *qdisc_default = _qdisc_default;
+ struct Qdisc *qdisc;
+
+ qdisc = dev_queue->qdisc;
+ if (qdisc) {
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ if (!(qdisc->flags & TCQ_F_BUILTIN))
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ }
+}
+
+static bool some_qdisc_is_busy(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *dev_queue;
+ spinlock_t *root_lock;
+ struct Qdisc *q;
+ int val;
+
+ dev_queue = netdev_get_tx_queue(dev, i);
+ q = dev_queue->qdisc_sleeping;
+ root_lock = qdisc_lock(q);
+
+ spin_lock_bh(root_lock);
+
+ val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+ test_bit(__QDISC_STATE_SCHED, &q->state));
+
+ spin_unlock_bh(root_lock);
+
+ if (val)
+ return true;
+ }
+ return false;
+}
+
+void dev_deactivate(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
+ dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
+
+ dev_watchdog_down(dev);
+
+ /* Wait for outstanding qdisc-less dev_queue_xmit calls. */
+ synchronize_rcu();
+
+ /* Wait for outstanding qdisc_run calls. */
+ while (some_qdisc_is_busy(dev))
+ yield();
+}
+
+static void dev_init_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc)
+{
+ struct Qdisc *qdisc = _qdisc;
+
+ dev_queue->qdisc = qdisc;
+ dev_queue->qdisc_sleeping = qdisc;
+}
+
+void dev_init_scheduler(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
+ dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
+
+ setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
+}
+
+static void shutdown_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc_default = _qdisc_default;
+
+ if (qdisc) {
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ dev_queue->qdisc_sleeping = qdisc_default;
+
+ qdisc_destroy(qdisc);
+ }
+}
+
+void dev_shutdown(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+ shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
+ WARN_ON(timer_pending(&dev->watchdog_timer));
+}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
new file mode 100644
index 0000000..c1ad6b8
--- /dev/null
+++ b/net/sched/sch_gred.c
@@ -0,0 +1,628 @@
+/*
+ * net/sched/sch_gred.c Generic Random Early Detection queue.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002
+ *
+ * 991129: - Bug fix with grio mode
+ *		- a better single AvgQ mode with Grio (WRED)
+ *		- a finer-grained VQ dequeue based on a suggestion
+ *		  from Ren Liu
+ * - More error checks
+ *
+ * For all the glorious comments look at include/net/red.h
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/red.h>
+
+#define GRED_DEF_PRIO (MAX_DPs / 2)
+#define GRED_VQ_MASK (MAX_DPs - 1)
+
+struct gred_sched_data;
+struct gred_sched;
+
+struct gred_sched_data
+{
+ u32 limit; /* HARD maximal queue length */
+	u32		DP;	/* the drop parameters */
+	u32		bytesin;	/* bytes seen on virtualQ so far */
+	u32		packetsin;	/* packets seen on virtualQ so far */
+ u32 backlog; /* bytes on the virtualQ */
+ u8 prio; /* the prio of this vq */
+
+ struct red_parms parms;
+ struct red_stats stats;
+};
+
+enum {
+ GRED_WRED_MODE = 1,
+ GRED_RIO_MODE,
+};
+
+struct gred_sched
+{
+ struct gred_sched_data *tab[MAX_DPs];
+ unsigned long flags;
+ u32 red_flags;
+ u32 DPs;
+ u32 def;
+ struct red_parms wred_set;
+};
+
+static inline int gred_wred_mode(struct gred_sched *table)
+{
+ return test_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline void gred_enable_wred_mode(struct gred_sched *table)
+{
+ __set_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline void gred_disable_wred_mode(struct gred_sched *table)
+{
+ __clear_bit(GRED_WRED_MODE, &table->flags);
+}
+
+static inline int gred_rio_mode(struct gred_sched *table)
+{
+ return test_bit(GRED_RIO_MODE, &table->flags);
+}
+
+static inline void gred_enable_rio_mode(struct gred_sched *table)
+{
+ __set_bit(GRED_RIO_MODE, &table->flags);
+}
+
+static inline void gred_disable_rio_mode(struct gred_sched *table)
+{
+ __clear_bit(GRED_RIO_MODE, &table->flags);
+}
+
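+/*
+ * Returns 1 if at least two configured virtual queues share the same
+ * priority.  gred_change_table_def() and gred_change() use this to decide
+ * whether to switch into WRED mode, i.e. a single shared average queue
+ * length instead of one per VQ.
+ */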
+static inline int gred_wred_mode_check(struct Qdisc *sch)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ int i;
+
+	/* Really ugly O(n^2), but it shouldn't be needed too frequently. */
+ for (i = 0; i < table->DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+ int n;
+
+ if (q == NULL)
+ continue;
+
+ for (n = 0; n < table->DPs; n++)
+ if (table->tab[n] && table->tab[n] != q &&
+ table->tab[n]->prio == q->prio)
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline unsigned int gred_backlog(struct gred_sched *table,
+ struct gred_sched_data *q,
+ struct Qdisc *sch)
+{
+ if (gred_wred_mode(table))
+ return sch->qstats.backlog;
+ else
+ return q->backlog;
+}
+
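+/*
+ * The virtual queue (DP) of a packet is taken from the low bits of
+ * skb->tc_index (GRED_VQ_MASK == MAX_DPs - 1); out-of-range or
+ * unconfigured DPs fall back to the default DP in gred_enqueue().
+ */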
+static inline u16 tc_index_to_dp(struct sk_buff *skb)
+{
+ return skb->tc_index & GRED_VQ_MASK;
+}
+
+static inline void gred_load_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ q->parms.qavg = table->wred_set.qavg;
+ q->parms.qidlestart = table->wred_set.qidlestart;
+}
+
+static inline void gred_store_wred_set(struct gred_sched *table,
+ struct gred_sched_data *q)
+{
+ table->wred_set.qavg = q->parms.qavg;
+}
+
+static inline int gred_use_ecn(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_ECN;
+}
+
+static inline int gred_use_harddrop(struct gred_sched *t)
+{
+ return t->red_flags & TC_RED_HARDDROP;
+}
+
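+/*
+ * Enqueue behaviour depends on the configured mode:
+ *  - plain GRED: each virtual queue (VQ) runs RED on its own average;
+ *  - RIO (grio): the average used for a VQ also includes the qavg of
+ *		  every non-idling VQ with a lower prio value;
+ *  - WRED:	  all VQs share a single average kept in t->wred_set.
+ */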
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct gred_sched_data *q = NULL;
+	struct gred_sched *t = qdisc_priv(sch);
+ unsigned long qavg = 0;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ dp = t->def;
+
+ if ((q = t->tab[dp]) == NULL) {
+ /* Pass through packets not assigned to a DP
+ * if no default DP has been configured. This
+ * allows for DP flows to be left untouched.
+ */
+ if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+ return qdisc_enqueue_tail(skb, sch);
+ else
+ goto drop;
+ }
+
+		/* fix tc_index? -- could be controversial but needed for
+		   requeueing */
+ skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
+ }
+
+	/* sum the qavg of all non-idling VQs with a lower prio than ours */
+ if (!gred_wred_mode(t) && gred_rio_mode(t)) {
+ int i;
+
+ for (i = 0; i < t->DPs; i++) {
+ if (t->tab[i] && t->tab[i]->prio < q->prio &&
+ !red_is_idling(&t->tab[i]->parms))
+				qavg += t->tab[i]->parms.qavg;
+ }
+
+ }
+
+ q->packetsin++;
+ q->bytesin += qdisc_pkt_len(skb);
+
+ if (gred_wred_mode(t))
+ gred_load_wred_set(t, q);
+
+ q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));
+
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+
+ if (gred_wred_mode(t))
+ gred_store_wred_set(t, q);
+
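+	/*
+	 * RED decision on the (possibly aggregated) average queue length:
+	 * a probabilistic mark becomes an ECN mark if TC_RED_ECN is set and
+	 * the packet is ECN-capable, otherwise an early drop; a hard mark
+	 * is always dropped when TC_RED_HARDDROP is set.
+	 */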
+ switch (red_action(&q->parms, q->parms.qavg + qavg)) {
+ case RED_DONT_MARK:
+ break;
+
+ case RED_PROB_MARK:
+ sch->qstats.overlimits++;
+ if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
+
+ q->stats.prob_mark++;
+ break;
+
+ case RED_HARD_MARK:
+ sch->qstats.overlimits++;
+ if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+ q->stats.forced_mark++;
+ break;
+ }
+
+ if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+ q->backlog += qdisc_pkt_len(skb);
+ return qdisc_enqueue_tail(skb, sch);
+ }
+
+ q->stats.pdrop++;
+drop:
+ return qdisc_drop(skb, sch);
+
+congestion_drop:
+ qdisc_drop(skb, sch);
+ return NET_XMIT_CN;
+}
+
+static int gred_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct gred_sched *t = qdisc_priv(sch);
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
+ "for requeue, screwing up backlog.\n",
+ tc_index_to_dp(skb));
+ } else {
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+ q->backlog += qdisc_pkt_len(skb);
+ }
+
+ return qdisc_requeue(skb, sch);
+}
+
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ struct gred_sched *t = qdisc_priv(sch);
+
+ skb = qdisc_dequeue_head(sch);
+
+ if (skb) {
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ 0x%x after dequeue, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
+ } else {
+ q->backlog -= qdisc_pkt_len(skb);
+
+ if (!q->backlog && !gred_wred_mode(t))
+ red_start_of_idle_period(&q->parms);
+ }
+
+ return skb;
+ }
+
+ if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
+ red_start_of_idle_period(&t->wred_set);
+
+ return NULL;
+}
+
+static unsigned int gred_drop(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ struct gred_sched *t = qdisc_priv(sch);
+
+ skb = qdisc_dequeue_tail(sch);
+ if (skb) {
+ unsigned int len = qdisc_pkt_len(skb);
+ struct gred_sched_data *q;
+ u16 dp = tc_index_to_dp(skb);
+
+ if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "GRED: Unable to relocate "
+ "VQ 0x%x while dropping, screwing up "
+ "backlog.\n", tc_index_to_dp(skb));
+ } else {
+ q->backlog -= len;
+ q->stats.other++;
+
+ if (!q->backlog && !gred_wred_mode(t))
+ red_start_of_idle_period(&q->parms);
+ }
+
+ qdisc_drop(skb, sch);
+ return len;
+ }
+
+ if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
+ red_start_of_idle_period(&t->wred_set);
+
+	return 0;
+}
+
+static void gred_reset(struct Qdisc *sch)
+{
+ int i;
+ struct gred_sched *t = qdisc_priv(sch);
+
+ qdisc_reset_queue(sch);
+
+ for (i = 0; i < t->DPs; i++) {
+ struct gred_sched_data *q = t->tab[i];
+
+ if (!q)
+ continue;
+
+ red_restart(&q->parms);
+ q->backlog = 0;
+ }
+}
+
+static inline void gred_destroy_vq(struct gred_sched_data *q)
+{
+ kfree(q);
+}
+
+static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct tc_gred_sopt *sopt;
+ int i;
+
+ if (dps == NULL)
+ return -EINVAL;
+
+ sopt = nla_data(dps);
+
+ if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
+ return -EINVAL;
+
+ sch_tree_lock(sch);
+ table->DPs = sopt->DPs;
+ table->def = sopt->def_DP;
+ table->red_flags = sopt->flags;
+
+ /*
+ * Every entry point to GRED is synchronized with the above code
+ * and the DP is checked against DPs, i.e. shadowed VQs can no
+ * longer be found so we can unlock right here.
+ */
+ sch_tree_unlock(sch);
+
+ if (sopt->grio) {
+ gred_enable_rio_mode(table);
+ gred_disable_wred_mode(table);
+ if (gred_wred_mode_check(sch))
+ gred_enable_wred_mode(table);
+ } else {
+ gred_disable_rio_mode(table);
+ gred_disable_wred_mode(table);
+ }
+
+ for (i = table->DPs; i < MAX_DPs; i++) {
+ if (table->tab[i]) {
+ printk(KERN_WARNING "GRED: Warning: Destroying "
+ "shadowed VQ 0x%x\n", i);
+ gred_destroy_vq(table->tab[i]);
+ table->tab[i] = NULL;
+ }
+ }
+
+ return 0;
+}
+
+static inline int gred_change_vq(struct Qdisc *sch, int dp,
+ struct tc_gred_qopt *ctl, int prio, u8 *stab)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct gred_sched_data *q;
+
+ if (table->tab[dp] == NULL) {
+ table->tab[dp] = kzalloc(sizeof(*q), GFP_KERNEL);
+ if (table->tab[dp] == NULL)
+ return -ENOMEM;
+ }
+
+ q = table->tab[dp];
+ q->DP = dp;
+ q->prio = prio;
+ q->limit = ctl->limit;
+
+ if (q->backlog == 0)
+ red_end_of_idle_period(&q->parms);
+
+ red_set_parms(&q->parms,
+ ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
+ ctl->Scell_log, stab);
+
+ return 0;
+}
+
+static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
+ [TCA_GRED_PARMS] = { .len = sizeof(struct tc_gred_qopt) },
+ [TCA_GRED_STAB] = { .len = 256 },
+ [TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
+};
+
+static int gred_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct tc_gred_qopt *ctl;
+ struct nlattr *tb[TCA_GRED_MAX + 1];
+ int err, prio = GRED_DEF_PRIO;
+ u8 *stab;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+ return gred_change_table_def(sch, opt);
+
+ if (tb[TCA_GRED_PARMS] == NULL ||
+ tb[TCA_GRED_STAB] == NULL)
+ return -EINVAL;
+
+ err = -EINVAL;
+ ctl = nla_data(tb[TCA_GRED_PARMS]);
+ stab = nla_data(tb[TCA_GRED_STAB]);
+
+ if (ctl->DP >= table->DPs)
+ goto errout;
+
+ if (gred_rio_mode(table)) {
+ if (ctl->prio == 0) {
+ int def_prio = GRED_DEF_PRIO;
+
+ if (table->tab[table->def])
+ def_prio = table->tab[table->def]->prio;
+
+			printk(KERN_DEBUG "GRED: DP %u does not have a prio, "
+			       "setting default to %d\n", ctl->DP, def_prio);
+
+ prio = def_prio;
+ } else
+ prio = ctl->prio;
+ }
+
+ sch_tree_lock(sch);
+
+ err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+ if (err < 0)
+ goto errout_locked;
+
+ if (gred_rio_mode(table)) {
+ gred_disable_wred_mode(table);
+ if (gred_wred_mode_check(sch))
+ gred_enable_wred_mode(table);
+ }
+
+ err = 0;
+
+errout_locked:
+ sch_tree_unlock(sch);
+errout:
+ return err;
+}
+
+static int gred_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct nlattr *tb[TCA_GRED_MAX + 1];
+ int err;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_GRED_MAX, opt, gred_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
+ return -EINVAL;
+
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
+}
+
+static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ struct nlattr *parms, *opts = NULL;
+ int i;
+ struct tc_gred_sopt sopt = {
+ .DPs = table->DPs,
+ .def_DP = table->def,
+ .grio = gred_rio_mode(table),
+ .flags = table->red_flags,
+ };
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+ NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+ parms = nla_nest_start(skb, TCA_GRED_PARMS);
+ if (parms == NULL)
+ goto nla_put_failure;
+
+ for (i = 0; i < MAX_DPs; i++) {
+ struct gred_sched_data *q = table->tab[i];
+ struct tc_gred_qopt opt;
+
+ memset(&opt, 0, sizeof(opt));
+
+ if (!q) {
+			/* hack -- fix at some point with a proper message.
+			 * This is how we indicate to tc that there is no VQ
+			 * at this DP.
+			 */
+
+ opt.DP = MAX_DPs + i;
+ goto append_opt;
+ }
+
+ opt.limit = q->limit;
+ opt.DP = q->DP;
+ opt.backlog = q->backlog;
+ opt.prio = q->prio;
+ opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
+ opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
+ opt.Wlog = q->parms.Wlog;
+ opt.Plog = q->parms.Plog;
+ opt.Scell_log = q->parms.Scell_log;
+ opt.other = q->stats.other;
+ opt.early = q->stats.prob_drop;
+ opt.forced = q->stats.forced_drop;
+ opt.pdrop = q->stats.pdrop;
+ opt.packets = q->packetsin;
+ opt.bytesin = q->bytesin;
+
+ if (gred_wred_mode(table)) {
+ q->parms.qidlestart =
+ table->tab[table->def]->parms.qidlestart;
+ q->parms.qavg = table->tab[table->def]->parms.qavg;
+ }
+
+ opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);
+
+append_opt:
+ if (nla_append(skb, sizeof(opt), &opt) < 0)
+ goto nla_put_failure;
+ }
+
+ nla_nest_end(skb, parms);
+
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static void gred_destroy(struct Qdisc *sch)
+{
+ struct gred_sched *table = qdisc_priv(sch);
+ int i;
+
+ for (i = 0; i < table->DPs; i++) {
+ if (table->tab[i])
+ gred_destroy_vq(table->tab[i]);
+ }
+}
+
+static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
+ .id = "gred",
+ .priv_size = sizeof(struct gred_sched),
+ .enqueue = gred_enqueue,
+ .dequeue = gred_dequeue,
+ .requeue = gred_requeue,
+ .drop = gred_drop,
+ .init = gred_init,
+ .reset = gred_reset,
+ .destroy = gred_destroy,
+ .change = gred_change,
+ .dump = gred_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init gred_module_init(void)
+{
+ return register_qdisc(&gred_qdisc_ops);
+}
+
+static void __exit gred_module_exit(void)
+{
+ unregister_qdisc(&gred_qdisc_ops);
+}
+
+module_init(gred_module_init)
+module_exit(gred_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
new file mode 100644
index 0000000..c1e77da
--- /dev/null
+++ b/net/sched/sch_hfsc.c
@@ -0,0 +1,1759 @@
+/*
+ * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * 2003-10-17 - Ported from altq
+ */
+/*
+ * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation is hereby granted (including for commercial or
+ * for-profit use), provided that both the copyright notice and this
+ * permission notice appear in all copies of the software, derivative
+ * works, or modified versions, and any portions thereof.
+ *
+ * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
+ * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
+ * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * Carnegie Mellon encourages (but does not require) users of this
+ * software to return any improvements or extensions that they make,
+ * and to grant Carnegie Mellon the rights to redistribute these
+ * changes without encumbrance.
+ */
+/*
+ * H-FSC is described in Proceedings of SIGCOMM'97,
+ * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
+ * Real-Time and Priority Service"
+ * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
+ *
+ * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
+ * when a class has an upperlimit, the fit-time is computed from the
+ * upperlimit service curve. the link-sharing scheduler does not schedule
+ * a class whose fit-time exceeds the current time.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/init.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_sched.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <asm/div64.h>
+
+/*
+ * kernel internal service curve representation:
+ * coordinates are given by 64 bit unsigned integers.
+ * x-axis: unit is clock count.
+ * y-axis: unit is byte.
+ *
+ * The service curve parameters are converted to the internal
+ * representation. The slope values are scaled to avoid overflow.
+ * The inverse slope values as well as the y-projection of the 1st
+ * segment are kept in order to avoid 64-bit divide operations
+ * that are expensive on 32-bit architectures.
+ */
+
+struct internal_sc
+{
+ u64 sm1; /* scaled slope of the 1st segment */
+ u64 ism1; /* scaled inverse-slope of the 1st segment */
+ u64 dx; /* the x-projection of the 1st segment */
+ u64 dy; /* the y-projection of the 1st segment */
+ u64 sm2; /* scaled slope of the 2nd segment */
+ u64 ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
+/* runtime service curve */
+struct runtime_sc
+{
+ u64 x; /* current starting position on x-axis */
+ u64 y; /* current starting position on y-axis */
+ u64 sm1; /* scaled slope of the 1st segment */
+ u64 ism1; /* scaled inverse-slope of the 1st segment */
+ u64 dx; /* the x-projection of the 1st segment */
+ u64 dy; /* the y-projection of the 1st segment */
+ u64 sm2; /* scaled slope of the 2nd segment */
+ u64 ism2; /* scaled inverse-slope of the 2nd segment */
+};
+
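+/*
+ * internal_sc holds only the configured two-segment curve parameters,
+ * while runtime_sc additionally carries the current (x, y) anchor point,
+ * which rtsc_init() and rtsc_min() update as the curve is re-anchored
+ * at run time.
+ */
+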
+enum hfsc_class_flags
+{
+ HFSC_RSC = 0x1,
+ HFSC_FSC = 0x2,
+ HFSC_USC = 0x4
+};
+
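+/*
+ * The bits above are kept in cl_flags and record which of the per-class
+ * service curves have been configured: HFSC_RSC for the real-time curve
+ * (cl_rsc), HFSC_FSC for the link-sharing/fair curve (cl_fsc) and
+ * HFSC_USC for the upper-limit curve (cl_usc).
+ */
+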
+struct hfsc_class
+{
+ struct Qdisc_class_common cl_common;
+ unsigned int refcnt; /* usage count */
+
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ struct gnet_stats_rate_est rate_est;
+ unsigned int level; /* class level in hierarchy */
+ struct tcf_proto *filter_list; /* filter list */
+ unsigned int filter_cnt; /* filter count */
+
+ struct hfsc_sched *sched; /* scheduler data */
+ struct hfsc_class *cl_parent; /* parent class */
+ struct list_head siblings; /* sibling classes */
+ struct list_head children; /* child classes */
+ struct Qdisc *qdisc; /* leaf qdisc */
+
+ struct rb_node el_node; /* qdisc's eligible tree member */
+ struct rb_root vt_tree; /* active children sorted by cl_vt */
+ struct rb_node vt_node; /* parent's vt_tree member */
+ struct rb_root cf_tree; /* active children sorted by cl_f */
+ struct rb_node cf_node; /* parent's cf_heap member */
+ struct list_head dlist; /* drop list member */
+
+ u64 cl_total; /* total work in bytes */
+ u64 cl_cumul; /* cumulative work in bytes done by
+ real-time criteria */
+
+ u64 cl_d; /* deadline*/
+ u64 cl_e; /* eligible time */
+ u64 cl_vt; /* virtual time */
+ u64 cl_f; /* time when this class will fit for
+ link-sharing, max(myf, cfmin) */
+ u64 cl_myf; /* my fit-time (calculated from this
+ class's own upperlimit curve) */
+ u64 cl_myfadj; /* my fit-time adjustment (to cancel
+ history dependence) */
+ u64 cl_cfmin; /* earliest children's fit-time (used
+ with cl_myf to obtain cl_f) */
+ u64 cl_cvtmin; /* minimal virtual time among the
+ children fit for link-sharing
+ (monotonic within a period) */
+ u64 cl_vtadj; /* intra-period cumulative vt
+ adjustment */
+ u64 cl_vtoff; /* inter-period cumulative vt offset */
+ u64 cl_cvtmax; /* max child's vt in the last period */
+ u64 cl_cvtoff; /* cumulative cvtmax of all periods */
+ u64 cl_pcvtoff; /* parent's cvtoff at initialization
+ time */
+
+ struct internal_sc cl_rsc; /* internal real-time service curve */
+ struct internal_sc cl_fsc; /* internal fair service curve */
+ struct internal_sc cl_usc; /* internal upperlimit service curve */
+ struct runtime_sc cl_deadline; /* deadline curve */
+ struct runtime_sc cl_eligible; /* eligible curve */
+ struct runtime_sc cl_virtual; /* virtual curve */
+ struct runtime_sc cl_ulimit; /* upperlimit curve */
+
+ unsigned long cl_flags; /* which curves are valid */
+ unsigned long cl_vtperiod; /* vt period sequence number */
+ unsigned long cl_parentperiod;/* parent's vt period sequence number*/
+ unsigned long cl_nactive; /* number of active children */
+};
+
+struct hfsc_sched
+{
+ u16 defcls; /* default class id */
+ struct hfsc_class root; /* root class */
+ struct Qdisc_class_hash clhash; /* class hash */
+ struct rb_root eligible; /* eligible tree */
+ struct list_head droplist; /* active leaf class list (for
+ dropping) */
+ struct sk_buff_head requeue; /* requeued packet */
+ struct qdisc_watchdog watchdog; /* watchdog timer */
+};
+
+#define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */
+
+
+/*
+ * The eligible tree holds backlogged classes sorted by their eligible times.
+ * There is one eligible tree per hfsc instance.
+ */
+
+static void
+eltree_insert(struct hfsc_class *cl)
+{
+ struct rb_node **p = &cl->sched->eligible.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
+
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, el_node);
+ if (cl->cl_e >= cl1->cl_e)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->el_node, parent, p);
+ rb_insert_color(&cl->el_node, &cl->sched->eligible);
+}
+
+static inline void
+eltree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->el_node, &cl->sched->eligible);
+}
+
+static inline void
+eltree_update(struct hfsc_class *cl)
+{
+ eltree_remove(cl);
+ eltree_insert(cl);
+}
+
+/* find the class with the minimum deadline among the eligible classes */
+static inline struct hfsc_class *
+eltree_get_mindl(struct hfsc_sched *q, u64 cur_time)
+{
+ struct hfsc_class *p, *cl = NULL;
+ struct rb_node *n;
+
+ for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) {
+ p = rb_entry(n, struct hfsc_class, el_node);
+ if (p->cl_e > cur_time)
+ break;
+ if (cl == NULL || p->cl_d < cl->cl_d)
+ cl = p;
+ }
+ return cl;
+}
+
+/* find the class with minimum eligible time among the eligible classes */
+static inline struct hfsc_class *
+eltree_get_minel(struct hfsc_sched *q)
+{
+ struct rb_node *n;
+
+ n = rb_first(&q->eligible);
+ if (n == NULL)
+ return NULL;
+ return rb_entry(n, struct hfsc_class, el_node);
+}
+
+/*
+ * The vttree holds backlogged child classes sorted by their virtual
+ * time. Each intermediate class has one vttree.
+ */
+static void
+vttree_insert(struct hfsc_class *cl)
+{
+ struct rb_node **p = &cl->cl_parent->vt_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
+
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, vt_node);
+ if (cl->cl_vt >= cl1->cl_vt)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->vt_node, parent, p);
+ rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+static inline void
+vttree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree);
+}
+
+static inline void
+vttree_update(struct hfsc_class *cl)
+{
+ vttree_remove(cl);
+ vttree_insert(cl);
+}
+
+static inline struct hfsc_class *
+vttree_firstfit(struct hfsc_class *cl, u64 cur_time)
+{
+ struct hfsc_class *p;
+ struct rb_node *n;
+
+ for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) {
+ p = rb_entry(n, struct hfsc_class, vt_node);
+ if (p->cl_f <= cur_time)
+ return p;
+ }
+ return NULL;
+}
+
+/*
+ * get the leaf class with the minimum vt in the hierarchy
+ */
+static struct hfsc_class *
+vttree_get_minvt(struct hfsc_class *cl, u64 cur_time)
+{
+	/* nothing to do if the root class's cfmin is bigger than cur_time */
+ if (cl->cl_cfmin > cur_time)
+ return NULL;
+
+ while (cl->level > 0) {
+ cl = vttree_firstfit(cl, cur_time);
+ if (cl == NULL)
+ return NULL;
+ /*
+ * update parent's cl_cvtmin.
+ */
+ if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
+ cl->cl_parent->cl_cvtmin = cl->cl_vt;
+ }
+ return cl;
+}
+
+static void
+cftree_insert(struct hfsc_class *cl)
+{
+ struct rb_node **p = &cl->cl_parent->cf_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct hfsc_class *cl1;
+
+ while (*p != NULL) {
+ parent = *p;
+ cl1 = rb_entry(parent, struct hfsc_class, cf_node);
+ if (cl->cl_f >= cl1->cl_f)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->cf_node, parent, p);
+ rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_remove(struct hfsc_class *cl)
+{
+ rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree);
+}
+
+static inline void
+cftree_update(struct hfsc_class *cl)
+{
+ cftree_remove(cl);
+ cftree_insert(cl);
+}
+
+/*
+ * service curve support functions
+ *
+ * external service curve parameters
+ * m: bps
+ * d: us
+ * internal service curve parameters
+ * sm: (bytes/psched_us) << SM_SHIFT
+ * ism: (psched_us/byte) << ISM_SHIFT
+ * dx: psched_us
+ *
+ * The clock source resolution with ktime is 1.024us.
+ *
+ * sm and ism are scaled in order to keep effective digits.
+ * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
+ * digits in decimal using the following table.
+ *
+ * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
+ * ------------+-------------------------------------------------------
+ * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3
+ *
+ * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125
+ */
+#define SM_SHIFT 20
+#define ISM_SHIFT 18
+
+#define SM_MASK ((1ULL << SM_SHIFT) - 1)
+#define ISM_MASK ((1ULL << ISM_SHIFT) - 1)
+
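+/*
+ * Rough example, following the table above (one psched tick ~= 1.024us):
+ * at m = 1 Mbit/s a class is entitled to about 0.128 bytes per tick, so
+ * m2sm() yields sm ~= 0.128 << SM_SHIFT (~134000) and m2ism() yields
+ * ism ~= 7.8 << ISM_SHIFT, the scaled inverse in ticks per byte.
+ */
+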
+static inline u64
+seg_x2y(u64 x, u64 sm)
+{
+ u64 y;
+
+ /*
+ * compute
+ * y = x * sm >> SM_SHIFT
+ * but divide it for the upper and lower bits to avoid overflow
+ */
+ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
+ return y;
+}
+
+static inline u64
+seg_y2x(u64 y, u64 ism)
+{
+ u64 x;
+
+ if (y == 0)
+ x = 0;
+ else if (ism == HT_INFINITY)
+ x = HT_INFINITY;
+ else {
+ x = (y >> ISM_SHIFT) * ism
+ + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
+ }
+ return x;
+}
+
+/* Convert m (bps) into sm (bytes/psched us) */
+static u64
+m2sm(u32 m)
+{
+ u64 sm;
+
+ sm = ((u64)m << SM_SHIFT);
+ sm += PSCHED_TICKS_PER_SEC - 1;
+ do_div(sm, PSCHED_TICKS_PER_SEC);
+ return sm;
+}
+
+/* convert m (bps) into ism (psched us/byte) */
+static u64
+m2ism(u32 m)
+{
+ u64 ism;
+
+ if (m == 0)
+ ism = HT_INFINITY;
+ else {
+ ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT);
+ ism += m - 1;
+ do_div(ism, m);
+ }
+ return ism;
+}
+
+/* convert d (us) into dx (psched us) */
+static u64
+d2dx(u32 d)
+{
+ u64 dx;
+
+ dx = ((u64)d * PSCHED_TICKS_PER_SEC);
+ dx += USEC_PER_SEC - 1;
+ do_div(dx, USEC_PER_SEC);
+ return dx;
+}
+
+/* convert sm (bytes/psched us) into m (bps) */
+static u32
+sm2m(u64 sm)
+{
+ u64 m;
+
+ m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT;
+ return (u32)m;
+}
+
+/* convert dx (psched us) into d (us) */
+static u32
+dx2d(u64 dx)
+{
+ u64 d;
+
+ d = dx * USEC_PER_SEC;
+ do_div(d, PSCHED_TICKS_PER_SEC);
+ return (u32)d;
+}
+
+static void
+sc2isc(struct tc_service_curve *sc, struct internal_sc *isc)
+{
+ isc->sm1 = m2sm(sc->m1);
+ isc->ism1 = m2ism(sc->m1);
+ isc->dx = d2dx(sc->d);
+ isc->dy = seg_x2y(isc->dx, isc->sm1);
+ isc->sm2 = m2sm(sc->m2);
+ isc->ism2 = m2ism(sc->m2);
+}
+
+/*
+ * initialize the runtime service curve with the given internal
+ * service curve starting at (x, y).
+ */
+static void
+rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
+{
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->sm1 = isc->sm1;
+ rtsc->ism1 = isc->ism1;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ rtsc->sm2 = isc->sm2;
+ rtsc->ism2 = isc->ism2;
+}
+
+/*
+ * calculate the x-projection of the runtime service curve for the
+ * given y-projection value
+ */
+static u64
+rtsc_y2x(struct runtime_sc *rtsc, u64 y)
+{
+ u64 x;
+
+ if (y < rtsc->y)
+ x = rtsc->x;
+ else if (y <= rtsc->y + rtsc->dy) {
+ /* x belongs to the 1st segment */
+ if (rtsc->dy == 0)
+ x = rtsc->x + rtsc->dx;
+ else
+ x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
+ } else {
+ /* x belongs to the 2nd segment */
+ x = rtsc->x + rtsc->dx
+ + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
+ }
+ return x;
+}
+
+static u64
+rtsc_x2y(struct runtime_sc *rtsc, u64 x)
+{
+ u64 y;
+
+ if (x <= rtsc->x)
+ y = rtsc->y;
+ else if (x <= rtsc->x + rtsc->dx)
+ /* y belongs to the 1st segment */
+ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
+ else
+ /* y belongs to the 2nd segment */
+ y = rtsc->y + rtsc->dy
+ + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
+ return y;
+}
+
+/*
+ * update the runtime service curve by taking the minimum of the current
+ * runtime service curve and the service curve starting at (x, y).
+ */
+static void
+rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
+{
+ u64 y1, y2, dx, dy;
+ u32 dsm;
+
+ if (isc->sm1 <= isc->sm2) {
+ /* service curve is convex */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 < y)
+ /* the current rtsc is smaller */
+ return;
+ rtsc->x = x;
+ rtsc->y = y;
+ return;
+ }
+
+ /*
+ * service curve is concave
+ * compute the two y values of the current rtsc
+ * y1: at x
+ * y2: at (x + dx)
+ */
+ y1 = rtsc_x2y(rtsc, x);
+ if (y1 <= y) {
+ /* rtsc is below isc, no change to rtsc */
+ return;
+ }
+
+ y2 = rtsc_x2y(rtsc, x + isc->dx);
+ if (y2 >= y + isc->dy) {
+ /* rtsc is above isc, replace rtsc by isc */
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = isc->dx;
+ rtsc->dy = isc->dy;
+ return;
+ }
+
+ /*
+ * the two curves intersect
+ * compute the offsets (dx, dy) using the reverse
+ * function of seg_x2y()
+ * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
+ */
+ dx = (y1 - y) << SM_SHIFT;
+ dsm = isc->sm1 - isc->sm2;
+ do_div(dx, dsm);
+ /*
+ * check if (x, y1) belongs to the 1st segment of rtsc.
+ * if so, add the offset.
+ */
+ if (rtsc->x + rtsc->dx > x)
+ dx += rtsc->x + rtsc->dx - x;
+ dy = seg_x2y(dx, isc->sm1);
+
+ rtsc->x = x;
+ rtsc->y = y;
+ rtsc->dx = dx;
+ rtsc->dy = dy;
+ return;
+}
+
+static void
+init_ed(struct hfsc_class *cl, unsigned int next_len)
+{
+ u64 cur_time = psched_get_time();
+
+ /* update the deadline curve */
+ rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+
+ /*
+ * update the eligible curve.
+ * for concave, it is equal to the deadline curve.
+ * for convex, it is a linear curve with slope m2.
+ */
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+
+ /* compute e and d */
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ eltree_insert(cl);
+}
+
+static void
+update_ed(struct hfsc_class *cl, unsigned int next_len)
+{
+ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+
+ eltree_update(cl);
+}
+
+static inline void
+update_d(struct hfsc_class *cl, unsigned int next_len)
+{
+ cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
+}
+
+static inline void
+update_cfmin(struct hfsc_class *cl)
+{
+ struct rb_node *n = rb_first(&cl->cf_tree);
+ struct hfsc_class *p;
+
+ if (n == NULL) {
+ cl->cl_cfmin = 0;
+ return;
+ }
+ p = rb_entry(n, struct hfsc_class, cf_node);
+ cl->cl_cfmin = p->cl_f;
+}
+
+static void
+init_vf(struct hfsc_class *cl, unsigned int len)
+{
+ struct hfsc_class *max_cl;
+ struct rb_node *n;
+ u64 vt, f, cur_time;
+ int go_active;
+
+ cur_time = 0;
+ go_active = 1;
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+ if (go_active && cl->cl_nactive++ == 0)
+ go_active = 1;
+ else
+ go_active = 0;
+
+ if (go_active) {
+ n = rb_last(&cl->cl_parent->vt_tree);
+ if (n != NULL) {
+ max_cl = rb_entry(n, struct hfsc_class,vt_node);
+ /*
+ * set vt to the average of the min and max
+ * classes. if the parent's period didn't
+ * change, don't decrease vt of the class.
+ */
+ vt = max_cl->cl_vt;
+ if (cl->cl_parent->cl_cvtmin != 0)
+ vt = (cl->cl_parent->cl_cvtmin + vt)/2;
+
+ if (cl->cl_parent->cl_vtperiod !=
+ cl->cl_parentperiod || vt > cl->cl_vt)
+ cl->cl_vt = vt;
+ } else {
+ /*
+ * first child for a new parent backlog period.
+ * add parent's cvtmax to cvtoff to make a new
+ * vt (vtoff + vt) larger than the vt in the
+ * last period for all children.
+ */
+ vt = cl->cl_parent->cl_cvtmax;
+ cl->cl_parent->cl_cvtoff += vt;
+ cl->cl_parent->cl_cvtmax = 0;
+ cl->cl_parent->cl_cvtmin = 0;
+ cl->cl_vt = 0;
+ }
+
+ cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
+ cl->cl_pcvtoff;
+
+ /* update the virtual curve */
+ vt = cl->cl_vt + cl->cl_vtoff;
+ rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
+ cl->cl_total);
+ if (cl->cl_virtual.x == vt) {
+ cl->cl_virtual.x -= cl->cl_vtoff;
+ cl->cl_vtoff = 0;
+ }
+ cl->cl_vtadj = 0;
+
+ cl->cl_vtperiod++; /* increment vt period */
+ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
+ if (cl->cl_parent->cl_nactive == 0)
+ cl->cl_parentperiod++;
+ cl->cl_f = 0;
+
+ vttree_insert(cl);
+ cftree_insert(cl);
+
+ if (cl->cl_flags & HFSC_USC) {
+ /* class has upper limit curve */
+ if (cur_time == 0)
+ cur_time = psched_get_time();
+
+ /* update the ulimit curve */
+ rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
+ cl->cl_total);
+ /* compute myf */
+ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+ cl->cl_myfadj = 0;
+ }
+ }
+
+ f = max(cl->cl_myf, cl->cl_cfmin);
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ cftree_update(cl);
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
+{
+ u64 f; /* , myf_bound, delta; */
+ int go_passive = 0;
+
+ if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC)
+ go_passive = 1;
+
+ for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
+ cl->cl_total += len;
+
+ if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0)
+ continue;
+
+ if (go_passive && --cl->cl_nactive == 0)
+ go_passive = 1;
+ else
+ go_passive = 0;
+
+ if (go_passive) {
+ /* no more active child, going passive */
+
+ /* update cvtmax of the parent class */
+ if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
+ cl->cl_parent->cl_cvtmax = cl->cl_vt;
+
+ /* remove this class from the vt tree */
+ vttree_remove(cl);
+
+ cftree_remove(cl);
+ update_cfmin(cl->cl_parent);
+
+ continue;
+ }
+
+ /*
+ * update vt and f
+ */
+ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
+ - cl->cl_vtoff + cl->cl_vtadj;
+
+ /*
+ * if vt of the class is smaller than cvtmin,
+ * the class was skipped in the past due to non-fit.
+ * if so, we need to adjust vtadj.
+ */
+ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
+ cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
+ cl->cl_vt = cl->cl_parent->cl_cvtmin;
+ }
+
+ /* update the vt tree */
+ vttree_update(cl);
+
+ if (cl->cl_flags & HFSC_USC) {
+ cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
+ cl->cl_total);
+#if 0
+ /*
+ * This code causes classes to stay way under their
+ * limit when multiple classes are used at gigabit
+ * speed. needs investigation. -kaber
+ */
+ /*
+ * if myf lags behind by more than one clock tick
+ * from the current time, adjust myfadj to prevent
+ * a rate-limited class from going greedy.
+ * in a steady state under rate-limiting, myf
+ * fluctuates within one clock tick.
+ */
+ myf_bound = cur_time - PSCHED_JIFFIE2US(1);
+ if (cl->cl_myf < myf_bound) {
+ delta = cur_time - cl->cl_myf;
+ cl->cl_myfadj += delta;
+ cl->cl_myf += delta;
+ }
+#endif
+ }
+
+ f = max(cl->cl_myf, cl->cl_cfmin);
+ if (f != cl->cl_f) {
+ cl->cl_f = f;
+ cftree_update(cl);
+ update_cfmin(cl->cl_parent);
+ }
+ }
+}
+
+static void
+set_active(struct hfsc_class *cl, unsigned int len)
+{
+ if (cl->cl_flags & HFSC_RSC)
+ init_ed(cl, len);
+ if (cl->cl_flags & HFSC_FSC)
+ init_vf(cl, len);
+
+ list_add_tail(&cl->dlist, &cl->sched->droplist);
+}
+
+static void
+set_passive(struct hfsc_class *cl)
+{
+ if (cl->cl_flags & HFSC_RSC)
+ eltree_remove(cl);
+
+ list_del(&cl->dlist);
+
+ /*
+	 * vttree is now handled in update_vf(), so update_vf(cl, 0, 0)
+	 * needs to be called explicitly to remove a class from the vttree.
+ */
+}
+
+/*
+ * hack to get length of first packet in queue.
+ */
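+/*
+ * There is no peek operation in this version of the qdisc interface, so
+ * the head packet is dequeued and immediately requeued; if the requeue
+ * fails, the packet is gone and the tree qlen is decreased to keep the
+ * counters consistent.
+ */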
+static unsigned int
+qdisc_peek_len(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+ unsigned int len;
+
+ skb = sch->dequeue(sch);
+ if (skb == NULL) {
+ if (net_ratelimit())
+ printk("qdisc_peek_len: non work-conserving qdisc ?\n");
+ return 0;
+ }
+ len = qdisc_pkt_len(skb);
+ if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
+ if (net_ratelimit())
+ printk("qdisc_peek_len: failed to requeue\n");
+ qdisc_tree_decrease_qlen(sch, 1);
+ return 0;
+ }
+ return len;
+}
+
+static void
+hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
+{
+ unsigned int len = cl->qdisc->q.qlen;
+
+ qdisc_reset(cl->qdisc);
+ qdisc_tree_decrease_qlen(cl->qdisc, len);
+}
+
+static void
+hfsc_adjust_levels(struct hfsc_class *cl)
+{
+ struct hfsc_class *p;
+ unsigned int level;
+
+ do {
+ level = 0;
+ list_for_each_entry(p, &cl->children, siblings) {
+ if (p->level >= level)
+ level = p->level + 1;
+ }
+ cl->level = level;
+ } while ((cl = cl->cl_parent) != NULL);
+}
+
+static inline struct hfsc_class *
+hfsc_find_class(u32 classid, struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct Qdisc_class_common *clc;
+
+ clc = qdisc_class_find(&q->clhash, classid);
+ if (clc == NULL)
+ return NULL;
+ return container_of(clc, struct hfsc_class, cl_common);
+}
+
+static void
+hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc,
+ u64 cur_time)
+{
+ sc2isc(rsc, &cl->cl_rsc);
+ rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
+ cl->cl_eligible = cl->cl_deadline;
+ if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
+ cl->cl_eligible.dx = 0;
+ cl->cl_eligible.dy = 0;
+ }
+ cl->cl_flags |= HFSC_RSC;
+}
+
+static void
+hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
+{
+ sc2isc(fsc, &cl->cl_fsc);
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ cl->cl_flags |= HFSC_FSC;
+}
+
+static void
+hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
+ u64 cur_time)
+{
+ sc2isc(usc, &cl->cl_usc);
+ rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total);
+ cl->cl_flags |= HFSC_USC;
+}
+
+static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
+ [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
+ [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) },
+};
+
+static int
+hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl = (struct hfsc_class *)*arg;
+ struct hfsc_class *parent = NULL;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_HFSC_MAX + 1];
+ struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
+ u64 cur_time;
+ int err;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_HFSC_MAX, opt, hfsc_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_HFSC_RSC]) {
+ rsc = nla_data(tb[TCA_HFSC_RSC]);
+ if (rsc->m1 == 0 && rsc->m2 == 0)
+ rsc = NULL;
+ }
+
+ if (tb[TCA_HFSC_FSC]) {
+ fsc = nla_data(tb[TCA_HFSC_FSC]);
+ if (fsc->m1 == 0 && fsc->m2 == 0)
+ fsc = NULL;
+ }
+
+ if (tb[TCA_HFSC_USC]) {
+ usc = nla_data(tb[TCA_HFSC_USC]);
+ if (usc->m1 == 0 && usc->m2 == 0)
+ usc = NULL;
+ }
+
+ if (cl != NULL) {
+ if (parentid) {
+ if (cl->cl_parent &&
+ cl->cl_parent->cl_common.classid != parentid)
+ return -EINVAL;
+ if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
+ return -EINVAL;
+ }
+ cur_time = psched_get_time();
+
+ sch_tree_lock(sch);
+ if (rsc != NULL)
+ hfsc_change_rsc(cl, rsc, cur_time);
+ if (fsc != NULL)
+ hfsc_change_fsc(cl, fsc);
+ if (usc != NULL)
+ hfsc_change_usc(cl, usc, cur_time);
+
+ if (cl->qdisc->q.qlen != 0) {
+ if (cl->cl_flags & HFSC_RSC)
+ update_ed(cl, qdisc_peek_len(cl->qdisc));
+ if (cl->cl_flags & HFSC_FSC)
+ update_vf(cl, 0, cur_time);
+ }
+ sch_tree_unlock(sch);
+
+ if (tca[TCA_RATE])
+ gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ return 0;
+ }
+
+ if (parentid == TC_H_ROOT)
+ return -EEXIST;
+
+ parent = &q->root;
+ if (parentid) {
+ parent = hfsc_find_class(parentid, sch);
+ if (parent == NULL)
+ return -ENOENT;
+ }
+
+ if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
+ return -EINVAL;
+ if (hfsc_find_class(classid, sch))
+ return -EEXIST;
+
+ if (rsc == NULL && fsc == NULL)
+ return -EINVAL;
+
+ cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL);
+ if (cl == NULL)
+ return -ENOBUFS;
+
+ if (rsc != NULL)
+ hfsc_change_rsc(cl, rsc, 0);
+ if (fsc != NULL)
+ hfsc_change_fsc(cl, fsc);
+ if (usc != NULL)
+ hfsc_change_usc(cl, usc, 0);
+
+ cl->cl_common.classid = classid;
+ cl->refcnt = 1;
+ cl->sched = q;
+ cl->cl_parent = parent;
+ cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
+ if (cl->qdisc == NULL)
+ cl->qdisc = &noop_qdisc;
+ INIT_LIST_HEAD(&cl->children);
+ cl->vt_tree = RB_ROOT;
+ cl->cf_tree = RB_ROOT;
+
+ sch_tree_lock(sch);
+ qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
+ list_add_tail(&cl->siblings, &parent->children);
+ if (parent->level == 0)
+ hfsc_purge_queue(sch, parent);
+ hfsc_adjust_levels(parent);
+ cl->cl_pcvtoff = parent->cl_cvtoff;
+ sch_tree_unlock(sch);
+
+ qdisc_class_hash_grow(sch, &q->clhash);
+
+ if (tca[TCA_RATE])
+ gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
+ *arg = (unsigned long)cl;
+ return 0;
+}
+
+static void
+hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&cl->filter_list);
+ qdisc_destroy(cl->qdisc);
+ gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ if (cl != &q->root)
+ kfree(cl);
+}
+
+static int
+hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
+ return -EBUSY;
+
+ sch_tree_lock(sch);
+
+ list_del(&cl->siblings);
+ hfsc_adjust_levels(cl->cl_parent);
+
+ hfsc_purge_queue(sch, cl);
+ qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
+
+ if (--cl->refcnt == 0)
+ hfsc_destroy_class(sch, cl);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static struct hfsc_class *
+hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl;
+ struct tcf_result res;
+ struct tcf_proto *tcf;
+ int result;
+
+ if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 &&
+ (cl = hfsc_find_class(skb->priority, sch)) != NULL)
+ if (cl->level == 0)
+ return cl;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ tcf = q->root.filter_list;
+ while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_QUEUED:
+ case TC_ACT_STOLEN:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ if ((cl = (struct hfsc_class *)res.class) == NULL) {
+ if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+ break; /* filter selected invalid classid */
+ }
+
+ if (cl->level == 0)
+ return cl; /* hit leaf class */
+
+ /* apply inner filter chain */
+ tcf = cl->filter_list;
+ }
+
+ /* classification failed, try default class */
+ cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+ if (cl == NULL || cl->level > 0)
+ return NULL;
+
+ return cl;
+}
+
+static int
+hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl == NULL)
+ return -ENOENT;
+ if (cl->level > 0)
+ return -EINVAL;
+ if (new == NULL) {
+ new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->cl_common.classid);
+ if (new == NULL)
+ new = &noop_qdisc;
+ }
+
+ sch_tree_lock(sch);
+ hfsc_purge_queue(sch, cl);
+ *old = xchg(&cl->qdisc, new);
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static struct Qdisc *
+hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl != NULL && cl->level == 0)
+ return cl->qdisc;
+
+ return NULL;
+}
+
+static void
+hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl->qdisc->q.qlen == 0) {
+ update_vf(cl, 0, 0);
+ set_passive(cl);
+ }
+}
+
+static unsigned long
+hfsc_get_class(struct Qdisc *sch, u32 classid)
+{
+ struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+ if (cl != NULL)
+ cl->refcnt++;
+
+ return (unsigned long)cl;
+}
+
+static void
+hfsc_put_class(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (--cl->refcnt == 0)
+ hfsc_destroy_class(sch, cl);
+}
+
+static unsigned long
+hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+ struct hfsc_class *p = (struct hfsc_class *)parent;
+ struct hfsc_class *cl = hfsc_find_class(classid, sch);
+
+ if (cl != NULL) {
+ if (p != NULL && p->level <= cl->level)
+ return 0;
+ cl->filter_cnt++;
+ }
+
+ return (unsigned long)cl;
+}
+
+static void
+hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ cl->filter_cnt--;
+}
+
+static struct tcf_proto **
+hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+
+ if (cl == NULL)
+ cl = &q->root;
+
+ return &cl->filter_list;
+}
+
+static int
+hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
+{
+ struct tc_service_curve tsc;
+
+ tsc.m1 = sm2m(sc->sm1);
+ tsc.d = dx2d(sc->dx);
+ tsc.m2 = sm2m(sc->sm2);
+ NLA_PUT(skb, attr, sizeof(tsc), &tsc);
+
+ return skb->len;
+
+ nla_put_failure:
+ return -1;
+}
+
+static inline int
+hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
+{
+ if ((cl->cl_flags & HFSC_RSC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
+ goto nla_put_failure;
+
+ if ((cl->cl_flags & HFSC_FSC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
+ goto nla_put_failure;
+
+ if ((cl->cl_flags & HFSC_USC) &&
+ (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
+ goto nla_put_failure;
+
+ return skb->len;
+
+ nla_put_failure:
+ return -1;
+}
+
+static int
+hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
+ struct tcmsg *tcm)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+ struct nlattr *nest;
+
+ tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->cl_common.classid :
+ TC_H_ROOT;
+ tcm->tcm_handle = cl->cl_common.classid;
+ if (cl->level == 0)
+ tcm->tcm_info = cl->qdisc->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ if (hfsc_dump_curves(skb, cl) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+ nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int
+hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
+ struct gnet_dump *d)
+{
+ struct hfsc_class *cl = (struct hfsc_class *)arg;
+ struct tc_hfsc_stats xstats;
+
+ cl->qstats.qlen = cl->qdisc->q.qlen;
+ xstats.level = cl->level;
+ xstats.period = cl->cl_vtperiod;
+ xstats.work = cl->cl_total;
+ xstats.rtwork = cl->cl_cumul;
+
+ if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ return -1;
+
+ return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+
+static void
+hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hlist_node *n;
+ struct hfsc_class *cl;
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i],
+ cl_common.hnode) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+}
+
+static void
+hfsc_schedule_watchdog(struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl;
+ u64 next_time = 0;
+
+ if ((cl = eltree_get_minel(q)) != NULL)
+ next_time = cl->cl_e;
+ if (q->root.cl_cfmin != 0) {
+ if (next_time == 0 || next_time > q->root.cl_cfmin)
+ next_time = q->root.cl_cfmin;
+ }
+ WARN_ON(next_time == 0);
+ qdisc_watchdog_schedule(&q->watchdog, next_time);
+}
+
+static int
+hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct tc_hfsc_qopt *qopt;
+ int err;
+
+ if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = nla_data(opt);
+
+ q->defcls = qopt->defcls;
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ return err;
+ q->eligible = RB_ROOT;
+ INIT_LIST_HEAD(&q->droplist);
+ skb_queue_head_init(&q->requeue);
+
+ q->root.cl_common.classid = sch->handle;
+ q->root.refcnt = 1;
+ q->root.sched = q;
+ q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ sch->handle);
+ if (q->root.qdisc == NULL)
+ q->root.qdisc = &noop_qdisc;
+ INIT_LIST_HEAD(&q->root.children);
+ q->root.vt_tree = RB_ROOT;
+ q->root.cf_tree = RB_ROOT;
+
+ qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
+ qdisc_class_hash_grow(sch, &q->clhash);
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ return 0;
+}
+
+static int
+hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct tc_hfsc_qopt *qopt;
+
+ if (opt == NULL || nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = nla_data(opt);
+
+ sch_tree_lock(sch);
+ q->defcls = qopt->defcls;
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static void
+hfsc_reset_class(struct hfsc_class *cl)
+{
+ cl->cl_total = 0;
+ cl->cl_cumul = 0;
+ cl->cl_d = 0;
+ cl->cl_e = 0;
+ cl->cl_vt = 0;
+ cl->cl_vtadj = 0;
+ cl->cl_vtoff = 0;
+ cl->cl_cvtmin = 0;
+ cl->cl_cvtmax = 0;
+ cl->cl_cvtoff = 0;
+ cl->cl_pcvtoff = 0;
+ cl->cl_vtperiod = 0;
+ cl->cl_parentperiod = 0;
+ cl->cl_f = 0;
+ cl->cl_myf = 0;
+ cl->cl_myfadj = 0;
+ cl->cl_cfmin = 0;
+ cl->cl_nactive = 0;
+
+ cl->vt_tree = RB_ROOT;
+ cl->cf_tree = RB_ROOT;
+ qdisc_reset(cl->qdisc);
+
+ if (cl->cl_flags & HFSC_RSC)
+ rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
+ if (cl->cl_flags & HFSC_FSC)
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
+ if (cl->cl_flags & HFSC_USC)
+ rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
+}
+
+static void
+hfsc_reset_qdisc(struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+ hfsc_reset_class(cl);
+ }
+ __skb_queue_purge(&q->requeue);
+ q->eligible = RB_ROOT;
+ INIT_LIST_HEAD(&q->droplist);
+ qdisc_watchdog_cancel(&q->watchdog);
+ sch->q.qlen = 0;
+}
+
+static void
+hfsc_destroy_qdisc(struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hlist_node *n, *next;
+ struct hfsc_class *cl;
+ unsigned int i;
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
+ }
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+ cl_common.hnode)
+ hfsc_destroy_class(sch, cl);
+ }
+ qdisc_class_hash_destroy(&q->clhash);
+ __skb_queue_purge(&q->requeue);
+ qdisc_watchdog_cancel(&q->watchdog);
+}
+
+static int
+hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_hfsc_qopt qopt;
+
+ qopt.defcls = q->defcls;
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+ return skb->len;
+
+ nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int
+hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct hfsc_class *cl;
+ int err;
+
+ cl = hfsc_classify(skb, sch, &err);
+ if (cl == NULL) {
+ if (err & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return err;
+ }
+
+ err = qdisc_enqueue(skb, cl->qdisc);
+ if (unlikely(err != NET_XMIT_SUCCESS)) {
+ if (net_xmit_drop_count(err)) {
+ cl->qstats.drops++;
+ sch->qstats.drops++;
+ }
+ return err;
+ }
+
+ if (cl->qdisc->q.qlen == 1)
+ set_active(cl, qdisc_pkt_len(skb));
+
+ cl->bstats.packets++;
+ cl->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->q.qlen++;
+
+ return NET_XMIT_SUCCESS;
+}
+
+static struct sk_buff *
+hfsc_dequeue(struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl;
+ struct sk_buff *skb;
+ u64 cur_time;
+ unsigned int next_len;
+ int realtime = 0;
+
+ if (sch->q.qlen == 0)
+ return NULL;
+ if ((skb = __skb_dequeue(&q->requeue)))
+ goto out;
+
+ cur_time = psched_get_time();
+
+ /*
+ * if there are eligible classes, use real-time criteria.
+ * find the class with the minimum deadline among
+ * the eligible classes.
+ */
+ if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+ realtime = 1;
+ } else {
+ /*
+ * use link-sharing criteria
+ * get the class with the minimum vt in the hierarchy
+ */
+ cl = vttree_get_minvt(&q->root, cur_time);
+ if (cl == NULL) {
+ sch->qstats.overlimits++;
+ hfsc_schedule_watchdog(sch);
+ return NULL;
+ }
+ }
+
+ skb = cl->qdisc->dequeue(cl->qdisc);
+ if (skb == NULL) {
+ if (net_ratelimit())
+ printk("HFSC: Non-work-conserving qdisc ?\n");
+ return NULL;
+ }
+
+ update_vf(cl, qdisc_pkt_len(skb), cur_time);
+ if (realtime)
+ cl->cl_cumul += qdisc_pkt_len(skb);
+
+ if (cl->qdisc->q.qlen != 0) {
+ if (cl->cl_flags & HFSC_RSC) {
+ /* update ed */
+ next_len = qdisc_peek_len(cl->qdisc);
+ if (realtime)
+ update_ed(cl, next_len);
+ else
+ update_d(cl, next_len);
+ }
+ } else {
+ /* the class becomes passive */
+ set_passive(cl);
+ }
+
+ out:
+ sch->flags &= ~TCQ_F_THROTTLED;
+ sch->q.qlen--;
+
+ return skb;
+}
+
+static int
+hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+
+ __skb_queue_head(&q->requeue, skb);
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ return NET_XMIT_SUCCESS;
+}
+
+static unsigned int
+hfsc_drop(struct Qdisc *sch)
+{
+ struct hfsc_sched *q = qdisc_priv(sch);
+ struct hfsc_class *cl;
+ unsigned int len;
+
+ list_for_each_entry(cl, &q->droplist, dlist) {
+ if (cl->qdisc->ops->drop != NULL &&
+ (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
+ if (cl->qdisc->q.qlen == 0) {
+ update_vf(cl, 0, 0);
+ set_passive(cl);
+ } else {
+ list_move_tail(&cl->dlist, &q->droplist);
+ }
+ cl->qstats.drops++;
+ sch->qstats.drops++;
+ sch->q.qlen--;
+ return len;
+ }
+ }
+ return 0;
+}
+
+static const struct Qdisc_class_ops hfsc_class_ops = {
+ .change = hfsc_change_class,
+ .delete = hfsc_delete_class,
+ .graft = hfsc_graft_class,
+ .leaf = hfsc_class_leaf,
+ .qlen_notify = hfsc_qlen_notify,
+ .get = hfsc_get_class,
+ .put = hfsc_put_class,
+ .bind_tcf = hfsc_bind_tcf,
+ .unbind_tcf = hfsc_unbind_tcf,
+ .tcf_chain = hfsc_tcf_chain,
+ .dump = hfsc_dump_class,
+ .dump_stats = hfsc_dump_class_stats,
+ .walk = hfsc_walk
+};
+
+static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
+ .id = "hfsc",
+ .init = hfsc_init_qdisc,
+ .change = hfsc_change_qdisc,
+ .reset = hfsc_reset_qdisc,
+ .destroy = hfsc_destroy_qdisc,
+ .dump = hfsc_dump_qdisc,
+ .enqueue = hfsc_enqueue,
+ .dequeue = hfsc_dequeue,
+ .requeue = hfsc_requeue,
+ .drop = hfsc_drop,
+ .cl_ops = &hfsc_class_ops,
+ .priv_size = sizeof(struct hfsc_sched),
+ .owner = THIS_MODULE
+};
+
+static int __init
+hfsc_init(void)
+{
+ return register_qdisc(&hfsc_qdisc_ops);
+}
+
+static void __exit
+hfsc_cleanup(void)
+{
+ unregister_qdisc(&hfsc_qdisc_ops);
+}
+
+MODULE_LICENSE("GPL");
+module_init(hfsc_init);
+module_exit(hfsc_cleanup);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
new file mode 100644
index 0000000..d2943a4
--- /dev/null
+++ b/net/sched/sch_htb.c
@@ -0,0 +1,1590 @@
+/*
+ * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Martin Devera, <devik@cdi.cz>
+ *
+ * Credits (in time order) for older HTB versions:
+ * Stef Coene <stef.coene@docum.org>
+ * HTB support at LARTC mailing list
+ * Ondrej Kraus, <krauso@barr.cz>
+ * found missing INIT_QDISC(htb)
+ * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
+ * helped a lot to locate nasty class stall bug
+ * Andi Kleen, Jamal Hadi, Bert Hubert
+ * code review and helpful comments on shaping
+ * Tomasz Wrona, <tw@eter.tym.pl>
+ * created test case so that I was able to fix nasty bug
+ * Wilfried Weissmann
+ * spotted bug in dequeue code and helped with fix
+ * Jiri Fojtasek
+ * fixed requeue routine
+ * and many others. thanks.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+/* HTB algorithm.
+ Author: devik@cdi.cz
+ ========================================================================
+ HTB is like TBF with multiple classes. It is also similar to CBQ because
+ it allows assigning a priority to each class in the hierarchy.
+ In fact it is another implementation of Floyd's formal sharing.
+
+ Levels:
+ Each class is assigned a level. A leaf ALWAYS has level 0 and root
+ classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
+ one less than their parent.
+*/
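+
+/* Illustrative example (assumed values; TC_HTB_MAXDEPTH is 8 in pkt_sched.h):
+   in a hierarchy  root class -> inner class -> leaves, the leaves sit at
+   level 0, the top-level class at level TC_HTB_MAXDEPTH-1 == 7 and the inner
+   class one below it at level 6; leaf and parent levels need not be adjacent.
+*/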
+
+static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
+#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
+
+#if HTB_VER >> 16 != TC_HTB_PROTOVER
+#error "Mismatched sch_htb.c and pkt_sch.h"
+#endif
+
+/* Module parameter and sysfs export */
+module_param (htb_hysteresis, int, 0640);
+MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
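+/* Illustrative runtime toggle (assumes the module is loaded as sch_htb and
+ * sysfs is mounted in the usual place):
+ *   echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
+ * or at load time:  modprobe sch_htb htb_hysteresis=1
+ */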
+
+/* used internally to keep the status of a single class */
+enum htb_cmode {
+ HTB_CANT_SEND, /* class can't send and can't borrow */
+ HTB_MAY_BORROW, /* class can't send but may borrow */
+ HTB_CAN_SEND /* class can send */
+};
+
+/* interior & leaf nodes; props specific to leaves are marked L: */
+struct htb_class {
+ struct Qdisc_class_common common;
+ /* general class parameters */
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ struct gnet_stats_rate_est rate_est;
+ struct tc_htb_xstats xstats; /* our special stats */
+ int refcnt; /* usage count of this class */
+
+ /* topology */
+ int level; /* our level (see above) */
+ unsigned int children;
+ struct htb_class *parent; /* parent class */
+
+ union {
+ struct htb_class_leaf {
+ struct Qdisc *q;
+ int prio;
+ int aprio;
+ int quantum;
+ int deficit[TC_HTB_MAXDEPTH];
+ struct list_head drop_list;
+ } leaf;
+ struct htb_class_inner {
+ struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
+ struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
+			/* When a class changes from state 1->2 and disconnects from
+			   its parent's feed, we lose the ptr value and start from the
+			   first child again. Here we store the classid of the
+			   last valid ptr (used when ptr is NULL). */
+ u32 last_ptr_id[TC_HTB_NUMPRIO];
+ } inner;
+ } un;
+ struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
+ struct rb_node pq_node; /* node for event queue */
+ psched_time_t pq_key;
+
+ int prio_activity; /* for which prios are we active */
+ enum htb_cmode cmode; /* current mode of the class */
+
+ /* class attached filters */
+ struct tcf_proto *filter_list;
+ int filter_cnt;
+
+	int warned;		/* only one warning about a non-work-conserving leaf */
+
+ /* token bucket parameters */
+ struct qdisc_rate_table *rate; /* rate table of the class itself */
+ struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
+ long buffer, cbuffer; /* token bucket depth/rate */
+ psched_tdiff_t mbuffer; /* max wait time */
+ long tokens, ctokens; /* current number of tokens */
+ psched_time_t t_c; /* checkpoint time */
+
+	int prio;		/* backup of un.leaf.prio for parent-to-leaf return */
+	int quantum;		/* backup of un.leaf.quantum; eventually a full */
+				/* replacement of the un.leaf originals should be done */
+};
+
+static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
+ int size)
+{
+ long result = qdisc_l2t(rate, size);
+ return result;
+}
+
+struct htb_sched {
+ struct Qdisc_class_hash clhash;
+ struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
+
+ /* self list - roots of self generating tree */
+ struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+ int row_mask[TC_HTB_MAXDEPTH];
+ struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+ u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
+
+ /* self wait list - roots of wait PQs per row */
+ struct rb_root wait_pq[TC_HTB_MAXDEPTH];
+
+ /* time of nearest event per level (row) */
+ psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
+
+	/* whether we hit a non-work-conserving class during this dequeue;
+	   used to disable the mindelay complaint in dequeue */
+	int nwc_hit;
+
+ int defcls; /* class where unclassified flows go to */
+
+ /* filters for qdisc itself */
+ struct tcf_proto *filter_list;
+
+ int rate2quantum; /* quant = rate / rate2quantum */
+ psched_time_t now; /* cached dequeue time */
+ struct qdisc_watchdog watchdog;
+
+ /* non shaped skbs; let them go directly thru */
+ struct sk_buff_head direct_queue;
+ int direct_qlen; /* max qlen of above */
+
+ long direct_pkts;
+};
+
+/* find class in global hash table using given handle */
+static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct Qdisc_class_common *clc;
+
+ clc = qdisc_class_find(&q->clhash, handle);
+ if (clc == NULL)
+ return NULL;
+ return container_of(clc, struct htb_class, common);
+}
+
+/**
+ * htb_classify - classify a packet into class
+ *
+ * It returns NULL if the packet should be dropped or -1 if the packet
+ * should be passed directly thru. In all other cases a leaf class is
+ * returned. We allow direct class selection by classid in priority. Then we
+ * examine filters in the qdisc and in inner nodes (if a higher filter points
+ * to the inner node). If we end up with classid MAJOR:0 we enqueue the skb
+ * into the special internal fifo (direct). These packets then go directly
+ * thru. If we still have no valid leaf we try to use the MAJOR:default leaf.
+ * If that is still unsuccessful we finish and return the direct queue.
+ */
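+/* Example (hypothetical handles, for illustration only): with the qdisc
+ * installed as 1: (so sch->handle is 1:0), skb->priority == 1:0 sends the
+ * skb to the direct queue, skb->priority == 1:10 picks class 1:10 directly
+ * (provided it is a leaf), and anything else goes through the filters and
+ * finally falls back to the 1:<defcls> leaf or, failing that, to the
+ * direct queue.
+ */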
+#define HTB_DIRECT (struct htb_class*)-1
+
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl;
+ struct tcf_result res;
+ struct tcf_proto *tcf;
+ int result;
+
+	/* allow selecting a class by setting skb->priority to a valid classid;
+	   note that nfmark can be used too, by attaching an fw filter with no
+	   rules in it */
+ if (skb->priority == sch->handle)
+ return HTB_DIRECT; /* X:0 (direct flow) selected */
+ if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+ return cl;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ tcf = q->filter_list;
+ while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_QUEUED:
+ case TC_ACT_STOLEN:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
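+			/* fall through */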
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ if ((cl = (void *)res.class) == NULL) {
+ if (res.classid == sch->handle)
+ return HTB_DIRECT; /* X:0 (direct flow) */
+ if ((cl = htb_find(res.classid, sch)) == NULL)
+ break; /* filter selected invalid classid */
+ }
+ if (!cl->level)
+ return cl; /* we hit leaf; return it */
+
+ /* we have got inner class; apply inner filter chain */
+ tcf = cl->filter_list;
+ }
+ /* classification failed; try to use default class */
+ cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
+ if (!cl || cl->level)
+ return HTB_DIRECT; /* bad default .. this is safe bet */
+ return cl;
+}
+
+/**
+ * htb_add_to_id_tree - adds class to the round robin list
+ *
+ * The routine adds the class to the list (actually a tree) sorted by classid.
+ * Make sure that the class is not already on such a list for the given prio.
+ */
+static void htb_add_to_id_tree(struct rb_root *root,
+ struct htb_class *cl, int prio)
+{
+ struct rb_node **p = &root->rb_node, *parent = NULL;
+
+ while (*p) {
+ struct htb_class *c;
+ parent = *p;
+ c = rb_entry(parent, struct htb_class, node[prio]);
+
+ if (cl->common.classid > c->common.classid)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->node[prio], parent, p);
+ rb_insert_color(&cl->node[prio], root);
+}
+
+/**
+ * htb_add_to_wait_tree - adds class to the event queue with delay
+ *
+ * The class is added to the priority event queue to indicate that the class
+ * will change its mode at time cl->pq_key. Make sure that the class is not
+ * already in the queue.
+ */
+static void htb_add_to_wait_tree(struct htb_sched *q,
+ struct htb_class *cl, long delay)
+{
+ struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
+
+ cl->pq_key = q->now + delay;
+ if (cl->pq_key == q->now)
+ cl->pq_key++;
+
+ /* update the nearest event cache */
+ if (q->near_ev_cache[cl->level] > cl->pq_key)
+ q->near_ev_cache[cl->level] = cl->pq_key;
+
+ while (*p) {
+ struct htb_class *c;
+ parent = *p;
+ c = rb_entry(parent, struct htb_class, pq_node);
+ if (cl->pq_key >= c->pq_key)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+ rb_link_node(&cl->pq_node, parent, p);
+ rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
+}
+
+/**
+ * htb_next_rb_node - advances to the next node in the binary tree
+ *
+ * When we are past the last key, *n becomes NULL.
+ * Average complexity is 2 steps per call.
+ */
+static inline void htb_next_rb_node(struct rb_node **n)
+{
+ *n = rb_next(*n);
+}
+
+/**
+ * htb_add_class_to_row - add class to its row
+ *
+ * The class is added to row at priorities marked in mask.
+ * It does nothing if mask == 0.
+ */
+static inline void htb_add_class_to_row(struct htb_sched *q,
+ struct htb_class *cl, int mask)
+{
+ q->row_mask[cl->level] |= mask;
+ while (mask) {
+ int prio = ffz(~mask);
+ mask &= ~(1 << prio);
+ htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio);
+ }
+}
+
+/* If this triggers, it is a bug in this code, but it need not be fatal */
+static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
+{
+ if (RB_EMPTY_NODE(rb)) {
+ WARN_ON(1);
+ } else {
+ rb_erase(rb, root);
+ RB_CLEAR_NODE(rb);
+ }
+}
+
+
+/**
+ * htb_remove_class_from_row - removes class from its row
+ *
+ * The class is removed from row at priorities marked in mask.
+ * It does nothing if mask == 0.
+ */
+static inline void htb_remove_class_from_row(struct htb_sched *q,
+ struct htb_class *cl, int mask)
+{
+ int m = 0;
+
+ while (mask) {
+ int prio = ffz(~mask);
+
+ mask &= ~(1 << prio);
+ if (q->ptr[cl->level][prio] == cl->node + prio)
+ htb_next_rb_node(q->ptr[cl->level] + prio);
+
+ htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio);
+ if (!q->row[cl->level][prio].rb_node)
+ m |= 1 << prio;
+ }
+ q->row_mask[cl->level] &= ~m;
+}
+
+/**
+ * htb_activate_prios - creates an active class's feed chain
+ *
+ * The class is connected to its ancestors and/or the appropriate rows
+ * for the priorities it participates in. cl->cmode must be the new
+ * (activated) mode. It does nothing if cl->prio_activity == 0.
+ */
+static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
+{
+ struct htb_class *p = cl->parent;
+ long m, mask = cl->prio_activity;
+
+ while (cl->cmode == HTB_MAY_BORROW && p && mask) {
+ m = mask;
+ while (m) {
+ int prio = ffz(~m);
+ m &= ~(1 << prio);
+
+ if (p->un.inner.feed[prio].rb_node)
+				/* parent already has its feed in use, so clear
+				   the bit in mask; the parent is already OK */
+ mask &= ~(1 << prio);
+
+ htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
+ }
+ p->prio_activity |= mask;
+ cl = p;
+ p = cl->parent;
+
+ }
+ if (cl->cmode == HTB_CAN_SEND && mask)
+ htb_add_class_to_row(q, cl, mask);
+}
+
+/**
+ * htb_deactivate_prios - remove class from feed chain
+ *
+ * cl->cmode must represent old mode (before deactivation). It does
+ * nothing if cl->prio_activity == 0. Class is removed from all feed
+ * chains and rows.
+ */
+static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
+{
+ struct htb_class *p = cl->parent;
+ long m, mask = cl->prio_activity;
+
+ while (cl->cmode == HTB_MAY_BORROW && p && mask) {
+ m = mask;
+ mask = 0;
+ while (m) {
+ int prio = ffz(~m);
+ m &= ~(1 << prio);
+
+ if (p->un.inner.ptr[prio] == cl->node + prio) {
+				/* we are removing the child that the parent
+				   feed points to - forget the pointer but
+				   remember the classid */
+ p->un.inner.last_ptr_id[prio] = cl->common.classid;
+ p->un.inner.ptr[prio] = NULL;
+ }
+
+ htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio);
+
+ if (!p->un.inner.feed[prio].rb_node)
+ mask |= 1 << prio;
+ }
+
+ p->prio_activity &= ~mask;
+ cl = p;
+ p = cl->parent;
+
+ }
+ if (cl->cmode == HTB_CAN_SEND && mask)
+ htb_remove_class_from_row(q, cl, mask);
+}
+
+static inline long htb_lowater(const struct htb_class *cl)
+{
+ if (htb_hysteresis)
+ return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
+ else
+ return 0;
+}
+static inline long htb_hiwater(const struct htb_class *cl)
+{
+ if (htb_hysteresis)
+ return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
+ else
+ return 0;
+}
+
+
+/**
+ * htb_class_mode - computes and returns current class mode
+ *
+ * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
+ * is not HTB_CAN_SEND then *diff is set to the remaining time until
+ * cl will change its state (used later for cl->pq_key).
+ * Also it is worth noting that the class mode doesn't change simply
+ * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
+ * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
+ * mode transitions per time unit. The speed gain is about 1/6.
+ */
+static inline enum htb_cmode
+htb_class_mode(struct htb_class *cl, long *diff)
+{
+ long toks;
+
+ if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
+ *diff = -toks;
+ return HTB_CANT_SEND;
+ }
+
+ if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
+ return HTB_CAN_SEND;
+
+ *diff = -toks;
+ return HTB_MAY_BORROW;
+}
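+
+/* Worked example (made-up numbers): with htb_hysteresis enabled,
+ * cl->buffer == cl->cbuffer == 10000 and the class currently in
+ * HTB_MAY_BORROW, htb_lowater() is -10000 and htb_hiwater() is 0; so the
+ * class drops to HTB_CANT_SEND only once cl->ctokens + *diff falls below
+ * -10000, and it is promoted to HTB_CAN_SEND only once cl->tokens + *diff
+ * climbs back to >= 0.
+ */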
+
+/**
+ * htb_change_class_mode - changes the class's mode
+ *
+ * This should be the only way to change a class's mode under normal
+ * circumstances. The routine will update feed list linkage, change the mode
+ * and add the class to the wait event queue if appropriate. The new mode
+ * should be different from the old one, and cl->pq_key has to be valid if
+ * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
+ */
+static void
+htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
+{
+ enum htb_cmode new_mode = htb_class_mode(cl, diff);
+
+ if (new_mode == cl->cmode)
+ return;
+
+ if (cl->prio_activity) { /* not necessary: speed optimization */
+ if (cl->cmode != HTB_CANT_SEND)
+ htb_deactivate_prios(q, cl);
+ cl->cmode = new_mode;
+ if (new_mode != HTB_CANT_SEND)
+ htb_activate_prios(q, cl);
+ } else
+ cl->cmode = new_mode;
+}
+
+/**
+ * htb_activate - inserts leaf cl into appropriate active feeds
+ *
+ * The routine learns the (new) priority of the leaf and activates the feed
+ * chain for that prio. It can safely be called on an already active leaf.
+ * It also adds the leaf to the drop list.
+ */
+static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
+{
+ WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
+
+ if (!cl->prio_activity) {
+ cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
+ htb_activate_prios(q, cl);
+ list_add_tail(&cl->un.leaf.drop_list,
+ q->drops + cl->un.leaf.aprio);
+ }
+}
+
+/**
+ * htb_deactivate - remove leaf cl from active feeds
+ *
+ * Make sure that the leaf is active. In other words, it can't be called
+ * on a non-active leaf. It also removes the class from the drop list.
+ */
+static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
+{
+ WARN_ON(!cl->prio_activity);
+
+ htb_deactivate_prios(q, cl);
+ cl->prio_activity = 0;
+ list_del_init(&cl->un.leaf.drop_list);
+}
+
+static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ int ret;
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = htb_classify(skb, sch, &ret);
+
+ if (cl == HTB_DIRECT) {
+ /* enqueue to helper queue */
+ if (q->direct_queue.qlen < q->direct_qlen) {
+ __skb_queue_tail(&q->direct_queue, skb);
+ q->direct_pkts++;
+ } else {
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+ }
+#ifdef CONFIG_NET_CLS_ACT
+ } else if (!cl) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+#endif
+ } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
+ } else {
+ cl->bstats.packets +=
+ skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
+ cl->bstats.bytes += qdisc_pkt_len(skb);
+ htb_activate(q, cl);
+ }
+
+ sch->q.qlen++;
+ sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ return NET_XMIT_SUCCESS;
+}
+
+/* TODO: requeuing a packet charges it to policers again !! */
+static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ int ret;
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = htb_classify(skb, sch, &ret);
+ struct sk_buff *tskb;
+
+ if (cl == HTB_DIRECT) {
+ /* enqueue to helper queue */
+ if (q->direct_queue.qlen < q->direct_qlen) {
+ __skb_queue_head(&q->direct_queue, skb);
+ } else {
+ __skb_queue_head(&q->direct_queue, skb);
+ tskb = __skb_dequeue_tail(&q->direct_queue);
+ kfree_skb(tskb);
+ sch->qstats.drops++;
+ return NET_XMIT_CN;
+ }
+#ifdef CONFIG_NET_CLS_ACT
+ } else if (!cl) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+#endif
+ } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
+ NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ cl->qstats.drops++;
+ }
+ return ret;
+ } else
+ htb_activate(q, cl);
+
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ return NET_XMIT_SUCCESS;
+}
+
+/**
+ * htb_charge_class - charges amount "bytes" to leaf and ancestors
+ *
+ * The routine assumes that a packet "bytes" long was dequeued from leaf cl,
+ * borrowing from "level". It accounts the bytes to the ceil leaky bucket for
+ * the leaf and all ancestors, and to the rate bucket for ancestors at levels
+ * "level" and higher. It also handles a possible change of mode resulting
+ * from the update. Note that the mode can also increase here (MAY_BORROW to
+ * CAN_SEND) because we can use a more precise clock here than the event
+ * queue does. In that case we remove the class from the event queue first.
+ */
+static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
+ int level, struct sk_buff *skb)
+{
+ int bytes = qdisc_pkt_len(skb);
+ long toks, diff;
+ enum htb_cmode old_mode;
+
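+	/* HTB_ACCNT(T, B, R): refill token count cl->T by the elapsed time
+	 * "diff", cap it at the bucket depth cl->B, subtract the transmission
+	 * time of "bytes" according to rate table cl->R, and never let it fall
+	 * below 1 - cl->mbuffer. */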
+#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
+ if (toks > cl->B) toks = cl->B; \
+ toks -= L2T(cl, cl->R, bytes); \
+ if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
+ cl->T = toks
+
+ while (cl) {
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
+ if (cl->level >= level) {
+ if (cl->level == level)
+ cl->xstats.lends++;
+ HTB_ACCNT(tokens, buffer, rate);
+ } else {
+ cl->xstats.borrows++;
+ cl->tokens += diff; /* we moved t_c; update tokens */
+ }
+ HTB_ACCNT(ctokens, cbuffer, ceil);
+ cl->t_c = q->now;
+
+ old_mode = cl->cmode;
+ diff = 0;
+ htb_change_class_mode(q, cl, &diff);
+ if (old_mode != cl->cmode) {
+ if (old_mode != HTB_CAN_SEND)
+ htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+ if (cl->cmode != HTB_CAN_SEND)
+ htb_add_to_wait_tree(q, cl, diff);
+ }
+
+ /* update byte stats except for leaves which are already updated */
+ if (cl->level) {
+ cl->bstats.bytes += bytes;
+ cl->bstats.packets += skb_is_gso(skb)?
+ skb_shinfo(skb)->gso_segs:1;
+ }
+ cl = cl->parent;
+ }
+}
+
+/**
+ * htb_do_events - make mode changes to classes at the level
+ *
+ * Scans the event queue for pending events and applies them. Returns the
+ * time of the next pending event (0 if there is no event in the pq).
+ * Note: only events with cl->pq_key <= q->now are applied.
+ */
+static psched_time_t htb_do_events(struct htb_sched *q, int level)
+{
+ /* don't run for longer than 2 jiffies; 2 is used instead of
+ 1 to simplify things when jiffy is going to be incremented
+ too soon */
+ unsigned long stop_at = jiffies + 2;
+ while (time_before(jiffies, stop_at)) {
+ struct htb_class *cl;
+ long diff;
+ struct rb_node *p = rb_first(&q->wait_pq[level]);
+
+ if (!p)
+ return 0;
+
+ cl = rb_entry(p, struct htb_class, pq_node);
+ if (cl->pq_key > q->now)
+ return cl->pq_key;
+
+ htb_safe_rb_erase(p, q->wait_pq + level);
+ diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
+ htb_change_class_mode(q, cl, &diff);
+ if (cl->cmode != HTB_CAN_SEND)
+ htb_add_to_wait_tree(q, cl, diff);
+ }
+	/* too much load - let's continue on the next jiffy */
+ return q->now + PSCHED_TICKS_PER_SEC / HZ;
+}
+
+/* Returns class->node+prio from the id-tree where the class's id is >= id,
+   or NULL if no such node exists. */
+static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
+ u32 id)
+{
+ struct rb_node *r = NULL;
+ while (n) {
+ struct htb_class *cl =
+ rb_entry(n, struct htb_class, node[prio]);
+ if (id == cl->common.classid)
+ return n;
+
+ if (id > cl->common.classid) {
+ n = n->rb_right;
+ } else {
+ r = n;
+ n = n->rb_left;
+ }
+ }
+ return r;
+}
+
+/**
+ * htb_lookup_leaf - returns next leaf class in DRR order
+ *
+ * Finds the leaf that the current feed pointer points to.
+ */
+static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
+ struct rb_node **pptr, u32 * pid)
+{
+ int i;
+ struct {
+ struct rb_node *root;
+ struct rb_node **pptr;
+ u32 *pid;
+ } stk[TC_HTB_MAXDEPTH], *sp = stk;
+
+ WARN_ON(!tree->rb_node);
+ sp->root = tree->rb_node;
+ sp->pptr = pptr;
+ sp->pid = pid;
+
+ for (i = 0; i < 65535; i++) {
+ if (!*sp->pptr && *sp->pid) {
+ /* ptr was invalidated but id is valid - try to recover
+ the original or next ptr */
+ *sp->pptr =
+ htb_id_find_next_upper(prio, sp->root, *sp->pid);
+ }
+		*sp->pid = 0;	/* ptr is valid now, so drop this hint; it
+				   can become stale quickly */
+ if (!*sp->pptr) { /* we are at right end; rewind & go up */
+ *sp->pptr = sp->root;
+ while ((*sp->pptr)->rb_left)
+ *sp->pptr = (*sp->pptr)->rb_left;
+ if (sp > stk) {
+ sp--;
+ WARN_ON(!*sp->pptr);
+ if (!*sp->pptr)
+ return NULL;
+ htb_next_rb_node(sp->pptr);
+ }
+ } else {
+ struct htb_class *cl;
+ cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
+ if (!cl->level)
+ return cl;
+ (++sp)->root = cl->un.inner.feed[prio].rb_node;
+ sp->pptr = cl->un.inner.ptr + prio;
+ sp->pid = cl->un.inner.last_ptr_id + prio;
+ }
+ }
+ WARN_ON(1);
+ return NULL;
+}
+
+/* dequeues packet at given priority and level; call only if
+ you are sure that there is active class at prio/level */
+static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
+ int level)
+{
+ struct sk_buff *skb = NULL;
+ struct htb_class *cl, *start;
+ /* look initial class up in the row */
+ start = cl = htb_lookup_leaf(q->row[level] + prio, prio,
+ q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
+
+ do {
+next:
+ WARN_ON(!cl);
+ if (!cl)
+ return NULL;
+
+		/* the class can be empty - it is unlikely but can happen if the
+		   leaf qdisc drops packets in its enqueue routine or if someone
+		   used a graft operation on the leaf since the last dequeue;
+		   simply deactivate and skip such a class */
+ if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
+ struct htb_class *next;
+ htb_deactivate(q, cl);
+
+ /* row/level might become empty */
+ if ((q->row_mask[level] & (1 << prio)) == 0)
+ return NULL;
+
+ next = htb_lookup_leaf(q->row[level] + prio,
+ prio, q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
+
+ if (cl == start) /* fix start if we just deleted it */
+ start = next;
+ cl = next;
+ goto next;
+ }
+
+ skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
+ if (likely(skb != NULL))
+ break;
+ if (!cl->warned) {
+ printk(KERN_WARNING
+ "htb: class %X isn't work conserving ?!\n",
+ cl->common.classid);
+ cl->warned = 1;
+ }
+ q->nwc_hit++;
+ htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+ ptr[0]) + prio);
+ cl = htb_lookup_leaf(q->row[level] + prio, prio,
+ q->ptr[level] + prio,
+ q->last_ptr_id[level] + prio);
+
+ } while (cl != start);
+
+ if (likely(skb != NULL)) {
+ cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
+ if (cl->un.leaf.deficit[level] < 0) {
+ cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
+ htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
+ ptr[0]) + prio);
+ }
+		/* this used to be after charge_class but this arrangement
+		   gives us slightly better performance */
+ if (!cl->un.leaf.q->q.qlen)
+ htb_deactivate(q, cl);
+ htb_charge_class(q, cl, level, skb);
+ }
+ return skb;
+}
+
+static struct sk_buff *htb_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb = NULL;
+ struct htb_sched *q = qdisc_priv(sch);
+ int level;
+ psched_time_t next_event;
+
+ /* try to dequeue direct packets as high prio (!) to minimize cpu work */
+ skb = __skb_dequeue(&q->direct_queue);
+ if (skb != NULL) {
+ sch->flags &= ~TCQ_F_THROTTLED;
+ sch->q.qlen--;
+ return skb;
+ }
+
+ if (!sch->q.qlen)
+ goto fin;
+ q->now = psched_get_time();
+
+ next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
+ q->nwc_hit = 0;
+ for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
+ /* common case optimization - skip event handler quickly */
+ int m;
+ psched_time_t event;
+
+ if (q->now >= q->near_ev_cache[level]) {
+ event = htb_do_events(q, level);
+ if (!event)
+ event = q->now + PSCHED_TICKS_PER_SEC;
+ q->near_ev_cache[level] = event;
+ } else
+ event = q->near_ev_cache[level];
+
+ if (event && next_event > event)
+ next_event = event;
+
+ m = ~q->row_mask[level];
+ while (m != (int)(-1)) {
+ int prio = ffz(m);
+ m |= 1 << prio;
+ skb = htb_dequeue_tree(q, prio, level);
+ if (likely(skb != NULL)) {
+ sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ goto fin;
+ }
+ }
+ }
+ sch->qstats.overlimits++;
+ qdisc_watchdog_cancel(&q->watchdog);
+ qdisc_watchdog_schedule(&q->watchdog, next_event);
+fin:
+ return skb;
+}
+
+/* try to drop from each class (by prio) until one succeeds */
+static unsigned int htb_drop(struct Qdisc *sch)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ int prio;
+
+ for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
+ struct list_head *p;
+ list_for_each(p, q->drops + prio) {
+ struct htb_class *cl = list_entry(p, struct htb_class,
+ un.leaf.drop_list);
+ unsigned int len;
+ if (cl->un.leaf.q->ops->drop &&
+ (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ sch->q.qlen--;
+ if (!cl->un.leaf.q->q.qlen)
+ htb_deactivate(q, cl);
+ return len;
+ }
+ }
+ }
+ return 0;
+}
+
+/* reset all classes */
+/* always called under BH & queue lock */
+static void htb_reset(struct Qdisc *sch)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+ if (cl->level)
+ memset(&cl->un.inner, 0, sizeof(cl->un.inner));
+ else {
+ if (cl->un.leaf.q)
+ qdisc_reset(cl->un.leaf.q);
+ INIT_LIST_HEAD(&cl->un.leaf.drop_list);
+ }
+ cl->prio_activity = 0;
+ cl->cmode = HTB_CAN_SEND;
+
+ }
+ }
+ qdisc_watchdog_cancel(&q->watchdog);
+ __skb_queue_purge(&q->direct_queue);
+ sch->q.qlen = 0;
+ memset(q->row, 0, sizeof(q->row));
+ memset(q->row_mask, 0, sizeof(q->row_mask));
+ memset(q->wait_pq, 0, sizeof(q->wait_pq));
+ memset(q->ptr, 0, sizeof(q->ptr));
+ for (i = 0; i < TC_HTB_NUMPRIO; i++)
+ INIT_LIST_HEAD(q->drops + i);
+}
+
+static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
+ [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
+ [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
+ [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+};
+
+static int htb_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_HTB_INIT + 1];
+ struct tc_htb_glob *gopt;
+ int err;
+ int i;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_HTB_INIT] == NULL) {
+ printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+ return -EINVAL;
+ }
+ gopt = nla_data(tb[TCA_HTB_INIT]);
+ if (gopt->version != HTB_VER >> 16) {
+ printk(KERN_ERR
+ "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+ HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
+ return -EINVAL;
+ }
+
+ err = qdisc_class_hash_init(&q->clhash);
+ if (err < 0)
+ return err;
+ for (i = 0; i < TC_HTB_NUMPRIO; i++)
+ INIT_LIST_HEAD(q->drops + i);
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+ skb_queue_head_init(&q->direct_queue);
+
+ q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
+ if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+ q->direct_qlen = 2;
+
+ if ((q->rate2quantum = gopt->rate2quantum) < 1)
+ q->rate2quantum = 1;
+ q->defcls = gopt->defcls;
+
+ return 0;
+}
+
+static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
+ struct htb_sched *q = qdisc_priv(sch);
+ struct nlattr *nest;
+ struct tc_htb_glob gopt;
+
+ spin_lock_bh(root_lock);
+
+ gopt.direct_pkts = q->direct_pkts;
+ gopt.version = HTB_VER;
+ gopt.rate2quantum = q->rate2quantum;
+ gopt.defcls = q->defcls;
+ gopt.debug = 0;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
+ nla_nest_end(skb, nest);
+
+ spin_unlock_bh(root_lock);
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(root_lock);
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+ spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
+ struct nlattr *nest;
+ struct tc_htb_opt opt;
+
+ spin_lock_bh(root_lock);
+ tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
+ tcm->tcm_handle = cl->common.classid;
+ if (!cl->level && cl->un.leaf.q)
+ tcm->tcm_info = cl->un.leaf.q->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ memset(&opt, 0, sizeof(opt));
+
+ opt.rate = cl->rate->rate;
+ opt.buffer = cl->buffer;
+ opt.ceil = cl->ceil->rate;
+ opt.cbuffer = cl->cbuffer;
+ opt.quantum = cl->un.leaf.quantum;
+ opt.prio = cl->un.leaf.prio;
+ opt.level = cl->level;
+ NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
+
+ nla_nest_end(skb, nest);
+ spin_unlock_bh(root_lock);
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(root_lock);
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int
+htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (!cl->level && cl->un.leaf.q)
+ cl->qstats.qlen = cl->un.leaf.q->q.qlen;
+ cl->xstats.tokens = cl->tokens;
+ cl->xstats.ctokens = cl->ctokens;
+
+ if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, &cl->qstats) < 0)
+ return -1;
+
+ return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
+}
+
+static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (cl && !cl->level) {
+ if (new == NULL &&
+ (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->common.classid))
+ == NULL)
+ return -ENOBUFS;
+ sch_tree_lock(sch);
+ if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ }
+ sch_tree_unlock(sch);
+ return 0;
+ }
+ return -ENOENT;
+}
+
+static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+ return (cl && !cl->level) ? cl->un.leaf.q : NULL;
+}
+
+static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (cl->un.leaf.q->q.qlen == 0)
+ htb_deactivate(qdisc_priv(sch), cl);
+}
+
+static unsigned long htb_get(struct Qdisc *sch, u32 classid)
+{
+ struct htb_class *cl = htb_find(classid, sch);
+ if (cl)
+ cl->refcnt++;
+ return (unsigned long)cl;
+}
+
+static inline int htb_parent_last_child(struct htb_class *cl)
+{
+ if (!cl->parent)
+ /* the root class */
+ return 0;
+ if (cl->parent->children > 1)
+ /* not the last child */
+ return 0;
+ return 1;
+}
+
+static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
+ struct Qdisc *new_q)
+{
+ struct htb_class *parent = cl->parent;
+
+ WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
+
+ if (parent->cmode != HTB_CAN_SEND)
+ htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
+
+ parent->level = 0;
+ memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+ INIT_LIST_HEAD(&parent->un.leaf.drop_list);
+ parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
+ parent->un.leaf.quantum = parent->quantum;
+ parent->un.leaf.prio = parent->prio;
+ parent->tokens = parent->buffer;
+ parent->ctokens = parent->cbuffer;
+ parent->t_c = psched_get_time();
+ parent->cmode = HTB_CAN_SEND;
+}
+
+static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
+{
+ if (!cl->level) {
+ WARN_ON(!cl->un.leaf.q);
+ qdisc_destroy(cl->un.leaf.q);
+ }
+ gen_kill_estimator(&cl->bstats, &cl->rate_est);
+ qdisc_put_rtab(cl->rate);
+ qdisc_put_rtab(cl->ceil);
+
+ tcf_destroy_chain(&cl->filter_list);
+ kfree(cl);
+}
+
+/* always called under BH & queue lock */
+static void htb_destroy(struct Qdisc *sch)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct hlist_node *n, *next;
+ struct htb_class *cl;
+ unsigned int i;
+
+ qdisc_watchdog_cancel(&q->watchdog);
+	/* This line used to be after the htb_destroy_class call below,
+	   and surprisingly it worked in 2.4. But it must precede it,
+	   because filters need their target class alive to be able to call
+	   unbind_filter on it (without an Oops). */
+ tcf_destroy_chain(&q->filter_list);
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
+ tcf_destroy_chain(&cl->filter_list);
+ }
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
+ common.hnode)
+ htb_destroy_class(sch, cl);
+ }
+ qdisc_class_hash_destroy(&q->clhash);
+ __skb_queue_purge(&q->direct_queue);
+}
+
+static int htb_delete(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = (struct htb_class *)arg;
+ unsigned int qlen;
+ struct Qdisc *new_q = NULL;
+ int last_child = 0;
+
+	// TODO: why don't we allow deleting a subtree? references? does the
+	// tc subsystem guarantee us that in htb_destroy it holds no class
+	// refs, so that we can remove children safely there?
+ if (cl->children || cl->filter_cnt)
+ return -EBUSY;
+
+ if (!cl->level && htb_parent_last_child(cl)) {
+ new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ cl->parent->common.classid);
+ last_child = 1;
+ }
+
+ sch_tree_lock(sch);
+
+ if (!cl->level) {
+ qlen = cl->un.leaf.q->q.qlen;
+ qdisc_reset(cl->un.leaf.q);
+ qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ }
+
+ /* delete from hash and active; remainder in destroy_class */
+ qdisc_class_hash_remove(&q->clhash, &cl->common);
+ if (cl->parent)
+ cl->parent->children--;
+
+ if (cl->prio_activity)
+ htb_deactivate(q, cl);
+
+ if (cl->cmode != HTB_CAN_SEND)
+ htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level);
+
+ if (last_child)
+ htb_parent_to_leaf(q, cl, new_q);
+
+ if (--cl->refcnt == 0)
+ htb_destroy_class(sch, cl);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static void htb_put(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (--cl->refcnt == 0)
+ htb_destroy_class(sch, cl);
+}
+
+static int htb_change_class(struct Qdisc *sch, u32 classid,
+ u32 parentid, struct nlattr **tca,
+ unsigned long *arg)
+{
+ int err = -EINVAL;
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = (struct htb_class *)*arg, *parent;
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
+ struct nlattr *tb[TCA_HTB_RTAB + 1];
+ struct tc_htb_opt *hopt;
+
+ /* extract all subattrs from opt attr */
+ if (!opt)
+ goto failure;
+
+ err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy);
+ if (err < 0)
+ goto failure;
+
+ err = -EINVAL;
+ if (tb[TCA_HTB_PARMS] == NULL)
+ goto failure;
+
+ parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
+
+ hopt = nla_data(tb[TCA_HTB_PARMS]);
+
+ rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+ ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+ if (!rtab || !ctab)
+ goto failure;
+
+ if (!cl) { /* new class */
+ struct Qdisc *new_q;
+ int prio;
+ struct {
+ struct nlattr nla;
+ struct gnet_estimator opt;
+ } est = {
+ .nla = {
+ .nla_len = nla_attr_size(sizeof(est.opt)),
+ .nla_type = TCA_RATE,
+ },
+ .opt = {
+ /* 4s interval, 16s averaging constant */
+ .interval = 2,
+ .ewma_log = 2,
+ },
+ };
+
+ /* check for valid classid */
+ if (!classid || TC_H_MAJ(classid ^ sch->handle)
+ || htb_find(classid, sch))
+ goto failure;
+
+ /* check maximal depth */
+ if (parent && parent->parent && parent->parent->level < 2) {
+ printk(KERN_ERR "htb: tree is too deep\n");
+ goto failure;
+ }
+ err = -ENOBUFS;
+ if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+ goto failure;
+
+ gen_new_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE] ? : &est.nla);
+ cl->refcnt = 1;
+ cl->children = 0;
+ INIT_LIST_HEAD(&cl->un.leaf.drop_list);
+ RB_CLEAR_NODE(&cl->pq_node);
+
+ for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
+ RB_CLEAR_NODE(&cl->node[prio]);
+
+		/* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
+		   which can't be used inside of sch_tree_lock
+		   -- thanks to Karlis Peisenieks */
+ new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops, classid);
+ sch_tree_lock(sch);
+ if (parent && !parent->level) {
+ unsigned int qlen = parent->un.leaf.q->q.qlen;
+
+ /* turn parent into inner node */
+ qdisc_reset(parent->un.leaf.q);
+ qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+ qdisc_destroy(parent->un.leaf.q);
+ if (parent->prio_activity)
+ htb_deactivate(q, parent);
+
+ /* remove from evt list because of level change */
+ if (parent->cmode != HTB_CAN_SEND) {
+ htb_safe_rb_erase(&parent->pq_node, q->wait_pq);
+ parent->cmode = HTB_CAN_SEND;
+ }
+ parent->level = (parent->parent ? parent->parent->level
+ : TC_HTB_MAXDEPTH) - 1;
+ memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+ }
+ /* leaf (we) needs elementary qdisc */
+ cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
+
+ cl->common.classid = classid;
+ cl->parent = parent;
+
+ /* set class to be in HTB_CAN_SEND state */
+ cl->tokens = hopt->buffer;
+ cl->ctokens = hopt->cbuffer;
+ cl->mbuffer = 60 * PSCHED_TICKS_PER_SEC; /* 1min */
+ cl->t_c = psched_get_time();
+ cl->cmode = HTB_CAN_SEND;
+
+ /* attach to the hash list and parent's family */
+ qdisc_class_hash_insert(&q->clhash, &cl->common);
+ if (parent)
+ parent->children++;
+ } else {
+ if (tca[TCA_RATE])
+ gen_replace_estimator(&cl->bstats, &cl->rate_est,
+ qdisc_root_sleeping_lock(sch),
+ tca[TCA_RATE]);
+ sch_tree_lock(sch);
+ }
+
+	/* there used to be a nasty bug here: we have to check that the node
+	   really is a leaf before changing cl->un.leaf ! */
+ if (!cl->level) {
+ cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
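+		/* e.g. (illustrative numbers, assuming rtab->rate.rate is in
+		 * bytes/s): with the default r2q of 10 and a 1 Mbit/s class
+		 * (125000 bytes/s) the quantum comes out at 12500 bytes, well
+		 * inside the 1000..200000 range checked below. */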
+ if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
+ printk(KERN_WARNING
+ "HTB: quantum of class %X is small. Consider r2q change.\n",
+ cl->common.classid);
+ cl->un.leaf.quantum = 1000;
+ }
+ if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
+ printk(KERN_WARNING
+ "HTB: quantum of class %X is big. Consider r2q change.\n",
+ cl->common.classid);
+ cl->un.leaf.quantum = 200000;
+ }
+ if (hopt->quantum)
+ cl->un.leaf.quantum = hopt->quantum;
+ if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
+ cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
+
+ /* backup for htb_parent_to_leaf */
+ cl->quantum = cl->un.leaf.quantum;
+ cl->prio = cl->un.leaf.prio;
+ }
+
+ cl->buffer = hopt->buffer;
+ cl->cbuffer = hopt->cbuffer;
+ if (cl->rate)
+ qdisc_put_rtab(cl->rate);
+ cl->rate = rtab;
+ if (cl->ceil)
+ qdisc_put_rtab(cl->ceil);
+ cl->ceil = ctab;
+ sch_tree_unlock(sch);
+
+ qdisc_class_hash_grow(sch, &q->clhash);
+
+ *arg = (unsigned long)cl;
+ return 0;
+
+failure:
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ if (ctab)
+ qdisc_put_rtab(ctab);
+ return err;
+}
+
+static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl = (struct htb_class *)arg;
+ struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
+
+ return fl;
+}
+
+static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ struct htb_class *cl = htb_find(classid, sch);
+
+	/*if (cl && !cl->level) return 0;
+	   The line above used to be there to prevent attaching filters to
+	   leaves. But at least the tc_index filter uses this just to get the
+	   class for other reasons, so we have to allow it.
+	   ----
+	   19.6.2002 As Werner explained it is OK - bind_filter is just
+	   another way to "lock" the class - unlike "get" this lock can
+	   be broken by the class during destroy, IIUC.
+	 */
+ if (cl)
+ cl->filter_cnt++;
+ return (unsigned long)cl;
+}
+
+static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
+{
+ struct htb_class *cl = (struct htb_class *)arg;
+
+ if (cl)
+ cl->filter_cnt--;
+}
+
+static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct htb_sched *q = qdisc_priv(sch);
+ struct htb_class *cl;
+ struct hlist_node *n;
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < q->clhash.hashsize; i++) {
+ hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+ }
+}
+
+static const struct Qdisc_class_ops htb_class_ops = {
+ .graft = htb_graft,
+ .leaf = htb_leaf,
+ .qlen_notify = htb_qlen_notify,
+ .get = htb_get,
+ .put = htb_put,
+ .change = htb_change_class,
+ .delete = htb_delete,
+ .walk = htb_walk,
+ .tcf_chain = htb_find_tcf,
+ .bind_tcf = htb_bind_filter,
+ .unbind_tcf = htb_unbind_filter,
+ .dump = htb_dump_class,
+ .dump_stats = htb_dump_class_stats,
+};
+
+static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &htb_class_ops,
+ .id = "htb",
+ .priv_size = sizeof(struct htb_sched),
+ .enqueue = htb_enqueue,
+ .dequeue = htb_dequeue,
+ .requeue = htb_requeue,
+ .drop = htb_drop,
+ .init = htb_init,
+ .reset = htb_reset,
+ .destroy = htb_destroy,
+ .change = NULL /* htb_change */,
+ .dump = htb_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init htb_module_init(void)
+{
+ return register_qdisc(&htb_qdisc_ops);
+}
+static void __exit htb_module_exit(void)
+{
+ unregister_qdisc(&htb_qdisc_ops);
+}
+
+module_init(htb_module_init)
+module_exit(htb_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
new file mode 100644
index 0000000..4a2b773
--- /dev/null
+++ b/net/sched/sch_ingress.c
@@ -0,0 +1,159 @@
+/* net/sched/sch_ingress.c - Ingress qdisc
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Jamal Hadi Salim 1999
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct ingress_qdisc_data {
+ struct tcf_proto *filter_list;
+};
+
+/* ------------------------- Class/flow operations ------------------------- */
+
+static int ingress_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old)
+{
+ return -EOPNOTSUPP;
+}
+
+static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ return NULL;
+}
+
+static unsigned long ingress_get(struct Qdisc *sch, u32 classid)
+{
+ return TC_H_MIN(classid) + 1;
+}
+
+static unsigned long ingress_bind_filter(struct Qdisc *sch,
+ unsigned long parent, u32 classid)
+{
+ return ingress_get(sch, classid);
+}
+
+static void ingress_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent,
+ struct nlattr **tca, unsigned long *arg)
+{
+ return 0;
+}
+
+static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ return;
+}
+
+static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct ingress_qdisc_data *p = qdisc_priv(sch);
+
+ return &p->filter_list;
+}
+
+/* --------------------------- Qdisc operations ---------------------------- */
+
+static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct ingress_qdisc_data *p = qdisc_priv(sch);
+ struct tcf_result res;
+ int result;
+
+ result = tc_classify(skb, p->filter_list, &res);
+
+ sch->bstats.packets++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ switch (result) {
+ case TC_ACT_SHOT:
+ result = TC_ACT_SHOT;
+ sch->qstats.drops++;
+ break;
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ result = TC_ACT_STOLEN;
+ break;
+ case TC_ACT_RECLASSIFY:
+ case TC_ACT_OK:
+ skb->tc_index = TC_H_MIN(res.classid);
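+		/* fall through */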
+ default:
+ result = TC_ACT_OK;
+ break;
+ }
+
+ return result;
+}
+
+/* ------------------------------------------------------------- */
+
+static void ingress_destroy(struct Qdisc *sch)
+{
+ struct ingress_qdisc_data *p = qdisc_priv(sch);
+
+ tcf_destroy_chain(&p->filter_list);
+}
+
+static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static const struct Qdisc_class_ops ingress_class_ops = {
+ .graft = ingress_graft,
+ .leaf = ingress_leaf,
+ .get = ingress_get,
+ .put = ingress_put,
+ .change = ingress_change,
+ .walk = ingress_walk,
+ .tcf_chain = ingress_find_tcf,
+ .bind_tcf = ingress_bind_filter,
+ .unbind_tcf = ingress_put,
+};
+
+static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
+ .cl_ops = &ingress_class_ops,
+ .id = "ingress",
+ .priv_size = sizeof(struct ingress_qdisc_data),
+ .enqueue = ingress_enqueue,
+ .destroy = ingress_destroy,
+ .dump = ingress_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init ingress_module_init(void)
+{
+ return register_qdisc(&ingress_qdisc_ops);
+}
+
+static void __exit ingress_module_exit(void)
+{
+ unregister_qdisc(&ingress_qdisc_ops);
+}
+
+module_init(ingress_module_init)
+module_exit(ingress_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 0000000..915f314
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+ u16 bands;
+ u16 max_bands;
+ u16 curband;
+ struct tcf_proto *filter_list;
+ struct Qdisc **queues;
+};
+
+
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ u32 band;
+ struct tcf_result res;
+ int err;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+ switch (err) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
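+		/* fall through */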
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ band = skb_get_queue_mapping(skb);
+
+ if (band >= q->bands)
+ return q->queues[0];
+
+ return q->queues[band];
+}
+
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct Qdisc *qdisc;
+ int ret;
+
+ qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ ret = qdisc_enqueue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct Qdisc *qdisc;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ ret = qdisc->ops->requeue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ if (q->curband)
+ q->curband--;
+ else
+ q->curband = q->bands - 1;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ struct sk_buff *skb;
+ int band;
+
+ for (band = 0; band < q->bands; band++) {
+ /* cycle through bands to ensure fairness */
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+
+ /* Check that target subqueue is available before
+ * pulling an skb to avoid excessive requeues
+ */
+ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+ qdisc = q->queues[q->curband];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ }
+ }
+ return NULL;
+
+}
+
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int band;
+ unsigned int len;
+ struct Qdisc *qdisc;
+
+ for (band = q->bands-1; band >= 0; band--) {
+ qdisc = q->queues[band];
+ if (qdisc->ops->drop) {
+ len = qdisc->ops->drop(qdisc);
+ if (len != 0) {
+ sch->q.qlen--;
+ return len;
+ }
+ }
+ }
+ return 0;
+}
+
+
+static void
+multiq_reset(struct Qdisc *sch)
+{
+ u16 band;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ for (band = 0; band < q->bands; band++)
+ qdisc_reset(q->queues[band]);
+ sch->q.qlen = 0;
+ q->curband = 0;
+}
+
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+ int band;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&q->filter_list);
+ for (band = 0; band < q->bands; band++)
+ qdisc_destroy(q->queues[band]);
+
+ kfree(q->queues);
+}
+
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct tc_multiq_qopt *qopt;
+ int i;
+
+ if (!netif_is_multiqueue(qdisc_dev(sch)))
+ return -EINVAL;
+ if (nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+
+ qopt = nla_data(opt);
+
+ qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+ sch_tree_lock(sch);
+ q->bands = qopt->bands;
+ for (i = q->bands; i < q->max_bands; i++) {
+ if (q->queues[i] != &noop_qdisc) {
+ struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_destroy(child);
+ }
+ }
+
+ sch_tree_unlock(sch);
+
+ for (i = 0; i < q->bands; i++) {
+ if (q->queues[i] == &noop_qdisc) {
+ struct Qdisc *child;
+ child = qdisc_create_dflt(qdisc_dev(sch),
+ sch->dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle,
+ i + 1));
+ if (child) {
+ sch_tree_lock(sch);
+ child = xchg(&q->queues[i], child);
+
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child,
+ child->q.qlen);
+ qdisc_destroy(child);
+ }
+ sch_tree_unlock(sch);
+ }
+ }
+ }
+ return 0;
+}
+
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int i, err;
+
+ q->queues = NULL;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ q->max_bands = qdisc_dev(sch)->num_tx_queues;
+
+ q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
+ if (!q->queues)
+ return -ENOBUFS;
+ for (i = 0; i < q->max_bands; i++)
+ q->queues[i] = &noop_qdisc;
+
+	err = multiq_tune(sch, opt);
+
+ if (err)
+ kfree(q->queues);
+
+ return err;
+}
+
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_multiq_qopt opt;
+
+ opt.bands = q->bands;
+ opt.max_bands = q->max_bands;
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return -EINVAL;
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = q->queues[band];
+ q->queues[band] = new;
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return NULL;
+
+ return q->queues[band];
+}
+
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = TC_H_MIN(classid);
+
+ if (band - 1 >= q->bands)
+ return 0;
+ return band;
+}
+
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ return multiq_get(sch, classid);
+}
+
+
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+ return;
+}
+
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+ struct nlattr **tca, unsigned long *arg)
+{
+ unsigned long cl = *arg;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ if (q->queues[cl-1])
+ tcm->tcm_info = q->queues[cl-1]->handle;
+ return 0;
+}
+
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+ if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int band;
+
+ if (arg->stop)
+ return;
+
+ for (band = 0; band < q->bands; band++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, band+1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl)
+ return NULL;
+ return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+ .graft = multiq_graft,
+ .leaf = multiq_leaf,
+ .get = multiq_get,
+ .put = multiq_put,
+ .change = multiq_change,
+ .delete = multiq_delete,
+ .walk = multiq_walk,
+ .tcf_chain = multiq_find_tcf,
+ .bind_tcf = multiq_bind,
+ .unbind_tcf = multiq_put,
+ .dump = multiq_dump_class,
+ .dump_stats = multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &multiq_class_ops,
+ .id = "multiq",
+ .priv_size = sizeof(struct multiq_sched_data),
+ .enqueue = multiq_enqueue,
+ .dequeue = multiq_dequeue,
+ .requeue = multiq_requeue,
+ .drop = multiq_drop,
+ .init = multiq_init,
+ .reset = multiq_reset,
+ .destroy = multiq_destroy,
+ .change = multiq_tune,
+ .dump = multiq_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init multiq_module_init(void)
+{
+ return register_qdisc(&multiq_qdisc_ops);
+}
+
+static void __exit multiq_module_exit(void)
+{
+ unregister_qdisc(&multiq_qdisc_ops);
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
new file mode 100644
index 0000000..98402f0
--- /dev/null
+++ b/net/sched/sch_netem.c
@@ -0,0 +1,738 @@
+/*
+ * net/sched/sch_netem.c Network emulator
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License.
+ *
+ * Many of the algorithms and ideas for this came from
+ * NIST Net which is not copyrighted.
+ *
+ * Authors: Stephen Hemminger <shemminger@osdl.org>
+ * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#define VERSION "1.2"
+
+/* Network Emulation Queuing algorithm.
+ ====================================
+
+ Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
+	 Network Emulation Tool"
+ [2] Luigi Rizzo, DummyNet for FreeBSD
+
+ ----------------------------------------------------------------
+
+ This started out as a simple way to delay outgoing packets to
+ test TCP but has grown to include most of the functionality
+ of a full blown network emulator like NISTnet. It can delay
+ packets and add random jitter (and correlation). The random
+ distribution can be loaded from a table as well to provide
+ normal, Pareto, or experimental curves. Packet loss,
+ duplication, and reordering can also be emulated.
+
+	 This qdisc does not do classification; that can be handled by
+	 layering other disciplines. It does not need to do bandwidth
+	 control either, since that can be handled by using a token
+	 bucket or other rate control.
+*/
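+
+/* A typical configuration from user space, via iproute2's tc, looks
+ * like
+ *
+ *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%
+ *
+ * i.e. a 100ms delay with 10ms of jitter (25% correlated) and 0.1%
+ * random packet loss; eth0 is just an example device name.
+ */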
+
+struct netem_sched_data {
+ struct Qdisc *qdisc;
+ struct qdisc_watchdog watchdog;
+
+ psched_tdiff_t latency;
+ psched_tdiff_t jitter;
+
+ u32 loss;
+ u32 limit;
+ u32 counter;
+ u32 gap;
+ u32 duplicate;
+ u32 reorder;
+ u32 corrupt;
+
+ struct crndstate {
+ u32 last;
+ u32 rho;
+ } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
+
+ struct disttable {
+ u32 size;
+ s16 table[0];
+ } *delay_dist;
+};
+
+/* Time stamp put into socket buffer control block */
+struct netem_skb_cb {
+ psched_time_t time_to_send;
+};
+
+static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(skb->cb) <
+ sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb));
+ return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+/* init_crandom - initialize correlated random number generator
+ * Use entropy source for initial seed.
+ */
+static void init_crandom(struct crndstate *state, unsigned long rho)
+{
+ state->rho = rho;
+ state->last = net_random();
+}
+
+/* get_crandom - correlated random number generator
+ * Next number depends on last value.
+ * rho is scaled to avoid floating point.
+ */
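+/*
+ * Example: with rho = 0.75 * 2^32 each new value is roughly
+ * 0.25 * fresh_random + 0.75 * previous_value, i.e. an exponentially
+ * weighted blend of new entropy and the last output.
+ */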
+static u32 get_crandom(struct crndstate *state)
+{
+ u64 value, rho;
+ unsigned long answer;
+
+ if (state->rho == 0) /* no correlation */
+ return net_random();
+
+ value = net_random();
+ rho = (u64)state->rho + 1;
+ answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
+ state->last = answer;
+ return answer;
+}
+
+/* tabledist - return a pseudo-randomly distributed value with mean mu and
+ * std deviation sigma. Uses table lookup to approximate the desired
+ * distribution, and a uniformly-distributed pseudo-random source.
+ */
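+/*
+ * The table entries t are scaled by NETEM_DIST_SCALE, so the result is
+ * effectively mu + (sigma * t) / NETEM_DIST_SCALE; the remainder term
+ * is biased by NETEM_DIST_SCALE/2 so the division rounds to nearest
+ * rather than truncating.
+ */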
+static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
+ struct crndstate *state,
+ const struct disttable *dist)
+{
+ psched_tdiff_t x;
+ long t;
+ u32 rnd;
+
+ if (sigma == 0)
+ return mu;
+
+ rnd = get_crandom(state);
+
+ /* default uniform distribution */
+ if (dist == NULL)
+ return (rnd % (2*sigma)) - sigma + mu;
+
+ t = dist->table[rnd % dist->size];
+ x = (sigma % NETEM_DIST_SCALE) * t;
+ if (x >= 0)
+ x += NETEM_DIST_SCALE/2;
+ else
+ x -= NETEM_DIST_SCALE/2;
+
+ return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
+}
+
+/*
+ * Insert one skb into qdisc.
+ * Note: parent depends on return value to account for queue length.
+ * NET_XMIT_DROP: queue length didn't change.
+ * NET_XMIT_SUCCESS: one skb was queued.
+ */
+static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ /* We don't fill cb now as skb_unshare() may invalidate it */
+ struct netem_skb_cb *cb;
+ struct sk_buff *skb2;
+ int ret;
+ int count = 1;
+
+ pr_debug("netem_enqueue skb=%p\n", skb);
+
+ /* Random duplication */
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ ++count;
+
+ /* Random packet drop 0 => none, ~0 => all */
+ if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+ --count;
+
+ if (count == 0) {
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ }
+
+ skb_orphan(skb);
+
+ /*
+ * If we need to duplicate packet, then re-insert at top of the
+ * qdisc tree, since parent queuer expects that only one
+ * skb will be queued.
+ */
+ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+ struct Qdisc *rootq = qdisc_root(sch);
+ u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
+ q->duplicate = 0;
+
+ qdisc_enqueue_root(skb2, rootq);
+ q->duplicate = dupsave;
+ }
+
+ /*
+ * Randomized packet corruption.
+	 * Make a copy if needed since we are modifying the data.
+	 * If the packet is going to be hardware checksummed, then
+ * do it now in software before we mangle it.
+ */
+ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (!(skb = skb_unshare(skb, GFP_ATOMIC))
+ || (skb->ip_summed == CHECKSUM_PARTIAL
+ && skb_checksum_help(skb))) {
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+ }
+
+ skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
+ }
+
+ cb = netem_skb_cb(skb);
+ if (q->gap == 0 /* not doing reordering */
+ || q->counter < q->gap /* inside last reordering gap */
+ || q->reorder < get_crandom(&q->reorder_cor)) {
+ psched_time_t now;
+ psched_tdiff_t delay;
+
+ delay = tabledist(q->latency, q->jitter,
+ &q->delay_cor, q->delay_dist);
+
+ now = psched_get_time();
+ cb->time_to_send = now + delay;
+ ++q->counter;
+ ret = qdisc_enqueue(skb, q->qdisc);
+ } else {
+ /*
+ * Do re-ordering by putting one out of N packets at the front
+ * of the queue.
+ */
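+		/*
+		 * The selected packet skips the configured delay and
+		 * overtakes the delayed packets already queued ahead of
+		 * it, which the receiver observes as reordering.
+		 */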
+ cb->time_to_send = psched_get_time();
+ q->counter = 0;
+ ret = q->qdisc->ops->requeue(skb, q->qdisc);
+ }
+
+ if (likely(ret == NET_XMIT_SUCCESS)) {
+ sch->q.qlen++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ } else if (net_xmit_drop_count(ret)) {
+ sch->qstats.drops++;
+ }
+
+ pr_debug("netem: enqueue ret %d\n", ret);
+ return ret;
+}
+
+/* Requeue packets but don't change time stamp */
+static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ }
+
+ return ret;
+}
+
+static unsigned int netem_drop(struct Qdisc* sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ unsigned int len = 0;
+
+ if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ }
+ return len;
+}
+
+static struct sk_buff *netem_dequeue(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+
+ smp_mb();
+ if (sch->flags & TCQ_F_THROTTLED)
+ return NULL;
+
+ skb = q->qdisc->dequeue(q->qdisc);
+ if (skb) {
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
+ psched_time_t now = psched_get_time();
+
+		/* is it time to send this packet? */
+ if (cb->time_to_send <= now) {
+ pr_debug("netem_dequeue: return skb=%p\n", skb);
+ sch->q.qlen--;
+ return skb;
+ }
+
+ if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
+ sch->qstats.drops++;
+ printk(KERN_ERR "netem: %s could not requeue\n",
+ q->qdisc->ops->id);
+ }
+
+ qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
+ }
+
+ return NULL;
+}
+
+static void netem_reset(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+ sch->q.qlen = 0;
+ qdisc_watchdog_cancel(&q->watchdog);
+}
+
+/*
+ * Distribution data is a variable size payload containing
+ * signed 16 bit values.
+ */
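+/*
+ * The tables themselves are generated off line; iproute2 ships
+ * normal, pareto, paretonormal and experimental tables, which tc loads
+ * through the TCA_NETEM_DELAY_DIST attribute.
+ */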
+static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ unsigned long n = nla_len(attr)/sizeof(__s16);
+ const __s16 *data = nla_data(attr);
+ spinlock_t *root_lock;
+ struct disttable *d;
+ int i;
+
+ if (n > 65536)
+ return -EINVAL;
+
+ d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->size = n;
+ for (i = 0; i < n; i++)
+ d->table[i] = data[i];
+
+ root_lock = qdisc_root_sleeping_lock(sch);
+
+ spin_lock_bh(root_lock);
+ d = xchg(&q->delay_dist, d);
+ spin_unlock_bh(root_lock);
+
+ kfree(d);
+ return 0;
+}
+
+static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_corr *c = nla_data(attr);
+
+ init_crandom(&q->delay_cor, c->delay_corr);
+ init_crandom(&q->loss_cor, c->loss_corr);
+ init_crandom(&q->dup_cor, c->dup_corr);
+ return 0;
+}
+
+static int get_reorder(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_reorder *r = nla_data(attr);
+
+ q->reorder = r->probability;
+ init_crandom(&q->reorder_cor, r->correlation);
+ return 0;
+}
+
+static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_corrupt *r = nla_data(attr);
+
+ q->corrupt = r->probability;
+ init_crandom(&q->corrupt_cor, r->correlation);
+ return 0;
+}
+
+static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
+ [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
+ [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
+ [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
+};
+
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ const struct nla_policy *policy, int len)
+{
+ int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+ if (nested_len < 0)
+ return -EINVAL;
+ if (nested_len >= nla_attr_size(0))
+ return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+ nested_len, policy);
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+ return 0;
+}
+
+/* Parse netlink message to set options */
+static int netem_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_NETEM_MAX + 1];
+ struct tc_netem_qopt *qopt;
+ int ret;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ qopt = nla_data(opt);
+ ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
+ if (ret < 0)
+ return ret;
+
+ ret = fifo_set_limit(q->qdisc, qopt->limit);
+ if (ret) {
+ pr_debug("netem: can't set fifo limit\n");
+ return ret;
+ }
+
+ q->latency = qopt->latency;
+ q->jitter = qopt->jitter;
+ q->limit = qopt->limit;
+ q->gap = qopt->gap;
+ q->counter = 0;
+ q->loss = qopt->loss;
+ q->duplicate = qopt->duplicate;
+
+	/* For compatibility with earlier versions:
+	 * if gap is set, assume 100% reorder probability.
+ */
+ if (q->gap)
+ q->reorder = ~0;
+
+ if (tb[TCA_NETEM_CORR]) {
+ ret = get_correlation(sch, tb[TCA_NETEM_CORR]);
+ if (ret)
+ return ret;
+ }
+
+ if (tb[TCA_NETEM_DELAY_DIST]) {
+ ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
+ if (ret)
+ return ret;
+ }
+
+ if (tb[TCA_NETEM_REORDER]) {
+ ret = get_reorder(sch, tb[TCA_NETEM_REORDER]);
+ if (ret)
+ return ret;
+ }
+
+ if (tb[TCA_NETEM_CORRUPT]) {
+ ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Special case version of FIFO queue for use by netem.
+ * It queues packets in order of their time_to_send timestamps.
+ */
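+/*
+ * The common case of a packet due later than everything already queued
+ * is appended in O(1); otherwise the insertion point is found by
+ * walking the list backwards from the tail.
+ */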
+struct fifo_sched_data {
+ u32 limit;
+ psched_time_t oldest;
+};
+
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+ struct sk_buff_head *list = &sch->q;
+ psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
+ struct sk_buff *skb;
+
+ if (likely(skb_queue_len(list) < q->limit)) {
+ /* Optimize for add at tail */
+ if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
+ q->oldest = tnext;
+ return qdisc_enqueue_tail(nskb, sch);
+ }
+
+ skb_queue_reverse_walk(list, skb) {
+ const struct netem_skb_cb *cb = netem_skb_cb(skb);
+
+ if (tnext >= cb->time_to_send)
+ break;
+ }
+
+ __skb_queue_after(list, skb, nskb);
+
+ sch->qstats.backlog += qdisc_pkt_len(nskb);
+ sch->bstats.bytes += qdisc_pkt_len(nskb);
+ sch->bstats.packets++;
+
+ return NET_XMIT_SUCCESS;
+ }
+
+ return qdisc_reshape_fail(nskb, sch);
+}
+
+static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+
+ if (opt) {
+ struct tc_fifo_qopt *ctl = nla_data(opt);
+ if (nla_len(opt) < sizeof(*ctl))
+ return -EINVAL;
+
+ q->limit = ctl->limit;
+ } else
+ q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+
+ q->oldest = PSCHED_PASTPERFECT;
+ return 0;
+}
+
+static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct fifo_sched_data *q = qdisc_priv(sch);
+ struct tc_fifo_qopt opt = { .limit = q->limit };
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ return skb->len;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
+ .id = "tfifo",
+ .priv_size = sizeof(struct fifo_sched_data),
+ .enqueue = tfifo_enqueue,
+ .dequeue = qdisc_dequeue_head,
+ .requeue = qdisc_requeue,
+ .drop = qdisc_queue_drop,
+ .init = tfifo_init,
+ .reset = qdisc_reset_queue,
+ .change = tfifo_init,
+ .dump = tfifo_dump,
+};
+
+static int netem_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ if (!opt)
+ return -EINVAL;
+
+ qdisc_watchdog_init(&q->watchdog, sch);
+
+ q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &tfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, 1));
+ if (!q->qdisc) {
+ pr_debug("netem: qdisc create failed\n");
+ return -ENOMEM;
+ }
+
+ ret = netem_change(sch, opt);
+ if (ret) {
+ pr_debug("netem: change failed\n");
+ qdisc_destroy(q->qdisc);
+ }
+ return ret;
+}
+
+static void netem_destroy(struct Qdisc *sch)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+
+ qdisc_watchdog_cancel(&q->watchdog);
+ qdisc_destroy(q->qdisc);
+ kfree(q->delay_dist);
+}
+
+static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ const struct netem_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nla = (struct nlattr *) b;
+ struct tc_netem_qopt qopt;
+ struct tc_netem_corr cor;
+ struct tc_netem_reorder reorder;
+ struct tc_netem_corrupt corrupt;
+
+ qopt.latency = q->latency;
+ qopt.jitter = q->jitter;
+ qopt.limit = q->limit;
+ qopt.loss = q->loss;
+ qopt.gap = q->gap;
+ qopt.duplicate = q->duplicate;
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+
+ cor.delay_corr = q->delay_cor.rho;
+ cor.loss_corr = q->loss_cor.rho;
+ cor.dup_corr = q->dup_cor.rho;
+ NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+
+ reorder.probability = q->reorder;
+ reorder.correlation = q->reorder_cor.rho;
+ NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+
+ corrupt.probability = q->corrupt;
+ corrupt.correlation = q->corrupt_cor.rho;
+ NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+
+ nla->nla_len = skb_tail_pointer(skb) - b;
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+
+ if (cl != 1) /* only one class */
+ return -ENOENT;
+
+ tcm->tcm_handle |= TC_H_MIN(1);
+ tcm->tcm_info = q->qdisc->handle;
+
+ return 0;
+}
+
+static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ return q->qdisc;
+}
+
+static unsigned long netem_get(struct Qdisc *sch, u32 classid)
+{
+ return 1;
+}
+
+static void netem_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg)
+{
+ return -ENOSYS;
+}
+
+static int netem_delete(struct Qdisc *sch, unsigned long arg)
+{
+ return -ENOSYS;
+}
+
+static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ if (!walker->stop) {
+ if (walker->count >= walker->skip)
+ if (walker->fn(sch, 1, walker) < 0) {
+ walker->stop = 1;
+ return;
+ }
+ walker->count++;
+ }
+}
+
+static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ return NULL;
+}
+
+static const struct Qdisc_class_ops netem_class_ops = {
+ .graft = netem_graft,
+ .leaf = netem_leaf,
+ .get = netem_get,
+ .put = netem_put,
+ .change = netem_change_class,
+ .delete = netem_delete,
+ .walk = netem_walk,
+ .tcf_chain = netem_find_tcf,
+ .dump = netem_dump_class,
+};
+
+static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
+ .id = "netem",
+ .cl_ops = &netem_class_ops,
+ .priv_size = sizeof(struct netem_sched_data),
+ .enqueue = netem_enqueue,
+ .dequeue = netem_dequeue,
+ .requeue = netem_requeue,
+ .drop = netem_drop,
+ .init = netem_init,
+ .reset = netem_reset,
+ .destroy = netem_destroy,
+ .change = netem_change,
+ .dump = netem_dump,
+ .owner = THIS_MODULE,
+};
+
+
+static int __init netem_module_init(void)
+{
+ pr_info("netem: version " VERSION "\n");
+ return register_qdisc(&netem_qdisc_ops);
+}
+static void __exit netem_module_exit(void)
+{
+ unregister_qdisc(&netem_qdisc_ops);
+}
+module_init(netem_module_init)
+module_exit(netem_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
new file mode 100644
index 0000000..504a78c
--- /dev/null
+++ b/net/sched/sch_prio.c
@@ -0,0 +1,447 @@
+/*
+ * net/sched/sch_prio.c Simple 3-band priority "scheduler".
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>:
+ * Init -- EINVAL when opt undefined
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct prio_sched_data
+{
+ int bands;
+ struct tcf_proto *filter_list;
+ u8 prio2band[TC_PRIO_MAX+1];
+ struct Qdisc *queues[TCQ_PRIO_BANDS];
+};
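+
+/*
+ * prio2band is the priomap supplied from user space: it maps a
+ * packet's TC_PRIO_* priority to one of the configured bands.
+ * Lower-numbered bands are always dequeued first.
+ */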
+
+
+static struct Qdisc *
+prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ u32 band = skb->priority;
+ struct tcf_result res;
+ int err;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ if (TC_H_MAJ(skb->priority) != sch->handle) {
+ err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+ switch (err) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ if (!q->filter_list || err < 0) {
+ if (TC_H_MAJ(band))
+ band = 0;
+ return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+ }
+ band = res.classid;
+ }
+ band = TC_H_MIN(band) - 1;
+ if (band >= q->bands)
+ return q->queues[q->prio2band[0]];
+
+ return q->queues[band];
+}
+
+static int
+prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct Qdisc *qdisc;
+ int ret;
+
+ qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ ret = qdisc_enqueue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static int
+prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct Qdisc *qdisc;
+ int ret;
+
+ qdisc = prio_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static struct sk_buff *prio_dequeue(struct Qdisc* sch)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+ for (prio = 0; prio < q->bands; prio++) {
+ struct Qdisc *qdisc = q->queues[prio];
+ struct sk_buff *skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ }
+ return NULL;
+}
+
+static unsigned int prio_drop(struct Qdisc* sch)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int prio;
+ unsigned int len;
+ struct Qdisc *qdisc;
+
+ for (prio = q->bands-1; prio >= 0; prio--) {
+ qdisc = q->queues[prio];
+ if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) {
+ sch->q.qlen--;
+ return len;
+ }
+ }
+ return 0;
+}
+
+
+static void
+prio_reset(struct Qdisc* sch)
+{
+ int prio;
+ struct prio_sched_data *q = qdisc_priv(sch);
+
+ for (prio=0; prio<q->bands; prio++)
+ qdisc_reset(q->queues[prio]);
+ sch->q.qlen = 0;
+}
+
+static void
+prio_destroy(struct Qdisc* sch)
+{
+ int prio;
+ struct prio_sched_data *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&q->filter_list);
+ for (prio=0; prio<q->bands; prio++)
+ qdisc_destroy(q->queues[prio]);
+}
+
+static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ struct tc_prio_qopt *qopt;
+ int i;
+
+ if (nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+ qopt = nla_data(opt);
+
+ if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
+ return -EINVAL;
+
+ for (i=0; i<=TC_PRIO_MAX; i++) {
+ if (qopt->priomap[i] >= qopt->bands)
+ return -EINVAL;
+ }
+
+ sch_tree_lock(sch);
+ q->bands = qopt->bands;
+ memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
+
+ for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+ struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_destroy(child);
+ }
+ }
+ sch_tree_unlock(sch);
+
+ for (i=0; i<q->bands; i++) {
+ if (q->queues[i] == &noop_qdisc) {
+ struct Qdisc *child;
+ child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, i + 1));
+ if (child) {
+ sch_tree_lock(sch);
+ child = xchg(&q->queues[i], child);
+
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child,
+ child->q.qlen);
+ qdisc_destroy(child);
+ }
+ sch_tree_unlock(sch);
+ }
+ }
+ }
+ return 0;
+}
+
+static int prio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int i;
+
+ for (i=0; i<TCQ_PRIO_BANDS; i++)
+ q->queues[i] = &noop_qdisc;
+
+ if (opt == NULL) {
+ return -EINVAL;
+ } else {
+ int err;
+
+		if ((err = prio_tune(sch, opt)) != 0)
+ return err;
+ }
+ return 0;
+}
+
+static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_prio_qopt opt;
+
+ opt.bands = q->bands;
+ memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return -EINVAL;
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = q->queues[band];
+ q->queues[band] = new;
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *
+prio_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return NULL;
+
+ return q->queues[band];
+}
+
+static unsigned long prio_get(struct Qdisc *sch, u32 classid)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ unsigned long band = TC_H_MIN(classid);
+
+ if (band - 1 >= q->bands)
+ return 0;
+ return band;
+}
+
+static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
+{
+ return prio_get(sch, classid);
+}
+
+
+static void prio_put(struct Qdisc *q, unsigned long cl)
+{
+ return;
+}
+
+static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg)
+{
+ unsigned long cl = *arg;
+ struct prio_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+static int prio_delete(struct Qdisc *sch, unsigned long cl)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+
+static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
+ struct tcmsg *tcm)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ if (q->queues[cl-1])
+ tcm->tcm_info = q->queues[cl-1]->handle;
+ return 0;
+}
+
+static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+ if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+ int prio;
+
+ if (arg->stop)
+ return;
+
+ for (prio = 0; prio < q->bands; prio++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, prio+1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct prio_sched_data *q = qdisc_priv(sch);
+
+ if (cl)
+ return NULL;
+ return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops prio_class_ops = {
+ .graft = prio_graft,
+ .leaf = prio_leaf,
+ .get = prio_get,
+ .put = prio_put,
+ .change = prio_change,
+ .delete = prio_delete,
+ .walk = prio_walk,
+ .tcf_chain = prio_find_tcf,
+ .bind_tcf = prio_bind,
+ .unbind_tcf = prio_put,
+ .dump = prio_dump_class,
+ .dump_stats = prio_dump_class_stats,
+};
+
+static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &prio_class_ops,
+ .id = "prio",
+ .priv_size = sizeof(struct prio_sched_data),
+ .enqueue = prio_enqueue,
+ .dequeue = prio_dequeue,
+ .requeue = prio_requeue,
+ .drop = prio_drop,
+ .init = prio_init,
+ .reset = prio_reset,
+ .destroy = prio_destroy,
+ .change = prio_tune,
+ .dump = prio_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init prio_module_init(void)
+{
+ return register_qdisc(&prio_qdisc_ops);
+}
+
+static void __exit prio_module_exit(void)
+{
+ unregister_qdisc(&prio_qdisc_ops);
+}
+
+module_init(prio_module_init)
+module_exit(prio_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
new file mode 100644
index 0000000..5da0583
--- /dev/null
+++ b/net/sched/sch_red.c
@@ -0,0 +1,388 @@
+/*
+ * net/sched/sch_red.c Random Early Detection queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes:
+ * J Hadi Salim 980914: computation fixes
+ * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
+ * J Hadi Salim 980816: ECN support
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/red.h>
+
+
+/* Parameters, settable by user:
+ -----------------------------
+
+ limit - bytes (must be > qth_max + burst)
+
+ Hard limit on queue length, should be chosen >qth_max
+ to allow packet bursts. This parameter does not
+	affect the algorithm's behaviour and can be chosen
+	arbitrarily high (well, less than RAM size).
+ Really, this limit will never be reached
+ if RED works correctly.
+ */
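+/*
+ * The average queue size used by RED is an exponentially weighted
+ * moving average with weight 2^-Wlog; qth_min and qth_max are stored
+ * pre-scaled by the same factor, which is why red_dump() shifts them
+ * back down by Wlog.
+ */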
+
+struct red_sched_data
+{
+ u32 limit; /* HARD maximal queue length */
+ unsigned char flags;
+ struct red_parms parms;
+ struct red_stats stats;
+ struct Qdisc *qdisc;
+};
+
+static inline int red_use_ecn(struct red_sched_data *q)
+{
+ return q->flags & TC_RED_ECN;
+}
+
+static inline int red_use_harddrop(struct red_sched_data *q)
+{
+ return q->flags & TC_RED_HARDDROP;
+}
+
+static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
+ int ret;
+
+ q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
+
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+
+ switch (red_action(&q->parms, q->parms.qavg)) {
+ case RED_DONT_MARK:
+ break;
+
+ case RED_PROB_MARK:
+ sch->qstats.overlimits++;
+ if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
+
+ q->stats.prob_mark++;
+ break;
+
+ case RED_HARD_MARK:
+ sch->qstats.overlimits++;
+ if (red_use_harddrop(q) || !red_use_ecn(q) ||
+ !INET_ECN_set_ce(skb)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+
+ q->stats.forced_mark++;
+ break;
+ }
+
+ ret = qdisc_enqueue(skb, child);
+ if (likely(ret == NET_XMIT_SUCCESS)) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->q.qlen++;
+ } else if (net_xmit_drop_count(ret)) {
+ q->stats.pdrop++;
+ sch->qstats.drops++;
+ }
+ return ret;
+
+congestion_drop:
+ qdisc_drop(skb, sch);
+ return NET_XMIT_CN;
+}
+
+static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
+ int ret;
+
+ if (red_is_idling(&q->parms))
+ red_end_of_idle_period(&q->parms);
+
+ ret = child->ops->requeue(skb, child);
+ if (likely(ret == NET_XMIT_SUCCESS)) {
+ sch->qstats.requeues++;
+ sch->q.qlen++;
+ }
+ return ret;
+}
+
+static struct sk_buff * red_dequeue(struct Qdisc* sch)
+{
+ struct sk_buff *skb;
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
+
+ skb = child->dequeue(child);
+ if (skb)
+ sch->q.qlen--;
+ else if (!red_is_idling(&q->parms))
+ red_start_of_idle_period(&q->parms);
+
+ return skb;
+}
+
+static unsigned int red_drop(struct Qdisc* sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *child = q->qdisc;
+ unsigned int len;
+
+ if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
+ q->stats.other++;
+ sch->qstats.drops++;
+ sch->q.qlen--;
+ return len;
+ }
+
+ if (!red_is_idling(&q->parms))
+ red_start_of_idle_period(&q->parms);
+
+ return 0;
+}
+
+static void red_reset(struct Qdisc* sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+ sch->q.qlen = 0;
+ red_restart(&q->parms);
+}
+
+static void red_destroy(struct Qdisc *sch)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ qdisc_destroy(q->qdisc);
+}
+
+static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
+ [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
+ [TCA_RED_STAB] = { .len = RED_STAB_SIZE },
+};
+
+static int red_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_RED_MAX + 1];
+ struct tc_red_qopt *ctl;
+ struct Qdisc *child = NULL;
+ int err;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_RED_PARMS] == NULL ||
+ tb[TCA_RED_STAB] == NULL)
+ return -EINVAL;
+
+ ctl = nla_data(tb[TCA_RED_PARMS]);
+
+ if (ctl->limit > 0) {
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
+ if (IS_ERR(child))
+ return PTR_ERR(child);
+ }
+
+ sch_tree_lock(sch);
+ q->flags = ctl->flags;
+ q->limit = ctl->limit;
+ if (child) {
+ qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_destroy(xchg(&q->qdisc, child));
+ }
+
+ red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
+ ctl->Plog, ctl->Scell_log,
+ nla_data(tb[TCA_RED_STAB]));
+
+ if (skb_queue_empty(&sch->q))
+ red_end_of_idle_period(&q->parms);
+
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static int red_init(struct Qdisc* sch, struct nlattr *opt)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ q->qdisc = &noop_qdisc;
+ return red_change(sch, opt);
+}
+
+static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct nlattr *opts = NULL;
+ struct tc_red_qopt opt = {
+ .limit = q->limit,
+ .flags = q->flags,
+ .qth_min = q->parms.qth_min >> q->parms.Wlog,
+ .qth_max = q->parms.qth_max >> q->parms.Wlog,
+ .Wlog = q->parms.Wlog,
+ .Plog = q->parms.Plog,
+ .Scell_log = q->parms.Scell_log,
+ };
+
+ opts = nla_nest_start(skb, TCA_OPTIONS);
+ if (opts == NULL)
+ goto nla_put_failure;
+ NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+ return nla_nest_end(skb, opts);
+
+nla_put_failure:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ struct tc_red_xstats st = {
+ .early = q->stats.prob_drop + q->stats.forced_drop,
+ .pdrop = q->stats.pdrop,
+ .other = q->stats.other,
+ .marked = q->stats.prob_mark + q->stats.forced_mark,
+ };
+
+ return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static int red_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ if (cl != 1)
+ return -ENOENT;
+ tcm->tcm_handle |= TC_H_MIN(1);
+ tcm->tcm_info = q->qdisc->handle;
+ return 0;
+}
+
+static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct red_sched_data *q = qdisc_priv(sch);
+ return q->qdisc;
+}
+
+static unsigned long red_get(struct Qdisc *sch, u32 classid)
+{
+ return 1;
+}
+
+static void red_put(struct Qdisc *sch, unsigned long arg)
+{
+ return;
+}
+
+static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg)
+{
+ return -ENOSYS;
+}
+
+static int red_delete(struct Qdisc *sch, unsigned long cl)
+{
+ return -ENOSYS;
+}
+
+static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ if (!walker->stop) {
+ if (walker->count >= walker->skip)
+ if (walker->fn(sch, 1, walker) < 0) {
+ walker->stop = 1;
+ return;
+ }
+ walker->count++;
+ }
+}
+
+static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ return NULL;
+}
+
+static const struct Qdisc_class_ops red_class_ops = {
+ .graft = red_graft,
+ .leaf = red_leaf,
+ .get = red_get,
+ .put = red_put,
+ .change = red_change_class,
+ .delete = red_delete,
+ .walk = red_walk,
+ .tcf_chain = red_find_tcf,
+ .dump = red_dump_class,
+};
+
+static struct Qdisc_ops red_qdisc_ops __read_mostly = {
+ .id = "red",
+ .priv_size = sizeof(struct red_sched_data),
+ .cl_ops = &red_class_ops,
+ .enqueue = red_enqueue,
+ .dequeue = red_dequeue,
+ .requeue = red_requeue,
+ .drop = red_drop,
+ .init = red_init,
+ .reset = red_reset,
+ .destroy = red_destroy,
+ .change = red_change,
+ .dump = red_dump,
+ .dump_stats = red_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init red_module_init(void)
+{
+ return register_qdisc(&red_qdisc_ops);
+}
+
+static void __exit red_module_exit(void)
+{
+ unregister_qdisc(&red_qdisc_ops);
+}
+
+module_init(red_module_init)
+module_exit(red_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
new file mode 100644
index 0000000..fe1508e
--- /dev/null
+++ b/net/sched/sch_sfq.c
@@ -0,0 +1,647 @@
+/*
+ * net/sched/sch_sfq.c Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <net/ip.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+/* Stochastic Fairness Queuing algorithm.
+ =======================================
+
+ Source:
+ Paul E. McKenney "Stochastic Fairness Queuing",
+ IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
+
+ Paul E. McKenney "Stochastic Fairness Queuing",
+ "Interworking: Research and Experience", v.2, 1991, p.113-131.
+
+
+ See also:
+ M. Shreedhar and George Varghese "Efficient Fair
+ Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
+
+
+ This is not the thing that is usually called (W)FQ nowadays.
+ It does not use any timestamp mechanism, but instead
+ processes queues in round-robin order.
+
+ ADVANTAGE:
+
+ - It is very cheap. Both CPU and memory requirements are minimal.
+
+ DRAWBACKS:
+
+ - "Stochastic" -> It is not 100% fair.
+ When hash collisions occur, several flows are considered as one.
+
+ - "Round-robin" -> It introduces larger delays than virtual clock
+ based schemes, and should not be used for isolating interactive
+	traffic from non-interactive traffic. This means that this scheduler
+	should be used as a leaf of CBQ or P3, which put interactive traffic
+	into a higher priority band.
+
+ We still need true WFQ for top level CSZ, but using WFQ
+ for the best effort traffic is absolutely pointless:
+ SFQ is superior for this purpose.
+
+ IMPLEMENTATION:
+ This implementation limits maximal queue length to 128;
+ maximal mtu to 2^15-1; number of hash buckets to 1024.
+	The only goal of these restrictions was that all data
+ fit into one 4K page :-). Struct sfq_sched_data is
+ organized in anti-cache manner: all the data for a bucket
+ are scattered over different locations. This is not good,
+ but it allowed me to put it into 4K.
+
+ It is easy to increase these values, but not in flight. */
+
+#define SFQ_DEPTH 128
+#define SFQ_HASH_DIVISOR 1024
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned char sfq_index;
+
+struct sfq_head
+{
+ sfq_index next;
+ sfq_index prev;
+};
+
+struct sfq_sched_data
+{
+/* Parameters */
+ int perturb_period;
+ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit;
+
+/* Variables */
+ struct tcf_proto *filter_list;
+ struct timer_list perturb_timer;
+ u32 perturbation;
+ sfq_index tail; /* Index of current slot in round */
+ sfq_index max_depth; /* Maximal depth */
+
+ sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */
+ sfq_index next[SFQ_DEPTH]; /* Active slots link */
+ short allot[SFQ_DEPTH]; /* Current allotment per slot */
+ unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
+ struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
+ struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
+};
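+
+/*
+ * dep[] indexing: entries 0..SFQ_DEPTH-1 belong to the slots
+ * themselves, while dep[SFQ_DEPTH + n] heads the doubly linked list
+ * of slots whose queue currently holds n packets.  This is what lets
+ * sfq_drop() find a longest slot in O(1).
+ */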
+
+static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+{
+ return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+}
+
+static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+{
+ u32 h, h2;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ {
+ const struct iphdr *iph = ip_hdr(skb);
+ h = iph->daddr;
+ h2 = iph->saddr ^ iph->protocol;
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_UDPLITE ||
+ iph->protocol == IPPROTO_SCTP ||
+ iph->protocol == IPPROTO_DCCP ||
+ iph->protocol == IPPROTO_ESP))
+ h2 ^= *(((u32*)iph) + iph->ihl);
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ h = iph->daddr.s6_addr32[3];
+ h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_UDPLITE ||
+ iph->nexthdr == IPPROTO_SCTP ||
+ iph->nexthdr == IPPROTO_DCCP ||
+ iph->nexthdr == IPPROTO_ESP)
+ h2 ^= *(u32*)&iph[1];
+ break;
+ }
+ default:
+ h = (unsigned long)skb->dst ^ skb->protocol;
+ h2 = (unsigned long)skb->sk;
+ }
+
+ return sfq_fold_hash(q, h, h2);
+}
+
+static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct tcf_result res;
+ int result;
+
+ if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+ return TC_H_MIN(skb->priority);
+
+ if (!q->filter_list)
+ return sfq_hash(q, skb) + 1;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ result = tc_classify(skb, q->filter_list, &res);
+ if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+ switch (result) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return 0;
+ }
+#endif
+ if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+ return TC_H_MIN(res.classid);
+ }
+ return 0;
+}
+
+static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
+{
+ sfq_index p, n;
+ int d = q->qs[x].qlen + SFQ_DEPTH;
+
+ p = d;
+ n = q->dep[d].next;
+ q->dep[x].next = n;
+ q->dep[x].prev = p;
+ q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
+{
+ sfq_index p, n;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+
+ if (n == p && q->max_depth == q->qs[x].qlen + 1)
+ q->max_depth--;
+
+ sfq_link(q, x);
+}
+
+static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
+{
+ sfq_index p, n;
+ int d;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+ d = q->qs[x].qlen;
+ if (q->max_depth < d)
+ q->max_depth = d;
+
+ sfq_link(q, x);
+}
+
+static unsigned int sfq_drop(struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ sfq_index d = q->max_depth;
+ struct sk_buff *skb;
+ unsigned int len;
+
+ /* Queue is full! Find the longest slot and
+ drop a packet from it */
+
+ if (d > 1) {
+ sfq_index x = q->dep[d + SFQ_DEPTH].next;
+ skb = q->qs[x].prev;
+ len = qdisc_pkt_len(skb);
+ __skb_unlink(skb, &q->qs[x]);
+ kfree_skb(skb);
+ sfq_dec(q, x);
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ if (d == 1) {
+ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+ d = q->next[q->tail];
+ q->next[q->tail] = q->next[d];
+ q->allot[q->next[d]] += q->quantum;
+ skb = q->qs[d].prev;
+ len = qdisc_pkt_len(skb);
+ __skb_unlink(skb, &q->qs[d]);
+ kfree_skb(skb);
+ sfq_dec(q, d);
+ sch->q.qlen--;
+ q->ht[q->hash[d]] = SFQ_DEPTH;
+ sch->qstats.drops++;
+ sch->qstats.backlog -= len;
+ return len;
+ }
+
+ return 0;
+}
+
+static int
+sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ unsigned int hash;
+ sfq_index x;
+ int ret;
+
+ hash = sfq_classify(skb, sch, &ret);
+ if (hash == 0) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+ hash--;
+
+ x = q->ht[hash];
+ if (x == SFQ_DEPTH) {
+ q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+ q->hash[x] = hash;
+ }
+
+ /* If selected queue has length q->limit, this means that
+	 * all other queues are empty and that we do simple tail drop,
+ * i.e. drop _this_ packet.
+ */
+ if (q->qs[x].qlen >= q->limit)
+ return qdisc_drop(skb, sch);
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+ __skb_queue_tail(&q->qs[x], skb);
+ sfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == SFQ_DEPTH) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ if (++sch->q.qlen <= q->limit) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ return 0;
+ }
+
+ sfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+static int
+sfq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ unsigned int hash;
+ sfq_index x;
+ int ret;
+
+ hash = sfq_classify(skb, sch, &ret);
+ if (hash == 0) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+ hash--;
+
+ x = q->ht[hash];
+ if (x == SFQ_DEPTH) {
+ q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
+ q->hash[x] = hash;
+ }
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+ __skb_queue_head(&q->qs[x], skb);
+ /* If selected queue has length q->limit+1, this means that
+	 * all other queues are empty and we do simple tail drop.
+ * This packet is still requeued at head of queue, tail packet
+ * is dropped.
+ */
+ if (q->qs[x].qlen > q->limit) {
+ skb = q->qs[x].prev;
+ __skb_unlink(skb, &q->qs[x]);
+ sch->qstats.drops++;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ kfree_skb(skb);
+ return NET_XMIT_CN;
+ }
+
+ sfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == SFQ_DEPTH) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+
+ if (++sch->q.qlen <= q->limit) {
+ sch->qstats.requeues++;
+ return 0;
+ }
+
+ sch->qstats.drops++;
+ sfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+static struct sk_buff *
+sfq_dequeue(struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+ sfq_index a, old_a;
+
+ /* No active slots */
+ if (q->tail == SFQ_DEPTH)
+ return NULL;
+
+ a = old_a = q->next[q->tail];
+
+ /* Grab packet */
+ skb = __skb_dequeue(&q->qs[a]);
+ sfq_dec(q, a);
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+ q->ht[q->hash[a]] = SFQ_DEPTH;
+ a = q->next[a];
+ if (a == old_a) {
+ q->tail = SFQ_DEPTH;
+ return skb;
+ }
+ q->next[q->tail] = a;
+ q->allot[a] += q->quantum;
+ } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
+ q->tail = a;
+ a = q->next[a];
+ q->allot[a] += q->quantum;
+ }
+ return skb;
+}
+
+static void
+sfq_reset(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ while ((skb = sfq_dequeue(sch)) != NULL)
+ kfree_skb(skb);
+}
+
+static void sfq_perturbation(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc *)arg;
+ struct sfq_sched_data *q = qdisc_priv(sch);
+
+ q->perturbation = net_random();
+
+ if (q->perturb_period)
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+}
+
+static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ struct tc_sfq_qopt *ctl = nla_data(opt);
+ unsigned int qlen;
+
+ if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
+ return -EINVAL;
+
+ sch_tree_lock(sch);
+ q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+ q->perturb_period = ctl->perturb_period * HZ;
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
+
+ qlen = sch->q.qlen;
+ while (sch->q.qlen > q->limit)
+ sfq_drop(sch);
+ qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+
+ del_timer(&q->perturb_timer);
+ if (q->perturb_period) {
+ mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
+ q->perturbation = net_random();
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ int i;
+
+ q->perturb_timer.function = sfq_perturbation;
+	q->perturb_timer.data = (unsigned long)sch;
+ init_timer_deferrable(&q->perturb_timer);
+
+ for (i = 0; i < SFQ_HASH_DIVISOR; i++)
+ q->ht[i] = SFQ_DEPTH;
+
+ for (i = 0; i < SFQ_DEPTH; i++) {
+ skb_queue_head_init(&q->qs[i]);
+ q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
+ q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+ }
+
+ q->limit = SFQ_DEPTH - 1;
+ q->max_depth = 0;
+ q->tail = SFQ_DEPTH;
+ if (opt == NULL) {
+ q->quantum = psched_mtu(qdisc_dev(sch));
+ q->perturb_period = 0;
+ q->perturbation = net_random();
+ } else {
+ int err = sfq_change(sch, opt);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < SFQ_DEPTH; i++)
+ sfq_link(q, i);
+ return 0;
+}
+
+static void sfq_destroy(struct Qdisc *sch)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&q->filter_list);
+ q->perturb_period = 0;
+ del_timer_sync(&q->perturb_timer);
+}
+
+static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_sfq_qopt opt;
+
+ opt.quantum = q->quantum;
+ opt.perturb_period = q->perturb_period / HZ;
+
+ opt.limit = q->limit;
+ opt.divisor = SFQ_HASH_DIVISOR;
+ opt.flows = q->limit;
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg)
+{
+ return -EOPNOTSUPP;
+}
+
+static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+{
+ return 0;
+}
+
+static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+
+ if (cl)
+ return NULL;
+ return &q->filter_list;
+}
+
+static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ return 0;
+}
+
+static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ sfq_index idx = q->ht[cl-1];
+ struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
+ struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+
+ if (gnet_stats_copy_queue(d, &qs) < 0)
+ return -1;
+ return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct sfq_sched_data *q = qdisc_priv(sch);
+ unsigned int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+ if (q->ht[i] == SFQ_DEPTH ||
+ arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, i + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static const struct Qdisc_class_ops sfq_class_ops = {
+ .get = sfq_get,
+ .change = sfq_change_class,
+ .tcf_chain = sfq_find_tcf,
+ .dump = sfq_dump_class,
+ .dump_stats = sfq_dump_class_stats,
+ .walk = sfq_walk,
+};
+
+static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
+ .cl_ops = &sfq_class_ops,
+ .id = "sfq",
+ .priv_size = sizeof(struct sfq_sched_data),
+ .enqueue = sfq_enqueue,
+ .dequeue = sfq_dequeue,
+ .requeue = sfq_requeue,
+ .drop = sfq_drop,
+ .init = sfq_init,
+ .reset = sfq_reset,
+ .destroy = sfq_destroy,
+ .change = NULL,
+ .dump = sfq_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init sfq_module_init(void)
+{
+ return register_qdisc(&sfq_qdisc_ops);
+}
+static void __exit sfq_module_exit(void)
+{
+ unregister_qdisc(&sfq_qdisc_ops);
+}
+module_init(sfq_module_init)
+module_exit(sfq_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
new file mode 100644
index 0000000..94c6159
--- /dev/null
+++ b/net/sched/sch_tbf.c
@@ -0,0 +1,493 @@
+/*
+ * net/sched/sch_tbf.c Token Bucket Filter queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
+ * original idea by Martin Devera
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+/* Simple Token Bucket Filter.
+ =======================================
+
+ SOURCE.
+ -------
+
+ None.
+
+ Description.
+ ------------
+
+ A data flow obeys TBF with rate R and depth B, if for any
+ time interval t_i...t_f the number of transmitted bits
+ does not exceed B + R*(t_f-t_i).
+
+ Packetized version of this definition:
+ The sequence of packets of sizes s_i served at moments t_i
+ obeys TBF, if for any i<=k:
+
+ s_i+....+s_k <= B + R*(t_k - t_i)
+
+ Algorithm.
+ ----------
+
+ Let N(t_i) be B/R initially and N(t) grow continuously with time as:
+
+ N(t+delta) = min{B/R, N(t) + delta}
+
+ If the first packet in queue has length S, it may be
+ transmitted only at the time t_* when S/R <= N(t_*),
+ and in this case N(t) jumps:
+
+ N(t_* + 0) = N(t_* - 0) - S/R.
+
+
+
+ Actually, QoS requires two TBF to be applied to a data stream.
+ One of them controls steady state burst size, another
+ one with rate P (peak rate) and depth M (equal to link MTU)
+ limits bursts at a smaller time scale.
+
+ It is easy to see that P>R, and B>M. If P is infinity, this double
+ TBF is equivalent to a single one.
+
+ When TBF works in reshaping mode, latency is estimated as:
+
+ lat = max ((L-B)/R, (L-M)/P)
+
+
+ NOTES.
+ ------
+
+ If TBF throttles, it starts a watchdog timer, which will wake it up
+ when it is ready to transmit.
+ Note that the minimal timer resolution is 1/HZ.
+ If no new packets arrive during this period,
+	or if the device is not awakened by an EOI for some previous packet,
+ TBF can stop its activity for 1/HZ.
+
+
+ This means, that with depth B, the maximal rate is
+
+ R_crit = B*HZ
+
+	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
+
+	Note that the peak rate TBF is much tougher: with MTU 1500
+ P_crit = 150Kbytes/sec. So, if you need greater peak
+ rates, use alpha with HZ=1000 :-)
+
+ With classful TBF, limit is kept only for backwards compatibility.
+ It is passed to the default bfifo qdisc; if the inner qdisc is
+ changed, the limit is no longer effective.
+*/
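The arithmetic above is what tbf_dequeue() implements further down, with the
token counters kept in scheduler ticks. A minimal user-space sketch of the same
dual-bucket admission check, assuming floating-point seconds for the credit and
purely illustrative names (none of this is part of the patch):

#include <stdbool.h>
#include <stdio.h>

struct tbf_model {
	double rate;	/* R, bytes per second */
	double burst;	/* B, bytes (steady-state bucket) */
	double prate;	/* P, bytes per second; 0 means "no peak bucket" */
	double pburst;	/* M, bytes (peak bucket, roughly the link MTU) */
	double tokens;	/* current credit in seconds (N(t) in the text) */
	double ptokens;	/* current peak credit in seconds */
	double t_c;	/* time of the last successful send */
};

/* May a packet of 'len' bytes leave at time 'now'?  Charges the buckets
 * and advances the check-point only if the answer is yes. */
static bool tbf_may_send(struct tbf_model *q, double now, unsigned len)
{
	double toks = q->tokens + (now - q->t_c);
	double ptoks = 0.0;

	if (toks > q->burst / q->rate)		/* N(t) is clipped at B/R */
		toks = q->burst / q->rate;
	toks -= (double)len / q->rate;		/* charge S/R */

	if (q->prate > 0.0) {
		ptoks = q->ptokens + (now - q->t_c);
		if (ptoks > q->pburst / q->prate)	/* clipped at M/P */
			ptoks = q->pburst / q->prate;
		ptoks -= (double)len / q->prate;	/* charge S/P */
	}

	if (toks < 0.0 || ptoks < 0.0)
		return false;	/* throttle for max(-toks, -ptoks) seconds */

	q->tokens = toks;
	q->ptokens = ptoks;
	q->t_c = now;
	return true;
}

int main(void)
{
	/* 1 Mbit/s (125000 B/s), 10000-byte burst, no peak bucket. */
	struct tbf_model q = {
		.rate = 125000.0, .burst = 10000.0,
		.tokens = 10000.0 / 125000.0,
	};
	int i;

	/* Back-to-back 1500-byte packets at t=0: the first six fit in the
	 * burst, the rest must wait for new tokens to accumulate. */
	for (i = 0; i < 8; i++)
		printf("packet %d: %s\n", i,
		       tbf_may_send(&q, 0.0, 1500) ? "send" : "wait");
	return 0;
}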
+
+struct tbf_sched_data
+{
+/* Parameters */
+ u32 limit; /* Maximal length of backlog: bytes */
+ u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
+ u32 mtu;
+ u32 max_size;
+ struct qdisc_rate_table *R_tab;
+ struct qdisc_rate_table *P_tab;
+
+/* Variables */
+ long tokens; /* Current number of B tokens */
+ long ptokens; /* Current number of P tokens */
+ psched_time_t t_c; /* Time check-point */
+ struct Qdisc *qdisc; /* Inner qdisc, default - bfifo queue */
+ struct qdisc_watchdog watchdog; /* Watchdog timer */
+};
+
+#define L2T(q,L) qdisc_l2t((q)->R_tab,L)
+#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ if (qdisc_pkt_len(skb) > q->max_size)
+ return qdisc_reshape_fail(skb, sch);
+
+ ret = qdisc_enqueue(skb, q->qdisc);
+ if (ret != 0) {
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+ }
+
+ sch->q.qlen++;
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ return 0;
+}
+
+static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ }
+
+ return ret;
+}
+
+static unsigned int tbf_drop(struct Qdisc* sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ unsigned int len = 0;
+
+ if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ }
+ return len;
+}
+
+static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+
+ skb = q->qdisc->dequeue(q->qdisc);
+
+ if (skb) {
+ psched_time_t now;
+ long toks;
+ long ptoks = 0;
+ unsigned int len = qdisc_pkt_len(skb);
+
+ now = psched_get_time();
+ toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
+
+ if (q->P_tab) {
+ ptoks = toks + q->ptokens;
+ if (ptoks > (long)q->mtu)
+ ptoks = q->mtu;
+ ptoks -= L2T_P(q, len);
+ }
+ toks += q->tokens;
+ if (toks > (long)q->buffer)
+ toks = q->buffer;
+ toks -= L2T(q, len);
+
+ if ((toks|ptoks) >= 0) {
+ q->t_c = now;
+ q->tokens = toks;
+ q->ptokens = ptoks;
+ sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ return skb;
+ }
+
+ qdisc_watchdog_schedule(&q->watchdog,
+ now + max_t(long, -toks, -ptoks));
+
+ /* Maybe we have a shorter packet in the queue,
+ which can be sent now. It sounds cool,
+ but, however, this is wrong in principle.
+ We MUST NOT reorder packets under these circumstances.
+
+ Really, if we split the flow into independent
+ subflows, it would be a very good solution.
+ This is the main idea of all FQ algorithms
+ (cf. CSZ, HPFQ, HFSC)
+ */
+
+ if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
+ /* When requeue fails skb is dropped */
+ qdisc_tree_decrease_qlen(q->qdisc, 1);
+ sch->qstats.drops++;
+ }
+
+ sch->qstats.overlimits++;
+ }
+ return NULL;
+}
+
+static void tbf_reset(struct Qdisc* sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_reset(q->qdisc);
+ sch->q.qlen = 0;
+ q->t_c = psched_get_time();
+ q->tokens = q->buffer;
+ q->ptokens = q->mtu;
+ qdisc_watchdog_cancel(&q->watchdog);
+}
+
+static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
+ [TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
+ [TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+ [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+};
+
+static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
+{
+ int err;
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_TBF_PTAB + 1];
+ struct tc_tbf_qopt *qopt;
+ struct qdisc_rate_table *rtab = NULL;
+ struct qdisc_rate_table *ptab = NULL;
+ struct Qdisc *child = NULL;
+ int max_size,n;
+
+ err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (tb[TCA_TBF_PARMS] == NULL)
+ goto done;
+
+ qopt = nla_data(tb[TCA_TBF_PARMS]);
+ rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
+ if (rtab == NULL)
+ goto done;
+
+ if (qopt->peakrate.rate) {
+ if (qopt->peakrate.rate > qopt->rate.rate)
+ ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
+ if (ptab == NULL)
+ goto done;
+ }
+
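+ /* Find the largest packet size whose token cost still fits into the
+  * configured buffer (and, when a peak rate is set, into the MTU);
+  * larger packets can never earn enough tokens, so tbf_enqueue()
+  * rejects them up front via max_size. */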
+ for (n = 0; n < 256; n++)
+ if (rtab->data[n] > qopt->buffer) break;
+ max_size = (n << qopt->rate.cell_log)-1;
+ if (ptab) {
+ int size;
+
+ for (n = 0; n < 256; n++)
+ if (ptab->data[n] > qopt->mtu) break;
+ size = (n << qopt->peakrate.cell_log)-1;
+ if (size < max_size) max_size = size;
+ }
+ if (max_size < 0)
+ goto done;
+
+ if (qopt->limit > 0) {
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto done;
+ }
+ }
+
+ sch_tree_lock(sch);
+ if (child) {
+ qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_destroy(xchg(&q->qdisc, child));
+ }
+ q->limit = qopt->limit;
+ q->mtu = qopt->mtu;
+ q->max_size = max_size;
+ q->buffer = qopt->buffer;
+ q->tokens = q->buffer;
+ q->ptokens = q->mtu;
+ rtab = xchg(&q->R_tab, rtab);
+ ptab = xchg(&q->P_tab, ptab);
+ sch_tree_unlock(sch);
+ err = 0;
+done:
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ if (ptab)
+ qdisc_put_rtab(ptab);
+ return err;
+}
+
+static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ q->t_c = psched_get_time();
+ qdisc_watchdog_init(&q->watchdog, sch);
+ q->qdisc = &noop_qdisc;
+
+ return tbf_change(sch, opt);
+}
+
+static void tbf_destroy(struct Qdisc *sch)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ qdisc_watchdog_cancel(&q->watchdog);
+
+ if (q->P_tab)
+ qdisc_put_rtab(q->P_tab);
+ if (q->R_tab)
+ qdisc_put_rtab(q->R_tab);
+
+ qdisc_destroy(q->qdisc);
+}
+
+static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ struct nlattr *nest;
+ struct tc_tbf_qopt opt;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (nest == NULL)
+ goto nla_put_failure;
+
+ opt.limit = q->limit;
+ opt.rate = q->R_tab->rate;
+ if (q->P_tab)
+ opt.peakrate = q->P_tab->rate;
+ else
+ memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+ opt.mtu = q->mtu;
+ opt.buffer = q->buffer;
+ NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
+
+ nla_nest_end(skb, nest);
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ if (cl != 1) /* only one class */
+ return -ENOENT;
+
+ tcm->tcm_handle |= TC_H_MIN(1);
+ tcm->tcm_info = q->qdisc->handle;
+
+ return 0;
+}
+
+static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = xchg(&q->qdisc, new);
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct tbf_sched_data *q = qdisc_priv(sch);
+ return q->qdisc;
+}
+
+static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
+{
+ return 1;
+}
+
+static void tbf_put(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg)
+{
+ return -ENOSYS;
+}
+
+static int tbf_delete(struct Qdisc *sch, unsigned long arg)
+{
+ return -ENOSYS;
+}
+
+static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+ if (!walker->stop) {
+ if (walker->count >= walker->skip)
+ if (walker->fn(sch, 1, walker) < 0) {
+ walker->stop = 1;
+ return;
+ }
+ walker->count++;
+ }
+}
+
+static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ return NULL;
+}
+
+static const struct Qdisc_class_ops tbf_class_ops =
+{
+ .graft = tbf_graft,
+ .leaf = tbf_leaf,
+ .get = tbf_get,
+ .put = tbf_put,
+ .change = tbf_change_class,
+ .delete = tbf_delete,
+ .walk = tbf_walk,
+ .tcf_chain = tbf_find_tcf,
+ .dump = tbf_dump_class,
+};
+
+static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &tbf_class_ops,
+ .id = "tbf",
+ .priv_size = sizeof(struct tbf_sched_data),
+ .enqueue = tbf_enqueue,
+ .dequeue = tbf_dequeue,
+ .requeue = tbf_requeue,
+ .drop = tbf_drop,
+ .init = tbf_init,
+ .reset = tbf_reset,
+ .destroy = tbf_destroy,
+ .change = tbf_change,
+ .dump = tbf_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init tbf_module_init(void)
+{
+ return register_qdisc(&tbf_qdisc_ops);
+}
+
+static void __exit tbf_module_exit(void)
+{
+ unregister_qdisc(&tbf_qdisc_ops);
+}
+module_init(tbf_module_init)
+module_exit(tbf_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
new file mode 100644
index 0000000..d35ef05
--- /dev/null
+++ b/net/sched/sch_teql.c
@@ -0,0 +1,513 @@
+/* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/if_arp.h>
+#include <linux/netdevice.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/moduleparam.h>
+#include <net/dst.h>
+#include <net/neighbour.h>
+#include <net/pkt_sched.h>
+
+/*
+ How to set it up.
+ -----------------
+
+ After loading this module you will find a new device teqlN
+ and a new qdisc with the same name. To join a slave to the equalizer,
+ just attach this qdisc to the slave device, e.g.:
+
+ # tc qdisc add dev eth0 root teql0
+ # tc qdisc add dev eth1 root teql0
+
+ That's all. Full PnP 8)
+
+ Applicability.
+ --------------
+
+ 1. Slave devices MUST be active devices, i.e., they must raise the tbusy
+ signal and generate EOI events. If you want to equalize virtual devices
+ like tunnels, use a normal eql device.
+ 2. This device puts no limitations on physical slave characteristics,
+ e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
+ Certainly, a large difference in link speeds will make the resulting
+ equalized link unusable because of huge packet reordering.
+ I estimate the upper useful difference as ~10 times.
+ 3. If the slave requires address resolution, only protocols using
+ neighbour cache (IPv4/IPv6) will work over the equalized link.
+ Other protocols are still allowed to use the slave device directly,
+ which will not break load balancing, though native slave
+ traffic will have the highest priority. */
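The comment above already shows how slaves are attached; as a typical (and
purely illustrative) continuation, the master device is then brought up and
routes are pointed at it rather than at the individual slaves, e.g.:

 # ip link set dev teql0 up
 # ip route add 10.1.1.0/24 dev teql0

The prefix 10.1.1.0/24 is a placeholder; any route whose traffic should be
balanced across the slaves can be attached to the teql device this way.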
+
+struct teql_master
+{
+ struct Qdisc_ops qops;
+ struct net_device *dev;
+ struct Qdisc *slaves;
+ struct list_head master_list;
+ struct net_device_stats stats;
+};
+
+struct teql_sched_data
+{
+ struct Qdisc *next;
+ struct teql_master *m;
+ struct neighbour *ncache;
+ struct sk_buff_head q;
+};
+
+#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+
+#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+
+/* "teql*" qdisc routines */
+
+static int
+teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct teql_sched_data *q = qdisc_priv(sch);
+
+ if (q->q.qlen < dev->tx_queue_len) {
+ __skb_queue_tail(&q->q, skb);
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ return 0;
+ }
+
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int
+teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct teql_sched_data *q = qdisc_priv(sch);
+
+ __skb_queue_head(&q->q, skb);
+ sch->qstats.requeues++;
+ return 0;
+}
+
+static struct sk_buff *
+teql_dequeue(struct Qdisc* sch)
+{
+ struct teql_sched_data *dat = qdisc_priv(sch);
+ struct netdev_queue *dat_queue;
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&dat->q);
+ dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
+ if (skb == NULL) {
+ struct net_device *m = qdisc_dev(dat_queue->qdisc);
+ if (m) {
+ dat->m->slaves = sch;
+ netif_wake_queue(m);
+ }
+ }
+ sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
+ return skb;
+}
+
+static __inline__ void
+teql_neigh_release(struct neighbour *n)
+{
+ if (n)
+ neigh_release(n);
+}
+
+static void
+teql_reset(struct Qdisc* sch)
+{
+ struct teql_sched_data *dat = qdisc_priv(sch);
+
+ skb_queue_purge(&dat->q);
+ sch->q.qlen = 0;
+ teql_neigh_release(xchg(&dat->ncache, NULL));
+}
+
+static void
+teql_destroy(struct Qdisc* sch)
+{
+ struct Qdisc *q, *prev;
+ struct teql_sched_data *dat = qdisc_priv(sch);
+ struct teql_master *master = dat->m;
+
+ if ((prev = master->slaves) != NULL) {
+ do {
+ q = NEXT_SLAVE(prev);
+ if (q == sch) {
+ NEXT_SLAVE(prev) = NEXT_SLAVE(q);
+ if (q == master->slaves) {
+ master->slaves = NEXT_SLAVE(q);
+ if (q == master->slaves) {
+ struct netdev_queue *txq;
+ spinlock_t *root_lock;
+
+ txq = netdev_get_tx_queue(master->dev, 0);
+ master->slaves = NULL;
+
+ root_lock = qdisc_root_sleeping_lock(txq->qdisc);
+ spin_lock_bh(root_lock);
+ qdisc_reset(txq->qdisc);
+ spin_unlock_bh(root_lock);
+ }
+ }
+ skb_queue_purge(&dat->q);
+ teql_neigh_release(xchg(&dat->ncache, NULL));
+ break;
+ }
+
+ } while ((prev = q) != master->slaves);
+ }
+}
+
+static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct teql_master *m = (struct teql_master*)sch->ops;
+ struct teql_sched_data *q = qdisc_priv(sch);
+
+ if (dev->hard_header_len > m->dev->hard_header_len)
+ return -EINVAL;
+
+ if (m->dev == dev)
+ return -ELOOP;
+
+ q->m = m;
+
+ skb_queue_head_init(&q->q);
+
+ if (m->slaves) {
+ if (m->dev->flags & IFF_UP) {
+ if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
+ || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
+ || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
+ || dev->mtu < m->dev->mtu)
+ return -EINVAL;
+ } else {
+ if (!(dev->flags&IFF_POINTOPOINT))
+ m->dev->flags &= ~IFF_POINTOPOINT;
+ if (!(dev->flags&IFF_BROADCAST))
+ m->dev->flags &= ~IFF_BROADCAST;
+ if (!(dev->flags&IFF_MULTICAST))
+ m->dev->flags &= ~IFF_MULTICAST;
+ if (dev->mtu < m->dev->mtu)
+ m->dev->mtu = dev->mtu;
+ }
+ q->next = NEXT_SLAVE(m->slaves);
+ NEXT_SLAVE(m->slaves) = sch;
+ } else {
+ q->next = sch;
+ m->slaves = sch;
+ m->dev->mtu = dev->mtu;
+ m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
+ }
+ return 0;
+}
+
+
+static int
+__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
+{
+ struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
+ struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
+ struct neighbour *mn = skb->dst->neighbour;
+ struct neighbour *n = q->ncache;
+
+ if (mn->tbl == NULL)
+ return -EINVAL;
+ if (n && n->tbl == mn->tbl &&
+ memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
+ atomic_inc(&n->refcnt);
+ } else {
+ n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
+ if (IS_ERR(n))
+ return PTR_ERR(n);
+ }
+ if (neigh_event_send(n, skb_res) == 0) {
+ int err;
+
+ read_lock(&n->lock);
+ err = dev_hard_header(skb, dev, ntohs(skb->protocol),
+ n->ha, NULL, skb->len);
+ read_unlock(&n->lock);
+
+ if (err < 0) {
+ neigh_release(n);
+ return -EINVAL;
+ }
+ teql_neigh_release(xchg(&q->ncache, n));
+ return 0;
+ }
+ neigh_release(n);
+ return (skb_res == NULL) ? -EAGAIN : 1;
+}
+
+static inline int teql_resolve(struct sk_buff *skb,
+ struct sk_buff *skb_res, struct net_device *dev)
+{
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
+ if (txq->qdisc == &noop_qdisc)
+ return -ENODEV;
+
+ if (dev->header_ops == NULL ||
+ skb->dst == NULL ||
+ skb->dst->neighbour == NULL)
+ return 0;
+ return __teql_resolve(skb, skb_res, dev);
+}
+
+static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct teql_master *master = netdev_priv(dev);
+ struct Qdisc *start, *q;
+ int busy;
+ int nores;
+ int subq = skb_get_queue_mapping(skb);
+ struct sk_buff *skb_res = NULL;
+
+ start = master->slaves;
+
+restart:
+ nores = 0;
+ busy = 0;
+
+ if ((q = start) == NULL)
+ goto drop;
+
+ do {
+ struct net_device *slave = qdisc_dev(q);
+ struct netdev_queue *slave_txq;
+
+ slave_txq = netdev_get_tx_queue(slave, 0);
+ if (slave_txq->qdisc_sleeping != q)
+ continue;
+ if (__netif_subqueue_stopped(slave, subq) ||
+ !netif_running(slave)) {
+ busy = 1;
+ continue;
+ }
+
+ switch (teql_resolve(skb, skb_res, slave)) {
+ case 0:
+ if (__netif_tx_trylock(slave_txq)) {
+ if (!netif_tx_queue_stopped(slave_txq) &&
+ !netif_tx_queue_frozen(slave_txq) &&
+ slave->hard_start_xmit(skb, slave) == 0) {
+ __netif_tx_unlock(slave_txq);
+ master->slaves = NEXT_SLAVE(q);
+ netif_wake_queue(dev);
+ master->stats.tx_packets++;
+ master->stats.tx_bytes +=
+ qdisc_pkt_len(skb);
+ return 0;
+ }
+ __netif_tx_unlock(slave_txq);
+ }
+ if (netif_queue_stopped(dev))
+ busy = 1;
+ break;
+ case 1:
+ master->slaves = NEXT_SLAVE(q);
+ return 0;
+ default:
+ nores = 1;
+ break;
+ }
+ __skb_pull(skb, skb_network_offset(skb));
+ } while ((q = NEXT_SLAVE(q)) != start);
+
+ if (nores && skb_res == NULL) {
+ skb_res = skb;
+ goto restart;
+ }
+
+ if (busy) {
+ netif_stop_queue(dev);
+ return 1;
+ }
+ master->stats.tx_errors++;
+
+drop:
+ master->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return 0;
+}
+
+static int teql_master_open(struct net_device *dev)
+{
+ struct Qdisc * q;
+ struct teql_master *m = netdev_priv(dev);
+ int mtu = 0xFFFE;
+ unsigned flags = IFF_NOARP|IFF_MULTICAST;
+
+ if (m->slaves == NULL)
+ return -EUNATCH;
+
+ flags = FMASK;
+
+ q = m->slaves;
+ do {
+ struct net_device *slave = qdisc_dev(q);
+
+ if (slave == NULL)
+ return -EUNATCH;
+
+ if (slave->mtu < mtu)
+ mtu = slave->mtu;
+ if (slave->hard_header_len > LL_MAX_HEADER)
+ return -EINVAL;
+
+ /* If all the slaves are BROADCAST, master is BROADCAST
+ If all the slaves are PtP, master is PtP
+ Otherwise, master is NBMA.
+ */
+ if (!(slave->flags&IFF_POINTOPOINT))
+ flags &= ~IFF_POINTOPOINT;
+ if (!(slave->flags&IFF_BROADCAST))
+ flags &= ~IFF_BROADCAST;
+ if (!(slave->flags&IFF_MULTICAST))
+ flags &= ~IFF_MULTICAST;
+ } while ((q = NEXT_SLAVE(q)) != m->slaves);
+
+ m->dev->mtu = mtu;
+ m->dev->flags = (m->dev->flags&~FMASK) | flags;
+ netif_start_queue(m->dev);
+ return 0;
+}
+
+static int teql_master_close(struct net_device *dev)
+{
+ netif_stop_queue(dev);
+ return 0;
+}
+
+static struct net_device_stats *teql_master_stats(struct net_device *dev)
+{
+ struct teql_master *m = netdev_priv(dev);
+ return &m->stats;
+}
+
+static int teql_master_mtu(struct net_device *dev, int new_mtu)
+{
+ struct teql_master *m = netdev_priv(dev);
+ struct Qdisc *q;
+
+ if (new_mtu < 68)
+ return -EINVAL;
+
+ q = m->slaves;
+ if (q) {
+ do {
+ if (new_mtu > qdisc_dev(q)->mtu)
+ return -EINVAL;
+ } while ((q=NEXT_SLAVE(q)) != m->slaves);
+ }
+
+ dev->mtu = new_mtu;
+ return 0;
+}
+
+static __init void teql_master_setup(struct net_device *dev)
+{
+ struct teql_master *master = netdev_priv(dev);
+ struct Qdisc_ops *ops = &master->qops;
+
+ master->dev = dev;
+ ops->priv_size = sizeof(struct teql_sched_data);
+
+ ops->enqueue = teql_enqueue;
+ ops->dequeue = teql_dequeue;
+ ops->requeue = teql_requeue;
+ ops->init = teql_qdisc_init;
+ ops->reset = teql_reset;
+ ops->destroy = teql_destroy;
+ ops->owner = THIS_MODULE;
+
+ dev->open = teql_master_open;
+ dev->hard_start_xmit = teql_master_xmit;
+ dev->stop = teql_master_close;
+ dev->get_stats = teql_master_stats;
+ dev->change_mtu = teql_master_mtu;
+ dev->type = ARPHRD_VOID;
+ dev->mtu = 1500;
+ dev->tx_queue_len = 100;
+ dev->flags = IFF_NOARP;
+ dev->hard_header_len = LL_MAX_HEADER;
+}
+
+static LIST_HEAD(master_dev_list);
+static int max_equalizers = 1;
+module_param(max_equalizers, int, 0);
+MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
+
+static int __init teql_init(void)
+{
+ int i;
+ int err = -ENODEV;
+
+ for (i = 0; i < max_equalizers; i++) {
+ struct net_device *dev;
+ struct teql_master *master;
+
+ dev = alloc_netdev(sizeof(struct teql_master),
+ "teql%d", teql_master_setup);
+ if (!dev) {
+ err = -ENOMEM;
+ break;
+ }
+
+ if ((err = register_netdev(dev))) {
+ free_netdev(dev);
+ break;
+ }
+
+ master = netdev_priv(dev);
+
+ strlcpy(master->qops.id, dev->name, IFNAMSIZ);
+ err = register_qdisc(&master->qops);
+
+ if (err) {
+ unregister_netdev(dev);
+ free_netdev(dev);
+ break;
+ }
+
+ list_add_tail(&master->master_list, &master_dev_list);
+ }
+ return i ? 0 : err;
+}
+
+static void __exit teql_exit(void)
+{
+ struct teql_master *master, *nxt;
+
+ list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
+
+ list_del(&master->master_list);
+
+ unregister_qdisc(&master->qops);
+ unregister_netdev(master->dev);
+ free_netdev(master->dev);
+ }
+}
+
+module_init(teql_init);
+module_exit(teql_exit);
+
+MODULE_LICENSE("GPL");