summary | refs | log | tree | commit | diff | stats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r-- net/sched/Kconfig 48
-rw-r--r-- net/sched/Makefile 3
-rw-r--r-- net/sched/act_api.c 9
-rw-r--r-- net/sched/act_csum.c 8
-rw-r--r-- net/sched/act_gact.c 9
-rw-r--r-- net/sched/act_ipt.c 21
-rw-r--r-- net/sched/act_mirred.c 11
-rw-r--r-- net/sched/act_nat.c 6
-rw-r--r-- net/sched/act_pedit.c 6
-rw-r--r-- net/sched/act_police.c 13
-rw-r--r-- net/sched/act_simple.c 8
-rw-r--r-- net/sched/act_skbedit.c 27
-rw-r--r-- net/sched/cls_api.c 3
-rw-r--r-- net/sched/cls_basic.c 5
-rw-r--r-- net/sched/cls_cgroup.c 37
-rw-r--r-- net/sched/cls_flow.c 35
-rw-r--r-- net/sched/cls_fw.c 15
-rw-r--r-- net/sched/cls_route.c 16
-rw-r--r-- net/sched/cls_rsvp.h 16
-rw-r--r-- net/sched/cls_tcindex.c 14
-rw-r--r-- net/sched/cls_u32.c 43
-rw-r--r-- net/sched/em_meta.c 19
-rw-r--r-- net/sched/ematch.c 10
-rw-r--r-- net/sched/sch_api.c 19
-rw-r--r-- net/sched/sch_atm.c 25
-rw-r--r-- net/sched/sch_cbq.c 18
-rw-r--r-- net/sched/sch_choke.c 13
-rw-r--r-- net/sched/sch_codel.c 276
-rw-r--r-- net/sched/sch_drr.c 7
-rw-r--r-- net/sched/sch_dsmark.c 21
-rw-r--r-- net/sched/sch_fifo.c 3
-rw-r--r-- net/sched/sch_fq_codel.c 626
-rw-r--r-- net/sched/sch_generic.c 14
-rw-r--r-- net/sched/sch_gred.c 25
-rw-r--r-- net/sched/sch_hfsc.c 8
-rw-r--r-- net/sched/sch_htb.c 12
-rw-r--r-- net/sched/sch_mqprio.c 3
-rw-r--r-- net/sched/sch_multiq.c 3
-rw-r--r-- net/sched/sch_netem.c 45
-rw-r--r-- net/sched/sch_plug.c 233
-rw-r--r-- net/sched/sch_prio.c 3
-rw-r--r-- net/sched/sch_qfq.c 5
-rw-r--r-- net/sched/sch_red.c 5
-rw-r--r-- net/sched/sch_sfb.c 3
-rw-r--r-- net/sched/sch_sfq.c 3
-rw-r--r-- net/sched/sch_tbf.c 3
-rw-r--r-- net/sched/sch_teql.c 4
47 files changed, 1514 insertions, 245 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2590e91..e7a8976 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -250,6 +250,28 @@ config NET_SCH_QFQ
If unsure, say N.
+config NET_SCH_CODEL
+ tristate "Controlled Delay AQM (CODEL)"
+ help
+ Say Y here if you want to use the Controlled Delay (CODEL)
+ packet scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_codel.
+
+ If unsure, say N.
+
+config NET_SCH_FQ_CODEL
+ tristate "Fair Queue Controlled Delay AQM (FQ_CODEL)"
+ help
+ Say Y here if you want to use the FQ Controlled Delay (FQ_CODEL)
+ packet scheduling algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_fq_codel.
+
+ If unsure, say N.
+
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
@@ -260,6 +282,32 @@ config NET_SCH_INGRESS
To compile this code as a module, choose M here: the
module will be called sch_ingress.
+config NET_SCH_PLUG
+ tristate "Plug network traffic until release (PLUG)"
+ ---help---
+
+ This queuing discipline allows userspace to plug/unplug a network
+ output queue, using the netlink interface. When it receives an
+ enqueue command it inserts a plug into the outbound queue that
+ causes following packets to enqueue until a dequeue command arrives
+ over netlink, causing the plug to be removed and resuming the normal
+ packet flow.
+
+ This module also provides a generic "network output buffering"
+ functionality (aka output commit), wherein upon arrival of a dequeue
+ command, only packets up to the first plug are released for delivery.
+ The Remus HA project uses this module to enable speculative execution
+ of virtual machines by allowing the generated network output to be rolled
+ back if needed.
+
+ For more information, please refer to http://wiki.xensource.com/xenwiki/Remus
+
+ Say Y here if you are using this kernel for Xen dom0 and
+ want to protect Xen guests with Remus.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_plug.
+
comment "Classification"
config NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dc5889c..5940a19 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,9 +33,12 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
+obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
+obj-$(CONFIG_NET_SCH_CODEL) += sch_codel.o
+obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 93fdf13..5cfb160 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -127,7 +127,8 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
+ if (nla_put_string(skb, TCA_KIND, a->ops->kind))
+ goto nla_put_failure;
for (i = 0; i < (hinfo->hmask + 1); i++) {
p = hinfo->htab[tcf_hash(i, hinfo->hmask)];
@@ -139,7 +140,8 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
p = s_p;
}
}
- NLA_PUT_U32(skb, TCA_FCNT, n_i);
+ if (nla_put_u32(skb, TCA_FCNT, n_i))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return n_i;
@@ -437,7 +439,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
if (a->ops == NULL || a->ops->dump == NULL)
return err;
- NLA_PUT_STRING(skb, TCA_KIND, a->ops->kind);
+ if (nla_put_string(skb, TCA_KIND, a->ops->kind))
+ goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
nest = nla_nest_start(skb, TCA_OPTIONS);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 453a734..2c8ad7c 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -397,7 +397,7 @@ static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
while (len > 1) {
switch (xh[off]) {
- case IPV6_TLV_PAD0:
+ case IPV6_TLV_PAD1:
optlen = 1;
break;
case IPV6_TLV_JUMBO:
@@ -550,11 +550,13 @@ static int tcf_csum_dump(struct sk_buff *skb,
};
struct tcf_t t;
- NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_CSUM_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b77f5a0..f10fb82 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -162,7 +162,8 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
};
struct tcf_t t;
- NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_GACT_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
#ifdef CONFIG_GACT_PROB
if (gact->tcfg_ptype) {
struct tc_gact_p p_opt = {
@@ -171,13 +172,15 @@ static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
.ptype = gact->tcfg_ptype,
};
- NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
+ if (nla_put(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt))
+ goto nla_put_failure;
}
#endif
t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
- NLA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_GACT_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 60f8f61..60e281a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -1,5 +1,5 @@
/*
- * net/sched/ipt.c iptables target interface
+ * net/sched/ipt.c iptables target interface
*
*TODO: Add other tables. For now we only support the ipv4 table targets
*
@@ -235,9 +235,8 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
result = TC_ACT_PIPE;
break;
default:
- if (net_ratelimit())
- pr_notice("tc filter: Bogus netfilter code"
- " %d assume ACCEPT\n", ret);
+ net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
+ ret);
result = TC_POLICE_OK;
break;
}
@@ -267,15 +266,17 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
c.refcnt = ipt->tcf_refcnt - ref;
strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
- NLA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
- NLA_PUT_U32(skb, TCA_IPT_INDEX, ipt->tcf_index);
- NLA_PUT_U32(skb, TCA_IPT_HOOK, ipt->tcfi_hook);
- NLA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
- NLA_PUT_STRING(skb, TCA_IPT_TABLE, ipt->tcfi_tname);
+ if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
+ nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) ||
+ nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) ||
+ nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
+ nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
+ goto nla_put_failure;
tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
- NLA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
+ if (nla_put(skb, TCA_IPT_TM, sizeof (tm), &tm))
+ goto nla_put_failure;
kfree(t);
return skb->len;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index e051398..fe81cc1 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -174,9 +174,8 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
if (!(dev->flags & IFF_UP)) {
- if (net_ratelimit())
- pr_notice("tc mirred to Houston: device %s is down\n",
- dev->name);
+ net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
+ dev->name);
goto out;
}
@@ -227,11 +226,13 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
};
struct tcf_t t;
- NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(m->tcf_tm.expires);
- NLA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_MIRRED_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 001d1b3..b5d029e 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -284,11 +284,13 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 10d3aed..26aa2f6 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -215,11 +215,13 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
opt->refcnt = p->tcf_refcnt - ref;
opt->bindcnt = p->tcf_bindcnt - bind;
- NLA_PUT(skb, TCA_PEDIT_PARMS, s, opt);
+ if (nla_put(skb, TCA_PEDIT_PARMS, s, opt))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
- NLA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_PEDIT_TM, sizeof(t), &t))
+ goto nla_put_failure;
kfree(opt);
return skb->len;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6fb3f5a..a9de232 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -356,11 +356,14 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
opt.rate = police->tcfp_R_tab->rate;
if (police->tcfp_P_tab)
opt.peakrate = police->tcfp_P_tab->rate;
- NLA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
- if (police->tcfp_result)
- NLA_PUT_U32(skb, TCA_POLICE_RESULT, police->tcfp_result);
- if (police->tcfp_ewma_rate)
- NLA_PUT_U32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate);
+ if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
+ goto nla_put_failure;
+ if (police->tcfp_result &&
+ nla_put_u32(skb, TCA_POLICE_RESULT, police->tcfp_result))
+ goto nla_put_failure;
+ if (police->tcfp_ewma_rate &&
+ nla_put_u32(skb, TCA_POLICE_AVRATE, police->tcfp_ewma_rate))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 73e0a3a..3922f2a 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -172,12 +172,14 @@ static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
- NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
+ if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) ||
+ nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
- NLA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_DEF_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 35dbbe9..476e0fa 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -166,20 +166,25 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
};
struct tcf_t t;
- NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
- if (d->flags & SKBEDIT_F_PRIORITY)
- NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
- &d->priority);
- if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
- NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
- sizeof(d->queue_mapping), &d->queue_mapping);
- if (d->flags & SKBEDIT_F_MARK)
- NLA_PUT(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
- &d->mark);
+ if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_PRIORITY) &&
+ nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+ &d->priority))
+ goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) &&
+ nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+ sizeof(d->queue_mapping), &d->queue_mapping))
+ goto nla_put_failure;
+ if ((d->flags & SKBEDIT_F_MARK) &&
+ nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark),
+ &d->mark))
+ goto nla_put_failure;
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
- NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+ if (nla_put(skb, TCA_SKBEDIT_TM, sizeof(t), &t))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a69d44f..f452f69 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -357,7 +357,8 @@ static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp,
tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
tcm->tcm_parent = tp->classid;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
- NLA_PUT_STRING(skb, TCA_KIND, tp->ops->kind);
+ if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
+ goto nla_put_failure;
tcm->tcm_handle = fh;
if (RTM_DELTFILTER != event) {
tcm->tcm_handle = 0;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index ea1f70b..590960a 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -257,8 +257,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (f->res.classid)
- NLA_PUT_U32(skb, TCA_BASIC_CLASSID, f->res.classid);
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index f84fdc3..7743ea8 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,23 +22,6 @@
#include <net/sock.h>
#include <net/cls_cgroup.h>
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
-static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
-
-struct cgroup_subsys net_cls_subsys = {
- .name = "net_cls",
- .create = cgrp_create,
- .destroy = cgrp_destroy,
- .populate = cgrp_populate,
-#ifdef CONFIG_NET_CLS_CGROUP
- .subsys_id = net_cls_subsys_id,
-#endif
- .module = THIS_MODULE,
-};
-
-
static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
{
return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
@@ -51,8 +34,7 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
struct cgroup_cls_state, css);
}
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
struct cgroup_cls_state *cs;
@@ -66,7 +48,7 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
return &cs->css;
}
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cgrp_destroy(struct cgroup *cgrp)
{
kfree(cgrp_cls_state(cgrp));
}
@@ -88,12 +70,19 @@ static struct cftype ss_files[] = {
.read_u64 = read_classid,
.write_u64 = write_classid,
},
+ { } /* terminate */
};
-static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
-{
- return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
-}
+struct cgroup_subsys net_cls_subsys = {
+ .name = "net_cls",
+ .create = cgrp_create,
+ .destroy = cgrp_destroy,
+#ifdef CONFIG_NET_CLS_CGROUP
+ .subsys_id = net_cls_subsys_id,
+#endif
+ .base_cftypes = ss_files,
+ .module = THIS_MODULE,
+};
struct cls_cgroup_head {
u32 handle;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 1d8bd0d..ccd08c8 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -572,25 +572,32 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, TCA_FLOW_KEYS, f->keymask);
- NLA_PUT_U32(skb, TCA_FLOW_MODE, f->mode);
+ if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
+ nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
+ goto nla_put_failure;
if (f->mask != ~0 || f->xor != 0) {
- NLA_PUT_U32(skb, TCA_FLOW_MASK, f->mask);
- NLA_PUT_U32(skb, TCA_FLOW_XOR, f->xor);
+ if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
+ nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
+ goto nla_put_failure;
}
- if (f->rshift)
- NLA_PUT_U32(skb, TCA_FLOW_RSHIFT, f->rshift);
- if (f->addend)
- NLA_PUT_U32(skb, TCA_FLOW_ADDEND, f->addend);
+ if (f->rshift &&
+ nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
+ goto nla_put_failure;
+ if (f->addend &&
+ nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
+ goto nla_put_failure;
- if (f->divisor)
- NLA_PUT_U32(skb, TCA_FLOW_DIVISOR, f->divisor);
- if (f->baseclass)
- NLA_PUT_U32(skb, TCA_FLOW_BASECLASS, f->baseclass);
+ if (f->divisor &&
+ nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
+ goto nla_put_failure;
+ if (f->baseclass &&
+ nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
+ goto nla_put_failure;
- if (f->perturb_period)
- NLA_PUT_U32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ);
+ if (f->perturb_period &&
+ nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 389af15..8384a47 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -346,14 +346,17 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (f->res.classid)
- NLA_PUT_U32(skb, TCA_FW_CLASSID, f->res.classid);
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FW_CLASSID, f->res.classid))
+ goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
- if (strlen(f->indev))
- NLA_PUT_STRING(skb, TCA_FW_INDEV, f->indev);
+ if (strlen(f->indev) &&
+ nla_put_string(skb, TCA_FW_INDEV, f->indev))
+ goto nla_put_failure;
#endif /* CONFIG_NET_CLS_IND */
- if (head->mask != 0xFFFFFFFF)
- NLA_PUT_U32(skb, TCA_FW_MASK, head->mask);
+ if (head->mask != 0xFFFFFFFF &&
+ nla_put_u32(skb, TCA_FW_MASK, head->mask))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 13ab66e..36fec42 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -571,17 +571,21 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
if (!(f->handle & 0x8000)) {
id = f->id & 0xFF;
- NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
+ if (nla_put_u32(skb, TCA_ROUTE4_TO, id))
+ goto nla_put_failure;
}
if (f->handle & 0x80000000) {
- if ((f->handle >> 16) != 0xFFFF)
- NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
+ if ((f->handle >> 16) != 0xFFFF &&
+ nla_put_u32(skb, TCA_ROUTE4_IIF, f->iif))
+ goto nla_put_failure;
} else {
id = f->id >> 16;
- NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
+ if (nla_put_u32(skb, TCA_ROUTE4_FROM, id))
+ goto nla_put_failure;
}
- if (f->res.classid)
- NLA_PUT_U32(skb, TCA_ROUTE4_CLASSID, f->res.classid);
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_ROUTE4_CLASSID, f->res.classid))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts, &route_ext_map) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index b014279..18ab93e 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -615,18 +615,22 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
+ if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
+ goto nla_put_failure;
pinfo.dpi = s->dpi;
pinfo.spi = f->spi;
pinfo.protocol = s->protocol;
pinfo.tunnelid = s->tunnelid;
pinfo.tunnelhdr = f->tunnelhdr;
pinfo.pad = 0;
- NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
- if (f->res.classid)
- NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
- if (((f->handle >> 8) & 0xFF) != 16)
- NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
+ if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
+ goto nla_put_failure;
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
+ goto nla_put_failure;
+ if (((f->handle >> 8) & 0xFF) != 16 &&
+ nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index dbe1992..fe29420 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -438,10 +438,11 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
if (!fh) {
t->tcm_handle = ~0; /* whatever ... */
- NLA_PUT_U32(skb, TCA_TCINDEX_HASH, p->hash);
- NLA_PUT_U16(skb, TCA_TCINDEX_MASK, p->mask);
- NLA_PUT_U32(skb, TCA_TCINDEX_SHIFT, p->shift);
- NLA_PUT_U32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through);
+ if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
+ nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
+ nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
+ nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
@@ -460,8 +461,9 @@ static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
}
}
pr_debug("handle = %d\n", t->tcm_handle);
- if (r->res.class)
- NLA_PUT_U32(skb, TCA_TCINDEX_CLASSID, r->res.classid);
+ if (r->res.class &&
+ nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
+ goto nla_put_failure;
if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 939b627..d45373f 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -234,8 +234,7 @@ out:
return -1;
deadloop:
- if (net_ratelimit())
- pr_warning("cls_u32: dead loop\n");
+ net_warn_ratelimited("cls_u32: dead loop\n");
return -1;
}
@@ -733,36 +732,44 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
u32 divisor = ht->divisor + 1;
- NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
+ if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor))
+ goto nla_put_failure;
} else {
- NLA_PUT(skb, TCA_U32_SEL,
- sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
- &n->sel);
+ if (nla_put(skb, TCA_U32_SEL,
+ sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
+ &n->sel))
+ goto nla_put_failure;
if (n->ht_up) {
u32 htid = n->handle & 0xFFFFF000;
- NLA_PUT_U32(skb, TCA_U32_HASH, htid);
+ if (nla_put_u32(skb, TCA_U32_HASH, htid))
+ goto nla_put_failure;
}
- if (n->res.classid)
- NLA_PUT_U32(skb, TCA_U32_CLASSID, n->res.classid);
- if (n->ht_down)
- NLA_PUT_U32(skb, TCA_U32_LINK, n->ht_down->handle);
+ if (n->res.classid &&
+ nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid))
+ goto nla_put_failure;
+ if (n->ht_down &&
+ nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle))
+ goto nla_put_failure;
#ifdef CONFIG_CLS_U32_MARK
- if (n->mark.val || n->mark.mask)
- NLA_PUT(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark);
+ if ((n->mark.val || n->mark.mask) &&
+ nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark))
+ goto nla_put_failure;
#endif
if (tcf_exts_dump(skb, &n->exts, &u32_ext_map) < 0)
goto nla_put_failure;
#ifdef CONFIG_NET_CLS_IND
- if (strlen(n->indev))
- NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
+ if (strlen(n->indev) &&
+ nla_put_string(skb, TCA_U32_INDEV, n->indev))
+ goto nla_put_failure;
#endif
#ifdef CONFIG_CLS_U32_PERF
- NLA_PUT(skb, TCA_U32_PCNT,
- sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
- n->pf);
+ if (nla_put(skb, TCA_U32_PCNT,
+ sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64),
+ n->pf))
+ goto nla_put_failure;
#endif
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 1363bf1..4790c69 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -585,8 +585,9 @@ static void meta_var_apply_extras(struct meta_value *v,
static int meta_var_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
{
- if (v->val && v->len)
- NLA_PUT(skb, tlv, v->len, (void *) v->val);
+ if (v->val && v->len &&
+ nla_put(skb, tlv, v->len, (void *) v->val))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -636,10 +637,13 @@ static void meta_int_apply_extras(struct meta_value *v,
static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
{
- if (v->len == sizeof(unsigned long))
- NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
- else if (v->len == sizeof(u32))
- NLA_PUT_U32(skb, tlv, v->val);
+ if (v->len == sizeof(unsigned long)) {
+ if (nla_put(skb, tlv, sizeof(unsigned long), &v->val))
+ goto nla_put_failure;
+ } else if (v->len == sizeof(u32)) {
+ if (nla_put_u32(skb, tlv, v->val))
+ goto nla_put_failure;
+ }
return 0;
@@ -831,7 +835,8 @@ static int em_meta_dump(struct sk_buff *skb, struct tcf_ematch *em)
memcpy(&hdr.left, &meta->lvalue.hdr, sizeof(hdr.left));
memcpy(&hdr.right, &meta->rvalue.hdr, sizeof(hdr.right));
- NLA_PUT(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr);
+ if (nla_put(skb, TCA_EM_META_HDR, sizeof(hdr), &hdr))
+ goto nla_put_failure;
ops = meta_type_ops(&meta->lvalue);
if (ops->dump(skb, &meta->lvalue, TCA_EM_META_LVALUE) < 0 ||
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 88d93eb..3a633de 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -441,7 +441,8 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
if (top_start == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
+ if (nla_put(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr))
+ goto nla_put_failure;
list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
if (list_start == NULL)
@@ -457,7 +458,8 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
.flags = em->flags
};
- NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
+ if (nla_put(skb, i + 1, sizeof(em_hdr), &em_hdr))
+ goto nla_put_failure;
if (em->ops && em->ops->dump) {
if (em->ops->dump(skb, em) < 0)
@@ -535,9 +537,7 @@ pop_stack:
return res;
stack_overflow:
- if (net_ratelimit())
- pr_warning("tc ematch: local stack overflow,"
- " increase NET_EMATCH_STACK\n");
+ net_warn_ratelimited("tc ematch: local stack overflow, increase NET_EMATCH_STACK\n");
return -1;
}
EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 3d8981f..085ce53 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -426,7 +426,8 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
nest = nla_nest_start(skb, TCA_STAB);
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts);
+ if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return skb->len;
@@ -1201,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
tcm->tcm_parent = clid;
tcm->tcm_handle = q->handle;
tcm->tcm_info = atomic_read(&q->refcnt);
- NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
if (q->ops->dump && q->ops->dump(q, skb) < 0)
goto nla_put_failure;
q->qstats.qlen = q->q.qlen;
@@ -1505,7 +1507,8 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
tcm->tcm_parent = q->handle;
tcm->tcm_handle = q->handle;
tcm->tcm_info = 0;
- NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
+ if (nla_put_string(skb, TCA_KIND, q->ops->id))
+ goto nla_put_failure;
if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
goto nla_put_failure;
@@ -1688,12 +1691,10 @@ reclassify:
tp = otp;
if (verd++ >= MAX_REC_LOOP) {
- if (net_ratelimit())
- pr_notice("%s: packet reclassify loop"
- " rule prio %u protocol %02x\n",
- tp->q->ops->id,
- tp->prio & 0xffff,
- ntohs(tp->protocol));
+ net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
+ tp->q->ops->id,
+ tp->prio & 0xffff,
+ ntohs(tp->protocol));
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index e25e490..8522a47 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -423,8 +423,6 @@ drop: __maybe_unused
}
return ret;
}
- qdisc_bstats_update(sch, skb);
- bstats_update(&flow->bstats, skb);
/*
* Okay, this may seem weird. We pretend we've dropped the packet if
* it goes via ATM. The reason for this is that the outer qdisc
@@ -472,6 +470,8 @@ static void sch_atm_dequeue(unsigned long data)
if (unlikely(!skb))
break;
+ qdisc_bstats_update(sch, skb);
+ bstats_update(&flow->bstats, skb);
pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
/* remove any LL header somebody else has attached */
skb_pull(skb, skb_network_offset(skb));
@@ -601,7 +601,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr);
+ if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
+ goto nla_put_failure;
if (flow->vcc) {
struct sockaddr_atmpvc pvc;
int state;
@@ -610,15 +611,19 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
pvc.sap_addr.vpi = flow->vcc->vpi;
pvc.sap_addr.vci = flow->vcc->vci;
- NLA_PUT(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc);
+ if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
+ goto nla_put_failure;
state = ATM_VF2VS(flow->vcc->flags);
- NLA_PUT_U32(skb, TCA_ATM_STATE, state);
+ if (nla_put_u32(skb, TCA_ATM_STATE, state))
+ goto nla_put_failure;
+ }
+ if (flow->excess) {
+ if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
+ goto nla_put_failure;
}
- if (flow->excess)
- NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
- else
- NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-
nla_nest_end(skb, nest);
return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 24d94c0..6aabd77 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1425,7 +1425,8 @@ static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
{
unsigned char *b = skb_tail_pointer(skb);
- NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
+ if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
@@ -1450,7 +1451,8 @@ static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
opt.minidle = (u32)(-cl->minidle);
opt.offtime = cl->offtime;
opt.change = ~0;
- NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
@@ -1468,7 +1470,8 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
opt.priority = cl->priority + 1;
opt.cpriority = cl->cpriority + 1;
opt.weight = cl->weight;
- NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
@@ -1485,7 +1488,8 @@ static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
opt.priority2 = cl->priority2 + 1;
opt.pad = 0;
opt.penalty = cl->penalty;
- NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
@@ -1502,7 +1506,8 @@ static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
opt.split = cl->split ? cl->split->common.classid : 0;
opt.defmap = cl->defmap;
opt.defchange = ~0;
- NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
+ goto nla_put_failure;
}
return skb->len;
@@ -1521,7 +1526,8 @@ static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
opt.police = cl->police;
opt.__res1 = 0;
opt.__res2 = 0;
- NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
+ goto nla_put_failure;
}
return skb->len;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 7e267d7..cc37dd5 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -332,15 +332,13 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
q->stats.pdrop++;
- sch->qstats.drops++;
- kfree_skb(skb);
- return NET_XMIT_DROP;
+ return qdisc_drop(skb, sch);
- congestion_drop:
+congestion_drop:
qdisc_drop(skb, sch);
return NET_XMIT_CN;
- other_drop:
+other_drop:
if (ret & __NET_XMIT_BYPASS)
sch->qstats.drops++;
kfree_skb(skb);
@@ -515,8 +513,9 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
- NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
+ if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
+ nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
new file mode 100644
index 0000000..2f9ab17
--- /dev/null
+++ b/net/sched/sch_codel.c
@@ -0,0 +1,276 @@
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ *
+ * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
+ * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
+ *
+ * Implemented on linux by :
+ * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The names of the authors may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/prefetch.h>
+#include <net/pkt_sched.h>
+#include <net/codel.h>
+
+
+#define DEFAULT_CODEL_LIMIT 1000
+
+struct codel_sched_data {
+ struct codel_params params; /* target/interval/ecn settings, written by codel_change() */
+ struct codel_vars vars; /* CoDel state for the single queue; re-initialised by codel_reset() */
+ struct codel_stats stats; /* counters shared with codel_dequeue() (drop_count, ecn_mark, maxpacket) */
+ u32 drop_overlimit; /* packets refused at enqueue because qlen had reached sch->limit */
+};
+
+/* Queue-pop callback handed to codel_dequeue(): removes the packet at
+ * the head of sch->q. The backlog counter is maintained by the CoDel
+ * code itself, so it is deliberately left untouched here.
+ */
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+{
+	struct sk_buff *pkt;
+
+	pkt = __skb_dequeue(&sch->q);
+	prefetch(&pkt->end);	/* skb_shinfo() is about to be needed */
+	return pkt;
+}
+
+/* Qdisc ->dequeue hook: let codel_dequeue() pick the next packet, report
+ * any packets it dropped to the parent qdiscs, and account the packet
+ * that is actually returned.
+ */
+static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
+{
+	struct codel_sched_data *priv = qdisc_priv(sch);
+	struct codel_stats *stats = &priv->stats;
+	struct sk_buff *pkt;
+
+	pkt = codel_dequeue(sch, &priv->params, &priv->vars, stats, dequeue);
+
+	/* qdisc_tree_decrease_qlen() must not run while our qlen is 0
+	 * (HTB crashes), so pending drops stay in drop_count until a
+	 * later call finds the queue non-empty.
+	 */
+	if (stats->drop_count != 0 && sch->q.qlen != 0) {
+		qdisc_tree_decrease_qlen(sch, stats->drop_count);
+		stats->drop_count = 0;
+	}
+
+	if (pkt != NULL)
+		qdisc_bstats_update(sch, pkt);
+	return pkt;
+}
+
+/* Qdisc ->enqueue hook: timestamp and tail-queue the packet while there
+ * is room; otherwise count an over-limit drop and discard the packet.
+ */
+static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct codel_sched_data *priv;
+
+	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
+		priv = qdisc_priv(sch);
+		priv->drop_overlimit++;
+		return qdisc_drop(skb, sch);
+	}
+
+	codel_set_enqueue_time(skb);
+	return qdisc_enqueue_tail(skb, sch);
+}
+
+static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { /* validation table passed to nla_parse_nested() in codel_change() */
+ [TCA_CODEL_TARGET] = { .type = NLA_U32 }, /* multiplied by NSEC_PER_USEC before being stored */
+ [TCA_CODEL_LIMIT] = { .type = NLA_U32 }, /* stored verbatim in sch->limit */
+ [TCA_CODEL_INTERVAL] = { .type = NLA_U32 }, /* multiplied by NSEC_PER_USEC before being stored */
+ [TCA_CODEL_ECN] = { .type = NLA_U32 }, /* any non-zero value enables params.ecn */
+};
+
+/* Apply netlink options to a CoDel qdisc.
+ *
+ * Only the attributes present in @opt are updated. If the queue is then
+ * longer than sch->limit, packets are removed from the head of the queue
+ * and dropped until it fits, and the parents are told how many went away.
+ *
+ * Returns 0 on success, -EINVAL when @opt is NULL, or the negative
+ * error reported by nla_parse_nested().
+ */
+static int codel_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_CODEL_MAX + 1];
+	struct codel_sched_data *priv = qdisc_priv(sch);
+	unsigned int old_qlen;
+	int ret;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_CODEL_MAX, opt, codel_policy);
+	if (ret < 0)
+		return ret;
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_CODEL_TARGET]) {
+		u64 ns = (u64)nla_get_u32(tb[TCA_CODEL_TARGET]) * NSEC_PER_USEC;
+
+		priv->params.target = ns >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_CODEL_INTERVAL]) {
+		u64 ns = (u64)nla_get_u32(tb[TCA_CODEL_INTERVAL]) * NSEC_PER_USEC;
+
+		priv->params.interval = ns >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_CODEL_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+
+	if (tb[TCA_CODEL_ECN])
+		priv->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+
+	/* Shrink the queue to the (possibly lowered) limit. */
+	old_qlen = sch->q.qlen;
+	while (sch->q.qlen > sch->limit) {
+		struct sk_buff *victim = __skb_dequeue(&sch->q);
+
+		sch->qstats.backlog -= qdisc_pkt_len(victim);
+		qdisc_drop(victim, sch);
+	}
+	qdisc_tree_decrease_qlen(sch, old_qlen - sch->q.qlen);
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/* Qdisc ->init hook: install the defaults, then apply any options given.
+ * Returns 0, or the error from codel_change().
+ */
+static int codel_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct codel_sched_data *priv = qdisc_priv(sch);
+	int ret;
+
+	sch->limit = DEFAULT_CODEL_LIMIT;
+	codel_params_init(&priv->params);
+	codel_vars_init(&priv->vars);
+	codel_stats_init(&priv->stats);
+
+	ret = opt ? codel_change(sch, opt) : 0;
+	if (ret)
+		return ret;
+
+	/* TCQ_F_CAN_BYPASS is only set when the queue can hold a packet. */
+	if (sch->limit < 1)
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
+	else
+		sch->flags |= TCQ_F_CAN_BYPASS;
+
+	return 0;
+}
+
+/* Qdisc ->dump hook: emit the current configuration as a TCA_OPTIONS
+ * nest. Times are converted with codel_time_to_us() before being
+ * written. Returns the value of nla_nest_end() on success; on failure
+ * the partial nest is cancelled and -1 is returned.
+ */
+static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct codel_sched_data *priv = qdisc_priv(sch);
+	struct nlattr *nest = nla_nest_start(skb, TCA_OPTIONS);
+
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_CODEL_TARGET,
+			codel_time_to_us(priv->params.target)))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CODEL_LIMIT, sch->limit))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CODEL_INTERVAL,
+			codel_time_to_us(priv->params.interval)))
+		goto nla_put_failure;
+	if (nla_put_u32(skb, TCA_CODEL_ECN, priv->params.ecn))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, nest);
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+/* Qdisc ->dump_stats hook: snapshot the CoDel counters and state into a
+ * tc_codel_xstats record and hand it to gnet_stats_copy_app().
+ * drop_next is only filled in while the dropping flag is set, as a
+ * signed distance from the current codel time.
+ */
+static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	const struct codel_sched_data *priv = qdisc_priv(sch);
+	const struct codel_vars *vars = &priv->vars;
+	struct tc_codel_xstats st = {
+		.maxpacket	= priv->stats.maxpacket,
+		.count		= vars->count,
+		.lastcount	= vars->lastcount,
+		.drop_overlimit	= priv->drop_overlimit,
+		.ldelay		= codel_time_to_us(vars->ldelay),
+		.dropping	= vars->dropping,
+		.ecn_mark	= priv->stats.ecn_mark,
+	};
+
+	if (vars->dropping) {
+		codel_tdiff_t delta = vars->drop_next - codel_get_time();
+
+		st.drop_next = (delta >= 0) ?
+			codel_time_to_us(delta) :
+			-codel_time_to_us(-delta);
+	}
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+/* Qdisc ->reset hook: flush the packet queue and restart the CoDel
+ * state; the configured parameters and the statistics are kept.
+ */
+static void codel_reset(struct Qdisc *sch)
+{
+	struct codel_sched_data *priv;
+
+	qdisc_reset_queue(sch);
+
+	priv = qdisc_priv(sch);
+	codel_vars_init(&priv->vars);
+}
+
+static struct Qdisc_ops codel_qdisc_ops __read_mostly = { /* operations table passed to register_qdisc() at module load */
+ .id = "codel",
+ .priv_size = sizeof(struct codel_sched_data), /* the handlers read this area through qdisc_priv() */
+
+ .enqueue = codel_qdisc_enqueue,
+ .dequeue = codel_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .init = codel_init,
+ .reset = codel_reset,
+ .change = codel_change,
+ .dump = codel_dump,
+ .dump_stats = codel_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init codel_module_init(void)
+{
+ return register_qdisc(&codel_qdisc_ops); /* module load: register the ops table; its result is the init status */
+}
+
+static void __exit codel_module_exit(void)
+{
+ unregister_qdisc(&codel_qdisc_ops); /* module unload: undo the registration done in codel_module_init() */
+}
+
+module_init(codel_module_init)
+module_exit(codel_module_exit)
+
+MODULE_DESCRIPTION("Controlled Delay queue discipline");
+MODULE_AUTHOR("Dave Taht");
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 6b7fe4a..9ce0b4f 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -260,7 +260,8 @@ static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
+ if (nla_put_u32(skb, TCA_DRR_QUANTUM, cl->quantum))
+ goto nla_put_failure;
return nla_nest_end(skb, nest);
nla_put_failure:
@@ -375,8 +376,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cl->deficit = cl->quantum;
}
- bstats_update(&cl->bstats, skb);
-
sch->q.qlen++;
return err;
}
@@ -402,6 +401,8 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
skb = qdisc_dequeue_peeked(cl->qdisc);
if (cl->qdisc->q.qlen == 0)
list_del(&cl->alist);
+
+ bstats_update(&cl->bstats, skb);
qdisc_bstats_update(sch, skb);
sch->q.qlen--;
return skb;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2c79020..3886365 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -265,8 +265,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
drop:
- kfree_skb(skb);
- sch->qstats.drops++;
+ qdisc_drop(skb, sch);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}
@@ -429,8 +428,9 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
- NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
+ if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) ||
+ nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -447,13 +447,16 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT_U16(skb, TCA_DSMARK_INDICES, p->indices);
+ if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
+ goto nla_put_failure;
- if (p->default_index != NO_DEFAULT_INDEX)
- NLA_PUT_U16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index);
+ if (p->default_index != NO_DEFAULT_INDEX &&
+ nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index))
+ goto nla_put_failure;
- if (p->set_tc_index)
- NLA_PUT_FLAG(skb, TCA_DSMARK_SET_TC_INDEX);
+ if (p->set_tc_index &&
+ nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 66effe2..e15a9eb 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -85,7 +85,8 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tc_fifo_qopt opt = { .limit = sch->limit };
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
new file mode 100644
index 0000000..9fc1c62
--- /dev/null
+++ b/net/sched/sch_fq_codel.c
@@ -0,0 +1,626 @@
+/*
+ * Fair Queue CoDel discipline
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/jhash.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/flow_keys.h>
+#include <net/codel.h>
+
+/* Fair Queue CoDel.
+ *
+ * Principles :
+ * Packets are classified (by the internal or an external classifier) into flows.
+ * This is a Stochastic model (as we use a hash, several flows
+ * might be hashed on same slot)
+ * Each flow has a CoDel managed queue.
+ * Flows are linked onto two (Round Robin) lists,
+ * so that new flows have priority on old ones.
+ *
+ * For a given flow, packets are not reordered (CoDel uses a FIFO)
+ * head drops only.
+ * ECN capability is on by default.
+ * Low memory footprint (64 bytes per flow)
+ */
+
+struct fq_codel_flow {
+ struct sk_buff *head; /* oldest queued packet; packets are chained through skb->next */
+ struct sk_buff *tail; /* newest queued packet; only consulted while head is non-NULL */
+ struct list_head flowchain; /* links the flow on new_flows or old_flows; empty while the flow is on neither */
+ int deficit; /* credit: set to/raised by quantum, lowered by the length of each packet sent */
+ u32 dropped; /* number of drops (or ECN marks) on this flow */
+ struct codel_vars cvars; /* CoDel state of this flow, passed to codel_dequeue() */
+}; /* please try to keep this structure <= 64 bytes */
+
+struct fq_codel_sched_data {
+ struct tcf_proto *filter_list; /* optional external classifier */
+ struct fq_codel_flow *flows; /* Flows table [flows_cnt] */
+ u32 *backlogs; /* backlog table [flows_cnt] */
+ u32 flows_cnt; /* number of flows */
+ u32 perturbation; /* hash perturbation */
+ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
+ struct codel_params cparams; /* CoDel settings shared by every flow */
+ struct codel_stats cstats; /* CoDel counters shared by every flow (drop_count, ecn_mark, maxpacket) */
+ u32 drop_overlimit; /* times enqueue had to drop because the packet limit was hit */
+ u32 new_flow_count; /* times a flow was put on new_flows by enqueue */
+
+ struct list_head new_flows; /* list of new flows */
+ struct list_head old_flows; /* list of old flows */
+};
+
+/* Map a packet to a flow slot in [0, flows_cnt).
+ *
+ * The dissected addresses, protocol and ports are hashed together with
+ * the per-qdisc perturbation; the 32-bit hash is then scaled onto the
+ * table size with a multiply-and-shift rather than a modulo.
+ */
+static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
+				  const struct sk_buff *skb)
+{
+	struct flow_keys keys;
+	unsigned int h;
+
+	skb_flow_dissect(skb, &keys);
+	h = jhash_3words((__force u32)keys.dst,
+			 (__force u32)keys.src ^ keys.ip_proto,
+			 (__force u32)keys.ports,
+			 q->perturbation);
+
+	return ((u64)h * q->flows_cnt) >> 32;
+}
+
+/* Pick the flow for a packet.
+ *
+ * Order of precedence: an skb->priority that names this qdisc and a valid
+ * flow number, then the built-in hash when no filter is attached, then
+ * the attached filter chain.
+ *
+ * Returns a 1-based flow index, or 0 when the packet must not be queued.
+ * *qerr is written only on the filter path, which is also the only path
+ * that can return 0.
+ */
+static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
+				      int *qerr)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	u32 minor;
+	int verdict;
+
+	minor = TC_H_MIN(skb->priority);
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    minor > 0 && minor <= q->flows_cnt)
+		return minor;
+
+	if (!q->filter_list)
+		return fq_codel_hash(q, skb) + 1;
+
+	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	verdict = tc_classify(skb, q->filter_list, &res);
+	if (verdict < 0)
+		return 0;
+
+#ifdef CONFIG_NET_CLS_ACT
+	if (verdict == TC_ACT_STOLEN || verdict == TC_ACT_QUEUED) {
+		*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		return 0;
+	}
+	if (verdict == TC_ACT_SHOT)
+		return 0;
+#endif
+	minor = TC_H_MIN(res.classid);
+	return minor <= q->flows_cnt ? minor : 0;
+}
+
+/* Helper functions: these might change when/if skbs use a standard list_head. */
+
+/* Unlink and return the first skb of a flow's queue.
+ * The head pointer is dereferenced unconditionally, so the flow must
+ * not be empty.
+ */
+static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
+{
+	struct sk_buff *first = flow->head;
+
+	flow->head = first->next;
+	first->next = NULL;
+	return first;
+}
+
+/* Append skb at the tail of a flow's packet list. */
+static inline void flow_queue_add(struct fq_codel_flow *flow,
+				  struct sk_buff *skb)
+{
+	if (flow->head != NULL)
+		flow->tail->next = skb;
+	else
+		flow->head = skb;
+
+	flow->tail = skb;
+	skb->next = NULL;
+}
+
+/* Drop the packet at the head of the flow with the largest backlog and
+ * return that flow's 0-based index.
+ *
+ * NOTE(review): nothing here checks that the selected flow actually holds
+ * a packet; callers presumably only invoke this while something is
+ * queued - confirm for users of the ->drop hook.
+ */
+static unsigned int fq_codel_drop(struct Qdisc *sch)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	unsigned int fattest = 0, biggest = 0, slot, pkt_len;
+	struct fq_codel_flow *flow;
+	struct sk_buff *victim;
+
+	/* The queue is full: look for the fattest flow. A linear scan
+	 * sounds expensive, but with 1024 flows it only touches 4KB of
+	 * memory, and it spares the enqueue/dequeue fast path from
+	 * maintaining a complex tree with many cache misses.
+	 */
+	for (slot = 0; slot < q->flows_cnt; slot++) {
+		if (q->backlogs[slot] <= biggest)
+			continue;
+		biggest = q->backlogs[slot];
+		fattest = slot;
+	}
+
+	flow = &q->flows[fattest];
+	victim = dequeue_head(flow);
+	pkt_len = qdisc_pkt_len(victim);
+	kfree_skb(victim);
+
+	q->backlogs[fattest] -= pkt_len;
+	sch->qstats.backlog -= pkt_len;
+	sch->qstats.drops++;
+	sch->q.qlen--;
+	flow->dropped++;
+	return fattest;
+}
+
+/* Qdisc ->enqueue hook.
+ *
+ * Classifies the packet, appends it to its flow and, if the flow was not
+ * on any list, schedules it on new_flows with fresh CoDel state and a
+ * full quantum of deficit. When the total number of queued packets
+ * exceeds sch->limit, one packet is dropped from the flow with the
+ * largest backlog.
+ *
+ * Returns NET_XMIT_SUCCESS when the packet was queued, NET_XMIT_CN when
+ * the over-limit drop hit the packet's own flow, or the code supplied by
+ * fq_codel_classify() when classification rejected the packet.
+ */
+static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	unsigned int idx;
+	struct fq_codel_flow *flow;
+	int uninitialized_var(ret);
+
+	/* fq_codel_classify() returns a 1-based index; 0 means "do not queue"
+	 * and is only returned after ret has been filled in.
+	 */
+	idx = fq_codel_classify(skb, sch, &ret);
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+	idx--;
+
+	codel_set_enqueue_time(skb);
+	flow = &q->flows[idx];
+	flow_queue_add(flow, skb);
+	q->backlogs[idx] += qdisc_pkt_len(skb);
+	sch->qstats.backlog += qdisc_pkt_len(skb);
+
+	if (list_empty(&flow->flowchain)) {
+		list_add_tail(&flow->flowchain, &q->new_flows);
+		codel_vars_init(&flow->cvars);
+		q->new_flow_count++;
+		flow->deficit = q->quantum;
+		flow->dropped = 0;
+	}
+
+	/* Hold up to sch->limit packets, not limit - 1: this matches the
+	 * trimming loop in fq_codel_change(), which only drops while
+	 * qlen > limit.
+	 */
+	if (++sch->q.qlen <= sch->limit)
+		return NET_XMIT_SUCCESS;
+
+	q->drop_overlimit++;
+	/* Return Congestion Notification only if we dropped a packet
+	 * from this flow.
+	 */
+	if (fq_codel_drop(sch) == idx)
+		return NET_XMIT_CN;
+
+	/* As we dropped a packet, better let upper stack know this */
+	qdisc_tree_decrease_qlen(sch, 1);
+	return NET_XMIT_SUCCESS;
+}
+
+/* Queue-pop callback handed to codel_dequeue(). @vars is embedded in the
+ * flow being serviced, so the flow is recovered with container_of().
+ * Updates the flow's backlog slot and the qdisc qlen; the qdisc-wide
+ * backlog is handled in codel and is not reduced here.
+ * Returns NULL when the flow has no packet.
+ */
+static struct sk_buff *dequeue(struct codel_vars *vars, struct Qdisc *sch)
+{
+	struct fq_codel_flow *flow =
+		container_of(vars, struct fq_codel_flow, cvars);
+	struct fq_codel_sched_data *q;
+	struct sk_buff *pkt;
+
+	if (flow->head == NULL)
+		return NULL;
+
+	q = qdisc_priv(sch);
+	pkt = dequeue_head(flow);
+	q->backlogs[flow - q->flows] -= qdisc_pkt_len(pkt);
+	sch->q.qlen--;
+	return pkt;
+}
+
+/* Qdisc ->dequeue hook.
+ *
+ * Services new_flows before old_flows. A flow whose deficit is used up
+ * gets another quantum and goes to the tail of old_flows. A flow that
+ * yields no packet is either moved to old_flows or taken off the lists.
+ * Returns the next packet, or NULL when both lists are empty.
+ */
+static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	struct fq_codel_flow *flow;
+	struct list_head *head;
+	struct sk_buff *skb;
+	u32 drops_before, marks_before;
+
+	for (;;) {
+		head = list_empty(&q->new_flows) ? &q->old_flows :
+						   &q->new_flows;
+		if (list_empty(head))
+			return NULL;
+
+		flow = list_first_entry(head, struct fq_codel_flow, flowchain);
+		if (flow->deficit <= 0) {
+			flow->deficit += q->quantum;
+			list_move_tail(&flow->flowchain, &q->old_flows);
+			continue;
+		}
+
+		drops_before = q->cstats.drop_count;
+		marks_before = q->cstats.ecn_mark;
+
+		skb = codel_dequeue(sch, &q->cparams, &flow->cvars,
+				    &q->cstats, dequeue);
+
+		/* charge this round's drops and ECN marks to the flow */
+		flow->dropped += q->cstats.drop_count - drops_before;
+		flow->dropped += q->cstats.ecn_mark - marks_before;
+
+		if (skb)
+			break;
+
+		/* force a pass through old_flows to prevent starvation */
+		if (head == &q->new_flows && !list_empty(&q->old_flows))
+			list_move_tail(&flow->flowchain, &q->old_flows);
+		else
+			list_del_init(&flow->flowchain);
+	}
+
+	qdisc_bstats_update(sch, skb);
+	flow->deficit -= qdisc_pkt_len(skb);
+
+	/* qdisc_tree_decrease_qlen() must not be called while our qlen is
+	 * 0, or HTB crashes; in that case the report waits for a later
+	 * round.
+	 */
+	if (q->cstats.drop_count != 0 && sch->q.qlen != 0) {
+		qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+		q->cstats.drop_count = 0;
+	}
+	return skb;
+}
+
+/* Qdisc ->reset hook: empty the qdisc by dequeueing and freeing packets
+ * until fq_codel_dequeue() reports that nothing is left.
+ */
+static void fq_codel_reset(struct Qdisc *sch)
+{
+	for (;;) {
+		struct sk_buff *pkt = fq_codel_dequeue(sch);
+
+		if (pkt == NULL)
+			break;
+		kfree_skb(pkt);
+	}
+}
+
+static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { /* validation table passed to nla_parse_nested() in fq_codel_change() */
+ [TCA_FQ_CODEL_TARGET] = { .type = NLA_U32 }, /* multiplied by NSEC_PER_USEC before being stored */
+ [TCA_FQ_CODEL_LIMIT] = { .type = NLA_U32 }, /* stored verbatim in sch->limit */
+ [TCA_FQ_CODEL_INTERVAL] = { .type = NLA_U32 }, /* multiplied by NSEC_PER_USEC before being stored */
+ [TCA_FQ_CODEL_ECN] = { .type = NLA_U32 }, /* any non-zero value enables cparams.ecn */
+ [TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 }, /* accepted only before the flow table exists; range 1..65536 */
+ [TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 }, /* raised to at least 256 when stored */
+};
+
+/* Apply netlink options to an fq_codel qdisc.
+ *
+ * Only the attributes present in @opt are updated. The number of flows
+ * can only be chosen while the flow table has not been allocated yet.
+ * If the queue ends up longer than sch->limit, packets are dequeued and
+ * freed until it fits, and the parents are told how many went away.
+ *
+ * Returns 0 on success, -EINVAL for a NULL @opt, a flow count given
+ * after the table exists or outside 1..65536, or the negative error
+ * reported by nla_parse_nested().
+ */
+static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	int ret;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_FQ_CODEL_MAX, opt, fq_codel_policy);
+	if (ret < 0)
+		return ret;
+
+	if (tb[TCA_FQ_CODEL_FLOWS]) {
+		/* the table is sized once, in fq_codel_init() */
+		if (q->flows)
+			return -EINVAL;
+		q->flows_cnt = nla_get_u32(tb[TCA_FQ_CODEL_FLOWS]);
+		if (q->flows_cnt == 0 || q->flows_cnt > 65536)
+			return -EINVAL;
+	}
+
+	sch_tree_lock(sch);
+
+	if (tb[TCA_FQ_CODEL_TARGET]) {
+		u64 us = nla_get_u32(tb[TCA_FQ_CODEL_TARGET]);
+
+		q->cparams.target = (us * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_FQ_CODEL_INTERVAL]) {
+		u64 us = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
+
+		q->cparams.interval = (us * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
+	if (tb[TCA_FQ_CODEL_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_FQ_CODEL_LIMIT]);
+
+	if (tb[TCA_FQ_CODEL_ECN])
+		q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
+
+	if (tb[TCA_FQ_CODEL_QUANTUM])
+		q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+
+	/* Shrink the queue to the (possibly lowered) limit. */
+	while (sch->q.qlen > sch->limit) {
+		kfree_skb(fq_codel_dequeue(sch));
+		q->cstats.drop_count++;
+	}
+	qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+	q->cstats.drop_count = 0;
+
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/* Allocate @sz zeroed bytes: kzalloc() first (without the allocation
+ * failure warning), vzalloc() as the fallback. Returns NULL when both
+ * fail. Release with fq_codel_free().
+ */
+static void *fq_codel_zalloc(size_t sz)
+{
+	void *mem = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
+
+	return mem ? mem : vzalloc(sz);
+}
+
+/* Release memory obtained from fq_codel_zalloc(), using the free routine
+ * that matches where the address lives. A NULL @addr is ignored.
+ */
+static void fq_codel_free(void *addr)
+{
+	if (!addr)
+		return;
+
+	if (!is_vmalloc_addr(addr))
+		kfree(addr);
+	else
+		vfree(addr);
+}
+
+/* Qdisc ->destroy hook: tear down the filter chain, then release the
+ * backlog and flow tables.
+ */
+static void fq_codel_destroy(struct Qdisc *sch)
+{
+	struct fq_codel_sched_data *priv = qdisc_priv(sch);
+
+	tcf_destroy_chain(&priv->filter_list);
+
+	fq_codel_free(priv->backlogs);
+	fq_codel_free(priv->flows);
+}
+
+/* Qdisc ->init hook.
+ *
+ * Installs the defaults (limit of 10*1024 packets, 1024 flows, quantum
+ * of psched_mtu() of the device, random hash perturbation, ECN on),
+ * applies any options in @opt, then allocates the flow and backlog
+ * tables if they do not exist yet.
+ *
+ * Returns 0 on success, the error from fq_codel_change(), or -ENOMEM
+ * when a table cannot be allocated.
+ */
+static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	u32 i;
+
+	sch->limit = 10*1024;
+	q->flows_cnt = 1024;
+	q->quantum = psched_mtu(qdisc_dev(sch));
+	q->perturbation = net_random();
+	INIT_LIST_HEAD(&q->new_flows);
+	INIT_LIST_HEAD(&q->old_flows);
+	codel_params_init(&q->cparams);
+	codel_stats_init(&q->cstats);
+	q->cparams.ecn = true;
+
+	if (opt) {
+		int err = fq_codel_change(sch, opt);
+		if (err)
+			return err;
+	}
+
+	if (!q->flows) {
+		q->flows = fq_codel_zalloc(q->flows_cnt *
+					   sizeof(struct fq_codel_flow));
+		if (!q->flows)
+			return -ENOMEM;
+		q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
+		if (!q->backlogs) {
+			fq_codel_free(q->flows);
+			/* Do not leave a dangling pointer behind: a later
+			 * fq_codel_destroy() would free it a second time.
+			 */
+			q->flows = NULL;
+			return -ENOMEM;
+		}
+		/* an empty flowchain marks a flow that is on neither list */
+		for (i = 0; i < q->flows_cnt; i++) {
+			struct fq_codel_flow *flow = q->flows + i;
+
+			INIT_LIST_HEAD(&flow->flowchain);
+		}
+	}
+	if (sch->limit >= 1)
+		sch->flags |= TCQ_F_CAN_BYPASS;
+	else
+		sch->flags &= ~TCQ_F_CAN_BYPASS;
+	return 0;
+}
+
+/* Qdisc ->dump hook: emit the current configuration as a TCA_OPTIONS
+ * nest. Times are converted with codel_time_to_us() before being
+ * written. Returns the value of nla_nest_end() on success. On failure
+ * the partially written nest is cancelled, as codel_dump() does, and
+ * -1 is returned.
+ */
+static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts;
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_FQ_CODEL_TARGET,
+			codel_time_to_us(q->cparams.target)) ||
+	    nla_put_u32(skb, TCA_FQ_CODEL_LIMIT,
+			sch->limit) ||
+	    nla_put_u32(skb, TCA_FQ_CODEL_INTERVAL,
+			codel_time_to_us(q->cparams.interval)) ||
+	    nla_put_u32(skb, TCA_FQ_CODEL_ECN,
+			q->cparams.ecn) ||
+	    nla_put_u32(skb, TCA_FQ_CODEL_QUANTUM,
+			q->quantum) ||
+	    nla_put_u32(skb, TCA_FQ_CODEL_FLOWS,
+			q->flows_cnt))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	/* drop whatever part of the nest made it into the skb */
+	nla_nest_cancel(skb, opts);
+	return -1;
+}
+
+/* Qdisc ->dump_stats hook: fill a qdisc-type tc_fq_codel_xstats record
+ * with the shared counters and the current lengths of the two flow
+ * lists, then hand it to gnet_stats_copy_app().
+ */
+static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	struct list_head *cursor;
+	struct tc_fq_codel_xstats st = {
+		.type = TCA_FQ_CODEL_XSTATS_QDISC,
+	};
+
+	/* the list lengths are not stored anywhere, so count them */
+	list_for_each(cursor, &q->new_flows)
+		st.qdisc_stats.new_flows_len++;
+	list_for_each(cursor, &q->old_flows)
+		st.qdisc_stats.old_flows_len++;
+
+	st.qdisc_stats.new_flow_count = q->new_flow_count;
+	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
+	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
+	st.qdisc_stats.maxpacket = q->cstats.maxpacket;
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ return NULL; /* ->leaf hook: always NULL, whatever class is asked for */
+}
+
+static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid)
+{
+ return 0; /* ->get hook: always 0, whatever classid is passed */
+}
+
+static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ /* we cannot bypass queue discipline anymore */
+ sch->flags &= ~TCQ_F_CAN_BYPASS;
+ return 0; /* ->bind_tcf hook: always 0; parent and classid are unused */
+}
+
+static void fq_codel_put(struct Qdisc *q, unsigned long cl)
+{ /* intentionally empty; installed as both .put and .unbind_tcf in fq_codel_class_ops */
+}
+
+/* ->tcf_chain hook: the only filter chain is the qdisc-wide one, so a
+ * non-zero class handle gets NULL.
+ */
+static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct fq_codel_sched_data *priv = qdisc_priv(sch);
+
+	return cl ? NULL : &priv->filter_list;
+}
+
+static int fq_codel_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ tcm->tcm_handle |= TC_H_MIN(cl); /* merge the class number into the handle's minor part */
+ return 0; /* nothing is written to skb */
+}
+
+/* ->dump_stats class hook: report the queue statistics of flow @cl
+ * (1-based) plus a class-type tc_fq_codel_xstats record.
+ *
+ * For a class number outside the flow table only the zeroed queue
+ * statistics are emitted. Returns -1 when gnet_stats_copy_queue()
+ * fails; otherwise the value of gnet_stats_copy_app(), or 0 for an
+ * out-of-range class.
+ */
+static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+				     struct gnet_dump *d)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_fq_codel_xstats xstats;
+	u32 idx = cl - 1;
+	bool valid = idx < q->flows_cnt;
+
+	if (valid) {
+		const struct fq_codel_flow *flow = &q->flows[idx];
+		const struct codel_vars *cv = &flow->cvars;
+		const struct sk_buff *skb;
+
+		memset(&xstats, 0, sizeof(xstats));
+		xstats.type = TCA_FQ_CODEL_XSTATS_CLASS;
+		xstats.class_stats.deficit = flow->deficit;
+		xstats.class_stats.ldelay = codel_time_to_us(cv->ldelay);
+		xstats.class_stats.count = cv->count;
+		xstats.class_stats.lastcount = cv->lastcount;
+		xstats.class_stats.dropping = cv->dropping;
+		if (cv->dropping) {
+			codel_tdiff_t delta = cv->drop_next - codel_get_time();
+
+			if (delta >= 0)
+				xstats.class_stats.drop_next =
+					codel_time_to_us(delta);
+			else
+				xstats.class_stats.drop_next =
+					-codel_time_to_us(-delta);
+		}
+
+		/* the per-flow packet count is not stored; walk the list */
+		for (skb = flow->head; skb; skb = skb->next)
+			qs.qlen++;
+		qs.backlog = q->backlogs[idx];
+		qs.drops = flow->dropped;
+	}
+
+	if (gnet_stats_copy_queue(d, &qs) < 0)
+		return -1;
+	if (!valid)
+		return 0;
+	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+}
+
+/* ->walk class hook: call arg->fn for every flow that is currently on a
+ * flow list, passing its 1-based number. arg->count advances for every
+ * slot, listed or not; slots are skipped while count is below arg->skip.
+ * A negative return from arg->fn sets arg->stop and ends the walk.
+ */
+static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct fq_codel_sched_data *q = qdisc_priv(sch);
+	unsigned int slot;
+
+	if (arg->stop)
+		return;
+
+	for (slot = 0; slot < q->flows_cnt; slot++) {
+		bool visit = !list_empty(&q->flows[slot].flowchain) &&
+			     arg->count >= arg->skip;
+
+		if (visit && arg->fn(sch, slot + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops fq_codel_class_ops = { /* class hooks; referenced from fq_codel_qdisc_ops.cl_ops */
+ .leaf = fq_codel_leaf,
+ .get = fq_codel_get,
+ .put = fq_codel_put,
+ .tcf_chain = fq_codel_find_tcf,
+ .bind_tcf = fq_codel_bind,
+ .unbind_tcf = fq_codel_put, /* same empty function as .put */
+ .dump = fq_codel_dump_class,
+ .dump_stats = fq_codel_dump_class_stats,
+ .walk = fq_codel_walk,
+};
+
+static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = { /* operations table passed to register_qdisc() at module load */
+ .cl_ops = &fq_codel_class_ops,
+ .id = "fq_codel",
+ .priv_size = sizeof(struct fq_codel_sched_data), /* the handlers read this area through qdisc_priv() */
+ .enqueue = fq_codel_enqueue,
+ .dequeue = fq_codel_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .drop = fq_codel_drop, /* NOTE(review): returns a flow index, not a packet length - confirm what ->drop callers expect */
+ .init = fq_codel_init,
+ .reset = fq_codel_reset,
+ .destroy = fq_codel_destroy,
+ .change = fq_codel_change,
+ .dump = fq_codel_dump,
+ .dump_stats = fq_codel_dump_stats,
+ .owner = THIS_MODULE,
+};
+
+static int __init fq_codel_module_init(void)
+{
+ return register_qdisc(&fq_codel_qdisc_ops); /* module load: register the ops table; its result is the init status */
+}
+
+static void __exit fq_codel_module_exit(void)
+{
+ unregister_qdisc(&fq_codel_qdisc_ops); /* module unload: undo the registration done in fq_codel_module_init() */
+}
+
+module_init(fq_codel_module_init)
+module_exit(fq_codel_module_exit)
+MODULE_AUTHOR("Eric Dumazet");
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 67fc573..511323e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -86,9 +86,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
* deadloop is detected. Return OK to try the next skb.
*/
kfree_skb(skb);
- if (net_ratelimit())
- pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
- dev_queue->dev->name);
+ net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n",
+ dev_queue->dev->name);
ret = qdisc_qlen(q);
} else {
/*
@@ -136,9 +135,9 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
ret = handle_dev_cpu_collision(skb, txq, q);
} else {
/* Driver returned NETDEV_TX_BUSY - requeue skb */
- if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
- pr_warning("BUG %s code %d qlen %d\n",
- dev->name, ret, q->q.qlen);
+ if (unlikely(ret != NETDEV_TX_BUSY))
+ net_warn_ratelimited("BUG %s code %d qlen %d\n",
+ dev->name, ret, q->q.qlen);
ret = dev_requeue_skb(skb, q);
}
@@ -512,7 +511,8 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 0b15236..e901583 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -255,10 +255,8 @@ static struct sk_buff *gred_dequeue(struct Qdisc *sch)
u16 dp = tc_index_to_dp(skb);
if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
- if (net_ratelimit())
- pr_warning("GRED: Unable to relocate VQ 0x%x "
- "after dequeue, screwing up "
- "backlog.\n", tc_index_to_dp(skb));
+ net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
+ tc_index_to_dp(skb));
} else {
q->backlog -= qdisc_pkt_len(skb);
@@ -287,10 +285,8 @@ static unsigned int gred_drop(struct Qdisc *sch)
u16 dp = tc_index_to_dp(skb);
if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
- if (net_ratelimit())
- pr_warning("GRED: Unable to relocate VQ 0x%x "
- "while dropping, screwing up "
- "backlog.\n", tc_index_to_dp(skb));
+ net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x while dropping, screwing up backlog\n",
+ tc_index_to_dp(skb));
} else {
q->backlog -= len;
q->stats.other++;
@@ -521,14 +517,16 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+ if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
+ goto nla_put_failure;
for (i = 0; i < MAX_DPs; i++) {
struct gred_sched_data *q = table->tab[i];
max_p[i] = q ? q->parms.max_P : 0;
}
- NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+ if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
+ goto nla_put_failure;
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
@@ -565,11 +563,8 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.packets = q->packetsin;
opt.bytesin = q->bytesin;
- if (gred_wred_mode(table)) {
- q->vars.qidlestart =
- table->tab[table->def]->vars.qidlestart;
- q->vars.qavg = table->tab[table->def]->vars.qavg;
- }
+ if (gred_wred_mode(table))
+ gred_load_wred_set(table, q);
opt.qave = red_calc_qavg(&q->parms, &q->vars, q->vars.qavg);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 9bdca2e..6c2ec45 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1305,7 +1305,8 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
tsc.m1 = sm2m(sc->sm1);
tsc.d = dx2d(sc->dx);
tsc.m2 = sm2m(sc->sm2);
- NLA_PUT(skb, attr, sizeof(tsc), &tsc);
+ if (nla_put(skb, attr, sizeof(tsc), &tsc))
+ goto nla_put_failure;
return skb->len;
@@ -1573,7 +1574,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
}
qopt.defcls = q->defcls;
- NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
@@ -1607,7 +1609,6 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl->qdisc->q.qlen == 1)
set_active(cl, qdisc_pkt_len(skb));
- bstats_update(&cl->bstats, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -1655,6 +1656,7 @@ hfsc_dequeue(struct Qdisc *sch)
return NULL;
}
+ bstats_update(&cl->bstats, skb);
update_vf(cl, qdisc_pkt_len(skb), cur_time);
if (realtime)
cl->cl_cumul += qdisc_pkt_len(skb);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 29b942c..9d75b77 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -558,9 +558,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
__skb_queue_tail(&q->direct_queue, skb);
q->direct_pkts++;
} else {
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
+ return qdisc_drop(skb, sch);
}
#ifdef CONFIG_NET_CLS_ACT
} else if (!cl) {
@@ -576,7 +574,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
return ret;
} else {
- bstats_update(&cl->bstats, skb);
htb_activate(q, cl);
}
@@ -837,6 +834,7 @@ next:
} while (cl != start);
if (likely(skb != NULL)) {
+ bstats_update(&cl->bstats, skb);
cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
if (cl->un.leaf.deficit[level] < 0) {
cl->un.leaf.deficit[level] += cl->quantum;
@@ -1051,7 +1049,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
+ if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
spin_unlock_bh(root_lock);
@@ -1090,7 +1089,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
opt.quantum = cl->quantum;
opt.prio = cl->prio;
opt.level = cl->level;
- NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
spin_unlock_bh(root_lock);
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 28de430..d1831ca 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -247,7 +247,8 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[i] = dev->tc_to_txq[i].offset;
}
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
nla_put_failure:
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 49131d7..2a2b096 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -284,7 +284,8 @@ static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.bands = q->bands;
opt.max_bands = q->max_bands;
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5da548f..a2a95aa 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -26,6 +26,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
#define VERSION "1.3"
@@ -78,6 +79,7 @@ struct netem_sched_data {
psched_tdiff_t jitter;
u32 loss;
+ u32 ecn;
u32 limit;
u32 counter;
u32 gap;
@@ -374,9 +376,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
++count;
/* Drop packet? */
- if (loss_event(q))
- --count;
-
+ if (loss_event(q)) {
+ if (q->ecn && INET_ECN_set_ce(skb))
+ sch->qstats.drops++; /* mark packet */
+ else
+ --count;
+ }
if (count == 0) {
sch->qstats.drops++;
kfree_skb(skb);
@@ -408,10 +413,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb))) {
- sch->qstats.drops++;
- return NET_XMIT_DROP;
- }
+ skb_checksum_help(skb)))
+ return qdisc_drop(skb, sch);
skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
}
@@ -706,6 +709,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
[TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
[TCA_NETEM_LOSS] = { .type = NLA_NESTED },
+ [TCA_NETEM_ECN] = { .type = NLA_U32 },
};
static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -776,6 +780,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_NETEM_RATE])
get_rate(sch, tb[TCA_NETEM_RATE]);
+ if (tb[TCA_NETEM_ECN])
+ q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
+
q->loss_model = CLG_RANDOM;
if (tb[TCA_NETEM_LOSS])
ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -834,7 +841,8 @@ static int dump_loss_model(const struct netem_sched_data *q,
.p23 = q->clg.a5,
};
- NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
+ if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
+ goto nla_put_failure;
break;
}
case CLG_GILB_ELL: {
@@ -845,7 +853,8 @@ static int dump_loss_model(const struct netem_sched_data *q,
.k1 = q->clg.a4,
};
- NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
+ if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
+ goto nla_put_failure;
break;
}
}
@@ -874,26 +883,34 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
qopt.loss = q->loss;
qopt.gap = q->gap;
qopt.duplicate = q->duplicate;
- NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
+ goto nla_put_failure;
cor.delay_corr = q->delay_cor.rho;
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
- NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+ if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
+ goto nla_put_failure;
reorder.probability = q->reorder;
reorder.correlation = q->reorder_cor.rho;
- NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+ if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
+ goto nla_put_failure;
corrupt.probability = q->corrupt;
corrupt.correlation = q->corrupt_cor.rho;
- NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+ if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
+ goto nla_put_failure;
rate.rate = q->rate;
rate.packet_overhead = q->packet_overhead;
rate.cell_size = q->cell_size;
rate.cell_overhead = q->cell_overhead;
- NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+ if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
+ goto nla_put_failure;
+
+ if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
+ goto nla_put_failure;
if (dump_loss_model(q, skb) != 0)
goto nla_put_failure;
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
new file mode 100644
index 0000000..89f8fcf
--- /dev/null
+++ b/net/sched/sch_plug.c
@@ -0,0 +1,233 @@
+/*
+ * sch_plug.c Queue traffic until an explicit release command
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * There are two ways to use this qdisc:
+ * 1. A simple "instantaneous" plug/unplug operation, by issuing an alternating
+ * sequence of TCQ_PLUG_BUFFER & TCQ_PLUG_RELEASE_INDEFINITE commands.
+ *
+ * 2. For network output buffering (a.k.a. output commit) functionality.
+ * Output commit property is commonly used by applications using checkpoint
+ * based fault-tolerance to ensure that the checkpoint from which a system
+ * is being restored is consistent w.r.t. the outside world.
+ *
+ * Consider, e.g., Remus - a Virtual Machine checkpointing system,
+ * wherein a VM is checkpointed, say every 50ms. The checkpoint is replicated
+ * asynchronously to the backup host, while the VM continues executing the
+ * next epoch speculatively.
+ *
+ * The following is a typical sequence of output buffer operations:
+ * 1. At epoch i, start_buffer(i)
+ * 2. At end of epoch i (i.e. after 50ms):
+ * 2.1 Stop VM and take checkpoint(i).
+ * 2.2 start_buffer(i+1) and Resume VM
+ * 3. While speculatively executing epoch(i+1), asynchronously replicate
+ * checkpoint(i) to backup host.
+ * 4. When checkpoint_ack(i) is received from backup, release_buffer(i)
+ * Thus, this Qdisc would receive the following sequence of commands:
+ * TCQ_PLUG_BUFFER (epoch i)
+ * .. TCQ_PLUG_BUFFER (epoch i+1)
+ * ....TCQ_PLUG_RELEASE_ONE (epoch i)
+ * ......TCQ_PLUG_BUFFER (epoch i+2)
+ * ........
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/pkt_sched.h>
+
+/*
+ * State of the queue, when used for network output buffering:
+ *
+ * plug(i+1) plug(i) head
+ * ------------------+--------------------+---------------->
+ * | |
+ * | |
+ * pkts_current_epoch| pkts_last_epoch |pkts_to_release
+ * ----------------->|<--------+--------->|+--------------->
+ * v v
+ *
+ */
+
+struct plug_sched_data {
+ /* If true, the dequeue function releases all packets
+ * from head to end of the queue. The queue turns into
+ * a pass-through queue for newly arriving packets.
+ */
+ bool unplug_indefinite;
+
+ /* Queue Limit in bytes */
+ u32 limit;
+
+ /* Number of packets (output) from the current speculatively
+ * executing epoch.
+ */
+ u32 pkts_current_epoch;
+
+ /* Number of packets corresponding to the recently finished
+ * epoch. These will be released when we receive a
+ * TCQ_PLUG_RELEASE_ONE command. This command is typically
+ * issued after committing a checkpoint at the target.
+ */
+ u32 pkts_last_epoch;
+
+ /*
+ * Number of packets from the head of the queue, that can
+ * be released (committed checkpoint).
+ */
+ u32 pkts_to_release;
+};
+
+static int plug_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct plug_sched_data *q = qdisc_priv(sch);
+
+ if (likely(sch->qstats.backlog + skb->len <= q->limit)) {
+ if (!q->unplug_indefinite)
+ q->pkts_current_epoch++;
+ return qdisc_enqueue_tail(skb, sch);
+ }
+
+ return qdisc_reshape_fail(skb, sch);
+}
+
+static struct sk_buff *plug_dequeue(struct Qdisc *sch)
+{
+ struct plug_sched_data *q = qdisc_priv(sch);
+
+ if (qdisc_is_throttled(sch))
+ return NULL;
+
+ if (!q->unplug_indefinite) {
+ if (!q->pkts_to_release) {
+ /* No more packets to dequeue. Block the queue
+ * and wait for the next release command.
+ */
+ qdisc_throttled(sch);
+ return NULL;
+ }
+ q->pkts_to_release--;
+ }
+
+ return qdisc_dequeue_head(sch);
+}
+
+static int plug_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct plug_sched_data *q = qdisc_priv(sch);
+
+ q->pkts_current_epoch = 0;
+ q->pkts_last_epoch = 0;
+ q->pkts_to_release = 0;
+ q->unplug_indefinite = false;
+
+ if (opt == NULL) {
+ /* We will set a default limit of 100 pkts (~150kB)
+ * in case tx_queue_len is not available. The
+ * default value is completely arbitrary.
+ */
+ u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100;
+ q->limit = pkt_limit * psched_mtu(qdisc_dev(sch));
+ } else {
+ struct tc_plug_qopt *ctl = nla_data(opt);
+
+ if (nla_len(opt) < sizeof(*ctl))
+ return -EINVAL;
+
+ q->limit = ctl->limit;
+ }
+
+ qdisc_throttled(sch);
+ return 0;
+}
+
+/* Receives 4 types of messages:
+ * TCQ_PLUG_BUFFER: Insert a plug into the queue and
+ * buffer any incoming packets
+ * TCQ_PLUG_RELEASE_ONE: Dequeue packets from queue head
+ * to beginning of the next plug.
+ * TCQ_PLUG_RELEASE_INDEFINITE: Dequeue all packets from queue.
+ * Stop buffering packets until the next TCQ_PLUG_BUFFER
+ * command is received (just act as a pass-thru queue).
+ * TCQ_PLUG_LIMIT: Increase/decrease queue size
+ */
+static int plug_change(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct plug_sched_data *q = qdisc_priv(sch);
+ struct tc_plug_qopt *msg;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ msg = nla_data(opt);
+ if (nla_len(opt) < sizeof(*msg))
+ return -EINVAL;
+
+ switch (msg->action) {
+ case TCQ_PLUG_BUFFER:
+ /* Save size of the current buffer */
+ q->pkts_last_epoch = q->pkts_current_epoch;
+ q->pkts_current_epoch = 0;
+ if (q->unplug_indefinite)
+ qdisc_throttled(sch);
+ q->unplug_indefinite = false;
+ break;
+ case TCQ_PLUG_RELEASE_ONE:
+ /* Add packets from the last complete buffer to the
+ * packets to be released set.
+ */
+ q->pkts_to_release += q->pkts_last_epoch;
+ q->pkts_last_epoch = 0;
+ qdisc_unthrottled(sch);
+ netif_schedule_queue(sch->dev_queue);
+ break;
+ case TCQ_PLUG_RELEASE_INDEFINITE:
+ q->unplug_indefinite = true;
+ q->pkts_to_release = 0;
+ q->pkts_last_epoch = 0;
+ q->pkts_current_epoch = 0;
+ qdisc_unthrottled(sch);
+ netif_schedule_queue(sch->dev_queue);
+ break;
+ case TCQ_PLUG_LIMIT:
+ /* Limit is supplied in bytes */
+ q->limit = msg->limit;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
+ .id = "plug",
+ .priv_size = sizeof(struct plug_sched_data),
+ .enqueue = plug_enqueue,
+ .dequeue = plug_dequeue,
+ .peek = qdisc_peek_head,
+ .init = plug_init,
+ .change = plug_change,
+ .owner = THIS_MODULE,
+};
+
+static int __init plug_module_init(void)
+{
+ return register_qdisc(&plug_qdisc_ops);
+}
+
+static void __exit plug_module_exit(void)
+{
+ unregister_qdisc(&plug_qdisc_ops);
+}
+module_init(plug_module_init)
+module_exit(plug_module_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index b5d56a2..79359b6 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -247,7 +247,8 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index e68cb44..9af01f3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -429,8 +429,9 @@ static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
- NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
- NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
+ if (nla_put_u32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w) ||
+ nla_put_u32(skb, TCA_QFQ_LMAX, cl->lmax))
+ goto nla_put_failure;
return nla_nest_end(skb, nest);
nla_put_failure:
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index a5cc301..633e32d 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -272,8 +272,9 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
- NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
+ if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
+ nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index d7eea99..74305c8 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -570,7 +570,8 @@ static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
sch->qstats.backlog = q->qdisc->qstats.backlog;
opts = nla_nest_start(skb, TCA_OPTIONS);
- NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_SFB_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 02a21ab..d3a1bc2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -812,7 +812,8 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
opt.flags = q->flags;
- NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ goto nla_put_failure;
return skb->len;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b8e1563..4b056c15 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -359,7 +359,8 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
memset(&opt.peakrate, 0, sizeof(opt.peakrate));
opt.mtu = q->mtu;
opt.buffer = q->buffer;
- NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
+ if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
+ goto nla_put_failure;
nla_nest_end(skb, nest);
return skb->len;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 4532659..ca0c296 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -88,9 +88,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;
}
- kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
+ return qdisc_drop(skb, sch);
}
static struct sk_buff *
OpenPOWER on IntegriCloud