From 7316ae88c43d47f6503f4c29b4973204e33c3411 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Fri, 19 Mar 2010 15:40:13 +0000 Subject: net_sched: make traffic control network namespace aware Mostly minor changes to add a net argument to various functions and remove initial network namespace checks. Make /proc/net/psched per network namespace. Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/sched/act_api.c | 45 +++++++++++---------- net/sched/cls_api.c | 30 ++++++-------- net/sched/sch_api.c | 112 +++++++++++++++++++++++++++++++++------------------- 3 files changed, 107 insertions(+), 80 deletions(-) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 64f5e32..7a558da 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -667,7 +667,8 @@ nlmsg_failure: } static int -act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) +act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, + struct tc_action *a, int event) { struct sk_buff *skb; @@ -679,7 +680,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) return -EINVAL; } - return rtnl_unicast(skb, &init_net, pid); + return rtnl_unicast(skb, net, pid); } static struct tc_action * @@ -749,7 +750,8 @@ static struct tc_action *create_a(int i) return act; } -static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +static int tca_action_flush(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 pid) { struct sk_buff *skb; unsigned char *b; @@ -808,7 +810,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -825,7 +827,8 @@ noflush_out: } static int -tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) +tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; @@ -837,7 +840,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(tb[1], n, pid); + return tca_action_flush(net, tb[1], n, pid); else return -EINVAL; } @@ -858,7 +861,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) } if (event == RTM_GETACTION) - ret = act_get_notify(pid, n, head, event); + ret = act_get_notify(net, pid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -877,7 +880,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -888,8 +891,8 @@ err: return ret; } -static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, - u16 flags) +static int tcf_add_notify(struct net *net, struct tc_action *a, + u32 pid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -922,7 +925,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -935,7 +938,8 @@ nlmsg_failure: static int -tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) +tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, + u32 pid, int ovr) { int ret = 0; struct tc_action *act; @@ -953,7 +957,7 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) /* dump then free all the actions after update; inserted policy * stays intact * */ - ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -969,9 +973,6 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = 0, ovr = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); if (ret < 0) return ret; @@ -994,15 +995,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags&NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_DELACTION); break; case RTM_GETACTION: - ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION); + ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, + pid, RTM_GETACTION); break; default: BUG(); @@ -1042,7 +1045,6 @@ find_dump_kind(const struct nlmsghdr *n) static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -1052,9 +1054,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); struct nlattr *kind = find_dump_kind(cb->nlh); - if (!net_eq(net, &init_net)) - return 0; - if (kind == NULL) { printk("tc_dump_action: action bad kind\n"); return 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3725d8f..4a795d9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -98,8 +98,9 @@ out: } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event); +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event); /* Select new prio value from the range, managed by kernel. */ @@ -137,9 +138,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) int err; int tp_created = 0; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: t = NLMSG_DATA(n); protocol = TC_H_MIN(t->tcm_info); @@ -158,7 +156,7 @@ replay: /* Find head of filter chain. */ /* Find link */ - dev = __dev_get_by_index(&init_net, t->tcm_ifindex); + dev = __dev_get_by_index(net, t->tcm_ifindex); if (dev == NULL) return -ENODEV; @@ -282,7 +280,7 @@ replay: *back = tp->next; spin_unlock_bh(root_lock); - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); err = 0; goto errout; @@ -305,10 +303,10 @@ replay: case RTM_DELTFILTER: err = tp->ops->delete(tp, fh); if (err == 0) - tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); goto errout; case RTM_GETTFILTER: - err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -324,7 +322,7 @@ replay: *back = tp; spin_unlock_bh(root_lock); } - tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); + tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { if (tp_created) tcf_destroy(tp); @@ -370,8 +368,9 @@ nla_put_failure: return -1; } -static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct tcf_proto *tp, unsigned long fh, int event) +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + unsigned long fh, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -385,7 +384,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -418,12 +417,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return skb->len; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return skb->len; if (!tcm->tcm_parent) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6cd4910..6d6fe16 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -34,10 +34,12 @@ #include #include -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event); +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event); /* @@ -638,11 +640,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); -static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, +static void notify_and_destroy(struct net *net, struct sk_buff *skb, + struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) { if (new || old) - qdisc_notify(skb, n, clid, old, new); + qdisc_notify(net, skb, n, clid, old, new); if (old) qdisc_destroy(old); @@ -662,6 +665,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct Qdisc *new, struct Qdisc *old) { struct Qdisc *q = old; + struct net *net = dev_net(dev); int err = 0; if (parent == NULL) { @@ -698,12 +702,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } if (!ingress) { - notify_and_destroy(skb, n, classid, dev->qdisc, new); + notify_and_destroy(net, skb, n, classid, + dev->qdisc, new); if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; } else { - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } if (dev->flags & IFF_UP) @@ -721,7 +726,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, err = -ENOENT; } if (!err) - notify_and_destroy(skb, n, classid, old, new); + notify_and_destroy(net, skb, n, classid, old, new); } return err; } @@ -947,10 +952,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *p = NULL; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -990,7 +992,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) return err; } else { - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); } return 0; } @@ -1009,16 +1011,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) struct Qdisc *q, *p; int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - replay: /* Reinit, just in case something touches this. */ tcm = NLMSG_DATA(n); clid = tcm->tcm_parent; q = p = NULL; - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1105,7 +1104,7 @@ replay: return -EINVAL; err = qdisc_change(q, tca); if (err == 0) - qdisc_notify(skb, n, clid, NULL, q); + qdisc_notify(net, skb, n, clid, NULL, q); return err; create_n_graft: @@ -1195,8 +1194,9 @@ nla_put_failure: return -1; } -static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, - u32 clid, struct Qdisc *old, struct Qdisc *new) +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -1215,7 +1215,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1274,15 +1274,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; - if (!net_eq(net, &init_net)) - return 0; - s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; rcu_read_lock(); idx = 0; - for_each_netdev_rcu(&init_net, dev) { + for_each_netdev_rcu(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) @@ -1334,10 +1331,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) u32 qid = TC_H_MAJ(clid); int err; - if (!net_eq(net, &init_net)) - return -EINVAL; - - if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return -ENODEV; err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); @@ -1418,10 +1412,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); goto out; case RTM_GETTCLASS: - err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); + err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); goto out; default: err = -EINVAL; @@ -1434,7 +1428,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (cops->change) err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) - tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); + tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); out: if (cl) @@ -1486,8 +1480,9 @@ nla_put_failure: return -1; } -static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, - struct Qdisc *q, unsigned long cl, int event) +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) { struct sk_buff *skb; u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; @@ -1501,7 +1496,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args @@ -1576,12 +1571,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (!net_eq(net, &init_net)) - return 0; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) return 0; - if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) return 0; s_t = cb->args[0]; @@ -1701,15 +1693,55 @@ static const struct file_operations psched_fops = { .llseek = seq_lseek, .release = single_release, }; + +static int __net_init psched_net_init(struct net *net) +{ + struct proc_dir_entry *e; + + e = proc_net_fops_create(net, "psched", 0, &psched_fops); + if (e == NULL) + return -ENOMEM; + + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ + proc_net_remove(net, "psched"); + + return; +} +#else +static int __net_init psched_net_init(struct net *net) +{ + return 0; +} + +static void __net_exit psched_net_exit(struct net *net) +{ +} #endif +static struct pernet_operations psched_net_ops = { + .init = psched_net_init, + .exit = psched_net_exit, +}; + static int __init pktsched_init(void) { + int err; + + err = register_pernet_subsys(&psched_net_ops); + if (err) { + printk(KERN_ERR "pktsched_init: " + "cannot initialize per netns operations\n"); + return err; + } + register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); - proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); -- cgit v1.1 From 8e039d84b323c4503c4d56863faa47c783660826 Mon Sep 17 00:00:00 2001 From: Ben Blum Date: Tue, 23 Mar 2010 05:24:03 +0000 Subject: cgroups: net_cls as module Allows the net_cls cgroup subsystem to be compiled as a module This patch modifies net/sched/cls_cgroup.c to allow the net_cls subsystem to be optionally compiled as a module instead of builtin. The cgroup_subsys struct is moved around a bit to allow the subsys_id to be either declared as a compile-time constant by the cgroup_subsys.h include in cgroup.h, or, if it's a module, initialized within the struct by cgroup_load_subsys. Signed-off-by: Ben Blum Acked-by: Li Zefan Cc: Paul Menage Cc: "David S. Miller" Cc: KAMEZAWA Hiroyuki Cc: Lai Jiangshan Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sched/Kconfig | 5 ++++- net/sched/cls_cgroup.c | 36 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'net/sched') diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 21f9c76..2f691fb 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -328,13 +328,16 @@ config NET_CLS_FLOW module will be called cls_flow. config NET_CLS_CGROUP - bool "Control Group Classifier" + tristate "Control Group Classifier" select NET_CLS depends on CGROUPS ---help--- Say Y here if you want to classify packets based on the control cgroup of their process. + To compile this code as a module, choose M here: the + module will be called cls_cgroup. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index e4877ca..7f27d2c 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -24,6 +24,25 @@ struct cgroup_cls_state u32 classid; }; +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp); +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp); +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp); + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, +#ifdef CONFIG_NET_CLS_CGROUP + .subsys_id = net_cls_subsys_id, +#else +#define net_cls_subsys_id net_cls_subsys.subsys_id +#endif + .module = THIS_MODULE, +}; + + static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) { return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), @@ -79,14 +98,6 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); } -struct cgroup_subsys net_cls_subsys = { - .name = "net_cls", - .create = cgrp_create, - .destroy = cgrp_destroy, - .populate = cgrp_populate, - .subsys_id = net_cls_subsys_id, -}; - struct cls_cgroup_head { u32 handle; @@ -277,12 +288,19 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { static int __init init_cgroup_cls(void) { - return register_tcf_proto_ops(&cls_cgroup_ops); + int ret = register_tcf_proto_ops(&cls_cgroup_ops); + if (ret) + return ret; + ret = cgroup_load_subsys(&net_cls_subsys); + if (ret) + unregister_tcf_proto_ops(&cls_cgroup_ops); + return ret; } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); + cgroup_unload_subsys(&net_cls_subsys); } module_init(init_cgroup_cls); -- cgit v1.1 From b138338056fc423c61a583d45f8aa64cfad87131 Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 24 Mar 2010 07:57:28 +0000 Subject: net: remove trailing space in messages Signed-off-by: Frans Pop Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 07372f6..1ef7687 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -772,10 +772,10 @@ static int __init init_u32(void) printk(" Performance counters on\n"); #endif #ifdef CONFIG_NET_CLS_IND - printk(" input device check on \n"); + printk(" input device check on\n"); #endif #ifdef CONFIG_NET_CLS_ACT - printk(" Actions configured \n"); + printk(" Actions configured\n"); #endif return register_tcf_proto_ops(&cls_u32_ops); } -- cgit v1.1 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- net/sched/act_api.c | 1 + net/sched/act_ipt.c | 1 + net/sched/act_mirred.c | 1 + net/sched/act_pedit.c | 1 + net/sched/act_police.c | 1 + net/sched/act_simple.c | 1 + net/sched/cls_api.c | 1 + net/sched/cls_basic.c | 1 + net/sched/cls_cgroup.c | 1 + net/sched/cls_flow.c | 1 + net/sched/cls_fw.c | 1 + net/sched/cls_route.c | 1 + net/sched/cls_tcindex.c | 1 + net/sched/cls_u32.c | 1 + net/sched/em_meta.c | 1 + net/sched/em_nbyte.c | 1 + net/sched/em_text.c | 1 + net/sched/ematch.c | 1 + net/sched/sch_api.c | 1 + net/sched/sch_atm.c | 1 + net/sched/sch_cbq.c | 1 + net/sched/sch_drr.c | 1 + net/sched/sch_dsmark.c | 1 + net/sched/sch_fifo.c | 1 + net/sched/sch_generic.c | 1 + net/sched/sch_gred.c | 1 + net/sched/sch_htb.c | 1 + net/sched/sch_mq.c | 1 + net/sched/sch_multiq.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_prio.c | 1 + net/sched/sch_sfq.c | 1 + net/sched/sch_teql.c | 1 + 33 files changed, 33 insertions(+) (limited to 'net/sched') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 64f5e32..d8e0171 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 082c520..da27a17 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index d329170..c0466820 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6b0359a..b7dcfed 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 723964c..654f73d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8daa1eb..622ca80 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3725d8f..f082b27 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 4e2bda8..efd4f95 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7f27d2c..2211803 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index e054c62..6ed61b1 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 6d6e875..93b0a7b 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index dd872d5..694dcd8 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e806f23..20ef330 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 07372f6..17c5dfc 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 24dce8b..3bcac8a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -58,6 +58,7 @@ * only available if that subsystem is enabled in the kernel. */ +#include #include #include #include diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 370a1b2..1a4176a 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf */ +#include #include #include #include diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 853c5ea..7632532 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -9,6 +9,7 @@ * Authors: Thomas Graf */ +#include #include #include #include diff --git a/net/sched/ematch.c b/net/sched/ematch.c index aab5940..e782bde 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -82,6 +82,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6cd4910..145268c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ab82f14..fcbb86a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -3,6 +3,7 @@ /* Written 1998-2000 by Werner Almesberger, EPFL ICA */ #include +#include #include #include #include diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 3846d65..28c01ef 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a65604f..b74046a 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index d303daa..63d41f8 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 4b0a6cc..5948baf 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5173c1e..ff4dd53 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -24,6 +24,7 @@ #include #include #include +#include #include /* Main transmission queue. */ diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 40408d5..51dcc2a 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -18,6 +18,7 @@ * For all the glorious comments look at include/net/red.h */ +#include #include #include #include diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 508cf5f..0b52b8de 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index d1dea3d..b2aba3f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7db2c88..c50876c 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d8b10e0..4714ff1 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 93285ce..81672e0 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,6 +12,7 @@ */ #include +#include #include #include #include diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index cb21380..c5a9ac5 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index db69637..3415b6c 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 7e5ab157813993356f021757d0b0dcbdca7c55a1 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Tue, 30 Mar 2010 19:44:56 -0700 Subject: net_sched: minor netns related cleanup These changes were suggested by Alexey Dobriyan : - psched_show() does not use any private data so just pass NULL to psched_open() - remove unnecessary return statement Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/sched/sch_api.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6d6fe16..c65866d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1683,7 +1683,7 @@ static int psched_show(struct seq_file *seq, void *v) static int psched_open(struct inode *inode, struct file *file) { - return single_open(file, psched_show, PDE(inode)->data); + return single_open(file, psched_show, NULL); } static const struct file_operations psched_fops = { @@ -1708,8 +1708,6 @@ static int __net_init psched_net_init(struct net *net) static void __net_exit psched_net_exit(struct net *net) { proc_net_remove(net, "psched"); - - return; } #else static int __net_init psched_net_init(struct net *net) -- cgit v1.1 From 5d944c640b4ae5f37c537acf491c2f0eb89fa0d6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Mar 2010 07:06:04 +0000 Subject: gen_estimator: deadlock fix One of my test machine got a deadlock during "tc" sessions, adding/deleting classes & filters, using traffic estimators. After some analysis, I believe we have a potential use after free case in est_timer() : spin_lock(e->stats_lock); << HERE >> read_lock(&est_lock); if (e->bstats == NULL) << TEST >> goto skip; Test is done a bit late, because after estimator is killed, and before rcu grace period elapsed, we might already have freed/reuse memory where e->stats_locks points to (some qdisc->q.lock) A possible fix is to respect a rcu grace period at Qdisc dismantle time. On 64bit, sizeof(struct Qdisc) is exactly 192 bytes. Adding 16 bytes to it (for struct rcu_head) is a problem because it might change performance, given QDISC_ALIGNTO is 32 bytes. This is why I also change QDISC_ALIGNTO to 64 bytes, to satisfy most current alignment requirements. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net/sched') diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5173c1e..1751325 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -528,7 +528,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, unsigned int size; int err = -ENOBUFS; - /* ensure that the Qdisc and the private data are 32-byte aligned */ + /* ensure that the Qdisc and the private data are 64-byte aligned */ size = QDISC_ALIGN(sizeof(*sch)); size += ops->priv_size + (QDISC_ALIGNTO - 1); @@ -590,6 +590,13 @@ void qdisc_reset(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_reset); +static void qdisc_rcu_free(struct rcu_head *head) +{ + struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); + + kfree((char *) qdisc - qdisc->padded); +} + void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops = qdisc->ops; @@ -613,7 +620,11 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - kfree((char *) qdisc - qdisc->padded); + /* + * gen_estimator est_timer() might access qdisc->q.lock, + * wait a RCU grace period before freeing qdisc. + */ + call_rcu(&qdisc->rcu_head, qdisc_rcu_free); } EXPORT_SYMBOL(qdisc_destroy); -- cgit v1.1