diff options
Diffstat (limited to 'net/sched/police.c')
-rw-r--r-- | net/sched/police.c | 612 |
1 files changed, 612 insertions, 0 deletions
diff --git a/net/sched/police.c b/net/sched/police.c new file mode 100644 index 0000000..c03545f --- /dev/null +++ b/net/sched/police.c @@ -0,0 +1,612 @@ +/* + * net/sched/police.c Input police filter. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * J Hadi Salim (action changes) + */ + +#include <asm/uaccess.h> +#include <asm/system.h> +#include <linux/bitops.h> +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <linux/in.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/module.h> +#include <linux/rtnetlink.h> +#include <linux/init.h> +#include <net/sock.h> +#include <net/act_api.h> + +#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) +#define PRIV(a) ((struct tcf_police *) (a)->priv) + +/* use generic hash table */ +#define MY_TAB_SIZE 16 +#define MY_TAB_MASK 15 +static u32 idx_gen; +static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; +/* Policer hash table lock */ +static DEFINE_RWLOCK(police_lock); + +/* Each policer is serialized by its individual spinlock */ + +static __inline__ unsigned tcf_police_hash(u32 index) +{ + return index&0xF; +} + +static __inline__ struct tcf_police * tcf_police_lookup(u32 index) +{ + struct tcf_police *p; + + read_lock(&police_lock); + for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { + if (p->index == index) + break; + } + read_unlock(&police_lock); + return p; +} + +#ifdef CONFIG_NET_CLS_ACT +static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) +{ + struct tcf_police *p; + int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + struct rtattr *r; + + read_lock(&police_lock); + + s_i = cb->args[0]; + + for (i = 0; i < MY_TAB_SIZE; i++) { + p = tcf_police_ht[tcf_police_hash(i)]; + + for (; p; p = p->next) { + index++; + if (index < s_i) + continue; + a->priv = p; + a->order = index; + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + if (type == RTM_DELACTION) + err = tcf_action_dump_1(skb, a, 0, 1); + else + err = tcf_action_dump_1(skb, a, 0, 0); + if (err < 0) { + index--; + skb_trim(skb, (u8*)r - skb->data); + goto done; + } + r->rta_len = skb->tail - (u8*)r; + n_i++; + } + } +done: + read_unlock(&police_lock); + if (n_i) + cb->args[0] += n_i; + return n_i; + +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + goto done; +} + +static inline int +tcf_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_police *p = tcf_police_lookup(index); + + if (p != NULL) { + a->priv = p; + return 1; + } else { + return 0; + } +} +#endif + +static inline u32 tcf_police_new_index(void) +{ + do { + if (++idx_gen == 0) + idx_gen = 1; + } while (tcf_police_lookup(idx_gen)); + + return idx_gen; +} + +void tcf_police_destroy(struct tcf_police *p) +{ + unsigned h = tcf_police_hash(p->index); + struct tcf_police **p1p; + + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { + if (*p1p == p) { + write_lock_bh(&police_lock); + *p1p = p->next; + write_unlock_bh(&police_lock); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&p->bstats, &p->rate_est); +#endif + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + if (p->P_tab) + qdisc_put_rtab(p->P_tab); + kfree(p); + return; + } + } + BUG_TRAP(0); +} + +#ifdef CONFIG_NET_CLS_ACT +static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + unsigned h; + int ret = 0, err; + struct rtattr *tb[TCA_POLICE_MAX]; + struct tc_police *parm; + struct tcf_police *p; + struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_POLICE_TBF-1] == NULL || + RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) + return -EINVAL; + parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); + + if (tb[TCA_POLICE_RESULT-1] != NULL && + RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) + return -EINVAL; + if (tb[TCA_POLICE_RESULT-1] != NULL && + RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) + return -EINVAL; + + if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { + a->priv = p; + if (bind) { + p->bindcnt += 1; + p->refcnt += 1; + } + if (ovr) + goto override; + return ret; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return -ENOMEM; + memset(p, 0, sizeof(*p)); + + ret = ACT_P_CREATED; + p->refcnt = 1; + spin_lock_init(&p->lock); + p->stats_lock = &p->lock; + if (bind) + p->bindcnt = 1; +override: + if (parm->rate.rate) { + err = -ENOMEM; + R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (R_tab == NULL) + goto failure; + if (parm->peakrate.rate) { + P_tab = qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (p->P_tab == NULL) { + qdisc_put_rtab(R_tab); + goto failure; + } + } + } + /* No failure allowed after this point */ + spin_lock_bh(&p->lock); + if (R_tab != NULL) { + qdisc_put_rtab(p->R_tab); + p->R_tab = R_tab; + } + if (P_tab != NULL) { + qdisc_put_rtab(p->P_tab); + p->P_tab = P_tab; + } + + if (tb[TCA_POLICE_RESULT-1]) + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + p->toks = p->burst = parm->burst; + p->mtu = parm->mtu; + if (p->mtu == 0) { + p->mtu = ~0; + if (p->R_tab) + p->mtu = 255<<p->R_tab->rate.cell_log; + } + if (p->P_tab) + p->ptoks = L2T_P(p, p->mtu); + p->action = parm->action; + +#ifdef CONFIG_NET_ESTIMATOR + if (tb[TCA_POLICE_AVRATE-1]) + p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + if (est) + gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); +#endif + + spin_unlock_bh(&p->lock); + if (ret != ACT_P_CREATED) + return ret; + + PSCHED_GET_TIME(p->t_c); + p->index = parm->index ? : tcf_police_new_index(); + h = tcf_police_hash(p->index); + write_lock_bh(&police_lock); + p->next = tcf_police_ht[h]; + tcf_police_ht[h] = p; + write_unlock_bh(&police_lock); + + a->priv = p; + return ret; + +failure: + if (ret == ACT_P_CREATED) + kfree(p); + return err; +} + +static int tcf_act_police_cleanup(struct tc_action *a, int bind) +{ + struct tcf_police *p = PRIV(a); + + if (p != NULL) + return tcf_police_release(p, bind); + return 0; +} + +static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) +{ + psched_time_t now; + struct sk_buff *skb = *pskb; + struct tcf_police *p = PRIV(a); + long toks; + long ptoks = 0; + + spin_lock(&p->lock); + + p->bstats.bytes += skb->len; + p->bstats.packets++; + +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; + } +#endif + + if (skb->len <= p->mtu) { + if (p->R_tab == NULL) { + spin_unlock(&p->lock); + return p->result; + } + + PSCHED_GET_TIME(now); + + toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); + + if (p->P_tab) { + ptoks = toks + p->ptoks; + if (ptoks > (long)L2T_P(p, p->mtu)) + ptoks = (long)L2T_P(p, p->mtu); + ptoks -= L2T_P(p, skb->len); + } + toks += p->toks; + if (toks > (long)p->burst) + toks = p->burst; + toks -= L2T(p, skb->len); + + if ((toks|ptoks) >= 0) { + p->t_c = now; + p->toks = toks; + p->ptoks = ptoks; + spin_unlock(&p->lock); + return p->result; + } + } + + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; +} + +static int +tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb->tail; + struct tc_police opt; + struct tcf_police *p = PRIV(a); + + opt.index = p->index; + opt.action = p->action; + opt.mtu = p->mtu; + opt.burst = p->burst; + opt.refcnt = p->refcnt - ref; + opt.bindcnt = p->bindcnt - bind; + if (p->R_tab) + opt.rate = p->R_tab->rate; + else + memset(&opt.rate, 0, sizeof(opt.rate)); + if (p->P_tab) + opt.peakrate = p->P_tab->rate; + else + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); + if (p->result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); +#endif + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +MODULE_AUTHOR("Alexey Kuznetsov"); +MODULE_DESCRIPTION("Policing actions"); +MODULE_LICENSE("GPL"); + +static struct tc_action_ops act_police_ops = { + .kind = "police", + .type = TCA_ID_POLICE, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_act_police, + .dump = tcf_act_police_dump, + .cleanup = tcf_act_police_cleanup, + .lookup = tcf_hash_search, + .init = tcf_act_police_locate, + .walk = tcf_generic_walker +}; + +static int __init +police_init_module(void) +{ + return tcf_register_action(&act_police_ops); +} + +static void __exit +police_cleanup_module(void) +{ + tcf_unregister_action(&act_police_ops); +} + +module_init(police_init_module); +module_exit(police_cleanup_module); + +#endif + +struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) +{ + unsigned h; + struct tcf_police *p; + struct rtattr *tb[TCA_POLICE_MAX]; + struct tc_police *parm; + + if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) + return NULL; + + if (tb[TCA_POLICE_TBF-1] == NULL || + RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm)) + return NULL; + + parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); + + if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { + p->refcnt++; + return p; + } + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return NULL; + + memset(p, 0, sizeof(*p)); + p->refcnt = 1; + spin_lock_init(&p->lock); + p->stats_lock = &p->lock; + if (parm->rate.rate) { + p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (p->R_tab == NULL) + goto failure; + if (parm->peakrate.rate) { + p->P_tab = qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (p->P_tab == NULL) + goto failure; + } + } + if (tb[TCA_POLICE_RESULT-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) + goto failure; + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + } +#ifdef CONFIG_NET_ESTIMATOR + if (tb[TCA_POLICE_AVRATE-1]) { + if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) + goto failure; + p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + } +#endif + p->toks = p->burst = parm->burst; + p->mtu = parm->mtu; + if (p->mtu == 0) { + p->mtu = ~0; + if (p->R_tab) + p->mtu = 255<<p->R_tab->rate.cell_log; + } + if (p->P_tab) + p->ptoks = L2T_P(p, p->mtu); + PSCHED_GET_TIME(p->t_c); + p->index = parm->index ? : tcf_police_new_index(); + p->action = parm->action; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); +#endif + h = tcf_police_hash(p->index); + write_lock_bh(&police_lock); + p->next = tcf_police_ht[h]; + tcf_police_ht[h] = p; + write_unlock_bh(&police_lock); + return p; + +failure: + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + kfree(p); + return NULL; +} + +int tcf_police(struct sk_buff *skb, struct tcf_police *p) +{ + psched_time_t now; + long toks; + long ptoks = 0; + + spin_lock(&p->lock); + + p->bstats.bytes += skb->len; + p->bstats.packets++; + +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; + } +#endif + + if (skb->len <= p->mtu) { + if (p->R_tab == NULL) { + spin_unlock(&p->lock); + return p->result; + } + + PSCHED_GET_TIME(now); + + toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); + + if (p->P_tab) { + ptoks = toks + p->ptoks; + if (ptoks > (long)L2T_P(p, p->mtu)) + ptoks = (long)L2T_P(p, p->mtu); + ptoks -= L2T_P(p, skb->len); + } + toks += p->toks; + if (toks > (long)p->burst) + toks = p->burst; + toks -= L2T(p, skb->len); + + if ((toks|ptoks) >= 0) { + p->t_c = now; + p->toks = toks; + p->ptoks = ptoks; + spin_unlock(&p->lock); + return p->result; + } + } + + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; +} + +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) +{ + unsigned char *b = skb->tail; + struct tc_police opt; + + opt.index = p->index; + opt.action = p->action; + opt.mtu = p->mtu; + opt.burst = p->burst; + if (p->R_tab) + opt.rate = p->R_tab->rate; + else + memset(&opt.rate, 0, sizeof(opt.rate)); + if (p->P_tab) + opt.peakrate = p->P_tab->rate; + else + memset(&opt.peakrate, 0, sizeof(opt.peakrate)); + RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); + if (p->result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); +#ifdef CONFIG_NET_ESTIMATOR + if (p->ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); +#endif + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) +{ + struct gnet_dump d; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, + TCA_XSTATS, p->stats_lock, &d) < 0) + goto errout; + + if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || +#ifdef CONFIG_NET_ESTIMATOR + gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || +#endif + gnet_stats_copy_queue(&d, &p->qstats) < 0) + goto errout; + + if (gnet_stats_finish_copy(&d) < 0) + goto errout; + + return 0; + +errout: + return -1; +} + + +EXPORT_SYMBOL(tcf_police); +EXPORT_SYMBOL(tcf_police_destroy); +EXPORT_SYMBOL(tcf_police_dump); +EXPORT_SYMBOL(tcf_police_dump_stats); +EXPORT_SYMBOL(tcf_police_hash); +EXPORT_SYMBOL(tcf_police_ht); +EXPORT_SYMBOL(tcf_police_locate); +EXPORT_SYMBOL(tcf_police_lookup); +EXPORT_SYMBOL(tcf_police_new_index); |