diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/datagram.c | 54 | ||||
-rw-r--r-- | net/core/dev.c | 61 | ||||
-rw-r--r-- | net/core/dev_mcast.c | 28 | ||||
-rw-r--r-- | net/core/dst.c | 11 | ||||
-rw-r--r-- | net/core/fib_rules.c | 104 | ||||
-rw-r--r-- | net/core/flow.c | 3 | ||||
-rw-r--r-- | net/core/gen_estimator.c | 23 | ||||
-rw-r--r-- | net/core/gen_stats.c | 10 | ||||
-rw-r--r-- | net/core/neighbour.c | 262 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 20 | ||||
-rw-r--r-- | net/core/net_namespace.c | 9 | ||||
-rw-r--r-- | net/core/netpoll.c | 62 | ||||
-rw-r--r-- | net/core/pktgen.c | 104 | ||||
-rw-r--r-- | net/core/request_sock.c | 5 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 56 | ||||
-rw-r--r-- | net/core/skbuff.c | 246 | ||||
-rw-r--r-- | net/core/sock.c | 200 | ||||
-rw-r--r-- | net/core/stream.c | 85 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 70 | ||||
-rw-r--r-- | net/core/utils.c | 27 |
20 files changed, 839 insertions, 601 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c index 029b93e..8a28fc9 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -115,10 +115,10 @@ out_noerr: } /** - * skb_recv_datagram - Receive a datagram skbuff + * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags - * @noblock: blocking operation? + * @peeked: returns non-zero if this packet has been seen before * @err: error code returned * * Get a datagram skbuff, understands the peeking, nonblocking wakeups @@ -143,8 +143,8 @@ out_noerr: * quite explicitly by POSIX 1003.1g, don't change them without having * the standard around please. */ -struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, - int noblock, int *err) +struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, + int *peeked, int *err) { struct sk_buff *skb; long timeo; @@ -156,7 +156,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, if (error) goto no_packet; - timeo = sock_rcvtimeo(sk, noblock); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { /* Again only user level code calls this function, so nothing @@ -165,18 +165,19 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, * Look at current nfs client by the way... * However, this function was corrent in any case. 8) */ - if (flags & MSG_PEEK) { - unsigned long cpu_flags; - - spin_lock_irqsave(&sk->sk_receive_queue.lock, - cpu_flags); - skb = skb_peek(&sk->sk_receive_queue); - if (skb) + unsigned long cpu_flags; + + spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); + skb = skb_peek(&sk->sk_receive_queue); + if (skb) { + *peeked = skb->peeked; + if (flags & MSG_PEEK) { + skb->peeked = 1; atomic_inc(&skb->users); - spin_unlock_irqrestore(&sk->sk_receive_queue.lock, - cpu_flags); - } else - skb = skb_dequeue(&sk->sk_receive_queue); + } else + __skb_unlink(skb, &sk->sk_receive_queue); + } + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); if (skb) return skb; @@ -194,10 +195,21 @@ no_packet: *err = error; return NULL; } +EXPORT_SYMBOL(__skb_recv_datagram); + +struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, + int noblock, int *err) +{ + int peeked; + + return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, err); +} void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); + sk_mem_reclaim(sk); } /** @@ -217,20 +229,28 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb) * This function currently only disables BH when acquiring the * sk_receive_queue lock. Therefore it must not be used in a * context where that lock is acquired in an IRQ context. + * + * It returns 0 if the packet was removed by us. */ -void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) +int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags) { + int err = 0; + if (flags & MSG_PEEK) { + err = -ENOENT; spin_lock_bh(&sk->sk_receive_queue.lock); if (skb == skb_peek(&sk->sk_receive_queue)) { __skb_unlink(skb, &sk->sk_receive_queue); atomic_dec(&skb->users); + err = 0; } spin_unlock_bh(&sk->sk_receive_queue.lock); } kfree_skb(skb); + sk_mem_reclaim(sk); + return err; } EXPORT_SYMBOL(skb_kill_datagram); diff --git a/net/core/dev.c b/net/core/dev.c index 0879f52..c9c593e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -150,8 +150,11 @@ * 86DD IPv6 */ +#define PTYPE_HASH_SIZE (16) +#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) + static DEFINE_SPINLOCK(ptype_lock); -static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */ +static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ #ifdef CONFIG_NET_DMA @@ -362,7 +365,7 @@ void dev_add_pack(struct packet_type *pt) if (pt->type == htons(ETH_P_ALL)) list_add_rcu(&pt->list, &ptype_all); else { - hash = ntohs(pt->type) & 15; + hash = ntohs(pt->type) & PTYPE_HASH_MASK; list_add_rcu(&pt->list, &ptype_base[hash]); } spin_unlock_bh(&ptype_lock); @@ -391,7 +394,7 @@ void __dev_remove_pack(struct packet_type *pt) if (pt->type == htons(ETH_P_ALL)) head = &ptype_all; else - head = &ptype_base[ntohs(pt->type) & 15]; + head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; list_for_each_entry(pt1, head, list) { if (pt == pt1) { @@ -672,7 +675,7 @@ struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *h ASSERT_RTNL(); - for_each_netdev(&init_net, dev) + for_each_netdev(net, dev) if (dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len)) return dev; @@ -1420,7 +1423,8 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) } rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + list_for_each_entry_rcu(ptype, + &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { err = ptype->gso_send_check(skb); @@ -2077,7 +2081,8 @@ ncls: goto out; type = skb->protocol; - list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { + list_for_each_entry_rcu(ptype, + &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) @@ -2363,8 +2368,9 @@ static int dev_ifconf(struct net *net, char __user *arg) * in detail. */ void *dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { - struct net *net = seq->private; + struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; @@ -2382,13 +2388,14 @@ void *dev_seq_start(struct seq_file *seq, loff_t *pos) void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net *net = seq->private; + struct net *net = seq_file_net(seq); ++*pos; return v == SEQ_START_TOKEN ? first_net_device(net) : next_net_device((struct net_device *)v); } void dev_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { read_unlock(&dev_base_lock); } @@ -2481,26 +2488,8 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int res; - res = seq_open(file, &dev_seq_ops); - if (!res) { - seq = file->private_data; - seq->private = get_proc_net(inode); - if (!seq->private) { - seq_release(inode, file); - res = -ENXIO; - } - } - return res; -} - -static int dev_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = seq->private; - put_net(net); - return seq_release(inode, file); + return seq_open_net(inode, file, &dev_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations dev_seq_fops = { @@ -2508,7 +2497,7 @@ static const struct file_operations dev_seq_fops = { .open = dev_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = dev_seq_release, + .release = seq_release_net, }; static const struct seq_operations softnet_seq_ops = { @@ -2543,7 +2532,7 @@ static void *ptype_get_idx(loff_t pos) ++i; } - for (t = 0; t < 16; t++) { + for (t = 0; t < PTYPE_HASH_SIZE; t++) { list_for_each_entry_rcu(pt, &ptype_base[t], list) { if (i == pos) return pt; @@ -2554,6 +2543,7 @@ static void *ptype_get_idx(loff_t pos) } static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) { rcu_read_lock(); return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; @@ -2577,10 +2567,10 @@ static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) hash = 0; nxt = ptype_base[0].next; } else - hash = ntohs(pt->type) & 15; + hash = ntohs(pt->type) & PTYPE_HASH_MASK; while (nxt == &ptype_base[hash]) { - if (++hash >= 16) + if (++hash >= PTYPE_HASH_SIZE) return NULL; nxt = ptype_base[hash].next; } @@ -2589,6 +2579,7 @@ found: } static void ptype_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) { rcu_read_unlock(); } @@ -3505,7 +3496,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static DEFINE_SPINLOCK(net_todo_list_lock); -static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); +static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { @@ -3984,6 +3975,8 @@ void synchronize_net(void) void unregister_netdevice(struct net_device *dev) { + ASSERT_RTNL(); + rollback_registered(dev); /* Finish processing unregister after unlock */ net_set_todo(dev); @@ -4416,7 +4409,7 @@ static int __init net_dev_init(void) goto out; INIT_LIST_HEAD(&ptype_all); - for (i = 0; i < 16; i++) + for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); if (register_pernet_subsys(&netdev_net_ops)) diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 69fff16..cadbfbf 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -186,8 +186,9 @@ EXPORT_SYMBOL(dev_mc_unsync); #ifdef CONFIG_PROC_FS static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(dev_base_lock) { - struct net *net = seq->private; + struct net *net = seq_file_net(seq); struct net_device *dev; loff_t off = 0; @@ -206,6 +207,7 @@ static void *dev_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dev_mc_seq_stop(struct seq_file *seq, void *v) + __releases(dev_base_lock) { read_unlock(&dev_base_lock); } @@ -241,26 +243,8 @@ static const struct seq_operations dev_mc_seq_ops = { static int dev_mc_seq_open(struct inode *inode, struct file *file) { - struct seq_file *seq; - int res; - res = seq_open(file, &dev_mc_seq_ops); - if (!res) { - seq = file->private_data; - seq->private = get_proc_net(inode); - if (!seq->private) { - seq_release(inode, file); - res = -ENXIO; - } - } - return res; -} - -static int dev_mc_seq_release(struct inode *inode, struct file *file) -{ - struct seq_file *seq = file->private_data; - struct net *net = seq->private; - put_net(net); - return seq_release(inode, file); + return seq_open_net(inode, file, &dev_mc_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations dev_mc_seq_fops = { @@ -268,7 +252,7 @@ static const struct file_operations dev_mc_seq_fops = { .open = dev_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = dev_mc_seq_release, + .release = seq_release_net, }; #endif diff --git a/net/core/dst.c b/net/core/dst.c index 03daead..7deef48 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -153,18 +153,19 @@ loop: #endif } -static int dst_discard(struct sk_buff *skb) +int dst_discard(struct sk_buff *skb) { kfree_skb(skb); return 0; } +EXPORT_SYMBOL(dst_discard); void * dst_alloc(struct dst_ops * ops) { struct dst_entry * dst; if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { - if (ops->gc()) + if (ops->gc(ops)) return NULL; } dst = kmem_cache_zalloc(ops->kmem_cachep, GFP_ATOMIC); @@ -278,13 +279,13 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { - dst->dev = init_net.loopback_dev; + dst->dev = dst->dev->nd_net->loopback_dev; dev_hold(dst->dev); dev_put(dev); if (dst->neighbour && dst->neighbour->dev == dev) { - dst->neighbour->dev = init_net.loopback_dev; + dst->neighbour->dev = dst->dev; + dev_hold(dst->dev); dev_put(dev); - dev_hold(dst->neighbour->dev); } } } diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 848132b..42ccaf5 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -15,9 +15,6 @@ #include <net/sock.h> #include <net/fib_rules.h> -static LIST_HEAD(rules_ops); -static DEFINE_SPINLOCK(rules_mod_lock); - int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -32,6 +29,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->fr_net = ops->fro_net; /* The lock is not required here, the list in unreacheable * at the moment this function is called */ @@ -44,12 +42,12 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid); -static struct fib_rules_ops *lookup_rules_ops(int family) +static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) { struct fib_rules_ops *ops; rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; @@ -78,6 +76,9 @@ int fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; struct fib_rules_ops *o; + struct net *net; + + net = ops->fro_net; if (ops->rule_size < sizeof(struct fib_rule)) return -EINVAL; @@ -87,22 +88,23 @@ int fib_rules_register(struct fib_rules_ops *ops) ops->action == NULL) return -EINVAL; - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) + spin_lock(&net->rules_mod_lock); + list_for_each_entry(o, &net->rules_ops, list) if (ops->family == o->family) goto errout; - list_add_tail_rcu(&ops->list, &rules_ops); + hold_net(net); + list_add_tail_rcu(&ops->list, &net->rules_ops); err = 0; errout: - spin_unlock(&rules_mod_lock); + spin_unlock(&net->rules_mod_lock); return err; } EXPORT_SYMBOL_GPL(fib_rules_register); -static void cleanup_ops(struct fib_rules_ops *ops) +void fib_rules_cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; @@ -111,28 +113,19 @@ static void cleanup_ops(struct fib_rules_ops *ops) fib_rule_put(rule); } } +EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); -int fib_rules_unregister(struct fib_rules_ops *ops) +void fib_rules_unregister(struct fib_rules_ops *ops) { - int err = 0; - struct fib_rules_ops *o; - - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) { - if (o == ops) { - list_del_rcu(&o->list); - cleanup_ops(ops); - goto out; - } - } + struct net *net = ops->fro_net; - err = -ENOENT; -out: - spin_unlock(&rules_mod_lock); + spin_lock(&net->rules_mod_lock); + list_del_rcu(&ops->list); + fib_rules_cleanup_ops(ops); + spin_unlock(&net->rules_mod_lock); synchronize_rcu(); - - return err; + release_net(net); } EXPORT_SYMBOL_GPL(fib_rules_unregister); @@ -231,7 +224,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -250,6 +243,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = -ENOMEM; goto errout; } + rule->fr_net = net; if (tb[FRA_PRIORITY]) rule->pref = nla_get_u32(tb[FRA_PRIORITY]); @@ -281,7 +275,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) - rule->pref = ops->default_pref(); + rule->pref = ops->default_pref(ops); err = -EINVAL; if (tb[FRA_GOTO]) { @@ -358,6 +352,7 @@ errout: static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct fib_rule_hdr *frh = nlmsg_data(nlh); struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *tmp; @@ -367,7 +362,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) goto errout; - ops = lookup_rules_ops(frh->family); + ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { err = EAFNOSUPPORT; goto errout; @@ -539,13 +534,14 @@ skip: static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; struct fib_rules_ops *ops; int idx = 0, family; family = rtnl_msg_family(cb->nlh); if (family != AF_UNSPEC) { /* Protocol specific dump request */ - ops = lookup_rules_ops(family); + ops = lookup_rules_ops(net, family); if (ops == NULL) return -EAFNOSUPPORT; @@ -553,7 +549,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) } rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { + list_for_each_entry_rcu(ops, &net->rules_ops, list) { if (idx < cb->args[0] || !try_module_get(ops->owner)) goto skip; @@ -574,9 +570,11 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct fib_rules_ops *ops, struct nlmsghdr *nlh, u32 pid) { + struct net *net; struct sk_buff *skb; int err = -ENOBUFS; + net = ops->fro_net; skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; @@ -588,10 +586,11 @@ static void notify_rule_change(int event, struct fib_rule *rule, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); + + err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(ops->nlgroup, err); + rtnl_set_sk_err(net, ops->nlgroup, err); } static void attach_rules(struct list_head *rules, struct net_device *dev) @@ -619,22 +618,20 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev->nd_net; struct fib_rules_ops *ops; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - ASSERT_RTNL(); rcu_read_lock(); switch (event) { case NETDEV_REGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; } @@ -648,13 +645,40 @@ static struct notifier_block fib_rules_notifier = { .notifier_call = fib_rules_event, }; +static int fib_rules_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->rules_ops); + spin_lock_init(&net->rules_mod_lock); + return 0; +} + +static struct pernet_operations fib_rules_net_ops = { + .init = fib_rules_net_init, +}; + static int __init fib_rules_init(void) { + int err; rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); - return register_netdevice_notifier(&fib_rules_notifier); + err = register_netdevice_notifier(&fib_rules_notifier); + if (err < 0) + goto fail; + + err = register_pernet_subsys(&fib_rules_net_ops); + if (err < 0) + goto fail_unregister; + return 0; + +fail_unregister: + unregister_netdevice_notifier(&fib_rules_notifier); +fail: + rtnl_unregister(PF_UNSPEC, RTM_NEWRULE); + rtnl_unregister(PF_UNSPEC, RTM_DELRULE); + rtnl_unregister(PF_UNSPEC, RTM_GETRULE); + return err; } subsys_initcall(fib_rules_init); diff --git a/net/core/flow.c b/net/core/flow.c index 6489f4e..46b38e0 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -352,8 +352,7 @@ static int __init flow_cache_init(void) flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; - init_timer(&flow_hash_rnd_timer); - flow_hash_rnd_timer.function = flow_cache_new_hashrnd; + setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0); flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; add_timer(&flow_hash_rnd_timer); diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index daadbcc..57abe82 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -135,7 +135,7 @@ skip: } if (!list_empty(&elist[idx].list)) - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); rcu_read_unlock(); } @@ -159,13 +159,13 @@ skip: int gen_new_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, - struct rtattr *opt) + struct nlattr *opt) { struct gen_estimator *est; - struct gnet_estimator *parm = RTA_DATA(opt); + struct gnet_estimator *parm = nla_data(opt); int idx; - if (RTA_PAYLOAD(opt) < sizeof(*parm)) + if (nla_len(opt) < sizeof(*parm)) return -EINVAL; if (parm->interval < -2 || parm->interval > 3) @@ -191,7 +191,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, } if (list_empty(&elist[idx].list)) - mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4)); + mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx)); list_add_rcu(&est->list, &elist[idx].list); return 0; @@ -241,7 +241,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats, } /** - * gen_replace_estimator - replace rate estimator configruation + * gen_replace_estimator - replace rate estimator configuration * @bstats: basic statistics * @rate_est: rate estimator statistics * @stats_lock: statistics lock @@ -252,13 +252,12 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats, * * Returns 0 on success or a negative error code. */ -int -gen_replace_estimator(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, - struct rtattr *opt) +int gen_replace_estimator(struct gnet_stats_basic *bstats, + struct gnet_stats_rate_est *rate_est, + spinlock_t *stats_lock, struct nlattr *opt) { - gen_kill_estimator(bstats, rate_est); - return gen_new_estimator(bstats, rate_est, stats_lock, opt); + gen_kill_estimator(bstats, rate_est); + return gen_new_estimator(bstats, rate_est, stats_lock, opt); } diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index bcc2559..c3d0ffe 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -20,16 +20,17 @@ #include <linux/socket.h> #include <linux/rtnetlink.h> #include <linux/gen_stats.h> +#include <net/netlink.h> #include <net/gen_stats.h> static inline int gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) { - RTA_PUT(d->skb, type, size, buf); + NLA_PUT(d->skb, type, size, buf); return 0; -rtattr_failure: +nla_put_failure: spin_unlock_bh(d->lock); return -1; } @@ -55,13 +56,14 @@ rtattr_failure: int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type, int xstats_type, spinlock_t *lock, struct gnet_dump *d) + __acquires(lock) { memset(d, 0, sizeof(*d)); spin_lock_bh(lock); d->lock = lock; if (type) - d->tail = (struct rtattr *)skb_tail_pointer(skb); + d->tail = (struct nlattr *)skb_tail_pointer(skb); d->skb = skb; d->compat_tc_stats = tc_stats_type; d->compat_xstats = xstats_type; @@ -212,7 +214,7 @@ int gnet_stats_finish_copy(struct gnet_dump *d) { if (d->tail) - d->tail->rta_len = skb_tail_pointer(d->skb) - (u8 *)d->tail; + d->tail->nla_len = skb_tail_pointer(d->skb) - (u8 *)d->tail; if (d->compat_tc_stats) if (gnet_stats_copy(d, d->compat_tc_stats, &d->tc_stats, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 29b8ee4..a16cf1e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -59,7 +59,6 @@ static void neigh_timer_handler(unsigned long arg); static void __neigh_notify(struct neighbour *n, int type, int flags); static void neigh_update_notify(struct neighbour *neigh); static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev); -void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); static struct neigh_table *neigh_tables; #ifdef CONFIG_PROC_FS @@ -165,6 +164,16 @@ static int neigh_forced_gc(struct neigh_table *tbl) return shrunk; } +static void neigh_add_timer(struct neighbour *n, unsigned long when) +{ + neigh_hold(n); + if (unlikely(mod_timer(&n->timer, when))) { + printk("NEIGH: BUG, double timer add, state is %x\n", + n->nud_state); + dump_stack(); + } +} + static int neigh_del_timer(struct neighbour *n) { if ((n->nud_state & NUD_IN_TIMER) && @@ -270,9 +279,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->nud_state = NUD_NONE; n->output = neigh_blackhole; n->parms = neigh_parms_clone(&tbl->parms); - init_timer(&n->timer); - n->timer.function = neigh_timer_handler; - n->timer.data = (unsigned long)n; + setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n); NEIGH_CACHE_STAT_INC(tbl, allocs); n->tbl = tbl; @@ -367,7 +374,8 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, return n; } -struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, + const void *pkey) { struct neighbour *n; int key_len = tbl->key_len; @@ -377,7 +385,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { - if (!memcmp(n->primary_key, pkey, key_len)) { + if (!memcmp(n->primary_key, pkey, key_len) && + (net == n->dev->nd_net)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; @@ -455,7 +464,8 @@ out_neigh_release: goto out; } -struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, +struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev, int creat) { struct pneigh_entry *n; @@ -471,6 +481,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && (n->dev == dev || !n->dev)) { read_unlock_bh(&tbl->lock); goto out; @@ -487,6 +498,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, if (!n) goto out; + n->net = hold_net(net); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -509,7 +521,7 @@ out: } -int pneigh_delete(struct neigh_table *tbl, const void *pkey, +int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; @@ -524,13 +536,15 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { - if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { + if (!memcmp(n->key, pkey, key_len) && n->dev == dev && + (n->net == net)) { *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); return 0; } @@ -553,6 +567,7 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) tbl->pdestructor(n); if (n->dev) dev_put(n->dev); + release_net(n->net); kfree(n); continue; } @@ -562,6 +577,13 @@ static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev) return -ENOENT; } +static void neigh_parms_destroy(struct neigh_parms *parms); + +static inline void neigh_parms_put(struct neigh_parms *parms) +{ + if (atomic_dec_and_test(&parms->refcnt)) + neigh_parms_destroy(parms); +} /* * neighbour must already be out of the table; @@ -718,15 +740,6 @@ static __inline__ int neigh_max_probes(struct neighbour *n) p->ucast_probes + p->app_probes + p->mcast_probes); } -static inline void neigh_add_timer(struct neighbour *n, unsigned long when) -{ - if (unlikely(mod_timer(&n->timer, when))) { - printk("NEIGH: BUG, double timer add, state is %x\n", - n->nud_state); - dump_stack(); - } -} - /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(unsigned long arg) @@ -858,7 +871,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; neigh->updated = jiffies; - neigh_hold(neigh); neigh_add_timer(neigh, now + 1); } else { neigh->nud_state = NUD_FAILED; @@ -871,7 +883,6 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) } } else if (neigh->nud_state & NUD_STALE) { NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); - neigh_hold(neigh); neigh->nud_state = NUD_DELAY; neigh->updated = jiffies; neigh_add_timer(neigh, @@ -1015,13 +1026,11 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (new != old) { neigh_del_timer(neigh); - if (new & NUD_IN_TIMER) { - neigh_hold(neigh); + if (new & NUD_IN_TIMER) neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0))); - } neigh->nud_state = new; } @@ -1266,27 +1275,49 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, spin_unlock(&tbl->proxy_queue.lock); } +static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, + struct net *net, int ifindex) +{ + struct neigh_parms *p; + + for (p = &tbl->parms; p; p = p->next) { + if (p->net != net) + continue; + if ((p->dev && p->dev->ifindex == ifindex) || + (!p->dev && !ifindex)) + return p; + } + + return NULL; +} struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); + struct neigh_parms *p, *ref; + struct net *net; + + net = dev->nd_net; + ref = lookup_neigh_params(tbl, net, 0); + if (!ref) + return NULL; + p = kmemdup(ref, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev) { - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { - kfree(p); - return NULL; - } - dev_hold(dev); - p->dev = dev; + if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + kfree(p); + return NULL; } + + dev_hold(dev); + p->dev = dev; + p->net = hold_net(net); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1326,8 +1357,9 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) NEIGH_PRINTK1("neigh_parms_release: not found\n"); } -void neigh_parms_destroy(struct neigh_parms *parms) +static void neigh_parms_destroy(struct neigh_parms *parms) { + release_net(parms->net); kfree(parms); } @@ -1338,6 +1370,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; + tbl->parms.net = &init_net; atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = @@ -1372,15 +1405,11 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); rwlock_init(&tbl->lock); - init_timer(&tbl->gc_timer); - tbl->gc_timer.data = (unsigned long)tbl; - tbl->gc_timer.function = neigh_periodic_timer; + setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl); tbl->gc_timer.expires = now + 1; add_timer(&tbl->gc_timer); - init_timer(&tbl->proxy_timer); - tbl->proxy_timer.data = (unsigned long)tbl; - tbl->proxy_timer.function = neigh_proxy_process; + setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1483,7 +1512,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, nla_data(dst_attr), dev); + err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); goto out_dev_put; } @@ -1560,7 +1589,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct pneigh_entry *pn; err = -ENOBUFS; - pn = pneigh_lookup(tbl, dst, dev, 1); + pn = pneigh_lookup(tbl, net, dst, dev, 1); if (pn) { pn->flags = ndm->ndm_flags; err = 0; @@ -1755,19 +1784,6 @@ errout: return -EMSGSIZE; } -static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, - int ifindex) -{ - struct neigh_parms *p; - - for (p = &tbl->parms; p; p = p->next) - if ((p->dev && p->dev->ifindex == ifindex) || - (!p->dev && !ifindex)) - return p; - - return NULL; -} - static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { [NDTA_NAME] = { .type = NLA_STRING }, [NDTA_THRESH1] = { .type = NLA_U32 }, @@ -1795,6 +1811,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { + struct net *net = skb->sk->sk_net; struct neigh_table *tbl; struct ndtmsg *ndtmsg; struct nlattr *tb[NDTA_MAX+1]; @@ -1844,7 +1861,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (tbp[NDTPA_IFINDEX]) ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); - p = lookup_neigh_params(tbl, ifindex); + p = lookup_neigh_params(tbl, net, ifindex); if (p == NULL) { err = -ENOENT; goto errout_tbl_lock; @@ -1919,6 +1936,7 @@ errout: static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb->sk->sk_net; int family, tidx, nidx = 0; int tbl_skip = cb->args[0]; int neigh_skip = cb->args[1]; @@ -1938,8 +1956,11 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) NLM_F_MULTI) <= 0) break; - for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { - if (nidx < neigh_skip) + for (nidx = 0, p = tbl->parms.next; p; p = p->next) { + if (net != p->net) + continue; + + if (nidx++ < neigh_skip) continue; if (neightbl_fill_param_info(skb, tbl, p, @@ -2015,6 +2036,7 @@ static void neigh_update_notify(struct neighbour *neigh) static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { + struct net * net = skb->sk->sk_net; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; @@ -2025,8 +2047,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (h > s_h) s_idx = 0; - for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { - if (idx < s_idx) + for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { + int lidx; + if (n->dev->nd_net != net) + continue; + lidx = idx++; + if (lidx < s_idx) continue; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -2118,6 +2144,7 @@ EXPORT_SYMBOL(__neigh_for_each_release); static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net *net = state->p.net; struct neigh_table *tbl = state->tbl; struct neighbour *n = NULL; int bucket = state->bucket; @@ -2127,6 +2154,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) n = tbl->hash_buckets[bucket]; while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { loff_t fakep = 0; void *v; @@ -2156,6 +2185,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; + struct net *net = state->p.net; struct neigh_table *tbl = state->tbl; if (state->neigh_sub_iter) { @@ -2167,6 +2197,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, while (1) { while (n) { + if (n->dev->nd_net != net) + goto next; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); if (v) @@ -2213,6 +2245,7 @@ static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; + struct net * net = state->p.net; struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; int bucket = state->bucket; @@ -2220,6 +2253,8 @@ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { pn = tbl->phash_buckets[bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ -2233,6 +2268,7 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, loff_t *pos) { struct neigh_seq_state *state = seq->private; + struct net * net = state->p.net; struct neigh_table *tbl = state->tbl; pn = pn->next; @@ -2240,6 +2276,8 @@ static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, if (++state->bucket > PNEIGH_HASHMASK) break; pn = tbl->phash_buckets[state->bucket]; + while (pn && (pn->net != net)) + pn = pn->next; if (pn) break; } @@ -2277,6 +2315,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) + __acquires(tbl->lock) { struct neigh_seq_state *state = seq->private; loff_t pos_minus_one; @@ -2320,6 +2359,7 @@ out: EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) + __releases(tbl->lock) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; @@ -2441,6 +2481,7 @@ static inline size_t neigh_nlmsg_size(void) static void __neigh_notify(struct neighbour *n, int type, int flags) { + struct net *net = n->dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -2455,10 +2496,10 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_NEIGH, err); + rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } #ifdef CONFIG_ARPD @@ -2472,11 +2513,8 @@ void neigh_app_ns(struct neighbour *n) static struct neigh_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table neigh_vars[__NET_NEIGH_MAX]; - ctl_table neigh_dev[2]; - ctl_table neigh_neigh_dir[2]; - ctl_table neigh_proto_dir[2]; - ctl_table neigh_root_dir[2]; + struct ctl_table neigh_vars[__NET_NEIGH_MAX]; + char *dev_name; } neigh_sysctl_template __read_mostly = { .neigh_vars = { { @@ -2607,32 +2645,7 @@ static struct neigh_sysctl_table { .mode = 0644, .proc_handler = &proc_dointvec, }, - {} - }, - .neigh_dev = { - { - .ctl_name = NET_PROTO_CONF_DEFAULT, - .procname = "default", - .mode = 0555, - }, - }, - .neigh_neigh_dir = { - { - .procname = "neigh", - .mode = 0555, - }, - }, - .neigh_proto_dir = { - { - .mode = 0555, - }, - }, - .neigh_root_dir = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - }, + {}, }, }; @@ -2640,14 +2653,26 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, proc_handler *handler, ctl_handler *strategy) { - struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, - sizeof(*t), GFP_KERNEL); + struct neigh_sysctl_table *t; const char *dev_name_source = NULL; - char *dev_name = NULL; - int err = 0; +#define NEIGH_CTL_PATH_ROOT 0 +#define NEIGH_CTL_PATH_PROTO 1 +#define NEIGH_CTL_PATH_NEIGH 2 +#define NEIGH_CTL_PATH_DEV 3 + + struct ctl_path neigh_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "proto", .ctl_name = 0, }, + { .procname = "neigh", .ctl_name = 0, }, + { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, }, + { }, + }; + + t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL); if (!t) - return -ENOBUFS; + goto err; + t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; @@ -2665,11 +2690,11 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (dev) { dev_name_source = dev->name; - t->neigh_dev[0].ctl_name = dev->ifindex; + neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex; /* Terminate the table early */ memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14])); } else { - dev_name_source = t->neigh_dev[0].procname; + dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname; t->neigh_vars[14].data = (int *)(p + 1); t->neigh_vars[15].data = (int *)(p + 1) + 1; t->neigh_vars[16].data = (int *)(p + 1) + 2; @@ -2704,39 +2729,28 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[13].ctl_name = CTL_UNNUMBERED; } - dev_name = kstrdup(dev_name_source, GFP_KERNEL); - if (!dev_name) { - err = -ENOBUFS; + t->dev_name = kstrdup(dev_name_source, GFP_KERNEL); + if (!t->dev_name) goto free; - } - - t->neigh_dev[0].procname = dev_name; - - t->neigh_neigh_dir[0].ctl_name = pdev_id; - t->neigh_proto_dir[0].procname = p_name; - t->neigh_proto_dir[0].ctl_name = p_id; + neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name; + neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id; + neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name; + neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id; - t->neigh_dev[0].child = t->neigh_vars; - t->neigh_neigh_dir[0].child = t->neigh_dev; - t->neigh_proto_dir[0].child = t->neigh_neigh_dir; - t->neigh_root_dir[0].child = t->neigh_proto_dir; - - t->sysctl_header = register_sysctl_table(t->neigh_root_dir); - if (!t->sysctl_header) { - err = -ENOBUFS; + t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars); + if (!t->sysctl_header) goto free_procname; - } + p->sysctl_table = t; return 0; - /* error path */ - free_procname: - kfree(dev_name); - free: +free_procname: + kfree(t->dev_name); +free: kfree(t); - - return err; +err: + return -ENOBUFS; } void neigh_sysctl_unregister(struct neigh_parms *p) @@ -2745,7 +2759,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p) struct neigh_sysctl_table *t = p->sysctl_table; p->sysctl_table = NULL; unregister_sysctl_table(t->sysctl_header); - kfree(t->neigh_dev[0].procname); + kfree(t->dev_name); kfree(t); } } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 61ead1d..7635d3f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec); NETDEVICE_SHOW(link_mode, fmt_dec); /* use same locking rules as GIFHWADDR ioctl's */ -static ssize_t format_addr(char *buf, const unsigned char *addr, int len) -{ - int i; - char *cp = buf; - - for (i = 0; i < len; i++) - cp += sprintf(cp, "%02x%c", addr[i], - i == (len - 1) ? '\n' : ':'); - return cp - buf; -} - static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf) { @@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr, read_lock(&dev_base_lock); if (dev_isalive(net)) - ret = format_addr(buf, net->dev_addr, net->addr_len); + ret = sysfs_format_mac(buf, net->dev_addr, net->addr_len); read_unlock(&dev_base_lock); return ret; } @@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev, { struct net_device *net = to_net_dev(dev); if (dev_isalive(net)) - return format_addr(buf, net->broadcast, net->addr_len); + return sysfs_format_mac(buf, net->broadcast, net->addr_len); return -EINVAL; } @@ -247,9 +236,8 @@ static ssize_t netstat_show(const struct device *d, struct net_device_stats *stats; ssize_t ret = -EINVAL; - if (offset > sizeof(struct net_device_stats) || - offset % sizeof(unsigned long) != 0) - WARN_ON(1); + WARN_ON(offset > sizeof(struct net_device_stats) || + offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev) && dev->get_stats && diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ec936ae..26e941d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -58,6 +58,7 @@ out_undo: #ifdef CONFIG_NET_NS static struct kmem_cache *net_cachep; +static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) { @@ -149,7 +150,7 @@ void __put_net(struct net *net) { /* Cleanup the network namespace in process context */ INIT_WORK(&net->work, cleanup_net); - schedule_work(&net->work); + queue_work(netns_wq, &net->work); } EXPORT_SYMBOL_GPL(__put_net); @@ -171,7 +172,13 @@ static int __init net_ns_init(void) net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC, NULL); + + /* Create workqueue for cleanup */ + netns_wq = create_singlethread_workqueue("netns"); + if (!netns_wq) + panic("Could not create netns workq"); #endif + mutex_lock(&net_mutex); err = setup_net(&init_net); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c499b5c..6faa128 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -39,8 +39,6 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; #define USEC_PER_POLL 50 -#define NETPOLL_RX_ENABLED 1 -#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ @@ -128,27 +126,24 @@ static int poll_one_napi(struct netpoll_info *npinfo, if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; - npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); work = napi->poll(napi, budget); atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; return budget - work; } -static void poll_napi(struct netpoll *np) +static void poll_napi(struct net_device *dev) { - struct netpoll_info *npinfo = np->dev->npinfo; struct napi_struct *napi; int budget = 16; - list_for_each_entry(napi, &np->dev->napi_list, dev_list) { + list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(npinfo, napi, budget); + budget = poll_one_napi(dev->npinfo, napi, budget); spin_unlock(&napi->poll_lock); if (!budget) @@ -159,30 +154,27 @@ static void poll_napi(struct netpoll *np) static void service_arp_queue(struct netpoll_info *npi) { - struct sk_buff *skb; - - if (unlikely(!npi)) - return; - - skb = skb_dequeue(&npi->arp_tx); + if (npi) { + struct sk_buff *skb; - while (skb != NULL) { - arp_reply(skb); - skb = skb_dequeue(&npi->arp_tx); + while ((skb = skb_dequeue(&npi->arp_tx))) + arp_reply(skb); } } void netpoll_poll(struct netpoll *np) { - if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) + struct net_device *dev = np->dev; + + if (!dev || !netif_running(dev) || !dev->poll_controller) return; /* Process pending work on NIC */ - np->dev->poll_controller(np->dev); - if (!list_empty(&np->dev->napi_list)) - poll_napi(np); + dev->poll_controller(dev); + + poll_napi(dev); - service_arp_queue(np->dev->npinfo); + service_arp_queue(dev->npinfo); zap_completion_queue(); } @@ -364,8 +356,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IP); - memcpy(eth->h_source, np->local_mac, 6); - memcpy(eth->h_dest, np->remote_mac, 6); + memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN); + memcpy(eth->h_dest, np->remote_mac, ETH_ALEN); skb->dev = np->dev; @@ -418,7 +410,8 @@ static void arp_reply(struct sk_buff *skb) memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ - if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) + if (tip != htonl(np->local_ip) || + ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); @@ -435,7 +428,7 @@ static void arp_reply(struct sk_buff *skb) /* Fill the device header for the ARP frame */ if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->local_mac, + sha, np->dev->dev_addr, send_skb->len) < 0) { kfree_skb(send_skb); return; @@ -479,7 +472,7 @@ int __netpoll_rx(struct sk_buff *skb) if (skb->dev->type != ARPHRD_ETHER) goto out; - /* check if netpoll clients need ARP */ + /* if receive ARP during middle of NAPI poll, then queue */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { skb_queue_tail(&npi->arp_tx, skb); @@ -541,6 +534,9 @@ int __netpoll_rx(struct sk_buff *skb) return 1; out: + /* If packet received while already in poll then just + * silently drop. + */ if (atomic_read(&trapped)) { kfree_skb(skb); return 1; @@ -679,7 +675,6 @@ int netpoll_setup(struct netpoll *np) goto release; } - npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->rx_lock); @@ -741,9 +736,6 @@ int netpoll_setup(struct netpoll *np) } } - if (is_zero_ether_addr(np->local_mac) && ndev->dev_addr) - memcpy(np->local_mac, ndev->dev_addr, 6); - if (!np->local_ip) { rcu_read_lock(); in_dev = __in_dev_get_rcu(ndev); @@ -764,7 +756,6 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= NETPOLL_RX_ENABLED; npinfo->rx_np = np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -806,7 +797,6 @@ void netpoll_cleanup(struct netpoll *np) if (npinfo->rx_np == np) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -816,11 +806,7 @@ void netpoll_cleanup(struct netpoll *np) cancel_rearming_delayed_work(&npinfo->tx_work); /* clean after last, unfinished work */ - if (!skb_queue_empty(&npinfo->txq)) { - struct sk_buff *skb; - skb = __skb_dequeue(&npinfo->txq); - kfree_skb(skb); - } + __skb_queue_purge(&npinfo->txq); kfree(npinfo); np->dev->npinfo = NULL; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 285ec3e..eebccdb 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -397,62 +397,6 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -/* This code works around the fact that do_div cannot handle two 64-bit - numbers, and regular 64-bit division doesn't work on x86 kernels. - --Ben -*/ - -#define PG_DIV 0 - -/* This was emailed to LMKL by: Chris Caputo <ccaputo@alt.net> - * Function copied/adapted/optimized from: - * - * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html - * - * Copyright 1994, University of Cambridge Computer Laboratory - * All Rights Reserved. - * - */ -static inline s64 divremdi3(s64 x, s64 y, int type) -{ - u64 a = (x < 0) ? -x : x; - u64 b = (y < 0) ? -y : y; - u64 res = 0, d = 1; - - if (b > 0) { - while (b < a) { - b <<= 1; - d <<= 1; - } - } - - do { - if (a >= b) { - a -= b; - res += d; - } - b >>= 1; - d >>= 1; - } - while (d); - - if (PG_DIV == type) { - return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res; - } else { - return ((x & (1ll << 63)) == 0) ? a : -(s64) a; - } -} - -/* End of hacks to deal with 64-bit math on x86 */ - -/** Convert to milliseconds */ -static inline __u64 tv_to_ms(const struct timeval *tv) -{ - __u64 ms = tv->tv_usec / 1000; - ms += (__u64) tv->tv_sec * (__u64) 1000; - return ms; -} - /** Convert to micro-seconds */ static inline __u64 tv_to_us(const struct timeval *tv) { @@ -461,51 +405,13 @@ static inline __u64 tv_to_us(const struct timeval *tv) return us; } -static inline __u64 pg_div(__u64 n, __u32 base) -{ - __u64 tmp = n; - do_div(tmp, base); - /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n", - n, base, tmp); */ - return tmp; -} - -static inline __u64 pg_div64(__u64 n, __u64 base) -{ - __u64 tmp = n; -/* - * How do we know if the architecture we are running on - * supports division with 64 bit base? - * - */ -#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) - - do_div(tmp, base); -#else - tmp = divremdi3(n, base, PG_DIV); -#endif - return tmp; -} - -static inline __u64 getCurMs(void) -{ - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_ms(&tv); -} - -static inline __u64 getCurUs(void) +static __u64 getCurUs(void) { struct timeval tv; do_gettimeofday(&tv); return tv_to_us(&tv); } -static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b) -{ - return tv_to_us(a) - tv_to_us(b); -} - /* old include end */ static char version[] __initdata = VERSION; @@ -2358,9 +2264,11 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) t = random32() % (imx - imn) + imn; s = htonl(t); - while (LOOPBACK(s) || MULTICAST(s) - || BADCLASS(s) || ZERONET(s) - || LOCAL_MCAST(s)) { + while (ipv4_is_loopback(s) || + ipv4_is_multicast(s) || + ipv4_is_lbcast(s) || + ipv4_is_zeronet(s) || + ipv4_is_local_multicast(s)) { t = random32() % (imx - imn) + imn; s = htonl(t); } diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 45aed75..2d3035d 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -69,8 +69,6 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, return 0; } -EXPORT_SYMBOL(reqsk_queue_alloc); - void __reqsk_queue_destroy(struct request_sock_queue *queue) { struct listen_sock *lopt; @@ -91,8 +89,6 @@ void __reqsk_queue_destroy(struct request_sock_queue *queue) kfree(lopt); } -EXPORT_SYMBOL(__reqsk_queue_destroy); - static inline struct listen_sock *reqsk_queue_yank_listen_sk( struct request_sock_queue *queue) { @@ -134,4 +130,3 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) kfree(lopt); } -EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fed95a3..ddbdde8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -60,7 +60,6 @@ struct rtnl_link }; static DEFINE_MUTEX(rtnl_mutex); -static struct sock *rtnl; void rtnl_lock(void) { @@ -458,8 +457,9 @@ size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size) return ret; } -int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) { + struct sock *rtnl = net->rtnl; int err = 0; NETLINK_CB(skb).dst_group = group; @@ -471,14 +471,17 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } -int rtnl_unicast(struct sk_buff *skb, u32 pid) +int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) { + struct sock *rtnl = net->rtnl; + return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, +int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { + struct sock *rtnl = net->rtnl; int report = 0; if (nlh) @@ -487,8 +490,10 @@ int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, return nlmsg_notify(rtnl, skb, pid, group, report, flags); } -void rtnl_set_sk_err(u32 group, int error) +void rtnl_set_sk_err(struct net *net, u32 group, int error) { + struct sock *rtnl = net->rtnl; + netlink_set_err(rtnl, 0, group, error); } @@ -1186,7 +1191,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) kfree_skb(nskb); goto errout; } - err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); errout: dev_put(dev); @@ -1219,6 +1224,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { + struct net *net = dev->nd_net; struct sk_buff *skb; int err = -ENOBUFS; @@ -1233,10 +1239,10 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* Protected by RTNL sempahore. */ @@ -1247,6 +1253,7 @@ static int rtattr_max; static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = skb->sk->sk_net; rtnl_doit_func doit; int sz_idx, kind; int min_len; @@ -1275,6 +1282,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { + struct sock *rtnl; rtnl_dumpit_func dumpit; dumpit = rtnl_get_dumpit(family, type); @@ -1282,6 +1290,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; __rtnl_unlock(); + rtnl = net->rtnl; err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL); rtnl_lock(); return err; @@ -1326,9 +1335,6 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi { struct net_device *dev = ptr; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_UNREGISTER: rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); @@ -1354,6 +1360,29 @@ static struct notifier_block rtnetlink_dev_notifier = { .notifier_call = rtnetlink_event, }; + +static int rtnetlink_net_init(struct net *net) +{ + struct sock *sk; + sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX, + rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); + if (!sk) + return -ENOMEM; + net->rtnl = sk; + return 0; +} + +static void rtnetlink_net_exit(struct net *net) +{ + netlink_kernel_release(net->rtnl); + net->rtnl = NULL; +} + +static struct pernet_operations rtnetlink_net_ops = { + .init = rtnetlink_net_init, + .exit = rtnetlink_net_exit, +}; + void __init rtnetlink_init(void) { int i; @@ -1366,10 +1395,9 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(&init_net, NETLINK_ROUTE, RTNLGRP_MAX, - rtnetlink_rcv, &rtnl_mutex, THIS_MODULE); - if (rtnl == NULL) + if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); + netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); register_netdevice_notifier(&rtnetlink_dev_notifier); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b628377..98420f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -52,6 +52,7 @@ #endif #include <linux/string.h> #include <linux/skbuff.h> +#include <linux/splice.h> #include <linux/cache.h> #include <linux/rtnetlink.h> #include <linux/init.h> @@ -71,6 +72,40 @@ static struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +static void sock_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct sk_buff *skb = (struct sk_buff *) buf->private; + + kfree_skb(skb); +} + +static void sock_pipe_buf_get(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct sk_buff *skb = (struct sk_buff *) buf->private; + + skb_get(skb); +} + +static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + + +/* Pipe buffer operations for a socket. */ +static struct pipe_buf_operations sock_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = sock_pipe_buf_release, + .steal = sock_pipe_buf_steal, + .get = sock_pipe_buf_get, +}; + /* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always @@ -1122,6 +1157,217 @@ fault: return -EFAULT; } +/* + * Callback from splice_to_pipe(), if we need to release some pages + * at the end of the spd in case we error'ed out in filling the pipe. + */ +static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) +{ + struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private; + + kfree_skb(skb); +} + +/* + * Fill page/offset/length into spd, if it can hold more pages. + */ +static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, + unsigned int len, unsigned int offset, + struct sk_buff *skb) +{ + if (unlikely(spd->nr_pages == PIPE_BUFFERS)) + return 1; + + spd->pages[spd->nr_pages] = page; + spd->partial[spd->nr_pages].len = len; + spd->partial[spd->nr_pages].offset = offset; + spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb); + spd->nr_pages++; + return 0; +} + +/* + * Map linear and fragment data from the skb to spd. Returns number of + * pages mapped. + */ +static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, + unsigned int *total_len, + struct splice_pipe_desc *spd) +{ + unsigned int nr_pages = spd->nr_pages; + unsigned int poff, plen, len, toff, tlen; + int headlen, seg; + + toff = *offset; + tlen = *total_len; + if (!tlen) + goto err; + + /* + * if the offset is greater than the linear part, go directly to + * the fragments. + */ + headlen = skb_headlen(skb); + if (toff >= headlen) { + toff -= headlen; + goto map_frag; + } + + /* + * first map the linear region into the pages/partial map, skipping + * any potential initial offset. + */ + len = 0; + while (len < headlen) { + void *p = skb->data + len; + + poff = (unsigned long) p & (PAGE_SIZE - 1); + plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff); + len += plen; + + if (toff) { + if (plen <= toff) { + toff -= plen; + continue; + } + plen -= toff; + poff += toff; + toff = 0; + } + + plen = min(plen, tlen); + if (!plen) + break; + + /* + * just jump directly to update and return, no point + * in going over fragments when the output is full. + */ + if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) + goto done; + + tlen -= plen; + } + + /* + * then map the fragments + */ +map_frag: + for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { + const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; + + plen = f->size; + poff = f->page_offset; + + if (toff) { + if (plen <= toff) { + toff -= plen; + continue; + } + plen -= toff; + poff += toff; + toff = 0; + } + + plen = min(plen, tlen); + if (!plen) + break; + + if (spd_fill_page(spd, f->page, plen, poff, skb)) + break; + + tlen -= plen; + } + +done: + if (spd->nr_pages - nr_pages) { + *offset = 0; + *total_len = tlen; + return 0; + } +err: + return 1; +} + +/* + * Map data from the skb to a pipe. Should handle both the linear part, + * the fragments, and the frag list. It does NOT handle frag lists within + * the frag list, if such a thing exists. We'd probably need to recurse to + * handle that cleanly. + */ +int skb_splice_bits(struct sk_buff *__skb, unsigned int offset, + struct pipe_inode_info *pipe, unsigned int tlen, + unsigned int flags) +{ + struct partial_page partial[PIPE_BUFFERS]; + struct page *pages[PIPE_BUFFERS]; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &sock_pipe_buf_ops, + .spd_release = sock_spd_release, + }; + struct sk_buff *skb; + + /* + * I'd love to avoid the clone here, but tcp_read_sock() + * ignores reference counts and unconditonally kills the sk_buff + * on return from the actor. + */ + skb = skb_clone(__skb, GFP_KERNEL); + if (unlikely(!skb)) + return -ENOMEM; + + /* + * __skb_splice_bits() only fails if the output has no room left, + * so no point in going over the frag_list for the error case. + */ + if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + goto done; + else if (!tlen) + goto done; + + /* + * now see if we have a frag_list to map + */ + if (skb_shinfo(skb)->frag_list) { + struct sk_buff *list = skb_shinfo(skb)->frag_list; + + for (; list && tlen; list = list->next) { + if (__skb_splice_bits(list, &offset, &tlen, &spd)) + break; + } + } + +done: + /* + * drop our reference to the clone, the pipe consumption will + * drop the rest. + */ + kfree_skb(skb); + + if (spd.nr_pages) { + int ret; + + /* + * Drop the socket lock, otherwise we have reverse + * locking dependencies between sk_lock and i_mutex + * here as compared to sendfile(). We enter here + * with the socket lock held, and splice_to_pipe() will + * grab the pipe inode lock. For sendfile() emulation, + * we call into ->sendpage() with the i_mutex lock held + * and networking will grab the socket lock. + */ + release_sock(__skb->sk); + ret = splice_to_pipe(pipe, &spd); + lock_sock(__skb->sk); + return ret; + } + + return 0; +} + /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer diff --git a/net/core/sock.c b/net/core/sock.c index c519b43..1c4b1cd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -154,7 +154,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , - "sk_lock-27" , "sk_lock-28" , "sk_lock-29" , + "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" }; @@ -168,7 +168,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , - "slock-27" , "slock-28" , "slock-29" , + "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_MAX" }; @@ -282,6 +282,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; + if (!sk_rmem_schedule(sk, skb->truesize)) { + err = -ENOBUFS; + goto out; + } + skb->dev = NULL; skb_set_owner_r(skb, sk); @@ -419,6 +424,14 @@ out: return ret; } +static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) +{ + if (valbool) + sock_set_flag(sk, bit); + else + sock_reset_flag(sk, bit); +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -463,11 +476,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname, case SO_DEBUG: if (val && !capable(CAP_NET_ADMIN)) { ret = -EACCES; - } - else if (valbool) - sock_set_flag(sk, SOCK_DBG); - else - sock_reset_flag(sk, SOCK_DBG); + } else + sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: sk->sk_reuse = valbool; @@ -477,10 +487,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, ret = -ENOPROTOOPT; break; case SO_DONTROUTE: - if (valbool) - sock_set_flag(sk, SOCK_LOCALROUTE); - else - sock_reset_flag(sk, SOCK_LOCALROUTE); + sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); break; case SO_BROADCAST: sock_valbool_flag(sk, SOCK_BROADCAST, valbool); @@ -1105,7 +1112,9 @@ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; + skb_truesize_check(skb); atomic_sub(skb->truesize, &sk->sk_rmem_alloc); + sk_mem_uncharge(skb->sk, skb->truesize); } @@ -1382,6 +1391,103 @@ int sk_wait_data(struct sock *sk, long *timeo) EXPORT_SYMBOL(sk_wait_data); +/** + * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated + * @sk: socket + * @size: memory size to allocate + * @kind: allocation type + * + * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means + * rmem allocation. This function assumes that protocols which have + * memory_pressure use sk_wmem_queued as write buffer accounting. + */ +int __sk_mem_schedule(struct sock *sk, int size, int kind) +{ + struct proto *prot = sk->sk_prot; + int amt = sk_mem_pages(size); + int allocated; + + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + allocated = atomic_add_return(amt, prot->memory_allocated); + + /* Under limit. */ + if (allocated <= prot->sysctl_mem[0]) { + if (prot->memory_pressure && *prot->memory_pressure) + *prot->memory_pressure = 0; + return 1; + } + + /* Under pressure. */ + if (allocated > prot->sysctl_mem[1]) + if (prot->enter_memory_pressure) + prot->enter_memory_pressure(); + + /* Over hard limit. */ + if (allocated > prot->sysctl_mem[2]) + goto suppress_allocation; + + /* guarantee minimum buffer size under pressure */ + if (kind == SK_MEM_RECV) { + if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + return 1; + } else { /* SK_MEM_SEND */ + if (sk->sk_type == SOCK_STREAM) { + if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + return 1; + } else if (atomic_read(&sk->sk_wmem_alloc) < + prot->sysctl_wmem[0]) + return 1; + } + + if (prot->memory_pressure) { + if (!*prot->memory_pressure || + prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + sk_mem_pages(sk->sk_wmem_queued + + atomic_read(&sk->sk_rmem_alloc) + + sk->sk_forward_alloc)) + return 1; + } + +suppress_allocation: + + if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { + sk_stream_moderate_sndbuf(sk); + + /* Fail only if socket is _under_ its sndbuf. + * In this case we cannot block, so that we have to fail. + */ + if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) + return 1; + } + + /* Alas. Undo changes. */ + sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; + atomic_sub(amt, prot->memory_allocated); + return 0; +} + +EXPORT_SYMBOL(__sk_mem_schedule); + +/** + * __sk_reclaim - reclaim memory_allocated + * @sk: socket + */ +void __sk_mem_reclaim(struct sock *sk) +{ + struct proto *prot = sk->sk_prot; + + atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, + prot->memory_allocated); + sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; + + if (prot->memory_pressure && *prot->memory_pressure && + (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0])) + *prot->memory_pressure = 0; +} + +EXPORT_SYMBOL(__sk_mem_reclaim); + + /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain @@ -1496,7 +1602,7 @@ static void sock_def_error_report(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,0,POLL_ERR); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1505,7 +1611,7 @@ static void sock_def_readable(struct sock *sk, int len) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,1,POLL_IN); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1522,7 +1628,7 @@ static void sock_def_write_space(struct sock *sk) /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1537,7 +1643,7 @@ void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) if (send_sigurg(&sk->sk_socket->file->f_owner)) - sk_wake_async(sk, 3, POLL_PRI); + sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } void sk_reset_timer(struct sock *sk, struct timer_list* timer, @@ -1611,6 +1717,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, -1L); atomic_set(&sk->sk_refcnt, 1); + atomic_set(&sk->sk_drops, 0); } void fastcall lock_sock_nested(struct sock *sk, int subclass) @@ -1801,65 +1908,15 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); -#ifdef CONFIG_SMP -/* - * Define default functions to keep track of inuse sockets per protocol - * Note that often used protocols use dedicated functions to get a speed increase. - * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE) - */ -static void inuse_add(struct proto *prot, int inc) -{ - per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc; -} - -static int inuse_get(const struct proto *prot) -{ - int res = 0, cpu; - for_each_possible_cpu(cpu) - res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; - return res; -} - -static int inuse_init(struct proto *prot) -{ - if (!prot->inuse_getval || !prot->inuse_add) { - prot->inuse_ptr = alloc_percpu(int); - if (prot->inuse_ptr == NULL) - return -ENOBUFS; - - prot->inuse_getval = inuse_get; - prot->inuse_add = inuse_add; - } - return 0; -} - -static void inuse_fini(struct proto *prot) -{ - if (prot->inuse_ptr != NULL) { - free_percpu(prot->inuse_ptr); - prot->inuse_ptr = NULL; - prot->inuse_getval = NULL; - prot->inuse_add = NULL; - } -} -#else -static inline int inuse_init(struct proto *prot) -{ - return 0; -} - -static inline void inuse_fini(struct proto *prot) -{ -} -#endif - int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; - if (inuse_init(prot)) + if (sock_prot_inuse_init(prot) != 0) { + printk(KERN_CRIT "%s: Can't alloc inuse counters!\n", prot->name); goto out; + } if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, @@ -1927,7 +1984,7 @@ out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; out_free_inuse: - inuse_fini(prot); + sock_prot_inuse_free(prot); out: return -ENOBUFS; } @@ -1940,7 +1997,8 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); - inuse_fini(prot); + sock_prot_inuse_free(prot); + if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; @@ -1967,6 +2025,7 @@ EXPORT_SYMBOL(proto_unregister); #ifdef CONFIG_PROC_FS static void *proto_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(proto_list_lock) { read_lock(&proto_list_lock); return seq_list_start_head(&proto_list, *pos); @@ -1978,6 +2037,7 @@ static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void proto_seq_stop(struct seq_file *seq, void *v) + __releases(proto_list_lock) { read_unlock(&proto_list_lock); } diff --git a/net/core/stream.c b/net/core/stream.c index 755bacb..4a0ad15 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } } @@ -172,17 +172,6 @@ do_interrupted: EXPORT_SYMBOL(sk_stream_wait_memory); -void sk_stream_rfree(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - skb_truesize_check(skb); - atomic_sub(skb->truesize, &sk->sk_rmem_alloc); - sk->sk_forward_alloc += skb->truesize; -} - -EXPORT_SYMBOL(sk_stream_rfree); - int sk_stream_error(struct sock *sk, int flags, int err) { if (err == -EPIPE) @@ -194,76 +183,6 @@ int sk_stream_error(struct sock *sk, int flags, int err) EXPORT_SYMBOL(sk_stream_error); -void __sk_stream_mem_reclaim(struct sock *sk) -{ - atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM, - sk->sk_prot->memory_allocated); - sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1; - if (*sk->sk_prot->memory_pressure && - (atomic_read(sk->sk_prot->memory_allocated) < - sk->sk_prot->sysctl_mem[0])) - *sk->sk_prot->memory_pressure = 0; -} - -EXPORT_SYMBOL(__sk_stream_mem_reclaim); - -int sk_stream_mem_schedule(struct sock *sk, int size, int kind) -{ - int amt = sk_stream_pages(size); - - sk->sk_forward_alloc += amt * SK_STREAM_MEM_QUANTUM; - atomic_add(amt, sk->sk_prot->memory_allocated); - - /* Under limit. */ - if (atomic_read(sk->sk_prot->memory_allocated) < sk->sk_prot->sysctl_mem[0]) { - if (*sk->sk_prot->memory_pressure) - *sk->sk_prot->memory_pressure = 0; - return 1; - } - - /* Over hard limit. */ - if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[2]) { - sk->sk_prot->enter_memory_pressure(); - goto suppress_allocation; - } - - /* Under pressure. */ - if (atomic_read(sk->sk_prot->memory_allocated) > sk->sk_prot->sysctl_mem[1]) - sk->sk_prot->enter_memory_pressure(); - - if (kind) { - if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_prot->sysctl_rmem[0]) - return 1; - } else if (sk->sk_wmem_queued < sk->sk_prot->sysctl_wmem[0]) - return 1; - - if (!*sk->sk_prot->memory_pressure || - sk->sk_prot->sysctl_mem[2] > atomic_read(sk->sk_prot->sockets_allocated) * - sk_stream_pages(sk->sk_wmem_queued + - atomic_read(&sk->sk_rmem_alloc) + - sk->sk_forward_alloc)) - return 1; - -suppress_allocation: - - if (!kind) { - sk_stream_moderate_sndbuf(sk); - - /* Fail only if socket is _under_ its sndbuf. - * In this case we cannot block, so that we have to fail. - */ - if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) - return 1; - } - - /* Alas. Undo changes. */ - sk->sk_forward_alloc -= amt * SK_STREAM_MEM_QUANTUM; - atomic_sub(amt, sk->sk_prot->memory_allocated); - return 0; -} - -EXPORT_SYMBOL(sk_stream_mem_schedule); - void sk_stream_kill_queues(struct sock *sk) { /* First the read buffer. */ @@ -276,7 +195,7 @@ void sk_stream_kill_queues(struct sock *sk) BUG_TRAP(skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ - sk_stream_mem_reclaim(sk); + sk_mem_reclaim(sk); BUG_TRAP(!sk->sk_wmem_queued); BUG_TRAP(!sk->sk_forward_alloc); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 113cc72..130338f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,12 +10,11 @@ #include <linux/module.h> #include <linux/socket.h> #include <linux/netdevice.h> +#include <linux/init.h> #include <net/sock.h> #include <net/xfrm.h> -#ifdef CONFIG_SYSCTL - -ctl_table core_table[] = { +static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { .ctl_name = NET_CORE_WMEM_MAX, @@ -128,7 +127,7 @@ ctl_table core_table[] = { { .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", - .data = &sysctl_somaxconn, + .data = &init_net.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -152,4 +151,65 @@ ctl_table core_table[] = { { .ctl_name = 0 } }; -#endif +static __net_initdata struct ctl_path net_core_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "core", .ctl_name = NET_CORE, }, + { }, +}; + +static __net_init int sysctl_core_net_init(struct net *net) +{ + struct ctl_table *tbl, *tmp; + + net->sysctl_somaxconn = SOMAXCONN; + + tbl = net_core_table; + if (net != &init_net) { + tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + + for (tmp = tbl; tmp->procname; tmp++) { + if (tmp->data >= (void *)&init_net && + tmp->data < (void *)(&init_net + 1)) + tmp->data += (char *)net - (char *)&init_net; + else + tmp->mode &= ~0222; + } + } + + net->sysctl_core_hdr = register_net_sysctl_table(net, + net_core_path, tbl); + if (net->sysctl_core_hdr == NULL) + goto err_reg; + + return 0; + +err_reg: + if (tbl != net_core_table) + kfree(tbl); +err_dup: + return -ENOMEM; +} + +static __net_exit void sysctl_core_net_exit(struct net *net) +{ + struct ctl_table *tbl; + + tbl = net->sysctl_core_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->sysctl_core_hdr); + BUG_ON(tbl == net_core_table); + kfree(tbl); +} + +static __net_initdata struct pernet_operations sysctl_core_ops = { + .init = sysctl_core_net_init, + .exit = sysctl_core_net_exit, +}; + +static __init int sysctl_core_init(void) +{ + return register_pernet_subsys(&sysctl_core_ops); +} + +__initcall(sysctl_core_init); diff --git a/net/core/utils.c b/net/core/utils.c index 0bf17da4..8031eb5 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -91,17 +91,6 @@ EXPORT_SYMBOL(in_aton); #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 -static inline int digit2bin(char c, int delim) -{ - if (c == delim || c == '\0') - return IN6PTON_DELIM; - if (c == '.') - return IN6PTON_DOT; - if (c >= '0' && c <= '9') - return (IN6PTON_DIGIT | (c - '0')); - return IN6PTON_UNKNOWN; -} - static inline int xdigit2bin(char c, int delim) { if (c == delim || c == '\0') @@ -293,3 +282,19 @@ out: } EXPORT_SYMBOL(in6_pton); + +void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, + __be32 from, __be32 to, int pseudohdr) +{ + __be32 diff[] = { ~from, to }; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_partial(diff, sizeof(diff), + ~csum_unfold(*sum))); + if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + skb->csum = ~csum_partial(diff, sizeof(diff), + ~skb->csum); + } else if (pseudohdr) + *sum = ~csum_fold(csum_partial(diff, sizeof(diff), + csum_unfold(*sum))); +} +EXPORT_SYMBOL(inet_proto_csum_replace4); |