diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 1 | ||||
-rw-r--r-- | net/core/datagram.c | 16 | ||||
-rw-r--r-- | net/core/dev.c | 39 | ||||
-rw-r--r-- | net/core/dv.c | 546 | ||||
-rw-r--r-- | net/core/fib_rules.c | 71 | ||||
-rw-r--r-- | net/core/filter.c | 6 | ||||
-rw-r--r-- | net/core/iovec.c | 4 | ||||
-rw-r--r-- | net/core/neighbour.c | 24 | ||||
-rw-r--r-- | net/core/netpoll.c | 327 | ||||
-rw-r--r-- | net/core/pktgen.c | 68 | ||||
-rw-r--r-- | net/core/request_sock.c | 35 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 60 | ||||
-rw-r--r-- | net/core/skbuff.c | 26 | ||||
-rw-r--r-- | net/core/sock.c | 13 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 14 | ||||
-rw-r--r-- | net/core/utils.c | 10 |
16 files changed, 382 insertions, 878 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 1195680..73272d5 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -12,7 +12,6 @@ obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ obj-$(CONFIG_XFRM) += flow.o obj-$(CONFIG_SYSFS) += net-sysfs.o -obj-$(CONFIG_NET_DIVERT) += dv.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o diff --git a/net/core/datagram.c b/net/core/datagram.c index f558c61..797fdd4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -321,7 +321,7 @@ fault: static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 __user *to, int len, - unsigned int *csump) + __wsum *csump) { int start = skb_headlen(skb); int pos = 0; @@ -350,7 +350,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -386,7 +386,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2 = 0; + __wsum csum2 = 0; if (copy > len) copy = len; if (skb_copy_and_csum_datagram(list, @@ -411,11 +411,11 @@ fault: return -EFAULT; } -unsigned int __skb_checksum_complete(struct sk_buff *skb) +__sum16 __skb_checksum_complete(struct sk_buff *skb) { - unsigned int sum; + __sum16 sum; - sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); + sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); @@ -441,7 +441,7 @@ EXPORT_SYMBOL(__skb_checksum_complete); int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, int hlen, struct iovec *iov) { - unsigned int csum; + __wsum csum; int chunk = skb->len - hlen; /* Skip filled elements. @@ -460,7 +460,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum)) goto fault; - if ((unsigned short)csum_fold(csum)) + if (csum_fold(csum)) goto csum_error; if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) netdev_rx_csum_fault(skb->dev); diff --git a/net/core/dev.c b/net/core/dev.c index 81c426a..59d058a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -98,7 +98,6 @@ #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/if_bridge.h> -#include <linux/divert.h> #include <net/dst.h> #include <net/pkt_sched.h> #include <net/checksum.h> @@ -1170,7 +1169,7 @@ EXPORT_SYMBOL(netif_device_attach); */ int skb_checksum_help(struct sk_buff *skb) { - unsigned int csum; + __wsum csum; int ret = 0, offset = skb->h.raw - skb->data; if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -1192,9 +1191,9 @@ int skb_checksum_help(struct sk_buff *skb) offset = skb->tail - skb->h.raw; BUG_ON(offset <= 0); - BUG_ON(skb->csum + 2 > offset); + BUG_ON(skb->csum_offset + 2 > offset); - *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum); + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; @@ -1216,7 +1215,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_type *ptype; - int type = skb->protocol; + __be16 type = skb->protocol; int err; BUG_ON(skb_shinfo(skb)->frag_list); @@ -1767,7 +1766,7 @@ int netif_receive_skb(struct sk_buff *skb) struct packet_type *ptype, *pt_prev; struct net_device *orig_dev; int ret = NET_RX_DROP; - unsigned short type; + __be16 type; /* if we've gotten here through NAPI, check netpoll */ if (skb->dev->poll && netpoll_rx(skb)) @@ -1827,8 +1826,6 @@ int netif_receive_skb(struct sk_buff *skb) ncls: #endif - handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; @@ -2898,10 +2895,6 @@ int register_netdevice(struct net_device *dev) spin_lock_init(&dev->ingress_lock); #endif - ret = alloc_divert_blk(dev); - if (ret) - goto out; - dev->iflink = -1; /* Init, if this function is available */ @@ -2910,13 +2903,13 @@ int register_netdevice(struct net_device *dev) if (ret) { if (ret > 0) ret = -EIO; - goto out_err; + goto out; } } if (!dev_valid_name(dev->name)) { ret = -EINVAL; - goto out_err; + goto out; } dev->ifindex = dev_new_index(); @@ -2930,7 +2923,7 @@ int register_netdevice(struct net_device *dev) = hlist_entry(p, struct net_device, name_hlist); if (!strncmp(d->name, dev->name, IFNAMSIZ)) { ret = -EEXIST; - goto out_err; + goto out; } } @@ -2974,7 +2967,7 @@ int register_netdevice(struct net_device *dev) ret = netdev_register_sysfs(dev); if (ret) - goto out_err; + goto out; dev->reg_state = NETREG_REGISTERED; /* @@ -3001,9 +2994,6 @@ int register_netdevice(struct net_device *dev) out: return ret; -out_err: - free_divert_blk(dev); - goto out; } /** @@ -3035,15 +3025,6 @@ int register_netdev(struct net_device *dev) goto out; } - /* - * Back compatibility hook. Kill this one in 2.5 - */ - if (dev->name[0] == 0 || dev->name[0] == ' ') { - err = dev_alloc_name(dev, "eth%d"); - if (err < 0) - goto out; - } - err = register_netdevice(dev); out: rtnl_unlock(); @@ -3329,8 +3310,6 @@ int unregister_netdevice(struct net_device *dev) /* Notifier chain MUST detach us from master device. */ BUG_TRAP(!dev->master); - free_divert_blk(dev); - /* Finish processing unregister after unlock */ net_set_todo(dev); diff --git a/net/core/dv.c b/net/core/dv.c deleted file mode 100644 index 29ee77f1..0000000 --- a/net/core/dv.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Generic frame diversion - * - * Authors: - * Benoit LOCHER: initial integration within the kernel with support for ethernet - * Dave Miller: improvement on the code (correctness, performance and source files) - * - */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/capability.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <net/dst.h> -#include <net/arp.h> -#include <net/sock.h> -#include <net/ipv6.h> -#include <net/ip.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/checksum.h> -#include <linux/divert.h> -#include <linux/sockios.h> - -const char sysctl_divert_version[32]="0.46"; /* Current version */ - -static int __init dv_init(void) -{ - return 0; -} -module_init(dv_init); - -/* - * Allocate a divert_blk for a device. This must be an ethernet nic. - */ -int alloc_divert_blk(struct net_device *dev) -{ - int alloc_size = (sizeof(struct divert_blk) + 3) & ~3; - - dev->divert = NULL; - if (dev->type == ARPHRD_ETHER) { - dev->divert = kzalloc(alloc_size, GFP_KERNEL); - if (dev->divert == NULL) { - printk(KERN_INFO "divert: unable to allocate divert_blk for %s\n", - dev->name); - return -ENOMEM; - } - dev_hold(dev); - } - - return 0; -} - -/* - * Free a divert_blk allocated by the above function, if it was - * allocated on that device. - */ -void free_divert_blk(struct net_device *dev) -{ - if (dev->divert) { - kfree(dev->divert); - dev->divert=NULL; - dev_put(dev); - } -} - -/* - * Adds a tcp/udp (source or dest) port to an array - */ -static int add_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave :) - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) - return -EALREADY; - } - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == 0) { - ports[i] = port; - return 0; - } - } - - return -ENOBUFS; -} - -/* - * Removes a port from an array tcp/udp (source or dest) - */ -static int remove_port(u16 ports[], u16 port) -{ - int i; - - if (port == 0) - return -EINVAL; - - /* Storing directly in network format for performance, - * thanks Dave ! - */ - port = htons(port); - - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - if (ports[i] == port) { - ports[i] = 0; - return 0; - } - } - - return -EINVAL; -} - -/* Some basic sanity checks on the arguments passed to divert_ioctl() */ -static int check_args(struct divert_cf *div_cf, struct net_device **dev) -{ - char devname[32]; - int ret; - - if (dev == NULL) - return -EFAULT; - - /* GETVERSION: all other args are unused */ - if (div_cf->cmd == DIVCMD_GETVERSION) - return 0; - - /* Network device index should reasonably be between 0 and 1000 :) */ - if (div_cf->dev_index < 0 || div_cf->dev_index > 1000) - return -EINVAL; - - /* Let's try to find the ifname */ - sprintf(devname, "eth%d", div_cf->dev_index); - *dev = dev_get_by_name(devname); - - /* dev should NOT be null */ - if (*dev == NULL) - return -EINVAL; - - ret = 0; - - /* user issuing the ioctl must be a super one :) */ - if (!capable(CAP_SYS_ADMIN)) { - ret = -EPERM; - goto out; - } - - /* Device must have a divert_blk member NOT null */ - if ((*dev)->divert == NULL) - ret = -EINVAL; -out: - dev_put(*dev); - return ret; -} - -/* - * control function of the diverter - */ -#if 0 -#define DVDBG(a) \ - printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) -#else -#define DVDBG(a) -#endif - -int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg) -{ - struct divert_cf div_cf; - struct divert_blk *div_blk; - struct net_device *dev; - int ret; - - switch (cmd) { - case SIOCGIFDIVERT: - DVDBG("SIOCGIFDIVERT, copy_from_user"); - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - DVDBG("before check_args"); - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - DVDBG("after checkargs"); - div_blk = dev->divert; - - DVDBG("befre switch()"); - switch (div_cf.cmd) { - case DIVCMD_GETSTATUS: - /* Now, just give the user the raw divert block - * for him to play with :) - */ - if (copy_to_user(div_cf.arg1.ptr, dev->divert, - sizeof(struct divert_blk))) - return -EFAULT; - break; - - case DIVCMD_GETVERSION: - DVDBG("GETVERSION: checking ptr"); - if (div_cf.arg1.ptr == NULL) - return -EINVAL; - DVDBG("GETVERSION: copying data to userland"); - if (copy_to_user(div_cf.arg1.ptr, - sysctl_divert_version, 32)) - return -EFAULT; - DVDBG("GETVERSION: data copied"); - break; - - default: - return -EINVAL; - } - - break; - - case SIOCSIFDIVERT: - if (copy_from_user(&div_cf, arg, sizeof(struct divert_cf))) - return -EFAULT; - - ret = check_args(&div_cf, &dev); - if (ret) - return ret; - - div_blk = dev->divert; - - switch(div_cf.cmd) { - case DIVCMD_RESET: - div_blk->divert = 0; - div_blk->protos = DIVERT_PROTO_NONE; - memset(div_blk->tcp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->tcp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_dst, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - memset(div_blk->udp_src, 0, - MAX_DIVERT_PORTS * sizeof(u16)); - return 0; - - case DIVCMD_DIVERT: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->divert) - return -EALREADY; - div_blk->divert = 1; - break; - - case DIVARG1_DISABLE: - if (!div_blk->divert) - return -EALREADY; - div_blk->divert = 0; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_IP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_IP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_IP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_IP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_IP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_TCP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_TCP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_TCP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_TCP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_TCPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->tcp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_UDP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_UDP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_UDP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_UDP; - break; - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPDST: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_dst, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_UDPSRC: - switch(div_cf.arg1.int32) { - case DIVARG1_ADD: - return add_port(div_blk->udp_src, - div_cf.arg2.uint16); - - case DIVARG1_REMOVE: - return remove_port(div_blk->udp_src, - div_cf.arg2.uint16); - - default: - return -EINVAL; - } - - break; - - case DIVCMD_ICMP: - switch(div_cf.arg1.int32) { - case DIVARG1_ENABLE: - if (div_blk->protos & DIVERT_PROTO_ICMP) - return -EALREADY; - div_blk->protos |= DIVERT_PROTO_ICMP; - break; - - case DIVARG1_DISABLE: - if (!(div_blk->protos & DIVERT_PROTO_ICMP)) - return -EALREADY; - div_blk->protos &= ~DIVERT_PROTO_ICMP; - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - break; - - default: - return -EINVAL; - } - - return 0; -} - - -/* - * Check if packet should have its dest mac address set to the box itself - * for diversion - */ - -#define ETH_DIVERT_FRAME(skb) \ - memcpy(eth_hdr(skb), skb->dev->dev_addr, ETH_ALEN); \ - skb->pkt_type=PACKET_HOST - -void divert_frame(struct sk_buff *skb) -{ - struct ethhdr *eth = eth_hdr(skb); - struct iphdr *iph; - struct tcphdr *tcph; - struct udphdr *udph; - struct divert_blk *divert = skb->dev->divert; - int i, src, dst; - unsigned char *skb_data_end = skb->data + skb->len; - - /* Packet is already aimed at us, return */ - if (!compare_ether_addr(eth->h_dest, skb->dev->dev_addr)) - return; - - /* proto is not IP, do nothing */ - if (eth->h_proto != htons(ETH_P_IP)) - return; - - /* Divert all IP frames ? */ - if (divert->protos & DIVERT_PROTO_IP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP frame (thanks Dave) */ - iph = (struct iphdr *) skb->data; - if (((iph->ihl<<2)+(unsigned char*)(iph)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed IP packet !\n"); - return; - } - - switch (iph->protocol) { - /* Divert all ICMP frames ? */ - case IPPROTO_ICMP: - if (divert->protos & DIVERT_PROTO_ICMP) { - ETH_DIVERT_FRAME(skb); - return; - } - break; - - /* Divert all TCP frames ? */ - case IPPROTO_TCP: - if (divert->protos & DIVERT_PROTO_TCP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * frame (thanx Dave) - */ - tcph = (struct tcphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(tcph+1)) >= skb_data_end) { - printk(KERN_INFO "divert: malformed TCP packet !\n"); - return; - } - - /* Divert some tcp dst/src ports only ?*/ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->tcp_dst[i]; - src = divert->tcp_src[i]; - if ((dst && dst == tcph->dest) || - (src && src == tcph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - - /* Divert all UDP frames ? */ - case IPPROTO_UDP: - if (divert->protos & DIVERT_PROTO_UDP) { - ETH_DIVERT_FRAME(skb); - return; - } - - /* Check for possible (maliciously) malformed IP - * packet (thanks Dave) - */ - udph = (struct udphdr *) - (((unsigned char *)iph) + (iph->ihl<<2)); - if (((unsigned char *)(udph+1)) >= skb_data_end) { - printk(KERN_INFO - "divert: malformed UDP packet !\n"); - return; - } - - /* Divert some udp dst/src ports only ? */ - for (i = 0; i < MAX_DIVERT_PORTS; i++) { - dst = divert->udp_dst[i]; - src = divert->udp_src[i]; - if ((dst && dst == udph->dest) || - (src && src == udph->source)) { - ETH_DIVERT_FRAME(skb); - return; - } - } - break; - } -} diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6b0e63c..1df6cd45 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -107,6 +107,22 @@ out: EXPORT_SYMBOL_GPL(fib_rules_unregister); +static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, + struct flowi *fl, int flags) +{ + int ret = 0; + + if (rule->ifindex && (rule->ifindex != fl->iif)) + goto out; + + if ((rule->mark ^ fl->mark) & rule->mark_mask) + goto out; + + ret = ops->match(rule, fl, flags); +out: + return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; +} + int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { @@ -116,10 +132,7 @@ int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, rcu_read_lock(); list_for_each_entry_rcu(rule, ops->rules_list, list) { - if (rule->ifindex && (rule->ifindex != fl->iif)) - continue; - - if (!ops->match(rule, fl, flags)) + if (!fib_rule_match(rule, ops, fl, flags)) continue; err = ops->action(rule, fl, flags, arg); @@ -179,6 +192,18 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->ifindex = dev->ifindex; } + if (tb[FRA_FWMARK]) { + rule->mark = nla_get_u32(tb[FRA_FWMARK]); + if (rule->mark) + /* compatibility: if the mark value is non-zero all bits + * are compared unless a mask is explicitly specified. + */ + rule->mark_mask = 0xFFFFFFFF; + } + + if (tb[FRA_FWMASK]) + rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]); + rule->action = frh->action; rule->flags = frh->flags; rule->table = frh_get_table(frh, tb); @@ -250,6 +275,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) nla_strcmp(tb[FRA_IFNAME], rule->ifname)) continue; + if (tb[FRA_FWMARK] && + (rule->mark != nla_get_u32(tb[FRA_FWMARK]))) + continue; + + if (tb[FRA_FWMASK] && + (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -273,6 +306,22 @@ errout: return err; } +static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + struct fib_rule *rule) +{ + size_t payload = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)) + + nla_total_size(IFNAMSIZ) /* FRA_IFNAME */ + + nla_total_size(4) /* FRA_PRIORITY */ + + nla_total_size(4) /* FRA_TABLE */ + + nla_total_size(4) /* FRA_FWMARK */ + + nla_total_size(4); /* FRA_FWMASK */ + + if (ops->nlmsg_payload) + payload += ops->nlmsg_payload(rule); + + return payload; +} + static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, u32 pid, u32 seq, int type, int flags, struct fib_rules_ops *ops) @@ -298,6 +347,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->pref) NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + if (rule->mark) + NLA_PUT_U32(skb, FRA_FWMARK, rule->mark); + + if (rule->mark_mask || rule->mark) + NLA_PUT_U32(skb, FRA_FWMASK, rule->mark_mask); + if (ops->fill(rule, skb, nlh, frh) < 0) goto nla_put_failure; @@ -345,15 +400,13 @@ static void notify_rule_change(int event, struct fib_rule *rule, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL); if (skb == NULL) goto errout; err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in fib_rule_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); errout: diff --git a/net/core/filter.c b/net/core/filter.c index 6732782..0df843b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -178,7 +178,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { - A = ntohl(get_unaligned((u32 *)ptr)); + A = ntohl(get_unaligned((__be32 *)ptr)); continue; } break; @@ -187,7 +187,7 @@ load_w: load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { - A = ntohs(get_unaligned((u16 *)ptr)); + A = ntohs(get_unaligned((__be16 *)ptr)); continue; } break; @@ -261,7 +261,7 @@ load_b: */ switch (k-SKF_AD_OFF) { case SKF_AD_PROTOCOL: - A = htons(skb->protocol); + A = ntohs(skb->protocol); continue; case SKF_AD_PKTTYPE: A = skb->pkt_type; diff --git a/net/core/iovec.c b/net/core/iovec.c index 65e4b56..04b249c 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -158,9 +158,9 @@ int memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, * call to this function will be unaligned also. */ int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, int *csump) + int offset, unsigned int len, __wsum *csump) { - int csum = *csump; + __wsum csum = *csump; int partial_cnt = 0, err = 0; /* Skip over the finished iovecs */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b4b4783..ba509a4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1266,10 +1266,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p = kmalloc(sizeof(*p), GFP_KERNEL); + struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { - memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; atomic_set(&p->refcnt, 1); INIT_RCU_HEAD(&p->rcu_head); @@ -2410,20 +2409,27 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_ARPD +static inline size_t neigh_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */ + + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */ + + nla_total_size(sizeof(struct nda_cacheinfo)) + + nla_total_size(4); /* NDA_PROBES */ +} + static void __neigh_notify(struct neighbour *n, int type, int flags) { struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = neigh_fill_info(skb, n, 0, 0, type, flags); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in neigh_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); errout: @@ -2618,14 +2624,14 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, char *p_name, proc_handler *handler, ctl_handler *strategy) { - struct neigh_sysctl_table *t = kmalloc(sizeof(*t), GFP_KERNEL); + struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template, + sizeof(*t), GFP_KERNEL); const char *dev_name_source = NULL; char *dev_name = NULL; int err = 0; if (!t) return -ENOBUFS; - memcpy(t, &neigh_sysctl_template, sizeof(*t)); t->neigh_vars[0].data = &p->mcast_probes; t->neigh_vars[1].data = &p->ucast_probes; t->neigh_vars[2].data = &p->app_probes; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 63f24c9..b3c559b 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -34,18 +34,12 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) -#define MAX_RETRIES 20000 -static DEFINE_SPINLOCK(skb_list_lock); -static int nr_skbs; -static struct sk_buff *skbs; - -static DEFINE_SPINLOCK(queue_lock); -static int queue_depth; -static struct sk_buff *queue_head, *queue_tail; +static struct sk_buff_head skb_pool; static atomic_t trapped; +#define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 #define NETPOLL_RX_DROP 2 @@ -58,52 +52,34 @@ static void arp_reply(struct sk_buff *skb); static void queue_process(struct work_struct *work) { - unsigned long flags; + struct netpoll_info *npinfo = + container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; - while (queue_head) { - spin_lock_irqsave(&queue_lock, flags); - - skb = queue_head; - queue_head = skb->next; - if (skb == queue_tail) - queue_head = NULL; - - queue_depth--; - - spin_unlock_irqrestore(&queue_lock, flags); - - dev_queue_xmit(skb); - } -} + while ((skb = skb_dequeue(&npinfo->txq))) { + struct net_device *dev = skb->dev; -static DECLARE_WORK(send_queue, queue_process); + if (!netif_device_present(dev) || !netif_running(dev)) { + __kfree_skb(skb); + continue; + } -void netpoll_queue(struct sk_buff *skb) -{ - unsigned long flags; + netif_tx_lock_bh(dev); + if (netif_queue_stopped(dev) || + dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + skb_queue_head(&npinfo->txq, skb); + netif_tx_unlock_bh(dev); - if (queue_depth == MAX_QUEUE_DEPTH) { - __kfree_skb(skb); - return; + schedule_delayed_work(&npinfo->tx_work, HZ/10); + return; + } } - - spin_lock_irqsave(&queue_lock, flags); - if (!queue_head) - queue_head = skb; - else - queue_tail->next = skb; - queue_tail = skb; - queue_depth++; - spin_unlock_irqrestore(&queue_lock, flags); - - schedule_work(&send_queue); } -static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, u32 saddr, u32 daddr) +static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, + unsigned short ulen, __be32 saddr, __be32 daddr) { - unsigned int psum; + __wsum psum; if (uh->check == 0 || skb->ip_summed == CHECKSUM_UNNECESSARY) return 0; @@ -111,7 +87,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); if (skb->ip_summed == CHECKSUM_COMPLETE && - !(u16)csum_fold(csum_add(psum, skb->csum))) + !csum_fold(csum_add(psum, skb->csum))) return 0; skb->csum = psum; @@ -167,12 +143,11 @@ static void service_arp_queue(struct netpoll_info *npi) arp_reply(skb); skb = skb_dequeue(&npi->arp_tx); } - return; } void netpoll_poll(struct netpoll *np) { - if(!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) + if (!np->dev || !netif_running(np->dev) || !np->dev->poll_controller) return; /* Process pending work on NIC */ @@ -190,17 +165,15 @@ static void refill_skbs(void) struct sk_buff *skb; unsigned long flags; - spin_lock_irqsave(&skb_list_lock, flags); - while (nr_skbs < MAX_SKBS) { + spin_lock_irqsave(&skb_pool.lock, flags); + while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - skb->next = skbs; - skbs = skb; - nr_skbs++; + __skb_queue_tail(&skb_pool, skb); } - spin_unlock_irqrestore(&skb_list_lock, flags); + spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) @@ -219,7 +192,7 @@ static void zap_completion_queue(void) while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; - if(skb->destructor) + if (skb->destructor) dev_kfree_skb_any(skb); /* put this one back */ else __kfree_skb(skb); @@ -229,38 +202,25 @@ static void zap_completion_queue(void) put_cpu_var(softnet_data); } -static struct sk_buff * find_skb(struct netpoll *np, int len, int reserve) +static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { - int once = 1, count = 0; - unsigned long flags; - struct sk_buff *skb = NULL; + int count = 0; + struct sk_buff *skb; zap_completion_queue(); + refill_skbs(); repeat: - if (nr_skbs < MAX_SKBS) - refill_skbs(); skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) + skb = skb_dequeue(&skb_pool); if (!skb) { - spin_lock_irqsave(&skb_list_lock, flags); - skb = skbs; - if (skb) { - skbs = skb->next; - skb->next = NULL; - nr_skbs--; + if (++count < 10) { + netpoll_poll(np); + goto repeat; } - spin_unlock_irqrestore(&skb_list_lock, flags); - } - - if(!skb) { - count++; - if (once && (count == 1000000)) { - printk("out of netpoll skbs!\n"); - once = 0; - } - netpoll_poll(np); - goto repeat; + return NULL; } atomic_set(&skb->users, 1); @@ -270,50 +230,40 @@ repeat: static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { - int status; - struct netpoll_info *npinfo; - - if (!np || !np->dev || !netif_running(np->dev)) { - __kfree_skb(skb); - return; - } - - npinfo = np->dev->npinfo; - - /* avoid recursion */ - if (npinfo->poll_owner == smp_processor_id() || - np->dev->xmit_lock_owner == smp_processor_id()) { - if (np->drop) - np->drop(skb); - else - __kfree_skb(skb); - return; - } - - do { - npinfo->tries--; - netif_tx_lock(np->dev); + int status = NETDEV_TX_BUSY; + unsigned long tries; + struct net_device *dev = np->dev; + struct netpoll_info *npinfo = np->dev->npinfo; + + if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { + __kfree_skb(skb); + return; + } + + /* don't get messages out of order, and no recursion */ + if (skb_queue_len(&npinfo->txq) == 0 && + npinfo->poll_owner != smp_processor_id() && + netif_tx_trylock(dev)) { + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { + if (!netif_queue_stopped(dev)) + status = dev->hard_start_xmit(skb, dev); - /* - * network drivers do not expect to be called if the queue is - * stopped. - */ - status = NETDEV_TX_BUSY; - if (!netif_queue_stopped(np->dev)) - status = np->dev->hard_start_xmit(skb, np->dev); + if (status == NETDEV_TX_OK) + break; - netif_tx_unlock(np->dev); + /* tickle device maybe there is some cleanup */ + netpoll_poll(np); - /* success */ - if(!status) { - npinfo->tries = MAX_RETRIES; /* reset */ - return; + udelay(USEC_PER_POLL); } + netif_tx_unlock(dev); + } - /* transmit busy */ - netpoll_poll(np); - udelay(50); - } while (npinfo->tries > 0); + if (status != NETDEV_TX_OK) { + skb_queue_tail(&npinfo->txq, skb); + schedule_delayed_work(&npinfo->tx_work,0); + } } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -345,7 +295,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udp_len, IPPROTO_UDP, csum_partial((unsigned char *)udph, udp_len, 0)); if (udph->check == 0) - udph->check = -1; + udph->check = CSUM_MANGLED_0; skb->nh.iph = iph = (struct iphdr *)skb_push(skb, sizeof(*iph)); @@ -379,7 +329,7 @@ static void arp_reply(struct sk_buff *skb) struct arphdr *arp; unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; - u32 sip, tip; + __be32 sip, tip; struct sk_buff *send_skb; struct netpoll *np = NULL; @@ -431,8 +381,8 @@ static void arp_reply(struct sk_buff *skb) if (np->dev->hard_header && np->dev->hard_header(send_skb, skb->dev, ptype, - np->remote_mac, np->local_mac, - send_skb->len) < 0) { + np->remote_mac, np->local_mac, + send_skb->len) < 0) { kfree_skb(send_skb); return; } @@ -470,7 +420,6 @@ int __netpoll_rx(struct sk_buff *skb) struct netpoll_info *npi = skb->dev->npinfo; struct netpoll *np = npi->rx_np; - if (!np) goto out; if (skb->dev->type != ARPHRD_ETHER) @@ -543,47 +492,47 @@ int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; - if(*cur != '@') { + if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->local_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->local_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); - if(*cur != '/') { + if (*cur != '/') { if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->local_ip=ntohl(in_aton(cur)); - cur=delim; + *delim = 0; + np->local_ip = ntohl(in_aton(cur)); + cur = delim; printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", np->name, HIPQUAD(np->local_ip)); } cur++; - if ( *cur != ',') { + if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; - *delim=0; + *delim = 0; strlcpy(np->dev_name, cur, sizeof(np->dev_name)); - cur=delim; + cur = delim; } cur++; printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); - if ( *cur != '@' ) { + if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; - *delim=0; - np->remote_port=simple_strtol(cur, NULL, 10); - cur=delim; + *delim = 0; + np->remote_port = simple_strtol(cur, NULL, 10); + cur = delim; } cur++; printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); @@ -591,42 +540,41 @@ int netpoll_parse_options(struct netpoll *np, char *opt) /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; - *delim=0; - np->remote_ip=ntohl(in_aton(cur)); - cur=delim+1; + *delim = 0; + np->remote_ip = ntohl(in_aton(cur)); + cur = delim + 1; printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); + np->name, HIPQUAD(np->remote_ip)); - if( *cur != 0 ) - { + if (*cur != 0) { /* MAC address */ if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[0]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[0] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[1]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[1] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[2]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[2] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[3]=simple_strtol(cur, NULL, 16); - cur=delim+1; + *delim = 0; + np->remote_mac[3] = simple_strtol(cur, NULL, 16); + cur = delim + 1; if ((delim = strchr(cur, ':')) == NULL) goto parse_failed; - *delim=0; - np->remote_mac[4]=simple_strtol(cur, NULL, 16); - cur=delim+1; - np->remote_mac[5]=simple_strtol(cur, NULL, 16); + *delim = 0; + np->remote_mac[4] = simple_strtol(cur, NULL, 16); + cur = delim + 1; + np->remote_mac[5] = simple_strtol(cur, NULL, 16); } printk(KERN_INFO "%s: remote ethernet address " @@ -653,34 +601,44 @@ int netpoll_setup(struct netpoll *np) struct in_device *in_dev; struct netpoll_info *npinfo; unsigned long flags; + int err; if (np->dev_name) ndev = dev_get_by_name(np->dev_name); if (!ndev) { printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", np->name, np->dev_name); - return -1; + return -ENODEV; } np->dev = ndev; if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); - if (!npinfo) + if (!npinfo) { + err = -ENOMEM; goto release; + } npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->poll_lock); npinfo->poll_owner = -1; - npinfo->tries = MAX_RETRIES; + spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); - } else + skb_queue_head_init(&npinfo->txq); + INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); + + atomic_set(&npinfo->refcnt, 1); + } else { npinfo = ndev->npinfo; + atomic_inc(&npinfo->refcnt); + } if (!ndev->poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); + err = -ENOTSUPP; goto release; } @@ -691,13 +649,14 @@ int netpoll_setup(struct netpoll *np) np->name, np->dev_name); rtnl_lock(); - if (dev_change_flags(ndev, ndev->flags | IFF_UP) < 0) { + err = dev_open(ndev); + rtnl_unlock(); + + if (err) { printk(KERN_ERR "%s: failed to open %s\n", - np->name, np->dev_name); - rtnl_unlock(); + np->name, ndev->name); goto release; } - rtnl_unlock(); atleast = jiffies + HZ/10; atmost = jiffies + 4*HZ; @@ -735,6 +694,7 @@ int netpoll_setup(struct netpoll *np) rcu_read_unlock(); printk(KERN_ERR "%s: no IP address for %s, aborting\n", np->name, np->dev_name); + err = -EDESTADDRREQ; goto release; } @@ -767,9 +727,16 @@ int netpoll_setup(struct netpoll *np) kfree(npinfo); np->dev = NULL; dev_put(ndev); - return -1; + return err; } +static int __init netpoll_init(void) +{ + skb_queue_head_init(&skb_pool); + return 0; +} +core_initcall(netpoll_init); + void netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; @@ -777,12 +744,25 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; - if (npinfo && npinfo->rx_np == np) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); + if (npinfo) { + if (npinfo->rx_np == np) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + } + + np->dev->npinfo = NULL; + if (atomic_dec_and_test(&npinfo->refcnt)) { + skb_queue_purge(&npinfo->arp_tx); + skb_queue_purge(&npinfo->txq); + cancel_rearming_delayed_work(&npinfo->tx_work); + flush_scheduled_work(); + + kfree(npinfo); + } } + dev_put(np->dev); } @@ -809,4 +789,3 @@ EXPORT_SYMBOL(netpoll_setup); EXPORT_SYMBOL(netpoll_cleanup); EXPORT_SYMBOL(netpoll_send_udp); EXPORT_SYMBOL(netpoll_poll); -EXPORT_SYMBOL(netpoll_queue); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 733d86d..1897a3a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -207,7 +207,7 @@ static struct proc_dir_entry *pg_proc_dir = NULL; #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) struct flow_state { - __u32 cur_daddr; + __be32 cur_daddr; int count; }; @@ -282,10 +282,10 @@ struct pktgen_dev { /* If we're doing ranges, random or incremental, then this * defines the min/max for those ranges. */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ + __be32 saddr_min; /* inclusive, source IP address */ + __be32 saddr_max; /* exclusive, source IP address */ + __be32 daddr_min; /* inclusive, dest IP address */ + __be32 daddr_max; /* exclusive, dest IP address */ __u16 udp_src_min; /* inclusive, source UDP port */ __u16 udp_src_max; /* exclusive, source UDP port */ @@ -317,8 +317,8 @@ struct pktgen_dev { __u32 cur_dst_mac_offset; __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; + __be32 cur_saddr; + __be32 cur_daddr; __u16 cur_udp_dst; __u16 cur_udp_src; __u32 cur_pkt_size; @@ -350,10 +350,10 @@ struct pktgen_dev { }; struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; + __be32 pgh_magic; + __be32 seq_num; + __be32 tv_sec; + __be32 tv_usec; }; struct pktgen_thread { @@ -2160,7 +2160,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for(i = 0; i < pkt_dev->nr_labels; i++) if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) pkt_dev->labels[i] = MPLS_STACK_BOTTOM | - (pktgen_random() & + ((__force __be32)pktgen_random() & htonl(0x000fffff)); } @@ -2220,29 +2220,25 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; } else { - - if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = - ntohl(pkt_dev-> - daddr_max))) - { + imn = ntohl(pkt_dev->daddr_min); + imx = ntohl(pkt_dev->daddr_max); + if (imn < imx) { __u32 t; + __be32 s; if (pkt_dev->flags & F_IPDST_RND) { - t = ((pktgen_random() % (imx - imn)) + - imn); - t = htonl(t); + t = pktgen_random() % (imx - imn) + imn; + s = htonl(t); - while (LOOPBACK(t) || MULTICAST(t) - || BADCLASS(t) || ZERONET(t) - || LOCAL_MCAST(t)) { - t = ((pktgen_random() % - (imx - imn)) + imn); - t = htonl(t); + while (LOOPBACK(s) || MULTICAST(s) + || BADCLASS(s) || ZERONET(s) + || LOCAL_MCAST(s)) { + t = (pktgen_random() % + (imx - imn)) + imn; + s = htonl(t); } - pkt_dev->cur_daddr = t; - } - - else { + pkt_dev->cur_daddr = s; + } else { t = ntohl(pkt_dev->cur_daddr); t++; if (t > imx) { @@ -2270,7 +2266,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) for (i = 0; i < 4; i++) { pkt_dev->cur_in6_daddr.s6_addr32[i] = - ((pktgen_random() | + (((__force __be32)pktgen_random() | pkt_dev->min_in6_daddr.s6_addr32[i]) & pkt_dev->max_in6_daddr.s6_addr32[i]); } @@ -2377,7 +2373,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - @@ -2497,7 +2493,7 @@ static unsigned int scan_ip6(const char *s, char ip[16]) char suffix[16]; unsigned int prefixlen = 0; unsigned int suffixlen = 0; - __u32 tmp; + __be32 tmp; for (i = 0; i < 16; i++) ip[i] = 0; @@ -2713,7 +2709,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); memcpy(eth, pkt_dev->hh, 12); - *(u16 *) & eth[12] = protocol; + *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ datalen = pkt_dev->cur_pkt_size - 14 - @@ -2732,11 +2728,11 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, udph->len = htons(datalen + sizeof(struct udphdr)); udph->check = 0; /* No checksum */ - *(u32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ + *(__be32 *) iph = __constant_htonl(0x60000000); /* Version + flow */ if (pkt_dev->traffic_class) { /* Version + traffic class + flow (0) */ - *(u32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); + *(__be32 *)iph |= htonl(0x60000000 | (pkt_dev->traffic_class << 20)); } iph->hop_limit = 32; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 79ebd75..5f0818d 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/string.h> +#include <linux/vmalloc.h> #include <net/request_sock.h> @@ -29,22 +30,31 @@ * it is absolutely not enough even at 100conn/sec. 256 cures most * of problems. This value is adjusted to 128 for very small machines * (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb). - * Further increasing requires to change hash table size. + * Note : Dont forget somaxconn that may limit backlog too. */ int sysctl_max_syn_backlog = 256; int reqsk_queue_alloc(struct request_sock_queue *queue, - const int nr_table_entries) + unsigned int nr_table_entries) { - const int lopt_size = sizeof(struct listen_sock) + - nr_table_entries * sizeof(struct request_sock *); - struct listen_sock *lopt = kzalloc(lopt_size, GFP_KERNEL); - + size_t lopt_size = sizeof(struct listen_sock); + struct listen_sock *lopt; + + nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); + nr_table_entries = max_t(u32, nr_table_entries, 8); + nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); + lopt_size += nr_table_entries * sizeof(struct request_sock *); + if (lopt_size > PAGE_SIZE) + lopt = __vmalloc(lopt_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); + else + lopt = kzalloc(lopt_size, GFP_KERNEL); if (lopt == NULL) return -ENOMEM; - for (lopt->max_qlen_log = 6; - (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog; + for (lopt->max_qlen_log = 3; + (1 << lopt->max_qlen_log) < nr_table_entries; lopt->max_qlen_log++); get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); @@ -65,9 +75,11 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) { /* make all the listen_opt local to us */ struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); + size_t lopt_size = sizeof(struct listen_sock) + + lopt->nr_table_entries * sizeof(struct request_sock *); if (lopt->qlen != 0) { - int i; + unsigned int i; for (i = 0; i < lopt->nr_table_entries; i++) { struct request_sock *req; @@ -81,7 +93,10 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) } BUG_TRAP(lopt->qlen == 0); - kfree(lopt); + if (lopt_size > PAGE_SIZE) + vfree(lopt); + else + kfree(lopt); } EXPORT_SYMBOL(reqsk_queue_destroy); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 02f3c79..e76539a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -108,7 +108,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), }; @@ -213,6 +212,26 @@ nla_put_failure: return nla_nest_cancel(skb, mx); } +int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, + u32 ts, u32 tsage, long expires, u32 error) +{ + struct rta_cacheinfo ci = { + .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_used = dst->__use, + .rta_clntref = atomic_read(&(dst->__refcnt)), + .rta_error = error, + .rta_id = id, + .rta_ts = ts, + .rta_tsage = tsage, + }; + + if (expires) + ci.rta_expires = jiffies_to_clock_t(expires); + + return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); +} + +EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); static void set_operstate(struct net_device *dev, unsigned char transition) { @@ -273,6 +292,25 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a, a->tx_compressed = b->tx_compressed; }; +static inline size_t if_nlmsg_size(int iwbuflen) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + + nla_total_size(sizeof(struct rtnl_link_ifmap)) + + nla_total_size(sizeof(struct rtnl_link_stats)) + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */ + + nla_total_size(4) /* IFLA_TXQLEN */ + + nla_total_size(4) /* IFLA_WEIGHT */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(1) /* IFLA_LINKMODE */ + + nla_total_size(iwbuflen); +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, void *iwbuf, int iwbuflen, int type, u32 pid, u32 seq, u32 change, unsigned int flags) @@ -558,7 +596,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) struct sk_buff *nskb; char *iw_buf = NULL, *iw = NULL; int iw_buf_len = 0; - int err, payload; + int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); if (err < 0) @@ -587,9 +625,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + - nla_total_size(iw_buf_len)); - nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); + nskb = nlmsg_new(if_nlmsg_size(iw_buf_len), GFP_KERNEL); if (nskb == NULL) { err = -ENOBUFS; goto errout; @@ -597,10 +633,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - if (err <= 0) { - kfree_skb(nskb); - goto errout; - } + /* failure impilies BUG in if_nlmsg_size or wireless_rtnetlink_get */ + BUG_ON(err < 0); err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); errout: @@ -639,15 +673,13 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(if_nlmsg_size(0), GFP_KERNEL); if (skb == NULL) goto errout; err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + /* failure implies BUG in if_nlmsg_size() */ + BUG_ON(err < 0); err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); errout: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8b1063..a90bc43 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -473,8 +473,8 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) #endif C(protocol); n->destructor = NULL; + C(mark); #ifdef CONFIG_NETFILTER - C(nfmark); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -534,8 +534,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->pkt_type = old->pkt_type; new->tstamp = old->tstamp; new->destructor = NULL; + new->mark = old->mark; #ifdef CONFIG_NETFILTER - new->nfmark = old->nfmark; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; @@ -1240,8 +1240,8 @@ EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ -unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum) +__wsum skb_checksum(const struct sk_buff *skb, int offset, + int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1265,7 +1265,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1294,7 +1294,7 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, end = start + list->len; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; if (copy > len) copy = len; csum2 = skb_checksum(list, offset - start, @@ -1315,8 +1315,8 @@ unsigned int skb_checksum(const struct sk_buff *skb, int offset, /* Both of above in one bottle. */ -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, unsigned int csum) +__wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, + u8 *to, int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -1342,7 +1342,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, end = start + skb_shinfo(skb)->frags[i].size; if ((copy = end - offset) > 0) { - unsigned int csum2; + __wsum csum2; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -1368,7 +1368,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list; list = list->next) { - unsigned int csum2; + __wsum csum2; int end; BUG_TRAP(start <= offset + len); @@ -1396,7 +1396,7 @@ unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { - unsigned int csum; + __wsum csum; long csstart; if (skb->ip_summed == CHECKSUM_PARTIAL) @@ -1414,9 +1414,9 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) skb->len - csstart, 0); if (skb->ip_summed == CHECKSUM_PARTIAL) { - long csstuff = csstart + skb->csum; + long csstuff = csstart + skb->csum_offset; - *((unsigned short *)(to + csstuff)) = csum_fold(csum); + *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } diff --git a/net/core/sock.c b/net/core/sock.c index ee6cd25..ab8fafa 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -270,7 +270,7 @@ out: } EXPORT_SYMBOL(sock_queue_rcv_skb); -int sk_receive_skb(struct sock *sk, struct sk_buff *skb) +int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) { int rc = NET_RX_SUCCESS; @@ -279,7 +279,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) skb->dev = NULL; - bh_lock_sock(sk); + if (nested) + bh_lock_sock_nested(sk); + else + bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { /* * trylock + unlock semantics: @@ -1527,7 +1530,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) atomic_set(&sk->sk_refcnt, 1); } -void fastcall lock_sock(struct sock *sk) +void fastcall lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -1538,11 +1541,11 @@ void fastcall lock_sock(struct sock *sk) /* * The sk_lock has mutex_lock() semantics here: */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); local_bh_enable(); } -EXPORT_SYMBOL(lock_sock); +EXPORT_SYMBOL(lock_sock_nested); void fastcall release_sock(struct sock *sk) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0253413..1e75b15 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -21,10 +21,6 @@ extern __u32 sysctl_rmem_max; extern int sysctl_core_destroy_delay; -#ifdef CONFIG_NET_DIVERT -extern char sysctl_divert_version[]; -#endif /* CONFIG_NET_DIVERT */ - #ifdef CONFIG_XFRM extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; @@ -105,16 +101,6 @@ ctl_table core_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, -#ifdef CONFIG_NET_DIVERT - { - .ctl_name = NET_CORE_DIVERT_VERSION, - .procname = "divert_version", - .data = (void *)sysctl_divert_version, - .maxlen = 32, - .mode = 0444, - .proc_handler = &proc_dostring - }, -#endif /* CONFIG_NET_DIVERT */ #ifdef CONFIG_XFRM { .ctl_name = NET_CORE_AEVENT_ETIME, diff --git a/net/core/utils.c b/net/core/utils.c index d93fe64..6155606 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -88,7 +88,7 @@ EXPORT_SYMBOL(in_aton); #define IN6PTON_NULL 0x20000000 /* first/tail */ #define IN6PTON_UNKNOWN 0x40000000 -static inline int digit2bin(char c, char delim) +static inline int digit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -99,7 +99,7 @@ static inline int digit2bin(char c, char delim) return IN6PTON_UNKNOWN; } -static inline int xdigit2bin(char c, char delim) +static inline int xdigit2bin(char c, int delim) { if (c == delim || c == '\0') return IN6PTON_DELIM; @@ -113,12 +113,14 @@ static inline int xdigit2bin(char c, char delim) return (IN6PTON_XDIGIT | (c - 'a' + 10)); if (c >= 'A' && c <= 'F') return (IN6PTON_XDIGIT | (c - 'A' + 10)); + if (delim == -1) + return IN6PTON_DELIM; return IN6PTON_UNKNOWN; } int in4_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s; u8 *d; @@ -173,7 +175,7 @@ EXPORT_SYMBOL(in4_pton); int in6_pton(const char *src, int srclen, u8 *dst, - char delim, const char **end) + int delim, const char **end) { const char *s, *tok = NULL; u8 *d, *dc = NULL; |