diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2006-08-02 22:35:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-08-02 22:35:26 -0700 |
commit | 46f5960fdbf359f0c75989854bbaebc1de7a1eb4 (patch) | |
tree | 132d8d0eba110342bb88fcce2519c441ac771162 | |
parent | 90eb29efd0ca9301d80d03ea13662d32436f060e (diff) | |
parent | 29bbd72d6ee1dbf2d9f00d022f8e999aa528fb3a (diff) | |
download | op-kernel-dev-46f5960fdbf359f0c75989854bbaebc1de7a1eb4.zip op-kernel-dev-46f5960fdbf359f0c75989854bbaebc1de7a1eb4.tar.gz |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (32 commits)
[NET]: Fix more per-cpu typos
[SECURITY]: Fix build with CONFIG_SECURITY disabled.
[I/OAT]: Remove CPU hotplug lock from net_dma_rebalance
[DECNET]: Fix for routing bug
[AF_UNIX]: Kernel memory leak fix for af_unix datagram getpeersec patch
[NET]: skb_queue_lock_key() is no longer used.
[NET]: Remove lockdep_set_class() call from skb_queue_head_init().
[IPV6]: SNMPv2 "ipv6IfStatsOutFragCreates" counter error
[IPV6]: SNMPv2 "ipv6IfStatsInHdrErrors" counter error
[NET]: Kill the WARN_ON() calls for checksum fixups.
[NETFILTER]: xt_hashlimit/xt_string: missing string validation
[NETFILTER]: SIP helper: expect RTP streams in both directions
[E1000]: Convert to netdev_alloc_skb
[TG3]: Convert to netdev_alloc_skb
[NET]: Add netdev_alloc_skb().
[TCP]: Process linger2 timeout consistently.
[SECURITY] secmark: nul-terminate secdata
[NET] infiniband: Cleanup ib_addr module to use the netevents
[NET]: Core net changes to generate netevents
[NET]: Network Event Notifier Mechanism.
...
43 files changed, 632 insertions, 187 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index d294bbc..1205e80 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -35,6 +35,7 @@ #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> +#include <net/netevent.h> #include <rdma/ib_addr.h> MODULE_AUTHOR("Sean Hefty"); @@ -326,25 +327,22 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); -static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pkt, struct net_device *orig_dev) +static int netevent_callback(struct notifier_block *self, unsigned long event, + void *ctx) { - struct arphdr *arp_hdr; + if (event == NETEVENT_NEIGH_UPDATE) { + struct neighbour *neigh = ctx; - arp_hdr = (struct arphdr *) skb->nh.raw; - - if (arp_hdr->ar_op == htons(ARPOP_REQUEST) || - arp_hdr->ar_op == htons(ARPOP_REPLY)) - set_timeout(jiffies); - - kfree_skb(skb); + if (neigh->dev->type == ARPHRD_INFINIBAND && + (neigh->nud_state & NUD_VALID)) { + set_timeout(jiffies); + } + } return 0; } -static struct packet_type addr_arp = { - .type = __constant_htons(ETH_P_ARP), - .func = addr_arp_recv, - .af_packet_priv = (void*) 1, +static struct notifier_block nb = { + .notifier_call = netevent_callback }; static int addr_init(void) @@ -353,13 +351,13 @@ static int addr_init(void) if (!addr_wq) return -ENOMEM; - dev_add_pack(&addr_arp); + register_netevent_notifier(&nb); return 0; } static void addr_cleanup(void) { - dev_remove_pack(&addr_arp); + unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index b14e890..0a0e0cd 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -29,7 +29,7 @@ config ATALK even politically correct people are allowed to say Y here. config DEV_APPLETALK - bool "Appletalk interfaces support" + tristate "Appletalk interfaces support" depends on ATALK help AppleTalk is the protocol that Apple computers can use to communicate diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index da62db8..627f224 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -3127,7 +3127,7 @@ e1000_change_mtu(struct net_device *netdev, int new_mtu) break; } - /* NOTE: dev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN + /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN * means we reserve 2 more, this pushes us to allocate from the next * larger slab size * i.e. RXBUFFER_2048 --> size-4096 slab */ @@ -3708,7 +3708,7 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, #define E1000_CB_LENGTH 256 if (length < E1000_CB_LENGTH) { struct sk_buff *new_skb = - dev_alloc_skb(length + NET_IP_ALIGN); + netdev_alloc_skb(netdev, length + NET_IP_ALIGN); if (new_skb) { skb_reserve(new_skb, NET_IP_ALIGN); new_skb->dev = netdev; @@ -3979,7 +3979,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, while (cleaned_count--) { if (!(skb = buffer_info->skb)) - skb = dev_alloc_skb(bufsz); + skb = netdev_alloc_skb(netdev, bufsz); else { skb_trim(skb, 0); goto map_skb; @@ -3997,7 +3997,7 @@ e1000_alloc_rx_buffers(struct e1000_adapter *adapter, DPRINTK(RX_ERR, ERR, "skb align check failed: %u bytes " "at %p\n", bufsz, skb->data); /* Try again, without freeing the previous */ - skb = dev_alloc_skb(bufsz); + skb = netdev_alloc_skb(netdev, bufsz); /* Failed allocation, critical failure */ if (!skb) { dev_kfree_skb(oldskb); @@ -4121,7 +4121,8 @@ e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter, rx_desc->read.buffer_addr[j+1] = ~0; } - skb = dev_alloc_skb(adapter->rx_ps_bsize0 + NET_IP_ALIGN); + skb = netdev_alloc_skb(netdev, + adapter->rx_ps_bsize0 + NET_IP_ALIGN); if (unlikely(!skb)) { adapter->alloc_rx_buff_failed++; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 1b8138f..6f97962 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -68,8 +68,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.63" -#define DRV_MODULE_RELDATE "July 25, 2006" +#define DRV_MODULE_VERSION "3.64" +#define DRV_MODULE_RELDATE "July 31, 2006" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -3097,7 +3097,7 @@ static int tg3_alloc_rx_skb(struct tg3 *tp, u32 opaque_key, * Callers depend upon this behavior and assume that * we leave everything unchanged if we fail. */ - skb = dev_alloc_skb(skb_size); + skb = netdev_alloc_skb(tp->dev, skb_size); if (skb == NULL) return -ENOMEM; @@ -3270,7 +3270,7 @@ static int tg3_rx(struct tg3 *tp, int budget) tg3_recycle_rx(tp, opaque_key, desc_idx, *post_ptr); - copy_skb = dev_alloc_skb(len + 2); + copy_skb = netdev_alloc_skb(tp->dev, len + 2); if (copy_skb == NULL) goto drop_it_no_recycle; @@ -8618,7 +8618,7 @@ static int tg3_run_loopback(struct tg3 *tp, int loopback_mode) err = -EIO; tx_len = 1514; - skb = dev_alloc_skb(tx_len); + skb = netdev_alloc_skb(tp->dev, tx_len); if (!skb) return -ENOMEM; diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 31f02ba03..10c13dc 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -6,7 +6,6 @@ #include <linux/netfilter.h> #if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER) -#include <asm/atomic.h> #include <linux/if_ether.h> #endif diff --git a/include/linux/security.h b/include/linux/security.h index f753038..6bc2aad 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1109,6 +1109,16 @@ struct swap_info_struct; * @name contains the name of the security module being unstacked. * @ops contains a pointer to the struct security_operations of the module to unstack. * + * @secid_to_secctx: + * Convert secid to security context. + * @secid contains the security ID. + * @secdata contains the pointer that stores the converted security context. + * + * @release_secctx: + * Release the security context. + * @secdata contains the security context. + * @seclen contains the length of the security context. + * * This is the main security structure. */ struct security_operations { @@ -1289,6 +1299,8 @@ struct security_operations { int (*getprocattr)(struct task_struct *p, char *name, void *value, size_t size); int (*setprocattr)(struct task_struct *p, char *name, void *value, size_t size); + int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen); + void (*release_secctx)(char *secdata, u32 seclen); #ifdef CONFIG_SECURITY_NETWORK int (*unix_stream_connect) (struct socket * sock, @@ -1317,7 +1329,7 @@ struct security_operations { int (*socket_shutdown) (struct socket * sock, int how); int (*socket_sock_rcv_skb) (struct sock * sk, struct sk_buff * skb); int (*socket_getpeersec_stream) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len); - int (*socket_getpeersec_dgram) (struct sk_buff *skb, char **secdata, u32 *seclen); + int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); @@ -2059,6 +2071,16 @@ static inline int security_netlink_recv(struct sk_buff * skb, int cap) return security_ops->netlink_recv(skb, cap); } +static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return security_ops->secid_to_secctx(secid, secdata, seclen); +} + +static inline void security_release_secctx(char *secdata, u32 seclen) +{ + return security_ops->release_secctx(secdata, seclen); +} + /* prototypes */ extern int security_init (void); extern int register_security (struct security_operations *ops); @@ -2725,6 +2747,14 @@ static inline void securityfs_remove(struct dentry *dentry) { } +static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return -EOPNOTSUPP; +} + +static inline void security_release_secctx(char *secdata, u32 seclen) +{ +} #endif /* CONFIG_SECURITY */ #ifdef CONFIG_SECURITY_NETWORK @@ -2840,10 +2870,9 @@ static inline int security_socket_getpeersec_stream(struct socket *sock, char __ return security_ops->socket_getpeersec_stream(sock, optval, optlen, len); } -static inline int security_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata, - u32 *seclen) +static inline int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return security_ops->socket_getpeersec_dgram(skb, secdata, seclen); + return security_ops->socket_getpeersec_dgram(sock, skb, secid); } static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority) @@ -2968,8 +2997,7 @@ static inline int security_socket_getpeersec_stream(struct socket *sock, char __ return -ENOPROTOOPT; } -static inline int security_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata, - u32 *seclen) +static inline int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { return -ENOPROTOOPT; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4307e76..19c96d4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -604,12 +604,17 @@ static inline __u32 skb_queue_len(const struct sk_buff_head *list_) return list_->qlen; } -extern struct lock_class_key skb_queue_lock_key; - +/* + * This function creates a split out lock class for each invocation; + * this is needed for now since a whole lot of users of the skb-queue + * infrastructure in drivers have different locking usage (in hardirq) + * than the networking core (in softirq only). In the long run either the + * network layer or drivers should need annotation to consolidate the + * main types of usage into 3 classes. + */ static inline void skb_queue_head_init(struct sk_buff_head *list) { spin_lock_init(&list->lock); - lockdep_set_class(&list->lock, &skb_queue_lock_key); list->prev = list->next = (struct sk_buff *)list; list->qlen = 0; } @@ -1104,6 +1109,28 @@ static inline struct sk_buff *dev_alloc_skb(unsigned int length) return __dev_alloc_skb(length, GFP_ATOMIC); } +extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask); + +/** + * netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. Although this function + * allocates memory it can be called from an interrupt. + */ +static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, + unsigned int length) +{ + return __netdev_alloc_skb(dev, length, GFP_ATOMIC); +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2fec827..c0398f5 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -54,15 +54,13 @@ struct unix_skb_parms { struct ucred creds; /* Skb credentials */ struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK - char *secdata; /* Security context */ - u32 seclen; /* Security length */ + u32 secid; /* Security ID */ #endif }; #define UNIXCB(skb) (*(struct unix_skb_parms*)&((skb)->cb)) #define UNIXCREDS(skb) (&UNIXCB((skb)).creds) -#define UNIXSECDATA(skb) (&UNIXCB((skb)).secdata) -#define UNIXSECLEN(skb) (&UNIXCB((skb)).seclen) +#define UNIXSID(skb) (&UNIXCB((skb)).secid) #define unix_state_rlock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_runlock(s) spin_unlock(&unix_sk(s)->lock) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index ab29daf..96b0e66 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -139,16 +139,22 @@ extern rwlock_t rt6_lock; /* * Store a destination cache entry in a socket */ -static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) +static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, + struct in6_addr *daddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; - write_lock(&sk->sk_dst_lock); sk_setup_caps(sk, dst); np->daddr_cache = daddr; np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; +} + +static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, + struct in6_addr *daddr) +{ + write_lock(&sk->sk_dst_lock); + __ip6_dst_store(sk, dst, daddr); write_unlock(&sk->sk_dst_lock); } diff --git a/include/net/ipv6.h b/include/net/ipv6.h index a8fdf79..ece7e8a 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -468,6 +468,9 @@ extern void ip6_flush_pending_frames(struct sock *sk); extern int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl); +extern int ip6_sk_dst_lookup(struct sock *sk, + struct dst_entry **dst, + struct flowi *fl); /* * skb processing functions diff --git a/include/net/netdma.h b/include/net/netdma.h index ceae5ee..7f53cd1 100644 --- a/include/net/netdma.h +++ b/include/net/netdma.h @@ -29,7 +29,7 @@ static inline struct dma_chan *get_softnet_dma(void) { struct dma_chan *chan; rcu_read_lock(); - chan = rcu_dereference(__get_cpu_var(softnet_data.net_dma)); + chan = rcu_dereference(__get_cpu_var(softnet_data).net_dma); if (chan) dma_chan_get(chan); rcu_read_unlock(); diff --git a/include/net/netevent.h b/include/net/netevent.h new file mode 100644 index 0000000..e5d2162 --- /dev/null +++ b/include/net/netevent.h @@ -0,0 +1,33 @@ +#ifndef _NET_EVENT_H +#define _NET_EVENT_H + +/* + * Generic netevent notifiers + * + * Authors: + * Tom Tucker <tom@opengridcomputing.com> + * Steve Wise <swise@opengridcomputing.com> + * + * Changes: + */ +#ifdef __KERNEL__ + +#include <net/dst.h> + +struct netevent_redirect { + struct dst_entry *old; + struct dst_entry *new; +}; + +enum netevent_notif_type { + NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ + NETEVENT_PMTU_UPDATE, /* arg is struct dst_entry ptr */ + NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ +}; + +extern int register_netevent_notifier(struct notifier_block *nb); +extern int unregister_netevent_notifier(struct notifier_block *nb); +extern int call_netevent_notifiers(unsigned long val, void *v); + +#endif +#endif diff --git a/include/net/scm.h b/include/net/scm.h index 02daa09..5637d5e 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include <linux/limits.h> #include <linux/net.h> +#include <linux/security.h> /* Well, we should have at least one descriptor open * to accept passed FDs 8) @@ -20,8 +21,7 @@ struct scm_cookie struct ucred creds; /* Skb credentials */ struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK - char *secdata; /* Security context */ - u32 seclen; /* Security length */ + u32 secid; /* Passed security ID */ #endif unsigned long seq; /* Connection seqno */ }; @@ -32,6 +32,16 @@ extern int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie extern void __scm_destroy(struct scm_cookie *scm); extern struct scm_fp_list * scm_fp_dup(struct scm_fp_list *fpl); +#ifdef CONFIG_SECURITY_NETWORK +static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) +{ + security_socket_getpeersec_dgram(sock, NULL, &scm->secid); +} +#else +static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_cookie *scm) +{ } +#endif /* CONFIG_SECURITY_NETWORK */ + static __inline__ void scm_destroy(struct scm_cookie *scm) { if (scm && scm->fp) @@ -47,6 +57,7 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, scm->creds.pid = p->tgid; scm->fp = NULL; scm->seq = 0; + unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; return __scm_send(sock, msg, scm); @@ -55,8 +66,18 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, #ifdef CONFIG_SECURITY_NETWORK static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { - if (test_bit(SOCK_PASSSEC, &sock->flags) && scm->secdata != NULL) - put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, scm->seclen, scm->secdata); + char *secdata; + u32 seclen; + int err; + + if (test_bit(SOCK_PASSSEC, &sock->flags)) { + err = security_secid_to_secctx(scm->secid, &secdata, &seclen); + + if (!err) { + put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); + } + } } #else static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) diff --git a/include/net/tcp.h b/include/net/tcp.h index 0720bdd..7a093d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -914,6 +914,9 @@ static inline void tcp_set_state(struct sock *sk, int state) static inline void tcp_done(struct sock *sk) { + if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); + tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); diff --git a/net/core/Makefile b/net/core/Makefile index e9bd246..2645ba4 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o -obj-y += dev.o ethtool.o dev_mcast.o dst.o \ +obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o obj-$(CONFIG_XFRM) += flow.o diff --git a/net/core/dev.c b/net/core/dev.c index 4d2b516..d95e262 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1166,11 +1166,6 @@ int skb_checksum_help(struct sk_buff *skb, int inward) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - /* Let GSO fix up the checksum. */ goto out_set_summed; } @@ -1220,11 +1215,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __skb_pull(skb, skb->mac_len); if (unlikely(skb->ip_summed != CHECKSUM_HW)) { - static int warned; - - WARN_ON(!warned); - warned = 1; - if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -3429,12 +3419,9 @@ static void net_dma_rebalance(void) unsigned int cpu, i, n; struct dma_chan *chan; - lock_cpu_hotplug(); - if (net_dma_count == 0) { for_each_online_cpu(cpu) - rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); - unlock_cpu_hotplug(); + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); return; } @@ -3447,15 +3434,13 @@ static void net_dma_rebalance(void) + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); while(n) { - per_cpu(softnet_data.net_dma, cpu) = chan; + per_cpu(softnet_data, cpu).net_dma = chan; cpu = next_cpu(cpu, cpu_online_map); n--; } i++; } rcu_read_unlock(); - - unlock_cpu_hotplug(); } /** diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 7ad681f..5130d2e 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -29,6 +29,7 @@ #include <net/neighbour.h> #include <net/dst.h> #include <net/sock.h> +#include <net/netevent.h> #include <linux/rtnetlink.h> #include <linux/random.h> #include <linux/string.h> @@ -754,6 +755,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_STALE; neigh->updated = jiffies; neigh_suspect(neigh); + notify = 1; } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -762,6 +764,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_REACHABLE; neigh->updated = jiffies; neigh_connect(neigh); + notify = 1; next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -819,6 +822,8 @@ static void neigh_timer_handler(unsigned long arg) out: write_unlock(&neigh->lock); } + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) @@ -926,9 +931,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, { u8 old; int err; -#ifdef CONFIG_ARPD int notify = 0; -#endif struct net_device *dev; int update_isrouter = 0; @@ -948,9 +951,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh_suspect(neigh); neigh->nud_state = new; err = 0; -#ifdef CONFIG_ARPD notify = old & NUD_VALID; -#endif goto out; } @@ -1022,9 +1023,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1); -#ifdef CONFIG_ARPD notify = 1; -#endif } if (new == old) goto out; @@ -1056,6 +1055,9 @@ out: (neigh->flags & ~NTF_ROUTER); } write_unlock_bh(&neigh->lock); + + if (notify) + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh); #ifdef CONFIG_ARPD if (notify && neigh->parms->app_probes) neigh_app_notify(neigh); diff --git a/net/core/netevent.c b/net/core/netevent.c new file mode 100644 index 0000000..35d02c3 --- /dev/null +++ b/net/core/netevent.c @@ -0,0 +1,69 @@ +/* + * Network event notifiers + * + * Authors: + * Tom Tucker <tom@opengridcomputing.com> + * Steve Wise <swise@opengridcomputing.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Fixes: + */ + +#include <linux/rtnetlink.h> +#include <linux/notifier.h> + +static ATOMIC_NOTIFIER_HEAD(netevent_notif_chain); + +/** + * register_netevent_notifier - register a netevent notifier block + * @nb: notifier + * + * Register a notifier to be called when a netevent occurs. + * The notifier passed is linked into the kernel structures and must + * not be reused until it has been unregistered. A negative errno code + * is returned on a failure. + */ +int register_netevent_notifier(struct notifier_block *nb) +{ + int err; + + err = atomic_notifier_chain_register(&netevent_notif_chain, nb); + return err; +} + +/** + * netevent_unregister_notifier - unregister a netevent notifier block + * @nb: notifier + * + * Unregister a notifier previously registered by + * register_neigh_notifier(). The notifier is unlinked into the + * kernel structures and may then be reused. A negative errno code + * is returned on a failure. + */ + +int unregister_netevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); +} + +/** + * call_netevent_notifiers - call all netevent notifier blocks + * @val: value passed unmodified to notifier function + * @v: pointer passed unmodified to notifier function + * + * Call all neighbour notifier blocks. Parameters and return value + * are as for notifier_call_chain(). + */ + +int call_netevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&netevent_notif_chain, val, v); +} + +EXPORT_SYMBOL_GPL(register_netevent_notifier); +EXPORT_SYMBOL_GPL(unregister_netevent_notifier); +EXPORT_SYMBOL_GPL(call_netevent_notifiers); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 476aa39..022d889 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -71,13 +71,6 @@ static kmem_cache_t *skbuff_head_cache __read_mostly; static kmem_cache_t *skbuff_fclone_cache __read_mostly; /* - * lockdep: lock class key used by skb_queue_head_init(): - */ -struct lock_class_key skb_queue_lock_key; - -EXPORT_SYMBOL(skb_queue_lock_key); - -/* * Keep out-of-line to prevent kernel bloat. * __builtin_return_address is not used because it is not always * reliable. @@ -256,6 +249,29 @@ nodata: goto out; } +/** + * __netdev_alloc_skb - allocate an skbuff for rx on a specific device + * @dev: network device to receive on + * @length: length to allocate + * @gfp_mask: get_free_pages mask, passed to alloc_skb + * + * Allocate a new &sk_buff and assign it a usage count of one. The + * buffer has unspecified headroom built in. Users should allocate + * the headroom they think they need without accounting for the + * built in space. The built in space is used for optimisations. + * + * %NULL is returned if there is no free memory. + */ +struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); + if (likely(skb)) + skb_reserve(skb, NET_SKB_PAD); + return skb; +} static void skb_drop_list(struct sk_buff **listp) { @@ -846,7 +862,11 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))) return err; - for (i = 0; i < nfrags; i++) { + i = 0; + if (offset >= len) + goto drop_pages; + + for (; i < nfrags; i++) { int end = offset + skb_shinfo(skb)->frags[i].size; if (end < len) { @@ -854,9 +874,9 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) continue; } - if (len > offset) - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_shinfo(skb)->frags[i++].size = len - offset; +drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) @@ -864,7 +884,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) if (skb_shinfo(skb)->frag_list) skb_drop_fraglist(skb); - break; + goto done; } for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp); @@ -879,6 +899,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) return -ENOMEM; nfrag->next = frag->next; + kfree_skb(frag); frag = nfrag; *fragp = frag; } @@ -897,6 +918,7 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) break; } +done: if (len > skb_headlen(skb)) { skb->data_len -= skb->len - len; skb->len = len; @@ -2042,6 +2064,7 @@ EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); EXPORT_SYMBOL(__alloc_skb); +EXPORT_SYMBOL(__netdev_alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9f3d4d7..610c722 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -230,7 +230,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -863,7 +863,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1355614..743e9fc 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -925,8 +925,13 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { if (!dev_out->dn_ptr) continue; - if (dn_dev_islocal(dev_out, oldflp->fld_src)) - break; + if (!dn_dev_islocal(dev_out, oldflp->fld_src)) + continue; + if ((dev_out->flags & IFF_LOOPBACK) && + oldflp->fld_dst && + !dn_dev_islocal(dev_out, oldflp->fld_dst)) + continue; + break; } read_unlock(&dev_base_lock); if (dev_out == NULL) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 7c9f9a6..9bf307a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -526,6 +526,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) err = output(skb); + if (!err) + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -649,9 +651,6 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); - iph->tot_len = htons(len + hlen); ip_send_check(iph); @@ -659,6 +658,8 @@ slow_path: err = output(skb2); if (err) goto fail; + + IP_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP_INC_STATS(IPSTATS_MIB_FRAGOKS); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 84f43a3..2d05c41 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -112,14 +112,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; - u32 seclen; + u32 seclen, secid; int err; - err = security_socket_getpeersec_dgram(skb, &secdata, &seclen); + err = security_socket_getpeersec_dgram(NULL, skb, &secid); + if (err) + return; + + err = security_secid_to_secctx(secid, &secdata, &seclen); if (err) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); + security_release_secctx(secdata, seclen); } diff --git a/net/ipv4/netfilter/ip_conntrack_sip.c b/net/ipv4/netfilter/ip_conntrack_sip.c index fc87ce0..4f222d6 100644 --- a/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/net/ipv4/netfilter/ip_conntrack_sip.c @@ -442,7 +442,7 @@ static int __init init(void) sip[i].tuple.src.u.udp.port = htons(ports[i]); sip[i].mask.src.u.udp.port = 0xFFFF; sip[i].mask.dst.protonum = 0xFF; - sip[i].max_expected = 1; + sip[i].max_expected = 2; sip[i].timeout = 3 * 60; /* 3 minutes */ sip[i].me = THIS_MODULE; sip[i].help = sip_help; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 92980ab..6b66244 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -508,6 +508,9 @@ hashlimit_checkentry(const char *tablename, if (!r->cfg.expire) return 0; + if (r->name[sizeof(r->name) - 1] != '\0') + return 0; + /* This is the best we've got: We cannot release and re-grab lock, * since checkentry() is called before ip_tables.c grabs ipt_mutex. * We also cannot grab the hashtable spinlock, since htable_create will diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2dc6dbb..19bd49d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -104,6 +104,7 @@ #include <net/icmp.h> #include <net/xfrm.h> #include <net/ip_mp_alg.h> +#include <net/netevent.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -1125,6 +1126,7 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, struct rtable *rth, **rthp; u32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct netevent_redirect netevent; if (!in_dev) return; @@ -1216,6 +1218,11 @@ void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, rt_drop(rt); goto do_next; } + + netevent.old = &rth->u.dst; + netevent.new = &rt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, + &netevent); rt_del(hash, rth); if (!rt_intern_hash(hash, rt, &rt)) @@ -1452,6 +1459,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, ip_rt_mtu_expires); + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f6a2d92..934396b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1132,7 +1132,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, tp->ucopy.dma_chan = NULL; preempt_disable(); if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) { + !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { preempt_enable_no_resched(); tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); } else @@ -1659,7 +1659,8 @@ adjudge_to_death: const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); + inet_csk_reset_keepalive_timer(sk, + tmo - TCP_TIMEWAIT_LEN); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6f39e8..4b04c3e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -438,7 +438,6 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) It can f.e. if SYNs crossed. */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); @@ -874,7 +873,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) drop_and_free: reqsk_free(req); drop: - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ccb7cb..624e2b2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -589,8 +589,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, /* RFC793: "second check the RST bit" and * "fourth, check the SYN bit" */ - if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) + if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) { + TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); goto embryonic_reset; + } /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index d7d517a..b343532 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -114,7 +114,7 @@ static int tcpprobe_open(struct inode * inode, struct file * file) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt; + int error = 0, cnt = 0; unsigned char *tbuf; if (!buf || len < 0) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2316a43..8ea1e36 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1869,15 +1869,21 @@ err_exit: /* * Manual configuration of address on an interface */ -static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) +static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); + /* check the lifetime */ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; @@ -1889,10 +1895,29 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen) scope = ipv6_addr_scope(pfx); - ifp = ipv6_add_addr(idev, pfx, plen, scope, IFA_F_PERMANENT); + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags |= IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags |= IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + if (!IS_ERR(ifp)) { + spin_lock(&ifp->lock); + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + ifp->tstamp = jiffies; + spin_unlock(&ifp->lock); + addrconf_dad_start(ifp, 0); in6_ifa_put(ifp); + addrconf_verify(0); return 0; } @@ -1945,7 +1970,8 @@ int addrconf_add_ifaddr(void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); + err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2771,12 +2797,16 @@ restart: ifp->idev->nd_parms->retrans_time / HZ; #endif - if (age >= ifp->valid_lft) { + if (ifp->valid_lft != INFINITY_LIFE_TIME && + age >= ifp->valid_lft) { spin_unlock(&ifp->lock); in6_ifa_hold(ifp); read_unlock(&addrconf_hash_lock); ipv6_del_addr(ifp); goto restart; + } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { + spin_unlock(&ifp->lock); + continue; } else if (age >= ifp->prefered_lft) { /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ int deprecate = 0; @@ -2853,7 +2883,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } @@ -2864,11 +2895,61 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) +{ + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + + if (!valid_lft || (prefered_lft > valid_lft)) + return -EINVAL; + + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + + if (valid_lft == INFINITY_LIFE_TIME) + ifa_flags = IFA_F_PERMANENT; + else if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + + if (prefered_lft == 0) + ifa_flags = IFA_F_DEPRECATED; + else if ((prefered_lft >= 0x7FFFFFFF/HZ) && + (prefered_lft != INFINITY_LIFE_TIME)) + prefered_lft = 0x7FFFFFFF/HZ; + + spin_lock_bh(&ifp->lock); + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + + ifp->tstamp = jiffies; + ifp->valid_lft = valid_lft; + ifp->prefered_lft = prefered_lft; + + spin_unlock_bh(&ifp->lock); + if (!(ifp->flags&IFA_F_TENTATIVE)) + ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); + + addrconf_verify(0); + + return 0; +} + +static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct rtattr **rta = arg; struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; pfx = NULL; if (rta[IFA_ADDRESS-1]) { @@ -2877,14 +2958,34 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) pfx = RTA_DATA(rta[IFA_ADDRESS-1]); } if (rta[IFA_LOCAL-1]) { - if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx))) + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) return -EINVAL; pfx = RTA_DATA(rta[IFA_LOCAL-1]); } if (pfx == NULL) return -EINVAL; - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen); + if (rta[IFA_CACHEINFO-1]) { + struct ifa_cacheinfo *ci; + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); + valid_lft = ci->ifa_valid; + prefered_lft = ci->ifa_prefered; + } + + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; + } + + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); + } /* Maximum length of ifa_cacheinfo attributes */ @@ -3121,6 +3222,62 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in6_addr *addr = NULL; + struct net_device *dev = NULL; + struct inet6_ifaddr *ifa; + struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); + int err; + + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; + + if (ifm->ifa_index) + dev = __dev_get_by_index(ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; + + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { + err = -ENOBUFS; + goto out; + } + + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, RTM_NEWADDR, 0); + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + in6_ifa_put(ifa); + return err; +out_free: + kfree_skb(skb); + goto out; +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; @@ -3363,7 +3520,8 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, }, [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETADDR - RTM_BASE] = { .doit = inet6_rtm_getaddr, + .dumpit = inet6_dump_ifaddr, }, [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 5a0ba58..ac85e9c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -658,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 5c950cc..bf49107 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3bc74ce..69451af 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -356,6 +356,7 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -595,6 +596,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } err = output(skb); + if(!err) + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); + if (err || !frag) break; @@ -706,12 +710,11 @@ slow_path: /* * Put this fragment into the sending queue. */ - - IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); - err = output(frag); if (err) goto fail; + + IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES); } kfree_skb(skb); IP6_INC_STATS(IPSTATS_MIB_FRAGOKS); @@ -723,48 +726,51 @@ fail: return err; } -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + struct dst_entry *dst, + struct flowi *fl) { - int err = 0; + struct ipv6_pinfo *np = inet6_sk(sk); + struct rt6_info *rt = (struct rt6_info *)dst; - *dst = NULL; - if (sk) { - struct ipv6_pinfo *np = inet6_sk(sk); - - *dst = sk_dst_check(sk, np->dst_cookie); - if (*dst) { - struct rt6_info *rt = (struct rt6_info*)*dst; - - /* Yes, checking route validity in not connected - * case is not very simple. Take into account, - * that we do not support routing by source, TOS, - * and MSG_DONTROUTE --ANK (980726) - * - * 1. If route was host route, check that - * cached destination is current. - * If it is network route, we still may - * check its validity using saved pointer - * to the last used address: daddr_cache. - * We do not want to save whole address now, - * (because main consumer of this service - * is tcp, which has not this problem), - * so that the last trick works only on connected - * sockets. - * 2. oif also should be the same. - */ - if (((rt->rt6i_dst.plen != 128 || - !ipv6_addr_equal(&fl->fl6_dst, - &rt->rt6i_dst.addr)) - && (np->daddr_cache == NULL || - !ipv6_addr_equal(&fl->fl6_dst, - np->daddr_cache))) - || (fl->oif && fl->oif != (*dst)->dev->ifindex)) { - dst_release(*dst); - *dst = NULL; - } - } + if (!dst) + goto out; + + /* Yes, checking route validity in not connected + * case is not very simple. Take into account, + * that we do not support routing by source, TOS, + * and MSG_DONTROUTE --ANK (980726) + * + * 1. If route was host route, check that + * cached destination is current. + * If it is network route, we still may + * check its validity using saved pointer + * to the last used address: daddr_cache. + * We do not want to save whole address now, + * (because main consumer of this service + * is tcp, which has not this problem), + * so that the last trick works only on connected + * sockets. + * 2. oif also should be the same. + */ + if (((rt->rt6i_dst.plen != 128 || + !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; } +out: + return dst; +} + +static int ip6_dst_lookup_tail(struct sock *sk, + struct dst_entry **dst, struct flowi *fl) +{ + int err; + if (*dst == NULL) *dst = ip6_route_output(sk, fl); @@ -773,7 +779,6 @@ int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) if (ipv6_addr_any(&fl->fl6_src)) { err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); - if (err) goto out_err_release; } @@ -786,8 +791,48 @@ out_err_release: return err; } +/** + * ip6_dst_lookup - perform route lookup on flow + * @sk: socket which provides route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + return ip6_dst_lookup_tail(sk, dst, fl); +} EXPORT_SYMBOL_GPL(ip6_dst_lookup); +/** + * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * @sk: socket which provides the dst cache and route info + * @dst: pointer to dst_entry * for result + * @fl: flow to lookup + * + * This function performs a route lookup on the given flow with the + * possibility of using the cached route in the socket if it is valid. + * It will take the socket dst lock when operating on the dst cache. + * As a result, this function can only be used in process context. + * + * It returns zero on success, or a standard errno code on error. + */ +int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +{ + *dst = NULL; + if (sk) { + *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + *dst = ip6_sk_dst_check(sk, *dst, fl); + } + + return ip6_dst_lookup_tail(sk, dst, fl); +} +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); + static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 87c39c9..4b16371 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -53,6 +53,7 @@ #include <linux/rtnetlink.h> #include <net/dst.h> #include <net/xfrm.h> +#include <net/netevent.h> #include <asm/uaccess.h> @@ -742,6 +743,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; } dst->metrics[RTAX_MTU-1] = mtu; + call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -1155,6 +1157,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct rt6_info *rt, *nrt = NULL; int strict; struct fib6_node *fn; + struct netevent_redirect netevent; /* * Get the "current" route for this destination and @@ -1252,6 +1255,10 @@ restart: if (ip6_ins_rt(nrt, NULL, NULL, NULL)) goto out; + netevent.old = &rt->u.dst; + netevent.new = &nrt->u.dst; + call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); + if (rt->rt6i_flags&RTF_CACHE) { ip6_del_rt(rt, NULL, NULL, NULL); return; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 923989d..b843a65 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -270,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -427,7 +427,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, case TCP_SYN_RECV: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -831,7 +830,6 @@ drop: if (req) reqsk_free(req); - TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); return 0; /* don't send reset */ } @@ -947,7 +945,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ sk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccc57f4..3d54f24 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,7 +782,7 @@ do_udp_sendmsg: connected = 0; } - err = ip6_dst_lookup(sk, &dst, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; if (final_p) diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 0eea60e..c8c8b44 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -125,7 +125,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) if (!skb_is_gso(skb)) return xfrm6_output_finish2(skb); - skb->protocol = htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IPV6); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (unlikely(IS_ERR(segs))) diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c2ce9c4..de9537a 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -57,6 +57,8 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) { int err; struct xt_secmark_target_selinux_info *sel = &info->u.sel; + + sel->selctx[SECMARK_SELCTX_MAX - 1] = '\0'; err = selinux_string_to_sid(sel->selctx, &sel->selsid); if (err) { diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 0ebb6ac..d8e3891 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -55,7 +55,10 @@ static int checkentry(const char *tablename, /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return 0; - + if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') + return 0; + if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) + return 0; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, TS_AUTOLOAD); if (IS_ERR(ts_conf)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6f29092..de6ec51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -128,23 +128,17 @@ static atomic_t unix_nr_socks = ATOMIC_INIT(0); #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) #ifdef CONFIG_SECURITY_NETWORK -static void unix_get_peersec_dgram(struct sk_buff *skb) +static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - int err; - - err = security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb), - UNIXSECLEN(skb)); - if (err) - *(UNIXSECDATA(skb)) = NULL; + memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secdata = *UNIXSECDATA(skb); - scm->seclen = *UNIXSECLEN(skb); + scm->secid = *UNIXSID(skb); } #else -static inline void unix_get_peersec_dgram(struct sk_buff *skb) +static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -1322,8 +1316,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); if (siocb->scm->fp) unix_attach_fds(siocb->scm, skb); - - unix_get_peersec_dgram(skb); + unix_get_secdata(siocb->scm, skb); skb->h.raw = skb->data; err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len); diff --git a/security/dummy.c b/security/dummy.c index bbbfda7..58c6d39 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -791,8 +791,7 @@ static int dummy_socket_getpeersec_stream(struct socket *sock, char __user *optv return -ENOPROTOOPT; } -static int dummy_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata, - u32 *seclen) +static int dummy_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { return -ENOPROTOOPT; } @@ -876,6 +875,15 @@ static int dummy_setprocattr(struct task_struct *p, char *name, void *value, siz return -EINVAL; } +static int dummy_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return -EOPNOTSUPP; +} + +static void dummy_release_secctx(char *secdata, u32 seclen) +{ +} + #ifdef CONFIG_KEYS static inline int dummy_key_alloc(struct key *key, struct task_struct *ctx, unsigned long flags) @@ -1028,6 +1036,8 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, d_instantiate); set_to_dummy_if_null(ops, getprocattr); set_to_dummy_if_null(ops, setprocattr); + set_to_dummy_if_null(ops, secid_to_secctx); + set_to_dummy_if_null(ops, release_secctx); #ifdef CONFIG_SECURITY_NETWORK set_to_dummy_if_null(ops, unix_stream_connect); set_to_dummy_if_null(ops, unix_may_send); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a91c961..5d1b8c7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3524,25 +3524,21 @@ out: return err; } -static int selinux_socket_getpeersec_dgram(struct sk_buff *skb, char **secdata, u32 *seclen) +static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { + u32 peer_secid = SECSID_NULL; int err = 0; - u32 peer_sid; - if (skb->sk->sk_family == PF_UNIX) - selinux_get_inode_sid(SOCK_INODE(skb->sk->sk_socket), - &peer_sid); - else - peer_sid = selinux_socket_getpeer_dgram(skb); - - if (peer_sid == SECSID_NULL) - return -EINVAL; + if (sock && (sock->sk->sk_family == PF_UNIX)) + selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid); + else if (skb) + peer_secid = selinux_socket_getpeer_dgram(skb); - err = security_sid_to_context(peer_sid, secdata, seclen); - if (err) - return err; + if (peer_secid == SECSID_NULL) + err = -EINVAL; + *secid = peer_secid; - return 0; + return err; } static int selinux_sk_alloc_security(struct sock *sk, int family, gfp_t priority) @@ -4407,6 +4403,17 @@ static int selinux_setprocattr(struct task_struct *p, return size; } +static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +{ + return security_sid_to_context(secid, secdata, seclen); +} + +static void selinux_release_secctx(char *secdata, u32 seclen) +{ + if (secdata) + kfree(secdata); +} + #ifdef CONFIG_KEYS static int selinux_key_alloc(struct key *k, struct task_struct *tsk, @@ -4587,6 +4594,9 @@ static struct security_operations selinux_ops = { .getprocattr = selinux_getprocattr, .setprocattr = selinux_setprocattr, + .secid_to_secctx = selinux_secid_to_secctx, + .release_secctx = selinux_release_secctx, + .unix_stream_connect = selinux_socket_unix_stream_connect, .unix_may_send = selinux_socket_unix_may_send, |