summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-09-16 21:14:54 -0700
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-09-16 21:14:54 -0700
commitedb1e9671a990e6eb9f593636deed7ac43ba9084 (patch)
tree1b8b592411d9d7e4321479f57cb6d1f38ec483e3
parentfa890d586cc127ce72597ba0a909bfecf784e10c (diff)
parentd9f30ec0b0d129b9cbf2b041a6a3159aa24592f6 (diff)
downloadop-kernel-dev-edb1e9671a990e6eb9f593636deed7ac43ba9084.zip
op-kernel-dev-edb1e9671a990e6eb9f593636deed7ac43ba9084.tar.gz
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: [VLAN]: Fix net_device leak. [PPP] generic: Fix receive path data clobbering & non-linear handling [PPP] generic: Call skb_cow_head before scribbling over skb [NET] skbuff: Add skb_cow_head [BRIDGE]: Kill clone argument to br_flood_* [PPP] pppoe: Fill in header directly in __pppoe_xmit [PPP] pppoe: Fix data clobbering in __pppoe_xmit and return value [PPP] pppoe: Fix skb_unshare_check call position [SCTP]: Convert bind_addr_list locking to RCU [SCTP]: Add RCU synchronization around sctp_localaddr_list [PKT_SCHED]: sch_cbq.c: Shut up uninitialized variable warning [PKTGEN]: srcmac fix [IPV6]: Fix source address selection. [IPV4]: Just increment OutDatagrams once per a datagram. [IPV6]: Just increment OutDatagrams once per a datagram. [IPV6]: Fix unbalanced socket reference with MSG_CONFIRM. [NET_SCHED] protect action config/dump from irqs [NET]: Fix two issues wrt. SO_BINDTODEVICE.
-rw-r--r--drivers/net/ppp_generic.c58
-rw-r--r--drivers/net/pppoe.c70
-rw-r--r--include/linux/skbuff.h40
-rw-r--r--include/net/sctp/sctp.h1
-rw-r--r--include/net/sctp/structs.h13
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_forward.c21
-rw-r--r--net/bridge/br_input.c48
-rw-r--r--net/bridge/br_netfilter.c2
-rw-r--r--net/bridge/br_private.h8
-rw-r--r--net/core/pktgen.c10
-rw-r--r--net/core/sock.c106
-rw-r--r--net/ipv4/udp.c6
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/sched/act_api.c8
-rw-r--r--net/sched/act_police.c4
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sctp/associola.c14
-rw-r--r--net/sctp/bind_addr.c70
-rw-r--r--net/sctp/endpointola.c27
-rw-r--r--net/sctp/ipv6.c46
-rw-r--r--net/sctp/protocol.c79
-rw-r--r--net/sctp/sm_make_chunk.c18
-rw-r--r--net/sctp/socket.c134
27 files changed, 398 insertions, 404 deletions
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 9293c82..4b49d0e 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -899,17 +899,9 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* Put the 2-byte PPP protocol number on the front,
making sure there is room for the address and control fields. */
- if (skb_headroom(skb) < PPP_HDRLEN) {
- struct sk_buff *ns;
-
- ns = alloc_skb(skb->len + dev->hard_header_len, GFP_ATOMIC);
- if (ns == 0)
- goto outf;
- skb_reserve(ns, dev->hard_header_len);
- skb_copy_bits(skb, 0, skb_put(ns, skb->len), skb->len);
- kfree_skb(skb);
- skb = ns;
- }
+ if (skb_cow_head(skb, PPP_HDRLEN))
+ goto outf;
+
pp = skb_push(skb, 2);
proto = npindex_to_proto[npi];
pp[0] = proto >> 8;
@@ -1533,7 +1525,7 @@ ppp_input_error(struct ppp_channel *chan, int code)
static void
ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
{
- if (skb->len >= 2) {
+ if (pskb_may_pull(skb, 2)) {
#ifdef CONFIG_PPP_MULTILINK
/* XXX do channel-level decompression here */
if (PPP_PROTO(skb) == PPP_MP)
@@ -1585,7 +1577,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
if (ppp->vj == 0 || (ppp->flags & SC_REJ_COMP_TCP))
goto err;
- if (skb_tailroom(skb) < 124) {
+ if (skb_tailroom(skb) < 124 || skb_cloned(skb)) {
/* copy to a new sk_buff with more tailroom */
ns = dev_alloc_skb(skb->len + 128);
if (ns == 0) {
@@ -1656,23 +1648,29 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
/* check if the packet passes the pass and active filters */
/* the filter instructions are constructed assuming
a four-byte PPP header on each packet */
- *skb_push(skb, 2) = 0;
- if (ppp->pass_filter
- && sk_run_filter(skb, ppp->pass_filter,
- ppp->pass_len) == 0) {
- if (ppp->debug & 1)
- printk(KERN_DEBUG "PPP: inbound frame not passed\n");
- kfree_skb(skb);
- return;
- }
- if (!(ppp->active_filter
- && sk_run_filter(skb, ppp->active_filter,
- ppp->active_len) == 0))
- ppp->last_recv = jiffies;
- skb_pull(skb, 2);
-#else
- ppp->last_recv = jiffies;
+ if (ppp->pass_filter || ppp->active_filter) {
+ if (skb_cloned(skb) &&
+ pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ goto err;
+
+ *skb_push(skb, 2) = 0;
+ if (ppp->pass_filter
+ && sk_run_filter(skb, ppp->pass_filter,
+ ppp->pass_len) == 0) {
+ if (ppp->debug & 1)
+ printk(KERN_DEBUG "PPP: inbound frame "
+ "not passed\n");
+ kfree_skb(skb);
+ return;
+ }
+ if (!(ppp->active_filter
+ && sk_run_filter(skb, ppp->active_filter,
+ ppp->active_len) == 0))
+ ppp->last_recv = jiffies;
+ __skb_pull(skb, 2);
+ } else
#endif /* CONFIG_PPP_FILTER */
+ ppp->last_recv = jiffies;
if ((ppp->dev->flags & IFF_UP) == 0
|| ppp->npmode[npi] != NPMODE_PASS) {
@@ -1770,7 +1768,7 @@ ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch)
struct channel *ch;
int mphdrlen = (ppp->flags & SC_MP_SHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN;
- if (!pskb_may_pull(skb, mphdrlen) || ppp->mrru == 0)
+ if (!pskb_may_pull(skb, mphdrlen + 1) || ppp->mrru == 0)
goto err; /* no good, throw it away */
/* Decode sequence number and begin/end bits */
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 68631a5..0d7f570 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -385,12 +385,12 @@ static int pppoe_rcv(struct sk_buff *skb,
struct pppoe_hdr *ph;
struct pppox_sock *po;
- if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
- goto drop;
-
if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
goto out;
+ if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ goto drop;
+
ph = pppoe_hdr(skb);
po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
@@ -848,71 +848,45 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
{
struct pppox_sock *po = pppox_sk(sk);
struct net_device *dev = po->pppoe_dev;
- struct pppoe_hdr hdr;
struct pppoe_hdr *ph;
- int headroom = skb_headroom(skb);
int data_len = skb->len;
- struct sk_buff *skb2;
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
goto abort;
- hdr.ver = 1;
- hdr.type = 1;
- hdr.code = 0;
- hdr.sid = po->num;
- hdr.length = htons(skb->len);
-
if (!dev)
goto abort;
- /* Copy the skb if there is no space for the header. */
- if (headroom < (sizeof(struct pppoe_hdr) + dev->hard_header_len)) {
- skb2 = dev_alloc_skb(32+skb->len +
- sizeof(struct pppoe_hdr) +
- dev->hard_header_len);
-
- if (skb2 == NULL)
- goto abort;
-
- skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr));
- skb_copy_from_linear_data(skb, skb_put(skb2, skb->len),
- skb->len);
- } else {
- /* Make a clone so as to not disturb the original skb,
- * give dev_queue_xmit something it can free.
- */
- skb2 = skb_clone(skb, GFP_ATOMIC);
-
- if (skb2 == NULL)
- goto abort;
- }
+ /* Copy the data if there is no space for the header or if it's
+ * read-only.
+ */
+ if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
+ goto abort;
- ph = (struct pppoe_hdr *) skb_push(skb2, sizeof(struct pppoe_hdr));
- memcpy(ph, &hdr, sizeof(struct pppoe_hdr));
- skb2->protocol = __constant_htons(ETH_P_PPP_SES);
+ __skb_push(skb, sizeof(*ph));
+ skb_reset_network_header(skb);
- skb_reset_network_header(skb2);
+ ph = pppoe_hdr(skb);
+ ph->ver = 1;
+ ph->type = 1;
+ ph->code = 0;
+ ph->sid = po->num;
+ ph->length = htons(data_len);
- skb2->dev = dev;
+ skb->protocol = __constant_htons(ETH_P_PPP_SES);
+ skb->dev = dev;
- dev->hard_header(skb2, dev, ETH_P_PPP_SES,
+ dev->hard_header(skb, dev, ETH_P_PPP_SES,
po->pppoe_pa.remote, NULL, data_len);
- /* We're transmitting skb2, and assuming that dev_queue_xmit
- * will free it. The generic ppp layer however, is expecting
- * that we give back 'skb' (not 'skb2') in case of failure,
- * but free it in case of success.
- */
-
- if (dev_queue_xmit(skb2) < 0)
+ if (dev_queue_xmit(skb) < 0)
goto abort;
- kfree_skb(skb);
return 1;
abort:
- return 0;
+ kfree_skb(skb);
+ return 1;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 93c27f7..a656cec 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1352,6 +1352,22 @@ static inline int skb_clone_writable(struct sk_buff *skb, int len)
skb_headroom(skb) + len <= skb->hdr_len;
}
+static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
+ int cloned)
+{
+ int delta = 0;
+
+ if (headroom < NET_SKB_PAD)
+ headroom = NET_SKB_PAD;
+ if (headroom > skb_headroom(skb))
+ delta = headroom - skb_headroom(skb);
+
+ if (delta || cloned)
+ return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0,
+ GFP_ATOMIC);
+ return 0;
+}
+
/**
* skb_cow - copy header of skb when it is required
* @skb: buffer to cow
@@ -1366,16 +1382,22 @@ static inline int skb_clone_writable(struct sk_buff *skb, int len)
*/
static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
{
- int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) -
- skb_headroom(skb);
-
- if (delta < 0)
- delta = 0;
+ return __skb_cow(skb, headroom, skb_cloned(skb));
+}
- if (delta || skb_cloned(skb))
- return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) &
- ~(NET_SKB_PAD-1), 0, GFP_ATOMIC);
- return 0;
+/**
+ * skb_cow_head - skb_cow but only making the head writable
+ * @skb: buffer to cow
+ * @headroom: needed headroom
+ *
+ * This function is identical to skb_cow except that we replace the
+ * skb_cloned check by skb_header_cloned. It should be used when
+ * you only need to push on some header and do not need to modify
+ * the data.
+ */
+static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom)
+{
+ return __skb_cow(skb, headroom, skb_header_cloned(skb));
}
/**
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d529045..c9cc00c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -123,6 +123,7 @@
* sctp/protocol.c
*/
extern struct sock *sctp_get_ctl_sock(void);
+extern void sctp_local_addr_free(struct rcu_head *head);
extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
sctp_scope_t, gfp_t gfp,
int flags);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index c0d5848..c2fe2dc 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -207,6 +207,9 @@ extern struct sctp_globals {
* It is a list of sctp_sockaddr_entry.
*/
struct list_head local_addr_list;
+
+ /* Lock that protects the local_addr_list writers */
+ spinlock_t addr_list_lock;
/* Flag to indicate if addip is enabled. */
int addip_enable;
@@ -242,6 +245,7 @@ extern struct sctp_globals {
#define sctp_port_alloc_lock (sctp_globals.port_alloc_lock)
#define sctp_port_hashtable (sctp_globals.port_hashtable)
#define sctp_local_addr_list (sctp_globals.local_addr_list)
+#define sctp_local_addr_lock (sctp_globals.addr_list_lock)
#define sctp_addip_enable (sctp_globals.addip_enable)
#define sctp_prsctp_enable (sctp_globals.prsctp_enable)
@@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
/* This is a structure for holding either an IPv6 or an IPv4 address. */
struct sctp_sockaddr_entry {
struct list_head list;
+ struct rcu_head rcu;
union sctp_addr a;
__u8 use_as_src;
+ __u8 valid;
};
typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
@@ -1149,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
int flags);
int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
__u8 use_as_src, gfp_t gfp);
-int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
+int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
+ void (*rcu_call)(struct rcu_head *,
+ void (*func)(struct rcu_head *)));
int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
struct sctp_sock *);
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
@@ -1220,9 +1228,6 @@ struct sctp_ep_common {
* bind_addr.address_list is our set of local IP addresses.
*/
struct sctp_bind_addr bind_addr;
-
- /* Protection during address list comparisons. */
- rwlock_t addr_lock;
};
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1583c5e..2a54691 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -562,8 +562,6 @@ static int register_vlan_device(struct net_device *real_dev,
if (err < 0)
goto out_free_newdev;
- /* Account for reference in struct vlan_dev_info */
- dev_hold(real_dev);
#ifdef VLAN_DEBUG
printk(VLAN_DBG "Allocated new device successfully, returning.\n");
#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0eded17..99292e8 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -41,11 +41,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_pull(skb, ETH_HLEN);
if (dest[0] & 1)
- br_flood_deliver(br, skb, 0);
+ br_flood_deliver(br, skb);
else if ((dst = __br_fdb_get(br, dest)) != NULL)
br_deliver(dst->dst, skb);
else
- br_flood_deliver(br, skb, 0);
+ br_flood_deliver(br, skb);
return 0;
}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index ada7f49..bdd7c35 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -100,24 +100,13 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
}
/* called under bridge lock */
-static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
+static void br_flood(struct net_bridge *br, struct sk_buff *skb,
void (*__packet_hook)(const struct net_bridge_port *p,
struct sk_buff *skb))
{
struct net_bridge_port *p;
struct net_bridge_port *prev;
- if (clone) {
- struct sk_buff *skb2;
-
- if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- br->statistics.tx_dropped++;
- return;
- }
-
- skb = skb2;
- }
-
prev = NULL;
list_for_each_entry_rcu(p, &br->port_list, list) {
@@ -148,13 +137,13 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone,
/* called with rcu_read_lock */
-void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone)
+void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb)
{
- br_flood(br, skb, clone, __br_deliver);
+ br_flood(br, skb, __br_deliver);
}
/* called under bridge lock */
-void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone)
+void br_flood_forward(struct net_bridge *br, struct sk_buff *skb)
{
- br_flood(br, skb, clone, __br_forward);
+ br_flood(br, skb, __br_forward);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 6f468fc..3a8a015 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -43,7 +43,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
struct net_bridge *br;
struct net_bridge_fdb_entry *dst;
- int passedup = 0;
+ struct sk_buff *skb2;
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
@@ -55,39 +55,35 @@ int br_handle_frame_finish(struct sk_buff *skb)
if (p->state == BR_STATE_LEARNING)
goto drop;
- if (br->dev->flags & IFF_PROMISC) {
- struct sk_buff *skb2;
+ /* The packet skb2 goes to the local host (NULL to skip). */
+ skb2 = NULL;
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 != NULL) {
- passedup = 1;
- br_pass_frame_up(br, skb2);
- }
- }
+ if (br->dev->flags & IFF_PROMISC)
+ skb2 = skb;
+
+ dst = NULL;
if (is_multicast_ether_addr(dest)) {
br->statistics.multicast++;
- br_flood_forward(br, skb, !passedup);
- if (!passedup)
- br_pass_frame_up(br, skb);
- goto out;
+ skb2 = skb;
+ } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
+ skb2 = skb;
+ /* Do not forward the packet since it's local. */
+ skb = NULL;
}
- dst = __br_fdb_get(br, dest);
- if (dst != NULL && dst->is_local) {
- if (!passedup)
- br_pass_frame_up(br, skb);
- else
- kfree_skb(skb);
- goto out;
- }
+ if (skb2 == skb)
+ skb2 = skb_clone(skb, GFP_ATOMIC);
- if (dst != NULL) {
- br_forward(dst->dst, skb);
- goto out;
- }
+ if (skb2)
+ br_pass_frame_up(br, skb2);
- br_flood_forward(br, skb, 0);
+ if (skb) {
+ if (dst)
+ br_forward(dst->dst, skb);
+ else
+ br_flood_forward(br, skb);
+ }
out:
return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 3ee2022..fc13130 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -183,7 +183,7 @@ int nf_bridge_copy_header(struct sk_buff *skb)
int err;
int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow(skb, header_size);
+ err = skb_cow_head(skb, header_size);
if (err)
return err;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 21bf3a9..e6dc6f5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -170,12 +170,8 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb);
extern void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb);
extern int br_forward_finish(struct sk_buff *skb);
-extern void br_flood_deliver(struct net_bridge *br,
- struct sk_buff *skb,
- int clone);
-extern void br_flood_forward(struct net_bridge *br,
- struct sk_buff *skb,
- int clone);
+extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb);
+extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb);
/* br_if.c */
extern void br_port_carrier_check(struct net_bridge_port *p);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 36fdea7..803d0c88 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -111,6 +111,9 @@
*
* 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com>
*
+ * Fixed src_mac command to set source mac of packet to value specified in
+ * command by Adit Ranadive <adit.262@gmail.com>
+ *
*/
#include <linux/sys.h>
#include <linux/types.h>
@@ -1451,8 +1454,11 @@ static ssize_t pktgen_if_write(struct file *file,
}
if (!strcmp(name, "src_mac")) {
char *v = valstr;
+ unsigned char old_smac[ETH_ALEN];
unsigned char *m = pkt_dev->src_mac;
+ memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN);
+
len = strn_len(&user_buffer[i], sizeof(valstr) - 1);
if (len < 0) {
return len;
@@ -1481,6 +1487,10 @@ static ssize_t pktgen_if_write(struct file *file,
}
}
+ /* Set up Src MAC */
+ if (compare_ether_addr(old_smac, pkt_dev->src_mac))
+ memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN);
+
sprintf(pg_result, "OK: srcmac");
return count;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index cfed7d4..190de61 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -362,6 +362,61 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
+static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen)
+{
+ int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ char devname[IFNAMSIZ];
+ int index;
+
+ /* Sorry... */
+ ret = -EPERM;
+ if (!capable(CAP_NET_RAW))
+ goto out;
+
+ ret = -EINVAL;
+ if (optlen < 0)
+ goto out;
+
+ /* Bind this socket to a particular device like "eth0",
+ * as specified in the passed interface name. If the
+ * name is "" or the option length is zero the socket
+ * is not bound.
+ */
+ if (optlen > IFNAMSIZ - 1)
+ optlen = IFNAMSIZ - 1;
+ memset(devname, 0, sizeof(devname));
+
+ ret = -EFAULT;
+ if (copy_from_user(devname, optval, optlen))
+ goto out;
+
+ if (devname[0] == '\0') {
+ index = 0;
+ } else {
+ struct net_device *dev = dev_get_by_name(devname);
+
+ ret = -ENODEV;
+ if (!dev)
+ goto out;
+
+ index = dev->ifindex;
+ dev_put(dev);
+ }
+
+ lock_sock(sk);
+ sk->sk_bound_dev_if = index;
+ sk_dst_reset(sk);
+ release_sock(sk);
+
+ ret = 0;
+
+out:
+#endif
+
+ return ret;
+}
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -390,6 +445,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
#endif
+ if (optname == SO_BINDTODEVICE)
+ return sock_bindtodevice(sk, optval, optlen);
+
if (optlen < sizeof(int))
return -EINVAL;
@@ -578,54 +636,6 @@ set_rcvbuf:
ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
break;
-#ifdef CONFIG_NETDEVICES
- case SO_BINDTODEVICE:
- {
- char devname[IFNAMSIZ];
-
- /* Sorry... */
- if (!capable(CAP_NET_RAW)) {
- ret = -EPERM;
- break;
- }
-
- /* Bind this socket to a particular device like "eth0",
- * as specified in the passed interface name. If the
- * name is "" or the option length is zero the socket
- * is not bound.
- */
-
- if (!valbool) {
- sk->sk_bound_dev_if = 0;
- } else {
- if (optlen > IFNAMSIZ - 1)
- optlen = IFNAMSIZ - 1;
- memset(devname, 0, sizeof(devname));
- if (copy_from_user(devname, optval, optlen)) {
- ret = -EFAULT;
- break;
- }
-
- /* Remove any cached route for this socket. */
- sk_dst_reset(sk);
-
- if (devname[0] == '\0') {
- sk->sk_bound_dev_if = 0;
- } else {
- struct net_device *dev = dev_get_by_name(devname);
- if (!dev) {
- ret = -ENODEV;
- break;
- }
- sk->sk_bound_dev_if = dev->ifindex;
- dev_put(dev);
- }
- }
- break;
- }
-#endif
-
-
case SO_ATTACH_FILTER:
ret = -EINVAL;
if (optlen == sizeof(struct sock_fprog)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2835535..69d4bd1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -505,6 +505,8 @@ send:
out:
up->len = 0;
up->pending = 0;
+ if (!err)
+ UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
return err;
}
@@ -693,10 +695,8 @@ out:
ip_rt_put(rt);
if (free)
kfree(ipc.opt);
- if (!err) {
- UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+ if (!err)
return len;
- }
/*
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
* ENOBUFS might not be good (it's not tunable per se), but otherwise
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 91ef3be..45b4c82 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1021,7 +1021,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev,
hiscore.rule++;
}
if (ipv6_saddr_preferred(score.addr_type) ||
- (((ifa_result->flags &
+ (((ifa->flags &
(IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) {
score.attrs |= IPV6_SADDR_SCORE_PREFERRED;
if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e27383d..77167af 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -882,11 +882,10 @@ back_from_confirm:
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
err = rawv6_push_pending_frames(sk, &fl, rp);
+ release_sock(sk);
}
done:
dst_release(dst);
- if (!inet->hdrincl)
- release_sock(sk);
out:
fl6_sock_release(flowlabel);
return err<0?err:len;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4210951..c347f3e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -555,6 +555,8 @@ static int udp_v6_push_pending_frames(struct sock *sk)
out:
up->len = 0;
up->pending = 0;
+ if (!err)
+ UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag);
return err;
}
@@ -823,10 +825,8 @@ do_append_data:
release_sock(sk);
out:
fl6_sock_release(flowlabel);
- if (!err) {
- UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+ if (!err)
return len;
- }
/*
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
* ENOBUFS might not be good (it's not tunable per se), but otherwise
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index feef366..72cdb0f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
struct rtattr *r ;
- read_lock(hinfo->lock);
+ read_lock_bh(hinfo->lock);
s_i = cb->args[0];
@@ -96,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
}
}
done:
- read_unlock(hinfo->lock);
+ read_unlock_bh(hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
@@ -156,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p;
- read_lock(hinfo->lock);
+ read_lock_bh(hinfo->lock);
for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p;
p = p->tcfc_next) {
if (p->tcfc_index == index)
break;
}
- read_unlock(hinfo->lock);
+ read_unlock_bh(hinfo->lock);
return p;
}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6085be5..17f6f27 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -56,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct rtattr *r;
- read_lock(&police_lock);
+ read_lock_bh(&police_lock);
s_i = cb->args[0];
@@ -85,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c
}
}
done:
- read_unlock(&police_lock);
+ read_unlock_bh(&police_lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index e38c283..cbef3bb 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -380,7 +380,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int len = skb->len;
- int ret;
+ int uninitialized_var(ret);
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 2ad1caf..9bad8ba 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
/* Initialize the bind addr area. */
sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
- rwlock_init(&asoc->base.addr_lock);
asoc->state = SCTP_STATE_CLOSED;
@@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
{
struct sctp_transport *transport;
- sctp_read_lock(&asoc->base.addr_lock);
-
if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) &&
(htons(asoc->peer.port) == paddr->v4.sin_port)) {
transport = sctp_assoc_lookup_paddr(asoc, paddr);
@@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc,
transport = NULL;
out:
- sctp_read_unlock(&asoc->base.addr_lock);
return transport;
}
@@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
const union sctp_addr *laddr)
{
- int found;
+ int found = 0;
- sctp_read_lock(&asoc->base.addr_lock);
if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) &&
sctp_bind_addr_match(&asoc->base.bind_addr, laddr,
- sctp_sk(asoc->base.sk))) {
+ sctp_sk(asoc->base.sk)))
found = 1;
- goto out;
- }
- found = 0;
-out:
- sctp_read_unlock(&asoc->base.addr_lock);
return found;
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index fdb287a..d35cbf5 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -163,9 +163,15 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
addr->a.v4.sin_port = htons(bp->port);
addr->use_as_src = use_as_src;
+ addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
- list_add_tail(&addr->list, &bp->address_list);
+ INIT_RCU_HEAD(&addr->rcu);
+
+ /* We always hold a socket lock when calling this function,
+ * and that acts as a writer synchronizing lock.
+ */
+ list_add_tail_rcu(&addr->list, &bp->address_list);
SCTP_DBG_OBJCNT_INC(addr);
return 0;
@@ -174,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
/* Delete an address from the bind address list in the SCTP_bind_addr
* structure.
*/
-int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
+int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr,
+ void (*rcu_call)(struct rcu_head *head,
+ void (*func)(struct rcu_head *head)))
{
- struct list_head *pos, *temp;
- struct sctp_sockaddr_entry *addr;
+ struct sctp_sockaddr_entry *addr, *temp;
- list_for_each_safe(pos, temp, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* We hold the socket lock when calling this function,
+ * and that acts as a writer synchronizing lock.
+ */
+ list_for_each_entry_safe(addr, temp, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&addr->a, del_addr)) {
/* Found the exact match. */
- list_del(pos);
- kfree(addr);
- SCTP_DBG_OBJCNT_DEC(addr);
-
- return 0;
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
+ break;
}
}
+ /* Call the rcu callback provided in the args. This function is
+ * called by both BH packet processing and user side socket option
+ * processing, but it works on different lists in those 2 contexts.
+ * Each context provides it's own callback, whether call_rcu_bh()
+ * or call_rcu(), to make sure that we wait for an appropriate time.
+ */
+ if (addr && !addr->valid) {
+ rcu_call(&addr->rcu, sctp_local_addr_free);
+ SCTP_DBG_OBJCNT_DEC(addr);
+ }
+
return -EINVAL;
}
@@ -300,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
struct sctp_sock *opt)
{
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
-
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (opt->pf->cmp_addr(&laddr->a, addr, opt))
- return 1;
+ int match = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
+ if (opt->pf->cmp_addr(&laddr->a, addr, opt)) {
+ match = 1;
+ break;
+ }
}
+ rcu_read_unlock();
- return 0;
+ return match;
}
/* Find the first address in the bind address list that is not present in
@@ -323,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
union sctp_addr *addr;
void *addr_buf;
struct sctp_af *af;
- struct list_head *pos;
int i;
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ /* This is only called sctp_send_asconf_del_ip() and we hold
+ * the socket lock in that code patch, so that address list
+ * can't change.
+ */
+ list_for_each_entry(laddr, &bp->address_list, list) {
addr_buf = (union sctp_addr *)addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
if (!af)
- return NULL;
+ break;
if (opt->pf->cmp_addr(&laddr->a, addr, opt))
break;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 1404a9e..8f485a0 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
/* Initialize the bind addr area */
sctp_bind_addr_init(&ep->base.bind_addr, 0);
- rwlock_init(&ep->base.addr_lock);
/* Remember who we are attached to. */
ep->base.sk = sk;
@@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep)
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
const union sctp_addr *laddr)
{
- struct sctp_endpoint *retval;
+ struct sctp_endpoint *retval = NULL;
- sctp_read_lock(&ep->base.addr_lock);
if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
- sctp_sk(ep->base.sk))) {
+ sctp_sk(ep->base.sk)))
retval = ep;
- goto out;
- }
}
- retval = NULL;
-
-out:
- sctp_read_unlock(&ep->base.addr_lock);
return retval;
}
@@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc(
list_for_each(pos, &ep->asocs) {
asoc = list_entry(pos, struct sctp_association, asocs);
if (rport == asoc->peer.port) {
- sctp_read_lock(&asoc->base.addr_lock);
*transport = sctp_assoc_lookup_paddr(asoc, paddr);
- sctp_read_unlock(&asoc->base.addr_lock);
if (*transport)
return asoc;
@@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
- struct list_head *pos;
struct sctp_sockaddr_entry *addr;
struct sctp_bind_addr *bp;
- sctp_read_lock(&ep->base.addr_lock);
bp = &ep->base.bind_addr;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (sctp_has_association(&addr->a, paddr)) {
- sctp_read_unlock(&ep->base.addr_lock);
+ /* This function is called with the socket lock held,
+ * so the address_list can not change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
+ if (sctp_has_association(&addr->a, paddr))
return 1;
- }
}
- sctp_read_unlock(&ep->base.addr_lock);
return 0;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f8aa23d..670fd27 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -77,13 +77,18 @@
#include <asm/uaccess.h>
-/* Event handler for inet6 address addition/deletion events. */
+/* Event handler for inet6 address addition/deletion events.
+ * The sctp_local_addr_list needs to be protocted by a spin lock since
+ * multiple notifiers (say IPv4 and IPv6) may be running at the same
+ * time and thus corrupt the list.
+ * The reader side is protected with RCU.
+ */
static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
void *ptr)
{
struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
- struct sctp_sockaddr_entry *addr;
- struct list_head *pos, *temp;
+ struct sctp_sockaddr_entry *addr = NULL;
+ struct sctp_sockaddr_entry *temp;
switch (ev) {
case NETDEV_UP:
@@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
memcpy(&addr->a.v6.sin6_addr, &ifa->addr,
sizeof(struct in6_addr));
addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
- list_add_tail(&addr->list, &sctp_local_addr_list);
+ addr->valid = 1;
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+ spin_unlock_bh(&sctp_local_addr_lock);
}
break;
case NETDEV_DOWN:
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
- if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) {
- list_del(pos);
- kfree(addr);
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_for_each_entry_safe(addr, temp,
+ &sctp_local_addr_list, list) {
+ if (ipv6_addr_equal(&addr->a.v6.sin6_addr,
+ &ifa->addr)) {
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
break;
}
}
-
+ spin_unlock_bh(&sctp_local_addr_lock);
+ if (addr && !addr->valid)
+ call_rcu(&addr->rcu, sctp_local_addr_free);
break;
}
@@ -290,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
union sctp_addr *saddr)
{
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
sctp_scope_t scope;
union sctp_addr *baddr = NULL;
__u8 matchlen = 0;
@@ -312,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
scope = sctp_scope(daddr);
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
/* Go through the bind address list and find the best source address
* that matches the scope of the destination address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(laddr->a.sa.sa_family == AF_INET6) &&
(scope <= sctp_scope(&laddr->a))) {
@@ -341,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
__FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
}
/* Make a copy of all potential local addresses. */
@@ -367,7 +377,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
addr->a.v6.sin6_port = 0;
addr->a.v6.sin6_addr = ifp->addr;
addr->a.v6.sin6_scope_id = dev->ifindex;
+ addr->valid = 1;
INIT_LIST_HEAD(&addr->list);
+ INIT_RCU_HEAD(&addr->rcu);
list_add_tail(&addr->list, addrlist);
}
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e98579b..3d036cd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -153,6 +153,9 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist,
addr->a.v4.sin_family = AF_INET;
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
+ addr->valid = 1;
+ INIT_LIST_HEAD(&addr->list);
+ INIT_RCU_HEAD(&addr->rcu);
list_add_tail(&addr->list, addrlist);
}
}
@@ -192,16 +195,24 @@ static void sctp_free_local_addr_list(void)
}
}
+void sctp_local_addr_free(struct rcu_head *head)
+{
+ struct sctp_sockaddr_entry *e = container_of(head,
+ struct sctp_sockaddr_entry, rcu);
+ kfree(e);
+}
+
/* Copy the local addresses which are valid for 'scope' into 'bp'. */
int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
gfp_t gfp, int copy_flags)
{
struct sctp_sockaddr_entry *addr;
int error = 0;
- struct list_head *pos, *temp;
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
if (sctp_in_scope(&addr->a, scope)) {
/* Now that the address is in scope, check to see if
* the address type is really supported by the local
@@ -213,7 +224,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
(copy_flags & SCTP_ADDR6_ALLOWED) &&
(copy_flags & SCTP_ADDR6_PEERSUPP)))) {
error = sctp_add_bind_addr(bp, &addr->a, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
if (error)
goto end_copy;
}
@@ -221,6 +232,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
}
end_copy:
+ rcu_read_unlock();
return error;
}
@@ -416,9 +428,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
struct rtable *rt;
struct flowi fl;
struct sctp_bind_addr *bp;
- rwlock_t *addr_lock;
struct sctp_sockaddr_entry *laddr;
- struct list_head *pos;
struct dst_entry *dst = NULL;
union sctp_addr dst_saddr;
@@ -447,23 +457,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
goto out;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
if (dst) {
/* Walk through the bind address list and look for a bind
* address that matches the source address of the returned dst.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry,
- list);
- if (!laddr->use_as_src)
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid || !laddr->use_as_src)
continue;
sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port));
if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a))
goto out_unlock;
}
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
/* None of the bound addresses match the source address of the
* dst. So release it.
@@ -475,10 +482,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
/* Walk through the bind address list and try to get a dst that
* matches a bind address as the source address.
*/
- sctp_read_lock(addr_lock);
- list_for_each(pos, &bp->address_list) {
- laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
-
+ rcu_read_lock();
+ list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ if (!laddr->valid)
+ continue;
if ((laddr->use_as_src) &&
(AF_INET == laddr->a.sa.sa_family)) {
fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
@@ -490,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
}
out_unlock:
- sctp_read_unlock(addr_lock);
+ rcu_read_unlock();
out:
if (dst)
SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n",
@@ -600,13 +607,18 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
}
-/* Event handler for inet address addition/deletion events. */
+/* Event handler for inet address addition/deletion events.
+ * The sctp_local_addr_list needs to be protocted by a spin lock since
+ * multiple notifiers (say IPv4 and IPv6) may be running at the same
+ * time and thus corrupt the list.
+ * The reader side is protected with RCU.
+ */
static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct sctp_sockaddr_entry *addr;
- struct list_head *pos, *temp;
+ struct sctp_sockaddr_entry *addr = NULL;
+ struct sctp_sockaddr_entry *temp;
switch (ev) {
case NETDEV_UP:
@@ -615,19 +627,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
addr->a.v4.sin_family = AF_INET;
addr->a.v4.sin_port = 0;
addr->a.v4.sin_addr.s_addr = ifa->ifa_local;
- list_add_tail(&addr->list, &sctp_local_addr_list);
+ addr->valid = 1;
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+ spin_unlock_bh(&sctp_local_addr_lock);
}
break;
case NETDEV_DOWN:
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ spin_lock_bh(&sctp_local_addr_lock);
+ list_for_each_entry_safe(addr, temp,
+ &sctp_local_addr_list, list) {
if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) {
- list_del(pos);
- kfree(addr);
+ addr->valid = 0;
+ list_del_rcu(&addr->list);
break;
}
}
-
+ spin_unlock_bh(&sctp_local_addr_lock);
+ if (addr && !addr->valid)
+ call_rcu(&addr->rcu, sctp_local_addr_free);
break;
}
@@ -1160,6 +1178,7 @@ SCTP_STATIC __init int sctp_init(void)
/* Initialize the local address list. */
INIT_LIST_HEAD(&sctp_local_addr_list);
+ spin_lock_init(&sctp_local_addr_lock);
sctp_get_local_addr_list();
/* Register notifier for inet address additions/deletions. */
@@ -1227,6 +1246,9 @@ SCTP_STATIC __exit void sctp_exit(void)
sctp_v6_del_protocol();
inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ /* Unregister notifier for inet address additions/deletions. */
+ unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
+
/* Free the local address list. */
sctp_free_local_addr_list();
@@ -1240,9 +1262,6 @@ SCTP_STATIC __exit void sctp_exit(void)
inet_unregister_protosw(&sctp_stream_protosw);
inet_unregister_protosw(&sctp_seqpacket_protosw);
- /* Unregister notifier for inet address additions/deletions. */
- unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
-
sctp_sysctl_unregister();
list_del(&sctp_ipv4_specific.list);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 79856c9..2e34220 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1531,7 +1531,7 @@ no_hmac:
/* Also, add the destination address. */
if (list_empty(&retval->base.bind_addr.address_list)) {
sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1,
- GFP_ATOMIC);
+ GFP_ATOMIC);
}
retval->next_tsn = retval->c.initial_tsn;
@@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc,
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- list_for_each(pos, &bp->address_list) {
- saddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* This is always done in BH context with a socket lock
+ * held, so the list can not change.
+ */
+ list_for_each_entry(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, &addr))
saddr->use_as_src = 1;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
break;
case SCTP_PARAM_DEL_IP:
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
- retval = sctp_del_bind_addr(bp, &addr);
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh);
list_for_each(pos, &asoc->peer.transport_addr_list) {
transport = list_entry(pos, struct sctp_transport,
transports);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3335460..772fbfb 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
if (!bp->port)
bp->port = inet_sk(sk)->num;
- /* Add the address to the bind address list. */
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- /* Use GFP_ATOMIC since BHs are disabled. */
+ /* Add the address to the bind address list.
+ * Use GFP_ATOMIC since BHs will be disabled.
+ */
ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC);
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
/* Copy back into socket for getsockname() use. */
if (!ret) {
@@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
if (i < addrcnt)
continue;
- /* Use the first address in bind addr list of association as
- * Address Parameter of ASCONF CHUNK.
+ /* Use the first valid address in bind addr list of
+ * association as Address Parameter of ASCONF CHUNK.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
p = bp->address_list.next;
laddr = list_entry(p, struct sctp_sockaddr_entry, list);
- sctp_read_unlock(&asoc->base.addr_lock);
-
chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
addrcnt, SCTP_PARAM_ADD_IP);
if (!chunk) {
@@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
/* Add the new addresses to the bind address list with
* use_as_src set to 0.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
addr = (union sctp_addr *)addr_buf;
@@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
GFP_ATOMIC);
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
}
out:
@@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
* socket routing and failover schemes. Refer to comments in
* sctp_do_bind(). -daisy
*/
- sctp_local_bh_disable();
- sctp_write_lock(&ep->base.addr_lock);
-
- retval = sctp_del_bind_addr(bp, sa_addr);
-
- sctp_write_unlock(&ep->base.addr_lock);
- sctp_local_bh_enable();
+ retval = sctp_del_bind_addr(bp, sa_addr, call_rcu);
addr_buf += af->sockaddr_len;
err_bindx_rem:
@@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
* make sure that we do not delete all the addresses in the
* association.
*/
- sctp_read_lock(&asoc->base.addr_lock);
bp = &asoc->base.bind_addr;
laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
addrcnt, sp);
- sctp_read_unlock(&asoc->base.addr_lock);
if (!laddr)
continue;
+ /* We do not need RCU protection throughout this loop
+ * because this is done under a socket lock from the
+ * setsockopt call.
+ */
chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt,
SCTP_PARAM_DEL_IP);
if (!chunk) {
@@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
/* Reset use_as_src flag for the addresses in the bind address
* list that are to be deleted.
*/
- sctp_local_bh_disable();
- sctp_write_lock(&asoc->base.addr_lock);
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
laddr = (union sctp_addr *)addr_buf;
af = sctp_get_af_specific(laddr->v4.sin_family);
- list_for_each(pos1, &bp->address_list) {
- saddr = list_entry(pos1,
- struct sctp_sockaddr_entry,
- list);
+ list_for_each_entry(saddr, &bp->address_list, list) {
if (sctp_cmp_addr_exact(&saddr->a, laddr))
saddr->use_as_src = 0;
}
addr_buf += af->sockaddr_len;
}
- sctp_write_unlock(&asoc->base.addr_lock);
- sctp_local_bh_enable();
/* Update the route and saddr entries for all the transports
* as some of the addresses in the bind address list are
@@ -4059,9 +4037,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
sctp_assoc_t id;
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos, *temp;
struct sctp_sockaddr_entry *addr;
- rwlock_t *addr_lock;
int cnt = 0;
if (len < sizeof(sctp_assoc_t))
@@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
*/
if (0 == id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, count the valid
* addresses from the global local address list.
*/
@@ -4096,27 +4068,33 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len,
addr = list_entry(bp->address_list.next,
struct sctp_sockaddr_entry, list);
if (sctp_is_any(&addr->a)) {
- list_for_each_safe(pos, temp, &sctp_local_addr_list) {
- addr = list_entry(pos,
- struct sctp_sockaddr_entry,
- list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr,
+ &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
+
cnt++;
}
+ rcu_read_unlock();
} else {
cnt = 1;
}
goto done;
}
- list_for_each(pos, &bp->address_list) {
+ /* Protection on the bound address list is not needed,
+ * since in the socket option context we hold the socket lock,
+ * so there is no way that the bound address list can change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
cnt ++;
}
-
done:
- sctp_read_unlock(addr_lock);
return cnt;
}
@@ -4127,14 +4105,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
int max_addrs, void *to,
int *bytes_copied)
{
- struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- list_for_each_safe(pos, next, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
@@ -4149,6 +4129,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
cnt ++;
if (cnt >= max_addrs) break;
}
+ rcu_read_unlock();
return cnt;
}
@@ -4156,14 +4137,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port,
static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
size_t space_left, int *bytes_copied)
{
- struct list_head *pos, *next;
struct sctp_sockaddr_entry *addr;
union sctp_addr temp;
int cnt = 0;
int addrlen;
- list_for_each_safe(pos, next, &sctp_local_addr_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ rcu_read_lock();
+ list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) {
+ if (!addr->valid)
+ continue;
+
if ((PF_INET == sk->sk_family) &&
(AF_INET6 == addr->a.sa.sa_family))
continue;
@@ -4171,8 +4154,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk),
&temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
- if (space_left < addrlen)
- return -ENOMEM;
+ if (space_left < addrlen) {
+ cnt = -ENOMEM;
+ break;
+ }
memcpy(to, &temp, addrlen);
to += addrlen;
@@ -4180,6 +4165,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to,
space_left -= addrlen;
*bytes_copied += addrlen;
}
+ rcu_read_unlock();
return cnt;
}
@@ -4192,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs_old getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4200,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
void *addrs;
void *buf;
@@ -4222,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = getaddrs.addrs;
@@ -4242,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4259,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
@@ -4272,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
/* copy the entire address list into the user provided space */
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
@@ -4295,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
{
struct sctp_bind_addr *bp;
struct sctp_association *asoc;
- struct list_head *pos;
int cnt = 0;
struct sctp_getaddrs getaddrs;
struct sctp_sockaddr_entry *addr;
@@ -4303,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
union sctp_addr temp;
struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
- rwlock_t *addr_lock;
int err = 0;
size_t space_left;
int bytes_copied = 0;
@@ -4324,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
*/
if (0 == getaddrs.assoc_id) {
bp = &sctp_sk(sk)->ep->base.bind_addr;
- addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
} else {
asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
if (!asoc)
return -EINVAL;
bp = &asoc->base.bind_addr;
- addr_lock = &asoc->base.addr_lock;
}
to = optval + offsetof(struct sctp_getaddrs,addrs);
@@ -4340,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
if (!addrs)
return -ENOMEM;
- sctp_read_lock(addr_lock);
-
/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
* addresses from the global local address list.
*/
@@ -4353,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
space_left, &bytes_copied);
if (cnt < 0) {
err = cnt;
- goto error_lock;
+ goto out;
}
goto copy_getaddrs;
}
}
buf = addrs;
- list_for_each(pos, &bp->address_list) {
- addr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ /* Protection on the bound address list is not needed since
+ * in the socket option context we hold a socket lock and
+ * thus the bound address list can't change.
+ */
+ list_for_each_entry(addr, &bp->address_list, list) {
memcpy(&temp, &addr->a, sizeof(temp));
sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
if (space_left < addrlen) {
err = -ENOMEM; /*fixme: right error?*/
- goto error_lock;
+ goto out;
}
memcpy(buf, &temp, addrlen);
buf += addrlen;
@@ -4377,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
}
copy_getaddrs:
- sctp_read_unlock(addr_lock);
-
if (copy_to_user(to, addrs, bytes_copied)) {
err = -EFAULT;
goto out;
@@ -4389,12 +4365,6 @@ copy_getaddrs:
}
if (put_user(bytes_copied, optlen))
err = -EFAULT;
-
- goto out;
-
-error_lock:
- sctp_read_unlock(addr_lock);
-
out:
kfree(addrs);
return err;
OpenPOWER on IntegriCloud