From 14deae41566b5cdd992c01d0069518ced5227c83 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 4 Jan 2009 16:04:39 -0800 Subject: ipv6: Fix sporadic sendmsg -EINVAL when sending to multicast groups. Thanks to excellent diagnosis by Eduard Guzovsky. The core problem is that on a network with lots of active multicast traffic, the neighbour cache can fill up. If we try to allocate a new route and thus neighbour cache entry, the bog-standard GC attempt the neighbour layer does in ineffective because route entries hold a reference to the existing neighbour entries and GC can only liberate entries with no references. IPV4 already has a way to handle this, by doing a route cache GC in such situations (when neigh attach returns -ENOBUFS). So simply mimick this on the ipv6 side. Tested-by: Eduard Guzovsky Signed-off-by: David S. Miller --- net/ipv6/route.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 18c486c..76f06b9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -627,6 +627,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad rt = ip6_rt_copy(ort); if (rt) { + struct neighbour *neigh; + int attempts = !in_softirq(); + if (!(rt->rt6i_flags&RTF_GATEWAY)) { if (rt->rt6i_dst.plen != 128 && ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) @@ -646,7 +649,35 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad } #endif - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + retry: + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { + struct net *net = dev_net(rt->rt6i_dev); + int saved_rt_min_interval = + net->ipv6.sysctl.ip6_rt_gc_min_interval; + int saved_rt_elasticity = + net->ipv6.sysctl.ip6_rt_gc_elasticity; + + if (attempts-- > 0) { + net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; + net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; + + ip6_dst_gc(net->ipv6.ip6_dst_ops); + + net->ipv6.sysctl.ip6_rt_gc_elasticity = + saved_rt_elasticity; + net->ipv6.sysctl.ip6_rt_gc_min_interval = + saved_rt_min_interval; + goto retry; + } + + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table overflow.\n"); + dst_free(&rt->u.dst); + return NULL; + } + rt->rt6i_nexthop = neigh; } @@ -945,8 +976,11 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, dev_hold(dev); if (neigh) neigh_hold(neigh); - else + else { neigh = ndisc_get_neigh(dev, addr); + if (IS_ERR(neigh)) + neigh = NULL; + } rt->rt6i_dev = dev; rt->rt6i_idev = idev; @@ -1887,6 +1921,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct neighbour *neigh; if (rt == NULL) return ERR_PTR(-ENOMEM); @@ -1909,11 +1944,18 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_flags |= RTF_ANYCAST; else rt->rt6i_flags |= RTF_LOCAL; - rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); - if (rt->rt6i_nexthop == NULL) { + neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); + if (IS_ERR(neigh)) { dst_free(&rt->u.dst); - return ERR_PTR(-ENOMEM); + + /* We are casting this because that is the return + * value type. But an errno encoded pointer is the + * same regardless of the underlying pointer type, + * and that's what we are returning. So this is OK. 
+ */ + return (struct rt6_info *) neigh; } + rt->rt6i_nexthop = neigh; ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; -- cgit v1.1 From b530256d2e0f1a75fab31f9821129fff1bb49faa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 Jan 2009 16:13:19 -0800 Subject: gro: Use gso_size to store MSS In order to allow GRO packets without frag_list at all, we need to store the MSS in the packet itself. The obvious place is gso_size. The only thing to watch out for is if the packet ends up not being GRO then we need to clear gso_size before pushing the packet into the stack. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ net/core/skbuff.c | 1 + net/ipv4/tcp.c | 5 +---- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 09c66a4..1e1a680 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2365,6 +2365,7 @@ static int napi_gro_complete(struct sk_buff *skb) } out: + skb_shinfo(skb)->gso_size = 0; __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2446,6 +2447,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) } NAPI_GRO_CB(skb)->count = 1; + skb_shinfo(skb)->gso_size = skb->len; skb->next = napi->gro_list; napi->gro_list = skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b8d0abb..3aafb10 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2613,6 +2613,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; + skb_shinfo(nskb)->gso_size = skb_shinfo(p)->gso_size; skb_header_release(p); nskb->prev = p; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f28acf1..4d655e9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2519,9 +2519,7 @@ found: flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); total = p->len; - mss = total; - if (skb_shinfo(p)->frag_list) - mss = skb_shinfo(p)->frag_list->len; + mss = skb_shinfo(p)->gso_size; flush |= skb->len > mss || skb->len <= 0; flush |= ntohl(th2->seq) + total != ntohl(th->seq); @@ -2557,7 +2555,6 @@ int tcp_gro_complete(struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); skb->ip_summed = CHECKSUM_PARTIAL; - skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) -- cgit v1.1 From 5d38a079ce3971f932bbdc0dc5b887806fabd5dc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 4 Jan 2009 16:13:40 -0800 Subject: gro: Add page frag support This patch allows GRO to merge page frags (skb_shinfo(skb)->frags) in one skb, rather than using the less efficient frag_list. It also adds a new interface, napi_gro_frags to allow drivers to inject page frags directly into the stack without allocating an skb. This is intended to be the GRO equivalent for LRO's lro_receive_frags interface. The existing GSO interface can already handle page frags with or without an appended frag_list so nothing needs to be changed there. The merging itself is rather simple. We store any new frag entries after the last existing entry, without checking whether the first new entry can be merged with the last existing entry. Making this check would actually be easy but since no existing driver can produce contiguous frags anyway it would just be mental masturbation. If the total number of entries would exceed the capacity of a single skb, we simply resort to using frag_list as we do now. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- net/core/skbuff.c | 14 ++++++++- 2 files changed, 99 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1e1a680..382df6c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -132,6 +132,9 @@ /* Instead of increasing this, you should create a hash table. */ #define MAX_GRO_SKBS 8 +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2345,7 +2348,7 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (!skb_shinfo(skb)->frag_list) + if (NAPI_GRO_CB(skb)->count == 1) goto out; rcu_read_lock(); @@ -2384,7 +2387,7 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; struct packet_type *ptype; @@ -2393,6 +2396,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; + int free; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2409,6 +2413,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; + NAPI_GRO_CB(skb)->free = 0; for (p = napi->gro_list; p; p = p->next) { count++; @@ -2428,6 +2433,7 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; + free = NAPI_GRO_CB(skb)->free; if (pp) { struct sk_buff *nskb = *pp; @@ -2452,13 +2458,86 @@ int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_list = skb; ok: - return NET_RX_SUCCESS; + return free; normal: - return netif_receive_skb(skb); + return -1; +} + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 1: + kfree_skb(skb); + break; + } + + return NET_RX_SUCCESS; } EXPORT_SYMBOL(napi_gro_receive); +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct net_device *dev = napi->dev; + struct sk_buff *skb = napi->skb; + int err = NET_RX_DROP; + + napi->skb = NULL; + + if (!skb) { + skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN); + if (!skb) + goto out; + + skb_reserve(skb, NET_IP_ALIGN); + } + + BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + skb_shinfo(skb)->nr_frags = info->nr_frags; + memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); + + skb->data_len = info->len; + skb->len += info->len; + skb->truesize += info->len; + + if (!pskb_may_pull(skb, ETH_HLEN)) + goto reuse; + + err = NET_RX_SUCCESS; + + skb->protocol = eth_type_trans(skb, dev); + + skb->ip_summed = info->ip_summed; + skb->csum = info->csum; + + switch (__napi_gro_receive(napi, skb)) { + case -1: + return netif_receive_skb(skb); + + case 0: + goto out; + } + +reuse: + skb_shinfo(skb)->nr_frags = 0; + + skb->len -= skb->data_len; + skb->truesize -= skb->data_len; + skb->data_len = 0; + + __skb_pull(skb, skb_headlen(skb)); + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); + + napi->skb = skb; + +out: + return err; +} +EXPORT_SYMBOL(napi_gro_frags); + static int 
process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2537,11 +2616,12 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, { INIT_LIST_HEAD(&napi->poll_list); napi->gro_list = NULL; + napi->skb = NULL; napi->poll = poll; napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); -#ifdef CONFIG_NETPOLL napi->dev = dev; +#ifdef CONFIG_NETPOLL spin_lock_init(&napi->poll_lock); napi->poll_owner = -1; #endif @@ -2554,6 +2634,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); + kfree(napi->skb); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3aafb10..5110b35 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2594,6 +2594,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (skb_shinfo(p)->frag_list) goto merge; + else if (!skb_headlen(p) && !skb_headlen(skb) && + skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < + MAX_SKB_FRAGS) { + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, + skb_shinfo(skb)->frags, + skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); + + skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; + NAPI_GRO_CB(skb)->free = 1; + goto done; + } headroom = skb_headroom(p); nskb = netdev_alloc_skb(p->dev, headroom); @@ -2628,11 +2639,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: - NAPI_GRO_CB(p)->count++; p->prev->next = skb; p->prev = skb; skb_header_release(skb); +done: + NAPI_GRO_CB(p)->count++; p->data_len += skb->len; p->truesize += skb->len; p->len += skb->len; -- cgit v1.1 From f32f8b72e02e851972a0172603104046aa5fec96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Holm=20Th=C3=B8gersen?= Date: Sun, 4 Jan 2009 17:11:24 -0800 Subject: net/rfkill/rfkill.c: fix unused rfkill_led_trigger() warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4dec9b807be757780ca3611a959ac22c28d292a7 ("rfkill: strip pointless notifier chain") removed the only user of rfkill_led_trigger() that was not guarded by #ifdef CONFIG_RFKILL_LEDS. Therefore, move rfkill_led_trigger() completely inside #ifdef CONFIG_RFKILL_LEDS and avoid the compile time warning: net/rfkill/rfkill.c:59: warning: 'rfkill_led_trigger' defined but not used Signed-off-by: Simon Holm Thøgersen Signed-off-by: David S. 
Miller --- net/rfkill/rfkill.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 3c94f76..3eaa394 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -54,10 +54,10 @@ static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; static bool rfkill_epo_lock_active; +#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger(struct rfkill *rfkill, enum rfkill_state state) { -#ifdef CONFIG_RFKILL_LEDS struct led_trigger *led = &rfkill->led_trigger; if (!led->name) @@ -66,10 +66,8 @@ static void rfkill_led_trigger(struct rfkill *rfkill, led_trigger_event(led, LED_OFF); else led_trigger_event(led, LED_FULL); -#endif /* CONFIG_RFKILL_LEDS */ } -#ifdef CONFIG_RFKILL_LEDS static void rfkill_led_trigger_activate(struct led_classdev *led) { struct rfkill *rfkill = container_of(led->trigger, -- cgit v1.1 From 22604c866889c4b2e12b73cbf1683bda1b72a313 Mon Sep 17 00:00:00 2001 From: Michael Marineau Date: Sun, 4 Jan 2009 17:18:51 -0800 Subject: net: Fix for initial link state in 2.6.28 From: Michael Marineau Commit b47300168e770b60ab96c8924854c3b0eb4260eb "Do not fire linkwatch events until the device is registered." was made as a workaround for drivers that call netif_carrier_off before registering the device. Unfortunately this causes these drivers to incorrectly report their link status as IF_OPER_UNKNOWN which can falsely set the IFF_RUNNING flag when the interface is first brought up. This issues was previously pointed out[1] but was dismissed saying that IFF_RUNNING is not related to the link status. From my digging IFF_RUNNING, as reported to userspace, is based on the link state. It is set based on __LINK_STATE_START and IF_OPER_UP or IF_OPER_UNKNOWN. See [2], [3], and [4]. (Whether or not the kernel has IFF_RUNNING set in flags is not reported to user space so it may well be independent of the link, I don't know if and when it may get set.) The end result depends slightly depending on the driver. The the two I tested were e1000e and b44. With e1000e if the system is booted without a network cable attached the interface will falsely report RUNNING when it is brought up causing NetworkManager to attempt to start it and eventually time out. With b44 when the system is booted with a network cable attached and brought up with dhcpcd it will time out the first time. The attached patch that will still set the operstate variable correctly to IF_OPER_UP/DOWN/etc when linkwatch_fire_event is called but then return rather than skipping the linkwatch_fire_event call entirely as the previous fix did. (sorry it isn't inline, I don't have a patch friendly email client at the moment) Signed-off-by: David S. 
Miller --- net/core/link_watch.c | 7 ++++++- net/sched/sch_generic.c | 4 ---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index bf8f7af..1e401e1 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -178,7 +178,6 @@ static void __linkwatch_run_queue(int urgent_only) */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); - rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); @@ -215,6 +214,12 @@ void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); + rfc2863_policy(dev); + + /* Some drivers call netif_carrier_off early */ + if (dev->reg_state == NETREG_UNINITIALIZED) + return; + if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5f5efe4..23a8e61 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -270,8 +270,6 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -288,8 +286,6 @@ EXPORT_SYMBOL(netif_carrier_on); void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { - if (dev->reg_state == NETREG_UNINITIALIZED) - return; linkwatch_fire_event(dev); } } -- cgit v1.1 From 914d11647b6d6fe81bdf0c059612ee36282b8cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sun, 4 Jan 2009 17:27:31 -0800 Subject: ipv6: IPV6_PKTINFO relied userspace providing correct length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Reported-by: Eric Sesterhenn Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index eeeaad2..40f3246 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -404,7 +404,7 @@ sticky_done: else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) goto e_inval; - if (copy_from_user(&pkt, optval, optlen)) { + if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { retv = -EFAULT; break; } -- cgit v1.1 From 858eb711ba64f8a001d7003295b8078bcab33b6d Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Sun, 4 Jan 2009 17:29:21 -0800 Subject: DCB: fix kfree(skb) Use kfree_skb instead of kfree for struct sk_buff pointers. Signed-off-by: Roel Kluin Signed-off-by: David S. 
Miller --- net/dcb/dcbnl.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 5dbfe5f..8379496 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -191,7 +191,7 @@ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); return ret; } @@ -272,7 +272,7 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -314,7 +314,7 @@ static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -380,7 +380,7 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return -EINVAL; } @@ -458,7 +458,7 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, return 0; nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: return ret; } @@ -687,7 +687,7 @@ err_pg: nla_nest_cancel(dcbnl_skb, pg_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; @@ -949,7 +949,7 @@ err_bcn: nla_nest_cancel(dcbnl_skb, bcn_nest); nlmsg_failure: err: - kfree(dcbnl_skb); + kfree_skb(dcbnl_skb); err_out: ret = -EINVAL; return ret; -- cgit v1.1 From 6e5c172cf7ca1ab878cc6a6a4c1d52fef60f3ee0 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sun, 4 Jan 2009 17:31:18 -0800 Subject: can: update can-bcm for hrtimer hardirq callbacks Since commit ca109491f612aab5c8152207631c0444f63da97f ("hrtimer: removing all ur callback modes") the hrtimer callbacks are processed only in hardirq context. This patch moves some functionality into tasklets to run in softirq context. Additionally some duplicated code was removed in bcm_rx_thr_flush() and an avoidable memcpy was removed from bcm_rx_handler(). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- net/can/bcm.c | 208 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 125 insertions(+), 83 deletions(-) (limited to 'net') diff --git a/net/can/bcm.c b/net/can/bcm.c index da0d426..6248ae2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -70,7 +70,7 @@ #define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO - "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; + "can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n"; MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); @@ -90,6 +90,7 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct timeval ival1, ival2; struct hrtimer timer, thrtimer; + struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int count; @@ -341,6 +342,23 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } +static void bcm_tx_timeout_tsklet(unsigned long data) +{ + struct bcm_op *op = (struct bcm_op *)data; + struct bcm_msg_head msg_head; + + /* create notification to user */ + msg_head.opcode = TX_EXPIRED; + msg_head.flags = op->flags; + msg_head.count = op->count; + msg_head.ival1 = op->ival1; + msg_head.ival2 = op->ival2; + msg_head.can_id = op->can_id; + msg_head.nframes = 0; + + bcm_send_to_user(op, &msg_head, NULL, 0); +} + /* * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions */ @@ -352,20 +370,8 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) if (op->kt_ival1.tv64 && (op->count > 0)) { op->count--; - if (!op->count && (op->flags & TX_COUNTEVT)) { - struct bcm_msg_head msg_head; - - /* create notification to user */ - msg_head.opcode = TX_EXPIRED; - msg_head.flags = op->flags; - msg_head.count = op->count; - msg_head.ival1 = op->ival1; - msg_head.ival2 = op->ival2; - msg_head.can_id = op->can_id; - msg_head.nframes = 0; - - bcm_send_to_user(op, &msg_head, NULL, 0); - } + if (!op->count && (op->flags & TX_COUNTEVT)) + tasklet_schedule(&op->tsklet); } if (op->kt_ival1.tv64 && (op->count > 0)) { @@ -402,6 +408,9 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; + /* this element is not throttled anymore */ + data->can_dlc &= (BCM_CAN_DLC_MASK|RX_RECV); + head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; @@ -420,37 +429,32 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct can_frame *lastdata, - struct can_frame *rxdata) + const struct can_frame *rxdata) { memcpy(lastdata, rxdata, CFSIZ); - /* mark as used */ - lastdata->can_dlc |= RX_RECV; + /* mark as used and throttled by default */ + lastdata->can_dlc |= (RX_RECV|RX_THR); - /* throtteling mode inactive OR data update already on the run ? */ - if (!op->kt_ival2.tv64 || hrtimer_callback_running(&op->thrtimer)) { + /* throtteling mode inactive ? 
*/ + if (!op->kt_ival2.tv64) { /* send RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); + bcm_rx_changed(op, lastdata); return; } - if (hrtimer_active(&op->thrtimer)) { - /* mark as 'throttled' */ - lastdata->can_dlc |= RX_THR; + /* with active throttling timer we are just done here */ + if (hrtimer_active(&op->thrtimer)) return; - } - if (!op->kt_lastmsg.tv64) { - /* send first RX_CHANGED to the user immediately */ - bcm_rx_changed(op, rxdata); - op->kt_lastmsg = ktime_get(); - return; - } + /* first receiption with enabled throttling mode */ + if (!op->kt_lastmsg.tv64) + goto rx_changed_settime; + /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { - /* mark as 'throttled' and start timer */ - lastdata->can_dlc |= RX_THR; + /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS); @@ -458,7 +462,8 @@ static void bcm_rx_update_and_send(struct bcm_op *op, } /* the gap was that big, that throttling was not needed here */ - bcm_rx_changed(op, rxdata); +rx_changed_settime: + bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } @@ -467,7 +472,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, int index, - struct can_frame *rxdata) + const struct can_frame *rxdata) { /* * no one uses the MSBs of can_dlc for comparation, @@ -511,14 +516,12 @@ static void bcm_rx_starttimer(struct bcm_op *op) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); } -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +static void bcm_rx_timeout_tsklet(unsigned long data) { - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + struct bcm_op *op = (struct bcm_op *)data; struct bcm_msg_head msg_head; + /* create notification to user */ msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; @@ -528,6 +531,17 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); +} + +/* + * bcm_rx_timeout_handler - when the (cyclic) CAN frame receiption timed out + */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + /* schedule before NET_RX_SOFTIRQ */ + tasklet_hi_schedule(&op->tsklet); /* no restart of the timer is done here! 
*/ @@ -541,9 +555,25 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) } /* + * bcm_rx_do_flush - helper for bcm_rx_thr_flush + */ +static inline int bcm_rx_do_flush(struct bcm_op *op, int update, int index) +{ + if ((op->last_frames) && (op->last_frames[index].can_dlc & RX_THR)) { + if (update) + bcm_rx_changed(op, &op->last_frames[index]); + return 1; + } + return 0; +} + +/* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace + * + * update == 0 : just check if throttled data is available (any irq context) + * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op) +static int bcm_rx_thr_flush(struct bcm_op *op, int update) { int updated = 0; @@ -551,27 +581,25 @@ static int bcm_rx_thr_flush(struct bcm_op *op) int i; /* for MUX filter we start at index 1 */ - for (i = 1; i < op->nframes; i++) { - if ((op->last_frames) && - (op->last_frames[i].can_dlc & RX_THR)) { - op->last_frames[i].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[i]); - updated++; - } - } + for (i = 1; i < op->nframes; i++) + updated += bcm_rx_do_flush(op, update, i); } else { /* for RX_FILTER_ID and simple filter */ - if (op->last_frames && (op->last_frames[0].can_dlc & RX_THR)) { - op->last_frames[0].can_dlc &= ~RX_THR; - bcm_rx_changed(op, &op->last_frames[0]); - updated++; - } + updated += bcm_rx_do_flush(op, update, 0); } return updated; } +static void bcm_rx_thr_tsklet(unsigned long data) +{ + struct bcm_op *op = (struct bcm_op *)data; + + /* push the changed data to the userspace */ + bcm_rx_thr_flush(op, 1); +} + /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -580,7 +608,9 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - if (bcm_rx_thr_flush(op)) { + tasklet_schedule(&op->thrtsklet); + + if (bcm_rx_thr_flush(op, 0)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -596,48 +626,38 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; - struct can_frame rxframe; + const struct can_frame *rxframe = (struct can_frame *)skb->data; int i; /* disable timeout */ hrtimer_cancel(&op->timer); - if (skb->len == sizeof(rxframe)) { - memcpy(&rxframe, skb->data, sizeof(rxframe)); - /* save rx timestamp */ - op->rx_stamp = skb->tstamp; - /* save originator for recvfrom() */ - op->rx_ifindex = skb->dev->ifindex; - /* update statistics */ - op->frames_abs++; - kfree_skb(skb); + if (op->can_id != rxframe->can_id) + goto rx_freeskb; - } else { - kfree_skb(skb); - return; - } - - if (op->can_id != rxframe.can_id) - return; + /* save rx timestamp */ + op->rx_stamp = skb->tstamp; + /* save originator for recvfrom() */ + op->rx_ifindex = skb->dev->ifindex; + /* update statistics */ + op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ bcm_can_tx(op); - return; + goto rx_freeskb; } if (op->flags & RX_FILTER_ID) { /* the easiest case */ - bcm_rx_update_and_send(op, &op->last_frames[0], &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_update_and_send(op, &op->last_frames[0], rxframe); + goto rx_freeskb_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ - bcm_rx_cmp_to_index(op, 
0, &rxframe); - bcm_rx_starttimer(op); - return; + bcm_rx_cmp_to_index(op, 0, rxframe); + goto rx_freeskb_starttimer; } if (op->nframes > 1) { @@ -649,15 +669,19 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) */ for (i = 1; i < op->nframes; i++) { - if ((GET_U64(&op->frames[0]) & GET_U64(&rxframe)) == + if ((GET_U64(&op->frames[0]) & GET_U64(rxframe)) == (GET_U64(&op->frames[0]) & GET_U64(&op->frames[i]))) { - bcm_rx_cmp_to_index(op, i, &rxframe); + bcm_rx_cmp_to_index(op, i, rxframe); break; } } - bcm_rx_starttimer(op); } + +rx_freeskb_starttimer: + bcm_rx_starttimer(op); +rx_freeskb: + kfree_skb(skb); } /* @@ -681,6 +705,12 @@ static void bcm_remove_op(struct bcm_op *op) hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); + if (op->tsklet.func) + tasklet_kill(&op->tsklet); + + if (op->thrtsklet.func) + tasklet_kill(&op->thrtsklet); + if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -891,6 +921,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_tx_timeout_handler; + /* initialize tasklet for tx countevent notification */ + tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, + (unsigned long) op); + /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -1054,9 +1088,17 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->timer.function = bcm_rx_timeout_handler; + /* initialize tasklet for rx timeout notification */ + tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, + (unsigned long) op); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); op->thrtimer.function = bcm_rx_thr_handler; + /* initialize tasklet for rx throttle handling */ + tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, + (unsigned long) op); + /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1102,7 +1144,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = ktime_set(0, 0); hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op); + bcm_rx_thr_flush(op, 1); } if ((op->flags & STARTTIMER) && op->kt_ival1.tv64) -- cgit v1.1 From ddebc973c56b51b4e5d84d606f0430d81b895d67 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:42:53 -0800 Subject: dccp: Lockless integration of CCID congestion-control plugins Based on Arnaldo's earlier patch, this patch integrates the standardised CCID congestion control plugins (CCID-2 and CCID-3) of DCCP with dccp.ko: * enables a faster connection path by eliminating the need to always go through the CCID registration lock; * updates the implementation to use only a single array whose size equals the number of configured CCIDs instead of the maximum (256); * since the CCIDs are now fixed array elements, synchronization is no longer needed, simplifying use and implementation. CCID-2 is suggested as minimum for a basic DCCP implementation (RFC 4340, 10); CCID-3 is a standards-track CCID supported by RFC 4342 and RFC 5348. Signed-off-by: Gerrit Renker Signed-off-by: David S. 
Miller --- net/dccp/Kconfig | 4 -- net/dccp/Makefile | 9 ++- net/dccp/ackvec.h | 4 +- net/dccp/ccid.c | 156 +++++++++++++++++++++++++++++++----------------- net/dccp/ccid.h | 11 ++-- net/dccp/ccids/Kconfig | 70 +++++++--------------- net/dccp/ccids/Makefile | 8 --- net/dccp/ccids/ccid2.c | 22 +------ net/dccp/ccids/ccid3.c | 23 +------ net/dccp/dccp.h | 2 - net/dccp/proto.c | 7 +++ 11 files changed, 148 insertions(+), 168 deletions(-) (limited to 'net') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 7aa2a7a..ad6dffd 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -1,7 +1,6 @@ menuconfig IP_DCCP tristate "The DCCP Protocol (EXPERIMENTAL)" depends on INET && EXPERIMENTAL - select IP_DCCP_CCID2 ---help--- Datagram Congestion Control Protocol (RFC 4340) @@ -25,9 +24,6 @@ config INET_DCCP_DIAG def_tristate y if (IP_DCCP = y && INET_DIAG = y) def_tristate m -config IP_DCCP_ACKVEC - bool - source "net/dccp/ccids/Kconfig" menu "DCCP Kernel Hacking" diff --git a/net/dccp/Makefile b/net/dccp/Makefile index f4f8793..5ff2e7b 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -2,14 +2,19 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o +# +# CCID algorithms to be used by dccp.ko +# +# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency +dccp-y += ccids/ccid2.o ackvec.o +dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o + dccp_ipv4-y := ipv4.o # build dccp_ipv6 as module whenever either IPv6 or DCCP is a module obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o dccp_ipv6-y := ipv6.o -dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o - obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 4ccee03..569a33a 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -84,7 +84,7 @@ struct dccp_ackvec_record { struct sock; struct sk_buff; -#ifdef CONFIG_IP_DCCP_ACKVEC +#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___ extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); @@ -106,7 +106,7 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { return av->av_vec_len; } -#else /* CONFIG_IP_DCCP_ACKVEC */ +#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ static inline int dccp_ackvec_init(void) { return 0; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index bcc643f..538d3b1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,6 +13,70 @@ #include "ccid.h" +static struct ccid_operations *ccids[] = { + &ccid2_ops, +#ifdef CONFIG_IP_DCCP_CCID3 + &ccid3_ops, +#endif +}; + +static struct ccid_operations *ccid_by_number(const u8 id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + if (ccids[i]->ccid_id == id) + return ccids[i]; + return NULL; +} + +/* check that up to @array_len members in @ccid_array are supported */ +bool ccid_support_check(u8 const *ccid_array, u8 array_len) +{ + while (array_len > 0) + if (ccid_by_number(ccid_array[--array_len]) == NULL) + return false; + return true; +} + +/** + * ccid_get_builtin_ccids - Populate a list of built-in CCIDs + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. 
+ */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) +{ + *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + + for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1) + (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id; + return 0; +} + +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + u8 *ccid_array, array_len; + int err = 0; + + if (len < ARRAY_SIZE(ccids)) + return -EINVAL; + + if (ccid_get_builtin_ccids(&ccid_array, &array_len)) + return -ENOBUFS; + + if (put_user(array_len, optlen) || + copy_to_user(optval, ccid_array, array_len)) + err = -EFAULT; + + kfree(ccid_array); + return err; +} + +#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ static u8 builtin_ccids[] = { DCCPC_CCID2, /* CCID2 is supported by default */ #if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) @@ -62,6 +126,7 @@ static inline void ccids_read_unlock(void) #define ccids_read_lock() do { } while(0) #define ccids_read_unlock() do { } while(0) #endif +#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { @@ -93,6 +158,7 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } +#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ /* check that up to @array_len members in @ccid_array are supported */ bool ccid_support_check(u8 const *ccid_array, u8 array_len) { @@ -133,8 +199,9 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return -EFAULT; return 0; } +#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ -int ccid_register(struct ccid_operations *ccid_ops) +static int ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -152,79 +219,40 @@ int ccid_register(struct ccid_operations *ccid_ops) if (ccid_ops->ccid_hc_tx_slab == NULL) goto out_free_rx_slab; - ccids_write_lock(); - err = -EEXIST; - if (ccids[ccid_ops->ccid_id] == NULL) { - ccids[ccid_ops->ccid_id] = ccid_ops; - err = 0; - } - ccids_write_unlock(); - if (err != 0) - goto out_free_tx_slab; - - pr_info("CCID: Registered CCID %d (%s)\n", + pr_info("CCID: Activated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); + err = 0; out: return err; -out_free_tx_slab: - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); - ccid_ops->ccid_hc_tx_slab = NULL; - goto out; out_free_rx_slab: ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; goto out; } -EXPORT_SYMBOL_GPL(ccid_register); - -int ccid_unregister(struct ccid_operations *ccid_ops) +static void ccid_deactivate(struct ccid_operations *ccid_ops) { - ccids_write_lock(); - ccids[ccid_ops->ccid_id] = NULL; - ccids_write_unlock(); - ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab); ccid_ops->ccid_hc_tx_slab = NULL; ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab); ccid_ops->ccid_hc_rx_slab = NULL; - pr_info("CCID: Unregistered CCID %d (%s)\n", + pr_info("CCID: Deactivated CCID %d (%s)\n", ccid_ops->ccid_id, ccid_ops->ccid_name); - return 0; } -EXPORT_SYMBOL_GPL(ccid_unregister); - struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) { - struct ccid_operations *ccid_ops; + struct ccid_operations *ccid_ops = ccid_by_number(id); struct ccid *ccid = NULL; - ccids_read_lock(); -#ifdef CONFIG_MODULES - if (ccids[id] == NULL) { - /* We only try to load if in process context */ - ccids_read_unlock(); - if (gfp & GFP_ATOMIC) - goto out; - request_module("net-dccp-ccid-%d", id); - ccids_read_lock(); - } 
-#endif - ccid_ops = ccids[id]; if (ccid_ops == NULL) - goto out_unlock; - - if (!try_module_get(ccid_ops->ccid_owner)) - goto out_unlock; - - ccids_read_unlock(); + goto out; ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab : ccid_ops->ccid_hc_tx_slab, gfp); if (ccid == NULL) - goto out_module_put; + goto out; ccid->ccid_ops = ccid_ops; if (rx) { memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size); @@ -239,15 +267,10 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) } out: return ccid; -out_unlock: - ccids_read_unlock(); - goto out; out_free_ccid: kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab : ccid_ops->ccid_hc_tx_slab, ccid); ccid = NULL; -out_module_put: - module_put(ccid_ops->ccid_owner); goto out; } @@ -270,10 +293,6 @@ static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) ccid_ops->ccid_hc_tx_exit(sk); kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid); } - ccids_read_lock(); - if (ccids[ccid_ops->ccid_id] != NULL) - module_put(ccid_ops->ccid_owner); - ccids_read_unlock(); } void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) @@ -289,3 +308,28 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) } EXPORT_SYMBOL_GPL(ccid_hc_tx_delete); + +int __init ccid_initialize_builtins(void) +{ + int i, err; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) { + err = ccid_activate(ccids[i]); + if (err) + goto unwind_registrations; + } + return 0; + +unwind_registrations: + while(--i >= 0) + ccid_deactivate(ccids[i]); + return err; +} + +void ccid_cleanup_builtins(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ccids); i++) + ccid_deactivate(ccids[i]); +} diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 18f69423..75c21c5 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -29,7 +29,6 @@ struct tcp_info; * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.) * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled) * @ccid_name: alphabetical identifier string for @ccid_id - * @ccid_owner: module which implements/owns this CCID * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket * @@ -48,7 +47,6 @@ struct ccid_operations { unsigned char ccid_id; __u32 ccid_ccmps; const char *ccid_name; - struct module *ccid_owner; struct kmem_cache *ccid_hc_rx_slab, *ccid_hc_tx_slab; __u32 ccid_hc_rx_obj_size, @@ -90,8 +88,13 @@ struct ccid_operations { int __user *optlen); }; -extern int ccid_register(struct ccid_operations *ccid_ops); -extern int ccid_unregister(struct ccid_operations *ccid_ops); +extern struct ccid_operations ccid2_ops; +#ifdef CONFIG_IP_DCCP_CCID3 +extern struct ccid_operations ccid3_ops; +#endif + +extern int ccid_initialize_builtins(void); +extern void ccid_cleanup_builtins(void); struct ccid { struct ccid_operations *ccid_ops; diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index 1227594..b30f049 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -1,80 +1,52 @@ menu "DCCP CCIDs Configuration (EXPERIMENTAL)" depends on EXPERIMENTAL -config IP_DCCP_CCID2 - tristate "CCID2 (TCP-Like) (EXPERIMENTAL)" - def_tristate IP_DCCP - select IP_DCCP_ACKVEC - ---help--- - CCID 2, TCP-like Congestion Control, denotes Additive Increase, - Multiplicative Decrease (AIMD) congestion control with behavior - modelled directly on TCP, including congestion window, slow start, - timeouts, and so forth [RFC 2581]. 
CCID 2 achieves maximum - bandwidth over the long term, consistent with the use of end-to-end - congestion control, but halves its congestion window in response to - each congestion event. This leads to the abrupt rate changes - typical of TCP. Applications should use CCID 2 if they prefer - maximum bandwidth utilization to steadiness of rate. This is often - the case for applications that are not playing their data directly - to the user. For example, a hypothetical application that - transferred files over DCCP, using application-level retransmissions - for lost packets, would prefer CCID 2 to CCID 3. On-line games may - also prefer CCID 2. See RFC 4341 for further details. - - CCID2 is the default CCID used by DCCP. - config IP_DCCP_CCID2_DEBUG - bool "CCID2 debugging messages" - depends on IP_DCCP_CCID2 - ---help--- - Enable CCID2-specific debugging messages. + bool "CCID-2 debugging messages" + ---help--- + Enable CCID-2 specific debugging messages. - When compiling CCID2 as a module, this debugging output can - additionally be toggled by setting the ccid2_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid2_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. config IP_DCCP_CCID3 - tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" - def_tristate IP_DCCP + bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" + def_bool y if (IP_DCCP = y || IP_DCCP = m) select IP_DCCP_TFRC_LIB ---help--- - CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to be reasonably fair when competing for bandwidth with TCP-like flows, where a flow is "reasonably fair" if its sending rate is generally within a factor of two of the sending rate of a TCP flow under the same conditions. However, TFRC has a much lower variation of - throughput over time compared with TCP, which makes CCID 3 more - suitable than CCID 2 for applications such streaming media where a + throughput over time compared with TCP, which makes CCID-3 more + suitable than CCID-2 for applications such streaming media where a relatively smooth sending rate is of importance. - CCID 3 is further described in RFC 4342, + CCID-3 is further described in RFC 4342, http://www.ietf.org/rfc/rfc4342.txt The TFRC congestion control algorithms were initially described in - RFC 3448. + RFC 5448. This text was extracted from RFC 4340 (sec. 10.2), http://www.ietf.org/rfc/rfc4340.txt - - To compile this CCID as a module, choose M here: the module will be - called dccp_ccid3. - If in doubt, say M. + If in doubt, say N. config IP_DCCP_CCID3_DEBUG - bool "CCID3 debugging messages" - depends on IP_DCCP_CCID3 - ---help--- - Enable CCID3-specific debugging messages. + bool "CCID-3 debugging messages" + depends on IP_DCCP_CCID3 + ---help--- + Enable CCID-3 specific debugging messages. - When compiling CCID3 as a module, this debugging output can - additionally be toggled by setting the ccid3_debug module - parameter to 0 or 1. + The debugging output can additionally be toggled by setting the + ccid3_debug parameter to 0 or 1. - If in doubt, say N. + If in doubt, say N. 
config IP_DCCP_CCID3_RTO int "Use higher bound for nofeedback timer" diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile index 438f20b..cdaefff 100644 --- a/net/dccp/ccids/Makefile +++ b/net/dccp/ccids/Makefile @@ -1,9 +1 @@ -obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o - -dccp_ccid3-y := ccid3.o - -obj-$(CONFIG_IP_DCCP_CCID2) += dccp_ccid2.o - -dccp_ccid2-y := ccid2.o - obj-y += lib/ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index c9ea19a..d235294 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -768,10 +768,9 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) } } -static struct ccid_operations ccid2 = { +struct ccid_operations ccid2_ops = { .ccid_id = DCCPC_CCID2, .ccid_name = "TCP-like", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock), .ccid_hc_tx_init = ccid2_hc_tx_init, .ccid_hc_tx_exit = ccid2_hc_tx_exit, @@ -784,22 +783,5 @@ static struct ccid_operations ccid2 = { #ifdef CONFIG_IP_DCCP_CCID2_DEBUG module_param(ccid2_debug, bool, 0644); -MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages"); #endif - -static __init int ccid2_module_init(void) -{ - return ccid_register(&ccid2); -} -module_init(ccid2_module_init); - -static __exit void ccid2_module_exit(void) -{ - ccid_unregister(&ccid2); -} -module_exit(ccid2_module_exit); - -MODULE_AUTHOR("Andrea Bittau "); -MODULE_DESCRIPTION("DCCP TCP-Like (CCID2) CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-2"); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 3b8bd7c..a27b7f4 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -940,10 +940,9 @@ static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, return 0; } -static struct ccid_operations ccid3 = { +struct ccid_operations ccid3_ops = { .ccid_id = DCCPC_CCID3, .ccid_name = "TCP-Friendly Rate Control", - .ccid_owner = THIS_MODULE, .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock), .ccid_hc_tx_init = ccid3_hc_tx_init, .ccid_hc_tx_exit = ccid3_hc_tx_exit, @@ -964,23 +963,5 @@ static struct ccid_operations ccid3 = { #ifdef CONFIG_IP_DCCP_CCID3_DEBUG module_param(ccid3_debug, bool, 0644); -MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); +MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages"); #endif - -static __init int ccid3_module_init(void) -{ - return ccid_register(&ccid3); -} -module_init(ccid3_module_init); - -static __exit void ccid3_module_exit(void) -{ - ccid_unregister(&ccid3); -} -module_exit(ccid3_module_exit); - -MODULE_AUTHOR("Ian McDonald , " - "Arnaldo Carvalho de Melo "); -MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("net-dccp-ccid-3"); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0bc4c9a..f2230fc 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -432,10 +432,8 @@ static inline int dccp_ack_pending(const struct sock *sk) { const struct dccp_sock *dp = dccp_sk(sk); return dp->dccps_timestamp_echo != 0 || -#ifdef CONFIG_IP_DCCP_ACKVEC (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || -#endif inet_csk_ack_scheduled(sk); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1747cca..945b4d5 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1118,9 +1118,15 @@ static int __init dccp_init(void) if (rc) goto out_ackvec_exit; + rc = ccid_initialize_builtins(); + if (rc) + goto out_sysctl_exit; + dccp_timestamping_init(); out: return rc; 
+out_sysctl_exit: + dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: @@ -1143,6 +1149,7 @@ out_free_percpu: static void __exit dccp_fini(void) { + ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, get_order(dccp_hashinfo.bhash_size * -- cgit v1.1 From e5fd56ca4eb3a130882bbef69d6952ef6aca5c8d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:43:23 -0800 Subject: dccp: Clean up ccid.c after integration of CCID plugins This patch cleans up after integrating the CCID modules and, in addition, * moves the if/else cases from ccid_delete() into ccid_hc_{tx,rx}_delete(); * removes the 'gfp' argument to ccid_new() - since it is always gfp_any(). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/ackvec.h | 49 -------------------- net/dccp/ccid.c | 136 +++++------------------------------------------------- net/dccp/ccid.h | 3 +- net/dccp/feat.c | 2 +- 4 files changed, 14 insertions(+), 176 deletions(-) (limited to 'net') diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 569a33a..45f95e5 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -84,7 +84,6 @@ struct dccp_ackvec_record { struct sock; struct sk_buff; -#ifndef ___OLD_INTERFACE_TO_BE_REMOVED___ extern int dccp_ackvec_init(void); extern void dccp_ackvec_exit(void); @@ -106,52 +105,4 @@ static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { return av->av_vec_len; } -#else /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ -static inline int dccp_ackvec_init(void) -{ - return 0; -} - -static inline void dccp_ackvec_exit(void) -{ -} - -static inline struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) -{ - return NULL; -} - -static inline void dccp_ackvec_free(struct dccp_ackvec *av) -{ -} - -static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) -{ - return -1; -} - -static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno) -{ -} - -static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u64 *ackno, const u8 opt, - const u8 *value, const u8 len) -{ - return -1; -} - -static inline int dccp_insert_option_ackvec(const struct sock *sk, - const struct sk_buff *skb) -{ - return -1; -} - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) -{ - return 0; -} -#endif /* CONFIG_IP_DCCP_ACKVEC */ #endif /* _ACKVEC_H */ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 538d3b1..19b214a 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -76,58 +76,6 @@ int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, return err; } -#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ -static u8 builtin_ccids[] = { - DCCPC_CCID2, /* CCID2 is supported by default */ -#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) - DCCPC_CCID3, -#endif -}; - -static struct ccid_operations *ccids[CCID_MAX]; -#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) -static atomic_t ccids_lockct = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(ccids_lock); - -/* - * The strategy is: modifications ccids vector are short, do not sleep and - * veeery rare, but read access should be free of any exclusive locks. 
- */ -static void ccids_write_lock(void) -{ - spin_lock(&ccids_lock); - while (atomic_read(&ccids_lockct) != 0) { - spin_unlock(&ccids_lock); - yield(); - spin_lock(&ccids_lock); - } -} - -static inline void ccids_write_unlock(void) -{ - spin_unlock(&ccids_lock); -} - -static inline void ccids_read_lock(void) -{ - atomic_inc(&ccids_lockct); - smp_mb__after_atomic_inc(); - spin_unlock_wait(&ccids_lock); -} - -static inline void ccids_read_unlock(void) -{ - atomic_dec(&ccids_lockct); -} - -#else -#define ccids_write_lock() do { } while(0) -#define ccids_write_unlock() do { } while(0) -#define ccids_read_lock() do { } while(0) -#define ccids_read_unlock() do { } while(0) -#endif -#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ - static struct kmem_cache *ccid_kmem_cache_create(int obj_size, const char *fmt,...) { struct kmem_cache *slab; @@ -158,49 +106,6 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } -#ifdef ___OLD_INTERFACE_TO_BE_REMOVED___ -/* check that up to @array_len members in @ccid_array are supported */ -bool ccid_support_check(u8 const *ccid_array, u8 array_len) -{ - u8 i, j, found; - - for (i = 0, found = 0; i < array_len; i++, found = 0) { - for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) - found = (ccid_array[i] == builtin_ccids[j]); - if (!found) - return false; - } - return true; -} - -/** - * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array - * @ccid_array: pointer to copy into - * @array_len: value to return length into - * This function allocates memory - caller must see that it is freed after use. - */ -int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) -{ - *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); - if (*ccid_array == NULL) - return -ENOBUFS; - *array_len = ARRAY_SIZE(builtin_ccids); - return 0; -} - -int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, - char __user *optval, int __user *optlen) -{ - if (len < sizeof(builtin_ccids)) - return -EINVAL; - - if (put_user(sizeof(builtin_ccids), optlen) || - copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) - return -EFAULT; - return 0; -} -#endif /* ___OLD_INTERFACE_TO_BE_REMOVED___ */ - static int ccid_activate(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; @@ -241,7 +146,7 @@ static void ccid_deactivate(struct ccid_operations *ccid_ops) ccid_ops->ccid_id, ccid_ops->ccid_name); } -struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) +struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx) { struct ccid_operations *ccid_ops = ccid_by_number(id); struct ccid *ccid = NULL; @@ -250,7 +155,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) goto out; ccid = kmem_cache_alloc(rx ? 
ccid_ops->ccid_hc_rx_slab : - ccid_ops->ccid_hc_tx_slab, gfp); + ccid_ops->ccid_hc_tx_slab, gfp_any()); if (ccid == NULL) goto out; ccid->ccid_ops = ccid_ops; @@ -274,41 +179,24 @@ out_free_ccid: goto out; } -EXPORT_SYMBOL_GPL(ccid_new); - -static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) -{ - struct ccid_operations *ccid_ops; - - if (ccid == NULL) - return; - - ccid_ops = ccid->ccid_ops; - if (rx) { - if (ccid_ops->ccid_hc_rx_exit != NULL) - ccid_ops->ccid_hc_rx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_rx_slab, ccid); - } else { - if (ccid_ops->ccid_hc_tx_exit != NULL) - ccid_ops->ccid_hc_tx_exit(sk); - kmem_cache_free(ccid_ops->ccid_hc_tx_slab, ccid); - } -} - void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk) { - ccid_delete(ccid, sk, 1); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_rx_exit != NULL) + ccid->ccid_ops->ccid_hc_rx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid); + } } -EXPORT_SYMBOL_GPL(ccid_hc_rx_delete); - void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) { - ccid_delete(ccid, sk, 0); + if (ccid != NULL) { + if (ccid->ccid_ops->ccid_hc_tx_exit != NULL) + ccid->ccid_ops->ccid_hc_tx_exit(sk); + kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid); + } } -EXPORT_SYMBOL_GPL(ccid_hc_tx_delete); - int __init ccid_initialize_builtins(void) { int i, err; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 75c21c5..facedd2 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -111,8 +111,7 @@ extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, char __user *, int __user *); -extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, - gfp_t gfp); +extern struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx); static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 30f9fb7..741b2db 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -34,7 +34,7 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) { struct dccp_sock *dp = dccp_sk(sk); - struct ccid *new_ccid = ccid_new(ccid, sk, rx, gfp_any()); + struct ccid *new_ccid = ccid_new(ccid, sk, rx); if (new_ccid == NULL) return -ENOMEM; -- cgit v1.1 From 129fa44785a399248ae2466b6cb5c655e96668f7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 4 Jan 2009 21:45:33 -0800 Subject: dccp: Integrate the TFRC library with DCCP This patch integrates the TFRC library, which is a dependency of CCID-3 (and CCID-4), with the new use of CCIDs in the DCCP module. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/Makefile | 6 ++++-- net/dccp/ccid.c | 8 +++++++- net/dccp/ccids/Kconfig | 9 ++------- net/dccp/ccids/Makefile | 1 - net/dccp/ccids/lib/Makefile | 3 --- net/dccp/ccids/lib/loss_interval.c | 3 --- net/dccp/ccids/lib/packet_history.c | 9 --------- net/dccp/ccids/lib/tfrc.c | 19 ++++--------------- net/dccp/ccids/lib/tfrc.h | 11 ++++++++++- net/dccp/ccids/lib/tfrc_equation.c | 4 ---- net/dccp/feat.c | 4 ---- net/dccp/input.c | 2 -- 12 files changed, 27 insertions(+), 52 deletions(-) delete mode 100644 net/dccp/ccids/Makefile delete mode 100644 net/dccp/ccids/lib/Makefile (limited to 'net') diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 5ff2e7b..2991efc 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -8,6 +8,10 @@ dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o # CCID-2 is default (RFC 4340, p. 
77) and has Ack Vectors as dependency dccp-y += ccids/ccid2.o ackvec.o dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o +dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \ + ccids/lib/tfrc_equation.o \ + ccids/lib/packet_history.o \ + ccids/lib/loss_interval.o dccp_ipv4-y := ipv4.o @@ -22,5 +26,3 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o dccp_diag-y := diag.o dccp_probe-y := probe.o - -obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 19b214a..f3e9ba1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -12,6 +12,7 @@ */ #include "ccid.h" +#include "ccids/lib/tfrc.h" static struct ccid_operations *ccids[] = { &ccid2_ops, @@ -199,7 +200,10 @@ void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk) int __init ccid_initialize_builtins(void) { - int i, err; + int i, err = tfrc_lib_init(); + + if (err) + return err; for (i = 0; i < ARRAY_SIZE(ccids); i++) { err = ccid_activate(ccids[i]); @@ -211,6 +215,7 @@ int __init ccid_initialize_builtins(void) unwind_registrations: while(--i >= 0) ccid_deactivate(ccids[i]); + tfrc_lib_exit(); return err; } @@ -220,4 +225,5 @@ void ccid_cleanup_builtins(void) for (i = 0; i < ARRAY_SIZE(ccids); i++) ccid_deactivate(ccids[i]); + tfrc_lib_exit(); } diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig index b30f049..b28bf96 100644 --- a/net/dccp/ccids/Kconfig +++ b/net/dccp/ccids/Kconfig @@ -14,7 +14,6 @@ config IP_DCCP_CCID2_DEBUG config IP_DCCP_CCID3 bool "CCID-3 (TCP-Friendly) (EXPERIMENTAL)" def_bool y if (IP_DCCP = y || IP_DCCP = m) - select IP_DCCP_TFRC_LIB ---help--- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based rate-controlled congestion control mechanism. TFRC is designed to @@ -80,12 +79,8 @@ config IP_DCCP_CCID3_RTO therefore not be performed on WANs. config IP_DCCP_TFRC_LIB - tristate - default n + def_bool y if IP_DCCP_CCID3 config IP_DCCP_TFRC_DEBUG - bool - depends on IP_DCCP_TFRC_LIB - default y if IP_DCCP_CCID3_DEBUG - + def_bool y if IP_DCCP_CCID3_DEBUG endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile deleted file mode 100644 index cdaefff..0000000 --- a/net/dccp/ccids/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y += lib/ diff --git a/net/dccp/ccids/lib/Makefile b/net/dccp/ccids/lib/Makefile deleted file mode 100644 index 68c93e3..0000000 --- a/net/dccp/ccids/lib/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_IP_DCCP_TFRC_LIB) += dccp_tfrc_lib.o - -dccp_tfrc_lib-y := tfrc.o tfrc_equation.o packet_history.o loss_interval.o diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5b3ce06..4d1e401 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -60,7 +60,6 @@ void tfrc_lh_cleanup(struct tfrc_loss_hist *lh) lh->ring[LIH_INDEX(lh->counter)] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_lh_cleanup); static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) { @@ -121,7 +120,6 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) return (lh->i_mean < old_i_mean); } -EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean); /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur, @@ -169,7 +167,6 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh, } return 1; } -EXPORT_SYMBOL_GPL(tfrc_lh_interval_add); int __init tfrc_li_init(void) { diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6cc108a..b7785b3 100644 --- a/net/dccp/ccids/lib/packet_history.c 
+++ b/net/dccp/ccids/lib/packet_history.c @@ -94,7 +94,6 @@ int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) *headp = entry; return 0; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_add); void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) { @@ -109,7 +108,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp) *headp = NULL; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge); u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, const ktime_t now) @@ -127,7 +125,6 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno, return rtt; } -EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); /* @@ -172,7 +169,6 @@ void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, tfrc_rx_hist_entry_from_skb(entry, skb, ndp); } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); /* has the packet contained in skb been seen before? */ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) @@ -189,7 +185,6 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate); static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { @@ -390,7 +385,6 @@ int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, } return is_new_loss; } -EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss); int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h) { @@ -412,7 +406,6 @@ out_free: } return -ENOBUFS; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) { @@ -424,7 +417,6 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h) h->ring[i] = NULL; } } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); /** * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against @@ -495,4 +487,3 @@ keep_ref_for_next_time: return sample; } -EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt); diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 1859162..60c412c 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -1,20 +1,18 @@ /* - * TFRC: main module holding the pieces of the TFRC library together + * TFRC library initialisation * * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2007 Arnaldo Carvalho de Melo */ -#include -#include #include "tfrc.h" #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; module_param(tfrc_debug, bool, 0644); -MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); +MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages"); #endif -static int __init tfrc_module_init(void) +int __init tfrc_lib_init(void) { int rc = tfrc_li_init(); @@ -38,18 +36,9 @@ out: return rc; } -static void __exit tfrc_module_exit(void) +void __exit tfrc_lib_exit(void) { tfrc_rx_packet_history_exit(); tfrc_tx_packet_history_exit(); tfrc_li_exit(); } - -module_init(tfrc_module_init); -module_exit(tfrc_module_exit); - -MODULE_AUTHOR("Gerrit Renker , " - "Ian McDonald , " - "Arnaldo Carvalho de Melo "); -MODULE_DESCRIPTION("DCCP TFRC library"); -MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index ed98575..e9720b1 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -17,7 +17,8 @@ #include #include #include "../../dccp.h" -/* internal includes that this module exports: */ + +/* internal includes that this library exports: */ #include "loss_interval.h" #include "packet_history.h" @@ -66,4 +67,12 @@ extern void tfrc_rx_packet_history_exit(void); extern int tfrc_li_init(void); extern void tfrc_li_exit(void); + +#ifdef CONFIG_IP_DCCP_TFRC_LIB +extern int tfrc_lib_init(void); +extern void 
tfrc_lib_exit(void); +#else +#define tfrc_lib_init() (0) +#define tfrc_lib_exit() +#endif #endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 2f20a29..c5d3a9e 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -659,8 +659,6 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) return scaled_div32(result, f); } -EXPORT_SYMBOL_GPL(tfrc_calc_x); - /** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * @@ -693,5 +691,3 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) index = tfrc_binsearch(fvalue, 0); return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; } - -EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 741b2db..4152308 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1214,8 +1214,6 @@ const char *dccp_feat_typename(const u8 type) return NULL; } -EXPORT_SYMBOL_GPL(dccp_feat_typename); - const char *dccp_feat_name(const u8 feat) { static const char *feature_names[] = { @@ -1240,6 +1238,4 @@ const char *dccp_feat_name(const u8 feat) return feature_names[feat]; } - -EXPORT_SYMBOL_GPL(dccp_feat_name); #endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 5eb443f..7648f31 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -741,5 +741,3 @@ u32 dccp_sample_rtt(struct sock *sk, long delta) return delta; } - -EXPORT_SYMBOL_GPL(dccp_sample_rtt); -- cgit v1.1 From 4f7d54f59bc470f0aaa932f747a95232d7ebf8b1 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Mon, 5 Jan 2009 00:00:12 -0800 Subject: tcp: don't mask EOF and socket errors on nonblocking splice receive Currently, setting SPLICE_F_NONBLOCK on splice from a TCP socket results in masking of EOF (RDHUP) and error conditions on the socket by an -EAGAIN return. Move the NONBLOCK check in tcp_splice_read() to be after the EOF and error checks to fix this. Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4d655e9..bce1b06 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -580,10 +580,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, else if (!ret) { if (spliced) break; - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { @@ -601,6 +597,10 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -ENOTCONN; break; } + if (flags & SPLICE_F_NONBLOCK) { + ret = -EAGAIN; + break; + } if (!timeo) { ret = -EAGAIN; break; -- cgit v1.1 From 7945cc6464a4db0caf6dfacdfe05806051c4cb7b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Jan 2009 00:59:00 -0800 Subject: tcp: Kill extraneous SPLICE_F_NONBLOCK checks. In splice TCP receive, the SPLICE_F_NONBLOCK flag is used to compute the "timeo" value. So checking it again inside of the main receive loop to trigger -EAGAIN processing is entirely unnecessary. Noticed by Jarek P. and Lennert Buytenhek. Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bce1b06..35bcddf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -597,10 +597,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -ENOTCONN; break; } - if (flags & SPLICE_F_NONBLOCK) { - ret = -EAGAIN; - break; - } if (!timeo) { ret = -EAGAIN; break; -- cgit v1.1 From c276e098d3ee33059b4a1c747354226cec58487c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Jan 2009 16:01:51 -0800 Subject: Revert "net: Fix for initial link state in 2.6.28" This reverts commit 22604c866889c4b2e12b73cbf1683bda1b72a313. We can't fix this issue in this way, because we now can try to take the dev_base_lock rwlock as a writer in software interrupt context and that is not allowed without major surgery elsewhere. This initial link state problem needs to be solved in some other way. Signed-off-by: David S. Miller --- net/core/link_watch.c | 7 +------ net/sched/sch_generic.c | 4 ++++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 1e401e1..bf8f7af 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -178,6 +178,7 @@ static void __linkwatch_run_queue(int urgent_only) */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); + rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); @@ -214,12 +215,6 @@ void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); - rfc2863_policy(dev); - - /* Some drivers call netif_carrier_off early */ - if (dev->reg_state == NETREG_UNINITIALIZED) - return; - if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { dev_hold(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 23a8e61..5f5efe4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -270,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -286,6 +288,8 @@ EXPORT_SYMBOL(netif_carrier_on); void netif_carrier_off(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); } } -- cgit v1.1 From 55cdea9ed9cf2d76993e40ed7a1fc649a14db07c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 5 Jan 2009 18:07:07 -0800 Subject: af_iucv: New error return codes for connect() If the iucv_path_connect() call fails then return an error code that corresponds to the iucv_path_connect() failure condition; instead of returning -ECONNREFUSED for any failure. This helps to improve error handling for user space applications (e.g. inform the user that the z/VM guest is not authorized to connect to other guest virtual machines). The error return codes are based on those described in connect(2). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/iucv/af_iucv.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index af3192d..1077bc4 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -494,7 +494,21 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, if (err) { iucv_path_free(iucv->path); iucv->path = NULL; - err = -ECONNREFUSED; + switch (err) { + case 0x0b: /* Target communicator is not logged on */ + err = -ENETUNREACH; + break; + case 0x0d: /* Max connections for this guest exceeded */ + case 0x0e: /* Max connections for target guest exceeded */ + err = -EAGAIN; + break; + case 0x0f: /* Missing IUCV authorization */ + err = -EACCES; + break; + default: + err = -ECONNREFUSED; + break; + } goto done; } -- cgit v1.1 From 18becbc5479f88d5adc218374ca62b8b93ec2545 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 5 Jan 2009 18:07:46 -0800 Subject: af_iucv: avoid left over IUCV connections from failing connects For certain types of AFIUCV socket connect failures IUCV connections are left over. Add some cleanup-statements to avoid cluttered IUCV connections. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1077bc4..6b5f193 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -521,6 +521,13 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, release_sock(sk); return -ECONNREFUSED; } + + if (err) { + iucv_path_sever(iucv->path, NULL); + iucv_path_free(iucv->path); + iucv->path = NULL; + } + done: release_sock(sk); return err; -- cgit v1.1 From 65dbd7c2778f1921ef1ee2a73e47a2a126fed30f Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 5 Jan 2009 18:08:23 -0800 Subject: af_iucv: Free iucv path/socket in path_pending callback Free iucv path after iucv_path_sever() calls in iucv_callback_connreq() (path_pending() iucv callback). If iucv_path_accept() fails, free path and free/kill newly created socket. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6b5f193..eb8a2a0 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1042,12 +1042,14 @@ static int iucv_callback_connreq(struct iucv_path *path, ASCEBC(user_data, sizeof(user_data)); if (sk->sk_state != IUCV_LISTEN) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } /* Check for backlog size */ if (sk_acceptq_is_full(sk)) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1055,6 +1057,7 @@ static int iucv_callback_connreq(struct iucv_path *path, nsk = iucv_sock_alloc(NULL, SOCK_STREAM, GFP_ATOMIC); if (!nsk) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); goto fail; } @@ -1078,6 +1081,8 @@ static int iucv_callback_connreq(struct iucv_path *path, err = iucv_path_accept(path, &af_iucv_handler, nuser_data, nsk); if (err) { err = iucv_path_sever(path, user_data); + iucv_path_free(path); + iucv_sock_kill(nsk); goto fail; } -- cgit v1.1 From f1d3e4dca3f8d4f55656477e83d0afe0ea7cbaed Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Jan 2009 18:09:02 -0800 Subject: iucv: fix cpu hotplug If the iucv module is compiled in/loaded but no user is registered cpu hot remove doesn't work. 
Reason for that is that the iucv cpu hotplug notifier on CPU_DOWN_PREPARE checks if the iucv_buffer_cpumask would be empty after the corresponding bit would be cleared. However the bit was never set since iucv wasn't enabled. That causes all cpu hot unplug operations to fail in this scenario. To fix this, use iucv_path_table as an indicator of whether iucv is enabled or not. Signed-off-by: Heiko Carstens Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/iucv.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 8f57d4f..032f61e 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -517,6 +517,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; + get_online_cpus(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -524,19 +525,17 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ - goto out_path; + goto out; put_online_cpus(); return 0; - -out_path: - put_online_cpus(); - kfree(iucv_path_table); out: + kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); return rc; } @@ -551,8 +550,9 @@ static void iucv_disable(void) { get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); - put_online_cpus(); kfree(iucv_path_table); + iucv_path_table = NULL; + put_online_cpus(); } static int __cpuinit iucv_cpu_notify(struct notifier_block *self, @@ -589,10 +589,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: + if (!iucv_path_table) + break; smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: + if (!iucv_path_table) + break; cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) -- cgit v1.1 From 6f57321422e0d359e83c978c2b03db77b967b7d5 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 5 Jan 2009 18:14:19 -0800 Subject: pkt_sched: cls_u32: Fix locking in u32_change() New nodes are inserted in u32_change() under rtnl_lock() with wmb(), so without tcf_tree_lock() like in other classifiers (e.g. cls_fw). This isn't enough without rmb() on the read side, but on the other hand adding such barriers doesn't give any savings, so the lock is added instead. Reported-by: m0sia Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 05d1780..07372f6 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -638,8 +638,9 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, break; n->next = *ins; - wmb(); + tcf_tree_lock(tp); *ins = n; + tcf_tree_unlock(tp); *arg = (unsigned long)n; return 0; -- cgit v1.1
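The cls_u32 change above is a small but instructive pattern: the old code linked the new node and then published the head pointer with only a writer-side wmb(), which has no matching barrier on the lockless lookup path, so the fix serialises the publish under the existing tree lock rather than adding barrier pairs. The sketch below is a minimal userspace analogue of that insert-then-publish step, assuming POSIX threads; the names (node, list_lock, list_insert, list_len) and the mutex standing in for tcf_tree_lock()/tcf_tree_unlock() are invented for illustration and are not the kernel API, and unlike the real cls_u32 lookup the reader here simply takes the same lock.

/*
 * Userspace sketch of the "link node, then publish head" step that
 * u32_change() performs between tcf_tree_lock()/tcf_tree_unlock().
 * A pthread mutex stands in for the qdisc tree lock; this illustrates
 * the pattern only, it is not the kernel implementation.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int key;
	struct node *next;
};

static struct node *head;	/* analogue of the *ins slot being published */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void list_insert(int key)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		abort();
	n->key = key;

	pthread_mutex_lock(&list_lock);		/* tcf_tree_lock(tp)   */
	n->next = head;				/* n->next = *ins      */
	head = n;				/* *ins = n            */
	pthread_mutex_unlock(&list_lock);	/* tcf_tree_unlock(tp) */
}

/* The real cls_u32 lookup is lockless; locking the walk keeps this
 * sketch simple and obviously correct. */
static int list_len(void)
{
	int len = 0;

	pthread_mutex_lock(&list_lock);
	for (struct node *n = head; n; n = n->next)
		len++;
	pthread_mutex_unlock(&list_lock);
	return len;
}

static void *writer(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000; i++)
		list_insert(i);
	return NULL;
}

int main(void)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, writer, NULL))
		abort();
	for (int i = 0; i < 5; i++)
		printf("reader sees %d nodes\n", list_len());
	pthread_join(tid, NULL);
	printf("final length: %d\n", list_len());
	return 0;
}

Build with something like cc -std=c11 -pthread list.c. The design point mirrors the commit message: a lone wmb() on the writer would only be correct with a matching read-side barrier, and since the tree lock is already available on the (slow) configuration path, taking it around the two pointer stores costs nothing measurable.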