diff options
Diffstat (limited to 'net')
38 files changed, 405 insertions, 222 deletions
diff --git a/net/802/mrp.c b/net/802/mrp.c index 1eb05d8..3ed6162 100644 --- a/net/802/mrp.c +++ b/net/802/mrp.c @@ -24,6 +24,11 @@ static unsigned int mrp_join_time __read_mostly = 200; module_param(mrp_join_time, uint, 0644); MODULE_PARM_DESC(mrp_join_time, "Join time in ms (default 200ms)"); + +static unsigned int mrp_periodic_time __read_mostly = 1000; +module_param(mrp_periodic_time, uint, 0644); +MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)"); + MODULE_LICENSE("GPL"); static const u8 @@ -595,6 +600,24 @@ static void mrp_join_timer(unsigned long data) mrp_join_timer_arm(app); } +static void mrp_periodic_timer_arm(struct mrp_applicant *app) +{ + mod_timer(&app->periodic_timer, + jiffies + msecs_to_jiffies(mrp_periodic_time)); +} + +static void mrp_periodic_timer(unsigned long data) +{ + struct mrp_applicant *app = (struct mrp_applicant *)data; + + spin_lock(&app->lock); + mrp_mad_event(app, MRP_EVENT_PERIODIC); + mrp_pdu_queue(app); + spin_unlock(&app->lock); + + mrp_periodic_timer_arm(app); +} + static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset) { __be16 endmark; @@ -845,6 +868,9 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl) rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app); setup_timer(&app->join_timer, mrp_join_timer, (unsigned long)app); mrp_join_timer_arm(app); + setup_timer(&app->periodic_timer, mrp_periodic_timer, + (unsigned long)app); + mrp_periodic_timer_arm(app); return 0; err3: @@ -870,6 +896,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl) * all pending messages before the applicant is gone. */ del_timer_sync(&app->join_timer); + del_timer_sync(&app->periodic_timer); spin_lock_bh(&app->lock); mrp_mad_event(app, MRP_EVENT_TX); diff --git a/net/Kconfig b/net/Kconfig index ee02136..b50dacc 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -228,7 +228,7 @@ config RPS config RFS_ACCEL boolean - depends on RPS && GENERIC_HARDIRQS + depends on RPS select CPU_RMAP default y diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 4493913..813db4e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -168,6 +168,7 @@ static int batadv_interface_tx(struct sk_buff *skb, case ETH_P_8021Q: vhdr = (struct vlan_ethhdr *)skb->data; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; if (vhdr->h_vlan_encapsulated_proto != ethertype) break; @@ -331,6 +332,7 @@ void batadv_interface_rx(struct net_device *soft_iface, case ETH_P_8021Q: vhdr = (struct vlan_ethhdr *)skb->data; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; if (vhdr->h_vlan_encapsulated_proto != ethertype) break; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 634deba..fb7356f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1146,7 +1146,11 @@ int hci_dev_open(__u16 dev) goto done; } - if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { + /* Check for rfkill but allow the HCI setup stage to proceed + * (which in itself doesn't cause any RF activity). + */ + if (test_bit(HCI_RFKILLED, &hdev->dev_flags) && + !test_bit(HCI_SETUP, &hdev->dev_flags)) { ret = -ERFKILL; goto done; } @@ -1566,10 +1570,13 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (!blocked) - return 0; - - hci_dev_do_close(hdev); + if (blocked) { + set_bit(HCI_RFKILLED, &hdev->dev_flags); + if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + hci_dev_do_close(hdev); + } else { + clear_bit(HCI_RFKILLED, &hdev->dev_flags); + } return 0; } @@ -1591,9 +1598,13 @@ static void hci_power_on(struct work_struct *work) return; } - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { + clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_do_close(hdev); + } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + } if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -2209,6 +2220,9 @@ int hci_register_dev(struct hci_dev *hdev) } } + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) + set_bit(HCI_RFKILLED, &hdev->dev_flags); + set_bit(HCI_SETUP, &hdev->dev_flags); if (hdev->dev_type != HCI_AMP) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 94aab73..8db3e89 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3557,7 +3557,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) cp.handle = cpu_to_le16(conn->handle); if (ltk->authenticated) - conn->sec_level = BT_SECURITY_HIGH; + conn->pending_sec_level = BT_SECURITY_HIGH; + else + conn->pending_sec_level = BT_SECURITY_MEDIUM; + + conn->enc_key_size = ltk->enc_size; hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index b3bb7bc..63fa111 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3755,6 +3755,13 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, sk = chan->sk; + /* For certain devices (ex: HID mouse), support for authentication, + * pairing and bonding is optional. For such devices, inorder to avoid + * the ACL alive for too long after L2CAP disconnection, reset the ACL + * disc_timeout back to HCI_DISCONN_TIMEOUT during L2CAP connect. + */ + conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; + bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); chan->psm = psm; diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 6d126fa..84fcf9f 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -569,7 +569,6 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) { struct rfcomm_dev *dev = dlc->owner; - struct tty_struct *tty; if (!dev) return; @@ -581,38 +580,8 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) DPM_ORDER_DEV_AFTER_PARENT); wake_up_interruptible(&dev->port.open_wait); - } else if (dlc->state == BT_CLOSED) { - tty = tty_port_tty_get(&dev->port); - if (!tty) { - if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - /* Drop DLC lock here to avoid deadlock - * 1. rfcomm_dev_get will take rfcomm_dev_lock - * but in rfcomm_dev_add there's lock order: - * rfcomm_dev_lock -> dlc lock - * 2. tty_port_put will deadlock if it's - * the last reference - * - * FIXME: when we release the lock anything - * could happen to dev, even its destruction - */ - rfcomm_dlc_unlock(dlc); - if (rfcomm_dev_get(dev->id) == NULL) { - rfcomm_dlc_lock(dlc); - return; - } - - if (!test_and_set_bit(RFCOMM_TTY_RELEASED, - &dev->flags)) - tty_port_put(&dev->port); - - tty_port_put(&dev->port); - rfcomm_dlc_lock(dlc); - } - } else { - tty_hangup(tty); - tty_kref_put(tty); - } - } + } else if (dlc->state == BT_CLOSED) + tty_port_tty_hangup(&dev->port, false); } static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1606f74..2b4b32a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2216,6 +2216,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) EXPORT_SYMBOL(ceph_osdc_sync); /* + * Call all pending notify callbacks - for use after a watch is + * unregistered, to make sure no more callbacks for it will be invoked + */ +extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) +{ + flush_workqueue(osdc->notify_wq); +} +EXPORT_SYMBOL(ceph_osdc_flush_notifies); + + +/* * init, shutdown */ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) diff --git a/net/core/dev.c b/net/core/dev.c index 5c713f2..65f829c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5247,10 +5247,12 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); +static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { list_add_tail(&dev->todo_list, &net_todo_list); + dev_net(dev)->dev_unreg_count++; } static void rollback_registered_many(struct list_head *head) @@ -5918,6 +5920,12 @@ void netdev_run_todo(void) if (dev->destructor) dev->destructor(dev); + /* Report a network device has been unregistered */ + rtnl_lock(); + dev_net(dev)->dev_unreg_count--; + __rtnl_unlock(); + wake_up(&netdev_unregistering_wq); + /* Free network device */ kobject_put(&dev->dev.kobj); } @@ -6603,6 +6611,34 @@ static void __net_exit default_device_exit(struct net *net) rtnl_unlock(); } +static void __net_exit rtnl_lock_unregistering(struct list_head *net_list) +{ + /* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace in net_list. + */ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + static void __net_exit default_device_exit_batch(struct list_head *net_list) { /* At exit all network devices most be removed from a network @@ -6614,7 +6650,18 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list) struct net *net; LIST_HEAD(dev_kill_list); - rtnl_lock(); + /* To prevent network device cleanup code from dereferencing + * loopback devices or network devices that have been freed + * wait here for all pending unregistrations to complete, + * before unregistring the loopback device and allowing the + * network namespace be freed. + * + * The netdev todo list containing all network devices + * unregistrations that happen in default_device_exit_batch + * will run in the rtnl_unlock() at the end of + * default_device_exit_batch. + */ + rtnl_lock_unregistering(net_list); list_for_each_entry(net, net_list, exit_list) { for_each_netdev_reverse(net, dev) { if (dev->rtnl_link_ops) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1929af8..8d7d0dd 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -154,8 +154,8 @@ ipv6: if (poff >= 0) { __be32 *ports, _ports; - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + ports = skb_header_pointer(skb, nhoff + poff, + sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c3c7b27..fc75c9e 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -1284,15 +1284,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { - if (!np->dev) - return; - rtnl_lock(); + if (!np->dev) + goto out; __netpoll_cleanup(np); - rtnl_unlock(); - dev_put(np->dev); np->dev = NULL; +out: + rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13c..3f1ec15 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,11 +10,24 @@ #include <net/secure_seq.h> -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } #ifdef CONFIG_INET @@ -42,6 +55,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +77,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +97,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +112,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +124,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +139,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +159,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +184,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9c61f9c..6cf9f77 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 7a1874b..cfeb85c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,10 +263,8 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } } EXPORT_SYMBOL(build_ehash_secret); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d6c0e64..7defdc9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -736,7 +736,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -749,7 +749,7 @@ static void igmp_ifc_timer_expire(unsigned long data) igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 000e3d2..33d5537 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -32,8 +32,8 @@ * At the moment of writing this notes identifier of IP packets is generated * to be unpredictable using this code only for packets subjected * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size uses a constant ID and do not use this code (see - * ip_select_ident() in include/net/ip.h). + * PMTU in size when local fragmentation is disabled use a constant ID and do + * not use this code (see ip_select_ident() in include/net/ip.h). * * Route cache entries hold references to our nodes. * New cache entries get references via lookup by destination IP address in diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9ee17e3..a04d872 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -386,7 +386,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->dst, sk, + ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -1316,7 +1316,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, else ttl = ip_select_ttl(inet, &rt->dst); - iph = (struct iphdr *)skb->data; + iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; @@ -1324,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ac9fabe0..d3fbad4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -623,6 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->err_count = 0; } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); ttl = tnl_params->ttl; if (ttl == 0) { if (skb->protocol == htons(ETH_P_IP)) @@ -651,8 +652,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, - ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, - !net_eq(tunnel->net, dev_net(dev))); + tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9ae54b0..62212c7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1658,7 +1658,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(iph, skb_dst(skb), NULL); + ip_select_ident(skb, skb_dst(skb), NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a86c7ae..193db03 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -218,8 +218,10 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) ipv4_sk_update_pmtu(skb, sk, info); - else if (type == ICMP_REDIRECT) + else if (type == ICMP_REDIRECT) { ipv4_sk_redirect(skb, sk); + return; + } /* Report error on raw socket, if: 1. User requested ip_recverr. @@ -387,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 8a57d79..559d4ae 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (!cg_proto) return -EINVAL; - if (val > RESOURCE_MAX) - val = RESOURCE_MAX; + if (val > RES_COUNTER_MAX) + val = RES_COUNTER_MAX; tcp = tcp_from_cgproto(cg_proto); @@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, net->ipv4.sysctl_tcp_mem[i]); - if (val == RESOURCE_MAX) + if (val == RES_COUNTER_MAX) clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags); - else if (val != RESOURCE_MAX) { + else if (val != RES_COUNTER_MAX) { /* * The active bit needs to be written after the static_key * update. This is what guarantees that the socket activation @@ -187,7 +187,7 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) switch (cft->private) { case RES_LIMIT: - val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); + val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX); break; case RES_USAGE: val = tcp_read_usage(memcg); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7c83cb8..e6bb825 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -895,8 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? - tcp_wfree : sock_wfree; + skb->destructor = tcp_wfree; atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -1840,7 +1839,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1869,13 +1867,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - /* TSQ : sk_wmem_alloc accounts skb truesize, - * including skb overhead. But thats OK. + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates */ - if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + limit = max(skb->truesize, sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); break; } + limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 74d2c95..0ca44df 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -658,7 +658,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_REDIRECT: ipv4_sk_redirect(skb, sk); - break; + goto out; } /* diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index eb1dd4d..b5663c3 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(top_iph, dst->child, NULL); + ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6ff126..cd3fb30 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1499,6 +1499,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; @@ -2193,43 +2220,21 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - if (valid_lft > MIN_VALID_LIFETIME || - valid_lft > stored_lft) - update_lft = 1; - else if (stored_lft <= MIN_VALID_LIFETIME) { - /* valid_lft <= stored_lft is always true */ - /* - * RFC 4862 Section 5.5.3e: - * "Note that the preferred lifetime of - * the corresponding address is always - * reset to the Preferred Lifetime in - * the received Prefix Information - * option, regardless of whether the - * valid lifetime is also reset or - * ignored." - * - * So if the preferred lifetime in - * this advertisement is different - * than what we have stored, but the - * valid lifetime is invalid, just - * reset prefered_lft. - * - * We must set the valid lifetime - * to the stored lifetime since we'll - * be updating the timestamp below, - * else we'll set it back to the - * minimum. - */ - if (prefered_lft != ifp->prefered_lft) { - valid_lft = stored_lft; - update_lft = 1; - } - } else { - valid_lft = MIN_VALID_LIFETIME; - if (valid_lft < prefered_lft) - prefered_lft = valid_lft; - update_lft = 1; - } + const u32 minimum_lft = min( + stored_lft, (u32)MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; } if (update_lft) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6b26e9f..7bb5446 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct ip6_tnl *tunnel = netdev_priv(dev); struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ + unsigned int max_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev))); - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d5..a54c45c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1015,6 +1015,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + struct frag_hdr fhdr; + skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1036,12 +1038,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->protocol = htons(ETH_P_IPV6); skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; /* Specify the length of each IPv6 datagram fragment. * It has to be a multiple of 8. @@ -1052,15 +1048,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, ipv6_select_ident(&fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1227,27 +1218,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { + if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 096cd67..d18f9f9 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2034,7 +2034,7 @@ static void mld_dad_timer_expire(unsigned long data) if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, @@ -2379,7 +2379,7 @@ static void mld_gq_timer_expire(unsigned long data) idev->mc_gq_running = 0; mld_send_report(idev, NULL); - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_timer_expire(unsigned long data) @@ -2392,7 +2392,7 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) mld_ifc_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_event(struct inet6_dev *idev) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 58916bb..a4ed241 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -335,8 +335,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); } - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + return; + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 7ee5cb9..afd5605 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -586,19 +650,9 @@ static int ipip6_rcv(struct sk_buff *skb) IPCB(skb)->flags = 0; skb->protocol = htons(ETH_P_IPV6); - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); @@ -748,7 +802,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -777,7 +831,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f405815..72b7eaa 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -525,8 +525,10 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type == ICMPV6_PKT_TOOBIG) ip6_sk_update_pmtu(skb, sk, info); - if (type == NDISC_REDIRECT) + if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); + goto out; + } np = inet6_sk(sk); diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 54563ad..355cc3b 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -154,6 +154,7 @@ static void lapb_t1timer_expiry(unsigned long param) } else { lapb->n2count++; lapb_requeue_frames(lapb); + lapb_kick(lapb); } break; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b75ff64..c47444e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 32ad015..fc6de56 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -285,7 +285,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) /* remove one skb from head of flow queue */ -static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) +static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow) { struct sk_buff *skb = flow->head; @@ -293,6 +293,8 @@ static struct sk_buff *fq_dequeue_head(struct fq_flow *flow) flow->head = skb->next; skb->next = NULL; flow->qlen--; + sch->qstats.backlog -= qdisc_pkt_len(skb); + sch->q.qlen--; } return skb; } @@ -419,7 +421,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct fq_flow *f; - skb = fq_dequeue_head(&q->internal); + skb = fq_dequeue_head(sch, &q->internal); if (skb) goto out; fq_check_throttled(q, now); @@ -449,7 +451,7 @@ begin: goto begin; } - skb = fq_dequeue_head(f); + skb = fq_dequeue_head(sch, f); if (!skb) { head->first = f->next; /* force a pass through old_flows to prevent starvation */ @@ -490,19 +492,44 @@ begin: } } out: - sch->qstats.backlog -= qdisc_pkt_len(skb); qdisc_bstats_update(sch, skb); - sch->q.qlen--; qdisc_unthrottled(sch); return skb; } static void fq_reset(struct Qdisc *sch) { + struct fq_sched_data *q = qdisc_priv(sch); + struct rb_root *root; struct sk_buff *skb; + struct rb_node *p; + struct fq_flow *f; + unsigned int idx; - while ((skb = fq_dequeue(sch)) != NULL) + while ((skb = fq_dequeue_head(sch, &q->internal)) != NULL) kfree_skb(skb); + + if (!q->fq_root) + return; + + for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { + root = &q->fq_root[idx]; + while ((p = rb_first(root)) != NULL) { + f = container_of(p, struct fq_flow, fq_node); + rb_erase(p, root); + + while ((skb = fq_dequeue_head(sch, f)) != NULL) + kfree_skb(skb); + + kmem_cache_free(fq_flow_cachep, f); + } + } + q->new_flows.first = NULL; + q->old_flows.first = NULL; + q->delayed = RB_ROOT; + q->flows = 0; + q->inactive_flows = 0; + q->throttled_flows = 0; } static void fq_rehash(struct fq_sched_data *q, @@ -645,6 +672,8 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); + if (!skb) + break; kfree_skb(skb); drop_count++; } @@ -657,21 +686,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) static void fq_destroy(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - struct rb_root *root; - struct rb_node *p; - unsigned int idx; - if (q->fq_root) { - for (idx = 0; idx < (1U << q->fq_trees_log); idx++) { - root = &q->fq_root[idx]; - while ((p = rb_first(root)) != NULL) { - rb_erase(p, root); - kmem_cache_free(fq_flow_cachep, - container_of(p, struct fq_flow, fq_node)); - } - } - kfree(q->fq_root); - } + fq_reset(sch); + kfree(q->fq_root); qdisc_watchdog_cancel(&q->watchdog); } diff --git a/net/socket.c b/net/socket.c index 0ceaa5c..ebed4b6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_recvmsg); -static void sock_aio_dtor(struct kiocb *iocb) -{ - kfree(iocb->private); -} - static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) { @@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { - if (!is_sync_kiocb(iocb)) { - siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); - if (!siocb) - return NULL; - iocb->ki_dtor = sock_aio_dtor; - } + if (!is_sync_kiocb(iocb)) + BUG(); siocb->kiocb = iocb; iocb->private = siocb; @@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (pos != 0) return -ESPIPE; - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ + if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 4151590..5285ead 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); /* * Remove stale credentials. Avoid sleeping inside the loop. */ -static int +static long rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred, *next; unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; + long freed = 0; list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { @@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) */ if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - return 0; + break; list_del_init(&cred->cr_lru); number_cred_unused--; + freed++; if (atomic_read(&cred->cr_count) != 0) continue; @@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) } spin_unlock(cache_lock); } - return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + return freed; } /* * Run memory cache shrinker. */ -static int -rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc) +static unsigned long +rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) + { LIST_HEAD(free); - int res; - int nr_to_scan = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; + unsigned long freed; + + if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return SHRINK_STOP; - if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) - return (nr_to_scan == 0) ? 0 : -1; + /* nothing left, don't come back */ if (list_empty(&cred_unused)) - return 0; + return SHRINK_STOP; + spin_lock(&rpc_credcache_lock); - res = rpcauth_prune_expired(&free, nr_to_scan); + freed = rpcauth_prune_expired(&free, sc->nr_to_scan); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); - return res; + + return freed; +} + +static unsigned long +rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc) + +{ + return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } /* @@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task) } static struct shrinker rpc_cred_shrinker = { - .shrink = rpcauth_cache_shrinker, + .count_objects = rpcauth_cache_shrink_count, + .scan_objects = rpcauth_cache_shrink_scan, .seeks = DEFAULT_SEEKS, }; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index f6d84be..ed04869 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -239,7 +239,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred) if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags)) dprintk("RPC: UID %d Credential key reset\n", - tcred->cr_uid); + from_kuid(&init_user_ns, tcred->cr_uid)); /* set up fasttrack for the normal case */ set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags); } diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 30eb502..fcac5d1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -75,7 +75,7 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 -static DEFINE_HASHTABLE(gss_auth_hash_table, 16); +static DEFINE_HASHTABLE(gss_auth_hash_table, 4); static DEFINE_SPINLOCK(gss_auth_hash_lock); struct gss_pipe { |