diff options
author | Pablo Neira Ayuso <pablo@netfilter.org> | 2012-10-17 10:59:20 +0200 |
---|---|---|
committer | Pablo Neira Ayuso <pablo@netfilter.org> | 2012-10-17 10:59:20 +0200 |
commit | 0b4f5b1d6385826093dc3cd9035b186f0d77a5dc (patch) | |
tree | e7602057216d3de837995b267a281ae09d899a26 /net | |
parent | 939ccba437da1726a5c8a5b702a47d473da927ae (diff) | |
parent | 9f0d3c2781baa1102108e16efbe640dd74564a7c (diff) | |
download | op-kernel-dev-0b4f5b1d6385826093dc3cd9035b186f0d77a5dc.zip op-kernel-dev-0b4f5b1d6385826093dc3cd9035b186f0d77a5dc.tar.gz |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
To obtain new flag FLOWI_FLAG_KNOWN_NH to fix netfilter's xt_TEE target.
Diffstat (limited to 'net')
63 files changed, 816 insertions, 796 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index add69d0..65e06ab 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -5,7 +5,7 @@ #include <linux/export.h> #include "vlan.h" -bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) +bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); - if (!vlan_dev) { - /* Only the last call to vlan_do_receive() should change - * pkt_type to PACKET_OTHERHOST - */ - if (vlan_id && last_handler) - skb->pkt_type = PACKET_OTHERHOST; + if (!vlan_dev) return false; - } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) @@ -372,6 +366,13 @@ EXPORT_SYMBOL(vlan_vids_del_by_dev); bool vlan_uses_dev(const struct net_device *dev) { - return rtnl_dereference(dev->vlan_info) ? true : false; + struct vlan_info *vlan_info; + + ASSERT_RTNL(); + + vlan_info = rtnl_dereference(dev->vlan_info); + if (!vlan_info) + return false; + return vlan_info->grp.nr_vlan_devs ? true : false; } EXPORT_SYMBOL(vlan_uses_dev); diff --git a/net/9p/client.c b/net/9p/client.c index 8260f13..34d4176 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -76,6 +76,20 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) } EXPORT_SYMBOL(p9_is_proto_dotu); +/* + * Some error codes are taken directly from the server replies, + * make sure they are valid. + */ +static int safe_errno(int err) +{ + if ((err > 0) || (err < -MAX_ERRNO)) { + p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); + return -EPROTO; + } + return err; +} + + /* Interpret mount option for protocol version */ static int get_protocol_version(char *s) { @@ -782,7 +796,7 @@ again: return req; reterr: p9_free_req(c, req); - return ERR_PTR(err); + return ERR_PTR(safe_errno(err)); } /** @@ -865,7 +879,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, return req; reterr: p9_free_req(c, req); - return ERR_PTR(err); + return ERR_PTR(safe_errno(err)); } static struct p9_fid *p9_fid_create(struct p9_client *clnt) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 505f0ce3..02efb25 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -316,8 +316,7 @@ static void p9_read_work(struct work_struct *work) m->rsize - m->rpos); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); - return; + goto end_clear; } if (err <= 0) @@ -379,19 +378,20 @@ static void p9_read_work(struct work_struct *work) m->req = NULL; } +end_clear: + clear_bit(Rworksched, &m->wsched); + if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = POLLIN; else n = p9_fd_poll(m->client, NULL); - if (n & POLLIN) { + if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); + } + } return; error: @@ -453,12 +453,13 @@ static void p9_write_work(struct work_struct *work) } if (!m->wsize) { + spin_lock(&m->client->lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); + spin_unlock(&m->client->lock); return; } - spin_lock(&m->client->lock); req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; @@ -476,10 +477,9 @@ static void p9_write_work(struct work_struct *work) clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } + if (err == -EAGAIN) + goto end_clear; + if (err < 0) goto error; @@ -492,19 +492,21 @@ static void p9_write_work(struct work_struct *work) if (m->wpos == m->wsize) m->wpos = m->wsize = 0; - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { +end_clear: + clear_bit(Wworksched, &m->wsched); + + if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else n = p9_fd_poll(m->client, NULL); - if (n & POLLOUT) { + if ((n & POLLOUT) && + !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); + } + } return; @@ -793,30 +795,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; - int ret, fd; + struct file *file; + int ret; p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket, 0); - if (fd < 0) { + file = sock_alloc_file(csocket, 0, NULL); + if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); - return fd; + return PTR_ERR(file); } - get_file(csocket->file); - get_file(csocket->file); - p->wr = p->rd = csocket->file; + get_file(file); + p->wr = p->rd = file; client->trans = p; client->status = Connected; - sys_close(fd); /* still racy */ - p->rd->f_flags |= O_NONBLOCK; p->conn = p9_conn_create(client); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 9d49ee6..ba033f0 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -591,7 +591,7 @@ static int bt_seq_show(struct seq_file *seq, void *v) atomic_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), - sock_i_uid(sk), + from_kuid(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), &src_baswapped, &dst_baswapped, diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 68e8f36..fe43bc7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -265,6 +265,9 @@ static int br_parse_ip_options(struct sk_buff *skb) struct net_device *dev = skb->dev; u32 len; + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + iph = ip_hdr(skb); opt = &(IPCB(skb)->opt); diff --git a/net/can/af_can.c b/net/can/af_can.c index 821022a..ddac1ee 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -63,7 +63,7 @@ #include "af_can.h" -static __initdata const char banner[] = KERN_INFO +static __initconst const char banner[] = KERN_INFO "can: controller area network core (" CAN_VERSION_STRING ")\n"; MODULE_DESCRIPTION("Controller Area Network PF_CAN core"); diff --git a/net/can/bcm.c b/net/can/bcm.c index 151b773..6f74758 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -77,7 +77,7 @@ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) #define CAN_BCM_VERSION CAN_VERSION -static __initdata const char banner[] = KERN_INFO +static __initconst const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n"; MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); diff --git a/net/can/gw.c b/net/can/gw.c index 127879c..1f5c9785 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -58,7 +58,7 @@ #include <net/sock.h> #define CAN_GW_VERSION "20101209" -static __initdata const char banner[] = +static __initconst const char banner[] = KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n"; MODULE_DESCRIPTION("PF_CAN netlink gateway"); diff --git a/net/can/raw.c b/net/can/raw.c index 3e9c893..5b0e3e3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -55,7 +55,7 @@ #include <net/net_namespace.h> #define CAN_RAW_VERSION CAN_VERSION -static __initdata const char banner[] = +static __initconst const char banner[] = KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n"; MODULE_DESCRIPTION("PF_CAN raw protocol"); diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 9da7fdd..af14cb4 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; + size_t datalen = prep->datalen; int ret; void *p; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; ret = key_payload_reserve(key, datalen); @@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void *)data; - ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + p = (void *)prep->data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 900ea0f..812eb3b 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -637,7 +637,7 @@ bad: /* * Do a synchronous pool op. */ -int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op, +static int do_poolop(struct ceph_mon_client *monc, u32 op, u32 pool, u64 snapid, char *buf, int len) { @@ -687,7 +687,7 @@ out: int ceph_monc_create_snapid(struct ceph_mon_client *monc, u32 pool, u64 *snapid) { - return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, + return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, pool, 0, (char *)snapid, sizeof(*snapid)); } @@ -696,7 +696,7 @@ EXPORT_SYMBOL(ceph_monc_create_snapid); int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { - return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, + return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, pool, snapid, 0, 0); } @@ -769,7 +769,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) monc->monmap->mon_inst[i].name.num = cpu_to_le64(i); } monc->monmap->num_mon = num_mon; - monc->have_fsid = false; return 0; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 42119c0..c1d756c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -52,7 +52,7 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -void ceph_calc_raw_layout(struct ceph_osd_client *osdc, +int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, u64 snapid, u64 off, u64 *plen, u64 *bno, @@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; u64 orig_len = *plen; u64 objoff, objlen; /* extent in object */ + int r; reqhead->snapid = cpu_to_le64(snapid); /* object extent? */ - ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); + r = ceph_calc_file_object_mapping(layout, off, plen, bno, + &objoff, &objlen); + if (r < 0) + return r; if (*plen < orig_len) dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); @@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", *bno, objoff, objlen, req->r_num_pages); - + return 0; } EXPORT_SYMBOL(ceph_calc_raw_layout); @@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static void calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, - struct ceph_file_layout *layout, - u64 off, u64 *plen, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) +static int calc_layout(struct ceph_osd_client *osdc, + struct ceph_vino vino, + struct ceph_file_layout *layout, + u64 off, u64 *plen, + struct ceph_osd_request *req, + struct ceph_osd_req_op *op) { u64 bno; + int r; - ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, + plen, &bno, req, op); + if (r < 0) + return r; snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); + + return r; } /* @@ -213,7 +221,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, kref_init(&req->r_kref); init_completion(&req->r_completion); init_completion(&req->r_safe_completion); - rb_init_node(&req->r_node); INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_linger_item); INIT_LIST_HEAD(&req->r_linger_osd); @@ -456,6 +463,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, { struct ceph_osd_req_op ops[3]; struct ceph_osd_request *req; + int r; ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; @@ -474,10 +482,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, use_mempool, GFP_NOFS, NULL, NULL); if (!req) - return NULL; + return ERR_PTR(-ENOMEM); /* calculate max write size */ - calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(osdc, vino, layout, off, plen, req, ops); + if (r < 0) + return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ /* in case it differs from natural (file) alignment that @@ -1920,8 +1930,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, false, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short read due to an object boundary */ req->r_pages = pages; @@ -1963,8 +1973,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, snapc, do_sync, truncate_seq, truncate_size, mtime, nofail, 1, page_align); - if (!req) - return -ENOMEM; + if (IS_ERR(req)) + return PTR_ERR(req); /* it may be a short write due to an object boundary */ req->r_pages = pages; diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 3124b71..5433fb0 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -984,7 +984,7 @@ bad: * for now, we write only a single su, until we can * pass a stride back to the caller. */ -void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, +int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *ono, u64 *oxoff, u64 *oxlen) @@ -998,11 +998,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, osize, su); + if (su == 0 || sc == 0) + goto invalid; su_per_object = osize / su; + if (su_per_object == 0) + goto invalid; dout("osize %u / su %u = su_per_object %u\n", osize, su, su_per_object); - BUG_ON((su & ~PAGE_MASK) != 0); + if ((su & ~PAGE_MASK) != 0) + goto invalid; + /* bl = *off / su; */ t = off; do_div(t, su); @@ -1030,6 +1036,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, *plen = *oxlen; dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); + return 0; + +invalid: + dout(" invalid layout\n"); + *ono = 0; + *oxoff = 0; + *oxlen = 0; + return -EINVAL; } EXPORT_SYMBOL(ceph_calc_file_object_mapping); diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 665cd23..92866be 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -1,4 +1,3 @@ - #include <linux/module.h> #include <linux/gfp.h> #include <linux/pagemap.h> @@ -134,8 +133,8 @@ int ceph_pagelist_truncate(struct ceph_pagelist *pl, ceph_pagelist_unmap_tail(pl); while (pl->head.prev != c->page_lru) { page = list_entry(pl->head.prev, struct page, lru); - list_del(&page->lru); /* remove from pagelist */ - list_add_tail(&page->lru, &pl->free_list); /* add to reserve */ + /* move from pagelist to reserve */ + list_move_tail(&page->lru, &pl->free_list); ++pl->num_pages_free; } pl->room = c->room; diff --git a/net/compat.c b/net/compat.c index 74ed1d7..79ae884 100644 --- a/net/compat.c +++ b/net/compat.c @@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) { diff --git a/net/core/dev.c b/net/core/dev.c index 1e0a184..09cb3f6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3300,18 +3300,18 @@ ncls: && !skb_pfmemalloc_protocol(skb)) goto drop; - rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb, !rx_handler)) + if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) goto unlock; } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -3331,6 +3331,9 @@ ncls: } } + if (vlan_tx_nonzero_tag_present(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3471,17 +3474,31 @@ out: return netif_receive_skb(skb); } -inline void napi_gro_flush(struct napi_struct *napi) +/* napi->gro_list contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) { - struct sk_buff *skb, *next; + struct sk_buff *skb, *prev = NULL; - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; + /* scan list and build reverse chain */ + for (skb = napi->gro_list; skb != NULL; skb = skb->next) { + skb->prev = prev; + prev = skb; + } + + for (skb = prev; skb; skb = prev) { skb->next = NULL; + + if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) + return; + + prev = skb->prev; napi_gro_complete(skb); + napi->gro_count--; } - napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); @@ -3542,6 +3559,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; + NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; @@ -3631,20 +3649,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) } EXPORT_SYMBOL(napi_skb_finish); -void skb_gro_reset_offset(struct sk_buff *skb) +static void skb_gro_reset_offset(struct sk_buff *skb) { + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + NAPI_GRO_CB(skb)->data_offset = 0; NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { - NAPI_GRO_CB(skb)->frag0 = - skb_frag_address(&skb_shinfo(skb)->frags[0]); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); } } -EXPORT_SYMBOL(skb_gro_reset_offset); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { @@ -3876,7 +3896,7 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n); + napi_gro_flush(n, false); local_irq_save(flags); __napi_complete(n); local_irq_restore(flags); @@ -3981,8 +4001,17 @@ static void net_rx_action(struct softirq_action *h) local_irq_enable(); napi_complete(n); local_irq_disable(); - } else + } else { + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + local_irq_enable(); + napi_gro_flush(n, HZ >= 1000); + local_irq_disable(); + } list_move_tail(&n->poll_list, &sd->poll_list); + } } netpoll_poll_unlock(have); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index baca771..2257148 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1301,8 +1301,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) if (!dst) goto discard; - __skb_pull(skb, skb_network_offset(skb)); - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; @@ -1312,6 +1310,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) neigh_hh_init(neigh, dst); do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); @@ -1342,9 +1341,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) unsigned int seq; int err; - __skb_pull(skb, skb_network_offset(skb)); - do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 4a83fb3..79285a3 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -239,38 +239,24 @@ out_free_devname: return ret; } +static int update_netprio(const void *v, struct file *file, unsigned n) +{ + int err; + struct socket *sock = sock_from_file(file, &err); + if (sock) + sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; + return 0; +} + void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; + void *v; cgroup_taskset_for_each(p, cgrp, tset) { - unsigned int fd; - struct fdtable *fdt; - struct files_struct *files; - task_lock(p); - files = p->files; - if (!files) { - task_unlock(p); - continue; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - for (fd = 0; fd < fdt->max_fds; fd++) { - struct file *file; - struct socket *sock; - int err; - - file = fcheck_files(files, fd); - if (!file) - continue; - - sock = sock_from_file(file, &err); - if (sock) - sock_update_netprioidx(sock->sk, p); - } - spin_unlock(&files->file_lock); + v = (void *)(unsigned long)task_netprioidx(p); + iterate_fd(p->files, 0, update_netprio, v); task_unlock(p); } } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 148e73d..d1dc14c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -248,8 +248,8 @@ struct pktgen_dev { int removal_mark; /* non-zero => the device is marked for * removal by worker thread */ - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ + int min_pkt_size; + int max_pkt_size; int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ int nfrags; struct page *page; @@ -449,8 +449,6 @@ static void pktgen_stop_all_threads_ifs(void); static void pktgen_stop(struct pktgen_thread *t); static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); -static unsigned int scan_ip6(const char *s, char ip[16]); - /* Module parameters, defaults. */ static int pg_count_d __read_mostly = 1000; static int pg_delay_d __read_mostly; @@ -702,8 +700,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) &pkt_dev->cur_in6_saddr, &pkt_dev->cur_in6_daddr); } else - seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); + seq_printf(seq, " cur_saddr: %pI4 cur_daddr: %pI4\n", + &pkt_dev->cur_saddr, &pkt_dev->cur_daddr); seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); @@ -1299,7 +1297,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; - scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); + in6_pton(buf, -1, pkt_dev->in6_daddr.s6_addr, -1, NULL); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr); pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr; @@ -1322,7 +1320,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; - scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); + in6_pton(buf, -1, pkt_dev->min_in6_daddr.s6_addr, -1, NULL); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr); pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr; @@ -1344,7 +1342,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; - scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); + in6_pton(buf, -1, pkt_dev->max_in6_daddr.s6_addr, -1, NULL); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr); if (debug) @@ -1365,7 +1363,7 @@ static ssize_t pktgen_if_write(struct file *file, return -EFAULT; buf[len] = 0; - scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); + in6_pton(buf, -1, pkt_dev->in6_saddr.s6_addr, -1, NULL); snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr); pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr; @@ -2036,19 +2034,17 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* Set up Dest MAC */ memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); - /* Set up pkt size */ - pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size; - if (pkt_dev->flags & F_IPV6) { - /* - * Skip this automatic address setting until locks or functions - * gets exported - */ - -#ifdef NOTNOW int i, set = 0, err = 1; struct inet6_dev *idev; + if (pkt_dev->min_pkt_size == 0) { + pkt_dev->min_pkt_size = 14 + sizeof(struct ipv6hdr) + + sizeof(struct udphdr) + + sizeof(struct pktgen_hdr) + + pkt_dev->pkt_overhead; + } + for (i = 0; i < IN6_ADDR_HSIZE; i++) if (pkt_dev->cur_in6_saddr.s6_addr[i]) { set = 1; @@ -2069,9 +2065,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - for (ifp = idev->addr_list; ifp; - ifp = ifp->if_next) { - if (ifp->scope == IFA_LINK && + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if ((ifp->scope & IFA_LINK) && !(ifp->flags & IFA_F_TENTATIVE)) { pkt_dev->cur_in6_saddr = ifp->addr; err = 0; @@ -2084,8 +2079,14 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) if (err) pr_err("ERROR: IPv6 link address not available\n"); } -#endif } else { + if (pkt_dev->min_pkt_size == 0) { + pkt_dev->min_pkt_size = 14 + sizeof(struct iphdr) + + sizeof(struct udphdr) + + sizeof(struct pktgen_hdr) + + pkt_dev->pkt_overhead; + } + pkt_dev->saddr_min = 0; pkt_dev->saddr_max = 0; if (strlen(pkt_dev->src_min) == 0) { @@ -2111,6 +2112,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); } /* Initialize current values. */ + pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size; + if (pkt_dev->min_pkt_size > pkt_dev->max_pkt_size) + pkt_dev->max_pkt_size = pkt_dev->min_pkt_size; + pkt_dev->cur_dst_mac_offset = 0; pkt_dev->cur_src_mac_offset = 0; pkt_dev->cur_saddr = pkt_dev->saddr_min; @@ -2758,97 +2763,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, return skb; } -/* - * scan_ip6, fmt_ip taken from dietlibc-0.21 - * Author Felix von Leitner <felix-dietlibc@fefe.de> - * - * Slightly modified for kernel. - * Should be candidate for net/ipv4/utils.c - * --ro - */ - -static unsigned int scan_ip6(const char *s, char ip[16]) -{ - unsigned int i; - unsigned int len = 0; - unsigned long u; - char suffix[16]; - unsigned int prefixlen = 0; - unsigned int suffixlen = 0; - __be32 tmp; - char *pos; - - for (i = 0; i < 16; i++) - ip[i] = 0; - - for (;;) { - if (*s == ':') { - len++; - if (s[1] == ':') { /* Found "::", skip to part 2 */ - s += 2; - len++; - break; - } - s++; - } - - u = simple_strtoul(s, &pos, 16); - i = pos - s; - if (!i) - return 0; - if (prefixlen == 12 && s[i] == '.') { - - /* the last 4 bytes may be written as IPv4 address */ - - tmp = in_aton(s); - memcpy((struct in_addr *)(ip + 12), &tmp, sizeof(tmp)); - return i + len; - } - ip[prefixlen++] = (u >> 8); - ip[prefixlen++] = (u & 255); - s += i; - len += i; - if (prefixlen == 16) - return len; - } - -/* part 2, after "::" */ - for (;;) { - if (*s == ':') { - if (suffixlen == 0) - break; - s++; - len++; - } else if (suffixlen != 0) - break; - - u = simple_strtol(s, &pos, 16); - i = pos - s; - if (!i) { - if (*s) - len--; - break; - } - if (suffixlen + prefixlen <= 12 && s[i] == '.') { - tmp = in_aton(s); - memcpy((struct in_addr *)(suffix + suffixlen), &tmp, - sizeof(tmp)); - suffixlen += 4; - len += strlen(s); - break; - } - suffix[suffixlen++] = (u >> 8); - suffix[suffixlen++] = (u & 255); - s += i; - len += i; - if (prefixlen + suffixlen == 16) - break; - } - for (i = 0; i < suffixlen; i++) - ip[16 - suffixlen + i] = suffix[i]; - return len; -} - static struct sk_buff *fill_packet_ipv6(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2927,7 +2841,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, sizeof(struct ipv6hdr) - sizeof(struct udphdr) - pkt_dev->pkt_overhead; - if (datalen < sizeof(struct pktgen_hdr)) { + if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); net_info_ratelimited("increased datalen to %d\n", datalen); } @@ -3548,8 +3462,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) } pkt_dev->removal_mark = 0; - pkt_dev->min_pkt_size = ETH_ZLEN; - pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; pkt_dev->delay = pg_delay_d; pkt_dev->count = pg_count_d; diff --git a/net/core/scm.c b/net/core/scm.c index 9c1c63d..ab57084 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -301,11 +301,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) break; } /* Bump the usage count and install the file. */ - get_file(fp[i]); sock = sock_from_file(fp[i], &err); if (sock) sock_update_netprioidx(sock->sk, current); - fd_install(new_fd, fp[i]); + fd_install(new_fd, get_file(fp[i])); } if (i > 0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdc2859..6e04b1f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -655,53 +655,6 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); -/** - * skb_recycle - clean up an skb for reuse - * @skb: buffer - * - * Recycles the skb to be reused as a receive buffer. This - * function does any necessary reference count dropping, and - * cleans up the skbuff as if it just came from __alloc_skb(). - */ -void skb_recycle(struct sk_buff *skb) -{ - struct skb_shared_info *shinfo; - - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); -} -EXPORT_SYMBOL(skb_recycle); - -/** - * skb_recycle_check - check if skb can be reused for receive - * @skb: buffer - * @skb_size: minimum receive buffer size - * - * Checks that the skb passed in is not shared or cloned, and - * that it is linear and its head portion at least as large as - * skb_size so that it can be recycled as a receive buffer. - * If these conditions are met, this function does any necessary - * reference count dropping and cleans up the skbuff as if it - * just came from __alloc_skb(). - */ -bool skb_recycle_check(struct sk_buff *skb, int skb_size) -{ - if (!skb_is_recycleable(skb, skb_size)) - return false; - - skb_recycle(skb); - - return true; -} -EXPORT_SYMBOL(skb_recycle_check); - static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; diff --git a/net/core/utils.c b/net/core/utils.c index f5613d5..e3487e46 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -107,6 +107,18 @@ static inline int xdigit2bin(char c, int delim) return IN6PTON_UNKNOWN; } +/** + * in4_pton - convert an IPv4 address from literal to binary representation + * @src: the start of the IPv4 address string + * @srclen: the length of the string, -1 means strlen(src) + * @dst: the binary (u8[4] array) representation of the IPv4 address + * @delim: the delimiter of the IPv4 address in @src, -1 means no delimiter + * @end: A pointer to the end of the parsed string will be placed here + * + * Return one on success, return zero when any error occurs + * and @end will point to the end of the parsed string. + * + */ int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) @@ -161,6 +173,18 @@ out: } EXPORT_SYMBOL(in4_pton); +/** + * in6_pton - convert an IPv6 address from literal to binary representation + * @src: the start of the IPv6 address string + * @srclen: the length of the string, -1 means strlen(src) + * @dst: the binary (u8[16] array) representation of the IPv6 address + * @delim: the delimiter of the IPv6 address in @src, -1 means no delimiter + * @end: A pointer to the end of the parsed string will be placed here + * + * Return one on success, return zero when any error occurs + * and @end will point to the end of the parsed string. + * + */ int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end) diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index e65f2c8..faf7cc3 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -220,7 +220,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) dn_rt_cache_flush(-1); } -static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = { +static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = { .family = AF_DECnet, .rule_size = sizeof(struct dn_fib_rule), .addr_size = sizeof(u16), diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 9807945..8aa4b11 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache; * "ip1,ip2,...#foo=bar" */ static int -dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; - size_t result_len = 0; - const char *data = _data, *end, *opt; + size_t datalen = prep->datalen, result_len = 0; + const char *data = prep->data, *end, *opt; kenter("%%%d,%s,'%*.*s',%zu", key->serial, key->description, diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 68c93d1..825c608 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users(dev_net(dev))) { + if (!r && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 274309d..26aa65d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -262,7 +262,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net); } -static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { +static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2677530..71b125c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -840,6 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_parent = fi; nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); + if (!nexthop_nh->nh_pcpu_rth_output) + goto failure; } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0c5b9c..d34ce29 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; return &rt->dst; @@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; rcu_read_unlock(); return &rt->dst; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index ab09b12..694de3b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) + if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24a29a3..6537a40 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 978bca4..1831092 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -374,7 +374,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - htonl(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); @@ -441,7 +441,7 @@ static int vti_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - htonl(tunnel->parms.i_key), + be32_to_cpu(tunnel->parms.i_key), RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, iph->daddr, iph->saddr, 0, 0); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1daa95c..6168c4d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -221,7 +221,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { +static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff62206..432f4bb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, + rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } @@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->rate_tokens)))) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); + + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), - &ip_hdr(skb)->daddr, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &gw); #endif } out_put_peer: @@ -904,22 +907,32 @@ out: kfree_skb(skb); return 0; } -static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { + struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst->dev->mtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); + } + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); - return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -929,14 +942,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); - - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1120,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway && mtu > 576) + if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1171,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = fnhe->fnhe_gw; - } + rt->rt_uses_gateway = 1; + } else if (!rt->rt_gateway) + rt->rt_gateway = daddr; orig = rcu_dereference(fnhe->fnhe_rth); rcu_assign_pointer(fnhe->fnhe_rth, rt); @@ -1180,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_stamp = jiffies; ret = true; - } else { - /* Routes we intend to cache in nexthop exception have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); @@ -1201,8 +1202,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - if (!nh->nh_pcpu_rth_output) - goto nocache; p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1211,16 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (prev == orig) { if (orig) rt_free(orig); - } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ -nocache: - rt->dst.flags |= DST_NOCACHE; + } else ret = false; - } return ret; } @@ -1281,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { rt->rt_gateway = nh->nh_gw; + rt->rt_uses_gateway = 1; + } dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; @@ -1291,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) cached = rt_cache_route(nh, rt); - } - if (unlikely(!cached)) + if (unlikely(!cached)) { + /* Routes we intend to cache in nexthop exception or + * FIB nexthop have the DST_NOCACHE bit clear. + * However, if we are unsuccessful at storing this + * route into the cache we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + if (!rt->rt_gateway) + rt->rt_gateway = daddr; + rt_add_uncached_list(rt); + } + } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1360,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; @@ -1429,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { @@ -1439,10 +1442,13 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - if (out_dev == in_dev && err && + do_cache = res->fi && !itag; + if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { flags |= RTCF_DOREDIRECT; + do_cache = false; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1459,15 +1465,11 @@ static int __mkroute_input(struct sk_buff *skb, } } - do_cache = false; - if (res->fi) { - if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - goto out; - } - do_cache = true; + if (do_cache) { + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; } } @@ -1486,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; @@ -1656,6 +1659,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1758,6 +1762,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct in_device *in_dev; u16 type = res->type; struct rtable *rth; + bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) @@ -1794,24 +1799,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } fnhe = NULL; + do_cache = fi != NULL; if (fi) { struct rtable __rcu **prth; + struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + fnhe = find_exception(nh, fl4->daddr); if (fnhe) prth = &fnhe->fnhe_rth; - else - prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + else { + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + } rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; } } + +add: rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi); + do_cache); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1824,6 +1841,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2102,6 +2120,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); @@ -2180,28 +2199,31 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_gateway && + if (rt->rt_uses_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; + expires = rt->dst.expires; + if (expires) { + unsigned long now = jiffies; + + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + } + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt_pmtu) + if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (fl4->flowi4_mark && - nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark)) + nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; error = rt->dst.error; - expires = rt->dst.expires; - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } if (rt_is_input_route(rt)) { if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9205e49..63d4ecc 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -248,6 +248,8 @@ int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, ctxt = rcu_dereference(tcp_fastopen_ctx); if (ctxt) memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + else + memset(user_key, 0, sizeof(user_key)); rcu_read_unlock(); snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 75735c9..ef998b0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -708,10 +708,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; /* When socket is gone, all binding information is lost. - * routing might fail in this case. using iif for oif to - * make sure we can deliver it + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. */ - arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 681ea2f..05c5ab8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d7c56f8..0424e4e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3064,14 +3064,15 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) struct hlist_node *n; hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; /* sync with offset */ if (p < state->offset) { p++; continue; } state->offset++; - if (net_eq(dev_net(ifa->idev->dev), net)) - return ifa; + return ifa; } /* prepare for next bucket */ @@ -3089,18 +3090,20 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct hlist_node *n = &ifa->addr_lst; hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; state->offset++; - if (net_eq(dev_net(ifa->idev->dev), net)) - return ifa; + return ifa; } while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket], addr_lst) { + if (!net_eq(dev_net(ifa->idev->dev), net)) + continue; state->offset++; - if (net_eq(dev_net(ifa->idev->dev), net)) - return ifa; + return ifa; } } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 4be23da..ff76eec 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -79,7 +79,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL -static const __net_initdata struct ip6addrlbl_init_table +static const __net_initconst struct ip6addrlbl_init_table { const struct in6_addr *prefix; int prefixlen; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e22e6d8..a974247 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -822,13 +822,6 @@ out: return segs; } -struct ipv6_gro_cb { - struct napi_gro_cb napi; - int proto; -}; - -#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb) - static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -874,28 +867,31 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, iph = ipv6_hdr(skb); } - IPV6_GRO_CB(skb)->proto = proto; + NAPI_GRO_CB(skb)->proto = proto; flush--; nlen = skb_network_header_len(skb); for (p = *head; p; p = p->next) { - struct ipv6hdr *iph2; + const struct ipv6hdr *iph2; + __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */ if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = ipv6_hdr(p); + first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length. */ + /* All fields must match except length and Traffic Class. */ if (nlen != skb_network_header_len(p) || - memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) || + (first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; continue; } - + /* flush if Traffic Class fields are different */ + NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; } @@ -927,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb) sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]); + ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]); if (WARN_ON(!ops || !ops->gro_complete)) goto out_unlock; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 0ff1cfd..d9fb911 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) + nla_total_size(16); /* src */ } -static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = { +static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .family = AF_INET6, .rule_size = sizeof(struct fib6_rule), .addr_size = sizeof(struct in6_addr), diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 08ea3f0b..f7c7c63 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -205,7 +205,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = { +static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { .family = RTNL_FAMILY_IP6MR, .rule_size = sizeof(struct ip6mr_rule), .addr_size = sizeof(struct in6_addr), diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 49c8903..26175bf 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -877,7 +877,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index accfa00..a16b7b4 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -56,7 +56,6 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) u64 tsfdelta; spin_lock_bh(&ifmsh->sync_offset_lock); - if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) { msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n", (long long) ifmsh->sync_offset_clockdrift_max); @@ -69,11 +68,11 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) tsfdelta = -beacon_int_fraction; ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction; } + spin_unlock_bh(&ifmsh->sync_offset_lock); tsf = drv_get_tsf(local, sdata); if (tsf != -1ULL) drv_set_tsf(local, sdata, tsf + tsfdelta); - spin_unlock_bh(&ifmsh->sync_offset_lock); } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 2ce8973..3af0cc4 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -34,7 +34,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, skb_queue_len(&local->skb_queue_unreliable); while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT && (skb = skb_dequeue(&local->skb_queue_unreliable))) { - dev_kfree_skb_irq(skb); + ieee80211_free_txskb(hw, skb); tmp--; I802_DEBUG_INC(local->tx_status_drop); } @@ -159,7 +159,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n", skb_queue_len(&sta->tx_filtered[ac]), !!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies); - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); } static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e0e0d1d..c9bf83f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -354,7 +354,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) total += skb_queue_len(&sta->ps_tx_buf[ac]); if (skb) { purged++; - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); break; } } @@ -466,7 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) ps_dbg(tx->sdata, "STA %pM TX buffer for AC %d full - dropping oldest frame\n", sta->sta.addr, ac); - dev_kfree_skb(old); + ieee80211_free_txskb(&local->hw, old); } else tx->local->total_ps_buffered++; @@ -1103,7 +1103,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, spin_unlock(&tx->sta->lock); if (purge_skb) - dev_kfree_skb(purge_skb); + ieee80211_free_txskb(&tx->local->hw, purge_skb); } /* reset session timer */ @@ -1214,7 +1214,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (WARN_ON_ONCE(q >= local->hw.queues)) { __skb_unlink(skb, skbs); - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); continue; } #endif @@ -1356,7 +1356,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (unlikely(res == TX_DROP)) { I802_DEBUG_INC(tx->local->tx_handlers_drop); if (tx->skb) - dev_kfree_skb(tx->skb); + ieee80211_free_txskb(&tx->local->hw, tx->skb); else __skb_queue_purge(&tx->skbs); return -1; @@ -1393,7 +1393,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, res_prepare = ieee80211_tx_prepare(sdata, &tx, skb); if (unlikely(res_prepare == TX_DROP)) { - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); goto out; } else if (unlikely(res_prepare == TX_QUEUED)) { goto out; @@ -1465,7 +1465,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) headroom = max_t(int, 0, headroom); if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) { - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); rcu_read_unlock(); return; } @@ -2050,8 +2050,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, head_need += IEEE80211_ENCRYPT_HEADROOM; head_need += local->tx_headroom; head_need = max_t(int, 0, head_need); - if (ieee80211_skb_resize(sdata, skb, head_need, true)) - goto fail; + if (ieee80211_skb_resize(sdata, skb, head_need, true)) { + ieee80211_free_txskb(&local->hw, skb); + return NETDEV_TX_OK; + } } if (encaps_data) { @@ -2184,7 +2186,7 @@ void ieee80211_tx_pending(unsigned long data) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (WARN_ON(!info->control.vif)) { - kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); continue; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 56f6d5d..cc4c809 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -50,6 +50,7 @@ enum { * local */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ + IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ }; /* @@ -113,6 +114,8 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_tos = rtos; + fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? + FLOWI_FLAG_KNOWN_NH : 0; retry: rt = ip_route_output_key(net, &fl4); @@ -1061,7 +1064,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, NULL))) + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f2e3ad..01e944a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -169,6 +169,8 @@ static void netlink_sock_destruct(struct sock *sk) if (nlk->cb) { if (nlk->cb->done) nlk->cb->done(nlk->cb); + + module_put(nlk->cb->module); netlink_destroy_callback(nlk->cb); } @@ -1758,6 +1760,7 @@ static int netlink_dump(struct sock *sk) nlk->cb = NULL; mutex_unlock(nlk->cb_mutex); + module_put(cb->module); netlink_consume_callback(cb); return 0; @@ -1767,9 +1770,9 @@ errout_skb: return err; } -int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - struct netlink_dump_control *control) +int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) { struct netlink_callback *cb; struct sock *sk; @@ -1784,6 +1787,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->done = control->done; cb->nlh = nlh; cb->data = control->data; + cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; @@ -1794,19 +1798,28 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump is in progress... */ + mutex_lock(nlk->cb_mutex); + /* A dump is in progress... */ if (nlk->cb) { mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); - sock_put(sk); - return -EBUSY; + ret = -EBUSY; + goto out; } + /* add reference of module which cb->dump belongs to */ + if (!try_module_get(cb->module)) { + mutex_unlock(nlk->cb_mutex); + netlink_destroy_callback(cb); + ret = -EPROTONOSUPPORT; + goto out; + } + nlk->cb = cb; mutex_unlock(nlk->cb_mutex); ret = netlink_dump(sk); - +out: sock_put(sk); if (ret) @@ -1817,7 +1830,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; } -EXPORT_SYMBOL(netlink_dump_start); +EXPORT_SYMBOL(__netlink_dump_start); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) { diff --git a/net/rds/send.c b/net/rds/send.c index 96531d4..88eace5 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1122,7 +1122,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rds_stats_inc(s_send_pong); if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - rds_send_xmit(conn); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); rds_message_put(rm); return 0; diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 011d238..7633a75 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -26,8 +26,8 @@ #include "ar-internal.h" static int rxrpc_vet_description_s(const char *); -static int rxrpc_instantiate(struct key *, const void *, size_t); -static int rxrpc_instantiate_s(struct key *, const void *, size_t); +static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *); +static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); @@ -678,7 +678,7 @@ error: * * if no data is provided, then a no-security key is made */ -static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) +static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; @@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) u32 kver; int ret; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); /* handle a no-security key */ - if (!data && datalen == 0) + if (!prep->data && prep->datalen == 0) return 0; /* determine if the XDR payload format is being used */ - if (datalen > 7 * 4) { - ret = rxrpc_instantiate_xdr(key, data, datalen); + if (prep->datalen > 7 * 4) { + ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen); if (ret != -EPROTO) return ret; } /* get the key interface version number */ ret = -EINVAL; - if (datalen <= 4 || !data) + if (prep->datalen <= 4 || !prep->data) goto error; - memcpy(&kver, data, sizeof(kver)); - data += sizeof(kver); - datalen -= sizeof(kver); + memcpy(&kver, prep->data, sizeof(kver)); + prep->data += sizeof(kver); + prep->datalen -= sizeof(kver); _debug("KEY I/F VERSION: %u", kver); @@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) /* deal with a version 1 key */ ret = -EINVAL; - if (datalen < sizeof(*v1)) + if (prep->datalen < sizeof(*v1)) goto error; - v1 = data; - if (datalen != sizeof(*v1) + v1->ticket_length) + v1 = prep->data; + if (prep->datalen != sizeof(*v1) + v1->ticket_length) goto error; _debug("SCIX: %u", v1->security_index); @@ -784,17 +784,17 @@ error: * instantiate a server secret key * data should be a pointer to the 8-byte secret key */ -static int rxrpc_instantiate_s(struct key *key, const void *data, - size_t datalen) +static int rxrpc_instantiate_s(struct key *key, + struct key_preparsed_payload *prep) { struct crypto_blkcipher *ci; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); - if (datalen != 8) + if (prep->datalen != 8) return -EINVAL; - memcpy(&key->type_data, data, 8); + memcpy(&key->type_data, prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data, return PTR_ERR(ci); } - if (crypto_blkcipher_setkey(ci, data, 8) < 0) + if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); key->payload.data = ci; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 57f7de8..6773d78 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1642,8 +1642,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, asoc->outqueue.outstanding_bytes; sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; + chunk->subh.sack_hdr = &sackh; sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, - SCTP_SACKH(&sackh)); + SCTP_CHUNK(chunk)); break; case SCTP_CMD_DISCARD_PACKET: diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d37d24f..59d16ea 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -70,6 +70,7 @@ #include <linux/init.h> #include <linux/crypto.h> #include <linux/slab.h> +#include <linux/file.h> #include <net/ip.h> #include <net/icmp.h> @@ -4292,6 +4293,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval { sctp_peeloff_arg_t peeloff; struct socket *newsock; + struct file *newfile; int retval = 0; if (len < sizeof(sctp_peeloff_arg_t)) @@ -4305,22 +4307,35 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = sock_map_fd(newsock, 0); + retval = get_unused_fd(); if (retval < 0) { sock_release(newsock); goto out; } + newfile = sock_alloc_file(newsock, 0, NULL); + if (unlikely(IS_ERR(newfile))) { + put_unused_fd(retval); + sock_release(newsock); + return PTR_ERR(newfile); + } + SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", __func__, sk, newsock->sk, retval); /* Return the fd mapped to the new socket. */ + if (put_user(len, optlen)) { + fput(newfile); + put_unused_fd(retval); + return -EFAULT; + } peeloff.sd = retval; - if (put_user(len, optlen)) + if (copy_to_user(optval, &peeloff, len)) { + fput(newfile); + put_unused_fd(retval); return -EFAULT; - if (copy_to_user(optval, &peeloff, len)) - retval = -EFAULT; - + } + fd_install(retval, newfile); out: return retval; } diff --git a/net/socket.c b/net/socket.c index 80dc7e8..d92c490 100644 --- a/net/socket.c +++ b/net/socket.c @@ -347,17 +347,11 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags, - const char *dname) +struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct qstr name = { .name = "" }; struct path path; struct file *file; - int fd; - - fd = get_unused_fd_flags(flags); - if (unlikely(fd < 0)) - return fd; if (dname) { name.name = dname; @@ -367,10 +361,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags, name.len = strlen(name.name); } path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); - if (unlikely(!path.dentry)) { - put_unused_fd(fd); - return -ENOMEM; - } + if (unlikely(!path.dentry)) + return ERR_PTR(-ENOMEM); path.mnt = mntget(sock_mnt); d_instantiate(path.dentry, SOCK_INODE(sock)); @@ -382,30 +374,33 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags, /* drop dentry, keep inode */ ihold(path.dentry->d_inode); path_put(&path); - put_unused_fd(fd); - return -ENFILE; + return ERR_PTR(-ENFILE); } sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); file->f_pos = 0; file->private_data = sock; - - *f = file; - return fd; + return file; } +EXPORT_SYMBOL(sock_alloc_file); -int sock_map_fd(struct socket *sock, int flags) +static int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags, NULL); + int fd = get_unused_fd_flags(flags); + if (unlikely(fd < 0)) + return fd; - if (likely(fd >= 0)) + newfile = sock_alloc_file(sock, flags, NULL); + if (likely(!IS_ERR(newfile))) { fd_install(fd, newfile); + return fd; + } - return fd; + put_unused_fd(fd); + return PTR_ERR(newfile); } -EXPORT_SYMBOL(sock_map_fd); struct socket *sock_from_file(struct file *file, int *err) { @@ -1466,17 +1461,32 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL); + fd1 = get_unused_fd_flags(flags); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - - fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL); + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; + put_unused_fd(fd1); + goto out_release_both; + } + + newfile1 = sock_alloc_file(sock1, flags, NULL); + if (unlikely(IS_ERR(newfile1))) { + err = PTR_ERR(newfile1); + put_unused_fd(fd1); + put_unused_fd(fd2); + goto out_release_both; + } + + newfile2 = sock_alloc_file(sock2, flags, NULL); + if (IS_ERR(newfile2)) { + err = PTR_ERR(newfile2); fput(newfile1); put_unused_fd(fd1); + put_unused_fd(fd2); sock_release(sock2); goto out; } @@ -1608,13 +1618,19 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags, - sock->sk->sk_prot_creator->name); + newfd = get_unused_fd_flags(flags); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } + newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); + if (unlikely(IS_ERR(newfile))) { + err = PTR_ERR(newfile); + put_unused_fd(newfd); + sock_release(newsock); + goto out_put; + } err = security_socket_accept(sock, newsock); if (err) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 34c5220..909dc0c 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -239,7 +239,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct } return q; err: - dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p)); + dprintk("RPC: %s returning %ld\n", __func__, -PTR_ERR(p)); return p; } @@ -301,10 +301,10 @@ __gss_find_upcall(struct rpc_pipe *pipe, uid_t uid) if (pos->uid != uid) continue; atomic_inc(&pos->count); - dprintk("RPC: gss_find_upcall found msg %p\n", pos); + dprintk("RPC: %s found msg %p\n", __func__, pos); return pos; } - dprintk("RPC: gss_find_upcall found nothing\n"); + dprintk("RPC: %s found nothing\n", __func__); return NULL; } @@ -507,8 +507,8 @@ gss_refresh_upcall(struct rpc_task *task) struct rpc_pipe *pipe; int err = 0; - dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, - cred->cr_uid); + dprintk("RPC: %5u %s for uid %u\n", + task->tk_pid, __func__, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we @@ -539,8 +539,8 @@ gss_refresh_upcall(struct rpc_task *task) spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: - dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", - task->tk_pid, cred->cr_uid, err); + dprintk("RPC: %5u %s for uid %u result %d\n", + task->tk_pid, __func__, cred->cr_uid, err); return err; } @@ -553,7 +553,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) DEFINE_WAIT(wait); int err = 0; - dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid); + dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid); retry: gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { @@ -594,8 +594,8 @@ out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); out: - dprintk("RPC: gss_create_upcall for uid %u result %d\n", - cred->cr_uid, err); + dprintk("RPC: %s for uid %u result %d\n", + __func__, cred->cr_uid, err); return err; } @@ -681,7 +681,7 @@ err_put_ctx: err: kfree(buf); out: - dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); + dprintk("RPC: %s returning %Zd\n", __func__, err); return err; } @@ -747,8 +747,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->errno < 0) { - dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n", - gss_msg); + dprintk("RPC: %s releasing msg %p\n", + __func__, gss_msg); atomic_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) @@ -976,7 +976,7 @@ gss_destroying_context(struct rpc_cred *cred) static void gss_do_free_ctx(struct gss_cl_ctx *ctx) { - dprintk("RPC: gss_free_ctx\n"); + dprintk("RPC: %s\n", __func__); gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); @@ -999,7 +999,7 @@ gss_free_ctx(struct gss_cl_ctx *ctx) static void gss_free_cred(struct gss_cred *gss_cred) { - dprintk("RPC: gss_free_cred %p\n", gss_cred); + dprintk("RPC: %s cred=%p\n", __func__, gss_cred); kfree(gss_cred); } @@ -1049,8 +1049,8 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) struct gss_cred *cred = NULL; int err = -ENOMEM; - dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", - acred->uid, auth->au_flavor); + dprintk("RPC: %s for uid %d, flavor %d\n", + __func__, acred->uid, auth->au_flavor); if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) goto out_err; @@ -1069,7 +1069,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) return &cred->gc_base; out_err: - dprintk("RPC: gss_create_cred failed with error %d\n", err); + dprintk("RPC: %s failed with error %d\n", __func__, err); return ERR_PTR(err); } @@ -1127,7 +1127,7 @@ gss_marshal(struct rpc_task *task, __be32 *p) struct kvec iov; struct xdr_buf verf_buf; - dprintk("RPC: %5u gss_marshal\n", task->tk_pid); + dprintk("RPC: %5u %s\n", task->tk_pid, __func__); *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; @@ -1253,7 +1253,7 @@ gss_validate(struct rpc_task *task, __be32 *p) u32 flav,len; u32 maj_stat; - dprintk("RPC: %5u gss_validate\n", task->tk_pid); + dprintk("RPC: %5u %s\n", task->tk_pid, __func__); flav = ntohl(*p++); if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) @@ -1271,20 +1271,20 @@ gss_validate(struct rpc_task *task, __be32 *p) if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) { - dprintk("RPC: %5u gss_validate: gss_verify_mic returned " - "error 0x%08x\n", task->tk_pid, maj_stat); + dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n", + task->tk_pid, __func__, maj_stat); goto out_bad; } /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; gss_put_ctx(ctx); - dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", - task->tk_pid); + dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n", + task->tk_pid, __func__); return p + XDR_QUADLEN(len); out_bad: gss_put_ctx(ctx); - dprintk("RPC: %5u gss_validate failed.\n", task->tk_pid); + dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__); return NULL; } @@ -1466,7 +1466,7 @@ gss_wrap_req(struct rpc_task *task, struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status = -EIO; - dprintk("RPC: %5u gss_wrap_req\n", task->tk_pid); + dprintk("RPC: %5u %s\n", task->tk_pid, __func__); if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. @@ -1489,7 +1489,7 @@ gss_wrap_req(struct rpc_task *task, } out: gss_put_ctx(ctx); - dprintk("RPC: %5u gss_wrap_req returning %d\n", task->tk_pid, status); + dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status); return status; } @@ -1604,8 +1604,8 @@ out_decode: status = gss_unwrap_req_decode(decode, rqstp, p, obj); out: gss_put_ctx(ctx); - dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, - status); + dprintk("RPC: %5u %s returning %d\n", + task->tk_pid, __func__, status); return status; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index fa48c60..cdc7564b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -490,61 +490,86 @@ EXPORT_SYMBOL_GPL(rpc_create); * same transport while varying parameters such as the authentication * flavour. */ -struct rpc_clnt * -rpc_clone_client(struct rpc_clnt *clnt) +static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, + struct rpc_clnt *clnt) { - struct rpc_clnt *new; struct rpc_xprt *xprt; - int err = -ENOMEM; + struct rpc_clnt *new; + int err; - new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); - if (!new) - goto out_no_clnt; - new->cl_parent = clnt; - /* Turn off autobind on clones */ - new->cl_autobind = 0; - INIT_LIST_HEAD(&new->cl_tasks); - spin_lock_init(&new->cl_lock); - rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval); - new->cl_metrics = rpc_alloc_iostats(clnt); - if (new->cl_metrics == NULL) - goto out_no_stats; - if (clnt->cl_principal) { - new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL); - if (new->cl_principal == NULL) - goto out_no_principal; - } + err = -ENOMEM; rcu_read_lock(); xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); rcu_read_unlock(); if (xprt == NULL) - goto out_no_transport; - rcu_assign_pointer(new->cl_xprt, xprt); - atomic_set(&new->cl_count, 1); - err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); - if (err != 0) - goto out_no_path; - rpc_clnt_set_nodename(new, utsname()->nodename); - if (new->cl_auth) - atomic_inc(&new->cl_auth->au_count); + goto out_err; + args->servername = xprt->servername; + + new = rpc_new_client(args, xprt); + if (IS_ERR(new)) { + err = PTR_ERR(new); + goto out_put; + } + atomic_inc(&clnt->cl_count); - rpc_register_client(new); - rpciod_up(); + new->cl_parent = clnt; + + /* Turn off autobind on clones */ + new->cl_autobind = 0; + new->cl_softrtry = clnt->cl_softrtry; + new->cl_discrtry = clnt->cl_discrtry; + new->cl_chatty = clnt->cl_chatty; return new; -out_no_path: + +out_put: xprt_put(xprt); -out_no_transport: - kfree(new->cl_principal); -out_no_principal: - rpc_free_iostats(new->cl_metrics); -out_no_stats: - kfree(new); -out_no_clnt: +out_err: dprintk("RPC: %s: returned error %d\n", __func__, err); return ERR_PTR(err); } + +/** + * rpc_clone_client - Clone an RPC client structure + * + * @clnt: RPC client whose parameters are copied + * + * Returns a fresh RPC client or an ERR_PTR. + */ +struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) +{ + struct rpc_create_args args = { + .program = clnt->cl_program, + .prognumber = clnt->cl_prog, + .version = clnt->cl_vers, + .authflavor = clnt->cl_auth->au_flavor, + .client_name = clnt->cl_principal, + }; + return __rpc_clone_client(&args, clnt); +} EXPORT_SYMBOL_GPL(rpc_clone_client); +/** + * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth + * + * @clnt: RPC client whose parameters are copied + * @auth: security flavor for new client + * + * Returns a fresh RPC client or an ERR_PTR. + */ +struct rpc_clnt * +rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +{ + struct rpc_create_args args = { + .program = clnt->cl_program, + .prognumber = clnt->cl_prog, + .version = clnt->cl_vers, + .authflavor = flavor, + .client_name = clnt->cl_principal, + }; + return __rpc_clone_client(&args, clnt); +} +EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth); + /* * Kill all tasks for the given client. * XXX: kill their descendants as well? diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 21fde99..80f5dd2 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1119,8 +1119,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) return -ENOMEM; - dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, - NET_NAME(net)); + dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", + net, NET_NAME(net)); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, @@ -1155,8 +1155,8 @@ static void rpc_kill_sb(struct super_block *sb) sn->pipefs_sb = NULL; mutex_unlock(&sn->pipefs_sb_lock); put_net(net); - dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, - NET_NAME(net)); + dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", + net, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 128494ec..6357fcb 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1022,7 +1022,7 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bac973a..194d865 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -208,6 +208,35 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); } +/* + * svc_xprt_received conditionally queues the transport for processing + * by another thread. The caller must hold the XPT_BUSY bit and must + * not thereafter touch transport data. + * + * Note: XPT_DATA only gets cleared when a read-attempt finds no (or + * insufficient) data. + */ +static void svc_xprt_received(struct svc_xprt *xprt) +{ + BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); + /* As soon as we clear busy, the xprt could be closed and + * 'put', so we need a reference to call svc_xprt_enqueue with: + */ + svc_xprt_get(xprt); + clear_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); +} + +void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) +{ + clear_bit(XPT_TEMP, &new->xpt_flags); + spin_lock_bh(&serv->sv_lock); + list_add(&new->xpt_list, &serv->sv_permsocks); + spin_unlock_bh(&serv->sv_lock); + svc_xprt_received(new); +} + int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, struct net *net, const int family, const unsigned short port, int flags) @@ -232,13 +261,8 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, module_put(xcl->xcl_owner); return PTR_ERR(newxprt); } - - clear_bit(XPT_TEMP, &newxprt->xpt_flags); - spin_lock_bh(&serv->sv_lock); - list_add(&newxprt->xpt_list, &serv->sv_permsocks); - spin_unlock_bh(&serv->sv_lock); + svc_add_new_perm_xprt(serv, newxprt); newport = svc_xprt_local_port(newxprt); - clear_bit(XPT_BUSY, &newxprt->xpt_flags); return newport; } err: @@ -394,27 +418,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) return xprt; } -/* - * svc_xprt_received conditionally queues the transport for processing - * by another thread. The caller must hold the XPT_BUSY bit and must - * not thereafter touch transport data. - * - * Note: XPT_DATA only gets cleared when a read-attempt finds no (or - * insufficient) data. - */ -void svc_xprt_received(struct svc_xprt *xprt) -{ - BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_enqueue with: - */ - svc_xprt_get(xprt); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); -} -EXPORT_SYMBOL_GPL(svc_xprt_received); - /** * svc_reserve - change the space reserved for the reply to a request. * @rqstp: The request in question @@ -565,33 +568,12 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -/* - * Receive the next request on any transport. This code is carefully - * organised not to touch any cachelines in the shared svc_serv - * structure, only cachelines in the local svc_pool. - */ -int svc_recv(struct svc_rqst *rqstp, long timeout) +int svc_alloc_arg(struct svc_rqst *rqstp) { - struct svc_xprt *xprt = NULL; - struct svc_serv *serv = rqstp->rq_server; - struct svc_pool *pool = rqstp->rq_pool; - int len, i; - int pages; - struct xdr_buf *arg; - DECLARE_WAITQUEUE(wait, current); - long time_left; - - dprintk("svc: server %p waiting for data (to = %ld)\n", - rqstp, timeout); - - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_recv: service %p, transport not NULL!\n", - rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); + struct svc_serv *serv = rqstp->rq_server; + struct xdr_buf *arg; + int pages; + int i; /* now allocate needed pages. If we get a failure, sleep briefly */ pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; @@ -621,11 +603,15 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) arg->page_len = (pages-2)*PAGE_SIZE; arg->len = (pages-1)*PAGE_SIZE; arg->tail[0].iov_len = 0; + return 0; +} - try_to_freeze(); - cond_resched(); - if (signalled() || kthread_should_stop()) - return -EINTR; +struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) +{ + struct svc_xprt *xprt; + struct svc_pool *pool = rqstp->rq_pool; + DECLARE_WAITQUEUE(wait, current); + long time_left; /* Normally we will wait up to 5 seconds for any required * cache information to be provided. @@ -663,7 +649,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (kthread_should_stop()) { set_current_state(TASK_RUNNING); spin_unlock_bh(&pool->sp_lock); - return -EINTR; + return ERR_PTR(-EINTR); } add_wait_queue(&rqstp->rq_wait, &wait); @@ -684,48 +670,58 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); if (signalled() || kthread_should_stop()) - return -EINTR; + return ERR_PTR(-EINTR); else - return -EAGAIN; + return ERR_PTR(-EAGAIN); } } spin_unlock_bh(&pool->sp_lock); + return xprt; +} + +void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) +{ + spin_lock_bh(&serv->sv_lock); + set_bit(XPT_TEMP, &newxpt->xpt_flags); + list_add(&newxpt->xpt_list, &serv->sv_tempsocks); + serv->sv_tmpcnt++; + if (serv->sv_temptimer.function == NULL) { + /* setup timer to age temp transports */ + setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, + (unsigned long)serv); + mod_timer(&serv->sv_temptimer, + jiffies + svc_conn_age_period * HZ); + } + spin_unlock_bh(&serv->sv_lock); + svc_xprt_received(newxpt); +} + +static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) +{ + struct svc_serv *serv = rqstp->rq_server; + int len = 0; - len = 0; if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); /* Leave XPT_BUSY set on the dead xprt: */ - goto out; + return 0; } if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; + /* + * We know this module_get will succeed because the + * listener holds a reference too + */ + __module_get(xprt->xpt_class->xcl_owner); + svc_check_conn_limits(xprt->xpt_server); newxpt = xprt->xpt_ops->xpo_accept(xprt); - if (newxpt) { - /* - * We know this module_get will succeed because the - * listener holds a reference too - */ - __module_get(newxpt->xpt_class->xcl_owner); - svc_check_conn_limits(xprt->xpt_server); - spin_lock_bh(&serv->sv_lock); - set_bit(XPT_TEMP, &newxpt->xpt_flags); - list_add(&newxpt->xpt_list, &serv->sv_tempsocks); - serv->sv_tmpcnt++; - if (serv->sv_temptimer.function == NULL) { - /* setup timer to age temp transports */ - setup_timer(&serv->sv_temptimer, - svc_age_temp_xprts, - (unsigned long)serv); - mod_timer(&serv->sv_temptimer, - jiffies + svc_conn_age_period * HZ); - } - spin_unlock_bh(&serv->sv_lock); - svc_xprt_received(newxpt); - } + if (newxpt) + svc_add_new_temp_xprt(serv, newxpt); } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { + /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", - rqstp, pool->sp_id, xprt, + rqstp, rqstp->rq_pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); if (rqstp->rq_deferred) @@ -736,10 +732,51 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } + /* clear XPT_BUSY: */ svc_xprt_received(xprt); + return len; +} + +/* + * Receive the next request on any transport. This code is carefully + * organised not to touch any cachelines in the shared svc_serv + * structure, only cachelines in the local svc_pool. + */ +int svc_recv(struct svc_rqst *rqstp, long timeout) +{ + struct svc_xprt *xprt = NULL; + struct svc_serv *serv = rqstp->rq_server; + int len, err; + + dprintk("svc: server %p waiting for data (to = %ld)\n", + rqstp, timeout); + + if (rqstp->rq_xprt) + printk(KERN_ERR + "svc_recv: service %p, transport not NULL!\n", + rqstp); + if (waitqueue_active(&rqstp->rq_wait)) + printk(KERN_ERR + "svc_recv: service %p, wait queue active!\n", + rqstp); + + err = svc_alloc_arg(rqstp); + if (err) + return err; + + try_to_freeze(); + cond_resched(); + if (signalled() || kthread_should_stop()) + return -EINTR; + + xprt = svc_get_next_xprt(rqstp, timeout); + if (IS_ERR(xprt)) + return PTR_ERR(xprt); + + len = svc_handle_xprt(rqstp, xprt); /* No data, incomplete (TCP) read, or accept() */ - if (len == 0 || len == -EAGAIN) + if (len <= 0) goto out; clear_bit(XPT_OLD, &xprt->xpt_flags); @@ -917,16 +954,18 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct list_head *xprt_list, struct net *net) +static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_BUSY, &xprt->xpt_flags); } + spin_unlock(&serv->sv_lock); } static void svc_clear_pools(struct svc_serv *serv, struct net *net) @@ -949,24 +988,28 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) } } -static void svc_clear_list(struct list_head *xprt_list, struct net *net) +static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; struct svc_xprt *tmp; + LIST_HEAD(victims); + spin_lock(&serv->sv_lock); list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; - svc_delete_xprt(xprt); + list_move(&xprt->xpt_list, &victims); } - list_for_each_entry(xprt, xprt_list, xpt_list) - BUG_ON(xprt->xpt_net == net); + spin_unlock(&serv->sv_lock); + + list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + svc_delete_xprt(xprt); } void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(&serv->sv_tempsocks, net); - svc_close_list(&serv->sv_permsocks, net); + svc_close_list(serv, &serv->sv_tempsocks, net); + svc_close_list(serv, &serv->sv_permsocks, net); svc_clear_pools(serv, net); /* @@ -974,8 +1017,8 @@ void svc_close_net(struct svc_serv *serv, struct net *net) * svc_xprt_enqueue will not add new entries without taking the * sp_lock and checking XPT_BUSY. */ - svc_clear_list(&serv->sv_tempsocks, net); - svc_clear_list(&serv->sv_permsocks, net); + svc_clear_list(serv, &serv->sv_tempsocks, net); + svc_clear_list(serv, &serv->sv_permsocks, net); } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 998aa8c..03827ce 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -59,7 +59,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, - int *errp, int flags); + int flags); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); @@ -305,57 +305,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } -/** - * svc_sock_names - construct a list of listener names in a string - * @serv: pointer to RPC service - * @buf: pointer to a buffer to fill in with socket names - * @buflen: size of the buffer to be filled - * @toclose: pointer to '\0'-terminated C string containing the name - * of a listener to be closed - * - * Fills in @buf with a '\n'-separated list of names of listener - * sockets. If @toclose is not NULL, the socket named by @toclose - * is closed, and is not included in the output list. - * - * Returns positive length of the socket name string, or a negative - * errno value on error. - */ -int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, - const char *toclose) -{ - struct svc_sock *svsk, *closesk = NULL; - int len = 0; - - if (!serv) - return 0; - - spin_lock_bh(&serv->sv_lock); - list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); - if (onelen < 0) { - len = onelen; - break; - } - if (toclose && strcmp(toclose, buf + len) == 0) { - closesk = svsk; - svc_xprt_get(&closesk->sk_xprt); - } else - len += onelen; - } - spin_unlock_bh(&serv->sv_lock); - - if (closesk) { - /* Should unregister with portmap, but you cannot - * unregister just one protocol... - */ - svc_close_xprt(&closesk->sk_xprt); - svc_xprt_put(&closesk->sk_xprt); - } else if (toclose) - return -ENOENT; - return len; -} -EXPORT_SYMBOL_GPL(svc_sock_names); - /* * Check input queue length */ @@ -598,11 +547,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) dprintk("svc: recvfrom returned error %d\n", -err); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); } - return -EAGAIN; + return 0; } len = svc_addr_len(svc_addr(rqstp)); - if (len == 0) - return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); @@ -620,10 +567,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (!svc_udp_get_dest_address(rqstp, cmh)) { net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", cmh->cmsg_level, cmh->cmsg_type); -out_free: - trace_kfree_skb(skb, svc_udp_recvfrom); - skb_free_datagram_locked(svsk->sk_sk, skb); - return 0; + goto out_free; } rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); @@ -662,6 +606,10 @@ out_free: serv->sv_stats->netudpcnt++; return len; +out_free: + trace_kfree_skb(skb, svc_udp_recvfrom); + skb_free_datagram_locked(svsk->sk_sk, skb); + return 0; } static int @@ -900,8 +848,9 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) */ newsock->sk->sk_sndtimeo = HZ*30; - if (!(newsvsk = svc_setup_socket(serv, newsock, &err, - (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) + newsvsk = svc_setup_socket(serv, newsock, + (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)); + if (IS_ERR(newsvsk)) goto failed; svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); err = kernel_getsockname(newsock, sin, &slen); @@ -1174,13 +1123,13 @@ error: if (len != -EAGAIN) goto err_other; dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return -EAGAIN; + return 0; err_other: printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", svsk->sk_xprt.xpt_server->sv_name, -len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); err_noclose: - return -EAGAIN; /* record not complete */ + return 0; /* record not complete */ } /* @@ -1383,29 +1332,29 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs); */ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct socket *sock, - int *errp, int flags) + int flags) { struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); + int err = 0; dprintk("svc: svc_setup_socket %p\n", sock); - if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { - *errp = -ENOMEM; - return NULL; - } + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + return ERR_PTR(-ENOMEM); inet = sock->sk; /* Register socket with portmapper */ - if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family, + if (pmap_register) + err = svc_register(serv, sock_net(sock->sk), inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->inet_sport)); - if (*errp < 0) { + if (err < 0) { kfree(svsk); - return NULL; + return ERR_PTR(err); } inet->sk_user_data = svsk; @@ -1450,42 +1399,38 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, int err = 0; struct socket *so = sockfd_lookup(fd, &err); struct svc_sock *svsk = NULL; + struct sockaddr_storage addr; + struct sockaddr *sin = (struct sockaddr *)&addr; + int salen; if (!so) return err; + err = -EAFNOSUPPORT; if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) - err = -EAFNOSUPPORT; - else if (so->sk->sk_protocol != IPPROTO_TCP && + goto out; + err = -EPROTONOSUPPORT; + if (so->sk->sk_protocol != IPPROTO_TCP && so->sk->sk_protocol != IPPROTO_UDP) - err = -EPROTONOSUPPORT; - else if (so->state > SS_UNCONNECTED) - err = -EISCONN; - else { - if (!try_module_get(THIS_MODULE)) - err = -ENOENT; - else - svsk = svc_setup_socket(serv, so, &err, - SVC_SOCK_DEFAULTS); - if (svsk) { - struct sockaddr_storage addr; - struct sockaddr *sin = (struct sockaddr *)&addr; - int salen; - if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) - svc_xprt_set_local(&svsk->sk_xprt, sin, salen); - clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); - spin_lock_bh(&serv->sv_lock); - list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); - spin_unlock_bh(&serv->sv_lock); - svc_xprt_received(&svsk->sk_xprt); - err = 0; - } else - module_put(THIS_MODULE); - } - if (err) { - sockfd_put(so); - return err; + goto out; + err = -EISCONN; + if (so->state > SS_UNCONNECTED) + goto out; + err = -ENOENT; + if (!try_module_get(THIS_MODULE)) + goto out; + svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS); + if (IS_ERR(svsk)) { + module_put(THIS_MODULE); + err = PTR_ERR(svsk); + goto out; } + if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) + svc_xprt_set_local(&svsk->sk_xprt, sin, salen); + svc_add_new_perm_xprt(serv, &svsk->sk_xprt); return svc_one_sock_name(svsk, name_return, len); +out: + sockfd_put(so); + return err; } EXPORT_SYMBOL_GPL(svc_addsock); @@ -1563,11 +1508,13 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, goto bummer; } - if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { - svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); - return (struct svc_xprt *)svsk; + svsk = svc_setup_socket(serv, sock, flags); + if (IS_ERR(svsk)) { + error = PTR_ERR(svsk); + goto bummer; } - + svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); + return (struct svc_xprt *)svsk; bummer: dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0afba1b..08f50af 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -730,19 +730,24 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len) if (xdr->nwords == 0) return 0; - if (nwords > xdr->nwords) { - nwords = xdr->nwords; - len = nwords << 2; - } /* Realign pages to current pointer position */ iov = buf->head; - if (iov->iov_len > cur) + if (iov->iov_len > cur) { xdr_shrink_bufhead(buf, iov->iov_len - cur); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + } - /* Truncate page data and move it into the tail */ - if (buf->page_len > len) + if (nwords > xdr->nwords) { + nwords = xdr->nwords; + len = nwords << 2; + } + if (buf->page_len <= len) + len = buf->page_len; + else if (nwords < xdr->nwords) { + /* Truncate page data and move it into the tail */ xdr_shrink_pagelen(buf, buf->page_len - len); - xdr->nwords = XDR_QUADLEN(buf->len - cur); + xdr->nwords = XDR_QUADLEN(buf->len - cur); + } return len; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5d7f61d..bd462a5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { xprt->snd_task = NULL; - if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { smp_mb__before_clear_bit(); clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); @@ -504,9 +504,6 @@ EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); */ void xprt_write_space(struct rpc_xprt *xprt) { - if (unlikely(xprt->shutdown)) - return; - spin_lock_bh(&xprt->transport_lock); if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " @@ -679,7 +676,7 @@ xprt_init_autodisconnect(unsigned long data) struct rpc_xprt *xprt = (struct rpc_xprt *)data; spin_lock(&xprt->transport_lock); - if (!list_empty(&xprt->recv) || xprt->shutdown) + if (!list_empty(&xprt->recv)) goto out_abort; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; @@ -1262,7 +1259,6 @@ out: static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - xprt->shutdown = 1; del_timer_sync(&xprt->timer); rpc_destroy_wait_queue(&xprt->binding); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 73b428b..62e4f9b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -578,10 +578,6 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird) list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); spin_unlock_bh(&listen_xprt->sc_lock); - /* - * Can't use svc_xprt_received here because we are not on a - * rqstp thread - */ set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); svc_xprt_enqueue(&listen_xprt->sc_xprt); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5d9202d..c9aa7a3 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -199,21 +199,15 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpc_xprt *xprt = &r_xprt->xprt; int rc = 0; - if (!xprt->shutdown) { - current->flags |= PF_FSTRANS; - xprt_clear_connected(xprt); - - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - goto out; - } - goto out_clear; + current->flags |= PF_FSTRANS; + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + xprt_wake_pending_tasks(xprt, rc); -out: - xprt_wake_pending_tasks(xprt, rc); -out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); current->flags &= ~PF_FSTRANS; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a35b8e5..aaaadfb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -917,9 +917,6 @@ static void xs_local_data_ready(struct sock *sk, int len) if (skb == NULL) goto out; - if (xprt->shutdown) - goto dropit; - repsize = skb->len - sizeof(rpc_fraghdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d\n", repsize); @@ -981,9 +978,6 @@ static void xs_udp_data_ready(struct sock *sk, int len) if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) goto out; - if (xprt->shutdown) - goto dropit; - repsize = skb->len - sizeof(struct udphdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); @@ -1025,6 +1019,16 @@ static void xs_udp_data_ready(struct sock *sk, int len) read_unlock_bh(&sk->sk_callback_lock); } +/* + * Helper function to force a TCP close if the server is sending + * junk and/or it has put us in CLOSE_WAIT + */ +static void xs_tcp_force_close(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); +} + static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1051,7 +1055,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea /* Sanity check of the record length */ if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); - xprt_force_disconnect(xprt); + xs_tcp_force_close(xprt); return; } dprintk("RPC: reading TCP record fragment of length %d\n", @@ -1132,7 +1136,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, break; default: dprintk("RPC: invalid request message type\n"); - xprt_force_disconnect(&transport->xprt); + xs_tcp_force_close(&transport->xprt); } xs_tcp_check_fraghdr(transport); } @@ -1402,9 +1406,6 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; - if (xprt->shutdown) - goto out; - /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ @@ -1455,6 +1456,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) static void xs_sock_mark_closed(struct rpc_xprt *xprt) { smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1512,8 +1515,8 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - xprt_force_disconnect(xprt); xprt->connect_cookie++; + xs_tcp_force_close(xprt); case TCP_CLOSING: /* * If the server closed down the connection, make sure that @@ -1889,9 +1892,6 @@ static void xs_local_setup_socket(struct work_struct *work) struct socket *sock; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); @@ -2008,9 +2008,6 @@ static void xs_udp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; /* Start by resetting any existing state */ @@ -2156,9 +2153,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; if (!sock) { @@ -2199,8 +2193,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) /* We're probably in TIME_WAIT. Get rid of existing socket, * and retry */ - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); - xprt_force_disconnect(xprt); + xs_tcp_force_close(xprt); break; case -ECONNREFUSED: case -ECONNRESET: @@ -2528,6 +2521,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, + .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, |