diff options
Diffstat (limited to 'net')
73 files changed, 555 insertions, 566 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 6940d8f..daa749c 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -575,6 +575,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) /* Ceiling limit to avoid denial of service attacks */ chan->p9_max_pages = nr_free_buffer_pages()/4; + virtio_device_ready(vdev); + mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); diff --git a/net/Kconfig b/net/Kconfig index d6b138e..6272420 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,6 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS + select ANON_INODES ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 6f5e621..88a1bc3 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -44,10 +44,10 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, if (strlen(buff) > 4) { tmp_ptr = buff + strlen(buff) - 4; - if (strnicmp(tmp_ptr, "mbit", 4) == 0) + if (strncasecmp(tmp_ptr, "mbit", 4) == 0) bw_unit_type = BATADV_BW_UNIT_MBIT; - if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || + if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || (bw_unit_type == BATADV_BW_UNIT_MBIT)) *tmp_ptr = '\0'; } @@ -77,10 +77,10 @@ static bool batadv_parse_gw_bandwidth(struct net_device *net_dev, char *buff, if (strlen(slash_ptr + 1) > 4) { tmp_ptr = slash_ptr + 1 - 4 + strlen(slash_ptr + 1); - if (strnicmp(tmp_ptr, "mbit", 4) == 0) + if (strncasecmp(tmp_ptr, "mbit", 4) == 0) bw_unit_type = BATADV_BW_UNIT_MBIT; - if ((strnicmp(tmp_ptr, "kbit", 4) == 0) || + if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || (bw_unit_type == BATADV_BW_UNIT_MBIT)) *tmp_ptr = '\0'; } diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ba02db02..5cd44f0 100644 --- 
a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -87,13 +87,12 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN], { struct cfusbl *this = kmalloc(sizeof(struct cfusbl), GFP_ATOMIC); - if (!this) { - pr_warn("Out of memory\n"); + if (!this) return NULL; - } + caif_assert(offsetof(struct cfusbl, layer) == 0); - memset(this, 0, sizeof(struct cflayer)); + memset(&this->layer, 0, sizeof(this->layer)); this->layer.receive = cfusbl_receive; this->layer.transmit = cfusbl_transmit; this->layer.ctrlcmd = cfusbl_ctrlcmd; diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 8c5d638..510aa5a 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -47,10 +47,10 @@ static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); struct cflayer *cfmuxl_create(void) { - struct cfmuxl *this = kmalloc(sizeof(struct cfmuxl), GFP_ATOMIC); + struct cfmuxl *this = kzalloc(sizeof(struct cfmuxl), GFP_ATOMIC); + if (!this) return NULL; - memset(this, 0, sizeof(*this)); this->layer.receive = cfmuxl_receive; this->layer.transmit = cfmuxl_transmit; this->layer.ctrlcmd = cfmuxl_ctrlcmd; diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig index e50cc69..f8cceb9 100644 --- a/net/ceph/Kconfig +++ b/net/ceph/Kconfig @@ -3,6 +3,7 @@ config CEPH_LIB depends on INET select LIBCRC32C select CRYPTO_AES + select CRYPTO_CBC select CRYPTO select KEYS default n diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1675021..58fbfe1 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -293,17 +293,20 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { key_err = PTR_ERR(ukey); switch (key_err) { case -ENOKEY: - pr_warning("ceph: Mount failed due to key not found: %s\n", name); + pr_warn("ceph: Mount failed due to key not found: %s\n", + name); break; case -EKEYEXPIRED: - pr_warning("ceph: Mount failed due to expired key: %s\n", name); + pr_warn("ceph: Mount failed due to expired key: %s\n", + name); break; case -EKEYREVOKED: - 
pr_warning("ceph: Mount failed due to revoked key: %s\n", name); + pr_warn("ceph: Mount failed due to revoked key: %s\n", + name); break; default: - pr_warning("ceph: Mount failed due to unknown key error" - " %d: %s\n", key_err, name); + pr_warn("ceph: Mount failed due to unknown key error %d: %s\n", + key_err, name); } err = -EPERM; goto out; @@ -433,7 +436,7 @@ ceph_parse_options(char *options, const char *dev_name, /* misc */ case Opt_osdtimeout: - pr_warning("ignoring deprecated osdtimeout option\n"); + pr_warn("ignoring deprecated osdtimeout option\n"); break; case Opt_osdkeepalivetimeout: opt->osd_keepalive_timeout = intval; diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 1348df9..3056020 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -19,77 +19,12 @@ const char *ceph_entity_type_name(int type) const char *ceph_osd_op_name(int op) { switch (op) { - case CEPH_OSD_OP_READ: return "read"; - case CEPH_OSD_OP_STAT: return "stat"; - case CEPH_OSD_OP_MAPEXT: return "mapext"; - case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; - case CEPH_OSD_OP_NOTIFY: return "notify"; - case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; - case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; - - case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; - - case CEPH_OSD_OP_CREATE: return "create"; - case CEPH_OSD_OP_WRITE: return "write"; - case CEPH_OSD_OP_DELETE: return "delete"; - case CEPH_OSD_OP_TRUNCATE: return "truncate"; - case CEPH_OSD_OP_ZERO: return "zero"; - case CEPH_OSD_OP_WRITEFULL: return "writefull"; - case CEPH_OSD_OP_ROLLBACK: return "rollback"; - - case CEPH_OSD_OP_APPEND: return "append"; - case CEPH_OSD_OP_STARTSYNC: return "startsync"; - case CEPH_OSD_OP_SETTRUNC: return "settrunc"; - case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; - - case CEPH_OSD_OP_TMAPUP: return "tmapup"; - case CEPH_OSD_OP_TMAPGET: return "tmapget"; - case CEPH_OSD_OP_TMAPPUT: return "tmapput"; - case CEPH_OSD_OP_WATCH: return "watch"; - - case 
CEPH_OSD_OP_CLONERANGE: return "clonerange"; - case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; - case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; - - case CEPH_OSD_OP_GETXATTR: return "getxattr"; - case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; - case CEPH_OSD_OP_SETXATTR: return "setxattr"; - case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; - case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; - case CEPH_OSD_OP_RMXATTR: return "rmxattr"; - case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; - - case CEPH_OSD_OP_PULL: return "pull"; - case CEPH_OSD_OP_PUSH: return "push"; - case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; - case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; - case CEPH_OSD_OP_SCRUB: return "scrub"; - case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; - case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; - case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; - case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; - - case CEPH_OSD_OP_WRLOCK: return "wrlock"; - case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; - case CEPH_OSD_OP_RDLOCK: return "rdlock"; - case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; - case CEPH_OSD_OP_UPLOCK: return "uplock"; - case CEPH_OSD_OP_DNLOCK: return "dnlock"; - - case CEPH_OSD_OP_CALL: return "call"; - - case CEPH_OSD_OP_PGLS: return "pgls"; - case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; - case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; - case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; - case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; - case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; - case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; - case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; - case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; - case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; +#define GENERATE_CASE(op, opcode, str) case CEPH_OSD_OP_##op: return (str); +__CEPH_FORALL_OSD_OPS(GENERATE_CASE) +#undef GENERATE_CASE + 
default: + return "???"; } - return "???"; } const char *ceph_osd_state_name(int s) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index ffeba8f..62fc5e7 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -476,7 +476,6 @@ struct key_type key_type_ceph = { .preparse = ceph_key_preparse, .free_preparse = ceph_key_free_preparse, .instantiate = generic_key_instantiate, - .match = user_match, .destroy = ceph_key_destroy, }; diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index d1a62c6..d2d5255 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -169,7 +169,8 @@ static int osdc_show(struct seq_file *s, void *pp) for (i = 0; i < req->r_num_ops; i++) { opcode = req->r_ops[i].op; - seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); + seq_printf(s, "%s%s", (i == 0 ? "\t" : ","), + ceph_osd_op_name(opcode)); } seq_printf(s, "\n"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b2f571d..559c9f6 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -292,7 +292,11 @@ int ceph_msgr_init(void) if (ceph_msgr_slab_init()) return -ENOMEM; - ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0); + /* + * The number of active work items is limited by the number of + * connections, so leave @max_active at default. 
+ */ + ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_MEM_RECLAIM, 0); if (ceph_msgr_wq) return 0; @@ -1937,11 +1941,11 @@ static int process_banner(struct ceph_connection *con) sizeof(con->peer_addr)) != 0 && !(addr_is_blank(&con->actual_peer_addr.in_addr) && con->actual_peer_addr.nonce == con->peer_addr.nonce)) { - pr_warning("wrong peer, want %s/%d, got %s/%d\n", - ceph_pr_addr(&con->peer_addr.in_addr), - (int)le32_to_cpu(con->peer_addr.nonce), - ceph_pr_addr(&con->actual_peer_addr.in_addr), - (int)le32_to_cpu(con->actual_peer_addr.nonce)); + pr_warn("wrong peer, want %s/%d, got %s/%d\n", + ceph_pr_addr(&con->peer_addr.in_addr), + (int)le32_to_cpu(con->peer_addr.nonce), + ceph_pr_addr(&con->actual_peer_addr.in_addr), + (int)le32_to_cpu(con->actual_peer_addr.nonce)); con->error_msg = "wrong peer at address"; return -1; } @@ -2302,7 +2306,7 @@ static int read_partial_message(struct ceph_connection *con) BUG_ON(!con->in_msg ^ skip); if (con->in_msg && data_len > con->in_msg->data_length) { - pr_warning("%s skipping long message (%u > %zd)\n", + pr_warn("%s skipping long message (%u > %zd)\n", __func__, data_len, con->in_msg->data_length); ceph_msg_put(con->in_msg); con->in_msg = NULL; @@ -2712,7 +2716,7 @@ static bool con_sock_closed(struct ceph_connection *con) CASE(OPEN); CASE(STANDBY); default: - pr_warning("%s con %p unrecognized state %lu\n", + pr_warn("%s con %p unrecognized state %lu\n", __func__, con, con->state); con->error_msg = "unrecognized con state"; BUG(); @@ -2828,8 +2832,8 @@ static void con_work(struct work_struct *work) */ static void con_fault(struct ceph_connection *con) { - pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), - ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); + pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), + ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); dout("fault %p state %lu to peer %s\n", con, con->state, ceph_pr_addr(&con->peer_addr.in_addr)); @@ -3071,10 +3075,8 @@ static void 
ceph_msg_data_destroy(struct ceph_msg_data *data) return; WARN_ON(!list_empty(&data->links)); - if (data->type == CEPH_MSG_DATA_PAGELIST) { + if (data->type == CEPH_MSG_DATA_PAGELIST) ceph_pagelist_release(data->pagelist); - kfree(data->pagelist); - } kmem_cache_free(ceph_msg_data_cache, data); } diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 61fcfc3..a83062c 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1182,10 +1182,10 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con, pr_info("alloc_msg unknown type %d\n", type); *skip = 1; } else if (front_len > m->front_alloc_len) { - pr_warning("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n", - front_len, m->front_alloc_len, - (unsigned int)con->peer_name.type, - le64_to_cpu(con->peer_name.num)); + pr_warn("mon_alloc_msg front %d > prealloc %d (%u#%llu)\n", + front_len, m->front_alloc_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); ceph_msg_put(m); m = ceph_msg_new(type, front_len, GFP_NOFS, false); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 30f6faf..f3fc54e 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -30,8 +30,11 @@ static void __send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); +static void __unregister_request(struct ceph_osd_client *osdc, + struct ceph_osd_request *req); static void __unregister_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); +static void __enqueue_request(struct ceph_osd_request *req); static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -428,68 +431,9 @@ EXPORT_SYMBOL(ceph_osdc_alloc_request); static bool osd_req_opcode_valid(u16 opcode) { switch (opcode) { - case CEPH_OSD_OP_READ: - case CEPH_OSD_OP_STAT: - case CEPH_OSD_OP_MAPEXT: - case 
CEPH_OSD_OP_MASKTRUNC: - case CEPH_OSD_OP_SPARSE_READ: - case CEPH_OSD_OP_NOTIFY: - case CEPH_OSD_OP_NOTIFY_ACK: - case CEPH_OSD_OP_ASSERT_VER: - case CEPH_OSD_OP_WRITE: - case CEPH_OSD_OP_WRITEFULL: - case CEPH_OSD_OP_TRUNCATE: - case CEPH_OSD_OP_ZERO: - case CEPH_OSD_OP_DELETE: - case CEPH_OSD_OP_APPEND: - case CEPH_OSD_OP_STARTSYNC: - case CEPH_OSD_OP_SETTRUNC: - case CEPH_OSD_OP_TRIMTRUNC: - case CEPH_OSD_OP_TMAPUP: - case CEPH_OSD_OP_TMAPPUT: - case CEPH_OSD_OP_TMAPGET: - case CEPH_OSD_OP_CREATE: - case CEPH_OSD_OP_ROLLBACK: - case CEPH_OSD_OP_WATCH: - case CEPH_OSD_OP_OMAPGETKEYS: - case CEPH_OSD_OP_OMAPGETVALS: - case CEPH_OSD_OP_OMAPGETHEADER: - case CEPH_OSD_OP_OMAPGETVALSBYKEYS: - case CEPH_OSD_OP_OMAPSETVALS: - case CEPH_OSD_OP_OMAPSETHEADER: - case CEPH_OSD_OP_OMAPCLEAR: - case CEPH_OSD_OP_OMAPRMKEYS: - case CEPH_OSD_OP_OMAP_CMP: - case CEPH_OSD_OP_SETALLOCHINT: - case CEPH_OSD_OP_CLONERANGE: - case CEPH_OSD_OP_ASSERT_SRC_VERSION: - case CEPH_OSD_OP_SRC_CMPXATTR: - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_GETXATTRS: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_SETXATTRS: - case CEPH_OSD_OP_RESETXATTRS: - case CEPH_OSD_OP_RMXATTR: - case CEPH_OSD_OP_PULL: - case CEPH_OSD_OP_PUSH: - case CEPH_OSD_OP_BALANCEREADS: - case CEPH_OSD_OP_UNBALANCEREADS: - case CEPH_OSD_OP_SCRUB: - case CEPH_OSD_OP_SCRUB_RESERVE: - case CEPH_OSD_OP_SCRUB_UNRESERVE: - case CEPH_OSD_OP_SCRUB_STOP: - case CEPH_OSD_OP_SCRUB_MAP: - case CEPH_OSD_OP_WRLOCK: - case CEPH_OSD_OP_WRUNLOCK: - case CEPH_OSD_OP_RDLOCK: - case CEPH_OSD_OP_RDUNLOCK: - case CEPH_OSD_OP_UPLOCK: - case CEPH_OSD_OP_DNLOCK: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_PGLS: - case CEPH_OSD_OP_PGLS_FILTER: - return true; +#define GENERATE_CASE(op, opcode, str) case CEPH_OSD_OP_##op: return true; +__CEPH_FORALL_OSD_OPS(GENERATE_CASE) +#undef GENERATE_CASE default: return false; } @@ -892,6 +836,37 @@ __lookup_request_ge(struct ceph_osd_client *osdc, return NULL; } +static void 
__kick_linger_request(struct ceph_osd_request *req) +{ + struct ceph_osd_client *osdc = req->r_osdc; + struct ceph_osd *osd = req->r_osd; + + /* + * Linger requests need to be resent with a new tid to avoid + * the dup op detection logic on the OSDs. Achieve this with + * a re-register dance instead of open-coding. + */ + ceph_osdc_get_request(req); + if (!list_empty(&req->r_linger_item)) + __unregister_linger_request(osdc, req); + else + __unregister_request(osdc, req); + __register_request(osdc, req); + ceph_osdc_put_request(req); + + /* + * Unless request has been registered as both normal and + * lingering, __unregister{,_linger}_request clears r_osd. + * However, here we need to preserve r_osd to make sure we + * requeue on the same OSD. + */ + WARN_ON(req->r_osd || !osd); + req->r_osd = osd; + + dout("%s requeueing %p tid %llu\n", __func__, req, req->r_tid); + __enqueue_request(req); +} + /* * Resubmit requests pending on the given osd. */ @@ -900,12 +875,14 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, { struct ceph_osd_request *req, *nreq; LIST_HEAD(resend); + LIST_HEAD(resend_linger); int err; - dout("__kick_osd_requests osd%d\n", osd->o_osd); + dout("%s osd%d\n", __func__, osd->o_osd); err = __reset_osd(osdc, osd); if (err) return; + /* * Build up a list of requests to resend by traversing the * osd's list of requests. 
Requests for a given object are @@ -926,33 +903,32 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc, list_for_each_entry(req, &osd->o_requests, r_osd_item) { if (!req->r_sent) break; - list_move_tail(&req->r_req_lru_item, &resend); - dout("requeueing %p tid %llu osd%d\n", req, req->r_tid, - osd->o_osd); - if (!req->r_linger) + + if (!req->r_linger) { + dout("%s requeueing %p tid %llu\n", __func__, req, + req->r_tid); + list_move_tail(&req->r_req_lru_item, &resend); req->r_flags |= CEPH_OSD_FLAG_RETRY; + } else { + list_move_tail(&req->r_req_lru_item, &resend_linger); + } } list_splice(&resend, &osdc->req_unsent); /* - * Linger requests are re-registered before sending, which - * sets up a new tid for each. We add them to the unsent - * list at the end to keep things in tid order. + * Both registered and not yet registered linger requests are + * enqueued with a new tid on the same OSD. We add/move them + * to req_unsent/o_requests at the end to keep things in tid + * order. */ list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, r_linger_osd_item) { - /* - * reregister request prior to unregistering linger so - * that r_osd is preserved. - */ - BUG_ON(!list_empty(&req->r_req_lru_item)); - __register_request(osdc, req); - list_add_tail(&req->r_req_lru_item, &osdc->req_unsent); - list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); - __unregister_linger_request(osdc, req); - dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, - osd->o_osd); + WARN_ON(!list_empty(&req->r_req_lru_item)); + __kick_linger_request(req); } + + list_for_each_entry_safe(req, nreq, &resend_linger, r_req_lru_item) + __kick_linger_request(req); } /* @@ -1346,6 +1322,22 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap, &req->r_target_oid, pg_out); } +static void __enqueue_request(struct ceph_osd_request *req) +{ + struct ceph_osd_client *osdc = req->r_osdc; + + dout("%s %p tid %llu to osd%d\n", __func__, req, req->r_tid, + req->r_osd ? 
req->r_osd->o_osd : -1); + + if (req->r_osd) { + __remove_osd_from_lru(req->r_osd); + list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); + list_move_tail(&req->r_req_lru_item, &osdc->req_unsent); + } else { + list_move_tail(&req->r_req_lru_item, &osdc->req_notarget); + } +} + /* * Pick an osd (the first 'up' osd in the pg), allocate the osd struct * (as needed), and set the request r_osd appropriately. If there is @@ -1423,13 +1415,7 @@ static int __map_request(struct ceph_osd_client *osdc, &osdc->osdmap->osd_addr[o]); } - if (req->r_osd) { - __remove_osd_from_lru(req->r_osd); - list_add_tail(&req->r_osd_item, &req->r_osd->o_requests); - list_move_tail(&req->r_req_lru_item, &osdc->req_unsent); - } else { - list_move_tail(&req->r_req_lru_item, &osdc->req_notarget); - } + __enqueue_request(req); err = 1; /* osd or pg changed */ out: @@ -1774,8 +1760,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, } bytes = le32_to_cpu(msg->hdr.data_len); if (payload_len != bytes) { - pr_warning("sum of op payload lens %d != data_len %d", - payload_len, bytes); + pr_warn("sum of op payload lens %d != data_len %d\n", + payload_len, bytes); goto bad_put; } @@ -2313,24 +2299,19 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, if (event) { event_work = kmalloc(sizeof(*event_work), GFP_NOIO); if (!event_work) { - dout("ERROR: could not allocate event_work\n"); - goto done_err; + pr_err("couldn't allocate event_work\n"); + ceph_osdc_put_event(event); + return; } INIT_WORK(&event_work->work, do_event_work); event_work->event = event; event_work->ver = ver; event_work->notify_id = notify_id; event_work->opcode = opcode; - if (!queue_work(osdc->notify_wq, &event_work->work)) { - dout("WARNING: failed to queue notify event work\n"); - goto done_err; - } - } - return; + queue_work(osdc->notify_wq, &event_work->work); + } -done_err: - ceph_osdc_put_event(event); return; bad: @@ -2797,10 +2778,10 @@ static struct ceph_msg *get_reply(struct 
ceph_connection *con, ceph_msg_revoke_incoming(req->r_reply); if (front_len > req->r_reply->front_alloc_len) { - pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n", - front_len, req->r_reply->front_alloc_len, - (unsigned int)con->peer_name.type, - le64_to_cpu(con->peer_name.num)); + pr_warn("get_reply front %d > preallocated %d (%u#%llu)\n", + front_len, req->r_reply->front_alloc_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front_len, GFP_NOFS, false); if (!m) @@ -2823,8 +2804,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (osd_data->pages && unlikely(osd_data->length < data_len)) { - pr_warning("tid %lld reply has %d bytes " - "we had only %llu bytes ready\n", + pr_warn("tid %lld reply has %d bytes we had only %llu bytes ready\n", tid, data_len, osd_data->length); *skip = 1; ceph_msg_put(m); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index c547e46..b8c3fde 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -521,11 +521,11 @@ static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) ev = ceph_decode_8(p); /* encoding version */ cv = ceph_decode_8(p); /* compat version */ if (ev < 5) { - pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + pr_warn("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); return -EINVAL; } if (cv > 9) { - pr_warning("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv); + pr_warn("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv); return -EINVAL; } len = ceph_decode_32(p); @@ -671,26 +671,26 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) int i; state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + map->osd_state = state; + weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS); - addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS); - if (!state || !weight || !addr) { - kfree(state); - kfree(weight); - kfree(addr); + if 
(!weight) + return -ENOMEM; + map->osd_weight = weight; + addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS); + if (!addr) return -ENOMEM; - } + map->osd_addr = addr; for (i = map->max_osd; i < max; i++) { - state[i] = 0; - weight[i] = CEPH_OSD_OUT; - memset(addr + i, 0, sizeof(*addr)); + map->osd_state[i] = 0; + map->osd_weight[i] = CEPH_OSD_OUT; + memset(map->osd_addr + i, 0, sizeof(*map->osd_addr)); } - map->osd_state = state; - map->osd_weight = weight; - map->osd_addr = addr; - if (map->osd_primary_affinity) { u32 *affinity; @@ -698,11 +698,11 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) max*sizeof(*affinity), GFP_NOFS); if (!affinity) return -ENOMEM; + map->osd_primary_affinity = affinity; for (i = map->max_osd; i < max; i++) - affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; - - map->osd_primary_affinity = affinity; + map->osd_primary_affinity[i] = + CEPH_OSD_DEFAULT_PRIMARY_AFFINITY; } map->max_osd = max; @@ -729,9 +729,9 @@ static int get_osdmap_client_data_v(void **p, void *end, ceph_decode_8_safe(p, end, struct_compat, e_inval); if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) { - pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n", - struct_v, struct_compat, - OSDMAP_WRAPPER_COMPAT_VER, prefix); + pr_warn("got v %d cv %d > %d of %s ceph_osdmap\n", + struct_v, struct_compat, + OSDMAP_WRAPPER_COMPAT_VER, prefix); return -EINVAL; } *p += 4; /* ignore wrapper struct_len */ @@ -739,9 +739,9 @@ static int get_osdmap_client_data_v(void **p, void *end, ceph_decode_8_safe(p, end, struct_v, e_inval); ceph_decode_8_safe(p, end, struct_compat, e_inval); if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) { - pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n", - struct_v, struct_compat, - OSDMAP_CLIENT_DATA_COMPAT_VER, prefix); + pr_warn("got v %d cv %d > %d of %s ceph_osdmap client data\n", + struct_v, struct_compat, + OSDMAP_CLIENT_DATA_COMPAT_VER, prefix); return -EINVAL; } *p += 4; /* ignore client data struct_len */ @@ 
-751,8 +751,8 @@ static int get_osdmap_client_data_v(void **p, void *end, *p -= 1; ceph_decode_16_safe(p, end, version, e_inval); if (version < 6) { - pr_warning("got v %d < 6 of %s ceph_osdmap\n", version, - prefix); + pr_warn("got v %d < 6 of %s ceph_osdmap\n", + version, prefix); return -EINVAL; } diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index 92866be..c7c220a 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -1,5 +1,6 @@ #include <linux/module.h> #include <linux/gfp.h> +#include <linux/slab.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/ceph/pagelist.h> @@ -13,8 +14,10 @@ static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl) } } -int ceph_pagelist_release(struct ceph_pagelist *pl) +void ceph_pagelist_release(struct ceph_pagelist *pl) { + if (!atomic_dec_and_test(&pl->refcnt)) + return; ceph_pagelist_unmap_tail(pl); while (!list_empty(&pl->head)) { struct page *page = list_first_entry(&pl->head, struct page, @@ -23,7 +26,7 @@ int ceph_pagelist_release(struct ceph_pagelist *pl) __free_page(page); } ceph_pagelist_free_reserve(pl); - return 0; + kfree(pl); } EXPORT_SYMBOL(ceph_pagelist_release); diff --git a/net/core/dev.c b/net/core/dev.c index 4699dcf..b793e35 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2153,7 +2153,7 @@ static inline void __netif_reschedule(struct Qdisc *q) unsigned long flags; local_irq_save(flags); - sd = &__get_cpu_var(softnet_data); + sd = this_cpu_ptr(&softnet_data); q->next_sched = NULL; *sd->output_queue_tailp = q; sd->output_queue_tailp = &q->next_sched; @@ -2675,7 +2675,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device if (skb->encapsulation) features &= dev->hw_enc_features; - if (netif_needs_gso(skb, features)) { + if (netif_needs_gso(dev, skb, features)) { struct sk_buff *segs; segs = skb_gso_segment(skb, features); @@ -3233,7 +3233,7 @@ static void rps_trigger_softirq(void *data) static int rps_ipi_queued(struct softnet_data *sd) 
{ #ifdef CONFIG_RPS - struct softnet_data *mysd = &__get_cpu_var(softnet_data); + struct softnet_data *mysd = this_cpu_ptr(&softnet_data); if (sd != mysd) { sd->rps_ipi_next = mysd->rps_ipi_list; @@ -3260,7 +3260,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) if (qlen < (netdev_max_backlog >> 1)) return false; - sd = &__get_cpu_var(softnet_data); + sd = this_cpu_ptr(&softnet_data); rcu_read_lock(); fl = rcu_dereference(sd->flow_limit); @@ -3407,7 +3407,7 @@ EXPORT_SYMBOL(netif_rx_ni); static void net_tx_action(struct softirq_action *h) { - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); if (sd->completion_queue) { struct sk_buff *clist; @@ -3832,7 +3832,7 @@ EXPORT_SYMBOL(netif_receive_skb); static void flush_backlog(void *arg) { struct net_device *dev = arg; - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); struct sk_buff *skb, *tmp; rps_lock(sd); @@ -4379,7 +4379,7 @@ void __napi_schedule(struct napi_struct *n) unsigned long flags; local_irq_save(flags); - ____napi_schedule(&__get_cpu_var(softnet_data), n); + ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); } EXPORT_SYMBOL(__napi_schedule); @@ -4500,7 +4500,7 @@ EXPORT_SYMBOL(netif_napi_del); static void net_rx_action(struct softirq_action *h) { - struct softnet_data *sd = &__get_cpu_var(softnet_data); + struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + 2; int budget = netdev_budget; void *have; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 50f9a9db..252e155 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -146,7 +146,7 @@ static void trace_drop_common(struct sk_buff *skb, void *location) unsigned long flags; local_irq_save(flags); - data = &__get_cpu_var(dm_cpu_data); + data = this_cpu_ptr(&dm_cpu_data); spin_lock(&data->lock); dskb = data->skb; 
diff --git a/net/core/filter.c b/net/core/filter.c index fcd3f67..647b122 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -51,9 +51,9 @@ * @skb: buffer to filter * * Run the filter code and then cut skb->data to correct size returned by - * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller + * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to sk_run_filter. It returns 0 if the packet should + * wrapper to SK_RUN_FILTER. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -566,11 +566,8 @@ err: /* Security: * - * A BPF program is able to use 16 cells of memory to store intermediate - * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()). - * * As we dont want to clear mem[] array for each packet going through - * sk_run_filter(), we check that filter loaded by user never try to read + * __bpf_prog_run(), we check that filter loaded by user never try to read * a cell if not previously written, and we check all branches to be sure * a malicious user doesn't try to abuse us. */ diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 8560dea..4508493 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -100,6 +100,13 @@ ip: if (ip_is_fragment(iph)) ip_proto = 0; + /* skip the address processing if skb is NULL. The assumption + * here is that if there is no skb we are not looking for flow + * info but lengths and protocols. + */ + if (!skb) + break; + iph_to_flow_copy_addrs(flow, iph); break; } @@ -114,17 +121,15 @@ ipv6: return false; ip_proto = iph->nexthdr; - flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); - flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); - /* skip the flow label processing if skb is NULL. 
The - * assumption here is that if there is no skb we are not - * looking for flow info as much as we are length. - */ + /* see comment above in IPv4 section */ if (!skb) break; + flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); + flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -231,9 +236,13 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; + /* unless skb is set we don't need to record port info */ + if (skb) + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, + data, hlen); + return true; } EXPORT_SYMBOL(__skb_flow_dissect); @@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data, switch (keys->ip_proto) { case IPPROTO_TCP: { - const struct tcphdr *tcph; - struct tcphdr _tcph; + /* access doff as u8 to avoid unaligned access */ + const u8 *doff; + u8 _doff; - tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), - data, hlen, &_tcph); - if (!tcph) + doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff), + data, hlen, &_doff); + if (!doff) return poff; - poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2); break; } case IPPROTO_UDP: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7b3df0d..61059a0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -345,7 +345,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) unsigned long flags; local_irq_save(flags); - nc = &__get_cpu_var(netdev_alloc_cache); + nc = this_cpu_ptr(&netdev_alloc_cache); if (unlikely(!nc->frag.page)) { refill: for (order = NETDEV_FRAG_PAGE_MAX_ORDER; ;) { @@ -360,18 +360,29 @@ refill: goto end; } nc->frag.size = PAGE_SIZE << order; -recycle: - atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS); + /* Even if we own the 
page, we do not use atomic_set(). + * This would break get_page_unless_zero() users. + */ + atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1, + &nc->frag.page->_count); nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; nc->frag.offset = 0; } if (nc->frag.offset + fragsz > nc->frag.size) { - /* avoid unnecessary locked operations if possible */ - if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) || - atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count)) - goto recycle; - goto refill; + if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) { + if (!atomic_sub_and_test(nc->pagecnt_bias, + &nc->frag.page->_count)) + goto refill; + /* OK, page count is 0, we can safely set it */ + atomic_set(&nc->frag.page->_count, + NETDEV_PAGECNT_MAX_BIAS); + } else { + atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias, + &nc->frag.page->_count); + } + nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS; + nc->frag.offset = 0; } data = page_address(nc->frag.page) + nc->frag.offset; @@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag); /** * alloc_skb_with_frags - allocate skb with page frags * - * header_len: size of linear part - * data_len: needed length in frags - * max_page_order: max page order desired. - * errcode: pointer to error code if any - * gfp_mask: allocation mask + * @header_len: size of linear part + * @data_len: needed length in frags + * @max_page_order: max page order desired. + * @errcode: pointer to error code if any + * @gfp_mask: allocation mask * * This can be used to allocate a paged skb, given a maximal order for frags. 
*/ diff --git a/net/core/sock.c b/net/core/sock.c index b4f3ea2..15e0c67 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1718,6 +1718,8 @@ EXPORT_SYMBOL(sock_kmalloc); */ void sock_kfree_s(struct sock *sk, void *mem, int size) { + if (WARN_ON_ONCE(!mem)) + return; kfree(mem); atomic_sub(size, &sk->sk_omem_alloc); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ad2acfe..6bcaa33 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -757,7 +757,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ sk = __inet6_lookup_skb(&dccp_hashinfo, skb, - dh->dccph_sport, dh->dccph_dport); + dh->dccph_sport, dh->dccph_dport, + inet6_iif(skb)); /* * Step 2: * If no socket ... diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 97b0fcc..5ab6627 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index f380b2c..31cd4fd 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -176,11 +176,11 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) * The domain name may be a simple name or an absolute domain name (which * should end with a period). The domain name is case-independent. */ -static int -dns_resolver_match(const struct key *key, const void *description) +static bool dns_resolver_cmp(const struct key *key, + const struct key_match_data *match_data) { int slen, dlen, ret = 0; - const char *src = key->description, *dsp = description; + const char *src = key->description, *dsp = match_data->raw_data; kenter("%s,%s", src, dsp); @@ -209,6 +209,16 @@ no_match: } /* + * Preparse the match criterion. 
+ */ +static int dns_resolver_match_preparse(struct key_match_data *match_data) +{ + match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE; + match_data->cmp = dns_resolver_cmp; + return 0; +} + +/* * Describe a DNS key */ static void dns_resolver_describe(const struct key *key, struct seq_file *m) @@ -242,7 +252,7 @@ struct key_type key_type_dns_resolver = { .preparse = dns_resolver_preparse, .free_preparse = dns_resolver_free_preparse, .instantiate = generic_key_instantiate, - .match = dns_resolver_match, + .match_preparse = dns_resolver_match_preparse, .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8030489..6d18174 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/etherdevice.h> #include <linux/phy.h> +#include <linux/phy_fixed.h> #include <linux/of_net.h> #include <linux/of_mdio.h> #include "dsa_priv.h" @@ -598,7 +599,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); if (p->phy != NULL) { - if (ds->drv->get_phy_flags(ds, port)) + if (ds->drv->get_phy_flags) p->phy->dev_flags |= ds->drv->get_phy_flags(ds, port); phy_attach(slave_dev, dev_name(&p->phy->dev), diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5b6efb3..f99f41b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -537,7 +537,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) return 1; attrlen = rtnh_attrlen(rtnh); - if (attrlen < 0) { + if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index efa70ad..32e7892 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -87,6 +87,9 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb) if (!pskb_may_pull(skb, len)) goto drop; + uh = udp_hdr(skb); + guehdr = (struct guehdr *)&uh[1]; + if 
(guehdr->version != 0) goto drop; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index a777295..ccda096 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -55,13 +55,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, if (csum) skb->encap_hdr_csum = 1; - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - /* setup inner skb. */ skb->protocol = greh->protocol; skb->encapsulation = 0; + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + __skb_pull(skb, ghl); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e35b712..88e5ef2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1535,6 +1535,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, struct sk_buff *nskb; struct sock *sk; struct inet_sock *inet; + int err; if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) return; @@ -1574,8 +1575,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, sock_net_set(sk, net); __skb_queue_head_init(&sk->sk_write_queue); sk->sk_sndbuf = sysctl_wmem_default; - ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, &rt, MSG_DONTWAIT); + err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, + len, 0, &ipc, &rt, MSG_DONTWAIT); + if (unlikely(err)) { + ip_flush_pending_frames(sk); + goto out; + } + nskb = skb_peek(&sk->sk_write_queue); if (nskb) { if (arg->csumoffset >= 0) @@ -1587,7 +1593,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } - +out: put_cpu_var(unicast_sock); ip_rt_put(rt); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index f4c987b..88c386c 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 
inner_proto) skb_pull_rcsum(skb, hdr_len); if (inner_proto == htons(ETH_P_TEB)) { - struct ethhdr *eh = (struct ethhdr *)skb->data; + struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; + eh = (struct ethhdr *)skb->data; if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) skb->protocol = eh->h_proto; else diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 793c0bb..2d4ae46 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1311,7 +1311,7 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); + p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1939,7 +1939,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, do_cache = false; goto add; } - prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 0431a8f..32b98d0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -40,7 +40,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, net_get_random_once(syncookie_secret, sizeof(syncookie_secret)); - tmp = __get_cpu_var(ipv4_cookie_scratch); + tmp = this_cpu_ptr(ipv4_cookie_scratch); memcpy(tmp + 4, syncookie_secret[c], sizeof(syncookie_secret[c])); tmp[0] = (__force u32)saddr; tmp[1] = (__force u32)daddr; @@ -255,9 +255,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_check_timestamp); -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, - struct ip_options *opt) +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct tcp_options_received tcp_opt; struct inet_request_sock *ireq; struct 
tcp_request_sock *treq; @@ -317,15 +317,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - if (opt && opt->optlen) { - int opt_size = sizeof(struct ip_options_rcu) + opt->optlen; - - ireq->opt = kmalloc(opt_size, GFP_ATOMIC); - if (ireq->opt != NULL && ip_options_echo(&ireq->opt->opt, skb)) { - kfree(ireq->opt); - ireq->opt = NULL; - } - } + ireq->opt = tcp_v4_save_options(skb); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -344,7 +336,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), - (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, + opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 461003d..1bec4e7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2941,7 +2941,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) local_bh_disable(); p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - return __this_cpu_ptr(p); + return raw_cpu_ptr(p); local_bh_enable(); return NULL; @@ -3071,8 +3071,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 00a4149..a12b455 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -68,6 +68,7 @@ #include 
<linux/module.h> #include <linux/sysctl.h> #include <linux/kernel.h> +#include <linux/prefetch.h> #include <net/dst.h> #include <net/tcp.h> #include <net/inet_common.h> @@ -3029,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) return packets_acked; } +static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, + u32 prior_snd_una) +{ + const struct skb_shared_info *shinfo; + + /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */ + if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))) + return; + + shinfo = skb_shinfo(skb); + if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) && + between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1)) + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); +} + /* Remove acknowledged frames from the retransmission queue. If our packet * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. @@ -3052,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt.v64 = 0; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { - struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u8 sacked = scb->sacked; u32 acked_pcount; - if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) && - between(shinfo->tskey, prior_snd_una, tp->snd_una - 1)) - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + tcp_ack_tstamp(sk, skb, prior_snd_una); /* Determine how many packets and what bytes were acked, tso and else */ if (after(scb->end_seq, tp->snd_una)) { @@ -3073,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, fully_acked = false; } else { + /* Speedup tcp_unlink_write_queue() and next loop */ + prefetchw(skb->next); acked_pcount = tcp_skb_pcount(skb); } - if (sacked & TCPCB_RETRANS) { + if (unlikely(sacked & TCPCB_RETRANS)) { if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; @@ -3107,7 +3122,7 @@ 
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if (!(scb->tcp_flags & TCPHDR_SYN)) { + if (likely(!(scb->tcp_flags & TCPHDR_SYN))) { flag |= FLAG_DATA_ACKED; } else { flag |= FLAG_SYN_ACKED; @@ -3119,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - if (skb == tp->retransmit_skb_hint) + if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; - if (skb == tp->lost_skb_hint) + if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; } @@ -3132,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_SACK_RENEGING; skb_mstamp_get(&now); - if (first_ackt.v64) { + if (likely(first_ackt.v64)) { seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); } @@ -3394,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int acked = 0; /* Number of packets newly acked */ long sack_rtt_us = -1L; + /* We very likely will need to access write queue head. */ + prefetchw(sk->sk_write_queue.next); + /* If the ack is older than previous acks * then we can probably ignore it. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 552e87e..94d1a77 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -880,26 +880,6 @@ bool tcp_syn_flood_action(struct sock *sk, } EXPORT_SYMBOL(tcp_syn_flood_action); -/* - * Save and compile IPv4 options into the request_sock if needed. 
- */ -static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) -{ - const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; - struct ip_options_rcu *dopt = NULL; - - if (opt && opt->optlen) { - int opt_size = sizeof(*dopt) + opt->optlen; - - dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { - kfree(dopt); - dopt = NULL; - } - } - return dopt; -} - #ifdef CONFIG_TCP_MD5SIG /* * RFC2385 MD5 checksumming requires a mapping of @@ -1428,7 +1408,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_SYN_COOKIES if (!th->syn) - sk = cookie_v4_check(sk, skb, &TCP_SKB_CB(skb)->header.h4.opt); + sk = cookie_v4_check(sk, skb); #endif return sk; } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af5226..1d19135 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8d4eac7..3af2129 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -839,26 +839,38 @@ void tcp_wfree(struct sk_buff *skb) { struct sock *sk = skb->sk; struct tcp_sock *tp = tcp_sk(sk); + int wmem; + + /* Keep one reference on sk_wmem_alloc. + * Will be released by sk_free() from here or tcp_tasklet_func() + */ + wmem = atomic_sub_return(skb->truesize - 1, &sk->sk_wmem_alloc); + + /* If this softirq is serviced by ksoftirqd, we are likely under stress. + * Wait until our queues (qdisc + devices) are drained. 
+ * This gives : + * - less callbacks to tcp_write_xmit(), reducing stress (batches) + * - chance for incoming ACK (processed by another cpu maybe) + * to migrate this flow (skb->ooo_okay will be eventually set) + */ + if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current) + goto out; if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) && !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) { unsigned long flags; struct tsq_tasklet *tsq; - /* Keep a ref on socket. - * This last ref will be released in tcp_tasklet_func() - */ - atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc); - /* queue this socket to tasklet queue */ local_irq_save(flags); - tsq = &__get_cpu_var(tsq_tasklet); + tsq = this_cpu_ptr(&tsq_tasklet); list_add(&tp->tsq_node, &tsq->head); tasklet_schedule(&tsq->tasklet); local_irq_restore(flags); - } else { - sock_wfree(skb); + return; } +out: + sk_free(sk); } /* This routine actually transmits TCP packets queued in by @@ -914,9 +926,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_ca_event(sk, CA_EVENT_TX_START); /* if no packet is in qdisc/device queue, then allow XPS to select - * another queue. + * another queue. We can be called from tcp_tsq_handler() + * which holds one reference to sk_wmem_alloc. + * + * TODO: Ideally, in-flight pure ACK packets should not matter here. + * One way to get this would be to set skb->truesize = 2 on them. 
*/ - skb->ooo_okay = sk_wmem_alloc_get(sk) == 0; + skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f5e319a..baf2742 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -235,7 +235,6 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; atomic_set(&aca->aca_refcnt, 1); - spin_lock_init(&aca->aca_lock); return aca; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 9034f76..91014d3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -46,6 +46,7 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto) if (unlikely(!pskb_may_pull(skb, len))) break; + opth = (void *)skb->data; proto = opth->nexthdr; __skb_pull(skb, len); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6eab37c..58e5b47 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -485,11 +485,11 @@ static void ipip6_tunnel_uninit(struct net_device *dev) */ static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) { - const struct iphdr *iph = (const struct iphdr *) skb->data; + int ihl = ((const struct iphdr *)skb->data)->ihl*4; struct rt6_info *rt; struct sk_buff *skb2; - if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + if (!pskb_may_pull(skb, ihl + sizeof(struct ipv6hdr) + 8)) return 1; skb2 = skb_clone(skb, GFP_ATOMIC); @@ -498,7 +498,7 @@ static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) return 1; skb_dst_drop(skb2); - skb_pull(skb2, iph->ihl * 4); + skb_pull(skb2, ihl); skb_reset_network_header(skb2); rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 9a2838e..2f25cb6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -67,7 +67,7 @@ static u32 cookie_hash(const struct in6_addr *saddr, const 
struct in6_addr *dadd net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); - tmp = __get_cpu_var(ipv6_cookie_scratch); + tmp = this_cpu_ptr(ipv6_cookie_scratch); /* * we have 320 bits of information to hash, copy in the remaining @@ -214,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); + ireq->ir_iif = tcp_v6_iif(skb); ireq->ir_mark = inet_request_mark(sk, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cf2e45a..8314955 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -424,6 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, inet6_iif(skb)); if (!req) @@ -738,7 +739,7 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) - ireq->ir_iif = inet6_iif(skb); + ireq->ir_iif = tcp_v6_iif(skb); if (!TCP_SKB_CB(skb)->tcp_tw_isn && (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || @@ -860,7 +861,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) - fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_oif = tcp_v6_iif(skb); else fl6.flowi6_oif = oif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); @@ -918,7 +919,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), &tcp_hashinfo, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), inet6_iif(skb)); + ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) return; @@ 
-1000,13 +1001,14 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) /* Find possible connection requests. */ req = inet6_csk_search_req(sk, &prev, th->source, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, inet6_iif(skb)); + &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb)); + &ipv6_hdr(skb)->saddr, th->source, + &ipv6_hdr(skb)->daddr, ntohs(th->dest), + tcp_v6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1090,7 +1092,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (np->repflow) @@ -1174,7 +1176,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; - newnp->mcast_oif = inet6_iif(skb); + newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (np->repflow) @@ -1360,7 +1362,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) - np->mcast_oif = inet6_iif(opt_skb); + np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) @@ -1427,7 +1429,8 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, 
th->source, th->dest); + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, + tcp_v6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1514,7 +1517,7 @@ do_time_wait: sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), inet6_iif(skb)); + ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); @@ -1553,6 +1556,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (th->doff < sizeof(struct tcphdr) / 4) return; + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index a64fa15..1d5341f 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -96,13 +96,13 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, if (data_limit - data < plen) { /* check if there is partial match */ - if (strnicmp(data, pattern, data_limit - data) == 0) + if (strncasecmp(data, pattern, data_limit - data) == 0) return -1; else return 0; } - if (strnicmp(data, pattern, plen) != 0) { + if (strncasecmp(data, pattern, plen) != 0) { return 0; } s = data + plen; @@ -354,7 +354,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, data_limit = skb_tail_pointer(skb); while (data <= data_limit - 6) { - if (strnicmp(data, "PASV\r\n", 6) == 0) { + if (strncasecmp(data, "PASV\r\n", 6) == 0) { /* Passive mode on */ IP_VS_DBG(7, "got PASV at %td of %td\n", data - data_start, diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index b8a0924..b666959 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -304,12 +304,12 @@ static int find_pattern(const char *data, size_t dlen, if (dlen <= plen) { /* 
Short packet: try for partial? */ - if (strnicmp(data, pattern, dlen) == 0) + if (strncasecmp(data, pattern, dlen) == 0) return -1; else return 0; } - if (strnicmp(data, pattern, plen) != 0) { + if (strncasecmp(data, pattern, plen) != 0) { #if 0 size_t i; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 4c3ba1c..885b4ab 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -247,7 +247,7 @@ int ct_sip_parse_request(const struct nf_conn *ct, for (; dptr < limit - strlen("sip:"); dptr++) { if (*dptr == '\r' || *dptr == '\n') return -1; - if (strnicmp(dptr, "sip:", strlen("sip:")) == 0) { + if (strncasecmp(dptr, "sip:", strlen("sip:")) == 0) { dptr += strlen("sip:"); break; } @@ -350,7 +350,7 @@ static const char *ct_sip_header_search(const char *dptr, const char *limit, continue; } - if (strnicmp(dptr, needle, len) == 0) + if (strncasecmp(dptr, needle, len) == 0) return dptr; } return NULL; @@ -383,10 +383,10 @@ int ct_sip_get_header(const struct nf_conn *ct, const char *dptr, /* Find header. Compact headers must be followed by a * non-alphabetic character to avoid mismatches. 
*/ if (limit - dptr >= hdr->len && - strnicmp(dptr, hdr->name, hdr->len) == 0) + strncasecmp(dptr, hdr->name, hdr->len) == 0) dptr += hdr->len; else if (hdr->cname && limit - dptr >= hdr->clen + 1 && - strnicmp(dptr, hdr->cname, hdr->clen) == 0 && + strncasecmp(dptr, hdr->cname, hdr->clen) == 0 && !isalpha(*(dptr + hdr->clen))) dptr += hdr->clen; else @@ -620,9 +620,9 @@ static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=", &matchoff, &matchlen)) { - if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP"))) + if (!strncasecmp(dptr + matchoff, "TCP", strlen("TCP"))) *proto = IPPROTO_TCP; - else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP"))) + else if (!strncasecmp(dptr + matchoff, "UDP", strlen("UDP"))) *proto = IPPROTO_UDP; else return 0; @@ -743,10 +743,10 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr, if (term != SDP_HDR_UNSPEC && limit - dptr >= thdr->len && - strnicmp(dptr, thdr->name, thdr->len) == 0) + strncasecmp(dptr, thdr->name, thdr->len) == 0) break; else if (limit - dptr >= hdr->len && - strnicmp(dptr, hdr->name, hdr->len) == 0) + strncasecmp(dptr, hdr->name, hdr->len) == 0) dptr += hdr->len; else continue; @@ -1394,7 +1394,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, if (handler->response == NULL) continue; if (*datalen < matchend + handler->len || - strnicmp(*dptr + matchend, handler->method, handler->len)) + strncasecmp(*dptr + matchend, handler->method, handler->len)) continue; return handler->response(skb, protoff, dataoff, dptr, datalen, cseq, code); @@ -1435,7 +1435,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, if (handler->request == NULL) continue; if (*datalen < handler->len || - strnicmp(*dptr, handler->method, handler->len)) + strncasecmp(*dptr, handler->method, handler->len)) continue; if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ, @@ -1462,7 +1462,7 
@@ static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, const struct nf_nat_sip_hooks *hooks; int ret; - if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + if (strncasecmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) ret = process_sip_request(skb, protoff, dataoff, dptr, datalen); else ret = process_sip_response(skb, protoff, dataoff, dptr, datalen); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index daad602..d719764 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -30,7 +30,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) log = rcu_dereference_protected(loggers[pf][i], lockdep_is_held(&nf_log_mutex)); - if (!strnicmp(str_logger, log->name, strlen(log->name))) + if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index b4d691d..791fac4 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -155,7 +155,7 @@ static unsigned int nf_nat_sip(struct sk_buff *skb, unsigned int protoff, int request, in_header; /* Basic rules: requests and responses. 
*/ - if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { + if (strncasecmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) { if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index ec8a456..57d3e1a 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -72,7 +72,7 @@ nla_put_failure: } EXPORT_SYMBOL_GPL(nft_reject_dump); -static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = { +static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX + 1] = { [NFT_REJECT_ICMPX_NO_ROUTE] = ICMP_NET_UNREACH, [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMP_PORT_UNREACH, [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMP_HOST_UNREACH, @@ -81,8 +81,7 @@ static u8 icmp_code_v4[NFT_REJECT_ICMPX_MAX] = { int nft_reject_icmp_code(u8 code) { - if (code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; + BUG_ON(code > NFT_REJECT_ICMPX_MAX); return icmp_code_v4[code]; } @@ -90,7 +89,7 @@ int nft_reject_icmp_code(u8 code) EXPORT_SYMBOL_GPL(nft_reject_icmp_code); -static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = { +static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX + 1] = { [NFT_REJECT_ICMPX_NO_ROUTE] = ICMPV6_NOROUTE, [NFT_REJECT_ICMPX_PORT_UNREACH] = ICMPV6_PORT_UNREACH, [NFT_REJECT_ICMPX_HOST_UNREACH] = ICMPV6_ADDR_UNREACH, @@ -99,8 +98,7 @@ static u8 icmp_code_v6[NFT_REJECT_ICMPX_MAX] = { int nft_reject_icmpv6_code(u8 code) { - if (code > NFT_REJECT_ICMPX_MAX) - return -EINVAL; + BUG_ON(code > NFT_REJECT_ICMPX_MAX); return icmp_code_v6[code]; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 0b4692d..a845cd4 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -246,7 +246,6 @@ int netlbl_cfg_unlbl_static_add(struct net *net, * @addr: IP address in network byte order (struct in[6]_addr) * @mask: address mask in network byte order (struct in[6]_addr) * @family: address family - * @secid: LSM secid value for the entry * @audit_info: NetLabel audit information * * 
Description: diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c416725..7a186e7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -715,7 +715,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, * after validation, the socket and the ring may only be used by a * single process, otherwise we fall back to copying. */ - if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || + if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 || atomic_read(&nlk->mapped) > 1) excl = false; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 71cf1bf..1b06a1f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -30,7 +30,7 @@ #include <linux/skbuff.h> #include <net/net_namespace.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 743262b..6ae063c 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -20,8 +20,8 @@ #include <linux/in.h> #include <linux/if_ether.h> /* For the statistics structure. 
*/ #include <linux/slab.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <linux/inet.h> diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index c3073a2..80dbd0b 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -23,7 +23,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c index 0b4bcb2..00fbf14 100644 --- a/net/netrom/nr_out.c +++ b/net/netrom/nr_out.c @@ -22,7 +22,7 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b976d5e..96b64d2 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -25,7 +25,7 @@ #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index ca40e22..029c8bb 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -22,7 +22,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index ff2c1b1..94d05806 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -23,7 +23,7 @@ #include <linux/skbuff.h> #include <net/sock.h> #include <net/tcp_states.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> diff --git 
a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 62db02b..2b78789 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -274,6 +274,8 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; else key->ip.frag = OVS_FRAG_TYPE_FIRST; + } else { + key->ip.frag = OVS_FRAG_TYPE_NONE; } nh_len = payload_ofs - nh_ofs; @@ -358,6 +360,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, */ key->tp.src = htons(icmp->icmp6_type); key->tp.dst = htons(icmp->icmp6_code); + memset(&key->ipv6.nd, 0, sizeof(key->ipv6.nd)); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || @@ -557,10 +560,11 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (key->eth.type == htons(ETH_P_ARP) || key->eth.type == htons(ETH_P_RARP)) { struct arp_eth_header *arp; + bool arp_available = arphdr_ok(skb); arp = (struct arp_eth_header *)skb_network_header(skb); - if (arphdr_ok(skb) && + if (arp_available && arp->ar_hrd == htons(ARPHRD_ETHER) && arp->ar_pro == htons(ETH_P_IP) && arp->ar_hln == ETH_ALEN && @@ -673,9 +677,6 @@ int ovs_flow_key_extract(struct ovs_tunnel_info *tun_info, key->ovs_flow_hash = 0; key->recirc_id = 0; - /* Flags are always used as part of stats */ - key->tp.flags = 0; - return key_extract(skb, key); } diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 368f233..939bcb3 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -103,10 +103,19 @@ static void update_range__(struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ value_p, len, is_mask) -static u16 range_n_bytes(const struct sw_flow_key_range *range) -{ - return range->end - range->start; -} +#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ + do { \ + update_range__(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), 
is_mask); \ + if (is_mask) { \ + if ((match)->mask) \ + memset((u8 *)&(match)->mask->key.field, value,\ + sizeof((match)->mask->key.field)); \ + } else { \ + memset((u8 *)&(match)->key->field, value, \ + sizeof((match)->key->field)); \ + } \ + } while (0) static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs) @@ -809,13 +818,26 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, return 0; } -static void sw_flow_mask_set(struct sw_flow_mask *mask, - struct sw_flow_key_range *range, u8 val) +static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) { - u8 *m = (u8 *)&mask->key + range->start; + struct nlattr *nla; + int rem; + + /* The nlattr stream should already have been validated */ + nla_for_each_nested(nla, attr, rem) { + /* We assume that ovs_key_lens[type] == -1 means that type is a + * nested attribute + */ + if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) + nlattr_set(nla, val, false); + else + memset(nla_data(nla), val, nla_len(nla)); + } +} - mask->range = *range; - memset(m, val, range_n_bytes(range)); +static void mask_set_nlattr(struct nlattr *attr, u8 val) +{ + nlattr_set(attr, val, true); } /** @@ -836,6 +858,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; + struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; bool encap_valid = false; @@ -882,18 +905,44 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (err) return err; + if (match->mask && !mask) { + /* Create an exact match mask. We need to set to 0xff all the + * 'match->mask' fields that have been touched in 'match->key'. + * We cannot simply memset 'match->mask', because padding bytes + * and fields not specified in 'match->key' should be left to 0. 
+ * Instead, we use a stream of netlink attributes, copied from + * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care + * of filling 'match->mask' appropriately. + */ + newmask = kmemdup(key, nla_total_size(nla_len(key)), + GFP_KERNEL); + if (!newmask) + return -ENOMEM; + + mask_set_nlattr(newmask, 0xff); + + /* The userspace does not send tunnel attributes that are 0, + * but we should not wildcard them nonetheless. + */ + if (match->key->tun_key.ipv4_dst) + SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); + + mask = newmask; + } + if (mask) { err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) - return err; + goto free_newmask; - if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { + if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { __be16 eth_type = 0; __be16 tci = 0; if (!encap_valid) { OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n"); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); @@ -904,10 +953,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); encap = a[OVS_KEY_ATTR_ENCAP]; err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); + if (err) + goto free_newmask; } else { OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n", ntohs(eth_type)); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } if (a[OVS_KEY_ATTR_VLAN]) @@ -915,23 +967,22 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (!(tci & htons(VLAN_TAG_PRESENT))) { OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n", ntohs(tci)); - return -EINVAL; + err = -EINVAL; + goto free_newmask; } } err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) - return err; - } else { - /* Populate exact match flow's key mask. 
*/ - if (match->mask) - sw_flow_mask_set(match->mask, &match->range, 0xff); + goto free_newmask; } if (!match_validate(match, key_attrs, mask_attrs)) - return -EINVAL; + err = -EINVAL; - return 0; +free_newmask: + kfree(newmask); + return err; } /** diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 910b3ef..106a9d8 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -30,7 +30,7 @@ /** * struct geneve_port - Keeps track of open UDP ports - * @sock: The socket created for this port number. + * @gs: The socket created for this port number. * @name: vport name. */ struct geneve_port { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 53001b0..6015802 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -408,13 +408,13 @@ int ovs_vport_get_upcall_portids(const struct vport *vport, * * Returns the portid of the target socket. Must be called with rcu_read_lock. */ -u32 ovs_vport_find_upcall_portid(const struct vport *p, struct sk_buff *skb) +u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) { struct vport_portids *ids; u32 ids_index; u32 hash; - ids = rcu_dereference(p->upcall_portids); + ids = rcu_dereference(vport->upcall_portids); if (ids->n_ids == 1 && ids->ids[0] == 0) return 0; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index e8fdb17..273b8bf 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -267,7 +267,7 @@ static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) unsigned long *flag; preempt_disable(); - flag = &__get_cpu_var(clean_list_grace); + flag = this_cpu_ptr(&clean_list_grace); set_bit(CLEAN_LIST_BUSY_BIT, flag); ret = llist_del_first(&pool->clean_list); if (ret) diff --git a/net/rds/rdma.c b/net/rds/rdma.c index 4e37c1c..40084d8 100644 --- a/net/rds/rdma.c +++ b/net/rds/rdma.c @@ -564,12 +564,12 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, if (rs->rs_bound_addr == 
0) { ret = -ENOTCONN; /* XXX not a great errno */ - goto out; + goto out_ret; } if (args->nr_local > UIO_MAXIOV) { ret = -EMSGSIZE; - goto out; + goto out_ret; } /* Check whether to allocate the iovec area */ @@ -578,7 +578,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, iovs = sock_kmalloc(rds_rs_to_sk(rs), iov_size, GFP_KERNEL); if (!iovs) { ret = -ENOMEM; - goto out; + goto out_ret; } } @@ -696,6 +696,7 @@ out: if (iovs != iovstack) sock_kfree_s(rds_rs_to_sk(rs), iovs, iov_size); kfree(pages); +out_ret: if (ret) rds_rdma_free_op(op); else diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 1b24191..db0f39f 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -44,7 +44,6 @@ struct key_type key_type_rxrpc = { .preparse = rxrpc_preparse, .free_preparse = rxrpc_free_preparse, .instantiate = generic_key_instantiate, - .match = user_match, .destroy = rxrpc_destroy, .describe = rxrpc_describe, .read = rxrpc_read, @@ -61,7 +60,6 @@ struct key_type key_type_rxrpc_s = { .preparse = rxrpc_preparse_s, .free_preparse = rxrpc_free_preparse_s, .instantiate = generic_key_instantiate, - .match = user_match, .destroy = rxrpc_destroy_s, .describe = rxrpc_describe, }; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 38d58e6..6efca30 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -57,7 +57,8 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, - const struct netdev_queue *txq) + const struct netdev_queue *txq, + int *packets) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; @@ -70,6 +71,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; + (*packets)++; /* GSO counts as one pkt */ } skb->next = NULL; } @@ -77,11 +79,13 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, /* Note that dequeue_skb can possibly return a SKB list 
(via skb->next). * A requeued skb (via q->gso_skb) can also be a SKB list. */ -static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate) +static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, + int *packets) { struct sk_buff *skb = q->gso_skb; const struct netdev_queue *txq = q->dev_queue; + *packets = 1; *validate = true; if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ @@ -98,7 +102,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate) !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); if (skb && qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq); + try_bulk_dequeue_skb(q, skb, txq, packets); } } return skb; @@ -204,7 +208,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline int qdisc_restart(struct Qdisc *q) +static inline int qdisc_restart(struct Qdisc *q, int *packets) { struct netdev_queue *txq; struct net_device *dev; @@ -213,7 +217,7 @@ static inline int qdisc_restart(struct Qdisc *q) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate); + skb = dequeue_skb(q, &validate, packets); if (unlikely(!skb)) return 0; @@ -227,14 +231,16 @@ static inline int qdisc_restart(struct Qdisc *q) void __qdisc_run(struct Qdisc *q) { int quota = weight_p; + int packets; - while (qdisc_restart(q)) { + while (qdisc_restart(q, &packets)) { /* * Ordered by possible occurrence: Postpone processing if * 1. we've exceeded packet quota * 2. another process needs the CPU; */ - if (--quota <= 0 || need_resched()) { + quota -= packets; + if (quota <= 0 || need_resched()) { __netif_schedule(q); break; } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index a88b852..f791edd 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1668,6 +1668,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack( * ack chunk whose serial number matches that of the request. 
*/ list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) { + if (sctp_chunk_pending(ack)) + continue; if (ack->subh.addip_hdr->serial == serial) { sctp_chunk_hold(ack); return ack; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 4de12af..7e8a16c 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -140,18 +140,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) } else { /* Nothing to do. Next chunk in the packet, please. */ ch = (sctp_chunkhdr_t *) chunk->chunk_end; - /* Force chunk->skb->data to chunk->chunk_end. */ - skb_pull(chunk->skb, - chunk->chunk_end - chunk->skb->data); - - /* Verify that we have at least chunk headers - * worth of buffer left. - */ - if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) { - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - } + skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data); + /* We are guaranteed to pull a SCTP header. */ } } @@ -187,24 +178,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) < + skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { - /* RFC 2960, Section 6.10 Bundling - * - * Partial chunks MUST NOT be placed in an SCTP packet. - * If the receiver detects a partial chunk, it MUST drop - * the chunk. - * - * Since the end of the chunk is past the end of our buffer - * (which contains the whole packet, we can freely discard - * the whole packet. - */ - sctp_chunk_free(chunk); - chunk = queue->in_progress = NULL; - - return NULL; + /* Discard inside state machine. */ + chunk->pdiscard = 1; + chunk->chunk_end = skb_tail_pointer(chunk->skb); } else { /* We are at the end of the packet, so mark the chunk * in case we need to send a SACK. 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 9d2c6c9..8f34b27 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index ae0e616..ab734be 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3110,50 +3110,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, return SCTP_ERROR_NO_ERROR; } -/* Verify the ASCONF packet before we process it. */ -int sctp_verify_asconf(const struct sctp_association *asoc, - struct sctp_paramhdr *param_hdr, void *chunk_end, - struct sctp_paramhdr **errp) { - sctp_addip_param_t *asconf_param; +/* Verify the ASCONF packet before we process it. */ +bool sctp_verify_asconf(const struct sctp_association *asoc, + struct sctp_chunk *chunk, bool addr_param_needed, + struct sctp_paramhdr **errp) +{ + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; union sctp_params param; - int length, plen; - - param.v = (sctp_paramhdr_t *) param_hdr; - while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) { - length = ntohs(param.p->length); - *errp = param.p; + bool addr_param_seen = false; - if (param.v > chunk_end - length || - length < sizeof(sctp_paramhdr_t)) - return 0; + sctp_walk_params(param, addip, addip_hdr.params) { + size_t length = ntohs(param.p->length); + *errp = param.p; switch (param.p->type) { + case SCTP_PARAM_ERR_CAUSE: + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (length != sizeof(sctp_ipv4addr_param_t)) + return false; + addr_param_seen = true; + break; + case SCTP_PARAM_IPV6_ADDRESS: + if (length != sizeof(sctp_ipv6addr_param_t)) + return false; + addr_param_seen = true; + break; case SCTP_PARAM_ADD_IP: case 
SCTP_PARAM_DEL_IP: case SCTP_PARAM_SET_PRIMARY: - asconf_param = (sctp_addip_param_t *)param.v; - plen = ntohs(asconf_param->param_hdr.length); - if (plen < sizeof(sctp_addip_param_t) + - sizeof(sctp_paramhdr_t)) - return 0; + /* In ASCONF chunks, these need to be first. */ + if (addr_param_needed && !addr_param_seen) + return false; + length = ntohs(param.addip->param_hdr.length); + if (length < sizeof(sctp_addip_param_t) + + sizeof(sctp_paramhdr_t)) + return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: if (length != sizeof(sctp_addip_param_t)) - return 0; - + return false; break; default: - break; + /* This is unknown to us, reject! */ + return false; } - - param.v += WORD_ROUND(length); } - if (param.v != chunk_end) - return 0; + /* Remaining sanity checks. */ + if (addr_param_needed && !addr_param_seen) + return false; + if (!addr_param_needed && addr_param_seen) + return false; + if (param.v != chunk->chunk_end) + return false; - return 1; + return true; } /* Process an incoming ASCONF chunk with the next expected serial no. and @@ -3162,16 +3175,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { + sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + bool all_param_pass = true; + union sctp_params param; sctp_addiphdr_t *hdr; union sctp_addr_param *addr_param; sctp_addip_param_t *asconf_param; struct sctp_chunk *asconf_ack; - __be16 err_code; int length = 0; int chunk_len; __u32 serial; - int all_param_pass = 1; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t); hdr = (sctp_addiphdr_t *)asconf->skb->data; @@ -3199,9 +3213,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, goto done; /* Process the TLVs contained within the ASCONF chunk. 
*/ - while (chunk_len > 0) { + sctp_walk_params(param, addip, addip_hdr.params) { + /* Skip preceding address parameters. */ + if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || + param.p->type == SCTP_PARAM_IPV6_ADDRESS) + continue; + err_code = sctp_process_asconf_param(asoc, asconf, - asconf_param); + param.addip); /* ADDIP 4.1 A7) * If an error response is received for a TLV parameter, * all TLVs with no response before the failed TLV are @@ -3209,28 +3228,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, * the failed response are considered unsuccessful unless * a specific success indication is present for the parameter. */ - if (SCTP_ERROR_NO_ERROR != err_code) - all_param_pass = 0; - + if (err_code != SCTP_ERROR_NO_ERROR) + all_param_pass = false; if (!all_param_pass) - sctp_add_asconf_response(asconf_ack, - asconf_param->crr_id, err_code, - asconf_param); + sctp_add_asconf_response(asconf_ack, param.addip->crr_id, + err_code, param.addip); /* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add * an IP address sends an 'Out of Resource' in its response, it * MUST also fail any subsequent add or delete requests bundled * in the ASCONF. */ - if (SCTP_ERROR_RSRC_LOW == err_code) + if (err_code == SCTP_ERROR_RSRC_LOW) goto done; - - /* Move to the next ASCONF param. */ - length = ntohs(asconf_param->param_hdr.length); - asconf_param = (void *)asconf_param + length; - chunk_len -= length; } - done: asoc->peer.addip_serial++; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index c8f6063..3ee27b7 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -170,6 +170,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); + /* Previously already marked? 
*/ + if (unlikely(chunk->pdiscard)) + return 0; if (unlikely(chunk_length < required_length)) return 0; @@ -3591,9 +3594,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, struct sctp_chunk *asconf_ack = NULL; struct sctp_paramhdr *err_param = NULL; sctp_addiphdr_t *hdr; - union sctp_addr_param *addr_param; __u32 serial; - int length; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3618,17 +3619,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, hdr = (sctp_addiphdr_t *)chunk->skb->data; serial = ntohl(hdr->serial); - addr_param = (union sctp_addr_param *)hdr->params; - length = ntohs(addr_param->p.length); - if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, - (void *)addr_param, commands); - /* Verify the ASCONF chunk before processing it. */ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)((void *)addr_param + length), - (void *)chunk->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, chunk, true, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); @@ -3745,10 +3737,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. 
*/ - if (!sctp_verify_asconf(asoc, - (sctp_paramhdr_t *)addip_hdr->params, - (void *)asconf_ack->chunk_end, - &err_param)) + if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param)) return sctp_sf_violation_paramlen(net, ep, asoc, type, arg, (void *)err_param, commands); diff --git a/net/socket.c b/net/socket.c index ffd9cb4..fe20c31 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1065,7 +1065,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - err = f_setown(sock->file, pid, 1); + f_setown(sock->file, pid, 1); + err = 0; break; case FIOGETOWN: case SIOCGPGRP: diff --git a/net/tipc/link.c b/net/tipc/link.c index 65410e1..1db162a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1924,7 +1924,12 @@ void tipc_link_bundle_rcv(struct sk_buff *buf) } omsg = buf_msg(obuf); pos += align(msg_size(omsg)); - if (msg_isdata(omsg) || (msg_user(omsg) == CONN_MANAGER)) { + if (msg_isdata(omsg)) { + if (unlikely(msg_type(omsg) == TIPC_MCAST_MSG)) + tipc_sk_mcast_rcv(obuf); + else + tipc_sk_rcv(obuf); + } else if (msg_user(omsg) == CONN_MANAGER) { tipc_sk_rcv(obuf); } else if (msg_user(omsg) == NAME_DISTRIBUTOR) { tipc_named_rcv(obuf); diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index a55c27b..4596115 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -46,38 +46,6 @@ static void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, static void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); static void lib80211_crypt_deinit_handler(unsigned long data); -const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) -{ - const char *s = ssid; - char *d = buf; - - ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN); - while (ssid_len--) { - if (isprint(*s)) { - *d++ = *s++; - continue; - } - - *d++ = '\\'; - if (*s == '\0') - *d++ = '0'; - else if (*s == '\n') - *d++ = 'n'; - else if (*s == '\r') - *d++ = 'r'; - else if 
(*s == '\t') - *d++ = 't'; - else if (*s == '\\') - *d++ = '\\'; - else - d += snprintf(d, 3, "%03o", *s); - s++; - } - *d = '\0'; - return buf; -} -EXPORT_SYMBOL(print_ssid); - int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, spinlock_t *lock) { |