diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2008-03-19 17:59:44 -0400 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2008-03-19 17:59:44 -0400 |
commit | c7c350e92aab1bba68f26a6027b734adcf9824ba (patch) | |
tree | aa99bd94c3049dd871d9c030d70a5f3d87591a95 /net | |
parent | 2f42b5d043ee271d1e5d30ecd77186b6c4d4e534 (diff) | |
parent | f8512ad0da16cbe156f3a7627971cdf0b39c4138 (diff) | |
download | op-kernel-dev-c7c350e92aab1bba68f26a6027b734adcf9824ba.zip op-kernel-dev-c7c350e92aab1bba68f26a6027b734adcf9824ba.tar.gz |
Merge branch 'hotfixes' into devel
Diffstat (limited to 'net')
-rw-r--r-- | net/bluetooth/bnep/bnep.h | 2 | ||||
-rw-r--r-- | net/bluetooth/bnep/sock.c | 4 | ||||
-rw-r--r-- | net/bluetooth/hci_core.c | 4 | ||||
-rw-r--r-- | net/bluetooth/hci_sock.c | 4 | ||||
-rw-r--r-- | net/core/sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 12 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_expect.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_extend.c | 19 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 2 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_log.c | 32 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue.c | 17 | ||||
-rw-r--r-- | net/netfilter/xt_time.c | 7 | ||||
-rw-r--r-- | net/rxrpc/ar-recvmsg.c | 3 | ||||
-rw-r--r-- | net/sctp/bind_addr.c | 4 | ||||
-rw-r--r-- | net/sctp/ipv6.c | 4 | ||||
-rw-r--r-- | net/sctp/protocol.c | 4 | ||||
-rw-r--r-- | net/sctp/sm_make_chunk.c | 8 | ||||
-rw-r--r-- | net/sctp/socket.c | 73 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 | ||||
-rw-r--r-- | net/sunrpc/xprtrdma/svc_rdma_transport.c | 96 |
20 files changed, 187 insertions, 116 deletions
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index a299228..e69244dd 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -174,7 +174,7 @@ struct bnep_session { void bnep_net_setup(struct net_device *dev); int bnep_sock_init(void); -int bnep_sock_cleanup(void); +void bnep_sock_cleanup(void); static inline int bnep_mc_hash(__u8 *addr) { diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 81065e5..201e5b1 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -257,12 +257,10 @@ error: return err; } -int __exit bnep_sock_cleanup(void) +void __exit bnep_sock_cleanup(void) { if (bt_sock_unregister(BTPROTO_BNEP) < 0) BT_ERR("Can't unregister BNEP socket"); proto_unregister(&bnep_proto); - - return 0; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 930b58e..aec6929 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -902,8 +902,6 @@ int hci_unregister_dev(struct hci_dev *hdev) BT_DBG("%p name %s type %d", hdev, hdev->name, hdev->type); - hci_unregister_sysfs(hdev); - write_lock_bh(&hci_dev_list_lock); list_del(&hdev->list); write_unlock_bh(&hci_dev_list_lock); @@ -915,6 +913,8 @@ int hci_unregister_dev(struct hci_dev *hdev) hci_notify(hdev, HCI_DEV_UNREG); + hci_unregister_sysfs(hdev); + __hci_dev_put(hdev); return 0; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1499132..b5d4019 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -734,7 +734,7 @@ error: return err; } -int __exit hci_sock_cleanup(void) +void __exit hci_sock_cleanup(void) { if (bt_sock_unregister(BTPROTO_HCI) < 0) BT_ERR("HCI socket unregistration failed"); @@ -742,6 +742,4 @@ int __exit hci_sock_cleanup(void) hci_unregister_notifier(&hci_sock_nblock); proto_unregister(&hci_sk_proto); - - return 0; } diff --git a/net/core/sock.c b/net/core/sock.c index 09cb3a7..2654c14 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1621,7 +1621,7 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1635,7 +1635,7 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_sync(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ed750f9..01578f5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1035,6 +1035,13 @@ static void tcp_cwnd_validate(struct sock *sk) * introducing MSS oddities to segment boundaries. In rare cases where * mss_now != mss_cache, we will request caller to create a small skb * per input skb which could be mostly avoided here (if desired). + * + * We explicitly want to create a request for splitting write queue tail + * to a small skb for Nagle purposes while avoiding unnecessary modulos, + * thus all the complexity (cwnd_len is always MSS multiple which we + * return whenever allowed by the other factors). Basically we need the + * modulo only when the receiver window alone is the limiting factor or + * when we would be allowed to send the split-due-to-Nagle skb fully. */ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) @@ -1048,10 +1055,11 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk))) return cwnd_len; - if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len) + needed = min(skb->len, window); + + if (skb == tcp_write_queue_tail(sk) && cwnd_len <= needed) return cwnd_len; - needed = min(skb->len, window); return needed - needed % mss_now; } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e06bf00..684ec9c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -381,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) if (nf_ct_expect_count >= nf_ct_expect_max) { if (net_ratelimit()) printk(KERN_WARNING - "nf_conntrack: expectation table full"); + "nf_conntrack: expectation table full\n"); ret = -EMFILE; goto out; } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 8b9be1e..2bd9963 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -19,14 +19,6 @@ static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); -/* Horrible trick to figure out smallest amount worth kmallocing. */ -#define CACHE(x) (x) + 0 * -enum { - NF_CT_EXT_MIN_SIZE = -#include <linux/kmalloc_sizes.h> - 1 }; -#undef CACHE - void __nf_ct_ext_destroy(struct nf_conn *ct) { unsigned int i; @@ -53,7 +45,7 @@ EXPORT_SYMBOL(__nf_ct_ext_destroy); static void * nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) { - unsigned int off, len, real_len; + unsigned int off, len; struct nf_ct_ext_type *t; rcu_read_lock(); @@ -61,16 +53,14 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len; - real_len = t->alloc_size; rcu_read_unlock(); - *ext = kzalloc(real_len, gfp); + *ext = kzalloc(t->alloc_size, gfp); if (!*ext) return NULL; (*ext)->offset[id] = off; (*ext)->len = len; - (*ext)->real_len = real_len; return (void *)(*ext) + off; } @@ -95,7 +85,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - if (newlen >= ct->ext->real_len) { + if (newlen >= ksize(ct->ext)) { new = kmalloc(newlen, gfp); if (!new) return NULL; @@ -114,7 +104,6 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) rcu_read_unlock(); } kfree(ct->ext); - new->real_len = newlen; ct->ext = new; } @@ -156,8 +145,6 @@ static void update_alloc_size(struct nf_ct_ext_type *type) t1->alloc_size = ALIGN(t1->alloc_size, t2->align) + t2->len; } - if (t1->alloc_size < NF_CT_EXT_MIN_SIZE) - t1->alloc_size = NF_CT_EXT_MIN_SIZE; } } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bfc2928..ddc80ea 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -51,7 +51,7 @@ int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh) return -EINVAL; mutex_lock(&queue_handler_mutex); - if (queue_handler[pf] != qh) { + if (queue_handler[pf] && queue_handler[pf] != qh) { mutex_unlock(&queue_handler_mutex); return -EINVAL; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 7efa40d..bf3f19b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -556,7 +556,7 @@ nfulnl_log_packet(unsigned int pf, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... */ - size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -702,20 +702,30 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); struct nfulnl_instance *inst; + struct nfulnl_msg_config_cmd *cmd = NULL; int ret = 0; + if (nfula[NFULA_CFG_CMD]) { + u_int8_t pf = nfmsg->nfgen_family; + cmd = nla_data(nfula[NFULA_CFG_CMD]); + + /* Commands without queue context */ + switch (cmd->command) { + case NFULNL_CFG_CMD_PF_BIND: + return nf_log_register(pf, &nfulnl_logger); + case NFULNL_CFG_CMD_PF_UNBIND: + nf_log_unregister_pf(pf); + return 0; + } + } + inst = instance_lookup_get(group_num); if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { ret = -EPERM; goto out_put; } - if (nfula[NFULA_CFG_CMD]) { - u_int8_t pf = nfmsg->nfgen_family; - struct nfulnl_msg_config_cmd *cmd; - - cmd = nla_data(nfula[NFULA_CFG_CMD]); - + if (cmd != NULL) { switch (cmd->command) { case NFULNL_CFG_CMD_BIND: if (inst) { @@ -738,14 +748,6 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, instance_destroy(inst); goto out; - case NFULNL_CFG_CMD_PF_BIND: - ret = nf_log_register(pf, &nfulnl_logger); - break; - case NFULNL_CFG_CMD_PF_UNBIND: - /* This is a bug and a feature. We cannot unregister - * other handlers, like nfnetlink_inst can */ - nf_log_unregister_pf(pf); - break; default: ret = -ENOTSUPP; break; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 0043d3a..012cb69 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -224,7 +224,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *indev; struct net_device *outdev; - size = NLMSG_ALIGN(sizeof(struct nfgenmsg)) + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -703,19 +703,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context - might sleep */ switch (cmd->command) { case NFQNL_CFG_CMD_PF_BIND: - ret = nf_register_queue_handler(ntohs(cmd->pf), - &nfqh); - break; + return nf_register_queue_handler(ntohs(cmd->pf), + &nfqh); case NFQNL_CFG_CMD_PF_UNBIND: - ret = nf_unregister_queue_handler(ntohs(cmd->pf), - &nfqh); - break; - default: - break; + return nf_unregister_queue_handler(ntohs(cmd->pf), + &nfqh); } - - if (ret < 0) - return ret; } rcu_read_lock(); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index e9a8794..9fa2e08 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -95,8 +95,11 @@ static inline void localtime_2(struct xtm *r, time_t time) */ r->dse = time / 86400; - /* 1970-01-01 (w=0) was a Thursday (4). */ - r->weekday = (4 + r->dse) % 7; + /* + * 1970-01-01 (w=0) was a Thursday (4). + * -1 and +1 map Sunday properly onto 7. + */ + r->weekday = (4 + r->dse - 1) % 7 + 1; } static void localtime_3(struct xtm *r, time_t time) diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index f19121d..a39bf97 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,7 +143,8 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { if (msg->msg_name && msg->msg_namelen > 0) - memcpy(&msg->msg_name, &call->conn->trans->peer->srx, + memcpy(msg->msg_name, + &call->conn->trans->peer->srx, sizeof(call->conn->trans->peer->srx)); sock_recv_timestamp(msg, &rx->sk, skb); } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index a27511e..ceefda0 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -209,6 +209,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) { struct sctp_sockaddr_entry *addr, *temp; + int found = 0; /* We hold the socket lock when calling this function, * and that acts as a writer synchronizing lock. @@ -216,13 +217,14 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) list_for_each_entry_safe(addr, temp, &bp->address_list, list) { if (sctp_cmp_addr_exact(&addr->a, del_addr)) { /* Found the exact match. */ + found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } - if (addr && !addr->valid) { + if (found) { call_rcu(&addr->rcu, sctp_local_addr_free); SCTP_DBG_OBJCNT_DEC(addr); return 0; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 87f9405..9aa0733 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -89,6 +89,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + int found = 0; switch (ev) { case NETDEV_UP: @@ -111,13 +112,14 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, &sctp_local_addr_list, list) { if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { + found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } spin_unlock_bh(&sctp_local_addr_lock); - if (addr && !addr->valid) + if (found) call_rcu(&addr->rcu, sctp_local_addr_free); break; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 688546d..ad0a406 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -628,6 +628,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; struct sctp_sockaddr_entry *addr = NULL; struct sctp_sockaddr_entry *temp; + int found = 0; switch (ev) { case NETDEV_UP: @@ -647,13 +648,14 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, list_for_each_entry_safe(addr, temp, &sctp_local_addr_list, list) { if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { + found = 1; addr->valid = 0; list_del_rcu(&addr->list); break; } } spin_unlock_bh(&sctp_local_addr_lock); - if (addr && !addr->valid) + if (found) call_rcu(&addr->rcu, sctp_local_addr_free); break; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index e45be4e..578630e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2375,6 +2375,14 @@ static int sctp_process_param(struct sctp_association *asoc, asoc->peer.ipv4_address = 0; asoc->peer.ipv6_address = 0; + /* Assume that peer supports the address family + * by which it sends a packet. + */ + if (peer_addr->sa.sa_family == AF_INET6) + asoc->peer.ipv6_address = 1; + else if (peer_addr->sa.sa_family == AF_INET) + asoc->peer.ipv4_address = 1; + /* Cycle through address types; avoid divide by 0. */ sat = ntohs(param.p->length) - sizeof(sctp_paramhdr_t); if (sat) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9398926..d994d82 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2933,17 +2933,39 @@ static int sctp_setsockopt_maxburst(struct sock *sk, char __user *optval, int optlen) { + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; int val; + int assoc_id = 0; - if (optlen != sizeof(int)) + if (optlen < sizeof(int)) return -EINVAL; - if (get_user(val, (int __user *)optval)) - return -EFAULT; - if (val < 0) + if (optlen == sizeof(int)) { + printk(KERN_WARNING + "SCTP: Use of int in max_burst socket option deprecated\n"); + printk(KERN_WARNING + "SCTP: Use struct sctp_assoc_value instead\n"); + if (copy_from_user(&val, optval, optlen)) + return -EFAULT; + } else if (optlen == sizeof(struct sctp_assoc_value)) { + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT; + val = params.assoc_value; + assoc_id = params.assoc_id; + } else return -EINVAL; - sctp_sk(sk)->max_burst = val; + sp = sctp_sk(sk); + + if (assoc_id != 0) { + asoc = sctp_id2assoc(sk, assoc_id); + if (!asoc) + return -EINVAL; + asoc->max_burst = val; + } else + sp->max_burst = val; return 0; } @@ -5005,20 +5027,45 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, char __user *optval, int __user *optlen) { - int val; + struct sctp_assoc_value params; + struct sctp_sock *sp; + struct sctp_association *asoc; if (len < sizeof(int)) return -EINVAL; - len = sizeof(int); + if (len == sizeof(int)) { + printk(KERN_WARNING + "SCTP: Use of int in max_burst socket option deprecated\n"); + printk(KERN_WARNING + "SCTP: Use struct sctp_assoc_value instead\n"); + params.assoc_id = 0; + } else if (len == sizeof (struct sctp_assoc_value)) { + if (copy_from_user(¶ms, optval, len)) + return -EFAULT; + } else + return -EINVAL; - val = sctp_sk(sk)->max_burst; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, &val, len)) - return -EFAULT; + sp = sctp_sk(sk); + + if (params.assoc_id != 0) { + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + return -EINVAL; + params.assoc_value = asoc->max_burst; + } else + params.assoc_value = sp->max_burst; + + if (len == sizeof(int)) { + if (copy_to_user(optval, ¶ms.assoc_value, len)) + return -EFAULT; + } else { + if (copy_to_user(optval, ¶ms, len)) + return -EFAULT; + } + + return 0; - return -ENOTSUPP; } static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 0598b22..981f190 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -156,7 +156,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *ctxt; int ret = 0; - BUG_ON(sge_count >= 32); + BUG_ON(sge_count > RPCSVC_MAXPAGES); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, xdr_sge=%p, sge_count=%d\n", rmr, (unsigned long long)to, xdr_off, diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f09444c..16fd3f6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -54,7 +54,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, int flags); static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); static void svc_rdma_release_rqst(struct svc_rqst *); -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); static void dto_tasklet_func(unsigned long data); static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); @@ -247,6 +246,7 @@ static void dto_tasklet_func(unsigned long data) sq_cq_reap(xprt); } + svc_xprt_put(&xprt->sc_xprt); spin_lock_irqsave(&dto_lock, flags); } spin_unlock_irqrestore(&dto_lock, flags); @@ -275,8 +275,10 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) * add it */ spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) + if (list_empty(&xprt->sc_dto_q)) { + svc_xprt_get(&xprt->sc_xprt); list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); + } spin_unlock_irqrestore(&dto_lock, flags); /* Tasklet does all the work to avoid irqsave locks. */ @@ -386,8 +388,10 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) * add it */ spin_lock_irqsave(&dto_lock, flags); - if (list_empty(&xprt->sc_dto_q)) + if (list_empty(&xprt->sc_dto_q)) { + svc_xprt_get(&xprt->sc_xprt); list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); + } spin_unlock_irqrestore(&dto_lock, flags); /* Tasklet does all the work to avoid irqsave locks. */ @@ -611,6 +615,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, switch (event->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Accept complete */ + svc_xprt_get(xprt); dprintk("svcrdma: Connection completed on DTO xprt=%p, " "cm_id=%p\n", xprt, cma_id); clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); @@ -661,15 +666,15 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); if (IS_ERR(listen_id)) { - rdma_destroy_xprt(cma_xprt); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_create_id failed = %ld\n", PTR_ERR(listen_id)); return (void *)listen_id; } ret = rdma_bind_addr(listen_id, sa); if (ret) { - rdma_destroy_xprt(cma_xprt); rdma_destroy_id(listen_id); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); return ERR_PTR(ret); } @@ -678,8 +683,9 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); if (ret) { rdma_destroy_id(listen_id); - rdma_destroy_xprt(cma_xprt); + svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_listen failed = %d\n", ret); + return ERR_PTR(ret); } /* @@ -820,6 +826,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; } + svc_xprt_get(&newxprt->sc_xprt); newxprt->sc_qp = newxprt->sc_cm_id->qp; /* Register all of physical memory */ @@ -891,8 +898,15 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) errout: dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); + /* Take a reference in case the DTO handler runs */ + svc_xprt_get(&newxprt->sc_xprt); + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { + ib_destroy_qp(newxprt->sc_qp); + svc_xprt_put(&newxprt->sc_xprt); + } rdma_destroy_id(newxprt->sc_cm_id); - rdma_destroy_xprt(newxprt); + /* This call to put will destroy the transport */ + svc_xprt_put(&newxprt->sc_xprt); return NULL; } @@ -919,54 +933,60 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; } -/* Disable data ready events for this connection */ +/* + * When connected, an svc_xprt has at least three references: + * + * - A reference held by the QP. We still hold that here because this + * code deletes the QP and puts the reference. + * + * - A reference held by the cm_id between the ESTABLISHED and + * DISCONNECTED events. If the remote peer disconnected first, this + * reference could be gone. + * + * - A reference held by the svc_recv code that called this function + * as part of close processing. + * + * At a minimum two references should still be held. + */ static void svc_rdma_detach(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); - unsigned long flags; - dprintk("svc: svc_rdma_detach(%p)\n", xprt); - /* - * Shutdown the connection. This will ensure we don't get any - * more events from the provider. - */ + + /* Disconnect and flush posted WQE */ rdma_disconnect(rdma->sc_cm_id); - rdma_destroy_id(rdma->sc_cm_id); - /* We may already be on the DTO list */ - spin_lock_irqsave(&dto_lock, flags); - if (!list_empty(&rdma->sc_dto_q)) - list_del_init(&rdma->sc_dto_q); - spin_unlock_irqrestore(&dto_lock, flags); + /* Destroy the QP if present (not a listener) */ + if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) { + ib_destroy_qp(rdma->sc_qp); + svc_xprt_put(xprt); + } + + /* Destroy the CM ID */ + rdma_destroy_id(rdma->sc_cm_id); } static void svc_rdma_free(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); - rdma_destroy_xprt(rdma); - kfree(rdma); -} - -static void rdma_destroy_xprt(struct svcxprt_rdma *xprt) -{ - if (xprt->sc_qp && !IS_ERR(xprt->sc_qp)) - ib_destroy_qp(xprt->sc_qp); - - if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq)) - ib_destroy_cq(xprt->sc_sq_cq); + /* We should only be called from kref_put */ + BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); + if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) + ib_destroy_cq(rdma->sc_sq_cq); - if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq)) - ib_destroy_cq(xprt->sc_rq_cq); + if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) + ib_destroy_cq(rdma->sc_rq_cq); - if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr)) - ib_dereg_mr(xprt->sc_phys_mr); + if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr)) + ib_dereg_mr(rdma->sc_phys_mr); - if (xprt->sc_pd && !IS_ERR(xprt->sc_pd)) - ib_dealloc_pd(xprt->sc_pd); + if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) + ib_dealloc_pd(rdma->sc_pd); - destroy_context_cache(xprt->sc_ctxt_head); + destroy_context_cache(rdma->sc_ctxt_head); + kfree(rdma); } static int svc_rdma_has_wspace(struct svc_xprt *xprt) |