diff options
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/addr.c | 196 | ||||
-rw-r--r-- | drivers/infiniband/core/cma.c | 86 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_classes.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_eq.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_main.c | 17 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_qp.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/ehca/ehca_reqs.c | 13 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/cq.c | 12 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/main.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes.h | 18 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes_cm.c | 279 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes_cm.h | 14 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes_hw.c | 42 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes_utils.c | 9 | ||||
-rw-r--r-- | drivers/infiniband/hw/nes/nes_verbs.c | 45 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.h | 3 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_initiator.c | 132 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 1 |
18 files changed, 550 insertions, 340 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 09a2bec..d98b05b 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -41,6 +41,8 @@ #include <net/neighbour.h> #include <net/route.h> #include <net/netevent.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> #include <rdma/ib_addr.h> MODULE_AUTHOR("Sean Hefty"); @@ -49,8 +51,8 @@ MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct list_head list; - struct sockaddr src_addr; - struct sockaddr dst_addr; + struct sockaddr_storage src_addr; + struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; @@ -113,15 +115,32 @@ EXPORT_SYMBOL(rdma_copy_addr); int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; - __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - int ret; + int ret = -EADDRNOTAVAIL; - dev = ip_dev_find(&init_net, ip); - if (!dev) - return -EADDRNOTAVAIL; + switch (addr->sa_family) { + case AF_INET: + dev = ip_dev_find(&init_net, + ((struct sockaddr_in *) addr)->sin_addr.s_addr); + + if (!dev) + return ret; - ret = rdma_copy_addr(dev_addr, dev, NULL); - dev_put(dev); + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + break; + case AF_INET6: + for_each_netdev(&init_net, dev) { + if (ipv6_chk_addr(&init_net, + &((struct sockaddr_in6 *) addr)->sin6_addr, + dev, 1)) { + ret = rdma_copy_addr(dev_addr, dev, NULL); + break; + } + } + break; + default: + break; + } return ret; } EXPORT_SYMBOL(rdma_translate_ip); @@ -156,22 +175,37 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static void addr_send_arp(struct sockaddr_in *dst_in) +static void addr_send_arp(struct sockaddr *dst_in) { struct rtable *rt; struct flowi fl; - __be32 dst_ip = dst_in->sin_addr.s_addr; + struct dst_entry *dst; memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(&init_net, &rt, &fl)) - return; + if (dst_in->sa_family == AF_INET) { + fl.nl_u.ip4_u.daddr = + ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; - neigh_event_send(rt->u.dst.neighbour, NULL); - ip_rt_put(rt); + if (ip_route_output_key(&init_net, &rt, &fl)) + return; + + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); + + } else { + fl.nl_u.ip6_u.daddr = + ((struct sockaddr_in6 *) dst_in)->sin6_addr; + + dst = ip6_route_output(&init_net, NULL, &fl); + if (!dst) + return; + + neigh_event_send(dst->neighbour, NULL); + dst_release(dst); + } } -static int addr_resolve_remote(struct sockaddr_in *src_in, +static int addr4_resolve_remote(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) { @@ -220,10 +254,51 @@ out: return ret; } +static int addr6_resolve_remote(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) +{ + struct flowi fl; + struct neighbour *neigh; + struct dst_entry *dst; + int ret = -ENODATA; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip6_u.daddr = dst_in->sin6_addr; + fl.nl_u.ip6_u.saddr = src_in->sin6_addr; + + dst = ip6_route_output(&init_net, NULL, &fl); + if (!dst) + return ret; + + if (dst->dev->flags & IFF_NOARP) { + ret = rdma_copy_addr(addr, dst->dev, NULL); + } else { + neigh = dst->neighbour; + if (neigh && (neigh->nud_state & NUD_VALID)) + ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); + } + + dst_release(dst); + return ret; +} + +static int addr_resolve_remote(struct sockaddr *src_in, + struct sockaddr *dst_in, + struct rdma_dev_addr *addr) +{ + if (src_in->sa_family == AF_INET) { + return addr4_resolve_remote((struct sockaddr_in *) src_in, + (struct sockaddr_in *) dst_in, addr); + } else + return addr6_resolve_remote((struct sockaddr_in6 *) src_in, + (struct sockaddr_in6 *) dst_in, addr); +} + static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); @@ -231,8 +306,8 @@ static void process_req(struct work_struct *work) mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve_remote(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) @@ -251,41 +326,72 @@ static void process_req(struct work_struct *work) list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); - req->callback(req->status, &req->src_addr, req->addr, - req->context); + req->callback(req->status, (struct sockaddr *) &req->src_addr, + req->addr, req->context); put_client(req->client); kfree(req); } } -static int addr_resolve_local(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, +static int addr_resolve_local(struct sockaddr *src_in, + struct sockaddr *dst_in, struct rdma_dev_addr *addr) { struct net_device *dev; - __be32 src_ip = src_in->sin_addr.s_addr; - __be32 dst_ip = dst_in->sin_addr.s_addr; int ret; - dev = ip_dev_find(&init_net, dst_ip); - if (!dev) - return -EADDRNOTAVAIL; - - if (ipv4_is_zeronet(src_ip)) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_ip; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv4_is_loopback(src_ip)) { - ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + if (dst_in->sa_family == AF_INET) { + __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr; + __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr; + + dev = ip_dev_find(&init_net, dst_ip); + if (!dev) + return -EADDRNOTAVAIL; + + if (ipv4_is_zeronet(src_ip)) { + src_in->sa_family = dst_in->sa_family; + ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip; + ret = rdma_copy_addr(addr, dev, dev->dev_addr); + } else if (ipv4_is_loopback(src_ip)) { + ret = rdma_translate_ip(dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip(src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } + dev_put(dev); } else { - ret = rdma_translate_ip((struct sockaddr *)src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + struct in6_addr *a; + + for_each_netdev(&init_net, dev) + if (ipv6_chk_addr(&init_net, + &((struct sockaddr_in6 *) addr)->sin6_addr, + dev, 1)) + break; + + if (!dev) + return -EADDRNOTAVAIL; + + a = &((struct sockaddr_in6 *) src_in)->sin6_addr; + + if (ipv6_addr_any(a)) { + src_in->sa_family = dst_in->sa_family; + ((struct sockaddr_in6 *) src_in)->sin6_addr = + ((struct sockaddr_in6 *) dst_in)->sin6_addr; + ret = rdma_copy_addr(addr, dev, dev->dev_addr); + } else if (ipv6_addr_loopback(a)) { + ret = rdma_translate_ip(dst_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } else { + ret = rdma_translate_ip(src_in, addr); + if (!ret) + memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); + } } - dev_put(dev); return ret; } @@ -296,7 +402,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context) { - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; @@ -313,8 +419,8 @@ int rdma_resolve_ip(struct rdma_addr_client *client, req->client = client; atomic_inc(&client->refcount); - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; req->status = addr_resolve_local(src_in, dst_in, addr); if (req->status == -EADDRNOTAVAIL) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d951896..2a2e508 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -42,6 +42,7 @@ #include <linux/inetdevice.h> #include <net/tcp.h> +#include <net/ipv6.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> @@ -636,7 +637,12 @@ static inline int cma_zero_addr(struct sockaddr *addr) static inline int cma_loopback_addr(struct sockaddr *addr) { - return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + if (addr->sa_family == AF_INET) + return ipv4_is_loopback( + ((struct sockaddr_in *) addr)->sin_addr.s_addr); + else + return ipv6_addr_loopback( + &((struct sockaddr_in6 *) addr)->sin6_addr); } static inline int cma_any_addr(struct sockaddr *addr) @@ -1467,10 +1473,10 @@ static void cma_listen_on_all(struct rdma_id_private *id_priv) static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af) { - struct sockaddr_in addr_in; + struct sockaddr_storage addr_in; memset(&addr_in, 0, sizeof addr_in); - addr_in.sin_family = af; + addr_in.ss_family = af; return rdma_bind_addr(id, (struct sockaddr *) &addr_in); } @@ -2073,7 +2079,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) struct rdma_id_private *id_priv; int ret; - if (addr->sa_family != AF_INET) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); @@ -2113,31 +2119,59 @@ EXPORT_SYMBOL(rdma_bind_addr); static int cma_format_hdr(void *hdr, enum rdma_port_space ps, struct rdma_route *route) { - struct sockaddr_in *src4, *dst4; struct cma_hdr *cma_hdr; struct sdp_hh *sdp_hdr; - src4 = (struct sockaddr_in *) &route->addr.src_addr; - dst4 = (struct sockaddr_in *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 4); - sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - sdp_hdr->port = src4->sin_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 4); - cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - cma_hdr->port = src4->sin_port; - break; + if (route->addr.src_addr.ss_family == AF_INET) { + struct sockaddr_in *src4, *dst4; + + src4 = (struct sockaddr_in *) &route->addr.src_addr; + dst4 = (struct sockaddr_in *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 4); + sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + sdp_hdr->port = src4->sin_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + break; + } + } else { + struct sockaddr_in6 *src6, *dst6; + + src6 = (struct sockaddr_in6 *) &route->addr.src_addr; + dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; + + switch (ps) { + case RDMA_PS_SDP: + sdp_hdr = hdr; + if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) + return -EINVAL; + sdp_set_ip_ver(sdp_hdr, 6); + sdp_hdr->src_addr.ip6 = src6->sin6_addr; + sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; + sdp_hdr->port = src6->sin6_port; + break; + default: + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + cma_set_ip_ver(cma_hdr, 6); + cma_hdr->src_addr.ip6 = src6->sin6_addr; + cma_hdr->dst_addr.ip6 = dst6->sin6_addr; + cma_hdr->port = src6->sin6_port; + break; + } } return 0; } diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 7fc35cf..c825142 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -175,6 +175,13 @@ struct ehca_queue_map { unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ }; +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 0 : temp; +} + struct ehca_qp { union { struct ib_qp ib_qp; diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c index 49660df..523e733c 100644 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ b/drivers/infiniband/hw/ehca/ehca_eq.c @@ -113,7 +113,7 @@ int ehca_create_eq(struct ehca_shca *shca, if (h_ret != H_SUCCESS || vpage) goto create_eq_exit2; } else { - if (h_ret != H_PAGE_REGISTERED || !vpage) + if (h_ret != H_PAGE_REGISTERED) goto create_eq_exit2; } } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index bec7e02..3b77b67 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -717,6 +717,7 @@ static int __devinit ehca_probe(struct of_device *dev, const u64 *handle; struct ib_pd *ibpd; int ret, i, eq_size; + unsigned long flags; handle = of_get_property(dev->node, "ibm,hca-handle", NULL); if (!handle) { @@ -830,9 +831,9 @@ static int __devinit ehca_probe(struct of_device *dev, ehca_err(&shca->ib_device, "Cannot create device attributes ret=%d", ret); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_add(&shca->shca_list, &shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return 0; @@ -878,6 +879,7 @@ probe1: static int __devexit ehca_remove(struct of_device *dev) { struct ehca_shca *shca = dev->dev.driver_data; + unsigned long flags; int ret; sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); @@ -915,9 +917,9 @@ static int __devexit ehca_remove(struct of_device *dev) ib_dealloc_device(&shca->ib_device); - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); list_del(&shca->shca_list); - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return ret; } @@ -975,6 +977,7 @@ static int ehca_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { static unsigned long ehca_dmem_warn_time; + unsigned long flags; switch (action) { case MEM_CANCEL_OFFLINE: @@ -985,12 +988,12 @@ static int ehca_mem_notifier(struct notifier_block *nb, case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: /* only ok if no hca is attached to the lpar */ - spin_lock(&shca_list_lock); + spin_lock_irqsave(&shca_list_lock, flags); if (list_empty(&shca_list)) { - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); return NOTIFY_OK; } else { - spin_unlock(&shca_list_lock); + spin_unlock_irqrestore(&shca_list_lock, flags); if (printk_timed_ratelimit(&ehca_dmem_warn_time, 30 * 1000)) ehca_gen_err("DMEM operations are not allowed" diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index cadbf0c..f161cf1 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -1138,14 +1138,14 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, return -EFAULT; } - tail_idx = (qmap->tail + 1) % qmap->entries; + tail_idx = next_index(qmap->tail, qmap->entries); wqe_idx = q_ofs / ipz_queue->qe_size; /* check all processed wqes, whether a cqe is requested or not */ while (tail_idx != wqe_idx) { if (qmap->map[tail_idx].cqe_req) qmap->left_to_poll++; - tail_idx = (tail_idx + 1) % qmap->entries; + tail_idx = next_index(tail_idx, qmap->entries); } /* save index in queue, where we have to start flushing */ qmap->next_wqe_idx = wqe_idx; @@ -1195,14 +1195,14 @@ static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) } else { spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); my_qp->sq_map.left_to_poll = 0; - my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) % - my_qp->sq_map.entries; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); my_qp->rq_map.left_to_poll = 0; - my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) % - my_qp->rq_map.entries; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); } diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 00a648f..c711268 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -726,13 +726,13 @@ repoll: * set left_to_poll to 0 because in error state, we will not * get any additional CQEs */ - my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) % - my_qp->sq_map.entries; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); my_qp->sq_map.left_to_poll = 0; ehca_add_to_err_list(my_qp, 1); - my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) % - my_qp->rq_map.entries; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); my_qp->rq_map.left_to_poll = 0; if (HAS_RQ(my_qp)) ehca_add_to_err_list(my_qp, 0); @@ -860,9 +860,8 @@ static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, /* mark as reported and advance next_wqe pointer */ qmap_entry->reported = 1; - qmap->next_wqe_idx++; - if (qmap->next_wqe_idx == qmap->entries) - qmap->next_wqe_idx = 0; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); qmap_entry = &qmap->map[qmap->next_wqe_idx]; wc++; nr++; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 1830849..8415ecc 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -222,7 +222,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector } err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, - cq->db.dma, &cq->mcq, 0); + cq->db.dma, &cq->mcq, vector, 0); if (err) goto err_dbmap; @@ -325,15 +325,17 @@ static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) { - struct mlx4_cqe *cqe; + struct mlx4_cqe *cqe, *new_cqe; int i; i = cq->mcq.cons_index; cqe = get_cqe(cq, i & cq->ibcq.cqe); while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { - memcpy(get_cqe_from_buf(&cq->resize_buf->buf, - (i + 1) & cq->resize_buf->cqe), - get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, + (i + 1) & cq->resize_buf->cqe); + memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe)); + new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | + (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); cqe = get_cqe(cq, ++i & cq->ibcq.cqe); } ++cq->mcq.cons_index; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2e80f8f..dcefe1f 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -578,7 +578,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) ibdev->num_ports++; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; - ibdev->ib_dev.num_comp_vectors = 1; + ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dma_device = &dev->pdev->dev; ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 1595dc7..13a5bb1 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -137,14 +137,18 @@ #ifdef CONFIG_INFINIBAND_NES_DEBUG #define nes_debug(level, fmt, args...) \ +do { \ if (level & nes_debug_level) \ - printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args) - -#define assert(expr) \ -if (!(expr)) { \ - printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ - #expr, __FILE__, __func__, __LINE__); \ -} + printk(KERN_ERR PFX "%s[%u]: " fmt, __func__, __LINE__, ##args); \ +} while (0) + +#define assert(expr) \ +do { \ + if (!(expr)) { \ + printk(KERN_ERR PFX "Assertion failed! %s, %s, %s, line %d\n", \ + #expr, __FILE__, __func__, __LINE__); \ + } \ +} while (0) #define NES_EVENT_TIMEOUT 1200000 #else diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 2caf9da..cb48041 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -86,15 +86,14 @@ static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); -static void mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, +static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); static int mini_cm_dealloc_core(struct nes_cm_core *); static int mini_cm_get(struct nes_cm_core *); static int mini_cm_set(struct nes_cm_core *, u32, u32); -static struct sk_buff *form_cm_frame(struct sk_buff *, struct nes_cm_node *, +static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node); static int add_ref_cm_node(struct nes_cm_node *); static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); @@ -251,7 +250,7 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) * form_cm_frame - get a free packet and build empty frame Use * node info to build. */ -static struct sk_buff *form_cm_frame(struct sk_buff *skb, +static void form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, void *options, u32 optionsize, void *data, u32 datasize, u8 flags) { @@ -339,7 +338,6 @@ static struct sk_buff *form_cm_frame(struct sk_buff *skb, skb_shinfo(skb)->nr_frags = 0; cm_packets_created++; - return skb; } @@ -356,7 +354,6 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "State : %u \n", core->state); - nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list)); nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); @@ -381,8 +378,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, int ret = 0; u32 was_timer_set; - if (!cm_node) - return -EINVAL; new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); if (!new_send) return -1; @@ -459,13 +454,23 @@ static void nes_cm_timer_tick(unsigned long pass) int ret = NETDEV_TX_OK; enum nes_cm_node_state last_state; + struct list_head timer_list; + INIT_LIST_HEAD(&timer_list); spin_lock_irqsave(&cm_core->ht_lock, flags); list_for_each_safe(list_node, list_core_temp, - &cm_core->connected_nodes) { + &cm_core->connected_nodes) { cm_node = container_of(list_node, struct nes_cm_node, list); - add_ref_cm_node(cm_node); - spin_unlock_irqrestore(&cm_core->ht_lock, flags); + if (!list_empty(&cm_node->recv_list) || (cm_node->send_entry)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->timer_entry, &timer_list); + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &timer_list) { + cm_node = container_of(list_node, struct nes_cm_node, + timer_entry); spin_lock_irqsave(&cm_node->recv_list_lock, flags); list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { @@ -519,7 +524,7 @@ static void nes_cm_timer_tick(unsigned long pass) do { send_entry = cm_node->send_entry; if (!send_entry) - continue; + break; if (time_after(send_entry->timetosend, jiffies)) { if (cm_node->state != NES_CM_STATE_TSA) { if ((nexttimeout > @@ -528,18 +533,18 @@ static void nes_cm_timer_tick(unsigned long pass) nexttimeout = send_entry->timetosend; settimer = 1; - continue; + break; } } else { free_retrans_entry(cm_node); - continue; + break; } } if ((cm_node->state == NES_CM_STATE_TSA) || (cm_node->state == NES_CM_STATE_CLOSED)) { free_retrans_entry(cm_node); - continue; + break; } if (!send_entry->retranscount || @@ -557,7 +562,7 @@ static void nes_cm_timer_tick(unsigned long pass) NES_CM_EVENT_ABORTED); spin_lock_irqsave(&cm_node->retrans_list_lock, flags); - continue; + break; } atomic_inc(&send_entry->skb->users); cm_packets_retrans++; @@ -583,7 +588,7 @@ static void nes_cm_timer_tick(unsigned long pass) send_entry->retrycount--; nexttimeout = jiffies + NES_SHORT_TIME; settimer = 1; - continue; + break; } else { cm_packets_sent++; } @@ -615,14 +620,12 @@ static void nes_cm_timer_tick(unsigned long pass) spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); rem_ref_cm_node(cm_node->cm_core, cm_node); - spin_lock_irqsave(&cm_core->ht_lock, flags); if (ret != NETDEV_TX_OK) { nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n", cm_node); break; } } - spin_unlock_irqrestore(&cm_core->ht_lock, flags); if (settimer) { if (!timer_pending(&cm_core->tcp_timer)) { @@ -683,7 +686,7 @@ static int send_syn(struct nes_cm_node *cm_node, u32 sendack, optionssize += 1; if (!skb) - skb = get_free_pkt(cm_node); + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -708,7 +711,7 @@ static int send_reset(struct nes_cm_node *cm_node, struct sk_buff *skb) int flags = SET_RST | SET_ACK; if (!skb) - skb = get_free_pkt(cm_node); + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); return -1; @@ -729,7 +732,7 @@ static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb) int ret; if (!skb) - skb = get_free_pkt(cm_node); + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -752,7 +755,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) /* if we didn't get a frame get one */ if (!skb) - skb = get_free_pkt(cm_node); + skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); @@ -767,59 +770,15 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) /** - * get_free_pkt - */ -static struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node) -{ - struct sk_buff *skb, *new_skb; - - /* check to see if we need to repopulate the free tx pkt queue */ - if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) { - while (skb_queue_len(&cm_node->cm_core->tx_free_list) < - cm_node->cm_core->free_tx_pkt_max) { - /* replace the frame we took, we won't get it back */ - new_skb = dev_alloc_skb(cm_node->cm_core->mtu); - BUG_ON(!new_skb); - /* add a replacement frame to the free tx list head */ - skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb); - } - } - - skb = skb_dequeue(&cm_node->cm_core->tx_free_list); - - return skb; -} - - -/** - * make_hashkey - generate hash key from node tuple - */ -static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port, - nes_addr_t rem_addr) -{ - u32 hashkey = 0; - - hashkey = loc_addr + rem_addr + loc_port + rem_port; - hashkey = (hashkey % NES_CM_HASHTABLE_SIZE); - - return hashkey; -} - - -/** * find_node - find a cm node that matches the reference cm node */ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) { unsigned long flags; - u32 hashkey; struct list_head *hte; struct nes_cm_node *cm_node; - /* make a hash index key for this packet */ - hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr); - /* get a handle on the hte */ hte = &cm_core->connected_nodes; @@ -887,7 +846,6 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) { unsigned long flags; - u32 hashkey; struct list_head *hte; if (!cm_node || !cm_core) @@ -896,11 +854,6 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", cm_node); - /* first, make an index into our hash table */ - hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, - cm_node->rem_port, cm_node->rem_addr); - cm_node->hashkey = hashkey; - spin_lock_irqsave(&cm_core->ht_lock, flags); /* get a handle on the hash table element (list head for this slot) */ @@ -925,28 +878,36 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, struct list_head *list_pos = NULL; struct list_head *list_temp = NULL; struct nes_cm_node *cm_node = NULL; + struct list_head reset_list; nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " "refcnt=%d\n", listener, free_hanging_nodes, atomic_read(&listener->ref_count)); /* free non-accelerated child nodes for this listener */ + INIT_LIST_HEAD(&reset_list); if (free_hanging_nodes) { spin_lock_irqsave(&cm_core->ht_lock, flags); list_for_each_safe(list_pos, list_temp, - &g_cm_core->connected_nodes) { + &g_cm_core->connected_nodes) { cm_node = container_of(list_pos, struct nes_cm_node, list); if ((cm_node->listener == listener) && - (!cm_node->accelerated)) { - cleanup_retrans_entry(cm_node); - spin_unlock_irqrestore(&cm_core->ht_lock, - flags); - send_reset(cm_node, NULL); - spin_lock_irqsave(&cm_core->ht_lock, flags); + (!cm_node->accelerated)) { + add_ref_cm_node(cm_node); + list_add(&cm_node->reset_entry, &reset_list); } } spin_unlock_irqrestore(&cm_core->ht_lock, flags); } + + list_for_each_safe(list_pos, list_temp, &reset_list) { + cm_node = container_of(list_pos, struct nes_cm_node, + reset_entry); + cleanup_retrans_entry(cm_node); + send_reset(cm_node, NULL); + rem_ref_cm_node(cm_node->cm_core, cm_node); + } + spin_lock_irqsave(&cm_core->listen_list_lock, flags); if (!atomic_dec_return(&listener->ref_count)) { list_del(&listener->list); @@ -1126,7 +1087,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->loopbackpartner = NULL; /* get the mac addr for the remote node */ - arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + if (ipv4_is_loopback(htonl(cm_node->rem_addr))) + arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); + else + arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); if (arpindex < 0) { arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr); if (arpindex < 0) { @@ -1306,7 +1270,6 @@ static void drop_packet(struct sk_buff *skb) static void handle_fin_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, struct tcphdr *tcph) { - atomic_inc(&cm_resets_recvd); nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " "refcnt=%d\n", cm_node, cm_node->state, atomic_read(&cm_node->ref_count)); @@ -1344,6 +1307,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, { int reset = 0; /* whether to send reset in case of err.. */ + int passive_state; atomic_inc(&cm_resets_recvd); nes_debug(NES_DBG_CM, "Received Reset, cm_node = %p, state = %u." " refcnt=%d\n", cm_node, cm_node->state, @@ -1357,7 +1321,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->listener, cm_node->state); active_open_err(cm_node, skb, reset); break; - /* For PASSIVE open states, remove the cm_node event */ + case NES_CM_STATE_MPAREQ_RCVD: + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) + create_event(cm_node, NES_CM_EVENT_RESET); + cleanup_retrans_entry(cm_node); + cm_node->state = NES_CM_STATE_CLOSED; + dev_kfree_skb_any(skb); + break; case NES_CM_STATE_ESTABLISHED: case NES_CM_STATE_SYN_RCVD: case NES_CM_STATE_LISTENING: @@ -1365,7 +1336,14 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, passive_open_err(cm_node, skb, reset); break; case NES_CM_STATE_TSA: + active_open_err(cm_node, skb, reset); + break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + drop_packet(skb); + break; default: + drop_packet(skb); break; } } @@ -1394,6 +1372,9 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb, dev_kfree_skb_any(skb); if (type == NES_CM_EVENT_CONNECTED) cm_node->state = NES_CM_STATE_TSA; + else + atomic_set(&cm_node->passive_state, + NES_PASSIVE_STATE_INDICATED); create_event(cm_node, type); } @@ -1474,7 +1455,7 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - skb_pull(skb, tcph->doff << 2); + skb_trim(skb, 0); inc_sequence = ntohl(tcph->seq); switch (cm_node->state) { @@ -1507,6 +1488,10 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, cm_node->state = NES_CM_STATE_SYN_RCVD; send_syn(cm_node, 1, skb); break; + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, skb); + break; case NES_CM_STATE_TSA: case NES_CM_STATE_ESTABLISHED: case NES_CM_STATE_FIN_WAIT1: @@ -1515,7 +1500,6 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_LAST_ACK: case NES_CM_STATE_CLOSING: case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_CLOSED: default: drop_packet(skb); break; @@ -1531,7 +1515,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, int optionsize; optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); - skb_pull(skb, tcph->doff << 2); + skb_trim(skb, 0); inc_sequence = ntohl(tcph->seq); switch (cm_node->state) { case NES_CM_STATE_SYN_SENT: @@ -1555,6 +1539,12 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, /* passive open, so should not be here */ passive_open_err(cm_node, skb, 1); break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_CLOSED: + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + cleanup_retrans_entry(cm_node); + send_reset(cm_node, skb); + break; case NES_CM_STATE_ESTABLISHED: case NES_CM_STATE_FIN_WAIT1: case NES_CM_STATE_FIN_WAIT2: @@ -1562,7 +1552,6 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, case NES_CM_STATE_TSA: case NES_CM_STATE_CLOSING: case NES_CM_STATE_UNKNOWN: - case NES_CM_STATE_CLOSED: case NES_CM_STATE_MPAREQ_SENT: default: drop_packet(skb); @@ -1577,6 +1566,13 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, u32 inc_sequence; u32 rem_seq_ack; u32 rem_seq; + int ret; + int optionsize; + u32 temp_seq = cm_node->tcp_cntxt.loc_seq_num; + + optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); + cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq); + if (check_seq(cm_node, tcph, skb)) return; @@ -1589,7 +1585,18 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, switch (cm_node->state) { case NES_CM_STATE_SYN_RCVD: /* Passive OPEN */ + ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1); + if (ret) + break; cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); + cm_node->tcp_cntxt.loc_seq_num = temp_seq; + if (cm_node->tcp_cntxt.rem_ack_num != + cm_node->tcp_cntxt.loc_seq_num) { + nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n"); + cleanup_retrans_entry(cm_node); + send_reset(cm_node, skb); + return; + } cm_node->state = NES_CM_STATE_ESTABLISHED; if (datasize) { cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; @@ -1621,11 +1628,15 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, dev_kfree_skb_any(skb); } break; + case NES_CM_STATE_LISTENING: + case NES_CM_STATE_CLOSED: + cleanup_retrans_entry(cm_node); + send_reset(cm_node, skb); + break; case NES_CM_STATE_FIN_WAIT1: case NES_CM_STATE_SYN_SENT: case NES_CM_STATE_FIN_WAIT2: case NES_CM_STATE_TSA: - case NES_CM_STATE_CLOSED: case NES_CM_STATE_MPAREQ_RCVD: case NES_CM_STATE_LAST_ACK: case NES_CM_STATE_CLOSING: @@ -1648,9 +1659,9 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", __func__, cm_node); if (passive) - passive_open_err(cm_node, skb, 0); + passive_open_err(cm_node, skb, 1); else - active_open_err(cm_node, skb, 0); + active_open_err(cm_node, skb, 1); return 1; } } @@ -1970,6 +1981,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node) { int ret = 0; + int passive_state; nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); @@ -1977,9 +1989,13 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, if (cm_node->tcp_cntxt.client) return ret; cleanup_retrans_entry(cm_node); - cm_node->state = NES_CM_STATE_CLOSED; - ret = send_reset(cm_node, NULL); + passive_state = atomic_add_return(1, &cm_node->passive_state); + cm_node->state = NES_CM_STATE_CLOSED; + if (passive_state == NES_SEND_RESET_EVENT) + rem_ref_cm_node(cm_core, cm_node); + else + ret = send_reset(cm_node, NULL); return ret; } @@ -2037,7 +2053,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod * recv_pkt - recv an ETHERNET packet, and process it through CM * node state machine */ -static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, +static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, struct sk_buff *skb) { struct nes_cm_node *cm_node = NULL; @@ -2045,23 +2061,16 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, struct iphdr *iph; struct tcphdr *tcph; struct nes_cm_info nfo; + int skb_handled = 1; if (!skb) - return; + return 0; if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) { - dev_kfree_skb_any(skb); - return; + return 0; } iph = (struct iphdr *)skb->data; tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*tcph)); - if (!tcph) { - dev_kfree_skb_any(skb); - return; - } - skb->len = ntohs(iph->tot_len); nfo.loc_addr = ntohl(iph->daddr); nfo.loc_port = ntohs(tcph->dest); @@ -2082,23 +2091,21 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, /* Only type of packet accepted are for */ /* the PASSIVE open (syn only) */ if ((!tcph->syn) || (tcph->ack)) { - cm_packets_dropped++; + skb_handled = 0; break; } listener = find_listener(cm_core, nfo.loc_addr, nfo.loc_port, NES_CM_LISTENER_ACTIVE_STATE); - if (listener) { - nfo.cm_id = listener->cm_id; - nfo.conn_type = listener->conn_type; - } else { - nes_debug(NES_DBG_CM, "Unable to find listener " - "for the pkt\n"); - cm_packets_dropped++; - dev_kfree_skb_any(skb); + if (!listener) { + nfo.cm_id = NULL; + nfo.conn_type = 0; + nes_debug(NES_DBG_CM, "Unable to find listener for the pkt\n"); + skb_handled = 0; break; } - + nfo.cm_id = listener->cm_id; + nfo.conn_type = listener->conn_type; cm_node = make_cm_node(cm_core, nesvnic, &nfo, listener); if (!cm_node) { @@ -2124,9 +2131,13 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, dev_kfree_skb_any(skb); break; } + skb_reset_network_header(skb); + skb_set_transport_header(skb, sizeof(*tcph)); + skb->len = ntohs(iph->tot_len); process_packet(cm_node, skb, cm_core); rem_ref_cm_node(cm_core, cm_node); } while (0); + return skb_handled; } @@ -2135,10 +2146,7 @@ static void mini_cm_recv_pkt(struct nes_cm_core *cm_core, */ static struct nes_cm_core *nes_cm_alloc_core(void) { - int i; - struct nes_cm_core *cm_core; - struct sk_buff *skb = NULL; /* setup the CM core */ /* alloc top level core control structure */ @@ -2156,19 +2164,6 @@ static struct nes_cm_core *nes_cm_alloc_core(void) atomic_set(&cm_core->events_posted, 0); - /* init the packet lists */ - skb_queue_head_init(&cm_core->tx_free_list); - - for (i = 0; i < NES_CM_DEFAULT_FRAME_CNT; i++) { - skb = dev_alloc_skb(cm_core->mtu); - if (!skb) { - kfree(cm_core); - return NULL; - } - /* add 'raw' skb to free frame list */ - skb_queue_head(&cm_core->tx_free_list, skb); - } - cm_core->api = &nes_cm_api; spin_lock_init(&cm_core->ht_lock); @@ -2397,7 +2392,6 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) atomic_inc(&cm_disconnects); cm_event.event = IW_CM_EVENT_DISCONNECT; if (last_ae == NES_AEQE_AEID_LLP_CONNECTION_RESET) { - issued_disconnect_reset = 1; cm_event.status = IW_CM_EVENT_STATUS_RESET; nes_debug(NES_DBG_CM, "Generating a CM " "Disconnect Event (status reset) for " @@ -2547,6 +2541,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct nes_v4_quad nes_quad; u32 crc_value; int ret; + int passive_state; ibqp = nes_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -2714,8 +2709,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) conn_param->private_data_len + sizeof(struct ietf_mpa_frame)); - attr.qp_state = IB_QPS_RTS; - nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); /* notify OF layer that accept event was successfull */ cm_id->add_ref(cm_id); @@ -2728,6 +2721,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.private_data = NULL; cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); + attr.qp_state = IB_QPS_RTS; + nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); if (cm_node->loopbackpartner) { cm_node->loopbackpartner->mpa_frame_size = nesqp->private_data_len; @@ -2740,6 +2735,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " "ret=%d\n", __func__, __LINE__, ret); + passive_state = atomic_add_return(1, &cm_node->passive_state); + if (passive_state == NES_SEND_RESET_EVENT) + create_event(cm_node, NES_CM_EVENT_RESET); return 0; } @@ -2943,15 +2941,16 @@ int nes_destroy_listen(struct iw_cm_id *cm_id) */ int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) { + int rc = 0; cm_packets_received++; if ((g_cm_core) && (g_cm_core->api)) { - g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); + rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); } else { nes_debug(NES_DBG_CM, "Unable to process packet for CM," " cm is not setup properly.\n"); } - return 0; + return rc; } @@ -3222,6 +3221,18 @@ static void cm_event_reset(struct nes_cm_event *event) cm_event.private_data_len = 0; ret = cm_id->event_handler(cm_id, &cm_event); + cm_id->add_ref(cm_id); + atomic_inc(&cm_closes); + cm_event.event = IW_CM_EVENT_CLOSE; + cm_event.status = IW_CM_EVENT_STATUS_OK; + cm_event.provider_data = cm_id->provider_data; + cm_event.local_addr = cm_id->local_addr; + cm_event.remote_addr = cm_id->remote_addr; + cm_event.private_data = NULL; + cm_event.private_data_len = 0; + nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node); + ret = cm_id->event_handler(cm_id, &cm_event); + nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 367b3d2..fafa350 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -76,6 +76,10 @@ enum nes_timer_type { NES_TIMER_TYPE_CLOSE, }; +#define NES_PASSIVE_STATE_INDICATED 0 +#define NES_DO_NOT_SEND_RESET_EVENT 1 +#define NES_SEND_RESET_EVENT 2 + #define MAX_NES_IFS 4 #define SET_ACK 1 @@ -161,6 +165,8 @@ struct nes_timer_entry { #define NES_CM_DEF_SEQ2 0x18ed5740 #define NES_CM_DEF_LOCAL_ID2 0xb807 +#define MAX_CM_BUFFER 512 + typedef u32 nes_addr_t; @@ -254,8 +260,6 @@ struct nes_cm_listener { /* per connection node and node state information */ struct nes_cm_node { - u32 hashkey; - nes_addr_t loc_addr, rem_addr; u16 loc_port, rem_port; @@ -292,7 +296,10 @@ struct nes_cm_node { int apbvt_set; int accept_pend; int freed; + struct list_head timer_entry; + struct list_head reset_entry; struct nes_qp *nesqp; + atomic_t passive_state; }; /* structure for client or CM to fill when making CM api calls. */ @@ -350,7 +357,6 @@ struct nes_cm_core { u32 mtu; u32 free_tx_pkt_max; u32 rx_pkt_posted; - struct sk_buff_head tx_free_list; atomic_t ht_node_cnt; struct list_head connected_nodes; /* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */ @@ -390,7 +396,7 @@ struct nes_cm_ops { struct nes_cm_node *); int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *, struct nes_cm_node *); - void (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, + int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); int (*destroy_cm_core)(struct nes_cm_core *); int (*get)(struct nes_cm_core *); diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 7c49cc8..8f70ff2 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -2700,27 +2700,33 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq) pkt_type, (pkt_type & NES_PKT_TYPE_APBVT_MASK)); */ if ((pkt_type & NES_PKT_TYPE_APBVT_MASK) == NES_PKT_TYPE_APBVT_BITS) { - nes_cm_recv(rx_skb, nesvnic->netdev); + if (nes_cm_recv(rx_skb, nesvnic->netdev)) + rx_skb = NULL; + } + if (rx_skb == NULL) + goto skip_rx_indicate0; + + + if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && + (nesvnic->vlan_grp != NULL)) { + vlan_tag = (u16)(le32_to_cpu( + cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) + >> 16); + nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", + nesvnic->netdev->name, vlan_tag); + if (nes_use_lro) + lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb, + nesvnic->vlan_grp, vlan_tag, NULL); + else + nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); } else { - if ((cqe_misc & NES_NIC_CQE_TAG_VALID) && (nesvnic->vlan_grp != NULL)) { - vlan_tag = (u16)(le32_to_cpu( - cq->cq_vbase[head].cqe_words[NES_NIC_CQE_TAG_PKT_TYPE_IDX]) - >> 16); - nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n", - nesvnic->netdev->name, vlan_tag); - if (nes_use_lro) - lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb, - nesvnic->vlan_grp, vlan_tag, NULL); - else - nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag); - } else { - if (nes_use_lro) - lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); - else - nes_netif_rx(rx_skb); - } + if (nes_use_lro) + lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL); + else + nes_netif_rx(rx_skb); } +skip_rx_indicate0: nesvnic->netdev->last_rx = jiffies; /* nesvnic->netstats.rx_packets++; */ /* nesvnic->netstats.rx_bytes += rx_pkt_size; */ diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index fb8cbd7..5611a73 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -540,11 +540,14 @@ struct nes_cqp_request *nes_get_cqp_request(struct nes_device *nesdev) if (!list_empty(&nesdev->cqp_avail_reqs)) { spin_lock_irqsave(&nesdev->cqp.lock, flags); - cqp_request = list_entry(nesdev->cqp_avail_reqs.next, + if (!list_empty(&nesdev->cqp_avail_reqs)) { + cqp_request = list_entry(nesdev->cqp_avail_reqs.next, struct nes_cqp_request, list); - list_del_init(&cqp_request->list); + list_del_init(&cqp_request->list); + } spin_unlock_irqrestore(&nesdev->cqp.lock, flags); - } else { + } + if (cqp_request == NULL) { cqp_request = kzalloc(sizeof(struct nes_cqp_request), GFP_KERNEL); if (cqp_request) { cqp_request->dynamic = 1; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index d36c9a0..4fdb724 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1695,13 +1695,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, /* use 4k pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 4k PBL\n", pbl_entries); if (nesadapter->free_4kpbl == 0) { - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); @@ -1717,13 +1712,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries, /* use 256 byte pbl */ nes_debug(NES_DBG_CQ, "pbl_entries=%u, use a 256 byte PBL\n", pbl_entries); if (nesadapter->free_256pbl == 0) { - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); if (!context) pci_free_consistent(nesdev->pcidev, nescq->cq_mem_size, mem, nescq->hw_cq.cq_pbase); @@ -1928,13 +1918,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, /* Two level PBL */ if ((pbl_count+1) > nesadapter->free_4kpbl) { nes_debug(NES_DBG_MR, "Out of 4KB Pbls for two level request.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); return -ENOMEM; } else { nesadapter->free_4kpbl -= pbl_count+1; @@ -1942,13 +1927,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } else if (residual_page_count > 32) { if (pbl_count > nesadapter->free_4kpbl) { nes_debug(NES_DBG_MR, "Out of 4KB Pbls.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); return -ENOMEM; } else { nesadapter->free_4kpbl -= pbl_count; @@ -1956,13 +1936,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd, } else { if (pbl_count > nesadapter->free_256pbl) { nes_debug(NES_DBG_MR, "Out of 256B Pbls.\n"); - if (cqp_request->dynamic) { - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - kfree(cqp_request); - } else { - list_add_tail(&cqp_request->list, &nesdev->cqp_avail_reqs); - spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); - } + spin_unlock_irqrestore(&nesadapter->pbl_lock, flags); + nes_free_cqp_request(nesdev, cqp_request); return -ENOMEM; } else { nesadapter->free_256pbl -= pbl_count; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 81a8262..8611195 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -252,6 +252,9 @@ struct iser_conn { wait_queue_head_t wait; /* waitq for conn/disconn */ atomic_t post_recv_buf_count; /* posted rx count */ atomic_t post_send_buf_count; /* posted tx count */ + atomic_t unexpected_pdu_count;/* count of received * + * unexpected pdus * + * not yet retired */ char name[ISER_OBJECT_NAME_SIZE]; struct iser_page_vec *page_vec; /* represents SG to fmr maps* * maps serialized as tx is*/ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index cdd2831..ed1aff2 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -183,14 +183,8 @@ static int iser_post_receive_control(struct iscsi_conn *conn) struct iser_regd_buf *regd_data; struct iser_dto *recv_dto = NULL; struct iser_device *device = iser_conn->ib_conn->device; - int rx_data_size, err = 0; - - rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); - if (rx_desc == NULL) { - iser_err("Failed to alloc desc for post recv\n"); - return -ENOMEM; - } - rx_desc->type = ISCSI_RX; + int rx_data_size, err; + int posts, outstanding_unexp_pdus; /* for the login sequence we must support rx of upto 8K; login is done * after conn create/bind (connect) and conn stop/bind (reconnect), @@ -201,46 +195,80 @@ static int iser_post_receive_control(struct iscsi_conn *conn) else /* FIXME till user space sets conn->max_recv_dlength correctly */ rx_data_size = 128; - rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); - if (rx_desc->data == NULL) { - iser_err("Failed to alloc data buf for post recv\n"); - err = -ENOMEM; - goto post_rx_kmalloc_failure; - } + outstanding_unexp_pdus = + atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0); - recv_dto = &rx_desc->dto; - recv_dto->ib_conn = iser_conn->ib_conn; - recv_dto->regd_vector_len = 0; + /* + * in addition to the response buffer, replace those consumed by + * unexpected pdus. + */ + for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) { + rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); + if (rx_desc == NULL) { + iser_err("Failed to alloc desc for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_cache_alloc_failure; + } + rx_desc->type = ISCSI_RX; + rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); + if (rx_desc->data == NULL) { + iser_err("Failed to alloc data buf for post recv %d\n", + posts); + err = -ENOMEM; + goto post_rx_kmalloc_failure; + } - regd_hdr = &rx_desc->hdr_regd_buf; - memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); - regd_hdr->device = device; - regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ - regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; + recv_dto = &rx_desc->dto; + recv_dto->ib_conn = iser_conn->ib_conn; + recv_dto->regd_vector_len = 0; - iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); + regd_hdr = &rx_desc->hdr_regd_buf; + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); + regd_hdr->device = device; + regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; - iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); + iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); - regd_data = &rx_desc->data_regd_buf; - memset(regd_data, 0, sizeof(struct iser_regd_buf)); - regd_data->device = device; - regd_data->virt_addr = rx_desc->data; - regd_data->data_size = rx_data_size; + iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); - iser_reg_single(device, regd_data, DMA_FROM_DEVICE); + regd_data = &rx_desc->data_regd_buf; + memset(regd_data, 0, sizeof(struct iser_regd_buf)); + regd_data->device = device; + regd_data->virt_addr = rx_desc->data; + regd_data->data_size = rx_data_size; - iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); + iser_reg_single(device, regd_data, DMA_FROM_DEVICE); - err = iser_post_recv(rx_desc); - if (!err) - return 0; + iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); - /* iser_post_recv failed */ + err = iser_post_recv(rx_desc); + if (err) { + iser_err("Failed iser_post_recv for post %d\n", posts); + goto post_rx_post_recv_failure; + } + } + /* all posts successful */ + return 0; + +post_rx_post_recv_failure: iser_dto_buffs_release(recv_dto); kfree(rx_desc->data); post_rx_kmalloc_failure: kmem_cache_free(ig.desc_cache, rx_desc); +post_rx_cache_alloc_failure: + if (posts > 0) { + /* + * response buffer posted, but did not replace all unexpected + * pdu recv bufs. Ignore error, retry occurs next send + */ + outstanding_unexp_pdus -= (posts - 1); + err = 0; + } + atomic_add(outstanding_unexp_pdus, + &iser_conn->ib_conn->unexpected_pdu_count); + return err; } @@ -274,8 +302,10 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) struct iscsi_iser_conn *iser_conn = conn->dd_data; int i; - /* no need to keep it in a var, we are after login so if this should - * be negotiated, by now the result should be available here */ + /* + * FIXME this value should be declared to the target during login with + * the MaxOutstandingUnexpectedPDUs key when supported + */ int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); @@ -478,6 +508,7 @@ int iser_send_control(struct iscsi_conn *conn, int err = 0; struct iser_regd_buf *regd_buf; struct iser_device *device; + unsigned char opcode; if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); @@ -512,10 +543,15 @@ int iser_send_control(struct iscsi_conn *conn, data_seg_len); } - if (iser_post_receive_control(conn) != 0) { - iser_err("post_rcv_buff failed!\n"); - err = -ENOMEM; - goto send_control_error; + opcode = task->hdr->opcode & ISCSI_OPCODE_MASK; + + /* post recv buffer for response if one is expected */ + if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) { + if (iser_post_receive_control(conn) != 0) { + iser_err("post_rcv_buff failed!\n"); + err = -ENOMEM; + goto send_control_error; + } } err = iser_post_send(mdesc); @@ -586,6 +622,20 @@ void iser_rcv_completion(struct iser_desc *rx_desc, * parallel to the execution of iser_conn_term. So the code that waits * * for the posted rx bufs refcount to become zero handles everything */ atomic_dec(&conn->ib_conn->post_recv_buf_count); + + /* + * if an unexpected PDU was received then the recv wr consumed must + * be replaced, this is done in the next send of a control-type PDU + */ + if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) { + /* nop-in with itt = 0xffffffff */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); + } + else if (opcode == ISCSI_OP_ASYNC_EVENT) { + /* asyncronous message */ + atomic_inc(&conn->ib_conn->unexpected_pdu_count); + } + /* a reject PDU consumes the recv buf posted for the response */ } void iser_snd_completion(struct iser_desc *tx_desc) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 26ff621..6dc6b17 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -498,6 +498,7 @@ void iser_conn_init(struct iser_conn *ib_conn) init_waitqueue_head(&ib_conn->wait); atomic_set(&ib_conn->post_recv_buf_count, 0); atomic_set(&ib_conn->post_send_buf_count, 0); + atomic_set(&ib_conn->unexpected_pdu_count, 0); atomic_set(&ib_conn->refcount, 1); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); |