diff options
28 files changed, 957 insertions, 355 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 922d35f..3cab0ce 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3748,6 +3748,7 @@ error1: cm_remove_port_fs(port); } device_unregister(cm_dev->device); + kfree(cm_dev); } static void cm_remove_one(struct ib_device *ib_device) @@ -3776,6 +3777,7 @@ static void cm_remove_one(struct ib_device *ib_device) cm_remove_port_fs(port); } device_unregister(cm_dev->device); + kfree(cm_dev); } static int __init ib_cm_init(void) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1adf2ef..49c45fe 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1697,9 +1697,8 @@ static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv, u8 port_num = mad_agent_priv->agent.port_num; u8 lmc; - send_resp = ((struct ib_mad *)(wr->send_buf.mad))-> - mad_hdr.method & IB_MGMT_METHOD_RESP; - rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP; + send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad); + rcv_resp = ib_response_mad(rwc->recv_buf.mad); if (send_resp == rcv_resp) /* both requests, or both responses. GIDs different */ diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 2acf9b6..69580e2 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -272,7 +272,6 @@ static struct ib_qp *c2_create_qp(struct ib_pd *pd, pr_debug("%s: Invalid QP type: %d\n", __func__, init_attr->qp_type); return ERR_PTR(-EINVAL); - break; } if (err) { diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index eb778bfd..ecff980 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1155,13 +1155,11 @@ static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { PDBG("%s ibdev %p\n", __func__, ibdev); + + memset(props, 0, sizeof(struct ib_port_attr)); props->max_mtu = IB_MTU_4096; - props->lid = 0; - props->lmc = 0; - props->sm_lid = 0; - props->sm_sl = 0; + props->active_mtu = IB_MTU_2048; props->state = IB_PORT_ACTIVE; - props->phys_state = 0; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | @@ -1170,7 +1168,6 @@ static int iwch_query_port(struct ib_device *ibdev, IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; - props->qkey_viol_cntr = 0; props->active_width = 2; props->active_speed = 2; props->max_msg_sz = -1; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index 1ab919f..5d7b785 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -164,6 +164,13 @@ struct ehca_qmap_entry { u16 reported; }; +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; +}; + struct ehca_qp { union { struct ib_qp ib_qp; @@ -173,8 +180,9 @@ struct ehca_qp { enum ehca_ext_qp_type ext_type; enum ib_qp_state state; struct ipz_queue ipz_squeue; - struct ehca_qmap_entry *sq_map; + struct ehca_queue_map sq_map; struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; struct h_galpas galpas; u32 qkey; u32 real_qp_num; @@ -204,6 +212,8 @@ struct ehca_qp { atomic_t nr_events; /* events seen */ wait_queue_head_t wait_completion; int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; }; #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) @@ -233,6 +243,8 @@ struct ehca_cq { /* mmap counter for resources mapped into user space */ u32 mm_count_queue; u32 mm_count_galpa; + struct list_head sqp_err_list; + struct list_head rqp_err_list; }; enum ehca_mr_flag { diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 5540b27..33647a9 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -276,6 +276,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, for (i = 0; i < QP_HASHTAB_LEN; i++) INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + if (context) { struct ipz_queue *ipz_queue = &my_cq->ipz_queue; struct ehca_create_cq_resp resp; diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h index a8a2ea5..8f7f282 100644 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h @@ -197,6 +197,8 @@ void ehca_poll_eqs(unsigned long data); int ehca_calc_ipd(struct ehca_shca *shca, int port, enum ib_rate path_rate, u32 *ipd); +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + #ifdef CONFIG_PPC_64K_PAGES void *ehca_alloc_fw_ctrlblock(gfp_t flags); void ehca_free_fw_ctrlblock(void *ptr); diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c index b6bcee0..4dbe287 100644 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ b/drivers/infiniband/hw/ehca/ehca_qp.c @@ -396,6 +396,50 @@ static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, queue->is_small = (queue->page_size != 0); } +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = 0; + for (i = 0; i < qmap->entries; i++) + qmap->map[i].reported = 1; +} + /* * Create an ib_qp struct that is either a QP or an SRQ, depending on * the value of the is_srq parameter. If init_attr and srq_init_attr share @@ -407,12 +451,11 @@ static struct ehca_qp *internal_create_qp( struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata, int is_srq) { - struct ehca_qp *my_qp; + struct ehca_qp *my_qp, *my_srq = NULL; struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, ib_device); struct ib_ucontext *context = NULL; - u32 nr_qes; u64 h_ret; int is_llqp = 0, has_srq = 0; int qp_type, max_send_sge, max_recv_sge, ret; @@ -457,8 +500,7 @@ static struct ehca_qp *internal_create_qp( /* handle SRQ base QPs */ if (init_attr->srq) { - struct ehca_qp *my_srq = - container_of(init_attr->srq, struct ehca_qp, ib_srq); + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); has_srq = 1; parms.ext_type = EQPT_SRQBASE; @@ -716,15 +758,19 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit2; } - nr_qes = my_qp->ipz_squeue.queue_length / + + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / my_qp->ipz_squeue.qe_size; - my_qp->sq_map = vmalloc(nr_qes * + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map) { + if (!my_qp->sq_map.map) { ehca_err(pd->device, "Couldn't allocate squeue " "map ret=%i", ret); goto create_qp_exit3; } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); } if (HAS_RQ(my_qp)) { @@ -736,6 +782,25 @@ static struct ehca_qp *internal_create_qp( "and pages ret=%i", ret); goto create_qp_exit4; } + + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } else if (init_attr->srq) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; } if (is_srq) { @@ -799,7 +864,7 @@ static struct ehca_qp *internal_create_qp( if (ret) { ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit6; + goto create_qp_exit7; } } @@ -825,25 +890,29 @@ static struct ehca_qp *internal_create_qp( if (ib_copy_to_udata(udata, &resp, sizeof resp)) { ehca_err(pd->device, "Copy to udata failed"); ret = -EINVAL; - goto create_qp_exit7; + goto create_qp_exit8; } } return my_qp; -create_qp_exit7: +create_qp_exit8: ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); -create_qp_exit6: +create_qp_exit7: kfree(my_qp->mod_qp_parm); +create_qp_exit6: + if (HAS_RQ(my_qp)) + vfree(my_qp->rq_map.map); + create_qp_exit5: if (HAS_RQ(my_qp)) ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); create_qp_exit4: if (HAS_SQ(my_qp)) - vfree(my_qp->sq_map); + vfree(my_qp->sq_map.map); create_qp_exit3: if (HAS_SQ(my_qp)) @@ -1035,6 +1104,101 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, return 0; } +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = abs_to_virt(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#lx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + wqe_idx = q_ofs / ipz_queue->qe_size; + if (wqe_idx < qmap->tail) + qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx; + else + qmap->left_to_poll = wqe_idx - qmap->tail; + + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%li", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. + */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + /* * internal_modify_qp with circumvention to handle aqp0 properly * smi_reset2init indicates if this is an internal reset-to-init-call for @@ -1539,10 +1703,27 @@ static int internal_modify_qp(struct ib_qp *ibqp, goto modify_qp_exit2; } } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR)) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } if (statetrans == IB_QPST_ANY2RESET) { ipz_qeit_reset(&my_qp->ipz_rqueue); ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp)) + reset_queue_map(&my_qp->rq_map); } if (attr_mask & IB_QP_QKEY) @@ -1958,6 +2139,16 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, idr_remove(&ehca_qp_idr, my_qp->token); write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. + */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp)) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp)) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + /* now wait until all pending events have completed */ wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); @@ -1983,7 +2174,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, if (qp_type == IB_QPT_GSI) { struct ib_event event; ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); + shca->ib_device.name, port_num); event.device = &shca->ib_device; event.event = IB_EVENT_PORT_ERR; event.element.port_num = port_num; @@ -1991,11 +2182,15 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, ib_dispatch_event(&event); } - if (HAS_RQ(my_qp)) + if (HAS_RQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + + vfree(my_qp->rq_map.map); + } if (HAS_SQ(my_qp)) { ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - vfree(my_qp->sq_map); + + vfree(my_qp->sq_map.map); } kmem_cache_free(qp_cache, my_qp); atomic_dec(&shca->num_qps); diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 4426d82..6492807 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -53,9 +53,25 @@ /* in RC traffic, insert an empty RDMA READ every this many packets */ #define ACK_CIRC_THRESHOLD 2000000 +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr) + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) { u8 cnt_ds; if (unlikely((recv_wr->num_sge < 0) || @@ -69,7 +85,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = recv_wr->wr_id; + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); wqe_p->nr_of_data_seg = recv_wr->num_sge; for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { @@ -146,6 +162,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, u64 dma_length; struct ehca_av *my_av; u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; if (unlikely((send_wr->num_sge < 0) || (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { @@ -158,11 +175,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp, /* clear wqe header until sglist */ memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - wqe_p->work_request_id = send_wr->wr_id & ~QMAP_IDX_MASK; - wqe_p->work_request_id |= sq_map_idx & QMAP_IDX_MASK; + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - qp->sq_map[sq_map_idx].app_wr_id = send_wr->wr_id & QMAP_IDX_MASK; - qp->sq_map[sq_map_idx].reported = 0; + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; switch (send_wr->opcode) { case IB_WR_SEND: @@ -496,7 +512,9 @@ static int internal_post_recv(struct ehca_qp *my_qp, struct ehca_wqe *wqe_p; int wqe_cnt = 0; int ret = 0; + u32 rq_map_idx; unsigned long flags; + struct ehca_qmap_entry *qmap_entry; if (unlikely(!HAS_RQ(my_qp))) { ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", @@ -524,8 +542,15 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. + */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr); + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr, + rq_map_idx); /* * if something failed, * reset the free entry pointer to the start value @@ -540,6 +565,11 @@ static int internal_post_recv(struct ehca_qp *my_qp, } goto post_recv_exit0; } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id); + qmap_entry->reported = 0; + wqe_cnt++; } /* eof for cur_recv_wr */ @@ -596,10 +626,12 @@ static const u8 ib_wc_opcode[255] = { /* internal function to poll one entry of cq */ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) { - int ret = 0; + int ret = 0, qmap_tail_idx; struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); struct ehca_cqe *cqe; struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; int cqe_count = 0, is_error; repoll: @@ -674,27 +706,52 @@ repoll: goto repoll; wc->qp = &my_qp->ib_qp; - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) { - struct ehca_qmap_entry *qmap_entry; + if (is_error) { /* - * We got a send completion and need to restore the original - * wr_id. + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs */ - qmap_entry = &my_qp->sq_map[cqe->work_request_id & - QMAP_IDX_MASK]; + ehca_add_to_err_list(my_qp, 1); + my_qp->sq_map.left_to_poll = 0; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - wc->wr_id = cqe->work_request_id & ~QMAP_IDX_MASK; - wc->wr_id |= qmap_entry->app_wr_id; - qmap_entry->reported = 1; - } else + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + my_qp->rq_map.left_to_poll = 0; + } + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else /* We got a receive completion. */ - wc->wr_id = cqe->work_request_id; + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* this is a proper completion, we need to advance the tail pointer */ + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } /* eval ib_wc_opcode */ wc->opcode = ib_wc_opcode[cqe->optype]-1; @@ -733,13 +790,88 @@ poll_cq_one_exit0: return ret; } +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->tail]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + memset(wc, 0, sizeof(*wc)); + + offset = qmap->tail * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#lx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance tail pointer */ + qmap_entry->reported = 1; + if (++qmap->tail == qmap->entries) + qmap->tail = 0; + qmap_entry = &qmap->map[qmap->tail]; + + wc++; nr++; + } + + return nr; + +} + int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) { struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); int nr; + struct ehca_qp *err_qp; struct ib_wc *current_wc = wc; int ret = 0; unsigned long flags; + int entries_left = num_entries; if (num_entries < 1) { ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " @@ -749,15 +881,40 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) } spin_lock_irqsave(&my_cq->spinlock, flags); - for (nr = 0; nr < num_entries; nr++) { + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { ret = ehca_poll_cq_one(cq, current_wc); if (ret) break; current_wc++; } /* eof for nr */ + entries_left -= nr; + spin_unlock_irqrestore(&my_cq->spinlock, flags); if (ret == -EAGAIN || !ret) - ret = nr; + ret = num_entries - entries_left; poll_cq_exit0: return ret; diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c index 9771052..7b93cda 100644 --- a/drivers/infiniband/hw/ipath/ipath_rc.c +++ b/drivers/infiniband/hw/ipath/ipath_rc.c @@ -675,7 +675,8 @@ static void send_rc_ack(struct ipath_qp *qp) hdr.lrh[0] = cpu_to_be16(lrh0); hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); - hdr.lrh[3] = cpu_to_be16(dd->ipath_lid); + hdr.lrh[3] = cpu_to_be16(dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & IPATH_PSN_MASK); diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c index af051f7..fc0f6d9 100644 --- a/drivers/infiniband/hw/ipath/ipath_ruc.c +++ b/drivers/infiniband/hw/ipath/ipath_ruc.c @@ -618,7 +618,8 @@ void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp, qp->s_hdr.lrh[0] = cpu_to_be16(lrh0); qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); - qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid); + qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid | + qp->remote_ah_attr.src_path_bits); bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index); bth0 |= extra_bytes << 20; ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22)); diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c index b766e40..eabc424 100644 --- a/drivers/infiniband/hw/ipath/ipath_verbs.c +++ b/drivers/infiniband/hw/ipath/ipath_verbs.c @@ -340,9 +340,16 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) int acc; int ret; unsigned long flags; + struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; spin_lock_irqsave(&qp->s_lock, flags); + if (qp->ibqp.qp_type != IB_QPT_SMI && + !(dd->ipath_flags & IPATH_LINKACTIVE)) { + ret = -ENETDOWN; + goto bail; + } + /* Check that state is OK to post send. */ if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) goto bail_inval; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9559248..baa01de 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1058,6 +1058,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, else sqd_event = 0; + if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + context->rlkey |= (1 << 4); + /* * Before passing a kernel QP to the HW, make sure that the * ownership bits of the send queue are set and the SQ diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index cc440f9..65ad359 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -149,18 +149,10 @@ void mthca_start_catas_poll(struct mthca_dev *dev) ((pci_resource_len(dev->pdev, 0) - 1) & dev->catas_err.addr); - if (!request_mem_region(addr, dev->catas_err.size * 4, - DRV_NAME)) { - mthca_warn(dev, "couldn't request catastrophic error region " - "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - return; - } - dev->catas_err.map = ioremap(addr, dev->catas_err.size * 4); if (!dev->catas_err.map) { mthca_warn(dev, "couldn't map catastrophic error region " "at 0x%lx/0x%x\n", addr, dev->catas_err.size * 4); - release_mem_region(addr, dev->catas_err.size * 4); return; } @@ -175,13 +167,8 @@ void mthca_stop_catas_poll(struct mthca_dev *dev) { del_timer_sync(&dev->catas_err.timer); - if (dev->catas_err.map) { + if (dev->catas_err.map) iounmap(dev->catas_err.map); - release_mem_region(pci_resource_start(dev->pdev, 0) + - ((pci_resource_len(dev->pdev, 0) - 1) & - dev->catas_err.addr), - dev->catas_err.size * 4); - } spin_lock_irq(&catas_lock); list_del(&dev->catas_err.list); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index cc6858f..28f0e0c 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -652,27 +652,13 @@ static int mthca_map_reg(struct mthca_dev *dev, { unsigned long base = pci_resource_start(dev->pdev, 0); - if (!request_mem_region(base + offset, size, DRV_NAME)) - return -EBUSY; - *map = ioremap(base + offset, size); - if (!*map) { - release_mem_region(base + offset, size); + if (!*map) return -ENOMEM; - } return 0; } -static void mthca_unmap_reg(struct mthca_dev *dev, unsigned long offset, - unsigned long size, void __iomem *map) -{ - unsigned long base = pci_resource_start(dev->pdev, 0); - - release_mem_region(base + offset, size); - iounmap(map); -} - static int mthca_map_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { @@ -699,9 +685,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } @@ -710,12 +694,8 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); return -ENOMEM; } } else { @@ -731,8 +711,7 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) &dev->eq_regs.tavor.ecr_base)) { mthca_err(dev, "Couldn't map ecr register, " "aborting.\n"); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->clr_base); return -ENOMEM; } } @@ -744,22 +723,12 @@ static int mthca_map_eq_regs(struct mthca_dev *dev) static void mthca_unmap_eq_regs(struct mthca_dev *dev) { if (mthca_is_memfree(dev)) { - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_set_ci_base, - MTHCA_EQ_SET_CI_SIZE, - dev->eq_regs.arbel.eq_set_ci_base); - mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.eq_arm_base) + 4, 4, - dev->eq_regs.arbel.eq_arm); - mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & - dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.arbel.eq_set_ci_base); + iounmap(dev->eq_regs.arbel.eq_arm); + iounmap(dev->clr_base); } else { - mthca_unmap_reg(dev, MTHCA_ECR_BASE, - MTHCA_ECR_SIZE + MTHCA_ECR_CLR_SIZE, - dev->eq_regs.tavor.ecr_base); - mthca_unmap_reg(dev, MTHCA_CLR_INT_BASE, MTHCA_CLR_INT_SIZE, - dev->clr_base); + iounmap(dev->eq_regs.tavor.ecr_base); + iounmap(dev->clr_base); } } diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index fb9f91b..52f60f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -921,58 +921,6 @@ err_uar_table_free: return err; } -static int mthca_request_regions(struct pci_dev *pdev, int ddr_hidden) -{ - int err; - - /* - * We can't just use pci_request_regions() because the MSI-X - * table is right in the middle of the first BAR. If we did - * pci_request_region and grab all of the first BAR, then - * setting up MSI-X would fail, since the PCI core wants to do - * request_mem_region on the MSI-X vector table. - * - * So just request what we need right now, and request any - * other regions we need when setting up EQs. - */ - if (!request_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE, DRV_NAME)) - return -EBUSY; - - err = pci_request_region(pdev, 2, DRV_NAME); - if (err) - goto err_bar2_failed; - - if (!ddr_hidden) { - err = pci_request_region(pdev, 4, DRV_NAME); - if (err) - goto err_bar4_failed; - } - - return 0; - -err_bar4_failed: - pci_release_region(pdev, 2); - -err_bar2_failed: - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); - - return err; -} - -static void mthca_release_regions(struct pci_dev *pdev, - int ddr_hidden) -{ - if (!ddr_hidden) - pci_release_region(pdev, 4); - - pci_release_region(pdev, 2); - - release_mem_region(pci_resource_start(pdev, 0) + MTHCA_HCR_BASE, - MTHCA_HCR_SIZE); -} - static int mthca_enable_msi_x(struct mthca_dev *mdev) { struct msix_entry entries[3]; @@ -1059,7 +1007,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM)) ddr_hidden = 1; - err = mthca_request_regions(pdev, ddr_hidden); + err = pci_request_regions(pdev, DRV_NAME); if (err) { dev_err(&pdev->dev, "Cannot obtain PCI resources, " "aborting.\n"); @@ -1196,7 +1144,7 @@ err_free_dev: ib_dealloc_device(&mdev->ib_dev); err_free_res: - mthca_release_regions(pdev, ddr_hidden); + pci_release_regions(pdev); err_disable_pdev: pci_disable_device(pdev); @@ -1240,8 +1188,7 @@ static void __mthca_remove_one(struct pci_dev *pdev) pci_disable_msix(pdev); ib_dealloc_device(&mdev->ib_dev); - mthca_release_regions(pdev, mdev->mthca_flags & - MTHCA_FLAG_DDR_HIDDEN); + pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index b0cab64..a2b04d6 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -70,27 +70,31 @@ int interrupt_mod_interval = 0; /* Interoperability */ int mpa_version = 1; -module_param(mpa_version, int, 0); +module_param(mpa_version, int, 0644); MODULE_PARM_DESC(mpa_version, "MPA version to be used int MPA Req/Resp (0 or 1)"); /* Interoperability */ int disable_mpa_crc = 0; -module_param(disable_mpa_crc, int, 0); +module_param(disable_mpa_crc, int, 0644); MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); unsigned int send_first = 0; -module_param(send_first, int, 0); +module_param(send_first, int, 0644); MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection"); unsigned int nes_drv_opt = 0; -module_param(nes_drv_opt, int, 0); +module_param(nes_drv_opt, int, 0644); MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); unsigned int nes_debug_level = 0; module_param_named(debug_level, nes_debug_level, uint, 0644); MODULE_PARM_DESC(debug_level, "Enable debug output level"); +unsigned int wqm_quanta = 0x10000; +module_param(wqm_quanta, int, 0644); +MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); + LIST_HEAD(nes_adapter_list); static LIST_HEAD(nes_dev_list); @@ -557,12 +561,32 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i goto bail5; } nesdev->nesadapter->et_rx_coalesce_usecs_irq = interrupt_mod_interval; + nesdev->nesadapter->wqm_quanta = wqm_quanta; /* nesdev->base_doorbell_index = nesdev->nesadapter->pd_config_base[PCI_FUNC(nesdev->pcidev->devfn)]; */ nesdev->base_doorbell_index = 1; nesdev->doorbell_start = nesdev->nesadapter->doorbell_start; - nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % nesdev->nesadapter->port_count; + if (nesdev->nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + switch (PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count) { + case 1: + nesdev->mac_index = 2; + break; + case 2: + nesdev->mac_index = 1; + break; + case 3: + nesdev->mac_index = 3; + break; + case 0: + default: + nesdev->mac_index = 0; + } + } else { + nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn) % + nesdev->nesadapter->port_count; + } tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev); @@ -581,7 +605,7 @@ static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_i nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) | (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16)); if (PCI_FUNC(nesdev->pcidev->devfn) < 4) { - nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24)); + nesdev->int_req |= (1 << (PCI_FUNC(nesdev->mac_index)+24)); } /* TODO: This really should be the first driver to load, not function 0 */ @@ -772,14 +796,14 @@ static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) list_for_each_entry(nesdev, &nes_dev_list, list) { if (i == ee_flsh_adapter) { - devfn = nesdev->nesadapter->devfn; - bus_number = nesdev->nesadapter->bus_number; + devfn = nesdev->pcidev->devfn; + bus_number = nesdev->pcidev->bus->number; break; } i++; } - return snprintf(buf, PAGE_SIZE, "%x:%x", bus_number, devfn); + return snprintf(buf, PAGE_SIZE, "%x:%x\n", bus_number, devfn); } static ssize_t nes_store_adapter(struct device_driver *ddp, @@ -1050,6 +1074,55 @@ static ssize_t nes_store_idx_data(struct device_driver *ddp, return strnlen(buf, count); } + +/** + * nes_show_wqm_quanta + */ +static ssize_t nes_show_wqm_quanta(struct device_driver *ddp, char *buf) +{ + u32 wqm_quanta_value = 0xdead; + u32 i = 0; + struct nes_device *nesdev; + + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + wqm_quanta_value = nesdev->nesadapter->wqm_quanta; + break; + } + i++; + } + + return snprintf(buf, PAGE_SIZE, "0x%X\n", wqm_quanta); +} + + +/** + * nes_store_wqm_quanta + */ +static ssize_t nes_store_wqm_quanta(struct device_driver *ddp, + const char *buf, size_t count) +{ + unsigned long wqm_quanta_value; + u32 wqm_config1; + u32 i = 0; + struct nes_device *nesdev; + + strict_strtoul(buf, 0, &wqm_quanta_value); + list_for_each_entry(nesdev, &nes_dev_list, list) { + if (i == ee_flsh_adapter) { + nesdev->nesadapter->wqm_quanta = wqm_quanta_value; + wqm_config1 = nes_read_indexed(nesdev, + NES_IDX_WQM_CONFIG1); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, + ((wqm_quanta_value << 1) | + (wqm_config1 & 0x00000001))); + break; + } + i++; + } + return strnlen(buf, count); +} + static DRIVER_ATTR(adapter, S_IRUSR | S_IWUSR, nes_show_adapter, nes_store_adapter); static DRIVER_ATTR(eeprom_cmd, S_IRUSR | S_IWUSR, @@ -1068,6 +1141,8 @@ static DRIVER_ATTR(idx_addr, S_IRUSR | S_IWUSR, nes_show_idx_addr, nes_store_idx_addr); static DRIVER_ATTR(idx_data, S_IRUSR | S_IWUSR, nes_show_idx_data, nes_store_idx_data); +static DRIVER_ATTR(wqm_quanta, S_IRUSR | S_IWUSR, + nes_show_wqm_quanta, nes_store_wqm_quanta); static int nes_create_driver_sysfs(struct pci_driver *drv) { @@ -1081,6 +1156,7 @@ static int nes_create_driver_sysfs(struct pci_driver *drv) error |= driver_create_file(&drv->driver, &driver_attr_nonidx_data); error |= driver_create_file(&drv->driver, &driver_attr_idx_addr); error |= driver_create_file(&drv->driver, &driver_attr_idx_data); + error |= driver_create_file(&drv->driver, &driver_attr_wqm_quanta); return error; } @@ -1095,6 +1171,7 @@ static void nes_remove_driver_sysfs(struct pci_driver *drv) driver_remove_file(&drv->driver, &driver_attr_nonidx_data); driver_remove_file(&drv->driver, &driver_attr_idx_addr); driver_remove_file(&drv->driver, &driver_attr_idx_data); + driver_remove_file(&drv->driver, &driver_attr_wqm_quanta); } /** diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 8eb7ae9..1595dc7 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -169,7 +169,7 @@ extern int disable_mpa_crc; extern unsigned int send_first; extern unsigned int nes_drv_opt; extern unsigned int nes_debug_level; - +extern unsigned int wqm_quanta; extern struct list_head nes_adapter_list; extern atomic_t cm_connects; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 499d3cf..2caf9da 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -52,7 +52,7 @@ #include <linux/random.h> #include <linux/list.h> #include <linux/threads.h> - +#include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/ip_fib.h> @@ -1019,23 +1019,43 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, /** - * nes_addr_send_arp + * nes_addr_resolve_neigh */ -static void nes_addr_send_arp(u32 dst_ip) +static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip) { struct rtable *rt; struct flowi fl; + struct neighbour *neigh; + int rc = -1; + DECLARE_MAC_BUF(mac); memset(&fl, 0, sizeof fl); fl.nl_u.ip4_u.daddr = htonl(dst_ip); if (ip_route_output_key(&init_net, &rt, &fl)) { printk("%s: ip_route_output_key failed for 0x%08X\n", __func__, dst_ip); - return; + return rc; + } + + neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, nesvnic->netdev); + if (neigh) { + if (neigh->nud_state & NUD_VALID) { + nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" + " is %s, Gateway is 0x%08X \n", dst_ip, + print_mac(mac, neigh->ha), ntohl(rt->rt_gateway)); + nes_manage_arp_cache(nesvnic->netdev, neigh->ha, + dst_ip, NES_ARP_ADD); + rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, + NES_ARP_RESOLVE); + } + neigh_release(neigh); } - neigh_event_send(rt->u.dst.neighbour, NULL); + if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); + return rc; } @@ -1108,9 +1128,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, /* get the mac addr for the remote node */ arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); if (arpindex < 0) { - kfree(cm_node); - nes_addr_send_arp(cm_info->rem_addr); - return NULL; + arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr); + if (arpindex < 0) { + kfree(cm_node); + return NULL; + } } /* copy the mac addr to node context */ @@ -1826,7 +1848,7 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, /** * mini_cm_connect - make a connection node with params */ -struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, +static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, struct nes_vnic *nesvnic, u16 private_data_len, void *private_data, struct nes_cm_info *cm_info) { @@ -2007,7 +2029,6 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod ret = rem_ref_cm_node(cm_core, cm_node); break; } - cm_node->cm_id = NULL; return ret; } diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 1513d40..7c49cc8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -55,18 +55,19 @@ u32 int_mod_cq_depth_24; u32 int_mod_cq_depth_16; u32 int_mod_cq_depth_4; u32 int_mod_cq_depth_1; - +static const u8 nes_max_critical_error_count = 100; #include "nes_cm.h" static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq); static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count); static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode); + struct nes_adapter *nesadapter, u8 OneG_Mode); static void nes_nic_napi_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq); static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq); static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq); static void nes_process_iwarp_aeqe(struct nes_device *nesdev, struct nes_hw_aeqe *aeqe); +static void process_critical_error(struct nes_device *nesdev); static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); @@ -222,11 +223,10 @@ static void nes_nic_tune_timer(struct nes_device *nesdev) } /* boundary checking */ - if (shared_timer->timer_in_use > NES_NIC_FAST_TIMER_HIGH) - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_HIGH; - else if (shared_timer->timer_in_use < NES_NIC_FAST_TIMER_LOW) { - shared_timer->timer_in_use = NES_NIC_FAST_TIMER_LOW; - } + if (shared_timer->timer_in_use > shared_timer->threshold_high) + shared_timer->timer_in_use = shared_timer->threshold_high; + else if (shared_timer->timer_in_use < shared_timer->threshold_low) + shared_timer->timer_in_use = shared_timer->threshold_low; nesdev->currcq_count = 0; @@ -292,9 +292,6 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) return NULL; - if (nes_init_serdes(nesdev, hw_rev, port_count, OneG_Mode)) - return NULL; - nes_init_csr_ne020(nesdev, hw_rev, port_count); max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE); nes_debug(NES_DBG_INIT, "QP_CTX_SIZE=%u\n", max_qp); @@ -353,6 +350,22 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n", nesadapter, (u32)sizeof(struct nes_adapter), adapter_size); + if (nes_read_eeprom_values(nesdev, nesadapter)) { + printk(KERN_ERR PFX "Unable to read EEPROM data.\n"); + kfree(nesadapter); + return NULL; + } + + if (nes_init_serdes(nesdev, hw_rev, port_count, nesadapter, + OneG_Mode)) { + kfree(nesadapter); + return NULL; + } + nes_init_csr_ne020(nesdev, hw_rev, port_count); + + memset(nesadapter->pft_mcast_map, 255, + sizeof nesadapter->pft_mcast_map); + /* populate the new nesadapter */ nesadapter->devfn = nesdev->pcidev->devfn; nesadapter->bus_number = nesdev->pcidev->bus->number; @@ -468,20 +481,25 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) { /* setup port configuration */ if (nesadapter->port_count == 1) { - u32temp = 0x00000000; + nesadapter->log_port = 0x00000000; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002); else nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } else { - if (nesadapter->port_count == 2) - u32temp = 0x00000044; - else - u32temp = 0x000000e4; + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + nesadapter->log_port = 0x000000D8; + } else { + if (nesadapter->port_count == 2) + nesadapter->log_port = 0x00000044; + else + nesadapter->log_port = 0x000000e4; + } nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003); } - nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp); + nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, + nesadapter->log_port); nes_debug(NES_DBG_INIT, "Probe time, LOG2PHY=%u\n", nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT)); @@ -706,23 +724,43 @@ static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_ * nes_init_serdes */ static int nes_init_serdes(struct nes_device *nesdev, u8 hw_rev, u8 port_count, - u8 OneG_Mode) + struct nes_adapter *nesadapter, u8 OneG_Mode) { int i; u32 u32temp; + u32 serdes_common_control; if (hw_rev != NE020_REV) { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF); - if (!OneG_Mode) + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + serdes_common_control = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL0); + serdes_common_control |= 0x000000100; + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL0, + serdes_common_control); + } else if (!OneG_Mode) { nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000); - if (port_count > 1) { + } + if (((port_count > 1) && + (nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) || + ((port_count > 2) && + (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) { /* init serdes 1 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF); - if (!OneG_Mode) + if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) { + serdes_common_control = nes_read_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL1); + serdes_common_control |= 0x000000100; + nes_write_indexed(nesdev, + NES_IDX_ETH_SERDES_COMMON_CONTROL1, + serdes_common_control); + } else if (!OneG_Mode) { nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000); } + } } else { /* init serdes 0 */ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008); @@ -826,7 +864,8 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou nes_write_indexed(nesdev, 0x00005000, 0x00018000); /* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */ - nes_write_indexed(nesdev, 0x00005004, 0x00020001); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG1, (wqm_quanta << 1) | + 0x00000001); nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F); nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F); @@ -1226,6 +1265,7 @@ int nes_init_phy(struct nes_device *nesdev) if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) { printk(PFX "%s: Programming mdc config for 1G\n", __func__); tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); + tx_config &= 0xFFFFFFE3; tx_config |= 0x04; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } @@ -1291,7 +1331,8 @@ int nes_init_phy(struct nes_device *nesdev) (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { /* setup 10G MDIO operation */ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG); - tx_config |= 0x14; + tx_config &= 0xFFFFFFE3; + tx_config |= 0x15; nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); } if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) { @@ -1315,7 +1356,7 @@ int nes_init_phy(struct nes_device *nesdev) nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00); - nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000); + nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001); nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528); /* @@ -1759,9 +1800,14 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev) */ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) { + u64 u64temp; + dma_addr_t bus_address; struct nes_device *nesdev = nesvnic->nesdev; struct nes_hw_cqp_wqe *cqp_wqe; + struct nes_hw_nic_sq_wqe *nic_sqe; struct nes_hw_nic_rq_wqe *nic_rqe; + __le16 *wqe_fragment_length; + u16 wqe_fragment_index; u64 wqe_frag; u32 cqp_head; unsigned long flags; @@ -1770,14 +1816,69 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; - wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); - wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; + wqe_frag = (u64)le32_to_cpu( + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]); + wqe_frag |= ((u64)le32_to_cpu( + nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32; pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); } + /* Free remaining NIC transmit buffers */ + while (nesvnic->nic.sq_head != nesvnic->nic.sq_tail) { + nic_sqe = &nesvnic->nic.sq_vbase[nesvnic->nic.sq_tail]; + wqe_fragment_index = 1; + wqe_fragment_length = (__le16 *) + &nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; + /* bump past the vlan tag */ + wqe_fragment_length++; + if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) { + u64temp = (u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX + + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + if (test_and_clear_bit(nesvnic->nic.sq_tail, + nesvnic->nic.first_frag_overflow)) { + pci_unmap_single(nesdev->pcidev, + bus_address, + le16_to_cpu(wqe_fragment_length[ + wqe_fragment_index++]), + PCI_DMA_TODEVICE); + } + for (; wqe_fragment_index < 5; wqe_fragment_index++) { + if (wqe_fragment_length[wqe_fragment_index]) { + u64temp = le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_LOW_IDX+ + wqe_fragment_index*2]); + u64temp += ((u64)le32_to_cpu( + nic_sqe->wqe_words[ + NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+ + wqe_fragment_index*2]))<<32; + bus_address = (dma_addr_t)u64temp; + pci_unmap_page(nesdev->pcidev, + bus_address, + le16_to_cpu( + wqe_fragment_length[ + wqe_fragment_index]), + PCI_DMA_TODEVICE); + } else + break; + } + } + if (nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]) + dev_kfree_skb( + nesvnic->nic.tx_skb[nesvnic->nic.sq_tail]); + + nesvnic->nic.sq_tail = (++nesvnic->nic.sq_tail) + & (nesvnic->nic.sq_size - 1); + } + spin_lock_irqsave(&nesdev->cqp.lock, flags); /* Destroy NIC QP */ @@ -1894,7 +1995,30 @@ int nes_napi_isr(struct nes_device *nesdev) } } - +static void process_critical_error(struct nes_device *nesdev) +{ + u32 debug_error; + u32 nes_idx_debug_error_masks0 = 0; + u16 error_module = 0; + + debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); + printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", + (u16)debug_error); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, + 0x01010000 | (debug_error & 0x0000ffff)); + if (crit_err_count++ > 10) + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + error_module = (u16) (debug_error & 0x1F00) >> 8; + if (++nesdev->nesadapter->crit_error_count[error_module-1] >= + nes_max_critical_error_count) { + printk(KERN_ERR PFX "Masking off critical error for module " + "0x%02X\n", (u16)error_module); + nes_idx_debug_error_masks0 = nes_read_indexed(nesdev, + NES_IDX_DEBUG_ERROR_MASKS0); + nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, + nes_idx_debug_error_masks0 | (1 << error_module)); + } +} /** * nes_dpc */ @@ -1909,7 +2033,6 @@ void nes_dpc(unsigned long param) u32 timer_stat; u32 temp_int_stat; u32 intf_int_stat; - u32 debug_error; u32 processed_intf_int = 0; u16 processed_timer_int = 0; u16 completion_ints = 0; @@ -1987,14 +2110,7 @@ void nes_dpc(unsigned long param) intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT); intf_int_stat &= nesdev->intf_int_req; if (NES_INTF_INT_CRITERR & intf_int_stat) { - debug_error = nes_read_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS); - printk(KERN_ERR PFX "Critical Error reported by device!!! 0x%02X\n", - (u16)debug_error); - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_CONTROL_STATUS, - 0x01010000 | (debug_error & 0x0000ffff)); - /* BUG(); */ - if (crit_err_count++ > 10) - nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 1 << 0x17); + process_critical_error(nesdev); } if (NES_INTF_INT_PCIERR & intf_int_stat) { printk(KERN_ERR PFX "PCI Error reported by device!!!\n"); @@ -2258,7 +2374,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number) spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } /* read the PHY interrupt status register */ - if (nesadapter->OneG_Mode) { + if ((nesadapter->OneG_Mode) && + (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) { do { nes_read_1G_phy_reg(nesdev, 0x1a, nesadapter->phy_index[mac_index], &phy_data); @@ -3077,6 +3194,22 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, nes_cm_disconn(nesqp); break; /* TODO: additional AEs need to be here */ + case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION: + nesqp = *((struct nes_qp **)&context); + spin_lock_irqsave(&nesqp->lock, flags); + nesqp->hw_iwarp_state = iwarp_state; + nesqp->hw_tcp_state = tcp_state; + nesqp->last_aeq = async_event_id; + spin_unlock_irqrestore(&nesqp->lock, flags); + if (nesqp->ibqp.event_handler) { + ibevent.device = nesqp->ibqp.device; + ibevent.element.qp = &nesqp->ibqp; + ibevent.event = IB_EVENT_QP_ACCESS_ERR; + nesqp->ibqp.event_handler(&ibevent, + nesqp->ibqp.qp_context); + } + nes_cm_disconn(nesqp); + break; default: nes_debug(NES_DBG_AEQ, "Processing an iWARP related AE for QP, misc = 0x%04X\n", async_event_id); diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 7b81e0a..610b9d8 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -156,6 +156,7 @@ enum indexed_regs { NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_WQM_CONFIG1 = 0x5004, NES_IDX_CM_CONFIG = 0x5100, NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, NES_IDX_NIC_PHYPORT_TO_USW = 0x6008, @@ -967,6 +968,7 @@ struct nes_arp_entry { #define DEFAULT_JUMBO_NES_QL_TARGET 40 #define DEFAULT_JUMBO_NES_QL_HIGH 128 #define NES_NIC_CQ_DOWNWARD_TREND 16 +#define NES_PFT_SIZE 48 struct nes_hw_tune_timer { /* u16 cq_count; */ @@ -1079,6 +1081,7 @@ struct nes_adapter { u32 et_rx_max_coalesced_frames_high; u32 et_rate_sample_interval; u32 timer_int_limit; + u32 wqm_quanta; /* Adapter base MAC address */ u32 mac_addr_low; @@ -1094,12 +1097,14 @@ struct nes_adapter { u16 pd_config_base[4]; u16 link_interrupt_count[4]; + u8 crit_error_count[32]; /* the phy index for each port */ u8 phy_index[4]; u8 mac_sw_state[4]; u8 mac_link_down[4]; u8 phy_type[4]; + u8 log_port; /* PCI information */ unsigned int devfn; @@ -1113,6 +1118,7 @@ struct nes_adapter { u8 virtwq; u8 et_use_adaptive_rx_coalesce; u8 adapter_fcn_count; + u8 pft_mcast_map[NES_PFT_SIZE]; }; struct nes_pbl { diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 1b0938c..7303586 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -91,6 +91,7 @@ static struct nic_qp_map *nic_qp_mapping_per_function[] = { static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN; static int debug = -1; +static int nics_per_function = 1; /** * nes_netdev_poll @@ -201,7 +202,8 @@ static int nes_netdev_open(struct net_device *netdev) nes_debug(NES_DBG_NETDEV, "i=%d, perfect filter table index= %d, PERF FILTER LOW" " (Addr:%08X) = %08X, HIGH = %08X.\n", i, nesvnic->qp_nic_index[i], - NES_IDX_PERFECT_FILTER_LOW+((nesvnic->perfect_filter_index + i) * 8), + NES_IDX_PERFECT_FILTER_LOW+ + (nesvnic->qp_nic_index[i] * 8), macaddr_low, (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)nesvnic->nic_index) << 16))); @@ -272,14 +274,18 @@ static int nes_netdev_stop(struct net_device *netdev) break; } - if (first_nesvnic->netdev_open == 0) + if ((first_nesvnic->netdev_open == 1) && (first_nesvnic != nesvnic) && + (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != + PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { + nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+ + (0x200*nesdev->mac_index), 0xffffffff); + nes_write_indexed(first_nesvnic->nesdev, + NES_IDX_MAC_INT_MASK+ + (0x200*first_nesvnic->nesdev->mac_index), + ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | + NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); + } else { nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK+(0x200*nesdev->mac_index), 0xffffffff); - else if ((first_nesvnic != nesvnic) && - (PCI_FUNC(first_nesvnic->nesdev->pcidev->devfn) != PCI_FUNC(nesvnic->nesdev->pcidev->devfn))) { - nes_write_indexed(nesdev, NES_IDX_MAC_INT_MASK + (0x200 * nesdev->mac_index), 0xffffffff); - nes_write_indexed(first_nesvnic->nesdev, NES_IDX_MAC_INT_MASK + (0x200 * first_nesvnic->nesdev->mac_index), - ~(NES_MAC_INT_LINK_STAT_CHG | NES_MAC_INT_XGMII_EXT | - NES_MAC_INT_TX_UNDERFLOW | NES_MAC_INT_TX_ERROR)); } nic_active_mask = ~((u32)(1 << nesvnic->nic_index)); @@ -437,7 +443,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev) struct nes_hw_nic_sq_wqe *nic_sqe; struct tcphdr *tcph; /* struct udphdr *udph; */ -#define NES_MAX_TSO_FRAGS 18 +#define NES_MAX_TSO_FRAGS MAX_SKB_FRAGS /* 64K segment plus overflow on each side */ dma_addr_t tso_bus_address[NES_MAX_TSO_FRAGS]; dma_addr_t bus_address; @@ -605,6 +611,8 @@ tso_sq_no_longer_full: wqe_fragment_length[wqe_fragment_index] = 0; set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX, bus_address); + tso_wqe_length += skb_headlen(skb) - + original_first_length; } while (wqe_fragment_index < 5) { wqe_fragment_length[wqe_fragment_index] = @@ -827,6 +835,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; struct dev_mc_list *multicast_addr; u32 nic_active_bit; u32 nic_active; @@ -836,7 +845,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) u8 mc_all_on = 0; u8 mc_index; int mc_nic_index = -1; + u8 pft_entries_preallocated = max(nesadapter->adapter_fcn_count * + nics_per_function, 4); + u8 max_pft_entries_avaiable = NES_PFT_SIZE - pft_entries_preallocated; + unsigned long flags; + spin_lock_irqsave(&nesadapter->resource_lock, flags); nic_active_bit = 1 << nesvnic->nic_index; if (netdev->flags & IFF_PROMISC) { @@ -847,7 +861,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) nic_active |= nic_active_bit; nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); mc_all_on = 1; - } else if ((netdev->flags & IFF_ALLMULTI) || (netdev->mc_count > NES_MULTICAST_PF_MAX) || + } else if ((netdev->flags & IFF_ALLMULTI) || (nesvnic->nic_index > 3)) { nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL); nic_active |= nic_active_bit; @@ -866,17 +880,34 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) } nes_debug(NES_DBG_NIC_RX, "Number of MC entries = %d, Promiscous = %d, All Multicast = %d.\n", - netdev->mc_count, (netdev->flags & IFF_PROMISC)?1:0, - (netdev->flags & IFF_ALLMULTI)?1:0); + netdev->mc_count, !!(netdev->flags & IFF_PROMISC), + !!(netdev->flags & IFF_ALLMULTI)); if (!mc_all_on) { multicast_addr = netdev->mc_list; - perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + 0x80; - perfect_filter_register_address += nesvnic->nic_index*0x40; - for (mc_index=0; mc_index < NES_MULTICAST_PF_MAX; mc_index++) { - while (multicast_addr && nesvnic->mcrq_mcast_filter && ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, multicast_addr->dmi_addr)) == 0)) + perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW + + pft_entries_preallocated * 0x8; + for (mc_index = 0; mc_index < max_pft_entries_avaiable; + mc_index++) { + while (multicast_addr && nesvnic->mcrq_mcast_filter && + ((mc_nic_index = nesvnic->mcrq_mcast_filter(nesvnic, + multicast_addr->dmi_addr)) == 0)) { multicast_addr = multicast_addr->next; + } if (mc_nic_index < 0) mc_nic_index = nesvnic->nic_index; + while (nesadapter->pft_mcast_map[mc_index] < 16 && + nesadapter->pft_mcast_map[mc_index] != + nesvnic->nic_index && + mc_index < max_pft_entries_avaiable) { + nes_debug(NES_DBG_NIC_RX, + "mc_index=%d skipping nic_index=%d,\ + used for=%d \n", mc_index, + nesvnic->nic_index, + nesadapter->pft_mcast_map[mc_index]); + mc_index++; + } + if (mc_index >= max_pft_entries_avaiable) + break; if (multicast_addr) { DECLARE_MAC_BUF(mac); nes_debug(NES_DBG_NIC_RX, "Assigning MC Address %s to register 0x%04X nic_idx=%d\n", @@ -897,15 +928,33 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev) (u32)macaddr_high | NES_MAC_ADDR_VALID | ((((u32)(1<<mc_nic_index)) << 16))); multicast_addr = multicast_addr->next; + nesadapter->pft_mcast_map[mc_index] = + nesvnic->nic_index; } else { nes_debug(NES_DBG_NIC_RX, "Clearing MC Address at register 0x%04X\n", perfect_filter_register_address+(mc_index * 8)); nes_write_indexed(nesdev, perfect_filter_register_address+4+(mc_index * 8), 0); + nesadapter->pft_mcast_map[mc_index] = 255; } } + /* PFT is not large enough */ + if (multicast_addr && multicast_addr->next) { + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL); + nic_active &= ~nic_active_bit; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, + nic_active); + } } + + spin_unlock_irqrestore(&nesadapter->resource_lock, flags); } @@ -918,6 +967,10 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) struct nes_device *nesdev = nesvnic->nesdev; int ret = 0; u8 jumbomode = 0; + u32 nic_active; + u32 nic_active_bit; + u32 uc_all_active; + u32 mc_all_active; if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu)) return -EINVAL; @@ -931,8 +984,24 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu) nes_nic_init_timer_defaults(nesdev, jumbomode); if (netif_running(netdev)) { + nic_active_bit = 1 << nesvnic->nic_index; + mc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL) & nic_active_bit; + uc_all_active = nes_read_indexed(nesdev, + NES_IDX_NIC_UNICAST_ALL) & nic_active_bit; + nes_netdev_stop(netdev); nes_netdev_open(netdev); + + nic_active = nes_read_indexed(nesdev, + NES_IDX_NIC_MULTICAST_ALL); + nic_active |= mc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_MULTICAST_ALL, + nic_active); + + nic_active = nes_read_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL); + nic_active |= uc_all_active; + nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active); } return ret; @@ -1208,10 +1277,12 @@ static void nes_netdev_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct nes_vnic *nesvnic = netdev_priv(netdev); + struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter; strcpy(drvinfo->driver, DRV_NAME); strcpy(drvinfo->bus_info, pci_name(nesvnic->nesdev->pcidev)); - strcpy(drvinfo->fw_version, "TBD"); + sprintf(drvinfo->fw_version, "%u.%u", nesadapter->firmware_version>>16, + nesadapter->firmware_version & 0x000000ff); strcpy(drvinfo->version, DRV_VERSION); drvinfo->n_stats = nes_netdev_get_stats_count(netdev); drvinfo->testinfo_len = 0; @@ -1587,7 +1658,9 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic, (unsigned long)netdev->features, nesvnic->nic.qp_id, nesvnic->nic_index, nesvnic->logical_port, nesdev->mac_index); - if (nesvnic->nesdev->nesadapter->port_count == 1) { + if (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 1) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = nesvnic->nic_index + 1; if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) { @@ -1598,11 +1671,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, nesvnic->qp_nic_index[3] = nesvnic->nic_index + 3; } } else { - if (nesvnic->nesdev->nesadapter->port_count == 2) { - nesvnic->qp_nic_index[0] = nesvnic->nic_index; - nesvnic->qp_nic_index[1] = nesvnic->nic_index + 2; - nesvnic->qp_nic_index[2] = 0xf; - nesvnic->qp_nic_index[3] = 0xf; + if (nesvnic->nesdev->nesadapter->port_count == 2 || + (nesvnic->nesdev->nesadapter->port_count == 1 && + nesvnic->nesdev->nesadapter->adapter_fcn_count == 2)) { + nesvnic->qp_nic_index[0] = nesvnic->nic_index; + nesvnic->qp_nic_index[1] = nesvnic->nic_index + + 2; + nesvnic->qp_nic_index[2] = 0xf; + nesvnic->qp_nic_index[3] = 0xf; } else { nesvnic->qp_nic_index[0] = nesvnic->nic_index; nesvnic->qp_nic_index[1] = 0xf; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index d79942e..932e56f 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1467,7 +1467,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, default: nes_debug(NES_DBG_QP, "Invalid QP type: %d\n", init_attr->qp_type); return ERR_PTR(-EINVAL); - break; } /* update the QP table */ @@ -2498,7 +2497,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nes_debug(NES_DBG_MR, "Leaving, ibmr=%p", ibmr); return ibmr; - break; case IWNES_MEMREG_TYPE_QP: case IWNES_MEMREG_TYPE_CQ: nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); @@ -2572,7 +2570,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nesmr->ibmr.lkey = -1; nesmr->mode = req.reg_type; return &nesmr->ibmr; - break; } return ERR_PTR(-ENOSYS); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 05eb41b..68ba5c3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -268,10 +268,9 @@ struct ipoib_lro { }; /* - * Device private locking: tx_lock protects members used in TX fast - * path (and we use LLTX so upper layers don't do extra locking). - * lock protects everything else. lock nests inside of tx_lock (ie - * tx_lock must be acquired first if needed). + * Device private locking: network stack tx_lock protects members used + * in TX fast path, lock protects everything else. lock nests inside + * of tx_lock (ie tx_lock must be acquired first if needed). */ struct ipoib_dev_priv { spinlock_t lock; @@ -320,7 +319,6 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 341ffed..7b14c2c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -786,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); + ++tx->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && @@ -801,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -821,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } int ipoib_cm_dev_open(struct net_device *dev) @@ -1149,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); struct ipoib_cm_tx_buf *tx_req; - unsigned long flags; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1180,12 +1180,12 @@ timeout: DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(p->dev); if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(p->dev); } if (p->qp) @@ -1202,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_dev_priv *priv = netdev_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; + unsigned long flags; int ret; switch (event->event) { @@ -1220,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: case IB_CM_TIMEWAIT_EXIT: ipoib_dbg(priv, "CM error %d.\n", event->event); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -1239,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, queue_work(ipoib_workqueue, &priv->cm.reap_task); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); break; default: break; @@ -1294,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ib_sa_path_rec pathrec; u32 qpn; - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.start_list)) { p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; qpn = IPOIB_QPN(neigh->neighbour->ha); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + if (ret) { neigh = p->neigh; if (neigh) { @@ -1320,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work) kfree(p); } } - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_tx_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.reap_task); + struct net_device *dev = priv->dev; struct ipoib_cm_tx *p; + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); while (!list_empty(&priv->cm.reap_list)) { p = list_entry(priv->cm.reap_list.next, typeof(*p), list); list_del(&p->list); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); ipoib_cm_tx_destroy(p); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); + struct net_device *dev = priv->dev; struct sk_buff *skb; - + unsigned long flags; unsigned mtu = priv->mcast_mtu; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -1365,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); #endif dev_kfree_skb_any(skb); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 66cafa2..0e748ae 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) static void drain_tx_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned long flags; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); while (poll_tx(priv)) ; /* nothing */ if (netif_queue_stopped(dev)) mod_timer(&priv->poll_timer, jiffies + 1); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) { - drain_tx_cq((struct net_device *)dev_ptr); + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + + mod_timer(&priv->poll_timer, jiffies); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah, *tah; LIST_HEAD(remove_list); + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); ib_destroy_ah(ah->ah); kfree(ah); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_reap_ah(struct work_struct *work) @@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; + + /* + * We call completion handling routines that expect to be + * called from the BH-disabled NAPI poll context, so disable + * BHs here too. + */ + local_bh_disable(); + do { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { @@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev) while (poll_tx(priv)) ; /* nothing */ + + local_bh_enable(); } int ipoib_ib_dev_stop(struct net_device *dev, int flush) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e9ca3cb..c0ee5143 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -373,9 +373,10 @@ void ipoib_flush_paths(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); + unsigned long flags; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); @@ -385,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); wait_for_completion(&path->done); path_free(dev, path); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void path_rec_completion(int status, @@ -555,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; + unsigned long flags; neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { @@ -563,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) return; } - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, skb->dst->neighbour->ha + 4); if (!path) { @@ -614,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) __skb_queue_tail(&neigh->queue, skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; err_list: @@ -626,7 +625,7 @@ err_drop: ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) @@ -650,12 +649,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { @@ -667,7 +663,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, __skb_queue_tail(&path->queue, skb); if (path_rec_start(dev, path)) { - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); path_free(dev, path); return; } else @@ -677,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; } @@ -696,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -705,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) - return NETDEV_TX_LOCKED; - if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } neigh = *to_ipoib_neigh(skb->dst->neighbour); @@ -721,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->dst->neighbour->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); /* * It's safe to call ipoib_put_ah() inside * priv->lock here, because we know that @@ -732,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_put_ah(neigh->ah); list_del(&neigh->list); ipoib_neigh_free(dev, neigh); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto out; + return NETDEV_TX_OK; } } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); - goto out; + return NETDEV_TX_OK; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -779,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) IPOIB_GID_RAW_ARG(phdr->hwaddr + 4)); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - goto out; + return NETDEV_TX_OK; } unicast_arp_send(skb, dev, phdr); } } -out: - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_OK; } @@ -1052,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev) dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = (NETIF_F_VLAN_CHALLENGED | - NETIF_F_LLTX | NETIF_F_HIGHDMA); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); @@ -1064,7 +1053,6 @@ static void ipoib_setup(struct net_device *dev) ipoib_lro_setup(priv); spin_lock_init(&priv->lock); - spin_lock_init(&priv->tx_lock); mutex_init(&priv->vlan_mutex); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index aae2862..d9d1223 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -69,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh, *tmp; - unsigned long flags; int tx_dropped = 0; ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { /* @@ -90,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) ipoib_neigh_free(dev, neigh); } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -100,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(dev); dev->stats.tx_dropped += tx_dropped; - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(dev); kfree(mcast); } @@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } /* actually send any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); skb->dev = dev; @@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); return 0; } @@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status, { struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); /* We trap for port events ourselves. */ if (status == -ENETRESET) @@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); /* Flush out any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); /* Clear the busy flag so we try again */ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, @@ -662,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || @@ -738,7 +733,7 @@ out: } unlock: - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } void ipoib_mcast_dev_flush(struct net_device *dev) |