summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h49
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c206
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c118
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c40
6 files changed, 325 insertions, 97 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 87310ee..285c143 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -132,12 +132,46 @@ struct ipoib_cm_data {
__be32 mtu;
};
+/*
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State. The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ * drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ * to be empty or the number of Poll CQ operations has exceeded
+ * CQ capacity size;
+ * - or
+ * post another WR that completes on the same CQ and wait for this
+ * WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the second option and wait for a completion on the
+ * same CQ before destroying QPs attached to our SRQ.
+ */
+
+enum ipoib_cm_state {
+ IPOIB_CM_RX_LIVE,
+ IPOIB_CM_RX_ERROR, /* Ignored by stale task */
+ IPOIB_CM_RX_FLUSH /* Last WQE Reached event observed */
+};
+
struct ipoib_cm_rx {
struct ib_cm_id *id;
struct ib_qp *qp;
struct list_head list;
struct net_device *dev;
unsigned long jiffies;
+ enum ipoib_cm_state state;
};
struct ipoib_cm_tx {
@@ -165,10 +199,15 @@ struct ipoib_cm_dev_priv {
struct ib_srq *srq;
struct ipoib_cm_rx_buf *srq_ring;
struct ib_cm_id *id;
- struct list_head passive_ids;
+ struct list_head passive_ids; /* state: LIVE */
+ struct list_head rx_error_list; /* state: ERROR */
+ struct list_head rx_flush_list; /* state: FLUSH, drain not started */
+ struct list_head rx_drain_list; /* state: FLUSH, drain started */
+ struct list_head rx_reap_list; /* state: FLUSH, drain done */
struct work_struct start_task;
struct work_struct reap_task;
struct work_struct skb_task;
+ struct work_struct rx_reap_task;
struct delayed_work stale_task;
struct sk_buff_head skb_queue;
struct list_head start_list;
@@ -201,15 +240,17 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
- struct delayed_work pkey_task;
+ struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
struct work_struct flush_task;
struct work_struct restart_task;
struct delayed_work ah_reap_task;
+ struct work_struct pkey_event_task;
struct ib_device *ca;
u8 port;
u16 pkey;
+ u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
struct ib_cq *cq;
@@ -333,12 +374,13 @@ struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_ib_dev_flush(struct work_struct *work);
+void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
@@ -386,6 +428,7 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
void ipoib_pkey_poll(struct work_struct *work);
int ipoib_pkey_dev_delay_open(struct net_device *dev);
+void ipoib_drain_cq(struct net_device *dev);
#ifdef CONFIG_INFINIBAND_IPOIB_CM
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index eec833b..076a0bb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,6 +37,7 @@
#include <net/dst.h>
#include <net/icmp.h>
#include <linux/icmpv6.h>
+#include <linux/delay.h>
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
static int data_debug_level;
@@ -62,6 +63,17 @@ struct ipoib_cm_id {
u32 remote_mtu;
};
+static struct ib_qp_attr ipoib_cm_err_attr = {
+ .qp_state = IB_QPS_ERR
+};
+
+#define IPOIB_CM_RX_DRAIN_WRID 0x7fffffff
+
+static struct ib_send_wr ipoib_cm_rx_drain_wr = {
+ .wr_id = IPOIB_CM_RX_DRAIN_WRID,
+ .opcode = IB_WR_SEND,
+};
+
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ib_cm_event *event);
@@ -150,15 +162,54 @@ partial_error:
return NULL;
}
+static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv* priv)
+{
+ struct ib_send_wr *bad_wr;
+ struct ipoib_cm_rx *p;
+
+ /* We only reserved 1 extra slot in CQ for drain WRs, so
+ * make sure we have at most 1 outstanding WR. */
+ if (list_empty(&priv->cm.rx_flush_list) ||
+ !list_empty(&priv->cm.rx_drain_list))
+ return;
+
+ /*
+ * QPs on flush list are error state. This way, a "flush
+ * error" WC will be immediately generated for each WR we post.
+ */
+ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+ ipoib_warn(priv, "failed to post drain wr\n");
+
+ list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
+}
+
+static void ipoib_cm_rx_event_handler(struct ib_event *event, void *ctx)
+{
+ struct ipoib_cm_rx *p = ctx;
+ struct ipoib_dev_priv *priv = netdev_priv(p->dev);
+ unsigned long flags;
+
+ if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
+ return;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ list_move(&p->list, &priv->cm.rx_flush_list);
+ p->state = IPOIB_CM_RX_FLUSH;
+ ipoib_cm_start_rx_drain(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
struct ipoib_cm_rx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
- .send_cq = priv->cq, /* does not matter, we never send anything */
+ .event_handler = ipoib_cm_rx_event_handler,
+ .send_cq = priv->cq, /* For drain WR */
.recv_cq = priv->cq,
.srq = priv->cm.srq,
- .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
+ .cap.max_send_wr = 1, /* For drain WR */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_RC,
@@ -198,6 +249,27 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
return ret;
}
+
+ /*
+ * Current Mellanox HCA firmware won't generate completions
+ * with error for drain WRs unless the QP has been moved to
+ * RTS first. This work-around leaves a window where a QP has
+ * moved to error asynchronously, but this will eventually get
+ * fixed in firmware, so let's not error out if modify QP
+ * fails.
+ */
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+ return 0;
+ }
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+ return 0;
+ }
+
return 0;
}
@@ -256,6 +328,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
cm_id->context = p;
p->jiffies = jiffies;
+ p->state = IPOIB_CM_RX_LIVE;
spin_lock_irq(&priv->lock);
if (list_empty(&priv->cm.passive_ids))
queue_delayed_work(ipoib_workqueue,
@@ -277,7 +350,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
{
struct ipoib_cm_rx *p;
struct ipoib_dev_priv *priv;
- int ret;
switch (event->event) {
case IB_CM_REQ_RECEIVED:
@@ -289,20 +361,9 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
p = cm_id->context;
priv = netdev_priv(p->dev);
- spin_lock_irq(&priv->lock);
- if (list_empty(&p->list))
- ret = 0; /* Connection is going away already. */
- else {
- list_del_init(&p->list);
- ret = -ECONNRESET;
- }
- spin_unlock_irq(&priv->lock);
- if (ret) {
- ib_destroy_qp(p->qp);
- kfree(p);
- return ret;
- }
- return 0;
+ if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
+ ipoib_warn(priv, "unable to move qp to error state\n");
+ /* Fall through */
default:
return 0;
}
@@ -354,8 +415,15 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wr_id, wc->status);
if (unlikely(wr_id >= ipoib_recvq_size)) {
- ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
- wr_id, ipoib_recvq_size);
+ if (wr_id == (IPOIB_CM_RX_DRAIN_WRID & ~IPOIB_CM_OP_SRQ)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
+ ipoib_cm_start_rx_drain(priv);
+ queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ } else
+ ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
+ wr_id, ipoib_recvq_size);
return;
}
@@ -374,9 +442,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
spin_lock_irqsave(&priv->lock, flags);
p->jiffies = jiffies;
- /* Move this entry to list head, but do
- * not re-add it if it has been removed. */
- if (!list_empty(&p->list))
+ /* Move this entry to list head, but do not re-add it
+ * if it has been moved out of list. */
+ if (p->state == IPOIB_CM_RX_LIVE)
list_move(&p->list, &priv->cm.passive_ids);
spin_unlock_irqrestore(&priv->lock, flags);
}
@@ -592,8 +660,7 @@ int ipoib_cm_dev_open(struct net_device *dev)
if (IS_ERR(priv->cm.id)) {
printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
ret = PTR_ERR(priv->cm.id);
- priv->cm.id = NULL;
- return ret;
+ goto err_cm;
}
ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
@@ -601,34 +668,76 @@ int ipoib_cm_dev_open(struct net_device *dev)
if (ret) {
printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
IPOIB_CM_IETF_ID | priv->qp->qp_num);
- ib_destroy_cm_id(priv->cm.id);
- priv->cm.id = NULL;
- return ret;
+ goto err_listen;
}
+
return 0;
+
+err_listen:
+ ib_destroy_cm_id(priv->cm.id);
+err_cm:
+ priv->cm.id = NULL;
+ return ret;
}
void ipoib_cm_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- struct ipoib_cm_rx *p;
+ struct ipoib_cm_rx *p, *n;
+ unsigned long begin;
+ LIST_HEAD(list);
+ int ret;
if (!IPOIB_CM_SUPPORTED(dev->dev_addr) || !priv->cm.id)
return;
ib_destroy_cm_id(priv->cm.id);
priv->cm.id = NULL;
+
spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
- list_del_init(&p->list);
+ list_move(&p->list, &priv->cm.rx_error_list);
+ p->state = IPOIB_CM_RX_ERROR;
+ spin_unlock_irq(&priv->lock);
+ ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+ if (ret)
+ ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
+ spin_lock_irq(&priv->lock);
+ }
+
+ /* Wait for all RX to be drained */
+ begin = jiffies;
+
+ while (!list_empty(&priv->cm.rx_error_list) ||
+ !list_empty(&priv->cm.rx_flush_list) ||
+ !list_empty(&priv->cm.rx_drain_list)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ ipoib_warn(priv, "RX drain timing out\n");
+
+ /*
+ * assume the HW is wedged and just free up everything.
+ */
+ list_splice_init(&priv->cm.rx_flush_list, &list);
+ list_splice_init(&priv->cm.rx_error_list, &list);
+ list_splice_init(&priv->cm.rx_drain_list, &list);
+ break;
+ }
spin_unlock_irq(&priv->lock);
+ msleep(1);
+ ipoib_drain_cq(dev);
+ spin_lock_irq(&priv->lock);
+ }
+
+ list_splice_init(&priv->cm.rx_reap_list, &list);
+
+ spin_unlock_irq(&priv->lock);
+
+ list_for_each_entry_safe(p, n, &list, list) {
ib_destroy_cm_id(p->id);
ib_destroy_qp(p->qp);
kfree(p);
- spin_lock_irq(&priv->lock);
}
- spin_unlock_irq(&priv->lock);
cancel_delayed_work(&priv->cm.stale_task);
}
@@ -1079,24 +1188,44 @@ void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
queue_work(ipoib_workqueue, &priv->cm.skb_task);
}
+static void ipoib_cm_rx_reap(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ cm.rx_reap_task);
+ struct ipoib_cm_rx *p, *n;
+ LIST_HEAD(list);
+
+ spin_lock_irq(&priv->lock);
+ list_splice_init(&priv->cm.rx_reap_list, &list);
+ spin_unlock_irq(&priv->lock);
+
+ list_for_each_entry_safe(p, n, &list, list) {
+ ib_destroy_cm_id(p->id);
+ ib_destroy_qp(p->qp);
+ kfree(p);
+ }
+}
+
static void ipoib_cm_stale_task(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.stale_task.work);
struct ipoib_cm_rx *p;
+ int ret;
spin_lock_irq(&priv->lock);
while (!list_empty(&priv->cm.passive_ids)) {
- /* List if sorted by LRU, start from tail,
+ /* List is sorted by LRU, start from tail,
* stop when we see a recently used entry */
p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
break;
- list_del_init(&p->list);
+ list_move(&p->list, &priv->cm.rx_error_list);
+ p->state = IPOIB_CM_RX_ERROR;
spin_unlock_irq(&priv->lock);
- ib_destroy_cm_id(p->id);
- ib_destroy_qp(p->qp);
- kfree(p);
+ ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
+ if (ret)
+ ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
spin_lock_irq(&priv->lock);
}
@@ -1164,9 +1293,14 @@ int ipoib_cm_dev_init(struct net_device *dev)
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
INIT_LIST_HEAD(&priv->cm.start_list);
+ INIT_LIST_HEAD(&priv->cm.rx_error_list);
+ INIT_LIST_HEAD(&priv->cm.rx_flush_list);
+ INIT_LIST_HEAD(&priv->cm.rx_drain_list);
+ INIT_LIST_HEAD(&priv->cm.rx_reap_list);
INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
+ INIT_WORK(&priv->cm.rx_reap_task, ipoib_cm_rx_reap);
INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
skb_queue_head_init(&priv->cm.skb_queue);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 68d72c6..8404f05b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -448,6 +448,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+ ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ return -1;
+ }
+ set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
ret = ipoib_init_qp(dev);
if (ret) {
ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
@@ -457,14 +464,14 @@ int ipoib_ib_dev_open(struct net_device *dev)
ret = ipoib_ib_post_receives(dev);
if (ret) {
ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
ret = ipoib_cm_dev_open(dev);
if (ret) {
- ipoib_warn(priv, "ipoib_ib_post_receives returned %d\n", ret);
- ipoib_ib_dev_stop(dev);
+ ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
+ ipoib_ib_dev_stop(dev, 1);
return -1;
}
@@ -516,7 +523,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
mutex_lock(&pkey_mutex);
set_bit(IPOIB_PKEY_STOP, &priv->flags);
- cancel_delayed_work(&priv->pkey_task);
+ cancel_delayed_work(&priv->pkey_poll_task);
mutex_unlock(&pkey_mutex);
if (flush)
flush_workqueue(ipoib_workqueue);
@@ -543,13 +550,30 @@ static int recvs_pending(struct net_device *dev)
return pending;
}
-int ipoib_ib_dev_stop(struct net_device *dev)
+void ipoib_drain_cq(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ int i, n;
+ do {
+ n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+ for (i = 0; i < n; ++i) {
+ if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
+ ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
+ else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
+ ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
+ else
+ ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+ }
+ } while (n == IPOIB_NUM_WC);
+}
+
+int ipoib_ib_dev_stop(struct net_device *dev, int flush)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
unsigned long begin;
struct ipoib_tx_buf *tx_req;
- int i, n;
+ int i;
clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
netif_poll_disable(dev);
@@ -604,17 +628,7 @@ int ipoib_ib_dev_stop(struct net_device *dev)
goto timeout;
}
- do {
- n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
- for (i = 0; i < n; ++i) {
- if (priv->ibwc[i].wr_id & IPOIB_CM_OP_SRQ)
- ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
- else if (priv->ibwc[i].wr_id & IPOIB_OP_RECV)
- ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
- else
- ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
- }
- } while (n == IPOIB_NUM_WC);
+ ipoib_drain_cq(dev);
msleep(1);
}
@@ -629,7 +643,8 @@ timeout:
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
- flush_workqueue(ipoib_workqueue);
+ if (flush)
+ flush_workqueue(ipoib_workqueue);
begin = jiffies;
@@ -673,13 +688,24 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
return 0;
}
-void ipoib_ib_dev_flush(struct work_struct *work)
+static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, int pkey_event)
{
- struct ipoib_dev_priv *cpriv, *priv =
- container_of(work, struct ipoib_dev_priv, flush_task);
+ struct ipoib_dev_priv *cpriv;
struct net_device *dev = priv->dev;
+ u16 new_index;
+
+ mutex_lock(&priv->vlan_mutex);
+
+ /*
+ * Flush any child interfaces too -- they might be up even if
+ * the parent is down.
+ */
+ list_for_each_entry(cpriv, &priv->child_intfs, list)
+ __ipoib_ib_dev_flush(cpriv, pkey_event);
- if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) ) {
+ mutex_unlock(&priv->vlan_mutex);
+
+ if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
return;
}
@@ -689,10 +715,32 @@ void ipoib_ib_dev_flush(struct work_struct *work)
return;
}
+ if (pkey_event) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+ clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ ipoib_ib_dev_down(dev, 0);
+ ipoib_pkey_dev_delay_open(dev);
+ return;
+ }
+ set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+
+ /* restart QP only if P_Key index is changed */
+ if (new_index == priv->pkey_index) {
+ ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
+ return;
+ }
+ priv->pkey_index = new_index;
+ }
+
ipoib_dbg(priv, "flushing\n");
ipoib_ib_dev_down(dev, 0);
+ if (pkey_event) {
+ ipoib_ib_dev_stop(dev, 0);
+ ipoib_ib_dev_open(dev);
+ }
+
/*
* The device could have been brought down between the start and when
* we get here, don't bring it back up if it's not configured up
@@ -701,14 +749,24 @@ void ipoib_ib_dev_flush(struct work_struct *work)
ipoib_ib_dev_up(dev);
ipoib_mcast_restart_task(&priv->restart_task);
}
+}
- mutex_lock(&priv->vlan_mutex);
+void ipoib_ib_dev_flush(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, flush_task);
- /* Flush any child interfaces too */
- list_for_each_entry(cpriv, &priv->child_intfs, list)
- ipoib_ib_dev_flush(&cpriv->flush_task);
+ ipoib_dbg(priv, "Flushing %s\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 0);
+}
- mutex_unlock(&priv->vlan_mutex);
+void ipoib_pkey_event(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv =
+ container_of(work, struct ipoib_dev_priv, pkey_event_task);
+
+ ipoib_dbg(priv, "Flushing %s and restarting its QP\n", priv->dev->name);
+ __ipoib_ib_dev_flush(priv, 1);
}
void ipoib_ib_dev_cleanup(struct net_device *dev)
@@ -736,7 +794,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
void ipoib_pkey_poll(struct work_struct *work)
{
struct ipoib_dev_priv *priv =
- container_of(work, struct ipoib_dev_priv, pkey_task.work);
+ container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
struct net_device *dev = priv->dev;
ipoib_pkey_dev_check_presence(dev);
@@ -747,7 +805,7 @@ void ipoib_pkey_poll(struct work_struct *work)
mutex_lock(&pkey_mutex);
if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
}
@@ -766,7 +824,7 @@ int ipoib_pkey_dev_delay_open(struct net_device *dev)
mutex_lock(&pkey_mutex);
clear_bit(IPOIB_PKEY_STOP, &priv->flags);
queue_delayed_work(ipoib_workqueue,
- &priv->pkey_task,
+ &priv->pkey_poll_task,
HZ);
mutex_unlock(&pkey_mutex);
return 1;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 0a428f2..894b1dcd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -107,7 +107,7 @@ int ipoib_open(struct net_device *dev)
return -EINVAL;
if (ipoib_ib_dev_up(dev)) {
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
return -EINVAL;
}
@@ -152,7 +152,7 @@ static int ipoib_stop(struct net_device *dev)
flush_workqueue(ipoib_workqueue);
ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev);
+ ipoib_ib_dev_stop(dev, 1);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -988,7 +988,8 @@ static void ipoib_setup(struct net_device *dev)
INIT_LIST_HEAD(&priv->dead_ahs);
INIT_LIST_HEAD(&priv->multicast_list);
- INIT_DELAYED_WORK(&priv->pkey_task, ipoib_pkey_poll);
+ INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
+ INIT_WORK(&priv->pkey_event_task, ipoib_pkey_event);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
INIT_WORK(&priv->flush_task, ipoib_ib_dev_flush);
INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 54fbead..aae3670 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -524,7 +524,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
- ipoib_warn(priv, "ib_gid_entry_get() failed\n");
+ ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 5c3c6a4..982eb88 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -33,8 +33,6 @@
* $Id: ipoib_verbs.c 1349 2004-12-16 21:09:43Z roland $
*/
-#include <rdma/ib_cache.h>
-
#include "ipoib.h"
int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
@@ -49,7 +47,7 @@ int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid)
if (!qp_attr)
goto out;
- if (ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
ret = -ENXIO;
goto out;
@@ -94,26 +92,16 @@ int ipoib_init_qp(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
- u16 pkey_index;
struct ib_qp_attr qp_attr;
int attr_mask;
- /*
- * Search through the port P_Key table for the requested pkey value.
- * The port has to be assigned to the respective IB partition in
- * advance.
- */
- ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &pkey_index);
- if (ret) {
- clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- return ret;
- }
- set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+ return -1;
qp_attr.qp_state = IB_QPS_INIT;
qp_attr.qkey = 0;
qp_attr.port_num = priv->port;
- qp_attr.pkey_index = pkey_index;
+ qp_attr.pkey_index = priv->pkey_index;
attr_mask =
IB_QP_QKEY |
IB_QP_PORT |
@@ -185,7 +173,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size = ipoib_sendq_size + ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret)
- size += ipoib_recvq_size;
+ size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->cq)) {
@@ -259,14 +247,18 @@ void ipoib_event(struct ib_event_handler *handler,
struct ipoib_dev_priv *priv =
container_of(handler, struct ipoib_dev_priv, event_handler);
- if ((record->event == IB_EVENT_PORT_ERR ||
- record->event == IB_EVENT_PKEY_CHANGE ||
- record->event == IB_EVENT_PORT_ACTIVE ||
- record->event == IB_EVENT_LID_CHANGE ||
- record->event == IB_EVENT_SM_CHANGE ||
- record->event == IB_EVENT_CLIENT_REREGISTER) &&
- record->element.port_num == priv->port) {
+ if (record->element.port_num != priv->port)
+ return;
+
+ if (record->event == IB_EVENT_PORT_ERR ||
+ record->event == IB_EVENT_PORT_ACTIVE ||
+ record->event == IB_EVENT_LID_CHANGE ||
+ record->event == IB_EVENT_SM_CHANGE ||
+ record->event == IB_EVENT_CLIENT_REREGISTER) {
ipoib_dbg(priv, "Port state change event\n");
queue_work(ipoib_workqueue, &priv->flush_task);
+ } else if (record->event == IB_EVENT_PKEY_CHANGE) {
+ ipoib_dbg(priv, "P_Key change event on port:%d\n", priv->port);
+ queue_work(ipoib_workqueue, &priv->pkey_event_task);
}
}
OpenPOWER on IntegriCloud