summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/ulp/ipoib
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r--drivers/infiniband/ulp/ipoib/Kconfig22
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c107
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c30
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c96
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c52
6 files changed, 179 insertions, 138 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index 691525c..9d9a9dc 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -11,16 +11,17 @@ config INFINIBAND_IPOIB
config INFINIBAND_IPOIB_CM
bool "IP-over-InfiniBand Connected Mode support"
- depends on INFINIBAND_IPOIB && EXPERIMENTAL
+ depends on INFINIBAND_IPOIB
default n
---help---
- This option enables experimental support for IPoIB connected mode.
- After enabling this option, you need to switch to connected mode through
- /sys/class/net/ibXXX/mode to actually create connections, and then increase
- the interface MTU with e.g. ifconfig ib0 mtu 65520.
+ This option enables support for IPoIB connected mode. After
+ enabling this option, you need to switch to connected mode
+ through /sys/class/net/ibXXX/mode to actually create
+ connections, and then increase the interface MTU with
+ e.g. ifconfig ib0 mtu 65520.
- WARNING: Enabling connected mode will trigger some
- packet drops for multicast and UD mode traffic from this interface,
+ WARNING: Enabling connected mode will trigger some packet
+ drops for multicast and UD mode traffic from this interface,
unless you limit mtu for these destinations to 2044.
config INFINIBAND_IPOIB_DEBUG
@@ -33,9 +34,10 @@ config INFINIBAND_IPOIB_DEBUG
debug_level and mcast_debug_level module parameters (which
can also be set after the driver is loaded through sysfs).
- This option also creates an "ipoib_debugfs," which can be
- mounted to expose debugging information about IB multicast
- groups used by the IPoIB driver.
+ This option also creates a directory tree under ipoib/ in
+ debugfs, which contains files that expose debugging
+ information about IB multicast groups used by the IPoIB
+ driver.
config INFINIBAND_IPOIB_DEBUG_DATA
bool "IP-over-InfiniBand data path debugging"
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index b0ffc9a..68ba5c3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -268,10 +268,9 @@ struct ipoib_lro {
};
/*
- * Device private locking: tx_lock protects members used in TX fast
- * path (and we use LLTX so upper layers don't do extra locking).
- * lock protects everything else. lock nests inside of tx_lock (ie
- * tx_lock must be acquired first if needed).
+ * Device private locking: network stack tx_lock protects members used
+ * in TX fast path, lock protects everything else. lock nests inside
+ * of tx_lock (ie tx_lock must be acquired first if needed).
*/
struct ipoib_dev_priv {
spinlock_t lock;
@@ -293,6 +292,7 @@ struct ipoib_dev_priv {
struct delayed_work pkey_poll_task;
struct delayed_work mcast_task;
+ struct work_struct carrier_on_task;
struct work_struct flush_light;
struct work_struct flush_normal;
struct work_struct flush_heavy;
@@ -319,7 +319,6 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
- spinlock_t tx_lock;
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
@@ -464,6 +463,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
+void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 0f2d304..7b14c2c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -202,7 +202,7 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
dev_kfree_skb_any(rx_ring[i].skb);
}
- kfree(rx_ring);
+ vfree(rx_ring);
}
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
@@ -337,7 +337,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev,
sge[i].length = PAGE_SIZE;
wr->next = NULL;
- wr->sg_list = priv->cm.rx_sge;
+ wr->sg_list = sge;
wr->num_sge = priv->cm.num_frags;
}
@@ -352,9 +352,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
int ret;
int i;
- rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL);
- if (!rx->rx_ring)
+ rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
+ if (!rx->rx_ring) {
+ printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
+ priv->ca->name, ipoib_recvq_size);
return -ENOMEM;
+ }
+
+ memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring);
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
@@ -781,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock(dev);
+
++tx->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
@@ -796,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
if (neigh) {
@@ -816,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock(dev);
}
int ipoib_cm_dev_open(struct net_device *dev)
@@ -1144,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
{
struct ipoib_dev_priv *priv = netdev_priv(p->dev);
struct ipoib_cm_tx_buf *tx_req;
- unsigned long flags;
unsigned long begin;
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
@@ -1175,12 +1180,12 @@ timeout:
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock_bh(p->dev);
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock_bh(p->dev);
}
if (p->qp)
@@ -1197,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
struct net_device *dev = priv->dev;
struct ipoib_neigh *neigh;
+ unsigned long flags;
int ret;
switch (event->event) {
@@ -1215,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
case IB_CM_REJ_RECEIVED:
case IB_CM_TIMEWAIT_EXIT:
ipoib_dbg(priv, "CM error %d.\n", event->event);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
if (neigh) {
@@ -1234,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
queue_work(ipoib_workqueue, &priv->cm.reap_task);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
break;
default:
break;
@@ -1289,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work)
struct ib_sa_path_rec pathrec;
u32 qpn;
- spin_lock_irqsave(&priv->tx_lock, flags);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
while (!list_empty(&priv->cm.start_list)) {
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
qpn = IPOIB_QPN(neigh->neighbour->ha);
memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
- spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
ret = ipoib_cm_tx_init(p, qpn, &pathrec);
- spin_lock_irqsave(&priv->tx_lock, flags);
- spin_lock(&priv->lock);
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
if (ret) {
neigh = p->neigh;
if (neigh) {
@@ -1315,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work)
kfree(p);
}
}
- spin_unlock(&priv->lock);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void ipoib_cm_tx_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.reap_task);
+ struct net_device *dev = priv->dev;
struct ipoib_cm_tx *p;
+ unsigned long flags;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
list_del(&p->list);
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void ipoib_cm_skb_reap(struct work_struct *work)
{
struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
cm.skb_task);
+ struct net_device *dev = priv->dev;
struct sk_buff *skb;
-
+ unsigned long flags;
unsigned mtu = priv->mcast_mtu;
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
if (skb->protocol == htons(ETH_P_IP))
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -1360,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
#endif
dev_kfree_skb_any(skb);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
@@ -1494,14 +1515,16 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
return;
}
- priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
- GFP_KERNEL);
+ priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
if (!priv->cm.srq_ring) {
printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
priv->ca->name, ipoib_recvq_size);
ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
+ return;
}
+
+ memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring);
}
int ipoib_cm_dev_init(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 66cafa2..0e748ae 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static void drain_tx_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- unsigned long flags;
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock(dev);
while (poll_tx(priv))
; /* nothing */
if (netif_queue_stopped(dev))
mod_timer(&priv->poll_timer, jiffies + 1);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock(dev);
}
void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
{
- drain_tx_cq((struct net_device *)dev_ptr);
+ struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+
+ mod_timer(&priv->poll_timer, jiffies);
}
static inline int post_send(struct ipoib_dev_priv *priv,
@@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah, *tah;
LIST_HEAD(remove_list);
+ unsigned long flags;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
ib_destroy_ah(ah->ah);
kfree(ah);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
void ipoib_reap_ah(struct work_struct *work)
@@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n;
+
+ /*
+ * We call completion handling routines that expect to be
+ * called from the BH-disabled NAPI poll context, so disable
+ * BHs here too.
+ */
+ local_bh_disable();
+
do {
n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
@@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev)
while (poll_tx(priv))
; /* nothing */
+
+ local_bh_enable();
}
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index f51201b..c0ee5143 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
- /*
- * Now flush workqueue to make sure a scheduled task doesn't
- * bring our internal state back up.
- */
- flush_workqueue(ipoib_workqueue);
-
- ipoib_ib_dev_down(dev, 1);
- ipoib_ib_dev_stop(dev, 1);
+ ipoib_ib_dev_down(dev, 0);
+ ipoib_ib_dev_stop(dev, 0);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -379,9 +373,10 @@ void ipoib_flush_paths(struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path, *tp;
LIST_HEAD(remove_list);
+ unsigned long flags;
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->path_list, &remove_list);
@@ -391,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev)
list_for_each_entry_safe(path, tp, &remove_list, list) {
if (path->query)
ib_sa_cancel_query(path->query_id, path->query);
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
wait_for_completion(&path->done);
path_free(dev, path);
- spin_lock_irq(&priv->tx_lock);
- spin_lock(&priv->lock);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
}
- spin_unlock(&priv->lock);
- spin_unlock_irq(&priv->tx_lock);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
}
static void path_rec_completion(int status,
@@ -410,7 +406,7 @@ static void path_rec_completion(int status,
struct net_device *dev = path->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_ah *ah = NULL;
- struct ipoib_ah *old_ah;
+ struct ipoib_ah *old_ah = NULL;
struct ipoib_neigh *neigh, *tn;
struct sk_buff_head skqueue;
struct sk_buff *skb;
@@ -434,12 +430,12 @@ static void path_rec_completion(int status,
spin_lock_irqsave(&priv->lock, flags);
- old_ah = path->ah;
- path->ah = ah;
-
if (ah) {
path->pathrec = *pathrec;
+ old_ah = path->ah;
+ path->ah = ah;
+
ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
@@ -561,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
struct ipoib_neigh *neigh;
+ unsigned long flags;
neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev);
if (!neigh) {
@@ -569,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
return;
}
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, skb->dst->neighbour->ha + 4);
if (!path) {
@@ -620,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
__skb_queue_tail(&neigh->queue, skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
return;
err_list:
@@ -632,7 +625,7 @@ err_drop:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
@@ -656,12 +649,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_path *path;
+ unsigned long flags;
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
path = __path_find(dev, phdr->hwaddr + 4);
if (!path || !path->valid) {
@@ -673,7 +663,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
__skb_queue_tail(&path->queue, skb);
if (path_rec_start(dev, path)) {
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
path_free(dev, path);
return;
} else
@@ -683,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
dev_kfree_skb_any(skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
return;
}
@@ -702,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
dev_kfree_skb_any(skb);
}
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -711,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ipoib_neigh *neigh;
unsigned long flags;
- if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
- return NETDEV_TX_LOCKED;
-
if (likely(skb->dst && skb->dst->neighbour)) {
if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
ipoib_path_lookup(skb, dev);
- goto out;
+ return NETDEV_TX_OK;
}
neigh = *to_ipoib_neigh(skb->dst->neighbour);
@@ -727,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
skb->dst->neighbour->ha + 4,
sizeof(union ib_gid))) ||
(neigh->dev != dev))) {
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
/*
* It's safe to call ipoib_put_ah() inside
* priv->lock here, because we know that
@@ -738,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
ipoib_put_ah(neigh->ah);
list_del(&neigh->list);
ipoib_neigh_free(dev, neigh);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
ipoib_path_lookup(skb, dev);
- goto out;
+ return NETDEV_TX_OK;
}
if (ipoib_cm_get(neigh)) {
if (ipoib_cm_up(neigh)) {
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
- goto out;
+ return NETDEV_TX_OK;
}
} else if (neigh->ah) {
ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
- goto out;
+ return NETDEV_TX_OK;
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -785,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
IPOIB_GID_RAW_ARG(phdr->hwaddr + 4));
dev_kfree_skb_any(skb);
++dev->stats.tx_dropped;
- goto out;
+ return NETDEV_TX_OK;
}
unicast_arp_send(skb, dev, phdr);
}
}
-out:
- spin_unlock_irqrestore(&priv->tx_lock, flags);
-
return NETDEV_TX_OK;
}
@@ -1058,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev)
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = ipoib_sendq_size * 2;
dev->features = (NETIF_F_VLAN_CHALLENGED |
- NETIF_F_LLTX |
NETIF_F_HIGHDMA);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
@@ -1070,7 +1053,6 @@ static void ipoib_setup(struct net_device *dev)
ipoib_lro_setup(priv);
spin_lock_init(&priv->lock);
- spin_lock_init(&priv->tx_lock);
mutex_init(&priv->vlan_mutex);
@@ -1081,6 +1063,7 @@ static void ipoib_setup(struct net_device *dev)
INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
+ INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
@@ -1314,7 +1297,7 @@ sysfs_failed:
register_failed:
ib_unregister_event_handler(&priv->event_handler);
- flush_scheduled_work();
+ flush_workqueue(ipoib_workqueue);
event_failed:
ipoib_dev_cleanup(priv->dev);
@@ -1373,7 +1356,12 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
- flush_scheduled_work();
+
+ rtnl_lock();
+ dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+ rtnl_unlock();
+
+ flush_workqueue(ipoib_workqueue);
unregister_netdev(priv->dev);
ipoib_dev_cleanup(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 8950e95..d9d1223 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -69,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
struct net_device *dev = mcast->dev;
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh, *tmp;
- unsigned long flags;
int tx_dropped = 0;
ipoib_dbg_mcast(netdev_priv(dev),
"deleting multicast group " IPOIB_GID_FMT "\n",
IPOIB_GID_ARG(mcast->mcmember.mgid));
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
/*
@@ -90,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
ipoib_neigh_free(dev, neigh);
}
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
if (mcast->ah)
ipoib_put_ah(mcast->ah);
@@ -100,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
- spin_lock_irqsave(&priv->tx_lock, flags);
+ netif_tx_lock_bh(dev);
dev->stats.tx_dropped += tx_dropped;
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ netif_tx_unlock_bh(dev);
kfree(mcast);
}
@@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
}
/* actually send any queued packets */
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
skb->dev = dev;
@@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
if (dev_queue_xmit(skb))
ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
}
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
return 0;
}
@@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status,
{
struct ipoib_mcast *mcast = multicast->context;
struct net_device *dev = mcast->dev;
- struct ipoib_dev_priv *priv = netdev_priv(dev);
/* We trap for port events ourselves. */
if (status == -ENETRESET)
@@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status,
IPOIB_GID_ARG(mcast->mcmember.mgid), status);
/* Flush out any queued packets */
- spin_lock_irq(&priv->tx_lock);
+ netif_tx_lock_bh(dev);
while (!skb_queue_empty(&mcast->pkt_queue)) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
}
- spin_unlock_irq(&priv->tx_lock);
+ netif_tx_unlock_bh(dev);
/* Clear the busy flag so we try again */
status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
@@ -366,6 +364,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
return ret;
}
+void ipoib_mcast_carrier_on_task(struct work_struct *work)
+{
+ struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
+ carrier_on_task);
+
+ /*
+ * Take rtnl_lock to avoid racing with ipoib_stop() and
+ * turning the carrier back on while a device is being
+ * removed.
+ */
+ rtnl_lock();
+ netif_carrier_on(priv->dev);
+ rtnl_unlock();
+}
+
static int ipoib_mcast_join_complete(int status,
struct ib_sa_multicast *multicast)
{
@@ -392,8 +405,12 @@ static int ipoib_mcast_join_complete(int status,
&priv->mcast_task, 0);
mutex_unlock(&mcast_mutex);
+ /*
+ * Defer carrier on work to ipoib_workqueue to avoid a
+ * deadlock on rtnl_lock here.
+ */
if (mcast == priv->broadcast)
- netif_carrier_on(dev);
+ queue_work(ipoib_workqueue, &priv->carrier_on_task);
return 0;
}
@@ -643,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_mcast *mcast;
+ unsigned long flags;
- /*
- * We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
- */
- spin_lock(&priv->lock);
+ spin_lock_irqsave(&priv->lock, flags);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
!priv->broadcast ||
@@ -719,7 +733,7 @@ out:
}
unlock:
- spin_unlock(&priv->lock);
+ spin_unlock_irqrestore(&priv->lock, flags);
}
void ipoib_mcast_dev_flush(struct net_device *dev)
OpenPOWER on IntegriCloud