diff options
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/Kconfig | 22 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 10 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 107 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 30 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 96 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 52 |
6 files changed, 179 insertions, 138 deletions
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig index 691525c..9d9a9dc 100644 --- a/drivers/infiniband/ulp/ipoib/Kconfig +++ b/drivers/infiniband/ulp/ipoib/Kconfig @@ -11,16 +11,17 @@ config INFINIBAND_IPOIB config INFINIBAND_IPOIB_CM bool "IP-over-InfiniBand Connected Mode support" - depends on INFINIBAND_IPOIB && EXPERIMENTAL + depends on INFINIBAND_IPOIB default n ---help--- - This option enables experimental support for IPoIB connected mode. - After enabling this option, you need to switch to connected mode through - /sys/class/net/ibXXX/mode to actually create connections, and then increase - the interface MTU with e.g. ifconfig ib0 mtu 65520. + This option enables support for IPoIB connected mode. After + enabling this option, you need to switch to connected mode + through /sys/class/net/ibXXX/mode to actually create + connections, and then increase the interface MTU with + e.g. ifconfig ib0 mtu 65520. - WARNING: Enabling connected mode will trigger some - packet drops for multicast and UD mode traffic from this interface, + WARNING: Enabling connected mode will trigger some packet + drops for multicast and UD mode traffic from this interface, unless you limit mtu for these destinations to 2044. config INFINIBAND_IPOIB_DEBUG @@ -33,9 +34,10 @@ config INFINIBAND_IPOIB_DEBUG debug_level and mcast_debug_level module parameters (which can also be set after the driver is loaded through sysfs). - This option also creates an "ipoib_debugfs," which can be - mounted to expose debugging information about IB multicast - groups used by the IPoIB driver. + This option also creates a directory tree under ipoib/ in + debugfs, which contains files that expose debugging + information about IB multicast groups used by the IPoIB + driver. config INFINIBAND_IPOIB_DEBUG_DATA bool "IP-over-InfiniBand data path debugging" diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b0ffc9a..68ba5c3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -268,10 +268,9 @@ struct ipoib_lro { }; /* - * Device private locking: tx_lock protects members used in TX fast - * path (and we use LLTX so upper layers don't do extra locking). - * lock protects everything else. lock nests inside of tx_lock (ie - * tx_lock must be acquired first if needed). + * Device private locking: network stack tx_lock protects members used + * in TX fast path, lock protects everything else. lock nests inside + * of tx_lock (ie tx_lock must be acquired first if needed). */ struct ipoib_dev_priv { spinlock_t lock; @@ -293,6 +292,7 @@ struct ipoib_dev_priv { struct delayed_work pkey_poll_task; struct delayed_work mcast_task; + struct work_struct carrier_on_task; struct work_struct flush_light; struct work_struct flush_normal; struct work_struct flush_heavy; @@ -319,7 +319,6 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; - spinlock_t tx_lock; struct ipoib_tx_buf *tx_ring; unsigned tx_head; unsigned tx_tail; @@ -464,6 +463,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); void ipoib_mcast_join_task(struct work_struct *work); +void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb); void ipoib_mcast_restart_task(struct work_struct *work); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 0f2d304..7b14c2c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -202,7 +202,7 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, dev_kfree_skb_any(rx_ring[i].skb); } - kfree(rx_ring); + vfree(rx_ring); } static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) @@ -337,7 +337,7 @@ static void ipoib_cm_init_rx_wr(struct net_device *dev, sge[i].length = PAGE_SIZE; wr->next = NULL; - wr->sg_list = priv->cm.rx_sge; + wr->sg_list = sge; wr->num_sge = priv->cm.num_frags; } @@ -352,9 +352,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i int ret; int i; - rx->rx_ring = kcalloc(ipoib_recvq_size, sizeof *rx->rx_ring, GFP_KERNEL); - if (!rx->rx_ring) + rx->rx_ring = vmalloc(ipoib_recvq_size * sizeof *rx->rx_ring); + if (!rx->rx_ring) { + printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n", + priv->ca->name, ipoib_recvq_size); return -ENOMEM; + } + + memset(rx->rx_ring, 0, ipoib_recvq_size * sizeof *rx->rx_ring); t = kmalloc(sizeof *t, GFP_KERNEL); if (!t) { @@ -781,7 +786,8 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); + ++tx->tx_tail; if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(dev) && @@ -796,7 +802,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) "(status=%d, wrid=%d vend_err %x)\n", wc->status, wr_id, wc->vendor_err); - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -816,10 +822,10 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } int ipoib_cm_dev_open(struct net_device *dev) @@ -1144,7 +1150,6 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) { struct ipoib_dev_priv *priv = netdev_priv(p->dev); struct ipoib_cm_tx_buf *tx_req; - unsigned long flags; unsigned long begin; ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n", @@ -1175,12 +1180,12 @@ timeout: DMA_TO_DEVICE); dev_kfree_skb_any(tx_req->skb); ++p->tx_tail; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(p->dev); if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) && netif_queue_stopped(p->dev) && test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) netif_wake_queue(p->dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(p->dev); } if (p->qp) @@ -1197,6 +1202,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, struct ipoib_dev_priv *priv = netdev_priv(tx->dev); struct net_device *dev = priv->dev; struct ipoib_neigh *neigh; + unsigned long flags; int ret; switch (event->event) { @@ -1215,8 +1221,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, case IB_CM_REJ_RECEIVED: case IB_CM_TIMEWAIT_EXIT: ipoib_dbg(priv, "CM error %d.\n", event->event); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; if (neigh) { @@ -1234,8 +1240,8 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, queue_work(ipoib_workqueue, &priv->cm.reap_task); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); break; default: break; @@ -1289,19 +1295,24 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ib_sa_path_rec pathrec; u32 qpn; - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while (!list_empty(&priv->cm.start_list)) { p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; qpn = IPOIB_QPN(neigh->neighbour->ha); memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + ret = ipoib_cm_tx_init(p, qpn, &pathrec); - spin_lock_irqsave(&priv->tx_lock, flags); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + if (ret) { neigh = p->neigh; if (neigh) { @@ -1315,44 +1326,52 @@ static void ipoib_cm_tx_start(struct work_struct *work) kfree(p); } } - spin_unlock(&priv->lock); - spin_unlock_irqrestore(&priv->tx_lock, flags); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_tx_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.reap_task); + struct net_device *dev = priv->dev; struct ipoib_cm_tx *p; + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); while (!list_empty(&priv->cm.reap_list)) { p = list_entry(priv->cm.reap_list.next, typeof(*p), list); list_del(&p->list); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); ipoib_cm_tx_destroy(p); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void ipoib_cm_skb_reap(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, cm.skb_task); + struct net_device *dev = priv->dev; struct sk_buff *skb; - + unsigned long flags; unsigned mtu = priv->mcast_mtu; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); + while ((skb = skb_dequeue(&priv->cm.skb_queue))) { - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); + if (skb->protocol == htons(ETH_P_IP)) icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -1360,11 +1379,13 @@ static void ipoib_cm_skb_reap(struct work_struct *work) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev); #endif dev_kfree_skb_any(skb); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb, @@ -1494,14 +1515,16 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) return; } - priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, - GFP_KERNEL); + priv->cm.srq_ring = vmalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring); if (!priv->cm.srq_ring) { printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n", priv->ca->name, ipoib_recvq_size); ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; + return; } + + memset(priv->cm.srq_ring, 0, ipoib_recvq_size * sizeof *priv->cm.srq_ring); } int ipoib_cm_dev_init(struct net_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 66cafa2..0e748ae 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -468,21 +468,22 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr) static void drain_tx_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - unsigned long flags; - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock(dev); while (poll_tx(priv)) ; /* nothing */ if (netif_queue_stopped(dev)) mod_timer(&priv->poll_timer, jiffies + 1); - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock(dev); } void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr) { - drain_tx_cq((struct net_device *)dev_ptr); + struct ipoib_dev_priv *priv = netdev_priv(dev_ptr); + + mod_timer(&priv->poll_timer, jiffies); } static inline int post_send(struct ipoib_dev_priv *priv, @@ -614,17 +615,20 @@ static void __ipoib_reap_ah(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah, *tah; LIST_HEAD(remove_list); + unsigned long flags; + + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) if ((int) priv->tx_tail - (int) ah->last_send >= 0) { list_del(&ah->list); ib_destroy_ah(ah->ah); kfree(ah); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } void ipoib_reap_ah(struct work_struct *work) @@ -761,6 +765,14 @@ void ipoib_drain_cq(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); int i, n; + + /* + * We call completion handling routines that expect to be + * called from the BH-disabled NAPI poll context, so disable + * BHs here too. + */ + local_bh_disable(); + do { n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc); for (i = 0; i < n; ++i) { @@ -784,6 +796,8 @@ void ipoib_drain_cq(struct net_device *dev) while (poll_tx(priv)) ; /* nothing */ + + local_bh_enable(); } int ipoib_ib_dev_stop(struct net_device *dev, int flush) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f51201b..c0ee5143 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -156,14 +156,8 @@ static int ipoib_stop(struct net_device *dev) netif_stop_queue(dev); - /* - * Now flush workqueue to make sure a scheduled task doesn't - * bring our internal state back up. - */ - flush_workqueue(ipoib_workqueue); - - ipoib_ib_dev_down(dev, 1); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_down(dev, 0); + ipoib_ib_dev_stop(dev, 0); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { struct ipoib_dev_priv *cpriv; @@ -379,9 +373,10 @@ void ipoib_flush_paths(struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path, *tp; LIST_HEAD(remove_list); + unsigned long flags; - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); list_splice_init(&priv->path_list, &remove_list); @@ -391,15 +386,16 @@ void ipoib_flush_paths(struct net_device *dev) list_for_each_entry_safe(path, tp, &remove_list, list) { if (path->query) ib_sa_cancel_query(path->query_id, path->query); - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); wait_for_completion(&path->done); path_free(dev, path); - spin_lock_irq(&priv->tx_lock); - spin_lock(&priv->lock); + netif_tx_lock_bh(dev); + spin_lock_irqsave(&priv->lock, flags); } - spin_unlock(&priv->lock); - spin_unlock_irq(&priv->tx_lock); + + spin_unlock_irqrestore(&priv->lock, flags); + netif_tx_unlock_bh(dev); } static void path_rec_completion(int status, @@ -410,7 +406,7 @@ static void path_rec_completion(int status, struct net_device *dev = path->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_ah *ah = NULL; - struct ipoib_ah *old_ah; + struct ipoib_ah *old_ah = NULL; struct ipoib_neigh *neigh, *tn; struct sk_buff_head skqueue; struct sk_buff *skb; @@ -434,12 +430,12 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); - old_ah = path->ah; - path->ah = ah; - if (ah) { path->pathrec = *pathrec; + old_ah = path->ah; + path->ah = ah; + ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n", ah, be16_to_cpu(pathrec->dlid), pathrec->sl); @@ -561,6 +557,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; + unsigned long flags; neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); if (!neigh) { @@ -569,11 +566,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) return; } - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, skb->dst->neighbour->ha + 4); if (!path) { @@ -620,7 +613,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) __skb_queue_tail(&neigh->queue, skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; err_list: @@ -632,7 +625,7 @@ err_drop: ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) @@ -656,12 +649,9 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { @@ -673,7 +663,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, __skb_queue_tail(&path->queue, skb); if (path_rec_start(dev, path)) { - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); path_free(dev, path); return; } else @@ -683,7 +673,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); return; } @@ -702,7 +692,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, dev_kfree_skb_any(skb); } - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -711,13 +701,10 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ipoib_neigh *neigh; unsigned long flags; - if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags))) - return NETDEV_TX_LOCKED; - if (likely(skb->dst && skb->dst->neighbour)) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } neigh = *to_ipoib_neigh(skb->dst->neighbour); @@ -727,7 +714,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->dst->neighbour->ha + 4, sizeof(union ib_gid))) || (neigh->dev != dev))) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); /* * It's safe to call ipoib_put_ah() inside * priv->lock here, because we know that @@ -738,25 +725,25 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_put_ah(neigh->ah); list_del(&neigh->list); ipoib_neigh_free(dev, neigh); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - goto out; + return NETDEV_TX_OK; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - goto out; + return NETDEV_TX_OK; } } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha)); - goto out; + return NETDEV_TX_OK; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -785,16 +772,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) IPOIB_GID_RAW_ARG(phdr->hwaddr + 4)); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - goto out; + return NETDEV_TX_OK; } unicast_arp_send(skb, dev, phdr); } } -out: - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_OK; } @@ -1058,7 +1042,6 @@ static void ipoib_setup(struct net_device *dev) dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; dev->features = (NETIF_F_VLAN_CHALLENGED | - NETIF_F_LLTX | NETIF_F_HIGHDMA); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); @@ -1070,7 +1053,6 @@ static void ipoib_setup(struct net_device *dev) ipoib_lro_setup(priv); spin_lock_init(&priv->lock); - spin_lock_init(&priv->tx_lock); mutex_init(&priv->vlan_mutex); @@ -1081,6 +1063,7 @@ static void ipoib_setup(struct net_device *dev) INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); + INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal); INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); @@ -1314,7 +1297,7 @@ sysfs_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + flush_workqueue(ipoib_workqueue); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1373,7 +1356,12 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); - flush_scheduled_work(); + + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); + + flush_workqueue(ipoib_workqueue); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 8950e95..d9d1223 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -69,14 +69,13 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh, *tmp; - unsigned long flags; int tx_dropped = 0; ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group " IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mcast->mcmember.mgid)); - spin_lock_irqsave(&priv->lock, flags); + spin_lock_irq(&priv->lock); list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { /* @@ -90,7 +89,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) ipoib_neigh_free(dev, neigh); } - spin_unlock_irqrestore(&priv->lock, flags); + spin_unlock_irq(&priv->lock); if (mcast->ah) ipoib_put_ah(mcast->ah); @@ -100,9 +99,9 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_lock_irqsave(&priv->tx_lock, flags); + netif_tx_lock_bh(dev); dev->stats.tx_dropped += tx_dropped; - spin_unlock_irqrestore(&priv->tx_lock, flags); + netif_tx_unlock_bh(dev); kfree(mcast); } @@ -259,10 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, } /* actually send any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); skb->dev = dev; @@ -273,9 +272,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); return 0; } @@ -286,7 +285,6 @@ ipoib_mcast_sendonly_join_complete(int status, { struct ipoib_mcast *mcast = multicast->context; struct net_device *dev = mcast->dev; - struct ipoib_dev_priv *priv = netdev_priv(dev); /* We trap for port events ourselves. */ if (status == -ENETRESET) @@ -302,12 +300,12 @@ ipoib_mcast_sendonly_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); /* Flush out any queued packets */ - spin_lock_irq(&priv->tx_lock); + netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); } - spin_unlock_irq(&priv->tx_lock); + netif_tx_unlock_bh(dev); /* Clear the busy flag so we try again */ status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, @@ -366,6 +364,21 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) return ret; } +void ipoib_mcast_carrier_on_task(struct work_struct *work) +{ + struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, + carrier_on_task); + + /* + * Take rtnl_lock to avoid racing with ipoib_stop() and + * turning the carrier back on while a device is being + * removed. + */ + rtnl_lock(); + netif_carrier_on(priv->dev); + rtnl_unlock(); +} + static int ipoib_mcast_join_complete(int status, struct ib_sa_multicast *multicast) { @@ -392,8 +405,12 @@ static int ipoib_mcast_join_complete(int status, &priv->mcast_task, 0); mutex_unlock(&mcast_mutex); + /* + * Defer carrier on work to ipoib_workqueue to avoid a + * deadlock on rtnl_lock here. + */ if (mcast == priv->broadcast) - netif_carrier_on(dev); + queue_work(ipoib_workqueue, &priv->carrier_on_task); return 0; } @@ -643,12 +660,9 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; + unsigned long flags; - /* - * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. - */ - spin_lock(&priv->lock); + spin_lock_irqsave(&priv->lock, flags); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) || !priv->broadcast || @@ -719,7 +733,7 @@ out: } unlock: - spin_unlock(&priv->lock); + spin_unlock_irqrestore(&priv->lock, flags); } void ipoib_mcast_dev_flush(struct net_device *dev) |