292 files changed, 15414 insertions, 4643 deletions
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 9c62340..c96887d 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -481,6 +481,10 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	if (!pin)
 		return;
 
+	/* Keep IOAPIC pin configuration when suspending */
+	if (dev->dev.power.is_prepared)
+		return;
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry)
 		return;
@@ -498,5 +502,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
 	 */
 
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
-	acpi_unregister_gsi(gsi);
+	if (gsi >= 0 && dev->irq > 0)
+		acpi_unregister_gsi(gsi);
 }
diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index 0e3f8f9..480fa6f 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -299,6 +299,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb)
 	out_vcc = find_vcc(dev, ntohs(hdr->vpi), ntohs(hdr->vci));
 	read_unlock(&vcc_sklist_lock);
 	if (!out_vcc) {
+		result = -EUNATCH;
 		atomic_inc(&vcc->stats->tx_err);
 		goto done;
 	}
diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
index 943cf0d..7652e8d 100644
--- a/drivers/atm/solos-pci.c
+++ b/drivers/atm/solos-pci.c
@@ -1278,6 +1278,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			card->dma_bounce = kmalloc(card->nr_ports * BUF_SIZE, GFP_KERNEL);
 			if (!card->dma_bounce) {
 				dev_warn(&card->dev->dev, "Failed to allocate DMA bounce buffers\n");
+				err = -ENOMEM;
 				/* Fallback to MMIO doesn't work */
 				goto out_unmap_both;
 			}
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8b45033..4464e35 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
 drbd-y += drbd_interval.o drbd_state.o
 drbd-y += drbd_nla.o
+drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
 
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 05a1780..d26a3fa 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk {
 	__be32	context[AL_CONTEXT_PER_TRANSACTION];
 };
 
-struct update_odbm_work {
-	struct drbd_work w;
-	struct drbd_device *device;
-	unsigned int enr;
-};
-
-struct update_al_work {
-	struct drbd_work w;
-	struct drbd_device *device;
-	struct completion event;
-	int err;
-};
-
-
-void *drbd_md_get_buffer(struct drbd_device *device)
+void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
 {
 	int r;
 
 	wait_event(device->misc_wait,
-		   (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 ||
+		   (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
 		   device->state.disk <= D_FAILED);
 
-	return r ? NULL : page_address(device->md_io_page);
+	if (r)
+		return NULL;
+
+	device->md_io.current_use = intent;
+	device->md_io.start_jif = jiffies;
+	device->md_io.submit_jif = device->md_io.start_jif - 1;
+	return page_address(device->md_io.page);
 }
 
 void drbd_md_put_buffer(struct drbd_device *device)
 {
-	if (atomic_dec_and_test(&device->md_io_in_use))
+	if (atomic_dec_and_test(&device->md_io.in_use))
 		wake_up(&device->misc_wait);
 }
 
@@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
 
 static int _drbd_md_sync_page_io(struct drbd_device *device,
 				 struct drbd_backing_dev *bdev,
-				 struct page *page, sector_t sector,
-				 int rw, int size)
+				 sector_t sector, int rw)
 {
 	struct bio *bio;
+	/* we do all our meta data IO in aligned 4k blocks. */
+	const int size = 4096;
 	int err;
 
 	device->md_io.done = 0;
@@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 
 	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
 		rw |= REQ_FUA | REQ_FLUSH;
-	rw |= REQ_SYNC;
+	rw |= REQ_SYNC | REQ_NOIDLE;
 
 	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
 	bio->bi_iter.bi_sector = sector;
 	err = -EIO;
-	if (bio_add_page(bio, page, size, 0) != size)
+	if (bio_add_page(bio, device->md_io.page, size, 0) != size)
 		goto out;
-	bio->bi_private = &device->md_io;
+	bio->bi_private = device;
 	bio->bi_end_io = drbd_md_io_complete;
 	bio->bi_rw = rw;
 
@@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 	}
 
 	bio_get(bio); /* one bio_put() is in the completion handler */
-	atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
+	atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
+	device->md_io.submit_jif = jiffies;
 	if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
 		bio_endio(bio, -EIO);
 	else
@@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
 			 sector_t sector, int rw)
 {
 	int err;
-	struct page *iop = device->md_io_page;
-
-	D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);
+	D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
 
 	BUG_ON(!bdev->md_bdev);
 
@@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
 		     current->comm, current->pid, __func__,
 		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
 
-	/* we do all our meta data IO in aligned 4k blocks. */
-	err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
+	err = _drbd_md_sync_page_io(device, bdev, sector, rw);
 	if (err) {
 		drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
 		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
@@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
 	return need_transaction;
 }
 
-static int al_write_transaction(struct drbd_device *device, bool delegate);
-
-/* When called through generic_make_request(), we must delegate
- * activity log I/O to the worker thread: a further request
- * submitted via generic_make_request() within the same task
- * would be queued on current->bio_list, and would only start
- * after this function returns (see generic_make_request()).
- *
- * However, if we *are* the worker, we must not delegate to ourselves.
- */
+static int al_write_transaction(struct drbd_device *device);
 
-/*
- * @delegate:   delegate activity log I/O to the worker thread
- */
-void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
+void drbd_al_begin_io_commit(struct drbd_device *device)
 {
 	bool locked = false;
 
-	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
 	/* Serialize multiple transactions.
 	 * This uses test_and_set_bit, memory barrier is implicit.
 	 */
@@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
 			rcu_read_unlock();
 
 			if (write_al_updates)
-				al_write_transaction(device, delegate);
+				al_write_transaction(device);
 			spin_lock_irq(&device->al_lock);
 			/* FIXME
 			if (err)
@@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
 /*
  * @delegate:   delegate activity log I/O to the worker thread
  */
-void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate)
+void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
 {
-	BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
-
 	if (drbd_al_begin_io_prepare(device, i))
-		drbd_al_begin_io_commit(device, delegate);
+		drbd_al_begin_io_commit(device);
 }
 
 int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
@@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
 	/* We want all necessary updates for a given request within the same transaction
 	 * We could first check how many updates are *actually* needed,
 	 * and use that instead of the worst-case nr_al_extents */
-	if (available_update_slots < nr_al_extents)
-		return -EWOULDBLOCK;
+	if (available_update_slots < nr_al_extents) {
+		/* Too many activity log extents are currently "hot".
+		 *
+		 * If we have accumulated pending changes already,
+		 * we made progress.
+		 *
+		 * If we cannot get even a single pending change through,
+		 * stop the fast path until we made some progress,
+		 * or requests to "cold" extents could be starved. */
+		if (!al->pending_changes)
+			__set_bit(__LC_STARVING, &device->act_log->flags);
+		return -ENOBUFS;
+	}
 
 	/* Is resync active in this area? */
 	for (enr = first; enr <= last; enr++) {
@@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr)
 		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
 }
 
-static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
-{
-	return rs_enr >>
-		/* bit to page */
-		((PAGE_SHIFT + 3) -
-		/* resync extent number to bit */
-		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
-}
-
 static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
 {
 	const unsigned int stripes = device->ldev->md.al_stripes;
@@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
 	return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
 }
 
-static int
-_al_write_transaction(struct drbd_device *device)
+int al_write_transaction(struct drbd_device *device)
 {
 	struct al_transaction_on_disk *buffer;
 	struct lc_element *e;
@@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device)
 		return -EIO;
 	}
 
-	buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */
+	/* protects md_io_buffer, al_tr_cycle, ... */
+	buffer = drbd_md_get_buffer(device, __func__);
 	if (!buffer) {
 		drbd_err(device, "disk failed while waiting for md_io buffer\n");
 		put_ldev(device);
@@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device)
 	return err;
 }
 
-
-static int w_al_write_transaction(struct drbd_work *w, int unused)
-{
-	struct update_al_work *aw = container_of(w, struct update_al_work, w);
-	struct drbd_device *device = aw->device;
-	int err;
-
-	err = _al_write_transaction(device);
-	aw->err = err;
-	complete(&aw->event);
-
-	return err != -EIO ? err : 0;
-}
-
-/* Calls from worker context (see w_restart_disk_io()) need to write the
-   transaction directly. Others came through generic_make_request(),
-   those need to delegate it to the worker. */
-static int al_write_transaction(struct drbd_device *device, bool delegate)
-{
-	if (delegate) {
-		struct update_al_work al_work;
-		init_completion(&al_work.event);
-		al_work.w.cb = w_al_write_transaction;
-		al_work.device = device;
-		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
-				      &al_work.w);
-		wait_for_completion(&al_work.event);
-		return al_work.err;
-	} else
-		return _al_write_transaction(device);
-}
-
 static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
 {
 	int rv;
@@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
 	return 0;
 }
 
-static int w_update_odbm(struct drbd_work *w, int unused)
-{
-	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
-	struct drbd_device *device = udw->device;
-	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
-
-	if (!get_ldev(device)) {
-		if (__ratelimit(&drbd_ratelimit_state))
-			drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
-		kfree(udw);
-		return 0;
-	}
-
-	drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
-	put_ldev(device);
-
-	kfree(udw);
-
-	if (drbd_bm_total_weight(device) <= device->rs_failed) {
-		switch (device->state.conn) {
-		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
-		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
-			drbd_resync_finished(device);
-		default:
-			/* nothing to do */
-			break;
-		}
-	}
-	drbd_bcast_event(device, &sib);
-
-	return 0;
-}
-
+static const char *drbd_change_sync_fname[] = {
+	[RECORD_RS_FAILED] = "drbd_rs_failed_io",
+	[SET_IN_SYNC] = "drbd_set_in_sync",
+	[SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
+};
 
 /* ATTENTION. The AL's extents are 4MB each, while the extents in the
  * resync LRU-cache are 16MB each.
  * The caller of this function has to hold an get_ldev() reference.
  *
+ * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
+ * potentially pulling in (and recounting the corresponding bits)
+ * this resync extent into the resync extent lru cache.
+ *
+ * Returns whether all bits have been cleared for this resync extent,
+ * precisely: (rs_left <= rs_failed)
+ *
  * TODO will be obsoleted once we have a caching lru of the on disk bitmap
  */
-static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector,
-				      int count, int success)
+static bool update_rs_extent(struct drbd_device *device,
+		unsigned int enr, int count,
+		enum update_sync_bits_mode mode)
 {
 	struct lc_element *e;
-	struct update_odbm_work *udw;
-
-	unsigned int enr;
 
 	D_ASSERT(device, atomic_read(&device->local_cnt));
 
-	/* I simply assume that a sector/size pair never crosses
-	 * a 16 MB extent border. (Currently this is true...) */
-	enr = BM_SECT_TO_EXT(sector);
-
-	e = lc_get(device->resync, enr);
+	/* When setting out-of-sync bits,
+	 * we don't need it cached (lc_find).
+	 * But if it is present in the cache,
+	 * we should update the cached bit count.
+	 * Otherwise, that extent should be in the resync extent lru cache
+	 * already -- or we want to pull it in if necessary -- (lc_get),
+	 * then update and check rs_left and rs_failed. */
+	if (mode == SET_OUT_OF_SYNC)
+		e = lc_find(device->resync, enr);
+	else
+		e = lc_get(device->resync, enr);
 	if (e) {
 		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
 		if (ext->lce.lc_number == enr) {
-			if (success)
+			if (mode == SET_IN_SYNC)
 				ext->rs_left -= count;
+			else if (mode == SET_OUT_OF_SYNC)
+				ext->rs_left += count;
 			else
 				ext->rs_failed += count;
 			if (ext->rs_left < ext->rs_failed) {
-				drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d "
+				drbd_warn(device, "BAD! enr=%u rs_left=%d "
 				    "rs_failed=%d count=%d cstate=%s\n",
-				     (unsigned long long)sector,
 				     ext->lce.lc_number, ext->rs_left,
 				     ext->rs_failed, count,
 				     drbd_conn_str(device->state.conn));
@@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
 				     ext->lce.lc_number, ext->rs_failed);
 			}
 			ext->rs_left = rs_left;
-			ext->rs_failed = success ? 0 : count;
+			ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
 			/* we don't keep a persistent log of the resync lru,
 			 * we can commit any change right away. */
 			lc_committed(device->resync);
 		}
-		lc_put(device->resync, &ext->lce);
+		if (mode != SET_OUT_OF_SYNC)
+			lc_put(device->resync, &ext->lce);
 		/* no race, we are within the al_lock! */
 
-		if (ext->rs_left == ext->rs_failed) {
+		if (ext->rs_left <= ext->rs_failed) {
 			ext->rs_failed = 0;
-
-			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
-			if (udw) {
-				udw->enr = ext->lce.lc_number;
-				udw->w.cb = w_update_odbm;
-				udw->device = device;
-				drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
-						      &udw->w);
-			} else {
-				drbd_warn(device, "Could not kmalloc an udw\n");
-			}
+			return true;
 		}
-	} else {
+	} else if (mode != SET_OUT_OF_SYNC) {
+		/* be quiet if lc_find() did not find it. */
 		drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
 		    device->resync_locked,
 		    device->resync->nr_elements,
 		    device->resync->flags);
 	}
+	return false;
 }
 
 void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
 	}
 }
 
-/* clear the bit corresponding to the piece of storage in question:
- * size byte of data starting from sector.  Only clear a bits of the affected
- * one ore more _aligned_ BM_BLOCK_SIZE blocks.
- *
- * called by worker on C_SYNC_TARGET and receiver on SyncSource.
- *
- */
-void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
-		       const char *file, const unsigned int line)
+/* It is called lazy update, so don't do write-out too often. */
+static bool lazy_bitmap_update_due(struct drbd_device *device)
 {
-	/* Is called from worker and receiver context _only_ */
-	unsigned long sbnr, ebnr, lbnr;
-	unsigned long count = 0;
-	sector_t esector, nr_sectors;
-	int wake_up = 0;
-	unsigned long flags;
+	return time_after(jiffies, device->rs_last_bcast + 2*HZ);
+}
 
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
-		drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
-				(unsigned long long)sector, size);
+static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
+{
+	if (rs_done)
+		set_bit(RS_DONE, &device->flags);
+		/* and also set RS_PROGRESS below */
+	else if (!lazy_bitmap_update_due(device))
 		return;
-	}
-
-	if (!get_ldev(device))
-		return; /* no disk, no metadata, no bitmap to clear bits in */
-
-	nr_sectors = drbd_get_capacity(device->this_bdev);
-	esector = sector + (size >> 9) - 1;
-
-	if (!expect(sector < nr_sectors))
-		goto out;
-	if (!expect(esector < nr_sectors))
-		esector = nr_sectors - 1;
-
-	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
-	/* we clear it (in sync).
-	 * round up start sector, round down end sector.  we make sure we only
-	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
-	if (unlikely(esector < BM_SECT_PER_BIT-1))
-		goto out;
-	if (unlikely(esector == (nr_sectors-1)))
-		ebnr = lbnr;
-	else
-		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
-	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
 
-	if (sbnr > ebnr)
-		goto out;
+	drbd_device_post_work(device, RS_PROGRESS);
+}
 
+static int update_sync_bits(struct drbd_device *device,
+		unsigned long sbnr, unsigned long ebnr,
+		enum update_sync_bits_mode mode)
+{
 	/*
-	 * ok, (capacity & 7) != 0 sometimes, but who cares...
-	 * we count rs_{total,left} in bits, not sectors.
+	 * We keep a count of set bits per resync-extent in the ->rs_left
+	 * caching member, so we need to loop and work within the resync extent
+	 * alignment. Typically this loop will execute exactly once.
 	 */
-	count = drbd_bm_clear_bits(device, sbnr, ebnr);
-	if (count) {
-		drbd_advance_rs_marks(device, drbd_bm_total_weight(device));
-		spin_lock_irqsave(&device->al_lock, flags);
-		drbd_try_clear_on_disk_bm(device, sector, count, true);
-		spin_unlock_irqrestore(&device->al_lock, flags);
-
-		/* just wake_up unconditional now, various lc_chaged(),
-		 * lc_put() in drbd_try_clear_on_disk_bm(). */
-		wake_up = 1;
+	unsigned long flags;
+	unsigned long count = 0;
+	unsigned int cleared = 0;
+	while (sbnr <= ebnr) {
+		/* set temporary boundary bit number to last bit number within
+		 * the resync extent of the current start bit number,
+		 * but cap at provided end bit number */
+		unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
+		unsigned long c;
+
+		if (mode == RECORD_RS_FAILED)
+			/* Only called from drbd_rs_failed_io(), bits
+			 * supposedly still set.  Recount, maybe some
+			 * of the bits have been successfully cleared
+			 * by application IO meanwhile.
+			 */
+			c = drbd_bm_count_bits(device, sbnr, tbnr);
+		else if (mode == SET_IN_SYNC)
+			c = drbd_bm_clear_bits(device, sbnr, tbnr);
+		else /* if (mode == SET_OUT_OF_SYNC) */
+			c = drbd_bm_set_bits(device, sbnr, tbnr);
+
+		if (c) {
+			spin_lock_irqsave(&device->al_lock, flags);
+			cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
+			spin_unlock_irqrestore(&device->al_lock, flags);
+			count += c;
+		}
+		sbnr = tbnr + 1;
 	}
-out:
-	put_ldev(device);
-	if (wake_up)
+	if (count) {
+		if (mode == SET_IN_SYNC) {
+			unsigned long still_to_go = drbd_bm_total_weight(device);
+			bool rs_is_done = (still_to_go <= device->rs_failed);
+			drbd_advance_rs_marks(device, still_to_go);
+			if (cleared || rs_is_done)
+				maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
+		} else if (mode == RECORD_RS_FAILED)
+			device->rs_failed += count;
 		wake_up(&device->al_wait);
+	}
+	return count;
 }
 
-/*
- * this is intended to set one request worth of data out of sync.
- * affects at least 1 bit,
- * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
+/* clear the bit corresponding to the piece of storage in question:
+ * size byte of data starting from sector.  Only clear a bits of the affected
+ * one ore more _aligned_ BM_BLOCK_SIZE blocks.
+ *
+ * called by worker on C_SYNC_TARGET and receiver on SyncSource.
  *
- * called by tl_clear and drbd_send_dblock (==drbd_make_request).
- * so this can be _any_ process.
  */
-int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size,
-			    const char *file, const unsigned int line)
+int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+		enum update_sync_bits_mode mode,
+		const char *file, const unsigned int line)
 {
-	unsigned long sbnr, ebnr, flags;
+	/* Is called from worker and receiver context _only_ */
+	unsigned long sbnr, ebnr, lbnr;
+	unsigned long count = 0;
 	sector_t esector, nr_sectors;
-	unsigned int enr, count = 0;
-	struct lc_element *e;
 
-	/* this should be an empty REQ_FLUSH */
-	if (size == 0)
+	/* This would be an empty REQ_FLUSH, be silent. */
+	if ((mode == SET_OUT_OF_SYNC) && size == 0)
 		return 0;
 
-	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
-		drbd_err(device, "sector: %llus, size: %d\n",
-			(unsigned long long)sector, size);
+	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
+		drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
+				drbd_change_sync_fname[mode],
+				(unsigned long long)sector, size);
 		return 0;
 	}
 
 	if (!get_ldev(device))
-		return 0; /* no disk, no metadata, no bitmap to set bits in */
+		return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
 
 	nr_sectors = drbd_get_capacity(device->this_bdev);
 	esector = sector + (size >> 9) - 1;
@@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
 	if (!expect(esector < nr_sectors))
 		esector = nr_sectors - 1;
 
-	/* we set it out of sync,
-	 * we do not need to round anything here */
-	sbnr = BM_SECT_TO_BIT(sector);
-	ebnr = BM_SECT_TO_BIT(esector);
-
-	/* ok, (capacity & 7) != 0 sometimes, but who cares...
-	 * we count rs_{total,left} in bits, not sectors.  */
-	spin_lock_irqsave(&device->al_lock, flags);
-	count = drbd_bm_set_bits(device, sbnr, ebnr);
+	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
 
-	enr = BM_SECT_TO_EXT(sector);
-	e = lc_find(device->resync, enr);
-	if (e)
-		lc_entry(e, struct bm_extent, lce)->rs_left += count;
-	spin_unlock_irqrestore(&device->al_lock, flags);
+	if (mode == SET_IN_SYNC) {
+		/* Round up start sector, round down end sector.  We make sure
+		 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
+		if (unlikely(esector < BM_SECT_PER_BIT-1))
+			goto out;
+		if (unlikely(esector == (nr_sectors-1)))
+			ebnr = lbnr;
+		else
+			ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
+		sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
+	} else {
+		/* We set it out of sync, or record resync failure.
+		 * Should not round anything here. */
+		sbnr = BM_SECT_TO_BIT(sector);
+		ebnr = BM_SECT_TO_BIT(esector);
+	}
 
+	count = update_sync_bits(device, sbnr, ebnr, mode);
 out:
 	put_ldev(device);
-
 	return count;
 }
 
@@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
 	struct lc_element *e;
 	struct bm_extent *bm_ext;
 	int i;
+	bool throttle = drbd_rs_should_slow_down(device, sector, true);
+
+	/* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
+	 * not yet BME_LOCKED) extent needs to be kicked out explicitly if we
+	 * need to throttle. There is at most one such half-locked extent,
+	 * which is remembered in resync_wenr. */
+
+	if (throttle && device->resync_wenr != enr)
+		return -EAGAIN;
 
 	spin_lock_irq(&device->al_lock);
 	if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
 			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
 			clear_bit(BME_NO_WRITES, &bm_ext->flags);
 			device->resync_wenr = LC_FREE;
-			if (lc_put(device->resync, &bm_ext->lce) == 0)
+			if (lc_put(device->resync, &bm_ext->lce) == 0) {
+				bm_ext->flags = 0;
 				device->resync_locked--;
+			}
 			wake_up(&device->al_wait);
 		} else {
 			drbd_alert(device, "LOGIC BUG\n");
@@ -1161,8 +1097,20 @@ proceed:
 	return 0;
 
 try_again:
-	if (bm_ext)
-		device->resync_wenr = enr;
+	if (bm_ext) {
+		if (throttle) {
+			D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
+			D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
+			clear_bit(BME_NO_WRITES, &bm_ext->flags);
+			device->resync_wenr = LC_FREE;
+			if (lc_put(device->resync, &bm_ext->lce) == 0) {
+				bm_ext->flags = 0;
+				device->resync_locked--;
+			}
+			wake_up(&device->al_wait);
+		} else
+			device->resync_wenr = enr;
+	}
 	spin_unlock_irq(&device->al_lock);
 	return -EAGAIN;
 }
@@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device)
 
 	return 0;
 }
-
-/**
- * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
- * @device:	DRBD device.
- * @sector:	The sector number.
- * @size:	Size of failed IO operation, in byte.
- */
-void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
-{
-	/* Is called from worker and receiver context _only_ */
-	unsigned long sbnr, ebnr, lbnr;
-	unsigned long count;
-	sector_t esector, nr_sectors;
-	int wake_up = 0;
-
-	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
-		drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
-				(unsigned long long)sector, size);
-		return;
-	}
-	nr_sectors = drbd_get_capacity(device->this_bdev);
-	esector = sector + (size >> 9) - 1;
-
-	if (!expect(sector < nr_sectors))
-		return;
-	if (!expect(esector < nr_sectors))
-		esector = nr_sectors - 1;
-
-	lbnr = BM_SECT_TO_BIT(nr_sectors-1);
-
-	/*
-	 * round up start sector, round down end sector.  we make sure we only
-	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
-	if (unlikely(esector < BM_SECT_PER_BIT-1))
-		return;
-	if (unlikely(esector == (nr_sectors-1)))
-		ebnr = lbnr;
-	else
-		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
-	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
-
-	if (sbnr > ebnr)
-		return;
-
-	/*
-	 * ok, (capacity & 7) != 0 sometimes, but who cares...
-	 * we count rs_{total,left} in bits, not sectors.
-	 */
-	spin_lock_irq(&device->al_lock);
-	count = drbd_bm_count_bits(device, sbnr, ebnr);
-	if (count) {
-		device->rs_failed += count;
-
-		if (get_ldev(device)) {
-			drbd_try_clear_on_disk_bm(device, sector, count, false);
-			put_ldev(device);
-		}
-
-		/* just wake_up unconditional now, various lc_chaged(),
-		 * lc_put() in drbd_try_clear_on_disk_bm(). */
-		wake_up = 1;
-	}
-	spin_unlock_irq(&device->al_lock);
-	if (wake_up)
-		wake_up(&device->al_wait);
-}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8..426c97a 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -22,6 +22,8 @@
    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/bitops.h>
 #include <linux/vmalloc.h>
 #include <linux/string.h>
@@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number)
 
 	for (i = 0; i < number; i++) {
 		if (!pages[i]) {
-			printk(KERN_ALERT "drbd: bm_free_pages tried to free "
-					  "a NULL pointer; i=%lu n=%lu\n",
-					  i, number);
+			pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
+				 i, number);
 			continue;
 		}
 		__free_page(pages[i]);
@@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
 	end = offset + len;
 
 	if (end > b->bm_words) {
-		printk(KERN_ALERT "drbd: bm_memset end > bm_words\n");
+		pr_alert("bm_memset end > bm_words\n");
 		return;
 	}
 
@@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
 		p_addr = bm_map_pidx(b, idx);
 		bm = p_addr + MLPP(offset);
 		if (bm+do_now > p_addr + LWPP) {
-			printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
+			pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
 			       p_addr, bm, (int)do_now);
 		} else
 			memset(bm, c, do_now * sizeof(long));
@@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device)
 	spin_unlock_irq(&b->bm_lock);
 }
 
-struct bm_aio_ctx {
-	struct drbd_device *device;
-	atomic_t in_flight;
-	unsigned int done;
-	unsigned flags;
-#define BM_AIO_COPY_PAGES	1
-#define BM_AIO_WRITE_HINTED	2
-#define BM_WRITE_ALL_PAGES	4
-	int error;
-	struct kref kref;
-};
-
-static void bm_aio_ctx_destroy(struct kref *kref)
+static void drbd_bm_aio_ctx_destroy(struct kref *kref)
 {
-	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
+	struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
+	unsigned long flags;
 
+	spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
+	list_del(&ctx->list);
+	spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
 	put_ldev(ctx->device);
 	kfree(ctx);
 }
@@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref)
 /* bv_page may be a copy, or may be the original */
 static void bm_async_io_complete(struct bio *bio, int error)
 {
-	struct bm_aio_ctx *ctx = bio->bi_private;
+	struct drbd_bm_aio_ctx *ctx = bio->bi_private;
 	struct drbd_device *device = ctx->device;
 	struct drbd_bitmap *b = device->bitmap;
 	unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
@@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error)
 	if (atomic_dec_and_test(&ctx->in_flight)) {
 		ctx->done = 1;
 		wake_up(&device->misc_wait);
-		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
 	}
 }
 
-static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
+static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
 {
 	struct bio *bio = bio_alloc_drbd(GFP_NOIO);
 	struct drbd_device *device = ctx->device;
 	struct drbd_bitmap *b = device->bitmap;
 	struct page *page;
 	unsigned int len;
+	unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE;
 
 	sector_t on_disk_sector =
 		device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 /*
  * bm_rw: read/write the whole bitmap from/to its on disk location.
  */
-static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
+static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
 {
-	struct bm_aio_ctx *ctx;
+	struct drbd_bm_aio_ctx *ctx;
 	struct drbd_bitmap *b = device->bitmap;
 	int num_pages, i, count = 0;
 	unsigned long now;
@@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 	 * as we submit copies of pages anyways.
 	 */
 
-	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
+	ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
 	if (!ctx)
 		return -ENOMEM;
 
-	*ctx = (struct bm_aio_ctx) {
+	*ctx = (struct drbd_bm_aio_ctx) {
 		.device = device,
+		.start_jif = jiffies,
 		.in_flight = ATOMIC_INIT(1),
 		.done = 0,
 		.flags = flags,
@@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 		.kref = { ATOMIC_INIT(2) },
 	};
 
-	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
+	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in drbd_bm_aio_ctx_destroy() */
 		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
 		kfree(ctx);
 		return -ENODEV;
 	}
+	/* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
+	   drbd_adm_attach(), after device->ldev was assigned. */
 
-	if (!ctx->flags)
+	if (0 == (ctx->flags & ~BM_AIO_READ))
 		WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
 
+	spin_lock_irq(&device->resource->req_lock);
+	list_add_tail(&ctx->list, &device->pending_bitmap_io);
+	spin_unlock_irq(&device->resource->req_lock);
+
 	num_pages = b->bm_number_of_pages;
 
 	now = jiffies;
@@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 		/* ignore completely unchanged pages */
 		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
 			break;
-		if (rw & WRITE) {
+		if (!(flags & BM_AIO_READ)) {
 			if ((flags & BM_AIO_WRITE_HINTED) &&
 			    !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
 				    &page_private(b->bm_pages[i])))
 				continue;
 
-			if (!(flags & BM_WRITE_ALL_PAGES) &&
+			if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
 			    bm_test_page_unchanged(b->bm_pages[i])) {
 				dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
 				continue;
@@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 			}
 		}
 		atomic_inc(&ctx->in_flight);
-		bm_page_io_async(ctx, i, rw);
+		bm_page_io_async(ctx, i);
 		++count;
 		cond_resched();
 	}
@@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 	if (!atomic_dec_and_test(&ctx->in_flight))
 		wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
 	else
-		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+		kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
 
 	/* summary for global bitmap IO */
 	if (flags == 0)
 		drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
-			 rw == WRITE ? "WRITE" : "READ",
+			 (flags & BM_AIO_READ) ? "READ" : "WRITE",
 			 count, jiffies - now);
 
 	if (ctx->error) {
@@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
 		err = -EIO; /* Disk timeout/force-detach during IO... */
 
 	now = jiffies;
-	if (rw == WRITE) {
-		drbd_md_flush(device);
-	} else /* rw == READ */ {
+	if (flags & BM_AIO_READ) {
 		b->bm_set = bm_count_bits(b);
 		drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
 		     jiffies - now);
 	}
 	now = b->bm_set;
 
-	if (flags == 0)
+	if ((flags & ~BM_AIO_READ) == 0)
 		drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
 		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
-	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
+	kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
 	return err;
 }
 
@@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
  */
 int drbd_bm_read(struct drbd_device *device) __must_hold(local)
 {
-	return bm_rw(device, READ, 0, 0);
+	return bm_rw(device, BM_AIO_READ, 0);
 }
 
 /**
@@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
  */
 int drbd_bm_write(struct drbd_device *device) __must_hold(local)
 {
-	return bm_rw(device, WRITE, 0, 0);
+	return bm_rw(device, 0, 0);
 }
 
 /**
@@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
  */
 int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
 {
-	return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0);
+	return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
+}
+
+/**
+ * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
+ * @device:	DRBD device.
+ * @upper_idx:	0: write all changed pages; +ve: page index to stop scanning for changed pages
+ */
+int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
+{
+	return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
 }
 
 /**
@@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
  */
 int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
 {
-	return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0);
+	return bm_rw(device, BM_AIO_COPY_PAGES, 0);
 }
 
 /**
@@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
  */
 int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
 {
-	return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
-}
-
-/**
- * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
- * @device:	DRBD device.
- * @idx:	bitmap page index
- *
- * We don't want to special case on logical_block_size of the backend device,
- * so we submit PAGE_SIZE aligned pieces.
- * Note that on "most" systems, PAGE_SIZE is 4k.
- *
- * In case this becomes an issue on systems with larger PAGE_SIZE,
- * we may want to change this again to write 4k aligned 4k pieces.
- */
-int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
-{
-	struct bm_aio_ctx *ctx;
-	int err;
-
-	if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
-		dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
-		return 0;
-	}
-
-	ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
-	if (!ctx)
-		return -ENOMEM;
-
-	*ctx = (struct bm_aio_ctx) {
-		.device = device,
-		.in_flight = ATOMIC_INIT(1),
-		.done = 0,
-		.flags = BM_AIO_COPY_PAGES,
-		.error = 0,
-		.kref = { ATOMIC_INIT(2) },
-	};
-
-	if (!get_ldev_if_state(device, D_ATTACHING)) {  /* put is in bm_aio_ctx_destroy() */
-		drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
-		kfree(ctx);
-		return -ENODEV;
-	}
-
-	bm_page_io_async(ctx, idx, WRITE_SYNC);
-	wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
-
-	if (ctx->error)
-		drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
-		/* that causes us to detach, so the in memory bitmap will be
-		 * gone in a moment as well. */
-
-	device->bm_writ_cnt++;
-	err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
-	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
-	return err;
+	return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
 }
 
 /* NOTE
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
new file mode 100644
index 0000000..5c20b18
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -0,0 +1,958 @@
+#define pr_fmt(fmt) "drbd debugfs: " fmt
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+
+#include "drbd_int.h"
+#include "drbd_req.h"
+#include "drbd_debugfs.h"
+
+
+/**********************************************************************
+ * Whenever you change the file format, remember to bump the version. *
+ **********************************************************************/
+
+static struct dentry *drbd_debugfs_root;
+static struct dentry *drbd_debugfs_version;
+static struct dentry *drbd_debugfs_resources;
+static struct dentry *drbd_debugfs_minors;
+
+static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
+{
+	if (valid)
+		seq_printf(m, "\t%d", jiffies_to_msecs(dt));
+	else
+		seq_printf(m, "\t-");
+}
+
+static void __seq_print_rq_state_bit(struct seq_file *m,
+	bool is_set, char *sep, const char *set_name, const char *unset_name)
+{
+	if (is_set && set_name) {
+		seq_putc(m, *sep);
+		seq_puts(m, set_name);
+		*sep = '|';
+	} else if (!is_set && unset_name) {
+		seq_putc(m, *sep);
+		seq_puts(m, unset_name);
+		*sep = '|';
+	}
+}
+
+static void seq_print_rq_state_bit(struct seq_file *m,
+	bool is_set, char *sep, const char *set_name)
+{
+	__seq_print_rq_state_bit(m, is_set, sep, set_name, NULL);
+}
+
+/* pretty print enum drbd_req_state_bits req->rq_state */
+static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
+{
+	unsigned int s = req->rq_state;
+	char sep = ' ';
+	seq_printf(m, "\t0x%08x", s);
+	seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
+
+	/* RQ_WRITE ignored, already reported */
+	seq_puts(m, "\tlocal:");
+	seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
+	seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
+	seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
+	sep = ' ';
+	seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
+	seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
+	seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
+	seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
+	if (sep == ' ')
+		seq_puts(m, " -");
+
+	/* for_each_connection ... */
+	seq_printf(m, "\tnet:");
+	sep = ' ';
+	seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
+	seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
+	seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
+	seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
+	seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
+	seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
+	if (sep == ' ')
+		seq_puts(m, " -");
+
+	seq_printf(m, " :");
+	sep = ' ';
+	seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
+	seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
+	seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
+	if (sep == ' ')
+		seq_puts(m, " -");
+	seq_printf(m, "\n");
+}
+
+static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+	/* change anything here, fixup header below! */
+	unsigned int s = req->rq_state;
+
+#define RQ_HDR_1 "epoch\tsector\tsize\trw"
+	seq_printf(m, "0x%x\t%llu\t%u\t%s",
+		req->epoch,
+		(unsigned long long)req->i.sector, req->i.size >> 9,
+		(s & RQ_WRITE) ? "W" : "R");
+
+#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
+	seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
+	seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
+	seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
+
+#define RQ_HDR_3 "\tsent\tacked\tdone"
+	seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
+	seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
+	seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
+
+#define RQ_HDR_4 "\tstate\n"
+	seq_print_request_state(m, req);
+}
+#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
+
+static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
+{
+	seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
+	seq_print_one_request(m, req, now);
+}
+
+static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+	struct drbd_device *device;
+	unsigned int i;
+
+	seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n");
+	rcu_read_lock();
+	idr_for_each_entry(&resource->devices, device, i) {
+		struct drbd_md_io tmp;
+		/* In theory this is racy,
+		 * in the sense that there could have been a
+		 * drbd_md_put_buffer(); drbd_md_get_buffer();
+		 * between accessing these members here.  */
+		tmp = device->md_io;
+		if (atomic_read(&tmp.in_use)) {
+			seq_printf(m, "%u\t%u\t%d\t",
+				device->minor, device->vnr,
+				jiffies_to_msecs(now - tmp.start_jif));
+			if (time_before(tmp.submit_jif, tmp.start_jif))
+				seq_puts(m, "-\t");
+			else
+				seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif));
+			seq_printf(m, "%s\n", tmp.current_use);
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+	struct drbd_device *device;
+	unsigned int i;
+
+	seq_puts(m, "minor\tvnr\tage\t#waiting\n");
+	rcu_read_lock();
+	idr_for_each_entry(&resource->devices, device, i) {
+		unsigned long jif;
+		struct drbd_request *req;
+		int n = atomic_read(&device->ap_actlog_cnt);
+		if (n) {
+			spin_lock_irq(&device->resource->req_lock);
+			req = list_first_entry_or_null(&device->pending_master_completion[1],
+				struct drbd_request, req_pending_master_completion);
+			/* if the oldest request does not wait for the activity log
+			 * it is not interesting for us here */
+			if (req && !(req->rq_state & RQ_IN_ACT_LOG))
+				jif = req->start_jif;
+			else
+				req = NULL;
+			spin_unlock_irq(&device->resource->req_lock);
+		}
+		if (n) {
+			seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+			if (req)
+				seq_printf(m, "%u\t", jiffies_to_msecs(now - jif));
+			else
+				seq_puts(m, "-\t");
+			seq_printf(m, "%u\n", n);
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now)
+{
+	struct drbd_bm_aio_ctx *ctx;
+	unsigned long start_jif;
+	unsigned int in_flight;
+	unsigned int flags;
+	spin_lock_irq(&device->resource->req_lock);
+	ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list);
+	if (ctx && ctx->done)
+		ctx = NULL;
+	if (ctx) {
+		start_jif = ctx->start_jif;
+		in_flight = atomic_read(&ctx->in_flight);
+		flags = ctx->flags;
+	}
+	spin_unlock_irq(&device->resource->req_lock);
+	if (ctx) {
+		seq_printf(m, "%u\t%u\t%c\t%u\t%u\n",
+			device->minor, device->vnr,
+			(flags & BM_AIO_READ) ? 'R' : 'W',
+			jiffies_to_msecs(now - start_jif),
+			in_flight);
+	}
+}
+
+static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
+{
+	struct drbd_device *device;
+	unsigned int i;
+
+	seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n");
+	rcu_read_lock();
+	idr_for_each_entry(&resource->devices, device, i) {
+		seq_print_device_bitmap_io(m, device, now);
+	}
+	rcu_read_unlock();
+}
+
+/* pretty print enum peer_req->flags */
+static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req)
+{
+	unsigned long f = peer_req->flags;
+	char sep = ' ';
+
+	__seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing");
+	__seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal");
+	seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
+	seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
+	seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
+
+	if (f & EE_IS_TRIM) {
+		seq_putc(m, sep);
+		sep = '|';
+		if (f & EE_IS_TRIM_USE_ZEROOUT)
+			seq_puts(m, "zero-out");
+		else
+			seq_puts(m, "trim");
+	}
+	seq_putc(m, '\n');
+}
+
+static void seq_print_peer_request(struct seq_file *m,
+	struct drbd_device *device, struct list_head *lh,
+	unsigned long now)
+{
+	bool reported_preparing = false;
+	struct drbd_peer_request *peer_req;
+	list_for_each_entry(peer_req, lh, w.list) {
+		if (reported_preparing && !(peer_req->flags & EE_SUBMITTED))
+			continue;
+
+		if (device)
+			seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
+
+		seq_printf(m, "%llu\t%u\t%c\t%u\t",
+			(unsigned long long)peer_req->i.sector, peer_req->i.size >> 9,
+			(peer_req->flags & EE_WRITE) ? 'W' : 'R',
+			jiffies_to_msecs(now - peer_req->submit_jif));
+		seq_print_peer_request_flags(m, peer_req);
+		if (peer_req->flags & EE_SUBMITTED)
+			break;
+		else
+			reported_preparing = true;
+	}
+}
+
+static void seq_print_device_peer_requests(struct seq_file *m,
+	struct drbd_device *device, unsigned long now)
+{
+	seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n");
+	spin_lock_irq(&device->resource->req_lock);
+	seq_print_peer_request(m, device, &device->active_ee, now);
+	seq_print_peer_request(m, device, &device->read_ee, now);
+	seq_print_peer_request(m, device, &device->sync_ee, now);
+	spin_unlock_irq(&device->resource->req_lock);
+	if (test_bit(FLUSH_PENDING, &device->flags)) {
+		seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n",
+			device->minor, device->vnr,
+			jiffies_to_msecs(now - device->flush_jif));
+	}
+}
+
+static void seq_print_resource_pending_peer_requests(struct seq_file *m,
+	struct drbd_resource *resource, unsigned long now)
+{
+	struct drbd_device *device;
+	unsigned int i;
+
+	rcu_read_lock();
+	idr_for_each_entry(&resource->devices, device, i) {
+		seq_print_device_peer_requests(m, device, now);
+	}
+	rcu_read_unlock();
+}
+
+static void seq_print_resource_transfer_log_summary(struct seq_file *m,
+	struct drbd_resource *resource,
+	struct drbd_connection *connection,
+	unsigned long now)
+{
+	struct drbd_request *req;
+	unsigned int count = 0;
+	unsigned int show_state = 0;
+
+	seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
+	spin_lock_irq(&resource->req_lock);
+	list_for_each_entry(req, &connection->transfer_log, tl_requests) {
+		unsigned int tmp = 0;
+		unsigned int s;
+		++count;
+
+		/* don't disable irq "forever" */
+		if (!(count & 0x1ff)) {
+			struct drbd_request *req_next;
+			kref_get(&req->kref);
+			spin_unlock_irq(&resource->req_lock);
+			cond_resched();
+			spin_lock_irq(&resource->req_lock);
+			req_next = list_next_entry(req, tl_requests);
+			if (kref_put(&req->kref, drbd_req_destroy))
+				req = req_next;
+			if (&req->tl_requests == &connection->transfer_log)
+				break;
+		}
+
+		s = req->rq_state;
+
+		/* This is meant to summarize timing issues, to be able to tell
+		 * local disk problems from network problems.
+		 * Skip requests, if we have shown an even older request with
+		 * similar aspects already.  */
+		if (req->master_bio == NULL)
+			tmp |= 1;
+		if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
+			tmp |= 2;
+		if (s & RQ_NET_MASK) {
+			if (!(s & RQ_NET_SENT))
+				tmp |= 4;
+			if (s & RQ_NET_PENDING)
+				tmp |= 8;
+			if (!(s & RQ_NET_DONE))
+				tmp |= 16;
+		}
+		if ((tmp & show_state) == tmp)
+			continue;
+		show_state |= tmp;
+		seq_printf(m, "%u\t", count);
+		seq_print_minor_vnr_req(m, req, now);
+		if (show_state == 0x1f)
+			break;
+	}
+	spin_unlock_irq(&resource->req_lock);
+}
+
+/* TODO: transfer_log and friends should be moved to resource */
+static int in_flight_summary_show(struct seq_file *m, void *pos)
+{
+	struct drbd_resource *resource = m->private;
+	struct drbd_connection *connection;
+	unsigned long jif = jiffies;
+
+	connection = first_connection(resource);
+	/* This does not happen, actually.
+	 * But be robust and prepare for future code changes. */
+	if (!connection || !kref_get_unless_zero(&connection->kref))
+		return -ESTALE;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	seq_puts(m, "oldest bitmap IO\n");
+	seq_print_resource_pending_bitmap_io(m, resource, jif);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "meta data IO\n");
+	seq_print_resource_pending_meta_io(m, resource, jif);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "socket buffer stats\n");
+	/* for each connection ... once we have more than one */
+	rcu_read_lock();
+	if (connection->data.socket) {
+		/* open coded SIOCINQ, the "relevant" part */
+		struct tcp_sock *tp = tcp_sk(connection->data.socket->sk);
+		int answ = tp->rcv_nxt - tp->copied_seq;
+		seq_printf(m, "unread receive buffer: %u Byte\n", answ);
+		/* open coded SIOCOUTQ, the "relevant" part */
+		answ = tp->write_seq - tp->snd_una;
+		seq_printf(m, "unacked send buffer: %u Byte\n", answ);
+	}
+	rcu_read_unlock();
+	seq_putc(m, '\n');
+
+	seq_puts(m, "oldest peer requests\n");
+	seq_print_resource_pending_peer_requests(m, resource, jif);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "application requests waiting for activity log\n");
+	seq_print_waiting_for_AL(m, resource, jif);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "oldest application requests\n");
+	seq_print_resource_transfer_log_summary(m, resource, connection, jif);
+	seq_putc(m, '\n');
+
+	jif = jiffies - jif;
+	if (jif)
+		seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
+	kref_put(&connection->kref, drbd_destroy_connection);
+	return 0;
+}
+
+/* simple_positive(file->f_dentry) respectively debugfs_positive(),
+ * but neither is "reachable" from here.
+ * So we have our own inline version of it above.  :-( */
+static inline int debugfs_positive(struct dentry *dentry)
+{
+        return dentry->d_inode && !d_unhashed(dentry);
+}
+
+/* make sure at *open* time that the respective object won't go away. */
+static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
+		                void *data, struct kref *kref,
+				void (*release)(struct kref *))
+{
+	struct dentry *parent;
+	int ret = -ESTALE;
+
+	/* Are we still linked,
+	 * or has debugfs_remove() already been called? */
+	parent = file->f_dentry->d_parent;
+	/* not sure if this can happen: */
+	if (!parent || !parent->d_inode)
+		goto out;
+	/* serialize with d_delete() */
+	mutex_lock(&parent->d_inode->i_mutex);
+	/* Make sure the object is still alive */
+	if (debugfs_positive(file->f_dentry)
+	&& kref_get_unless_zero(kref))
+		ret = 0;
+	mutex_unlock(&parent->d_inode->i_mutex);
+	if (!ret) {
+		ret = single_open(file, show, data);
+		if (ret)
+			kref_put(kref, release);
+	}
+out:
+	return ret;
+}
+
+static int in_flight_summary_open(struct inode *inode, struct file *file)
+{
+	struct drbd_resource *resource = inode->i_private;
+	return drbd_single_open(file, in_flight_summary_show, resource,
+				&resource->kref, drbd_destroy_resource);
+}
+
+static int in_flight_summary_release(struct inode *inode, struct file *file)
+{
+	struct drbd_resource *resource = inode->i_private;
+	kref_put(&resource->kref, drbd_destroy_resource);
+	return single_release(inode, file);
+}
+
+static const struct file_operations in_flight_summary_fops = {
+	.owner		= THIS_MODULE,
+	.open		= in_flight_summary_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= in_flight_summary_release,
+};
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource)
+{
+	struct dentry *dentry;
+	if (!drbd_debugfs_resources)
+		return;
+
+	dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	resource->debugfs_res = dentry;
+
+	dentry = debugfs_create_dir("volumes", resource->debugfs_res);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	resource->debugfs_res_volumes = dentry;
+
+	dentry = debugfs_create_dir("connections", resource->debugfs_res);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	resource->debugfs_res_connections = dentry;
+
+	dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
+			resource->debugfs_res, resource,
+			&in_flight_summary_fops);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	resource->debugfs_res_in_flight_summary = dentry;
+	return;
+
+fail:
+	drbd_debugfs_resource_cleanup(resource);
+	drbd_err(resource, "failed to create debugfs dentry\n");
+}
+
+static void drbd_debugfs_remove(struct dentry **dp)
+{
+	debugfs_remove(*dp);
+	*dp = NULL;
+}
+
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource)
+{
+	/* it is ok to call debugfs_remove(NULL) */
+	drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary);
+	drbd_debugfs_remove(&resource->debugfs_res_connections);
+	drbd_debugfs_remove(&resource->debugfs_res_volumes);
+	drbd_debugfs_remove(&resource->debugfs_res);
+}
+
+static void seq_print_one_timing_detail(struct seq_file *m,
+	const struct drbd_thread_timing_details *tdp,
+	unsigned long now)
+{
+	struct drbd_thread_timing_details td;
+	/* No locking...
+	 * use temporary assignment to get at consistent data. */
+	do {
+		td = *tdp;
+	} while (td.cb_nr != tdp->cb_nr);
+	if (!td.cb_addr)
+		return;
+	seq_printf(m, "%u\t%d\t%s:%u\t%ps\n",
+			td.cb_nr,
+			jiffies_to_msecs(now - td.start_jif),
+			td.caller_fn, td.line,
+			td.cb_addr);
+}
+
+static void seq_print_timing_details(struct seq_file *m,
+		const char *title,
+		unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now)
+{
+	unsigned int start_idx;
+	unsigned int i;
+
+	seq_printf(m, "%s\n", title);
+	/* If not much is going on, this will result in natural ordering.
+	 * If it is very busy, we will possibly skip events, or even see wrap
+	 * arounds, which could only be avoided with locking.
+	 */
+	start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST;
+	for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++)
+		seq_print_one_timing_detail(m, tdp+i, now);
+	for (i = 0; i < start_idx; i++)
+		seq_print_one_timing_detail(m, tdp+i, now);
+}
+
+static int callback_history_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_connection *connection = m->private;
+	unsigned long jif = jiffies;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	seq_puts(m, "n\tage\tcallsite\tfn\n");
+	seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif);
+	seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif);
+	return 0;
+}
+
+static int callback_history_open(struct inode *inode, struct file *file)
+{
+	struct drbd_connection *connection = inode->i_private;
+	return drbd_single_open(file, callback_history_show, connection,
+				&connection->kref, drbd_destroy_connection);
+}
+
+static int callback_history_release(struct inode *inode, struct file *file)
+{
+	struct drbd_connection *connection = inode->i_private;
+	kref_put(&connection->kref, drbd_destroy_connection);
+	return single_release(inode, file);
+}
+
+static const struct file_operations connection_callback_history_fops = {
+	.owner		= THIS_MODULE,
+	.open		= callback_history_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= callback_history_release,
+};
+
+static int connection_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_connection *connection = m->private;
+	unsigned long now = jiffies;
+	struct drbd_request *r1, *r2;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	spin_lock_irq(&connection->resource->req_lock);
+	r1 = connection->req_next;
+	if (r1)
+		seq_print_minor_vnr_req(m, r1, now);
+	r2 = connection->req_ack_pending;
+	if (r2 && r2 != r1) {
+		r1 = r2;
+		seq_print_minor_vnr_req(m, r1, now);
+	}
+	r2 = connection->req_not_net_done;
+	if (r2 && r2 != r1)
+		seq_print_minor_vnr_req(m, r2, now);
+	spin_unlock_irq(&connection->resource->req_lock);
+	return 0;
+}
+
+static int connection_oldest_requests_open(struct inode *inode, struct file *file)
+{
+	struct drbd_connection *connection = inode->i_private;
+	return drbd_single_open(file, connection_oldest_requests_show, connection,
+				&connection->kref, drbd_destroy_connection);
+}
+
+static int connection_oldest_requests_release(struct inode *inode, struct file *file)
+{
+	struct drbd_connection *connection = inode->i_private;
+	kref_put(&connection->kref, drbd_destroy_connection);
+	return single_release(inode, file);
+}
+
+static const struct file_operations connection_oldest_requests_fops = {
+	.owner		= THIS_MODULE,
+	.open		= connection_oldest_requests_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= connection_oldest_requests_release,
+};
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection)
+{
+	struct dentry *conns_dir = connection->resource->debugfs_res_connections;
+	struct dentry *dentry;
+	if (!conns_dir)
+		return;
+
+	/* Once we enable mutliple peers,
+	 * these connections will have descriptive names.
+	 * For now, it is just the one connection to the (only) "peer". */
+	dentry = debugfs_create_dir("peer", conns_dir);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	connection->debugfs_conn = dentry;
+
+	dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
+			connection->debugfs_conn, connection,
+			&connection_callback_history_fops);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	connection->debugfs_conn_callback_history = dentry;
+
+	dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
+			connection->debugfs_conn, connection,
+			&connection_oldest_requests_fops);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	connection->debugfs_conn_oldest_requests = dentry;
+	return;
+
+fail:
+	drbd_debugfs_connection_cleanup(connection);
+	drbd_err(connection, "failed to create debugfs dentry\n");
+}
+
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection)
+{
+	drbd_debugfs_remove(&connection->debugfs_conn_callback_history);
+	drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests);
+	drbd_debugfs_remove(&connection->debugfs_conn);
+}
+
+static void resync_dump_detail(struct seq_file *m, struct lc_element *e)
+{
+       struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
+
+       seq_printf(m, "%5d %s %s %s\n", bme->rs_left,
+		  test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------",
+		  test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------",
+		  test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------"
+		  );
+}
+
+static int device_resync_extents_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_device *device = m->private;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	if (get_ldev_if_state(device, D_FAILED)) {
+		lc_seq_printf_stats(m, device->resync);
+		lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail);
+		put_ldev(device);
+	}
+	return 0;
+}
+
+static int device_act_log_extents_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_device *device = m->private;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	if (get_ldev_if_state(device, D_FAILED)) {
+		lc_seq_printf_stats(m, device->act_log);
+		lc_seq_dump_details(m, device->act_log, "", NULL);
+		put_ldev(device);
+	}
+	return 0;
+}
+
+static int device_oldest_requests_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_device *device = m->private;
+	struct drbd_resource *resource = device->resource;
+	unsigned long now = jiffies;
+	struct drbd_request *r1, *r2;
+	int i;
+
+	/* BUMP me if you change the file format/content/presentation */
+	seq_printf(m, "v: %u\n\n", 0);
+
+	seq_puts(m, RQ_HDR);
+	spin_lock_irq(&resource->req_lock);
+	/* WRITE, then READ */
+	for (i = 1; i >= 0; --i) {
+		r1 = list_first_entry_or_null(&device->pending_master_completion[i],
+			struct drbd_request, req_pending_master_completion);
+		r2 = list_first_entry_or_null(&device->pending_completion[i],
+			struct drbd_request, req_pending_local);
+		if (r1)
+			seq_print_one_request(m, r1, now);
+		if (r2 && r2 != r1)
+			seq_print_one_request(m, r2, now);
+	}
+	spin_unlock_irq(&resource->req_lock);
+	return 0;
+}
+
+static int device_data_gen_id_show(struct seq_file *m, void *ignored)
+{
+	struct drbd_device *device = m->private;
+	struct drbd_md *md;
+	enum drbd_uuid_index idx;
+
+	if (!get_ldev_if_state(device, D_FAILED))
+		return -ENODEV;
+
+	md = &device->ldev->md;
+	spin_lock_irq(&md->uuid_lock);
+	for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) {
+		seq_printf(m, "0x%016llX\n", md->uuid[idx]);
+	}
+	spin_unlock_irq(&md->uuid_lock);
+	put_ldev(device);
+	return 0;
+}
+
+#define drbd_debugfs_device_attr(name)						\
+static int device_ ## name ## _open(struct inode *inode, struct file *file)	\
+{										\
+	struct drbd_device *device = inode->i_private;				\
+	return drbd_single_open(file, device_ ## name ## _show, device,		\
+				&device->kref, drbd_destroy_device);		\
+}										\
+static int device_ ## name ## _release(struct inode *inode, struct file *file)	\
+{										\
+	struct drbd_device *device = inode->i_private;				\
+	kref_put(&device->kref, drbd_destroy_device);				\
+	return single_release(inode, file);					\
+}										\
+static const struct file_operations device_ ## name ## _fops = {		\
+	.owner		= THIS_MODULE,						\
+	.open		= device_ ## name ## _open,				\
+	.read		= seq_read,						\
+	.llseek		= seq_lseek,						\
+	.release	= device_ ## name ## _release,				\
+};
+
+drbd_debugfs_device_attr(oldest_requests)
+drbd_debugfs_device_attr(act_log_extents)
+drbd_debugfs_device_attr(resync_extents)
+drbd_debugfs_device_attr(data_gen_id)
+
+void drbd_debugfs_device_add(struct drbd_device *device)
+{
+	struct dentry *vols_dir = device->resource->debugfs_res_volumes;
+	char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */
+	char vnr_buf[8];   /* volume number vnr is even 16 bit only; */
+	char *slink_name = NULL;
+
+	struct dentry *dentry;
+	if (!vols_dir || !drbd_debugfs_minors)
+		return;
+
+	snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr);
+	dentry = debugfs_create_dir(vnr_buf, vols_dir);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	device->debugfs_vol = dentry;
+
+	snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor);
+	slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u",
+			device->resource->name, device->vnr);
+	if (!slink_name)
+		goto fail;
+	dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name);
+	kfree(slink_name);
+	slink_name = NULL;
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	device->debugfs_minor = dentry;
+
+#define DCF(name)	do {					\
+	dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP,	\
+			device->debugfs_vol, device,		\
+			&device_ ## name ## _fops);		\
+	if (IS_ERR_OR_NULL(dentry))				\
+		goto fail;					\
+	device->debugfs_vol_ ## name = dentry;			\
+	} while (0)
+
+	DCF(oldest_requests);
+	DCF(act_log_extents);
+	DCF(resync_extents);
+	DCF(data_gen_id);
+#undef DCF
+	return;
+
+fail:
+	drbd_debugfs_device_cleanup(device);
+	drbd_err(device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_device_cleanup(struct drbd_device *device)
+{
+	drbd_debugfs_remove(&device->debugfs_minor);
+	drbd_debugfs_remove(&device->debugfs_vol_oldest_requests);
+	drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
+	drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
+	drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+	drbd_debugfs_remove(&device->debugfs_vol);
+}
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device)
+{
+	struct dentry *conn_dir = peer_device->connection->debugfs_conn;
+	struct dentry *dentry;
+	char vnr_buf[8];
+
+	if (!conn_dir)
+		return;
+
+	snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr);
+	dentry = debugfs_create_dir(vnr_buf, conn_dir);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	peer_device->debugfs_peer_dev = dentry;
+	return;
+
+fail:
+	drbd_debugfs_peer_device_cleanup(peer_device);
+	drbd_err(peer_device, "failed to create debugfs entries\n");
+}
+
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device)
+{
+	drbd_debugfs_remove(&peer_device->debugfs_peer_dev);
+}
+
+static int drbd_version_show(struct seq_file *m, void *ignored)
+{
+	seq_printf(m, "# %s\n", drbd_buildtag());
+	seq_printf(m, "VERSION=%s\n", REL_VERSION);
+	seq_printf(m, "API_VERSION=%u\n", API_VERSION);
+	seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
+	seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
+	return 0;
+}
+
+static int drbd_version_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, drbd_version_show, NULL);
+}
+
+static struct file_operations drbd_version_fops = {
+	.owner = THIS_MODULE,
+	.open = drbd_version_open,
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.release = single_release,
+};
+
+/* not __exit, may be indirectly called
+ * from the module-load-failure path as well. */
+void drbd_debugfs_cleanup(void)
+{
+	drbd_debugfs_remove(&drbd_debugfs_resources);
+	drbd_debugfs_remove(&drbd_debugfs_minors);
+	drbd_debugfs_remove(&drbd_debugfs_version);
+	drbd_debugfs_remove(&drbd_debugfs_root);
+}
+
+int __init drbd_debugfs_init(void)
+{
+	struct dentry *dentry;
+
+	dentry = debugfs_create_dir("drbd", NULL);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	drbd_debugfs_root = dentry;
+
+	dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	drbd_debugfs_version = dentry;
+
+	dentry = debugfs_create_dir("resources", drbd_debugfs_root);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	drbd_debugfs_resources = dentry;
+
+	dentry = debugfs_create_dir("minors", drbd_debugfs_root);
+	if (IS_ERR_OR_NULL(dentry))
+		goto fail;
+	drbd_debugfs_minors = dentry;
+	return 0;
+
+fail:
+	drbd_debugfs_cleanup();
+	if (dentry)
+		return PTR_ERR(dentry);
+	else
+		return -EINVAL;
+}
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
new file mode 100644
index 0000000..8bee213
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -0,0 +1,39 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+
+#include "drbd_int.h"
+
+#ifdef CONFIG_DEBUG_FS
+int __init drbd_debugfs_init(void);
+void drbd_debugfs_cleanup(void);
+
+void drbd_debugfs_resource_add(struct drbd_resource *resource);
+void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);
+
+void drbd_debugfs_connection_add(struct drbd_connection *connection);
+void drbd_debugfs_connection_cleanup(struct drbd_connection *connection);
+
+void drbd_debugfs_device_add(struct drbd_device *device);
+void drbd_debugfs_device_cleanup(struct drbd_device *device);
+
+void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device);
+void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device);
+#else
+
+static inline int __init drbd_debugfs_init(void) { return -ENODEV; }
+static inline void drbd_debugfs_cleanup(void) { }
+
+static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
+static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }
+
+static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { }
+static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { }
+
+static inline void drbd_debugfs_device_add(struct drbd_device *device) { }
+static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { }
+
+static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { }
+static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { }
+
+#endif
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a76ceb3..1a00001 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -317,7 +317,63 @@ struct drbd_request {
 
 	struct list_head tl_requests; /* ring list in the transfer log */
 	struct bio *master_bio;       /* master bio pointer */
-	unsigned long start_time;
+
+	/* see struct drbd_device */
+	struct list_head req_pending_master_completion;
+	struct list_head req_pending_local;
+
+	/* for generic IO accounting */
+	unsigned long start_jif;
+
+	/* for DRBD internal statistics */
+
+	/* Minimal set of time stamps to determine if we wait for activity log
+	 * transactions, local disk or peer.  32 bit "jiffies" are good enough,
+	 * we don't expect a DRBD request to be stalled for several month.
+	 */
+
+	/* before actual request processing */
+	unsigned long in_actlog_jif;
+
+	/* local disk */
+	unsigned long pre_submit_jif;
+
+	/* per connection */
+	unsigned long pre_send_jif;
+	unsigned long acked_jif;
+	unsigned long net_done_jif;
+
+	/* Possibly even more detail to track each phase:
+	 *  master_completion_jif
+	 *      how long did it take to complete the master bio
+	 *      (application visible latency)
+	 *  allocated_jif
+	 *      how long the master bio was blocked until we finally allocated
+	 *      a tracking struct
+	 *  in_actlog_jif
+	 *      how long did we wait for activity log transactions
+	 *
+	 *  net_queued_jif
+	 *      when did we finally queue it for sending
+	 *  pre_send_jif
+	 *      when did we start sending it
+	 *  post_send_jif
+	 *      how long did we block in the network stack trying to send it
+	 *  acked_jif
+	 *      when did we receive (or fake, in protocol A) a remote ACK
+	 *  net_done_jif
+	 *      when did we receive final acknowledgement (P_BARRIER_ACK),
+	 *      or decide, e.g. on connection loss, that we do no longer expect
+	 *      anything from this peer for this request.
+	 *
+	 *  pre_submit_jif
+	 *  post_sub_jif
+	 *      when did we start submiting to the lower level device,
+	 *      and how long did we block in that submit function
+	 *  local_completion_jif
+	 *      how long did it take the lower level device to complete this request
+	 */
+
 
 	/* once it hits 0, we may complete the master_bio */
 	atomic_t completion_ref;
@@ -366,6 +422,7 @@ struct drbd_peer_request {
 	struct drbd_interval i;
 	/* see comments on ee flag bits below */
 	unsigned long flags;
+	unsigned long submit_jif;
 	union {
 		u64 block_id;
 		struct digest_info *digest;
@@ -408,6 +465,17 @@ enum {
 
 	/* Is set when net_conf had two_primaries set while creating this peer_req */
 	__EE_IN_INTERVAL_TREE,
+
+	/* for debugfs: */
+	/* has this been submitted, or does it still wait for something else? */
+	__EE_SUBMITTED,
+
+	/* this is/was a write request */
+	__EE_WRITE,
+
+	/* this originates from application on peer
+	 * (not some resync or verify or other DRBD internal request) */
+	__EE_APPLICATION,
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
@@ -419,6 +487,9 @@ enum {
 #define EE_RESTART_REQUESTS	(1<<__EE_RESTART_REQUESTS)
 #define EE_SEND_WRITE_ACK	(1<<__EE_SEND_WRITE_ACK)
 #define EE_IN_INTERVAL_TREE	(1<<__EE_IN_INTERVAL_TREE)
+#define EE_SUBMITTED		(1<<__EE_SUBMITTED)
+#define EE_WRITE		(1<<__EE_WRITE)
+#define EE_APPLICATION		(1<<__EE_APPLICATION)
 
 /* flag bits per device */
 enum {
@@ -433,11 +504,11 @@ enum {
 	CONSIDER_RESYNC,
 
 	MD_NO_FUA,		/* Users wants us to not use FUA/FLUSH on meta data dev */
+
 	SUSPEND_IO,		/* suspend application io */
 	BITMAP_IO,		/* suspend application io;
 				   once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
-	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
 	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
 	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
@@ -450,6 +521,20 @@ enum {
 	B_RS_H_DONE,		/* Before resync handler done (already executed) */
 	DISCARD_MY_DATA,	/* discard_my_data flag per volume */
 	READ_BALANCE_RR,
+
+	FLUSH_PENDING,		/* if set, device->flush_jif is when we submitted that flush
+				 * from drbd_flush_after_epoch() */
+
+	/* cleared only after backing device related structures have been destroyed. */
+	GOING_DISKLESS,		/* Disk is being detached, because of io-error, or admin request. */
+
+	/* to be used in drbd_device_post_work() */
+	GO_DISKLESS,		/* tell worker to schedule cleanup before detach */
+	DESTROY_DISK,		/* tell worker to close backing devices and destroy related structures. */
+	MD_SYNC,		/* tell worker to call drbd_md_sync() */
+	RS_START,		/* tell worker to start resync/OV */
+	RS_PROGRESS,		/* tell worker that resync made significant progress */
+	RS_DONE,		/* tell worker that resync is done */
 };
 
 struct drbd_bitmap; /* opaque for drbd_device */
@@ -531,6 +616,11 @@ struct drbd_backing_dev {
 };
 
 struct drbd_md_io {
+	struct page *page;
+	unsigned long start_jif;	/* last call to drbd_md_get_buffer */
+	unsigned long submit_jif;	/* last _drbd_md_sync_page_io() submit */
+	const char *current_use;
+	atomic_t in_use;
 	unsigned int done;
 	int error;
 };
@@ -577,10 +667,18 @@ enum {
 				 * and potentially deadlock on, this drbd worker.
 				 */
 	DISCONNECT_SENT,
+
+	DEVICE_WORK_PENDING,	/* tell worker that some device has pending work */
 };
 
 struct drbd_resource {
 	char *name;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_res;
+	struct dentry *debugfs_res_volumes;
+	struct dentry *debugfs_res_connections;
+	struct dentry *debugfs_res_in_flight_summary;
+#endif
 	struct kref kref;
 	struct idr devices;		/* volume number to device mapping */
 	struct list_head connections;
@@ -594,12 +692,28 @@ struct drbd_resource {
 	unsigned susp_nod:1;		/* IO suspended because no data */
 	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 
+	enum write_ordering_e write_ordering;
+
 	cpumask_var_t cpu_mask;
 };
 
+struct drbd_thread_timing_details
+{
+	unsigned long start_jif;
+	void *cb_addr;
+	const char *caller_fn;
+	unsigned int line;
+	unsigned int cb_nr;
+};
+
 struct drbd_connection {
 	struct list_head connections;
 	struct drbd_resource *resource;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_conn;
+	struct dentry *debugfs_conn_callback_history;
+	struct dentry *debugfs_conn_oldest_requests;
+#endif
 	struct kref kref;
 	struct idr peer_devices;	/* volume number to peer device mapping */
 	enum drbd_conns cstate;		/* Only C_STANDALONE to C_WF_REPORT_PARAMS */
@@ -636,7 +750,6 @@ struct drbd_connection {
 	struct drbd_epoch *current_epoch;
 	spinlock_t epoch_lock;
 	unsigned int epochs;
-	enum write_ordering_e write_ordering;
 	atomic_t current_tle_nr;	/* transfer log epoch number */
 	unsigned current_tle_writes;	/* writes seen within this tl epoch */
 
@@ -645,9 +758,22 @@ struct drbd_connection {
 	struct drbd_thread worker;
 	struct drbd_thread asender;
 
+	/* cached pointers,
+	 * so we can look up the oldest pending requests more quickly.
+	 * protected by resource->req_lock */
+	struct drbd_request *req_next; /* DRBD 9: todo.req_next */
+	struct drbd_request *req_ack_pending;
+	struct drbd_request *req_not_net_done;
+
 	/* sender side */
 	struct drbd_work_queue sender_work;
 
+#define DRBD_THREAD_DETAILS_HIST	16
+	unsigned int w_cb_nr; /* keeps counting up */
+	unsigned int r_cb_nr; /* keeps counting up */
+	struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST];
+	struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
+
 	struct {
 		/* whether this sender thread
 		 * has processed a single write yet. */
@@ -663,11 +789,22 @@ struct drbd_connection {
 	} send;
 };
 
+void __update_timing_details(
+		struct drbd_thread_timing_details *tdp,
+		unsigned int *cb_nr,
+		void *cb,
+		const char *fn, const unsigned int line);
+
+#define update_worker_timing_details(c, cb) \
+	__update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ )
+#define update_receiver_timing_details(c, cb) \
+	__update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ )
+
 struct submit_worker {
 	struct workqueue_struct *wq;
 	struct work_struct worker;
 
-	spinlock_t lock;
+	/* protected by ..->resource->req_lock */
 	struct list_head writes;
 };
 
@@ -675,12 +812,29 @@ struct drbd_peer_device {
 	struct list_head peer_devices;
 	struct drbd_device *device;
 	struct drbd_connection *connection;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_peer_dev;
+#endif
 };
 
 struct drbd_device {
 	struct drbd_resource *resource;
 	struct list_head peer_devices;
-	int vnr;			/* volume number within the connection */
+	struct list_head pending_bitmap_io;
+
+	unsigned long flush_jif;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_minor;
+	struct dentry *debugfs_vol;
+	struct dentry *debugfs_vol_oldest_requests;
+	struct dentry *debugfs_vol_act_log_extents;
+	struct dentry *debugfs_vol_resync_extents;
+	struct dentry *debugfs_vol_data_gen_id;
+#endif
+
+	unsigned int vnr;	/* volume number within the connection */
+	unsigned int minor;	/* device minor number */
+
 	struct kref kref;
 
 	/* things that are stored as / read from meta data on disk */
@@ -697,19 +851,10 @@ struct drbd_device {
 	unsigned long last_reattach_jif;
 	struct drbd_work resync_work;
 	struct drbd_work unplug_work;
-	struct drbd_work go_diskless;
-	struct drbd_work md_sync_work;
-	struct drbd_work start_resync_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
 	struct timer_list start_resync_timer;
 	struct timer_list request_timer;
-#ifdef DRBD_DEBUG_MD_SYNC
-	struct {
-		unsigned int line;
-		const char* func;
-	} last_md_mark_dirty;
-#endif
 
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
@@ -724,6 +869,7 @@ struct drbd_device {
 	unsigned int al_writ_cnt;
 	unsigned int bm_writ_cnt;
 	atomic_t ap_bio_cnt;	 /* Requests we need to complete */
+	atomic_t ap_actlog_cnt;  /* Requests waiting for activity log */
 	atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
 	atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
 	atomic_t unacked_cnt;	 /* Need to send replies for */
@@ -733,6 +879,13 @@ struct drbd_device {
 	struct rb_root read_requests;
 	struct rb_root write_requests;
 
+	/* for statistics and timeouts */
+	/* [0] read, [1] write */
+	struct list_head pending_master_completion[2];
+	struct list_head pending_completion[2];
+
+	/* use checksums for *this* resync */
+	bool use_csums;
 	/* blocks to resync in this run [unit BM_BLOCK_SIZE] */
 	unsigned long rs_total;
 	/* number of resync blocks that failed in this run */
@@ -788,9 +941,7 @@ struct drbd_device {
 	atomic_t pp_in_use;		/* allocated from page pool */
 	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
-	struct page *md_io_page;	/* one page buffer for md_io */
 	struct drbd_md_io md_io;
-	atomic_t md_io_in_use;		/* protects the md_io, md_io_page and md_io_tmpp */
 	spinlock_t al_lock;
 	wait_queue_head_t al_wait;
 	struct lru_cache *act_log;	/* activity log */
@@ -800,7 +951,6 @@ struct drbd_device {
 	atomic_t packet_seq;
 	unsigned int peer_seq;
 	spinlock_t peer_seq_lock;
-	unsigned int minor;
 	unsigned long comm_bm_set; /* communicated number of set bits. */
 	struct bm_io_work bm_io_work;
 	u64 ed_uuid; /* UUID of the exposed data */
@@ -824,6 +974,21 @@ struct drbd_device {
 	struct submit_worker submit;
 };
 
+struct drbd_bm_aio_ctx {
+	struct drbd_device *device;
+	struct list_head list; /* on device->pending_bitmap_io */;
+	unsigned long start_jif;
+	atomic_t in_flight;
+	unsigned int done;
+	unsigned flags;
+#define BM_AIO_COPY_PAGES	1
+#define BM_AIO_WRITE_HINTED	2
+#define BM_AIO_WRITE_ALL_PAGES	4
+#define BM_AIO_READ		8
+	int error;
+	struct kref kref;
+};
+
 struct drbd_config_context {
 	/* assigned from drbd_genlmsghdr */
 	unsigned int minor;
@@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
 extern int drbd_send_bitmap(struct drbd_device *device);
 extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
 extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_bc(struct drbd_backing_dev *ldev);
+extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
 extern void drbd_device_cleanup(struct drbd_device *device);
 void drbd_print_uuids(struct drbd_device *device, const char *text);
 
@@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must
 extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
 extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
 extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
-#ifndef DRBD_DEBUG_MD_SYNC
 extern void drbd_md_mark_dirty(struct drbd_device *device);
-#else
-#define drbd_md_mark_dirty(m)	drbd_md_mark_dirty_(m, __LINE__ , __func__ )
-extern void drbd_md_mark_dirty_(struct drbd_device *device,
-		unsigned int line, const char *func);
-#endif
 extern void drbd_queue_bitmap_io(struct drbd_device *device,
 				 int (*io_fn)(struct drbd_device *),
 				 void (*done)(struct drbd_device *, int),
@@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device,
 extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
 		int (*io_fn)(struct drbd_device *),
 		char *why, enum bm_flag flags);
-extern int drbd_bmio_set_n_write(struct drbd_device *device);
-extern int drbd_bmio_clear_n_write(struct drbd_device *device);
-extern void drbd_ldev_destroy(struct drbd_device *device);
+extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
+extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
 
 /* Meta data layout
  *
@@ -1105,17 +1263,21 @@ struct bm_extent {
 /* in which _bitmap_ extent (resp. sector) the bit for a certain
  * _storage_ sector is located in */
 #define BM_SECT_TO_EXT(x)   ((x)>>(BM_EXT_SHIFT-9))
+#define BM_BIT_TO_EXT(x)    ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
 
-/* how much _storage_ sectors we have per bitmap sector */
+/* first storage sector a bitmap extent corresponds to */
 #define BM_EXT_TO_SECT(x)   ((sector_t)(x) << (BM_EXT_SHIFT-9))
+/* how much _storage_ sectors we have per bitmap extent */
 #define BM_SECT_PER_EXT     BM_EXT_TO_SECT(1)
+/* how many bits are covered by one bitmap extent (resync extent) */
+#define BM_BITS_PER_EXT     (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
+
+#define BM_BLOCKS_PER_BM_EXT_MASK  (BM_BITS_PER_EXT - 1)
+
 
 /* in one sector of the bitmap, we have this many activity_log extents. */
 #define AL_EXT_PER_BM_SECT  (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
 
-#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
-#define BM_BLOCKS_PER_BM_EXT_MASK  ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
-
 /* the extent in "PER_EXTENT" below is an activity log extent
  * we need that many (long words/bytes) to store the bitmap
  *		     of one AL_EXTENT_SIZE chunk of storage.
@@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
 		const unsigned long s, const unsigned long e);
 extern int  drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
 extern int  drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
-extern int  drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local);
 extern int  drbd_bm_read(struct drbd_device *device) __must_hold(local);
 extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
 extern int  drbd_bm_write(struct drbd_device *device) __must_hold(local);
 extern int  drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
+extern int  drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
 extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
 extern int  drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
 extern size_t	     drbd_bm_words(struct drbd_device *device);
@@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon
 extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
 extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
 extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
-extern int drbd_bm_rs_done(struct drbd_device *device);
 /* for receive_bitmap */
 extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
 		size_t number, unsigned long *buffer);
@@ -1312,7 +1473,7 @@ enum determine_dev_size {
 extern enum determine_dev_size
 drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_device *);
-extern void drbd_reconsider_max_bio_size(struct drbd_device *device);
+extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
 					enum drbd_role new_role,
 					int force);
@@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device);
 extern void suspend_other_sg(struct drbd_device *device);
 extern int drbd_resync_finished(struct drbd_device *device);
 /* maybe rather drbd_main.c ? */
-extern void *drbd_md_get_buffer(struct drbd_device *device);
+extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
 extern void drbd_md_put_buffer(struct drbd_device *device);
 extern int drbd_md_sync_page_io(struct drbd_device *device,
 		struct drbd_backing_dev *bdev, sector_t sector, int rw);
@@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_asender(struct drbd_thread *thi);
 extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
-extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector);
+extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+		bool throttle_if_app_is_waiting);
 extern int drbd_submit_peer_request(struct drbd_device *,
 				    struct drbd_peer_request *, const unsigned,
 				    const int);
@@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 {
 	__release(local);
 	if (!bio->bi_bdev) {
-		printk(KERN_ERR "drbd%d: drbd_generic_make_request: "
-				"bio->bi_bdev == NULL\n",
-		       device_to_minor(device));
-		dump_stack();
+		drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
 		bio_endio(bio, -ENODEV);
 		return;
 	}
@@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
 		generic_make_request(bio);
 }
 
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo);
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+			      enum write_ordering_e wo);
 
 /* drbd_proc.c */
 extern struct proc_dir_entry *drbd_proc;
@@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s);
 /* drbd_actlog.c */
 extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
 extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
+extern void drbd_al_begin_io_commit(struct drbd_device *device);
 extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
-extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate);
+extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i);
 extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
 extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
 extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
@@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device);
 extern void drbd_rs_failed_io(struct drbd_device *device,
 		sector_t sector, int size);
 extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
-extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector,
-		int size, const char *file, const unsigned int line);
+
+enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
+extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
+		enum update_sync_bits_mode mode,
+		const char *file, const unsigned int line);
 #define drbd_set_in_sync(device, sector, size) \
-	__drbd_set_in_sync(device, sector, size, __FILE__, __LINE__)
-extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
-		int size, const char *file, const unsigned int line);
+	__drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__)
 #define drbd_set_out_of_sync(device, sector, size) \
-	__drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__)
+	__drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__)
+#define drbd_rs_failed_io(device, sector, size) \
+	__drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_device *device);
 extern int drbd_initialize_al(struct drbd_device *, void *);
 
@@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
 }
 
 static inline void
-drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&q->q_lock, flags);
-	list_add(&w->list, &q->q);
+	list_add_tail(&w->list, &q->q);
 	spin_unlock_irqrestore(&q->q_lock, flags);
 	wake_up(&q->q_wait);
 }
 
 static inline void
-drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
+drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&q->q_lock, flags);
-	list_add_tail(&w->list, &q->q);
+	if (list_empty_careful(&w->list))
+		list_add_tail(&w->list, &q->q);
 	spin_unlock_irqrestore(&q->q_lock, flags);
 	wake_up(&q->q_wait);
 }
 
+static inline void
+drbd_device_post_work(struct drbd_device *device, int work_bit)
+{
+	if (!test_and_set_bit(work_bit, &device->flags)) {
+		struct drbd_connection *connection =
+			first_peer_device(device)->connection;
+		struct drbd_work_queue *q = &connection->sender_work;
+		if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
+			wake_up(&q->q_wait);
+	}
+}
+
 extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
 
 static inline void wake_asender(struct drbd_connection *connection)
@@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device)
 			func, line,					\
 			atomic_read(&device->which))
 
-#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__)
+#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
 static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
 {
 	if (atomic_dec_and_test(&device->ap_pending_cnt))
@@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device)
 	atomic_inc(&device->rs_pending_cnt);
 }
 
-#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__)
+#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
 static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
 {
 	atomic_dec(&device->rs_pending_cnt);
@@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device)
 	atomic_inc(&device->unacked_cnt);
 }
 
-#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__)
+#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
 static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
 {
 	atomic_dec(&device->unacked_cnt);
 	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
 }
 
-#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__)
+#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
 static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
 {
 	atomic_sub(n, &device->unacked_cnt);
 	ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
 }
 
+static inline bool is_sync_state(enum drbd_conns connection_state)
+{
+	return
+	   (connection_state == C_SYNC_SOURCE
+	||  connection_state == C_SYNC_TARGET
+	||  connection_state == C_PAUSED_SYNC_S
+	||  connection_state == C_PAUSED_SYNC_T);
+}
+
 /**
  * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
  * @M:		DRBD device.
@@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
 
 static inline void put_ldev(struct drbd_device *device)
 {
+	enum drbd_disk_state ds = device->state.disk;
+	/* We must check the state *before* the atomic_dec becomes visible,
+	 * or we have a theoretical race where someone hitting zero,
+	 * while state still D_FAILED, will then see D_DISKLESS in the
+	 * condition below and calling into destroy, where he must not, yet. */
 	int i = atomic_dec_return(&device->local_cnt);
 
 	/* This may be called from some endio handler,
@@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device)
 	__release(local);
 	D_ASSERT(device, i >= 0);
 	if (i == 0) {
-		if (device->state.disk == D_DISKLESS)
+		if (ds == D_DISKLESS)
 			/* even internal references gone, safe to destroy */
-			drbd_ldev_destroy(device);
-		if (device->state.disk == D_FAILED) {
+			drbd_device_post_work(device, DESTROY_DISK);
+		if (ds == D_FAILED)
 			/* all application IO references gone. */
-			if (!test_and_set_bit(GO_DISKLESS, &device->flags))
-				drbd_queue_work(&first_peer_device(device)->connection->sender_work,
-						&device->go_diskless);
-		}
+			if (!test_and_set_bit(GOING_DISKLESS, &device->flags))
+				drbd_device_post_work(device, GO_DISKLESS);
 		wake_up(&device->misc_wait);
 	}
 }
@@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_
 extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
 #endif
 
-/* you must have an "get_ldev" reference */
-static inline void drbd_get_syncer_progress(struct drbd_device *device,
-		unsigned long *bits_left, unsigned int *per_mil_done)
-{
-	/* this is to break it at compile time when we change that, in case we
-	 * want to support more than (1<<32) bits on a 32bit arch. */
-	typecheck(unsigned long, device->rs_total);
-
-	/* note: both rs_total and rs_left are in bits, i.e. in
-	 * units of BM_BLOCK_SIZE.
-	 * for the percentage, we don't care. */
-
-	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
-		*bits_left = device->ov_left;
-	else
-		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
-	/* >> 10 to prevent overflow,
-	 * +1 to prevent division by zero */
-	if (*bits_left > device->rs_total) {
-		/* doh. maybe a logic bug somewhere.
-		 * may also be just a race condition
-		 * between this and a disconnect during sync.
-		 * for now, just prevent in-kernel buffer overflow.
-		 */
-		smp_rmb();
-		drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
-				drbd_conn_str(device->state.conn),
-				*bits_left, device->rs_total, device->rs_failed);
-		*per_mil_done = 0;
-	} else {
-		/* Make sure the division happens in long context.
-		 * We allow up to one petabyte storage right now,
-		 * at a granularity of 4k per bit that is 2**38 bits.
-		 * After shift right and multiplication by 1000,
-		 * this should still fit easily into a 32bit long,
-		 * so we don't need a 64bit division on 32bit arch.
-		 * Note: currently we don't support such large bitmaps on 32bit
-		 * arch anyways, but no harm done to be prepared for it here.
-		 */
-		unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
-		unsigned long left = *bits_left >> shift;
-		unsigned long total = 1UL + (device->rs_total >> shift);
-		unsigned long tmp = 1000UL - left * 1000UL/total;
-		*per_mil_done = tmp;
-	}
-}
-
-
 /* this throttles on-the-fly application requests
  * according to max_buffers settings;
  * maybe re-implement using semaphores? */
@@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device)
 	return QUEUE_ORDERED_NONE;
 }
 
-static inline void drbd_md_flush(struct drbd_device *device)
-{
-	int r;
-
-	if (device->ldev == NULL) {
-		drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n");
-		return;
-	}
-
-	if (test_bit(MD_NO_FUA, &device->flags))
-		return;
-
-	r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL);
-	if (r) {
-		set_bit(MD_NO_FUA, &device->flags);
-		drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r);
-	}
-}
-
 static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
 {
 	return list_first_entry_or_null(&resource->connections,
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f38fcb0..f210543 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
 	unsigned int size;	/* size in bytes */
 	sector_t end;		/* highest interval end in subtree */
 	int local:1		/* local or remote request? */;
-	int waiting:1;
+	int waiting:1;		/* someone is waiting for this to complete */
+	int completed:1;	/* this has been completed already;
+				 * ignore for conflict detection */
 };
 
 static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 960645c..9b465bb 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -26,7 +26,10 @@
 
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
+#include <linux/jiffies.h>
 #include <linux/drbd.h>
 #include <asm/uaccess.h>
 #include <asm/types.h>
@@ -54,16 +57,14 @@
 #include "drbd_int.h"
 #include "drbd_protocol.h"
 #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
-
 #include "drbd_vli.h"
+#include "drbd_debugfs.h"
 
 static DEFINE_MUTEX(drbd_main_mutex);
 static int drbd_open(struct block_device *bdev, fmode_t mode);
 static void drbd_release(struct gendisk *gd, fmode_t mode);
-static int w_md_sync(struct drbd_work *w, int unused);
 static void md_sync_timer_fn(unsigned long data);
 static int w_bitmap_io(struct drbd_work *w, int unused);
-static int w_go_diskless(struct drbd_work *w, int unused);
 
 MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 	      "Lars Ellenberg <lars@linbit.com>");
@@ -264,7 +265,7 @@ bail:
 
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
- * @device:	DRBD device.
+ * @connection:	DRBD connection to operate on.
  * @what:       The action/event to perform with all request objects
  *
  * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
@@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr,
 			    msg_flags);
 	if (data && !err)
 		err = drbd_send_all(connection, sock->socket, data, size, 0);
+	/* DRBD protocol "pings" are latency critical.
+	 * This is supposed to trigger tcp_push_pending_frames() */
+	if (!err && (cmd == P_PING || cmd == P_PING_ACK))
+		drbd_tcp_nodelay(sock->socket);
+
 	return err;
 }
 
@@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 	if (peer_device->connection->agreed_pro_version >= 100) {
 		if (req->rq_state & RQ_EXP_RECEIVE_ACK)
 			dp_flags |= DP_SEND_RECEIVE_ACK;
-		if (req->rq_state & RQ_EXP_WRITE_ACK)
+		/* During resync, request an explicit write ack,
+		 * even in protocol != C */
+		if (req->rq_state & RQ_EXP_WRITE_ACK
+		|| (dp_flags & DP_MAY_SET_IN_SYNC))
 			dp_flags |= DP_SEND_WRITE_ACK;
 	}
 	p->dp_flags = cpu_to_be32(dp_flags);
@@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
 	drbd_set_defaults(device);
 
 	atomic_set(&device->ap_bio_cnt, 0);
+	atomic_set(&device->ap_actlog_cnt, 0);
 	atomic_set(&device->ap_pending_cnt, 0);
 	atomic_set(&device->rs_pending_cnt, 0);
 	atomic_set(&device->unacked_cnt, 0);
@@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
 	atomic_set(&device->rs_sect_in, 0);
 	atomic_set(&device->rs_sect_ev, 0);
 	atomic_set(&device->ap_in_flight, 0);
-	atomic_set(&device->md_io_in_use, 0);
+	atomic_set(&device->md_io.in_use, 0);
 
 	mutex_init(&device->own_state_mutex);
 	device->state_mutex = &device->own_state_mutex;
@@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device)
 	INIT_LIST_HEAD(&device->resync_reads);
 	INIT_LIST_HEAD(&device->resync_work.list);
 	INIT_LIST_HEAD(&device->unplug_work.list);
-	INIT_LIST_HEAD(&device->go_diskless.list);
-	INIT_LIST_HEAD(&device->md_sync_work.list);
-	INIT_LIST_HEAD(&device->start_resync_work.list);
 	INIT_LIST_HEAD(&device->bm_io_work.w.list);
+	INIT_LIST_HEAD(&device->pending_master_completion[0]);
+	INIT_LIST_HEAD(&device->pending_master_completion[1]);
+	INIT_LIST_HEAD(&device->pending_completion[0]);
+	INIT_LIST_HEAD(&device->pending_completion[1]);
 
 	device->resync_work.cb  = w_resync_timer;
 	device->unplug_work.cb  = w_send_write_hint;
-	device->go_diskless.cb  = w_go_diskless;
-	device->md_sync_work.cb = w_md_sync;
 	device->bm_io_work.w.cb = w_bitmap_io;
-	device->start_resync_work.cb = w_start_resync;
 
 	init_timer(&device->resync_timer);
 	init_timer(&device->md_sync_timer);
@@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device)
 		drbd_bm_cleanup(device);
 	}
 
-	drbd_free_bc(device->ldev);
+	drbd_free_ldev(device->ldev);
 	device->ldev = NULL;
 
 	clear_bit(AL_SUSPENDED, &device->flags);
@@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device)
 	D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
 	D_ASSERT(device, list_empty(&device->resync_work.list));
 	D_ASSERT(device, list_empty(&device->unplug_work.list));
-	D_ASSERT(device, list_empty(&device->go_diskless.list));
 
 	drbd_set_defaults(device);
 }
@@ -2129,20 +2136,6 @@ Enomem:
 	return -ENOMEM;
 }
 
-static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
-	void *unused)
-{
-	/* just so we have it.  you never know what interesting things we
-	 * might want to do here some day...
-	 */
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block drbd_notifier = {
-	.notifier_call = drbd_notify_sys,
-};
-
 static void drbd_release_all_peer_reqs(struct drbd_device *device)
 {
 	int rr;
@@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref)
 {
 	struct drbd_device *device = container_of(kref, struct drbd_device, kref);
 	struct drbd_resource *resource = device->resource;
-	struct drbd_connection *connection;
+	struct drbd_peer_device *peer_device, *tmp_peer_device;
 
 	del_timer_sync(&device->request_timer);
 
@@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref)
 	if (device->this_bdev)
 		bdput(device->this_bdev);
 
-	drbd_free_bc(device->ldev);
+	drbd_free_ldev(device->ldev);
 	device->ldev = NULL;
 
 	drbd_release_all_peer_reqs(device);
@@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref)
 
 	if (device->bitmap) /* should no longer be there. */
 		drbd_bm_cleanup(device);
-	__free_page(device->md_io_page);
+	__free_page(device->md_io.page);
 	put_disk(device->vdisk);
 	blk_cleanup_queue(device->rq_queue);
 	kfree(device->rs_plan_s);
-	kfree(first_peer_device(device));
-	kfree(device);
 
-	for_each_connection(connection, resource)
-		kref_put(&connection->kref, drbd_destroy_connection);
+	/* not for_each_connection(connection, resource):
+	 * those may have been cleaned up and disassociated already.
+	 */
+	for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
+		kref_put(&peer_device->connection->kref, drbd_destroy_connection);
+		kfree(peer_device);
+	}
+	memset(device, 0xfd, sizeof(*device));
+	kfree(device);
 	kref_put(&resource->kref, drbd_destroy_resource);
 }
 
@@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws)
 	list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
 		struct drbd_device *device = req->device;
 		struct bio *bio = req->master_bio;
-		unsigned long start_time = req->start_time;
+		unsigned long start_jif = req->start_jif;
 		bool expected;
 
 		expected =
@@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws)
 		/* We are not just doing generic_make_request(),
 		 * as we want to keep the start_time information. */
 		inc_ap_bio(device);
-		__drbd_make_request(device, bio, start_time);
+		__drbd_make_request(device, bio, start_jif);
 	}
 }
 
+/* called via drbd_req_put_completion_ref(),
+ * holds resource->req_lock */
 void drbd_restart_request(struct drbd_request *req)
 {
 	unsigned long flags;
@@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref)
 	idr_destroy(&resource->devices);
 	free_cpumask_var(resource->cpu_mask);
 	kfree(resource->name);
+	memset(resource, 0xf2, sizeof(*resource));
 	kfree(resource);
 }
 
@@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource)
 
 	for_each_connection_safe(connection, tmp, resource) {
 		list_del(&connection->connections);
+		drbd_debugfs_connection_cleanup(connection);
 		kref_put(&connection->kref, drbd_destroy_connection);
 	}
+	drbd_debugfs_resource_cleanup(resource);
 	kref_put(&resource->kref, drbd_destroy_resource);
 }
 
@@ -2318,8 +2321,6 @@ static void drbd_cleanup(void)
 	struct drbd_device *device;
 	struct drbd_resource *resource, *tmp;
 
-	unregister_reboot_notifier(&drbd_notifier);
-
 	/* first remove proc,
 	 * drbdsetup uses it's presence to detect
 	 * whether DRBD is loaded.
@@ -2335,6 +2336,7 @@ static void drbd_cleanup(void)
 		destroy_workqueue(retry.wq);
 
 	drbd_genl_unregister();
+	drbd_debugfs_cleanup();
 
 	idr_for_each_entry(&drbd_devices, device, i)
 		drbd_delete_device(device);
@@ -2350,7 +2352,7 @@ static void drbd_cleanup(void)
 
 	idr_destroy(&drbd_devices);
 
-	printk(KERN_INFO "drbd: module cleanup done.\n");
+	pr_info("module cleanup done.\n");
 }
 
 /**
@@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
 	if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
 		err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE,
 				   cpumask_bits(new_cpu_mask), nr_cpu_ids);
+		if (err == -EOVERFLOW) {
+			/* So what. mask it out. */
+			cpumask_var_t tmp_cpu_mask;
+			if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) {
+				cpumask_setall(tmp_cpu_mask);
+				cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask);
+				drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n",
+					res_opts->cpu_mask,
+					strlen(res_opts->cpu_mask) > 12 ? "..." : "",
+					nr_cpu_ids);
+				free_cpumask_var(tmp_cpu_mask);
+				err = 0;
+			}
+		}
 		if (err) {
 			drbd_warn(resource, "bitmap_parse() failed with %d\n", err);
 			/* retcode = ERR_CPU_MASK_PARSE; */
@@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name)
 	kref_init(&resource->kref);
 	idr_init(&resource->devices);
 	INIT_LIST_HEAD(&resource->connections);
+	resource->write_ordering = WO_bdev_flush;
 	list_add_tail_rcu(&resource->resources, &drbd_resources);
 	mutex_init(&resource->conf_update);
 	mutex_init(&resource->adm_mutex);
 	spin_lock_init(&resource->req_lock);
+	drbd_debugfs_resource_add(resource);
 	return resource;
 
 fail_free_name:
@@ -2593,7 +2611,7 @@ fail:
 	return NULL;
 }
 
-/* caller must be under genl_lock() */
+/* caller must be under adm_mutex */
 struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
 {
 	struct drbd_resource *resource;
@@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
 	INIT_LIST_HEAD(&connection->current_epoch->list);
 	connection->epochs = 1;
 	spin_lock_init(&connection->epoch_lock);
-	connection->write_ordering = WO_bdev_flush;
 
 	connection->send.seen_any_write_yet = false;
 	connection->send.current_epoch_nr = 0;
@@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
 
 	kref_get(&resource->kref);
 	list_add_tail_rcu(&connection->connections, &resource->connections);
+	drbd_debugfs_connection_add(connection);
 	return connection;
 
 fail_resource:
@@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref)
 	drbd_free_socket(&connection->data);
 	kfree(connection->int_dig_in);
 	kfree(connection->int_dig_vv);
+	memset(connection, 0xfc, sizeof(*connection));
 	kfree(connection);
 	kref_put(&resource->kref, drbd_destroy_resource);
 }
@@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device)
 		return -ENOMEM;
 
 	INIT_WORK(&device->submit.worker, do_submit);
-	spin_lock_init(&device->submit.lock);
 	INIT_LIST_HEAD(&device->submit.writes);
 	return 0;
 }
@@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 	blk_queue_merge_bvec(q, drbd_merge_bvec);
 	q->queue_lock = &resource->req_lock;
 
-	device->md_io_page = alloc_page(GFP_KERNEL);
-	if (!device->md_io_page)
+	device->md_io.page = alloc_page(GFP_KERNEL);
+	if (!device->md_io.page)
 		goto out_no_io_page;
 
 	if (drbd_bm_init(device))
@@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 	kref_get(&device->kref);
 
 	INIT_LIST_HEAD(&device->peer_devices);
+	INIT_LIST_HEAD(&device->pending_bitmap_io);
 	for_each_connection(connection, resource) {
 		peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL);
 		if (!peer_device)
@@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
 		for_each_peer_device(peer_device, device)
 			drbd_connected(peer_device);
 	}
-
+	/* move to create_peer_device() */
+	for_each_peer_device(peer_device, device)
+		drbd_debugfs_peer_device_add(peer_device);
+	drbd_debugfs_device_add(device);
 	return NO_ERROR;
 
 out_idr_remove_vol:
@@ -2853,7 +2875,7 @@ out_idr_remove_minor:
 out_no_minor_idr:
 	drbd_bm_cleanup(device);
 out_no_bitmap:
-	__free_page(device->md_io_page);
+	__free_page(device->md_io.page);
 out_no_io_page:
 	put_disk(disk);
 out_no_disk:
@@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device)
 {
 	struct drbd_resource *resource = device->resource;
 	struct drbd_connection *connection;
+	struct drbd_peer_device *peer_device;
 	int refs = 3;
 
+	/* move to free_peer_device() */
+	for_each_peer_device(peer_device, device)
+		drbd_debugfs_peer_device_cleanup(peer_device);
+	drbd_debugfs_device_cleanup(device);
 	for_each_connection(connection, resource) {
 		idr_remove(&connection->peer_devices, device->vnr);
 		refs++;
@@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device)
 	kref_sub(&device->kref, refs, drbd_destroy_device);
 }
 
-int __init drbd_init(void)
+static int __init drbd_init(void)
 {
 	int err;
 
 	if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
-		printk(KERN_ERR
-		       "drbd: invalid minor_count (%d)\n", minor_count);
+		pr_err("invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
 		return -EINVAL;
 #else
@@ -2897,14 +2923,11 @@ int __init drbd_init(void)
 
 	err = register_blkdev(DRBD_MAJOR, "drbd");
 	if (err) {
-		printk(KERN_ERR
-		       "drbd: unable to register block device major %d\n",
+		pr_err("unable to register block device major %d\n",
 		       DRBD_MAJOR);
 		return err;
 	}
 
-	register_reboot_notifier(&drbd_notifier);
-
 	/*
 	 * allocate all necessary structs
 	 */
@@ -2918,7 +2941,7 @@ int __init drbd_init(void)
 
 	err = drbd_genl_register();
 	if (err) {
-		printk(KERN_ERR "drbd: unable to register generic netlink family\n");
+		pr_err("unable to register generic netlink family\n");
 		goto fail;
 	}
 
@@ -2929,38 +2952,39 @@ int __init drbd_init(void)
 	err = -ENOMEM;
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
-		printk(KERN_ERR "drbd: unable to register proc file\n");
+		pr_err("unable to register proc file\n");
 		goto fail;
 	}
 
 	retry.wq = create_singlethread_workqueue("drbd-reissue");
 	if (!retry.wq) {
-		printk(KERN_ERR "drbd: unable to create retry workqueue\n");
+		pr_err("unable to create retry workqueue\n");
 		goto fail;
 	}
 	INIT_WORK(&retry.worker, do_retry);
 	spin_lock_init(&retry.lock);
 	INIT_LIST_HEAD(&retry.writes);
 
-	printk(KERN_INFO "drbd: initialized. "
+	if (drbd_debugfs_init())
+		pr_notice("failed to initialize debugfs -- will not be available\n");
+
+	pr_info("initialized. "
 	       "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
 	       API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
-	printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
-	printk(KERN_INFO "drbd: registered as block device major %d\n",
-		DRBD_MAJOR);
-
+	pr_info("%s\n", drbd_buildtag());
+	pr_info("registered as block device major %d\n", DRBD_MAJOR);
 	return 0; /* Success! */
 
 fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
-		printk(KERN_ERR "drbd: ran out of memory\n");
+		pr_err("ran out of memory\n");
 	else
-		printk(KERN_ERR "drbd: initialization failure\n");
+		pr_err("initialization failure\n");
 	return err;
 }
 
-void drbd_free_bc(struct drbd_backing_dev *ldev)
+void drbd_free_ldev(struct drbd_backing_dev *ldev)
 {
 	if (ldev == NULL)
 		return;
@@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
 	kfree(ldev);
 }
 
-void drbd_free_sock(struct drbd_connection *connection)
+static void drbd_free_one_sock(struct drbd_socket *ds)
 {
-	if (connection->data.socket) {
-		mutex_lock(&connection->data.mutex);
-		kernel_sock_shutdown(connection->data.socket, SHUT_RDWR);
-		sock_release(connection->data.socket);
-		connection->data.socket = NULL;
-		mutex_unlock(&connection->data.mutex);
-	}
-	if (connection->meta.socket) {
-		mutex_lock(&connection->meta.mutex);
-		kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR);
-		sock_release(connection->meta.socket);
-		connection->meta.socket = NULL;
-		mutex_unlock(&connection->meta.mutex);
+	struct socket *s;
+	mutex_lock(&ds->mutex);
+	s = ds->socket;
+	ds->socket = NULL;
+	mutex_unlock(&ds->mutex);
+	if (s) {
+		/* so debugfs does not need to mutex_lock() */
+		synchronize_rcu();
+		kernel_sock_shutdown(s, SHUT_RDWR);
+		sock_release(s);
 	}
 }
 
+void drbd_free_sock(struct drbd_connection *connection)
+{
+	if (connection->data.socket)
+		drbd_free_one_sock(&connection->data);
+	if (connection->meta.socket)
+		drbd_free_one_sock(&connection->meta);
+}
+
 /* meta data management */
 
 void conn_md_sync(struct drbd_connection *connection)
@@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device)
 	if (!get_ldev_if_state(device, D_FAILED))
 		return;
 
-	buffer = drbd_md_get_buffer(device);
+	buffer = drbd_md_get_buffer(device, __func__);
 	if (!buffer)
 		goto out;
 
@@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
 	if (device->state.disk != D_DISKLESS)
 		return ERR_DISK_CONFIGURED;
 
-	buffer = drbd_md_get_buffer(device);
+	buffer = drbd_md_get_buffer(device, __func__);
 	if (!buffer)
 		return ERR_NOMEM;
 
@@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
  *
  * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
  */
-int drbd_bmio_set_n_write(struct drbd_device *device)
+int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
 {
 	int rv = -EIO;
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		drbd_md_set_flag(device, MDF_FULL_SYNC);
-		drbd_md_sync(device);
-		drbd_bm_set_all(device);
-
-		rv = drbd_bm_write(device);
+	drbd_md_set_flag(device, MDF_FULL_SYNC);
+	drbd_md_sync(device);
+	drbd_bm_set_all(device);
 
-		if (!rv) {
-			drbd_md_clear_flag(device, MDF_FULL_SYNC);
-			drbd_md_sync(device);
-		}
+	rv = drbd_bm_write(device);
 
-		put_ldev(device);
+	if (!rv) {
+		drbd_md_clear_flag(device, MDF_FULL_SYNC);
+		drbd_md_sync(device);
 	}
 
 	return rv;
@@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device)
  *
  * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
  */
-int drbd_bmio_clear_n_write(struct drbd_device *device)
+int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
 {
-	int rv = -EIO;
-
 	drbd_resume_al(device);
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		drbd_bm_clear_all(device);
-		rv = drbd_bm_write(device);
-		put_ldev(device);
-	}
-
-	return rv;
+	drbd_bm_clear_all(device);
+	return drbd_bm_write(device);
 }
 
 static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
 	return 0;
 }
 
-void drbd_ldev_destroy(struct drbd_device *device)
-{
-	lc_destroy(device->resync);
-	device->resync = NULL;
-	lc_destroy(device->act_log);
-	device->act_log = NULL;
-	__no_warn(local,
-		drbd_free_bc(device->ldev);
-		device->ldev = NULL;);
-
-	clear_bit(GO_DISKLESS, &device->flags);
-}
-
-static int w_go_diskless(struct drbd_work *w, int unused)
-{
-	struct drbd_device *device =
-		container_of(w, struct drbd_device, go_diskless);
-
-	D_ASSERT(device, device->state.disk == D_FAILED);
-	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
-	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
-	 * the protected members anymore, though, so once put_ldev reaches zero
-	 * again, it will be safe to free them. */
-
-	/* Try to write changed bitmap pages, read errors may have just
-	 * set some bits outside the area covered by the activity log.
-	 *
-	 * If we have an IO error during the bitmap writeout,
-	 * we will want a full sync next time, just in case.
-	 * (Do we want a specific meta data flag for this?)
-	 *
-	 * If that does not make it to stable storage either,
-	 * we cannot do anything about that anymore.
-	 *
-	 * We still need to check if both bitmap and ldev are present, we may
-	 * end up here after a failed attach, before ldev was even assigned.
-	 */
-	if (device->bitmap && device->ldev) {
-		/* An interrupted resync or similar is allowed to recounts bits
-		 * while we detach.
-		 * Any modifications would not be expected anymore, though.
-		 */
-		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
-					"detach", BM_LOCKED_TEST_ALLOWED)) {
-			if (test_bit(WAS_READ_ERROR, &device->flags)) {
-				drbd_md_set_flag(device, MDF_FULL_SYNC);
-				drbd_md_sync(device);
-			}
-		}
-	}
-
-	drbd_force_state(device, NS(disk, D_DISKLESS));
-	return 0;
-}
-
 /**
  * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
  * @device:	DRBD device.
@@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused)
  * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
  * called from worker context. It MUST NOT be used while a previous such
  * work is still pending!
+ *
+ * Its worker function encloses the call of io_fn() by get_ldev() and
+ * put_ldev().
  */
 void drbd_queue_bitmap_io(struct drbd_device *device,
 			  int (*io_fn)(struct drbd_device *),
@@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
 static void md_sync_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
-
-	/* must not double-queue! */
-	if (list_empty(&device->md_sync_work.list))
-		drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
-				      &device->md_sync_work);
-}
-
-static int w_md_sync(struct drbd_work *w, int unused)
-{
-	struct drbd_device *device =
-		container_of(w, struct drbd_device, md_sync_work);
-
-	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
-#ifdef DEBUG
-	drbd_warn(device, "last md_mark_dirty: %s:%u\n",
-		device->last_md_mark_dirty.func, device->last_md_mark_dirty.line);
-#endif
-	drbd_md_sync(device);
-	return 0;
+	drbd_device_post_work(device, MD_SYNC);
 }
 
 const char *cmdname(enum drbd_packet cmd)
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3f2e167..1cd47df 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -23,6 +23,8 @@
 
  */
 
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/drbd.h>
 #include <linux/in.h>
@@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
 {
 	genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
 	if (genlmsg_reply(skb, info))
-		printk(KERN_ERR "drbd: error sending genl reply\n");
+		pr_err("error sending genl reply\n");
 }
 
 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
@@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection)
 }
 
 enum drbd_state_rv
-drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
+drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
 {
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
 	const int max_tries = 4;
 	enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
 	struct net_conf *nc;
@@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 		    device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
 			D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
 
-			if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
+			if (conn_try_outdate_peer(connection)) {
 				val.disk = D_UP_TO_DATE;
 				mask.disk = D_MASK;
 			}
@@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 		if (rv == SS_NOTHING_TO_DO)
 			goto out;
 		if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
-			if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
+			if (!conn_try_outdate_peer(connection) && force) {
 				drbd_warn(device, "Forced into split brain situation!\n");
 				mask.pdsk = D_MASK;
 				val.pdsk  = D_OUTDATED;
@@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 			   retry at most once more in this case. */
 			int timeo;
 			rcu_read_lock();
-			nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+			nc = rcu_dereference(connection->net_conf);
 			timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
 			rcu_read_unlock();
 			schedule_timeout_interruptible(timeo);
@@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 	/* FIXME also wait for all pending P_BARRIER_ACK? */
 
 	if (new_role == R_SECONDARY) {
-		set_disk_ro(device->vdisk, true);
 		if (get_ldev(device)) {
 			device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
 			put_ldev(device);
 		}
 	} else {
-		/* Called from drbd_adm_set_role only.
-		 * We are still holding the conf_update mutex. */
-		nc = first_peer_device(device)->connection->net_conf;
+		mutex_lock(&device->resource->conf_update);
+		nc = connection->net_conf;
 		if (nc)
 			nc->discard_my_data = 0; /* without copy; single bit op is atomic */
+		mutex_unlock(&device->resource->conf_update);
 
-		set_disk_ro(device->vdisk, false);
 		if (get_ldev(device)) {
 			if (((device->state.conn < C_CONNECTED ||
 			       device->state.pdsk <= D_FAILED)
@@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
 	if (device->state.conn >= C_WF_REPORT_PARAMS) {
 		/* if this was forced, we should consider sync */
 		if (forced)
-			drbd_send_uuids(first_peer_device(device));
-		drbd_send_current_state(first_peer_device(device));
+			drbd_send_uuids(peer_device);
+		drbd_send_current_state(peer_device);
 	}
 
 	drbd_md_sync(device);
-
+	set_disk_ro(device->vdisk, new_role == R_SECONDARY);
 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 out:
 	mutex_unlock(device->state_mutex);
@@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(device);
-	buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
+	buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
 	if (!buffer) {
 		drbd_resume_io(device);
 		return DS_ERROR;
@@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
 	if (la_size_changed || md_moved || rs) {
 		u32 prev_flags;
 
+		/* We do some synchronous IO below, which may take some time.
+		 * Clear the timer, to avoid scary "timer expired!" messages,
+		 * "Superblock" is written out at least twice below, anyways. */
+		del_timer(&device->md_sync_timer);
 		drbd_al_shrink(device); /* All extents inactive. */
 
 		prev_flags = md->flags;
@@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
 	return 0;
 }
 
-static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
+static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
+				   unsigned int max_bio_size)
 {
 	struct request_queue * const q = device->rq_queue;
 	unsigned int max_hw_sectors = max_bio_size >> 9;
 	unsigned int max_segments = 0;
 	struct request_queue *b = NULL;
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		b = device->ldev->backing_bdev->bd_disk->queue;
+	if (bdev) {
+		b = bdev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 		rcu_read_lock();
@@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 				 b->backing_dev_info.ra_pages);
 			q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
 		}
-		put_ldev(device);
 	}
 }
 
-void drbd_reconsider_max_bio_size(struct drbd_device *device)
+void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
 {
 	unsigned int now, new, local, peer;
 
@@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
 	local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
 	peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
+	if (bdev) {
+		local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
 		device->local_max_bio_size = local;
-		put_ldev(device);
 	}
 	local = min(local, DRBD_MAX_BIO_SIZE);
 
@@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
 	if (new != now)
 		drbd_info(device, "max BIO size = %u\n", new);
 
-	drbd_setup_queue_param(device, new);
+	drbd_setup_queue_param(device, bdev, new);
 }
 
 /* Starts the worker thread */
@@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
 	return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
 }
 
+static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
+{
+	return	a->disk_barrier != b->disk_barrier ||
+		a->disk_flushes != b->disk_flushes ||
+		a->disk_drain != b->disk_drain;
+}
+
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_config_context adm_ctx;
@@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 	else
 		set_bit(MD_NO_FUA, &device->flags);
 
-	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+	if (write_ordering_changed(old_disk_conf, new_disk_conf))
+		drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
 
 	drbd_md_sync(device);
 
@@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 {
 	struct drbd_config_context adm_ctx;
 	struct drbd_device *device;
+	struct drbd_peer_device *peer_device;
+	struct drbd_connection *connection;
 	int err;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
@@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	device = adm_ctx.device;
 	mutex_lock(&adm_ctx.resource->adm_mutex);
-	conn_reconfig_start(first_peer_device(device)->connection);
+	peer_device = first_peer_device(device);
+	connection = peer_device ? peer_device->connection : NULL;
+	conn_reconfig_start(connection);
 
 	/* if you want to reconfigure, please tear down first */
 	if (device->state.disk > D_DISKLESS) {
@@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 * drbd_ldev_destroy is done already, we may end up here very fast,
 	 * e.g. if someone calls attach from the on-io-error handler,
 	 * to realize a "hot spare" feature (not that I'd recommend that) */
-	wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
+	wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
 
 	/* make sure there is no leftover from previous force-detach attempts */
 	clear_bit(FORCE_DETACH, &device->flags);
@@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 		goto fail;
 
 	rcu_read_lock();
-	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+	nc = rcu_dereference(connection->net_conf);
 	if (nc) {
 		if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
 			rcu_read_unlock();
@@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	 */
 	wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
 	/* and for any other previously queued work */
-	drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
+	drbd_flush_workqueue(&connection->sender_work);
 
 	rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
 	retcode = rv;  /* FIXME: Type mismatch. */
@@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	new_disk_conf = NULL;
 	new_plan = NULL;
 
-	drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
+	drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
 
 	if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
 		set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	device->read_cnt = 0;
 	device->writ_cnt = 0;
 
-	drbd_reconsider_max_bio_size(device);
+	drbd_reconsider_max_bio_size(device, device->ldev);
 
 	/* If I am currently not R_PRIMARY,
 	 * but meta data primary indicator is set,
@@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 
 	kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
 	put_ldev(device);
-	conn_reconfig_done(first_peer_device(device)->connection);
+	conn_reconfig_done(connection);
 	mutex_unlock(&adm_ctx.resource->adm_mutex);
 	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
@@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
 	drbd_force_state(device, NS(disk, D_DISKLESS));
 	drbd_md_sync(device);
  fail:
-	conn_reconfig_done(first_peer_device(device)->connection);
+	conn_reconfig_done(connection);
 	if (nbc) {
 		if (nbc->backing_bdev)
 			blkdev_put(nbc->backing_bdev,
@@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force)
 	}
 
 	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
-	drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
+	drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
 	retcode = drbd_request_state(device, NS(disk, D_FAILED));
 	drbd_md_put_buffer(device);
 	/* D_FAILED will transition to DISKLESS. */
@@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
+	if (!get_ldev(device)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
 	drbd_resume_io(device);
 	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	put_ldev(device);
 out:
 	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
@@ -2704,7 +2727,7 @@ out:
 	return 0;
 }
 
-static int drbd_bmio_set_susp_al(struct drbd_device *device)
+static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
 {
 	int rv;
 
@@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 	if (retcode != NO_ERROR)
 		goto out;
 
-	mutex_lock(&adm_ctx.resource->adm_mutex);
 	device = adm_ctx.device;
+	if (!get_ldev(device)) {
+		retcode = ERR_NO_DISK;
+		goto out;
+	}
+
+	mutex_lock(&adm_ctx.resource->adm_mutex);
 
 	/* If there is still bitmap IO pending, probably because of a previous
 	 * resync just being finished, wait for it before requesting a new resync.
@@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
 		retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
 	drbd_resume_io(device);
 	mutex_unlock(&adm_ctx.resource->adm_mutex);
+	put_ldev(device);
 out:
 	drbd_adm_finish(&adm_ctx, info, retcode);
 	return 0;
@@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc
 	return list_first_entry(&resource->connections, struct drbd_connection, connections);
 }
 
-int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
+static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
 		const struct sib_info *sib)
 {
 	struct drbd_resource *resource = device->resource;
@@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
 	unsigned seq;
 	int err = -ENOMEM;
 
-	if (sib->sib_reason == SIB_SYNC_PROGRESS) {
-		if (time_after(jiffies, device->rs_last_bcast + HZ))
-			device->rs_last_bcast = jiffies;
-		else
-			return;
-	}
-
 	seq = atomic_inc_return(&drbd_genl_seq);
 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
 	if (!msg)
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 89736bd..06e6147 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
 		seq_printf(seq, "%ld", v);
 }
 
+static void drbd_get_syncer_progress(struct drbd_device *device,
+		union drbd_dev_state state, unsigned long *rs_total,
+		unsigned long *bits_left, unsigned int *per_mil_done)
+{
+	/* this is to break it at compile time when we change that, in case we
+	 * want to support more than (1<<32) bits on a 32bit arch. */
+	typecheck(unsigned long, device->rs_total);
+	*rs_total = device->rs_total;
+
+	/* note: both rs_total and rs_left are in bits, i.e. in
+	 * units of BM_BLOCK_SIZE.
+	 * for the percentage, we don't care. */
+
+	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
+		*bits_left = device->ov_left;
+	else
+		*bits_left = drbd_bm_total_weight(device) - device->rs_failed;
+	/* >> 10 to prevent overflow,
+	 * +1 to prevent division by zero */
+	if (*bits_left > *rs_total) {
+		/* D'oh. Maybe a logic bug somewhere.  More likely just a race
+		 * between state change and reset of rs_total.
+		 */
+		*bits_left = *rs_total;
+		*per_mil_done = *rs_total ? 0 : 1000;
+	} else {
+		/* Make sure the division happens in long context.
+		 * We allow up to one petabyte storage right now,
+		 * at a granularity of 4k per bit that is 2**38 bits.
+		 * After shift right and multiplication by 1000,
+		 * this should still fit easily into a 32bit long,
+		 * so we don't need a 64bit division on 32bit arch.
+		 * Note: currently we don't support such large bitmaps on 32bit
+		 * arch anyways, but no harm done to be prepared for it here.
+		 */
+		unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
+		unsigned long left = *bits_left >> shift;
+		unsigned long total = 1UL + (*rs_total >> shift);
+		unsigned long tmp = 1000UL - left * 1000UL/total;
+		*per_mil_done = tmp;
+	}
+}
+
+
 /*lge
  * progress bars shamelessly adapted from driver/md/md.c
  * output looks like
  *	[=====>..............] 33.5% (23456/123456)
  *	finish: 2:20:20 speed: 6,345 (6,456) K/sec
  */
-static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq)
+static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
+		union drbd_dev_state state)
 {
-	unsigned long db, dt, dbdt, rt, rs_left;
+	unsigned long db, dt, dbdt, rt, rs_total, rs_left;
 	unsigned int res;
 	int i, x, y;
 	int stalled = 0;
 
-	drbd_get_syncer_progress(device, &rs_left, &res);
+	drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
 
 	x = res/50;
 	y = 20-x;
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
 		seq_printf(seq, ".");
 	seq_printf(seq, "] ");
 
-	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
+	if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
 		seq_printf(seq, "verified:");
 	else
 		seq_printf(seq, "sync'ed:");
 	seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
 
 	/* if more than a few GB, display in MB */
-	if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
+	if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
 		seq_printf(seq, "(%lu/%lu)M",
 			    (unsigned long) Bit2KB(rs_left >> 10),
-			    (unsigned long) Bit2KB(device->rs_total >> 10));
+			    (unsigned long) Bit2KB(rs_total >> 10));
 	else
 		seq_printf(seq, "(%lu/%lu)K\n\t",
 			    (unsigned long) Bit2KB(rs_left),
-			    (unsigned long) Bit2KB(device->rs_total));
+			    (unsigned long) Bit2KB(rs_total));
 
 	/* see drivers/md/md.c
 	 * We do not want to overflow, so the order of operands and
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
 	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
 	if (dt == 0)
 		dt = 1;
-	db = device->rs_total - rs_left;
+	db = rs_total - rs_left;
 	dbdt = Bit2KB(db/dt);
 	seq_printf_with_thousands_grouping(seq, dbdt);
 	seq_printf(seq, ")");
 
-	if (device->state.conn == C_SYNC_TARGET ||
-	    device->state.conn == C_VERIFY_S) {
+	if (state.conn == C_SYNC_TARGET ||
+	    state.conn == C_VERIFY_S) {
 		seq_printf(seq, " want: ");
 		seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
 	}
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
 		unsigned long bm_bits = drbd_bm_bits(device);
 		unsigned long bit_pos;
 		unsigned long long stop_sector = 0;
-		if (device->state.conn == C_VERIFY_S ||
-		    device->state.conn == C_VERIFY_T) {
+		if (state.conn == C_VERIFY_S ||
+		    state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - device->ov_left;
 			if (verify_can_do_stop_sector(device))
 				stop_sector = device->ov_stop_sector;
@@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
 	}
 }
 
-static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
-{
-	struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
-
-	seq_printf(seq, "%5d %s %s\n", bme->rs_left,
-		   bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
-		   bme->flags & BME_LOCKED ? "LOCKED" : "------"
-		   );
-}
-
 static int drbd_seq_show(struct seq_file *seq, void *v)
 {
 	int i, prev_i = -1;
 	const char *sn;
 	struct drbd_device *device;
 	struct net_conf *nc;
+	union drbd_dev_state state;
 	char wp;
 
 	static char write_ordering_chars[] = {
@@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			seq_printf(seq, "\n");
 		prev_i = i;
 
-		sn = drbd_conn_str(device->state.conn);
+		state = device->state;
+		sn = drbd_conn_str(state.conn);
 
-		if (device->state.conn == C_STANDALONE &&
-		    device->state.disk == D_DISKLESS &&
-		    device->state.role == R_SECONDARY) {
+		if (state.conn == C_STANDALONE &&
+		    state.disk == D_DISKLESS &&
+		    state.role == R_SECONDARY) {
 			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
 		} else {
 			/* reset device->congestion_reason */
@@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
 			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
 			   i, sn,
-			   drbd_role_str(device->state.role),
-			   drbd_role_str(device->state.peer),
-			   drbd_disk_str(device->state.disk),
-			   drbd_disk_str(device->state.pdsk),
+			   drbd_role_str(state.role),
+			   drbd_role_str(state.peer),
+			   drbd_disk_str(state.disk),
+			   drbd_disk_str(state.pdsk),
 			   wp,
 			   drbd_suspended(device) ? 's' : 'r',
-			   device->state.aftr_isp ? 'a' : '-',
-			   device->state.peer_isp ? 'p' : '-',
-			   device->state.user_isp ? 'u' : '-',
+			   state.aftr_isp ? 'a' : '-',
+			   state.peer_isp ? 'p' : '-',
+			   state.user_isp ? 'u' : '-',
 			   device->congestion_reason ?: '-',
 			   test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
 			   device->send_cnt/2,
@@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			   atomic_read(&device->unacked_cnt),
 			   atomic_read(&device->ap_bio_cnt),
 			   first_peer_device(device)->connection->epochs,
-			   write_ordering_chars[first_peer_device(device)->connection->write_ordering]
+			   write_ordering_chars[device->resource->write_ordering]
 			);
 			seq_printf(seq, " oos:%llu\n",
 				   Bit2KB((unsigned long long)
 					   drbd_bm_total_weight(device)));
 		}
-		if (device->state.conn == C_SYNC_SOURCE ||
-		    device->state.conn == C_SYNC_TARGET ||
-		    device->state.conn == C_VERIFY_S ||
-		    device->state.conn == C_VERIFY_T)
-			drbd_syncer_progress(device, seq);
+		if (state.conn == C_SYNC_SOURCE ||
+		    state.conn == C_SYNC_TARGET ||
+		    state.conn == C_VERIFY_S ||
+		    state.conn == C_VERIFY_T)
+			drbd_syncer_progress(device, seq, state);
 
 		if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
 			lc_seq_printf_stats(seq, device->resync);
@@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			put_ldev(device);
 		}
 
-		if (proc_details >= 2) {
-			if (device->resync) {
-				lc_seq_dump_details(seq, device->resync, "rs_left",
-					resync_dump_detail);
-			}
-		}
+		if (proc_details >= 2)
+			seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
 	}
 	rcu_read_unlock();
 
@@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file)
 	int err;
 
 	if (try_module_get(THIS_MODULE)) {
-		err = single_open(file, drbd_seq_show, PDE_DATA(inode));
+		err = single_open(file, drbd_seq_show, NULL);
 		if (err)
 			module_put(THIS_MODULE);
 		return err;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5b17ec8..9342b8d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 			goto fail;
 	}
 
+	memset(peer_req, 0, sizeof(*peer_req));
+	INIT_LIST_HEAD(&peer_req->w.list);
 	drbd_clear_interval(&peer_req->i);
 	peer_req->i.size = data_size;
 	peer_req->i.sector = sector;
-	peer_req->i.local = false;
-	peer_req->i.waiting = false;
-
-	peer_req->epoch = NULL;
+	peer_req->submit_jif = jiffies;
 	peer_req->peer_device = peer_device;
 	peer_req->pages = page;
-	atomic_set(&peer_req->pending_bios, 0);
-	peer_req->flags = 0;
 	/*
 	 * The block_id is opaque to the receiver.  It is not endianness
 	 * converted, and sent back to the sender unchanged.
@@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
 void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
 		       int is_net)
 {
+	might_sleep();
 	if (peer_req->flags & EE_HAS_DIGEST)
 		kfree(peer_req->digest);
 	drbd_free_pages(device, peer_req->pages, is_net);
 	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
 	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
+	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
+		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
+		drbd_al_complete_io(device, &peer_req->i);
+	}
 	mempool_free(peer_req, drbd_ee_mempool);
 }
 
@@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
 {
 	unsigned int header_size = drbd_header_size(connection);
 	struct packet_info pi;
+	struct net_conf *nc;
 	int err;
 
+	rcu_read_lock();
+	nc = rcu_dereference(connection->net_conf);
+	if (!nc) {
+		rcu_read_unlock();
+		return -EIO;
+	}
+	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
+	rcu_read_unlock();
+
 	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
 	if (err != header_size) {
 		if (err >= 0)
@@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
  * drbd_socket_okay() - Free the socket if its connection is not okay
  * @sock:	pointer to the pointer to the socket.
  */
-static int drbd_socket_okay(struct socket **sock)
+static bool drbd_socket_okay(struct socket **sock)
 {
 	int rr;
 	char tb[4];
@@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock)
 		return false;
 	}
 }
+
+static bool connection_established(struct drbd_connection *connection,
+				   struct socket **sock1,
+				   struct socket **sock2)
+{
+	struct net_conf *nc;
+	int timeout;
+	bool ok;
+
+	if (!*sock1 || !*sock2)
+		return false;
+
+	rcu_read_lock();
+	nc = rcu_dereference(connection->net_conf);
+	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
+	rcu_read_unlock();
+	schedule_timeout_interruptible(timeout);
+
+	ok = drbd_socket_okay(sock1);
+	ok = drbd_socket_okay(sock2) && ok;
+
+	return ok;
+}
+
 /* Gets called if a connection is established, or if a new minor gets created
    in a connection */
 int drbd_connected(struct drbd_peer_device *peer_device)
@@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection)
 	struct drbd_socket sock, msock;
 	struct drbd_peer_device *peer_device;
 	struct net_conf *nc;
-	int vnr, timeout, h, ok;
-	bool discard_my_data;
+	int vnr, timeout, h;
+	bool discard_my_data, ok;
 	enum drbd_state_rv rv;
 	struct accept_wait_data ad = {
 		.connection = connection,
@@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection)
 			}
 		}
 
-		if (sock.socket && msock.socket) {
-			rcu_read_lock();
-			nc = rcu_dereference(connection->net_conf);
-			timeout = nc->ping_timeo * HZ / 10;
-			rcu_read_unlock();
-			schedule_timeout_interruptible(timeout);
-			ok = drbd_socket_okay(&sock.socket);
-			ok = drbd_socket_okay(&msock.socket) && ok;
-			if (ok)
-				break;
-		}
+		if (connection_established(connection, &sock.socket, &msock.socket))
+			break;
 
 retry:
 		s = drbd_wait_for_connect(connection, &ad);
@@ -969,8 +996,7 @@ randomize:
 				goto out_release_sockets;
 		}
 
-		ok = drbd_socket_okay(&sock.socket);
-		ok = drbd_socket_okay(&msock.socket) && ok;
+		ok = connection_established(connection, &sock.socket, &msock.socket);
 	} while (!ok);
 
 	if (ad.s_listen)
@@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection)
 	struct drbd_peer_device *peer_device;
 	int vnr;
 
-	if (connection->write_ordering >= WO_bdev_flush) {
+	if (connection->resource->write_ordering >= WO_bdev_flush) {
 		rcu_read_lock();
 		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 			struct drbd_device *device = peer_device->device;
@@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection)
 			kref_get(&device->kref);
 			rcu_read_unlock();
 
+			/* Right now, we have only this one synchronous code path
+			 * for flushes between request epochs.
+			 * We may want to make those asynchronous,
+			 * or at least parallelize the flushes to the volume devices.
+			 */
+			device->flush_jif = jiffies;
+			set_bit(FLUSH_PENDING, &device->flags);
 			rv = blkdev_issue_flush(device->ldev->backing_bdev,
 					GFP_NOIO, NULL);
+			clear_bit(FLUSH_PENDING, &device->flags);
 			if (rv) {
 				drbd_info(device, "local disk flush failed with status %d\n", rv);
 				/* would rather check on EOPNOTSUPP, but that is not reliable.
 				 * don't try again for ANY return value != 0
 				 * if (rv == -EOPNOTSUPP) */
-				drbd_bump_write_ordering(connection, WO_drain_io);
+				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
 			}
 			put_ldev(device);
 			kref_put(&device->kref, drbd_destroy_device);
@@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
 	return rv;
 }
 
+static enum write_ordering_e
+max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
+{
+	struct disk_conf *dc;
+
+	dc = rcu_dereference(bdev->disk_conf);
+
+	if (wo == WO_bdev_flush && !dc->disk_flushes)
+		wo = WO_drain_io;
+	if (wo == WO_drain_io && !dc->disk_drain)
+		wo = WO_none;
+
+	return wo;
+}
+
 /**
  * drbd_bump_write_ordering() - Fall back to an other write ordering method
  * @connection:	DRBD connection.
  * @wo:		Write ordering method to try.
  */
-void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
+void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
+			      enum write_ordering_e wo)
 {
-	struct disk_conf *dc;
-	struct drbd_peer_device *peer_device;
+	struct drbd_device *device;
 	enum write_ordering_e pwo;
 	int vnr;
 	static char *write_ordering_str[] = {
@@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord
 		[WO_bdev_flush] = "flush",
 	};
 
-	pwo = connection->write_ordering;
-	wo = min(pwo, wo);
+	pwo = resource->write_ordering;
+	if (wo != WO_bdev_flush)
+		wo = min(pwo, wo);
 	rcu_read_lock();
-	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
-		struct drbd_device *device = peer_device->device;
+	idr_for_each_entry(&resource->devices, device, vnr) {
+		if (get_ldev(device)) {
+			wo = max_allowed_wo(device->ldev, wo);
+			if (device->ldev == bdev)
+				bdev = NULL;
+			put_ldev(device);
+		}
+	}
 
-		if (!get_ldev_if_state(device, D_ATTACHING))
-			continue;
-		dc = rcu_dereference(device->ldev->disk_conf);
+	if (bdev)
+		wo = max_allowed_wo(bdev, wo);
 
-		if (wo == WO_bdev_flush && !dc->disk_flushes)
-			wo = WO_drain_io;
-		if (wo == WO_drain_io && !dc->disk_drain)
-			wo = WO_none;
-		put_ldev(device);
-	}
 	rcu_read_unlock();
-	connection->write_ordering = wo;
-	if (pwo != connection->write_ordering || wo == WO_bdev_flush)
-		drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
+
+	resource->write_ordering = wo;
+	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }
 
 /**
@@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device,
 		/* wait for all pending IO completions, before we start
 		 * zeroing things out. */
 		conn_wait_active_ee_empty(first_peer_device(device)->connection);
+		/* add it to the active list now,
+		 * so we can find it to present it in debugfs */
+		peer_req->submit_jif = jiffies;
+		peer_req->flags |= EE_SUBMITTED;
+		spin_lock_irq(&device->resource->req_lock);
+		list_add_tail(&peer_req->w.list, &device->active_ee);
+		spin_unlock_irq(&device->resource->req_lock);
 		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
 			sector, ds >> 9, GFP_NOIO))
 			peer_req->flags |= EE_WAS_ERROR;
@@ -1398,6 +1455,9 @@ submit:
 	D_ASSERT(device, page == NULL);
 
 	atomic_set(&peer_req->pending_bios, n_bios);
+	/* for debugfs: update timestamp, mark as submitted */
+	peer_req->submit_jif = jiffies;
+	peer_req->flags |= EE_SUBMITTED;
 	do {
 		bio = bios;
 		bios = bios->bi_next;
@@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
 	 * R_PRIMARY crashes now.
 	 * Therefore we must send the barrier_ack after the barrier request was
 	 * completed. */
-	switch (connection->write_ordering) {
+	switch (connection->resource->write_ordering) {
 	case WO_none:
 		if (rv == FE_RECYCLED)
 			return 0;
@@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
 
 		return 0;
 	default:
-		drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
+		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
+			 connection->resource->write_ordering);
 		return -EIO;
 	}
 
@@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 	struct drbd_peer_request *peer_req;
 	struct page *page;
 	int dgs, ds, err;
-	int data_size = pi->size;
+	unsigned int data_size = pi->size;
 	void *dig_in = peer_device->connection->int_dig_in;
 	void *dig_vv = peer_device->connection->int_dig_vv;
 	unsigned long *data;
@@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 	if (!peer_req)
 		return NULL;
 
+	peer_req->flags |= EE_WRITE;
 	if (trim)
 		return peer_req;
 
@@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
 	 * respective _drbd_clear_done_ee */
 
 	peer_req->w.cb = e_end_resync_block;
+	peer_req->submit_jif = jiffies;
 
 	spin_lock_irq(&device->resource->req_lock);
-	list_add(&peer_req->w.list, &device->sync_ee);
+	list_add_tail(&peer_req->w.list, &device->sync_ee);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	atomic_add(pi->size >> 9, &device->rs_sect_ev);
@@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
 		}
 		dec_unacked(device);
 	}
+
 	/* we delete from the conflict detection hash _after_ we sent out the
 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
 	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device,
 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
 		if (i == &peer_req->i)
 			continue;
+		if (i->completed)
+			continue;
 
 		if (!i->local) {
 			/*
@@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device,
 					  (unsigned long long)sector, size,
 					  superseded ? "local" : "remote");
 
-			inc_unacked(device);
 			peer_req->w.cb = superseded ? e_send_superseded :
 						   e_send_retry_write;
 			list_add_tail(&peer_req->w.list, &device->done_ee);
@@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 {
 	struct drbd_peer_device *peer_device;
 	struct drbd_device *device;
+	struct net_conf *nc;
 	sector_t sector;
 	struct drbd_peer_request *peer_req;
 	struct p_data *p = pi->data;
@@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	}
 
 	peer_req->w.cb = e_end_block;
+	peer_req->submit_jif = jiffies;
+	peer_req->flags |= EE_APPLICATION;
 
 	dp_flags = be32_to_cpu(p->dp_flags);
 	rw |= wire_flags_to_bio(dp_flags);
@@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	spin_unlock(&connection->epoch_lock);
 
 	rcu_read_lock();
-	tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
+	nc = rcu_dereference(peer_device->connection->net_conf);
+	tp = nc->two_primaries;
+	if (peer_device->connection->agreed_pro_version < 100) {
+		switch (nc->wire_protocol) {
+		case DRBD_PROT_C:
+			dp_flags |= DP_SEND_WRITE_ACK;
+			break;
+		case DRBD_PROT_B:
+			dp_flags |= DP_SEND_RECEIVE_ACK;
+			break;
+		}
+	}
 	rcu_read_unlock();
+
+	if (dp_flags & DP_SEND_WRITE_ACK) {
+		peer_req->flags |= EE_SEND_WRITE_ACK;
+		inc_unacked(device);
+		/* corresponding dec_unacked() in e_end_block()
+		 * respective _drbd_clear_done_ee */
+	}
+
+	if (dp_flags & DP_SEND_RECEIVE_ACK) {
+		/* I really don't like it that the receiver thread
+		 * sends on the msock, but anyways */
+		drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+	}
+
 	if (tp) {
+		/* two primaries implies protocol C */
+		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
 		peer_req->flags |= EE_IN_INTERVAL_TREE;
 		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
 		if (err)
@@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	 * active_ee to become empty in drbd_submit_peer_request();
 	 * better not add ourselves here. */
 	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
-		list_add(&peer_req->w.list, &device->active_ee);
+		list_add_tail(&peer_req->w.list, &device->active_ee);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	if (device->state.conn == C_SYNC_TARGET)
 		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
 
-	if (peer_device->connection->agreed_pro_version < 100) {
-		rcu_read_lock();
-		switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
-		case DRBD_PROT_C:
-			dp_flags |= DP_SEND_WRITE_ACK;
-			break;
-		case DRBD_PROT_B:
-			dp_flags |= DP_SEND_RECEIVE_ACK;
-			break;
-		}
-		rcu_read_unlock();
-	}
-
-	if (dp_flags & DP_SEND_WRITE_ACK) {
-		peer_req->flags |= EE_SEND_WRITE_ACK;
-		inc_unacked(device);
-		/* corresponding dec_unacked() in e_end_block()
-		 * respective _drbd_clear_done_ee */
-	}
-
-	if (dp_flags & DP_SEND_RECEIVE_ACK) {
-		/* I really don't like it that the receiver thread
-		 * sends on the msock, but anyways */
-		drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
-	}
-
 	if (device->state.pdsk < D_INCONSISTENT) {
 		/* In case we have the only disk of the cluster, */
 		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
-		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
 		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
-		drbd_al_begin_io(device, &peer_req->i, true);
+		drbd_al_begin_io(device, &peer_req->i);
+		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
 	}
 
 	err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
@@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
 	list_del(&peer_req->w.list);
 	drbd_remove_epoch_entry_interval(device, peer_req);
 	spin_unlock_irq(&device->resource->req_lock);
-	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
+	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
+		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
 		drbd_al_complete_io(device, &peer_req->i);
+	}
 
 out_interrupted:
 	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
@@ -2368,13 +2439,14 @@ out_interrupted:
  * The current sync rate used here uses only the most recent two step marks,
  * to have a short time average so we can react faster.
  */
-bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
+bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
+		bool throttle_if_app_is_waiting)
 {
 	struct lc_element *tmp;
-	bool throttle = true;
+	bool throttle = drbd_rs_c_min_rate_throttle(device);
 
-	if (!drbd_rs_c_min_rate_throttle(device))
-		return false;
+	if (!throttle || throttle_if_app_is_waiting)
+		return throttle;
 
 	spin_lock_irq(&device->al_lock);
 	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
 		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
 		if (test_bit(BME_PRIORITY, &bm_ext->flags))
 			throttle = false;
-		/* Do not slow down if app IO is already waiting for this extent */
+		/* Do not slow down if app IO is already waiting for this extent,
+		 * and our progress is necessary for application IO to complete. */
 	}
 	spin_unlock_irq(&device->al_lock);
 
@@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
 		      (int)part_stat_read(&disk->part0, sectors[1]) -
 			atomic_read(&device->rs_sect_ev);
-	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
+
+	if (atomic_read(&device->ap_actlog_cnt)
+	    || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
 		unsigned long rs_left;
 		int i;
 
@@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 		peer_req->w.cb = w_e_end_data_req;
 		fault_type = DRBD_FAULT_DT_RD;
 		/* application IO, don't drbd_rs_begin_io */
+		peer_req->flags |= EE_APPLICATION;
 		goto submit;
 
 	case P_RS_DATA_REQUEST:
@@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 			peer_req->w.cb = w_e_end_csum_rs_req;
 			/* used in the sector offset progress display */
 			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
+			/* remember to report stats in drbd_resync_finished */
+			device->use_csums = true;
 		} else if (pi->cmd == P_OV_REPLY) {
 			/* track progress, we may need to throttle */
 			atomic_add(size >> 9, &device->rs_sect_in);
@@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
 	 * we would also throttle its application reads.
 	 * In that case, throttling is done on the SyncTarget only.
 	 */
-	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
+
+	/* Even though this may be a resync request, we do add to "read_ee";
+	 * "sync_ee" is only used for resync WRITEs.
+	 * Add to list early, so debugfs can find this request
+	 * even if we have to sleep below. */
+	spin_lock_irq(&device->resource->req_lock);
+	list_add_tail(&peer_req->w.list, &device->read_ee);
+	spin_unlock_irq(&device->resource->req_lock);
+
+	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
+	if (device->state.peer != R_PRIMARY
+	&& drbd_rs_should_slow_down(device, sector, false))
 		schedule_timeout_uninterruptible(HZ/10);
+	update_receiver_timing_details(connection, drbd_rs_begin_io);
 	if (drbd_rs_begin_io(device, sector))
 		goto out_free_e;
 
@@ -2604,22 +2694,20 @@ submit_for_resync:
 	atomic_add(size >> 9, &device->rs_sect_ev);
 
 submit:
+	update_receiver_timing_details(connection, drbd_submit_peer_request);
 	inc_unacked(device);
-	spin_lock_irq(&device->resource->req_lock);
-	list_add_tail(&peer_req->w.list, &device->read_ee);
-	spin_unlock_irq(&device->resource->req_lock);
-
 	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
 		return 0;
 
 	/* don't care for the reason here */
 	drbd_err(device, "submit failed, triggering re-connect\n");
+
+out_free_e:
 	spin_lock_irq(&device->resource->req_lock);
 	list_del(&peer_req->w.list);
 	spin_unlock_irq(&device->resource->req_lock);
 	/* no drbd_rs_complete_io(), we are dropping the connection anyways */
 
-out_free_e:
 	put_ldev(device);
 	drbd_free_peer_req(device, peer_req);
 	return -EIO;
@@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
 -1091   requires proto 91
 -1096   requires proto 96
  */
-static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
+static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
 {
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
 	u64 self, peer;
 	int i, j;
 
@@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
 
 		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
 
-			if (first_peer_device(device)->connection->agreed_pro_version < 91)
+			if (connection->agreed_pro_version < 91)
 				return -1091;
 
 			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
@@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
 
 		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
 
-			if (first_peer_device(device)->connection->agreed_pro_version < 91)
+			if (connection->agreed_pro_version < 91)
 				return -1091;
 
 			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
 		case 1: /*  self_pri && !peer_pri */ return 1;
 		case 2: /* !self_pri &&  peer_pri */ return -1;
 		case 3: /*  self_pri &&  peer_pri */
-			dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
+			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
 			return dc ? -1 : 1;
 		}
 	}
@@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
 	*rule_nr = 51;
 	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+		if (connection->agreed_pro_version < 96 ?
 		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
 		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
 		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get though. Undo the last start of
 			   resync as sync source modifications of the peer's UUIDs. */
 
-			if (first_peer_device(device)->connection->agreed_pro_version < 91)
+			if (connection->agreed_pro_version < 91)
 				return -1091;
 
 			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
@@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
 	*rule_nr = 71;
 	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
 	if (self == peer) {
-		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
+		if (connection->agreed_pro_version < 96 ?
 		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
 		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
 		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
 			/* The last P_SYNC_UUID did not get though. Undo the last start of
 			   resync as sync source modifications of our UUIDs. */
 
-			if (first_peer_device(device)->connection->agreed_pro_version < 91)
+			if (connection->agreed_pro_version < 91)
 				return -1091;
 
 			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
@@ -3352,8 +3442,7 @@ disconnect:
  * return: NULL (alg name was "")
  *         ERR_PTR(error) if something goes wrong
  *         or the crypto hash ptr, if it worked out ok. */
-static
-struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
+static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
 		const char *alg, const char *name)
 {
 	struct crypto_hash *tfm;
@@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 	struct drbd_device *device;
 	struct p_sizes *p = pi->data;
 	enum determine_dev_size dd = DS_UNCHANGED;
-	sector_t p_size, p_usize, my_usize;
+	sector_t p_size, p_usize, p_csize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
 
@@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
 	p_size = be64_to_cpu(p->d_size);
 	p_usize = be64_to_cpu(p->u_size);
+	p_csize = be64_to_cpu(p->c_size);
 
 	/* just store the peer's disk size for now.
 	 * we still need to figure out whether we accept that. */
@@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 	}
 
 	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
-	drbd_reconsider_max_bio_size(device);
 	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
 	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
 	   drbd_reconsider_max_bio_size(), we can be sure that after
@@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(device)) {
+		drbd_reconsider_max_bio_size(device, device->ldev);
 		dd = drbd_determine_dev_size(device, ddsf, NULL);
 		put_ldev(device);
 		if (dd == DS_ERROR)
 			return -EIO;
 		drbd_md_sync(device);
 	} else {
-		/* I am diskless, need to accept the peer's size. */
-		drbd_set_my_capacity(device, p_size);
+		/*
+		 * I am diskless, need to accept the peer's *current* size.
+		 * I must NOT accept the peers backing disk size,
+		 * it may have been larger than mine all along...
+		 *
+		 * At this point, the peer knows more about my disk, or at
+		 * least about what we last agreed upon, than myself.
+		 * So if his c_size is less than his d_size, the most likely
+		 * reason is that *my* d_size was smaller last time we checked.
+		 *
+		 * However, if he sends a zero current size,
+		 * take his (user-capped or) backing disk size anyways.
+		 */
+		drbd_reconsider_max_bio_size(device, NULL);
+		drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
 	}
 
 	if (get_ldev(device)) {
@@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection)
 		struct data_cmd *cmd;
 
 		drbd_thread_current_set_cpu(&connection->receiver);
+		update_receiver_timing_details(connection, drbd_recv_header);
 		if (drbd_recv_header(connection, &pi))
 			goto err_out;
 
@@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection)
 		}
 
 		if (shs) {
+			update_receiver_timing_details(connection, drbd_recv_all_warn);
 			err = drbd_recv_all_warn(connection, pi.data, shs);
 			if (err)
 				goto err_out;
 			pi.size -= shs;
 		}
 
+		update_receiver_timing_details(connection, cmd->fn);
 		err = cmd->fn(connection, &pi);
 		if (err) {
 			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 09803d0..c67717d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
 static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
 {
 	int rw = bio_data_dir(req->master_bio);
-	unsigned long duration = jiffies - req->start_time;
+	unsigned long duration = jiffies - req->start_jif;
 	int cpu;
 	cpu = part_stat_lock();
 	part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration);
@@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
 {
 	struct drbd_request *req;
 
-	req = mempool_alloc(drbd_request_mempool, GFP_NOIO);
+	req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO);
 	if (!req)
 		return NULL;
 
@@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
 
 	INIT_LIST_HEAD(&req->tl_requests);
 	INIT_LIST_HEAD(&req->w.list);
+	INIT_LIST_HEAD(&req->req_pending_master_completion);
+	INIT_LIST_HEAD(&req->req_pending_local);
 
 	/* one reference to be put by __drbd_make_request */
 	atomic_set(&req->completion_ref, 1);
@@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
 	return req;
 }
 
+static void drbd_remove_request_interval(struct rb_root *root,
+					 struct drbd_request *req)
+{
+	struct drbd_device *device = req->device;
+	struct drbd_interval *i = &req->i;
+
+	drbd_remove_interval(root, i);
+
+	/* Wake up any processes waiting for this request to complete.  */
+	if (i->waiting)
+		wake_up(&device->misc_wait);
+}
+
 void drbd_req_destroy(struct kref *kref)
 {
 	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref)
 		return;
 	}
 
-	/* remove it from the transfer log.
-	 * well, only if it had been there in the first
-	 * place... if it had not (local only or conflicting
-	 * and never sent), it should still be "empty" as
-	 * initialized in drbd_req_new(), so we can list_del() it
-	 * here unconditionally */
+	/* If called from mod_rq_state (expected normal case) or
+	 * drbd_send_and_submit (the less likely normal path), this holds the
+	 * req_lock, and req->tl_requests will typicaly be on ->transfer_log,
+	 * though it may be still empty (never added to the transfer log).
+	 *
+	 * If called from do_retry(), we do NOT hold the req_lock, but we are
+	 * still allowed to unconditionally list_del(&req->tl_requests),
+	 * because it will be on a local on-stack list only. */
 	list_del_init(&req->tl_requests);
 
+	/* finally remove the request from the conflict detection
+	 * respective block_id verification interval tree. */
+	if (!drbd_interval_empty(&req->i)) {
+		struct rb_root *root;
+
+		if (s & RQ_WRITE)
+			root = &device->write_requests;
+		else
+			root = &device->read_requests;
+		drbd_remove_request_interval(root, req);
+	} else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
+		drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
+			s, (unsigned long long)req->i.sector, req->i.size);
+
 	/* if it was a write, we may have to set the corresponding
 	 * bit(s) out-of-sync first. If it had a local part, we need to
 	 * release the reference to the activity log. */
@@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device,
 }
 
 
-static void drbd_remove_request_interval(struct rb_root *root,
-					 struct drbd_request *req)
-{
-	struct drbd_device *device = req->device;
-	struct drbd_interval *i = &req->i;
-
-	drbd_remove_interval(root, i);
-
-	/* Wake up any processes waiting for this request to complete.  */
-	if (i->waiting)
-		wake_up(&device->misc_wait);
-}
-
 /* Helper for __req_mod().
  * Set m->bio to the master bio, if it is fit to be completed,
  * or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 	ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
 	error = PTR_ERR(req->private_bio);
 
-	/* remove the request from the conflict detection
-	 * respective block_id verification hash */
-	if (!drbd_interval_empty(&req->i)) {
-		struct rb_root *root;
-
-		if (rw == WRITE)
-			root = &device->write_requests;
-		else
-			root = &device->read_requests;
-		drbd_remove_request_interval(root, req);
-	}
-
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current transfer log epoch.
 	 * We are within the request lock, so we can simply compare
@@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 		m->error = ok ? 0 : (error ?: -EIO);
 		m->bio = req->master_bio;
 		req->master_bio = NULL;
+		/* We leave it in the tree, to be able to verify later
+		 * write-acks in protocol != C during resync.
+		 * But we mark it as "complete", so it won't be counted as
+		 * conflict in a multi-primary setup. */
+		req->i.completed = true;
 	}
+
+	if (req->i.waiting)
+		wake_up(&device->misc_wait);
+
+	/* Either we are about to complete to upper layers,
+	 * or we will restart this request.
+	 * In either case, the request object will be destroyed soon,
+	 * so better remove it from all lists. */
+	list_del_init(&req->req_pending_master_completion);
 }
 
+/* still holds resource->req_lock */
 static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
 {
 	struct drbd_device *device = req->device;
@@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
 	return 1;
 }
 
+static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next == NULL)
+		connection->req_next = req;
+}
+
+static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_next != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if (s & RQ_NET_QUEUED)
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_next = req;
+}
+
+static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending == NULL)
+		connection->req_ack_pending = req;
+}
+
+static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_ack_pending != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_ack_pending = req;
+}
+
+static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done == NULL)
+		connection->req_not_net_done = req;
+}
+
+static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
+{
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
+	if (!connection)
+		return;
+	if (connection->req_not_net_done != req)
+		return;
+	list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
+		const unsigned s = req->rq_state;
+		if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
+			break;
+	}
+	if (&req->tl_requests == &connection->transfer_log)
+		req = NULL;
+	connection->req_not_net_done = req;
+}
+
 /* I'd like this to be the only place that manipulates
  * req->completion_ref and req->kref. */
 static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		int clear, int set)
 {
 	struct drbd_device *device = req->device;
+	struct drbd_peer_device *peer_device = first_peer_device(device);
 	unsigned s = req->rq_state;
 	int c_put = 0;
 	int k_put = 0;
@@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 		atomic_inc(&req->completion_ref);
 	}
 
-	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED))
+	if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
 		atomic_inc(&req->completion_ref);
+		set_if_null_req_next(peer_device, req);
+	}
 
 	if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
 		kref_get(&req->kref); /* wait for the DONE */
 
-	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT))
-		atomic_add(req->i.size >> 9, &device->ap_in_flight);
+	if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
+		/* potentially already completed in the asender thread */
+		if (!(s & RQ_NET_DONE)) {
+			atomic_add(req->i.size >> 9, &device->ap_in_flight);
+			set_if_null_req_not_net_done(peer_device, req);
+		}
+		if (s & RQ_NET_PENDING)
+			set_if_null_req_ack_pending(peer_device, req);
+	}
 
 	if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
 		atomic_inc(&req->completion_ref);
@@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
 			++k_put;
 		else
 			++c_put;
+		list_del_init(&req->req_pending_local);
 	}
 
 	if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
 		dec_ap_pending(device);
 		++c_put;
+		req->acked_jif = jiffies;
+		advance_conn_req_ack_pending(peer_device, req);
 	}
 
-	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED))
+	if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
 		++c_put;
+		advance_conn_req_next(peer_device, req);
+	}
 
-	if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
-		if (req->rq_state & RQ_NET_SENT)
+	if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
+		if (s & RQ_NET_SENT)
 			atomic_sub(req->i.size >> 9, &device->ap_in_flight);
-		++k_put;
+		if (s & RQ_EXP_BARR_ACK)
+			++k_put;
+		req->net_done_jif = jiffies;
+
+		/* in ahead/behind mode, or just in case,
+		 * before we finally destroy this request,
+		 * the caching pointers must not reference it anymore */
+		advance_conn_req_next(peer_device, req);
+		advance_conn_req_ack_pending(peer_device, req);
+		advance_conn_req_not_net_done(peer_device, req);
 	}
 
 	/* potentially complete and destroy */
@@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
 			bdevname(device->ldev->backing_bdev, b));
 }
 
+/* Helper for HANDED_OVER_TO_NETWORK.
+ * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
+ * Is it also still "PENDING"?
+ * --> If so, clear PENDING and set NET_OK below.
+ * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
+ * (and we must not set RQ_NET_OK) */
+static inline bool is_pending_write_protocol_A(struct drbd_request *req)
+{
+	return (req->rq_state &
+		   (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
+		== (RQ_WRITE|RQ_NET_PENDING);
+}
+
 /* obviously this could be coded as many single functions
  * instead of one huge switch,
  * or by putting the code directly in the respective locations
@@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
 int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		struct bio_and_error *m)
 {
-	struct drbd_device *device = req->device;
+	struct drbd_device *const device = req->device;
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
 	struct net_conf *nc;
 	int p, rv = 0;
 
@@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		 * and from w_read_retry_remote */
 		D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
 		rcu_read_lock();
-		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+		nc = rcu_dereference(connection->net_conf);
 		p = nc->wire_protocol;
 		rcu_read_unlock();
 		req->rq_state |=
@@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
 		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
 		req->w.cb = w_send_read_req;
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+		drbd_queue_work(&connection->sender_work,
 				&req->w);
 		break;
 
@@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
 		mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
 		req->w.cb =  w_send_dblock;
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+		drbd_queue_work(&connection->sender_work,
 				&req->w);
 
 		/* close the epoch, in case it outgrew the limit */
 		rcu_read_lock();
-		nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+		nc = rcu_dereference(connection->net_conf);
 		p = nc->max_epoch_size;
 		rcu_read_unlock();
-		if (first_peer_device(device)->connection->current_tle_writes >= p)
-			start_new_tl_epoch(first_peer_device(device)->connection);
+		if (connection->current_tle_writes >= p)
+			start_new_tl_epoch(connection);
 
 		break;
 
 	case QUEUE_FOR_SEND_OOS:
 		mod_rq_state(req, m, 0, RQ_NET_QUEUED);
 		req->w.cb =  w_send_out_of_sync;
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+		drbd_queue_work(&connection->sender_work,
 				&req->w);
 		break;
 
@@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 	case HANDED_OVER_TO_NETWORK:
 		/* assert something? */
-		if (bio_data_dir(req->master_bio) == WRITE &&
-		    !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
+		if (is_pending_write_protocol_A(req))
 			/* this is what is dangerous about protocol A:
 			 * pretend it was successfully written on the peer. */
-			if (req->rq_state & RQ_NET_PENDING)
-				mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
-			/* else: neg-ack was faster... */
-			/* it is still not yet RQ_NET_DONE until the
-			 * corresponding epoch barrier got acked as well,
-			 * so we know what to dirty on connection loss */
-		}
-		mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+			mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
+						RQ_NET_SENT|RQ_NET_OK);
+		else
+			mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
+		/* It is still not yet RQ_NET_DONE until the
+		 * corresponding epoch barrier got acked as well,
+		 * so we know what to dirty on connection loss. */
 		break;
 
 	case OOS_HANDED_TO_NETWORK:
@@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 	case WRITE_ACKED_BY_PEER_AND_SIS:
 		req->rq_state |= RQ_NET_SIS;
 	case WRITE_ACKED_BY_PEER:
-		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
-		/* protocol C; successfully written on peer.
+		/* Normal operation protocol C: successfully written on peer.
+		 * During resync, even in protocol != C,
+		 * we requested an explicit write ack anyways.
+		 * Which means we cannot even assert anything here.
 		 * Nothing more to do here.
 		 * We want to keep the tl in place for all protocols, to cater
 		 * for volatile write-back caches on lower level devices. */
-
 		goto ack_common;
 	case RECV_ACKED_BY_PEER:
 		D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		 * see also notes above in HANDED_OVER_TO_NETWORK about
 		 * protocol != C */
 	ack_common:
-		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
 		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
 		break;
 
@@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 		get_ldev(device); /* always succeeds in this call path */
 		req->w.cb = w_restart_disk_io;
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+		drbd_queue_work(&connection->sender_work,
 				&req->w);
 		break;
 
@@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 
 			mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
 			if (req->w.cb) {
-				drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+				/* w.cb expected to be w_send_dblock, or w_send_read_req */
+				drbd_queue_work(&connection->sender_work,
 						&req->w);
 				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
 			} /* else: FIXME can this happen? */
@@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		break;
 
 	case QUEUE_AS_DRBD_BARRIER:
-		start_new_tl_epoch(first_peer_device(device)->connection);
+		start_new_tl_epoch(connection);
 		mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
 		break;
 	};
@@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device)
 	    connection->agreed_pro_version < 96)
 		return;
 
+	if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
+		return; /* nothing to do ... */
+
 	/* If I don't even have good local storage, we can not reasonably try
 	 * to pull ahead of the peer. We also need the local reference to make
 	 * sure device->act_log is there.
@@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 	 * stable storage, and this is a WRITE, we may not even submit
 	 * this bio. */
 	if (get_ldev(device)) {
+		req->pre_submit_jif = jiffies;
 		if (drbd_insert_fault(device,
 				      rw == WRITE ? DRBD_FAULT_DT_WR
 				    : rw == READ  ? DRBD_FAULT_DT_RD
@@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req)
 
 static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
 {
-	spin_lock(&device->submit.lock);
+	spin_lock_irq(&device->resource->req_lock);
 	list_add_tail(&req->tl_requests, &device->submit.writes);
-	spin_unlock(&device->submit.lock);
+	list_add_tail(&req->req_pending_master_completion,
+			&device->pending_master_completion[1 /* WRITE */]);
+	spin_unlock_irq(&device->resource->req_lock);
 	queue_work(device->submit.wq, &device->submit.worker);
+	/* do_submit() may sleep internally on al_wait, too */
+	wake_up(&device->al_wait);
 }
 
 /* returns the new drbd_request pointer, if the caller is expected to
@@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
  * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
  */
 static struct drbd_request *
-drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
 {
 	const int rw = bio_data_dir(bio);
 	struct drbd_request *req;
@@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 		bio_endio(bio, -ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}
-	req->start_time = start_time;
+	req->start_jif = start_jif;
 
 	if (!get_ldev(device)) {
 		bio_put(req->private_bio);
@@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 	if (rw == WRITE && req->private_bio && req->i.size
 	&& !test_bit(AL_SUSPENDED, &device->flags)) {
 		if (!drbd_al_begin_io_fastpath(device, &req->i)) {
+			atomic_inc(&device->ap_actlog_cnt);
 			drbd_queue_write(device, req);
 			return NULL;
 		}
 		req->rq_state |= RQ_IN_ACT_LOG;
+		req->in_actlog_jif = jiffies;
 	}
 
 	return req;
@@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
 
 static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
 {
+	struct drbd_resource *resource = device->resource;
 	const int rw = bio_rw(req->master_bio);
 	struct bio_and_error m = { NULL, };
 	bool no_remote = false;
+	bool submit_private_bio = false;
 
-	spin_lock_irq(&device->resource->req_lock);
+	spin_lock_irq(&resource->req_lock);
 	if (rw == WRITE) {
 		/* This may temporarily give up the req_lock,
 		 * but will re-aquire it before it returns here.
@@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
 			no_remote = true;
 	}
 
+	/* If it took the fast path in drbd_request_prepare, add it here.
+	 * The slow path has added it already. */
+	if (list_empty(&req->req_pending_master_completion))
+		list_add_tail(&req->req_pending_master_completion,
+			&device->pending_master_completion[rw == WRITE]);
 	if (req->private_bio) {
 		/* needs to be marked within the same spinlock */
+		list_add_tail(&req->req_pending_local,
+			&device->pending_completion[rw == WRITE]);
 		_req_mod(req, TO_BE_SUBMITTED);
 		/* but we need to give up the spinlock to submit */
-		spin_unlock_irq(&device->resource->req_lock);
-		drbd_submit_req_private_bio(req);
-		spin_lock_irq(&device->resource->req_lock);
+		submit_private_bio = true;
 	} else if (no_remote) {
 nodata:
 		if (__ratelimit(&drbd_ratelimit_state))
@@ -1167,15 +1321,23 @@ nodata:
 out:
 	if (drbd_req_put_completion_ref(req, &m, 1))
 		kref_put(&req->kref, drbd_req_destroy);
-	spin_unlock_irq(&device->resource->req_lock);
-
+	spin_unlock_irq(&resource->req_lock);
+
+	/* Even though above is a kref_put(), this is safe.
+	 * As long as we still need to submit our private bio,
+	 * we hold a completion ref, and the request cannot disappear.
+	 * If however this request did not even have a private bio to submit
+	 * (e.g. remote read), req may already be invalid now.
+	 * That's why we cannot check on req->private_bio. */
+	if (submit_private_bio)
+		drbd_submit_req_private_bio(req);
 	if (m.bio)
 		complete_master_bio(device, &m);
 }
 
-void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time)
+void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
 {
-	struct drbd_request *req = drbd_request_prepare(device, bio, start_time);
+	struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);
 	if (IS_ERR_OR_NULL(req))
 		return;
 	drbd_send_and_submit(device, req);
@@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
 				continue;
 
 			req->rq_state |= RQ_IN_ACT_LOG;
+			req->in_actlog_jif = jiffies;
+			atomic_dec(&device->ap_actlog_cnt);
 		}
 
 		list_del_init(&req->tl_requests);
@@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
 
 static bool prepare_al_transaction_nonblock(struct drbd_device *device,
 					    struct list_head *incoming,
-					    struct list_head *pending)
+					    struct list_head *pending,
+					    struct list_head *later)
 {
 	struct drbd_request *req, *tmp;
 	int wake = 0;
@@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
 	spin_lock_irq(&device->al_lock);
 	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
 		err = drbd_al_begin_io_nonblock(device, &req->i);
+		if (err == -ENOBUFS)
+			break;
 		if (err == -EBUSY)
 			wake = 1;
 		if (err)
-			continue;
-		req->rq_state |= RQ_IN_ACT_LOG;
-		list_move_tail(&req->tl_requests, pending);
+			list_move_tail(&req->tl_requests, later);
+		else
+			list_move_tail(&req->tl_requests, pending);
 	}
 	spin_unlock_irq(&device->al_lock);
 	if (wake)
 		wake_up(&device->al_wait);
-
 	return !list_empty(pending);
 }
 
+void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+{
+	struct drbd_request *req, *tmp;
+
+	list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+		req->rq_state |= RQ_IN_ACT_LOG;
+		req->in_actlog_jif = jiffies;
+		atomic_dec(&device->ap_actlog_cnt);
+		list_del_init(&req->tl_requests);
+		drbd_send_and_submit(device, req);
+	}
+}
+
 void do_submit(struct work_struct *ws)
 {
 	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
-	LIST_HEAD(incoming);
-	LIST_HEAD(pending);
-	struct drbd_request *req, *tmp;
+	LIST_HEAD(incoming);	/* from drbd_make_request() */
+	LIST_HEAD(pending);	/* to be submitted after next AL-transaction commit */
+	LIST_HEAD(busy);	/* blocked by resync requests */
+
+	/* grab new incoming requests */
+	spin_lock_irq(&device->resource->req_lock);
+	list_splice_tail_init(&device->submit.writes, &incoming);
+	spin_unlock_irq(&device->resource->req_lock);
 
 	for (;;) {
-		spin_lock(&device->submit.lock);
-		list_splice_tail_init(&device->submit.writes, &incoming);
-		spin_unlock(&device->submit.lock);
+		DEFINE_WAIT(wait);
 
+		/* move used-to-be-busy back to front of incoming */
+		list_splice_init(&busy, &incoming);
 		submit_fast_path(device, &incoming);
 		if (list_empty(&incoming))
 			break;
 
-skip_fast_path:
-		wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
-		/* Maybe more was queued, while we prepared the transaction?
-		 * Try to stuff them into this transaction as well.
-		 * Be strictly non-blocking here, no wait_event, we already
-		 * have something to commit.
-		 * Stop if we don't make any more progres.
-		 */
 		for (;;) {
+			prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+			list_splice_init(&busy, &incoming);
+			prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
+			if (!list_empty(&pending))
+				break;
+
+			schedule();
+
+			/* If all currently "hot" activity log extents are kept busy by
+			 * incoming requests, we still must not totally starve new
+			 * requests to "cold" extents.
+			 * Something left on &incoming means there had not been
+			 * enough update slots available, and the activity log
+			 * has been marked as "starving".
+			 *
+			 * Try again now, without looking for new requests,
+			 * effectively blocking all new requests until we made
+			 * at least _some_ progress with what we currently have.
+			 */
+			if (!list_empty(&incoming))
+				continue;
+
+			/* Nothing moved to pending, but nothing left
+			 * on incoming: all moved to busy!
+			 * Grab new and iterate. */
+			spin_lock_irq(&device->resource->req_lock);
+			list_splice_tail_init(&device->submit.writes, &incoming);
+			spin_unlock_irq(&device->resource->req_lock);
+		}
+		finish_wait(&device->al_wait, &wait);
+
+		/* If the transaction was full, before all incoming requests
+		 * had been processed, skip ahead to commit, and iterate
+		 * without splicing in more incoming requests from upper layers.
+		 *
+		 * Else, if all incoming have been processed,
+		 * they have become either "pending" (to be submitted after
+		 * next transaction commit) or "busy" (blocked by resync).
+		 *
+		 * Maybe more was queued, while we prepared the transaction?
+		 * Try to stuff those into this transaction as well.
+		 * Be strictly non-blocking here,
+		 * we already have something to commit.
+		 *
+		 * Commit if we don't make any more progres.
+		 */
+
+		while (list_empty(&incoming)) {
 			LIST_HEAD(more_pending);
 			LIST_HEAD(more_incoming);
 			bool made_progress;
@@ -1260,55 +1485,32 @@ skip_fast_path:
 			if (list_empty(&device->submit.writes))
 				break;
 
-			spin_lock(&device->submit.lock);
+			spin_lock_irq(&device->resource->req_lock);
 			list_splice_tail_init(&device->submit.writes, &more_incoming);
-			spin_unlock(&device->submit.lock);
+			spin_unlock_irq(&device->resource->req_lock);
 
 			if (list_empty(&more_incoming))
 				break;
 
-			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending);
+			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
 
 			list_splice_tail_init(&more_pending, &pending);
 			list_splice_tail_init(&more_incoming, &incoming);
-
 			if (!made_progress)
 				break;
 		}
-		drbd_al_begin_io_commit(device, false);
-
-		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
-			list_del_init(&req->tl_requests);
-			drbd_send_and_submit(device, req);
-		}
 
-		/* If all currently hot activity log extents are kept busy by
-		 * incoming requests, we still must not totally starve new
-		 * requests to cold extents. In that case, prepare one request
-		 * in blocking mode. */
-		list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
-			list_del_init(&req->tl_requests);
-			req->rq_state |= RQ_IN_ACT_LOG;
-			if (!drbd_al_begin_io_prepare(device, &req->i)) {
-				/* Corresponding extent was hot after all? */
-				drbd_send_and_submit(device, req);
-			} else {
-				/* Found a request to a cold extent.
-				 * Put on "pending" list,
-				 * and try to cumulate with more. */
-				list_add(&req->tl_requests, &pending);
-				goto skip_fast_path;
-			}
-		}
+		drbd_al_begin_io_commit(device);
+		send_and_submit_pending(device, &pending);
 	}
 }
 
 void drbd_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct drbd_device *device = (struct drbd_device *) q->queuedata;
-	unsigned long start_time;
+	unsigned long start_jif;
 
-	start_time = jiffies;
+	start_jif = jiffies;
 
 	/*
 	 * what we "blindly" assume:
@@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
 	D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
 
 	inc_ap_bio(device);
-	__drbd_make_request(device, bio, start_time);
+	__drbd_make_request(device, bio, start_jif);
 }
 
 /* This is called by bio_add_page().
@@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
 	return limit;
 }
 
-static void find_oldest_requests(
-		struct drbd_connection *connection,
-		struct drbd_device *device,
-		struct drbd_request **oldest_req_waiting_for_peer,
-		struct drbd_request **oldest_req_waiting_for_disk)
-{
-	struct drbd_request *r;
-	*oldest_req_waiting_for_peer = NULL;
-	*oldest_req_waiting_for_disk = NULL;
-	list_for_each_entry(r, &connection->transfer_log, tl_requests) {
-		const unsigned s = r->rq_state;
-		if (!*oldest_req_waiting_for_peer
-		&& ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
-			*oldest_req_waiting_for_peer = r;
-
-		if (!*oldest_req_waiting_for_disk
-		&& (s & RQ_LOCAL_PENDING) && r->device == device)
-			*oldest_req_waiting_for_disk = r;
-
-		if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
-			break;
-	}
-}
-
 void request_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 	struct drbd_connection *connection = first_peer_device(device)->connection;
-	struct drbd_request *req_disk, *req_peer; /* oldest request */
+	struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
 	struct net_conf *nc;
+	unsigned long oldest_submit_jif;
 	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
 	unsigned long now;
 
@@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data)
 		return; /* Recurring timer stopped */
 
 	now = jiffies;
+	nt = now + et;
 
 	spin_lock_irq(&device->resource->req_lock);
-	find_oldest_requests(connection, device, &req_peer, &req_disk);
-	if (req_peer == NULL && req_disk == NULL) {
-		spin_unlock_irq(&device->resource->req_lock);
-		mod_timer(&device->request_timer, now + et);
-		return;
-	}
+	req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
+	req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
+	req_peer = connection->req_not_net_done;
+	/* maybe the oldest request waiting for the peer is in fact still
+	 * blocking in tcp sendmsg */
+	if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
+		req_peer = connection->req_next;
+
+	/* evaluate the oldest peer request only in one timer! */
+	if (req_peer && req_peer->device != device)
+		req_peer = NULL;
+
+	/* do we have something to evaluate? */
+	if (req_peer == NULL && req_write == NULL && req_read == NULL)
+		goto out;
+
+	oldest_submit_jif =
+		(req_write && req_read)
+		? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
+		  ? req_write->pre_submit_jif : req_read->pre_submit_jif )
+		: req_write ? req_write->pre_submit_jif
+		: req_read ? req_read->pre_submit_jif : now;
 
 	/* The request is considered timed out, if
 	 * - we have some effective timeout from the configuration,
@@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data)
 	 * to expire twice (worst case) to become effective. Good enough.
 	 */
 	if (ent && req_peer &&
-		 time_after(now, req_peer->start_time + ent) &&
+		 time_after(now, req_peer->pre_send_jif + ent) &&
 		!time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
 		drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
 		_drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
 	}
-	if (dt && req_disk &&
-		 time_after(now, req_disk->start_time + dt) &&
+	if (dt && oldest_submit_jif != now &&
+		 time_after(now, oldest_submit_jif + dt) &&
 		!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
 		drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
 		__drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data)
 
 	/* Reschedule timer for the nearest not already expired timeout.
 	 * Fallback to now + min(effective network timeout, disk timeout). */
-	ent = (ent && req_peer && time_before(now, req_peer->start_time + ent))
-		? req_peer->start_time + ent : now + et;
-	dt = (dt && req_disk && time_before(now, req_disk->start_time + dt))
-		? req_disk->start_time + dt : now + et;
+	ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
+		? req_peer->pre_send_jif + ent : now + et;
+	dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
+		? oldest_submit_jif + dt : now + et;
 	nt = time_before(ent, dt) ? ent : dt;
+out:
 	spin_unlock_irq(&connection->resource->req_lock);
 	mod_timer(&device->request_timer, nt);
 }
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 8566cd5..9f6a040 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device,
 extern void request_timer_fn(unsigned long data);
 extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
 extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
+extern void tl_abort_disk_io(struct drbd_device *device);
 
 /* this is in drbd_main.c */
 extern void drbd_restart_request(struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index a5d8aae..c35c0f0 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
 	return rv;
 }
 
-static void print_st(struct drbd_device *device, char *name, union drbd_state ns)
+static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
 {
 	drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
 	    name,
@@ -952,11 +952,12 @@ enum drbd_state_rv
 __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 	         enum chg_state_flags flags, struct completion *done)
 {
+	struct drbd_peer_device *peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
 	union drbd_state os;
 	enum drbd_state_rv rv = SS_SUCCESS;
 	enum sanitize_state_warnings ssw;
 	struct after_state_chg_work *ascw;
-	bool did_remote, should_do_remote;
 
 	os = drbd_read_state(device);
 
@@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 			   this happen...*/
 
 			if (is_valid_state(device, os) == rv)
-				rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+				rv = is_valid_soft_transition(os, ns, connection);
 		} else
-			rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
+			rv = is_valid_soft_transition(os, ns, connection);
 	}
 
 	if (rv < SS_SUCCESS) {
@@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 	   sanitize_state(). Only display it here if we where not called from
 	   _conn_request_state() */
 	if (!(flags & CS_DC_SUSP))
-		conn_pr_state_change(first_peer_device(device)->connection, os, ns,
+		conn_pr_state_change(connection, os, ns,
 				     (flags & ~CS_DC_MASK) | CS_DC_SUSP);
 
 	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
@@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
 		atomic_inc(&device->local_cnt);
 
-	did_remote = drbd_should_do_remote(device->state);
+	if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
+		clear_bit(RS_DONE, &device->flags);
+
+	/* changes to local_cnt and device flags should be visible before
+	 * changes to state, which again should be visible before anything else
+	 * depending on that change happens. */
+	smp_wmb();
 	device->state.i = ns.i;
-	should_do_remote = drbd_should_do_remote(device->state);
 	device->resource->susp = ns.susp;
 	device->resource->susp_nod = ns.susp_nod;
 	device->resource->susp_fen = ns.susp_fen;
+	smp_wmb();
 
 	/* put replicated vs not-replicated requests in seperate epochs */
-	if (did_remote != should_do_remote)
-		start_new_tl_epoch(first_peer_device(device)->connection);
+	if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
+	    drbd_should_do_remote((union drbd_dev_state)ns.i))
+		start_new_tl_epoch(connection);
 
 	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
 		drbd_print_uuids(device, "attached to UUIDs");
 
 	/* Wake up role changes, that were delayed because of connection establishing */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
-	    no_peer_wf_report_params(first_peer_device(device)->connection))
-		clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags);
+	    no_peer_wf_report_params(connection))
+		clear_bit(STATE_SENT, &connection->flags);
 
 	wake_up(&device->misc_wait);
 	wake_up(&device->state_wait);
-	wake_up(&first_peer_device(device)->connection->ping_wait);
+	wake_up(&connection->ping_wait);
 
 	/* Aborted verify run, or we reached the stop sector.
 	 * Log the last position, unless end-of-device. */
@@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 
 	/* Receiver should clean up itself */
 	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
-		drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+		drbd_thread_stop_nowait(&connection->receiver);
 
 	/* Now the receiver finished cleaning up itself, it should die */
 	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
-		drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver);
+		drbd_thread_stop_nowait(&connection->receiver);
 
 	/* Upon network failure, we need to restart the receiver. */
 	if (os.conn > C_WF_CONNECTION &&
 	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
-		drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver);
+		drbd_thread_restart_nowait(&connection->receiver);
 
 	/* Resume AL writing if we get a connection */
 	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
 		drbd_resume_al(device);
-		first_peer_device(device)->connection->connect_cnt++;
+		connection->connect_cnt++;
 	}
 
 	/* remember last attach time so request_timer_fn() won't
@@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
 		ascw->w.cb = w_after_state_ch;
 		ascw->device = device;
 		ascw->done = done;
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
+		drbd_queue_work(&connection->sender_work,
 				&ascw->w);
 	} else {
 		drbd_err(device, "Could not kmalloc an ascw\n");
@@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 			   union drbd_state ns, enum chg_state_flags flags)
 {
 	struct drbd_resource *resource = device->resource;
+	struct drbd_peer_device *peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
 	struct sib_info sib;
 
 	sib.sib_reason = SIB_STATE_CHANGE;
 	sib.os = os;
 	sib.ns = ns;
 
-	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+	if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
+	&&  (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
 		clear_bit(CRASHED_PRIMARY, &device->flags);
 		if (device->p_uuid)
 			device->p_uuid[UI_FLAGS] &= ~((u64)2);
@@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	   state change. This function might sleep */
 
 	if (ns.susp_nod) {
-		struct drbd_connection *connection = first_peer_device(device)->connection;
 		enum drbd_req_event what = NOTHING;
 
 		spin_lock_irq(&device->resource->req_lock);
@@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	}
 
 	if (ns.susp_fen) {
-		struct drbd_connection *connection = first_peer_device(device)->connection;
-
 		spin_lock_irq(&device->resource->req_lock);
 		if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
 			/* case2: The connection was established again: */
@@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	 * which is unexpected. */
 	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
 	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
-	    first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) {
-		drbd_gen_and_send_sync_uuid(first_peer_device(device));
+	    connection->agreed_pro_version >= 96 && get_ldev(device)) {
+		drbd_gen_and_send_sync_uuid(peer_device);
 		put_ldev(device);
 	}
 
@@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 		atomic_set(&device->rs_pending_cnt, 0);
 		drbd_rs_cancel_all(device);
 
-		drbd_send_uuids(first_peer_device(device));
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_uuids(peer_device);
+		drbd_send_state(peer_device, ns);
 	}
 	/* No point in queuing send_bitmap if we don't have a connection
 	 * anymore, so check also the _current_ state, not only the new state
@@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 					set_bit(NEW_CUR_UUID, &device->flags);
 				} else {
 					drbd_uuid_new_current(device);
-					drbd_send_uuids(first_peer_device(device));
+					drbd_send_uuids(peer_device);
 				}
 			}
 			put_ldev(device);
@@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
 		    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
 			drbd_uuid_new_current(device);
-			drbd_send_uuids(first_peer_device(device));
+			drbd_send_uuids(peer_device);
 		}
 		/* D_DISKLESS Peer becomes secondary */
 		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	/* Last part of the attaching process ... */
 	if (ns.conn >= C_CONNECTED &&
 	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
-		drbd_send_sizes(first_peer_device(device), 0, 0);  /* to start sync... */
-		drbd_send_uuids(first_peer_device(device));
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_sizes(peer_device, 0, 0);  /* to start sync... */
+		drbd_send_uuids(peer_device);
+		drbd_send_state(peer_device, ns);
 	}
 
 	/* We want to pause/continue resync, tell peer. */
 	if (ns.conn >= C_CONNECTED &&
 	     ((os.aftr_isp != ns.aftr_isp) ||
 	      (os.user_isp != ns.user_isp)))
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	/* In case one of the isp bits got set, suspend other devices. */
 	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	/* Make sure the peer gets informed about eventual state
 	   changes (ISP bits) while we were in WFReportParams. */
 	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	/* We are in the progress to start a full sync... */
 	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 					drbd_disk_str(device->state.disk));
 
 			if (ns.conn >= C_CONNECTED)
-				drbd_send_state(first_peer_device(device), ns);
+				drbd_send_state(peer_device, ns);
 
 			drbd_rs_cancel_all(device);
 
@@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 				 drbd_disk_str(device->state.disk));
 
 		if (ns.conn >= C_CONNECTED)
-			drbd_send_state(first_peer_device(device), ns);
+			drbd_send_state(peer_device, ns);
 		/* corresponding get_ldev in __drbd_set_state
 		 * this may finally trigger drbd_ldev_destroy. */
 		put_ldev(device);
@@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 
 	/* Notify peer that I had a local IO error, and did not detached.. */
 	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	/* Disks got bigger while they were detached */
 	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
 	/* sync target done with resync.  Explicitly notify peer, even though
 	 * it should (at least for non-empty resyncs) already know itself. */
 	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	/* Verify finished, or reached stop sector.  Peer did not know about
 	 * the stop sector, and we may even have changed the stop sector during
 	 * verify to interrupt/stop early.  Send the new state. */
 	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
 	&& verify_can_do_stop_sector(device))
-		drbd_send_state(first_peer_device(device), ns);
+		drbd_send_state(peer_device, ns);
 
 	/* This triggers bitmap writeout of potentially still unwritten pages
 	 * if the resync finished cleanly, or aborted because of peer disk
@@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 		old_conf = connection->net_conf;
 		connection->my_addr_len = 0;
 		connection->peer_addr_len = 0;
-		rcu_assign_pointer(connection->net_conf, NULL);
+		RCU_INIT_POINTER(connection->net_conf, NULL);
 		conn_free_crypto(connection);
 		mutex_unlock(&connection->resource->conf_update);
 
@@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 	return 0;
 }
 
-void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
+static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
 {
 	enum chg_state_flags flags = ~0;
 	struct drbd_peer_device *peer_device;
@@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
 	return rv;
 }
 
-void
+static void
 conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
 	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
 {
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d8f57b6..50776b3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,13 +67,10 @@ rwlock_t global_state_lock;
  */
 void drbd_md_io_complete(struct bio *bio, int error)
 {
-	struct drbd_md_io *md_io;
 	struct drbd_device *device;
 
-	md_io = (struct drbd_md_io *)bio->bi_private;
-	device = container_of(md_io, struct drbd_device, md_io);
-
-	md_io->error = error;
+	device = bio->bi_private;
+	device->md_io.error = error;
 
 	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
 	 * to timeout on the lower level device, and eventually detach from it.
@@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error)
 	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
 	 */
 	drbd_md_put_buffer(device);
-	md_io->done = 1;
+	device->md_io.done = 1;
 	wake_up(&device->misc_wait);
 	bio_put(bio);
 	if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
@@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
 	i = peer_req->i;
 	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
 	block_id = peer_req->block_id;
+	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
 
 	spin_lock_irqsave(&device->resource->req_lock, flags);
 	device->writ_cnt += peer_req->i.size >> 9;
@@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 	if (!get_ldev(device))
 		return -EIO;
 
-	if (drbd_rs_should_slow_down(device, sector))
-		goto defer;
-
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
 
 	peer_req->w.cb = w_e_send_csum;
 	spin_lock_irq(&device->resource->req_lock);
-	list_add(&peer_req->w.list, &device->read_ee);
+	list_add_tail(&peer_req->w.list, &device->read_ee);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	atomic_add(size >> 9, &device->rs_sect_ev);
@@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
 
-	if (list_empty(&device->resync_work.list))
-		drbd_queue_work(&first_peer_device(device)->connection->sender_work,
-				&device->resync_work);
+	drbd_queue_work_if_unqueued(
+		&first_peer_device(device)->connection->sender_work,
+		&device->resync_work);
 }
 
 static void fifo_set(struct fifo_buffer *fb, int value)
@@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
 static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
 {
 	struct disk_conf *dc;
-	unsigned int want;     /* The number of sectors we want in the proxy */
+	unsigned int want;     /* The number of sectors we want in-flight */
 	int req_sect; /* Number of sectors to request in this turn */
-	int correction; /* Number of sectors more we need in the proxy*/
+	int correction; /* Number of sectors more we need in-flight */
 	int cps; /* correction per invocation of drbd_rs_controller() */
 	int steps; /* Number of time steps to plan ahead */
 	int curr_corr;
@@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device)
 	 * potentially causing a distributed deadlock on congestion during
 	 * online-verify or (checksum-based) resync, if max-buffers,
 	 * socket buffer sizes and resync rate settings are mis-configured. */
-	if (mxb - device->rs_in_flight < number)
-		number = mxb - device->rs_in_flight;
+
+	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
+	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
+	 * "number of pages" (typically also 4k),
+	 * but "rs_in_flight" is in "sectors" (512 Byte). */
+	if (mxb - device->rs_in_flight/8 < number)
+		number = mxb - device->rs_in_flight/8;
 
 	return number;
 }
 
-static int make_resync_request(struct drbd_device *device, int cancel)
+static int make_resync_request(struct drbd_device *const device, int cancel)
 {
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
 	unsigned long bit;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(device->this_bdev);
 	int max_bio_size;
 	int number, rollback_i, size;
-	int align, queued, sndbuf;
+	int align, requeue = 0;
 	int i = 0;
 
 	if (unlikely(cancel))
@@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel)
 		goto requeue;
 
 	for (i = 0; i < number; i++) {
-		/* Stop generating RS requests, when half of the send buffer is filled */
-		mutex_lock(&first_peer_device(device)->connection->data.mutex);
-		if (first_peer_device(device)->connection->data.socket) {
-			queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued;
-			sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf;
-		} else {
-			queued = 1;
-			sndbuf = 0;
-		}
-		mutex_unlock(&first_peer_device(device)->connection->data.mutex);
-		if (queued > sndbuf / 2)
+		/* Stop generating RS requests when half of the send buffer is filled,
+		 * but notify TCP that we'd like to have more space. */
+		mutex_lock(&connection->data.mutex);
+		if (connection->data.socket) {
+			struct sock *sk = connection->data.socket->sk;
+			int queued = sk->sk_wmem_queued;
+			int sndbuf = sk->sk_sndbuf;
+			if (queued > sndbuf / 2) {
+				requeue = 1;
+				if (sk->sk_socket)
+					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+			}
+		} else
+			requeue = 1;
+		mutex_unlock(&connection->data.mutex);
+		if (requeue)
 			goto requeue;
 
 next_sector:
@@ -642,8 +649,7 @@ next_sector:
 
 		sector = BM_BIT_TO_SECT(bit);
 
-		if (drbd_rs_should_slow_down(device, sector) ||
-		    drbd_try_rs_begin_io(device, sector)) {
+		if (drbd_try_rs_begin_io(device, sector)) {
 			device->bm_resync_fo = bit;
 			goto requeue;
 		}
@@ -696,9 +702,9 @@ next_sector:
 		/* adjust very last sectors, in case we are oddly sized */
 		if (sector + (size>>9) > capacity)
 			size = (capacity-sector)<<9;
-		if (first_peer_device(device)->connection->agreed_pro_version >= 89 &&
-		    first_peer_device(device)->connection->csums_tfm) {
-			switch (read_for_csum(first_peer_device(device), sector, size)) {
+
+		if (device->use_csums) {
+			switch (read_for_csum(peer_device, sector, size)) {
 			case -EIO: /* Disk failure */
 				put_ldev(device);
 				return -EIO;
@@ -717,7 +723,7 @@ next_sector:
 			int err;
 
 			inc_rs_pending(device);
-			err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST,
+			err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
 						 sector, size, ID_SYNCER);
 			if (err) {
 				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
@@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
 
 		size = BM_BLOCK_SIZE;
 
-		if (drbd_rs_should_slow_down(device, sector) ||
-		    drbd_try_rs_begin_io(device, sector)) {
+		if (drbd_try_rs_begin_io(device, sector)) {
 			device->ov_position = sector;
 			goto requeue;
 		}
@@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device)
 		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
 			khelper_cmd = "after-resync-target";
 
-		if (first_peer_device(device)->connection->csums_tfm && device->rs_total) {
+		if (device->use_csums && device->rs_total) {
 			const unsigned long s = device->rs_same_csum;
 			const unsigned long t = device->rs_total;
 			const int ratio =
@@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
 	struct drbd_device *device = req->device;
-	struct drbd_connection *connection = first_peer_device(device)->connection;
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *const connection = peer_device->connection;
 	int err;
 
 	if (unlikely(cancel)) {
 		req_mod(req, SEND_CANCELED);
 		return 0;
 	}
+	req->pre_send_jif = jiffies;
 
 	/* this time, no connection->send.current_epoch_writes++;
 	 * If it was sent, it was the closing barrier for the last
@@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
 	 * No more barriers will be sent, until we leave AHEAD mode again. */
 	maybe_send_barrier(connection, req->epoch);
 
-	err = drbd_send_out_of_sync(first_peer_device(device), req);
+	err = drbd_send_out_of_sync(peer_device, req);
 	req_mod(req, OOS_HANDED_TO_NETWORK);
 
 	return err;
@@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
 	struct drbd_device *device = req->device;
-	struct drbd_connection *connection = first_peer_device(device)->connection;
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device->connection;
 	int err;
 
 	if (unlikely(cancel)) {
 		req_mod(req, SEND_CANCELED);
 		return 0;
 	}
+	req->pre_send_jif = jiffies;
 
 	re_init_if_first_write(connection, req->epoch);
 	maybe_send_barrier(connection, req->epoch);
 	connection->send.current_epoch_writes++;
 
-	err = drbd_send_dblock(first_peer_device(device), req);
+	err = drbd_send_dblock(peer_device, req);
 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
 
 	return err;
@@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel)
 {
 	struct drbd_request *req = container_of(w, struct drbd_request, w);
 	struct drbd_device *device = req->device;
-	struct drbd_connection *connection = first_peer_device(device)->connection;
+	struct drbd_peer_device *const peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device->connection;
 	int err;
 
 	if (unlikely(cancel)) {
 		req_mod(req, SEND_CANCELED);
 		return 0;
 	}
+	req->pre_send_jif = jiffies;
 
 	/* Even read requests may close a write epoch,
 	 * if there was any yet. */
 	maybe_send_barrier(connection, req->epoch);
 
-	err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size,
+	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
 				 (unsigned long)req);
 
 	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
@@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
 	struct drbd_device *device = req->device;
 
 	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
-		drbd_al_begin_io(device, &req->i, false);
+		drbd_al_begin_io(device, &req->i);
 
 	drbd_req_make_private_bio(req, req->master_bio);
 	req->private_bio->bi_bdev = device->ldev->backing_bdev;
@@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device)
 void start_resync_timer_fn(unsigned long data)
 {
 	struct drbd_device *device = (struct drbd_device *) data;
-
-	drbd_queue_work(&first_peer_device(device)->connection->sender_work,
-			&device->start_resync_work);
+	drbd_device_post_work(device, RS_START);
 }
 
-int w_start_resync(struct drbd_work *w, int cancel)
+static void do_start_resync(struct drbd_device *device)
 {
-	struct drbd_device *device =
-		container_of(w, struct drbd_device, start_resync_work);
-
 	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
-		drbd_warn(device, "w_start_resync later...\n");
+		drbd_warn(device, "postponing start_resync ...\n");
 		device->start_resync_timer.expires = jiffies + HZ/10;
 		add_timer(&device->start_resync_timer);
-		return 0;
+		return;
 	}
 
 	drbd_start_resync(device, C_SYNC_SOURCE);
 	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
-	return 0;
+}
+
+static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
+{
+	bool csums_after_crash_only;
+	rcu_read_lock();
+	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
+	rcu_read_unlock();
+	return connection->agreed_pro_version >= 89 &&		/* supported? */
+		connection->csums_tfm &&			/* configured? */
+		(csums_after_crash_only == 0			/* use for each resync? */
+		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
 }
 
 /**
@@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel)
  */
 void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 {
+	struct drbd_peer_device *peer_device = first_peer_device(device);
+	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
 	union drbd_state ns;
 	int r;
 
@@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 			if (r > 0) {
 				drbd_info(device, "before-resync-target handler returned %d, "
 					 "dropping connection.\n", r);
-				conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD);
+				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
 				return;
 			}
 		} else /* C_SYNC_SOURCE */ {
@@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 				} else {
 					drbd_info(device, "before-resync-source handler returned %d, "
 						 "dropping connection.\n", r);
-					conn_request_state(first_peer_device(device)->connection,
+					conn_request_state(connection,
 							   NS(conn, C_DISCONNECTING), CS_HARD);
 					return;
 				}
@@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		}
 	}
 
-	if (current == first_peer_device(device)->connection->worker.task) {
+	if (current == connection->worker.task) {
 		/* The worker should not sleep waiting for state_mutex,
 		   that can take long */
 		if (!mutex_trylock(device->state_mutex)) {
@@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 			device->rs_mark_time[i] = now;
 		}
 		_drbd_pause_after(device);
+		/* Forget potentially stale cached per resync extent bit-counts.
+		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
+		 * disabled, and know the disk state is ok. */
+		spin_lock(&device->al_lock);
+		lc_reset(device->resync);
+		device->resync_locked = 0;
+		device->resync_wenr = LC_FREE;
+		spin_unlock(&device->al_lock);
 	}
 	write_unlock(&global_state_lock);
 	spin_unlock_irq(&device->resource->req_lock);
 
 	if (r == SS_SUCCESS) {
+		wake_up(&device->al_wait); /* for lc_reset() above */
 		/* reset rs_last_bcast when a resync or verify is started,
 		 * to deal with potential jiffies wrap. */
 		device->rs_last_bcast = jiffies - HZ;
@@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		     drbd_conn_str(ns.conn),
 		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) device->rs_total);
-		if (side == C_SYNC_TARGET)
+		if (side == C_SYNC_TARGET) {
 			device->bm_resync_fo = 0;
+			device->use_csums = use_checksum_based_resync(connection, device);
+		} else {
+			device->use_csums = 0;
+		}
 
 		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
 		 * with w_send_oos, or the sync target will get confused as to
@@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 		 * drbd_resync_finished from here in that case.
 		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
 		 * and from after_state_ch otherwise. */
-		if (side == C_SYNC_SOURCE &&
-		    first_peer_device(device)->connection->agreed_pro_version < 96)
-			drbd_gen_and_send_sync_uuid(first_peer_device(device));
+		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
+			drbd_gen_and_send_sync_uuid(peer_device);
 
-		if (first_peer_device(device)->connection->agreed_pro_version < 95 &&
-		    device->rs_total == 0) {
+		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
 			/* This still has a race (about when exactly the peers
 			 * detect connection loss) that can lead to a full sync
 			 * on next handshake. In 8.3.9 we fixed this with explicit
@@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 				int timeo;
 
 				rcu_read_lock();
-				nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
+				nc = rcu_dereference(connection->net_conf);
 				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
 				rcu_read_unlock();
 				schedule_timeout_interruptible(timeo);
@@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
 	mutex_unlock(device->state_mutex);
 }
 
+static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
+{
+	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
+	device->rs_last_bcast = jiffies;
+
+	if (!get_ldev(device))
+		return;
+
+	drbd_bm_write_lazy(device, 0);
+	if (resync_done && is_sync_state(device->state.conn))
+		drbd_resync_finished(device);
+
+	drbd_bcast_event(device, &sib);
+	/* update timestamp, in case it took a while to write out stuff */
+	device->rs_last_bcast = jiffies;
+	put_ldev(device);
+}
+
+static void drbd_ldev_destroy(struct drbd_device *device)
+{
+	lc_destroy(device->resync);
+	device->resync = NULL;
+	lc_destroy(device->act_log);
+	device->act_log = NULL;
+	__no_warn(local,
+		drbd_free_ldev(device->ldev);
+		device->ldev = NULL;);
+	clear_bit(GOING_DISKLESS, &device->flags);
+	wake_up(&device->misc_wait);
+}
+
+static void go_diskless(struct drbd_device *device)
+{
+	D_ASSERT(device, device->state.disk == D_FAILED);
+	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
+	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
+	 * the protected members anymore, though, so once put_ldev reaches zero
+	 * again, it will be safe to free them. */
+
+	/* Try to write changed bitmap pages, read errors may have just
+	 * set some bits outside the area covered by the activity log.
+	 *
+	 * If we have an IO error during the bitmap writeout,
+	 * we will want a full sync next time, just in case.
+	 * (Do we want a specific meta data flag for this?)
+	 *
+	 * If that does not make it to stable storage either,
+	 * we cannot do anything about that anymore.
+	 *
+	 * We still need to check if both bitmap and ldev are present, we may
+	 * end up here after a failed attach, before ldev was even assigned.
+	 */
+	if (device->bitmap && device->ldev) {
+		/* An interrupted resync or similar is allowed to recounts bits
+		 * while we detach.
+		 * Any modifications would not be expected anymore, though.
+		 */
+		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
+					"detach", BM_LOCKED_TEST_ALLOWED)) {
+			if (test_bit(WAS_READ_ERROR, &device->flags)) {
+				drbd_md_set_flag(device, MDF_FULL_SYNC);
+				drbd_md_sync(device);
+			}
+		}
+	}
+
+	drbd_force_state(device, NS(disk, D_DISKLESS));
+}
+
+static int do_md_sync(struct drbd_device *device)
+{
+	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
+	drbd_md_sync(device);
+	return 0;
+}
+
+/* only called from drbd_worker thread, no locking */
+void __update_timing_details(
+		struct drbd_thread_timing_details *tdp,
+		unsigned int *cb_nr,
+		void *cb,
+		const char *fn, const unsigned int line)
+{
+	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
+	struct drbd_thread_timing_details *td = tdp + i;
+
+	td->start_jif = jiffies;
+	td->cb_addr = cb;
+	td->caller_fn = fn;
+	td->line = line;
+	td->cb_nr = *cb_nr;
+
+	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
+	td = tdp + i;
+	memset(td, 0, sizeof(*td));
+
+	++(*cb_nr);
+}
+
+#define WORK_PENDING(work_bit, todo)	(todo & (1UL << work_bit))
+static void do_device_work(struct drbd_device *device, const unsigned long todo)
+{
+	if (WORK_PENDING(MD_SYNC, todo))
+		do_md_sync(device);
+	if (WORK_PENDING(RS_DONE, todo) ||
+	    WORK_PENDING(RS_PROGRESS, todo))
+		update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
+	if (WORK_PENDING(GO_DISKLESS, todo))
+		go_diskless(device);
+	if (WORK_PENDING(DESTROY_DISK, todo))
+		drbd_ldev_destroy(device);
+	if (WORK_PENDING(RS_START, todo))
+		do_start_resync(device);
+}
+
+#define DRBD_DEVICE_WORK_MASK	\
+	((1UL << GO_DISKLESS)	\
+	|(1UL << DESTROY_DISK)	\
+	|(1UL << MD_SYNC)	\
+	|(1UL << RS_START)	\
+	|(1UL << RS_PROGRESS)	\
+	|(1UL << RS_DONE)	\
+	)
+
+static unsigned long get_work_bits(unsigned long *flags)
+{
+	unsigned long old, new;
+	do {
+		old = *flags;
+		new = old & ~DRBD_DEVICE_WORK_MASK;
+	} while (cmpxchg(flags, old, new) != old);
+	return old & DRBD_DEVICE_WORK_MASK;
+}
+
+static void do_unqueued_work(struct drbd_connection *connection)
+{
+	struct drbd_peer_device *peer_device;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
+		struct drbd_device *device = peer_device->device;
+		unsigned long todo = get_work_bits(&device->flags);
+		if (!todo)
+			continue;
+
+		kref_get(&device->kref);
+		rcu_read_unlock();
+		do_device_work(device, todo);
+		kref_put(&device->kref, drbd_destroy_device);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+}
+
 static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
 {
 	spin_lock_irq(&queue->q_lock);
-	list_splice_init(&queue->q, work_list);
+	list_splice_tail_init(&queue->q, work_list);
 	spin_unlock_irq(&queue->q_lock);
 	return !list_empty(work_list);
 }
@@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 		/* dequeue single item only,
 		 * we still use drbd_queue_work_front() in some places */
 		if (!list_empty(&connection->sender_work.q))
-			list_move(connection->sender_work.q.next, work_list);
+			list_splice_tail_init(&connection->sender_work.q, work_list);
 		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
 		if (!list_empty(work_list) || signal_pending(current)) {
 			spin_unlock_irq(&connection->resource->req_lock);
@@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 		if (send_barrier)
 			maybe_send_barrier(connection,
 					connection->send.current_epoch_nr + 1);
+
+		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
+			break;
+
+		/* drbd_send() may have called flush_signals() */
+		if (get_t_state(&connection->worker) != RUNNING)
+			break;
+
 		schedule();
 		/* may be woken up for other things but new work, too,
 		 * e.g. if the current epoch got closed.
@@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi)
 	while (get_t_state(thi) == RUNNING) {
 		drbd_thread_current_set_cpu(thi);
 
-		/* as long as we use drbd_queue_work_front(),
-		 * we may only dequeue single work items here, not batches. */
-		if (list_empty(&work_list))
+		if (list_empty(&work_list)) {
+			update_worker_timing_details(connection, wait_for_work);
 			wait_for_work(connection, &work_list);
+		}
+
+		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+			update_worker_timing_details(connection, do_unqueued_work);
+			do_unqueued_work(connection);
+		}
 
 		if (signal_pending(current)) {
 			flush_signals(current);
@@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi)
 		while (!list_empty(&work_list)) {
 			w = list_first_entry(&work_list, struct drbd_work, list);
 			list_del_init(&w->list);
+			update_worker_timing_details(connection, w->cb);
 			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
 				continue;
 			if (connection->cstate >= C_WF_REPORT_PARAMS)
@@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi)
 	}
 
 	do {
+		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
+			update_worker_timing_details(connection, do_unqueued_work);
+			do_unqueued_work(connection);
+		}
 		while (!list_empty(&work_list)) {
 			w = list_first_entry(&work_list, struct drbd_work, list);
 			list_del_init(&w->list);
+			update_worker_timing_details(connection, w->cb);
 			w->cb(w, 1);
 		}
 		dequeue_work_batch(&connection->sender_work, &work_list);
-	} while (!list_empty(&work_list));
+	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
 
 	rcu_read_lock();
 	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index b2c98c1..623c841 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -42,6 +42,7 @@
 #include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
+#include <linux/workqueue.h>
 
 #include "rbd_types.h"
 
@@ -332,7 +333,10 @@ struct rbd_device {
 
 	char			name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */
 
+	struct list_head	rq_queue;	/* incoming rq queue */
 	spinlock_t		lock;		/* queue, flags, open_count */
+	struct workqueue_struct	*rq_wq;
+	struct work_struct	rq_work;
 
 	struct rbd_image_header	header;
 	unsigned long		flags;		/* possibly lock protected */
@@ -514,7 +518,8 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
 static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev);
+static int rbd_dev_header_info(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
 static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
 					u64 snap_id);
 static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -971,12 +976,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
 	header->snap_names = snap_names;
 	header->snap_sizes = snap_sizes;
 
-	/* Make sure mapping size is consistent with header info */
-
-	if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time)
-		if (rbd_dev->mapping.size != header->image_size)
-			rbd_dev->mapping.size = header->image_size;
-
 	return 0;
 out_2big:
 	ret = -EIO;
@@ -1139,6 +1138,13 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 	rbd_dev->mapping.features = 0;
 }
 
+static void rbd_segment_name_free(const char *name)
+{
+	/* The explicit cast here is needed to drop the const qualifier */
+
+	kmem_cache_free(rbd_segment_name_cache, (void *)name);
+}
+
 static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
 {
 	char *name;
@@ -1158,20 +1164,13 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
 	if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) {
 		pr_err("error formatting segment name for #%llu (%d)\n",
 			segment, ret);
-		kfree(name);
+		rbd_segment_name_free(name);
 		name = NULL;
 	}
 
 	return name;
 }
 
-static void rbd_segment_name_free(const char *name)
-{
-	/* The explicit cast here is needed to drop the const qualifier */
-
-	kmem_cache_free(rbd_segment_name_cache, (void *)name);
-}
-
 static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
 {
 	u64 segment_size = (u64) 1 << rbd_dev->header.obj_order;
@@ -1371,7 +1370,7 @@ static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
 		struct rbd_device *rbd_dev;
 
 		rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked img_data\n",
+		rbd_warn(rbd_dev, "obj_request %p already marked img_data",
 			obj_request);
 	}
 }
@@ -1389,7 +1388,7 @@ static void obj_request_done_set(struct rbd_obj_request *obj_request)
 
 		if (obj_request_img_data_test(obj_request))
 			rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked done\n",
+		rbd_warn(rbd_dev, "obj_request %p already marked done",
 			obj_request);
 	}
 }
@@ -1527,11 +1526,37 @@ static bool obj_request_type_valid(enum obj_request_type type)
 static int rbd_obj_request_submit(struct ceph_osd_client *osdc,
 				struct rbd_obj_request *obj_request)
 {
-	dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request);
-
+	dout("%s %p\n", __func__, obj_request);
 	return ceph_osdc_start_request(osdc, obj_request->osd_req, false);
 }
 
+static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
+{
+	dout("%s %p\n", __func__, obj_request);
+	ceph_osdc_cancel_request(obj_request->osd_req);
+}
+
+/*
+ * Wait for an object request to complete.  If interrupted, cancel the
+ * underlying osd request.
+ */
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+	int ret;
+
+	dout("%s %p\n", __func__, obj_request);
+
+	ret = wait_for_completion_interruptible(&obj_request->completion);
+	if (ret < 0) {
+		dout("%s %p interrupted\n", __func__, obj_request);
+		rbd_obj_request_end(obj_request);
+		return ret;
+	}
+
+	dout("%s %p done\n", __func__, obj_request);
+	return 0;
+}
+
 static void rbd_img_request_complete(struct rbd_img_request *img_request)
 {
 
@@ -1558,15 +1583,6 @@ static void rbd_img_request_complete(struct rbd_img_request *img_request)
 		rbd_img_request_put(img_request);
 }
 
-/* Caller is responsible for rbd_obj_request_destroy(obj_request) */
-
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p\n", __func__, obj_request);
-
-	return wait_for_completion_interruptible(&obj_request->completion);
-}
-
 /*
  * The default/initial value for all image request flags is 0.  Each
  * is conditionally set to 1 at image request initialization time
@@ -1763,7 +1779,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 		rbd_osd_trivial_callback(obj_request);
 		break;
 	default:
-		rbd_warn(NULL, "%s: unsupported op %hu\n",
+		rbd_warn(NULL, "%s: unsupported op %hu",
 			obj_request->object_name, (unsigned short) opcode);
 		break;
 	}
@@ -1998,7 +2014,7 @@ static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
 	if (!counter)
 		rbd_dev_unparent(rbd_dev);
 	else
-		rbd_warn(rbd_dev, "parent reference underflow\n");
+		rbd_warn(rbd_dev, "parent reference underflow");
 }
 
 /*
@@ -2028,7 +2044,7 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
 	/* Image was flattened, but parent is not yet torn down */
 
 	if (counter < 0)
-		rbd_warn(rbd_dev, "parent reference overflow\n");
+		rbd_warn(rbd_dev, "parent reference overflow");
 
 	return false;
 }
@@ -2045,7 +2061,7 @@ static struct rbd_img_request *rbd_img_request_create(
 {
 	struct rbd_img_request *img_request;
 
-	img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC);
+	img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO);
 	if (!img_request)
 		return NULL;
 
@@ -2161,11 +2177,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 	if (result) {
 		struct rbd_device *rbd_dev = img_request->rbd_dev;
 
-		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n",
+		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
 			img_request_write_test(img_request) ? "write" : "read",
 			obj_request->length, obj_request->img_offset,
 			obj_request->offset);
-		rbd_warn(rbd_dev, "  result %d xferred %x\n",
+		rbd_warn(rbd_dev, "  result %d xferred %x",
 			result, xferred);
 		if (!img_request->result)
 			img_request->result = result;
@@ -2946,154 +2962,135 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
 	dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
 		rbd_dev->header_name, (unsigned long long)notify_id,
 		(unsigned int)opcode);
+
+	/*
+	 * Until adequate refresh error handling is in place, there is
+	 * not much we can do here, except warn.
+	 *
+	 * See http://tracker.ceph.com/issues/5040
+	 */
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
-		rbd_warn(rbd_dev, "header refresh error (%d)\n", ret);
+		rbd_warn(rbd_dev, "refresh failed: %d", ret);
 
-	rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+	ret = rbd_obj_notify_ack_sync(rbd_dev, notify_id);
+	if (ret)
+		rbd_warn(rbd_dev, "notify_ack ret %d", ret);
 }
 
 /*
- * Initiate a watch request, synchronously.
+ * Send a (un)watch request and wait for the ack.  Return a request
+ * with a ref held on success or error.
  */
-static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
+static struct rbd_obj_request *rbd_obj_watch_request_helper(
+						struct rbd_device *rbd_dev,
+						bool watch)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	int ret;
 
-	rbd_assert(!rbd_dev->watch_event);
-	rbd_assert(!rbd_dev->watch_request);
-
-	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
-				     &rbd_dev->watch_event);
-	if (ret < 0)
-		return ret;
-
-	rbd_assert(rbd_dev->watch_event);
-
 	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
 					     OBJ_REQUEST_NODATA);
-	if (!obj_request) {
-		ret = -ENOMEM;
-		goto out_cancel;
-	}
+	if (!obj_request)
+		return ERR_PTR(-ENOMEM);
 
 	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
 						  obj_request);
 	if (!obj_request->osd_req) {
 		ret = -ENOMEM;
-		goto out_put;
+		goto out;
 	}
 
-	ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
-
 	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
-			      rbd_dev->watch_event->cookie, 0, 1);
+			      rbd_dev->watch_event->cookie, 0, watch);
 	rbd_osd_req_format_write(obj_request);
 
+	if (watch)
+		ceph_osdc_set_request_linger(osdc, obj_request->osd_req);
+
 	ret = rbd_obj_request_submit(osdc, obj_request);
 	if (ret)
-		goto out_linger;
+		goto out;
 
 	ret = rbd_obj_request_wait(obj_request);
 	if (ret)
-		goto out_linger;
+		goto out;
 
 	ret = obj_request->result;
-	if (ret)
-		goto out_linger;
-
-	/*
-	 * A watch request is set to linger, so the underlying osd
-	 * request won't go away until we unregister it.  We retain
-	 * a pointer to the object request during that time (in
-	 * rbd_dev->watch_request), so we'll keep a reference to
-	 * it.  We'll drop that reference (below) after we've
-	 * unregistered it.
-	 */
-	rbd_dev->watch_request = obj_request;
+	if (ret) {
+		if (watch)
+			rbd_obj_request_end(obj_request);
+		goto out;
+	}
 
-	return 0;
+	return obj_request;
 
-out_linger:
-	ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req);
-out_put:
+out:
 	rbd_obj_request_put(obj_request);
-out_cancel:
-	ceph_osdc_cancel_event(rbd_dev->watch_event);
-	rbd_dev->watch_event = NULL;
-
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
- * Tear down a watch request, synchronously.
+ * Initiate a watch request, synchronously.
  */
-static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_obj_request *obj_request;
 	int ret;
 
-	rbd_assert(rbd_dev->watch_event);
-	rbd_assert(rbd_dev->watch_request);
+	rbd_assert(!rbd_dev->watch_event);
+	rbd_assert(!rbd_dev->watch_request);
 
-	obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0,
-					     OBJ_REQUEST_NODATA);
-	if (!obj_request) {
-		ret = -ENOMEM;
-		goto out_cancel;
-	}
+	ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev,
+				     &rbd_dev->watch_event);
+	if (ret < 0)
+		return ret;
 
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
-						  obj_request);
-	if (!obj_request->osd_req) {
-		ret = -ENOMEM;
-		goto out_put;
+	obj_request = rbd_obj_watch_request_helper(rbd_dev, true);
+	if (IS_ERR(obj_request)) {
+		ceph_osdc_cancel_event(rbd_dev->watch_event);
+		rbd_dev->watch_event = NULL;
+		return PTR_ERR(obj_request);
 	}
 
-	osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
-			      rbd_dev->watch_event->cookie, 0, 0);
-	rbd_osd_req_format_write(obj_request);
-
-	ret = rbd_obj_request_submit(osdc, obj_request);
-	if (ret)
-		goto out_put;
+	/*
+	 * A watch request is set to linger, so the underlying osd
+	 * request won't go away until we unregister it.  We retain
+	 * a pointer to the object request during that time (in
+	 * rbd_dev->watch_request), so we'll keep a reference to it.
+	 * We'll drop that reference after we've unregistered it in
+	 * rbd_dev_header_unwatch_sync().
+	 */
+	rbd_dev->watch_request = obj_request;
 
-	ret = rbd_obj_request_wait(obj_request);
-	if (ret)
-		goto out_put;
+	return 0;
+}
 
-	ret = obj_request->result;
-	if (ret)
-		goto out_put;
+/*
+ * Tear down a watch request, synchronously.
+ */
+static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
+{
+	struct rbd_obj_request *obj_request;
 
-	/* We have successfully torn down the watch request */
+	rbd_assert(rbd_dev->watch_event);
+	rbd_assert(rbd_dev->watch_request);
 
-	ceph_osdc_unregister_linger_request(osdc,
-					    rbd_dev->watch_request->osd_req);
+	rbd_obj_request_end(rbd_dev->watch_request);
 	rbd_obj_request_put(rbd_dev->watch_request);
 	rbd_dev->watch_request = NULL;
 
-out_put:
-	rbd_obj_request_put(obj_request);
-out_cancel:
+	obj_request = rbd_obj_watch_request_helper(rbd_dev, false);
+	if (!IS_ERR(obj_request))
+		rbd_obj_request_put(obj_request);
+	else
+		rbd_warn(rbd_dev, "unable to tear down watch request (%ld)",
+			 PTR_ERR(obj_request));
+
 	ceph_osdc_cancel_event(rbd_dev->watch_event);
 	rbd_dev->watch_event = NULL;
-
-	return ret;
-}
-
-static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev)
-{
-	int ret;
-
-	ret = __rbd_dev_header_unwatch_sync(rbd_dev);
-	if (ret) {
-		rbd_warn(rbd_dev, "unable to tear down watch request: %d\n",
-			 ret);
-	}
 }
 
 /*
@@ -3183,102 +3180,129 @@ out:
 	return ret;
 }
 
-static void rbd_request_fn(struct request_queue *q)
-		__releases(q->queue_lock) __acquires(q->queue_lock)
+static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
 {
-	struct rbd_device *rbd_dev = q->queuedata;
-	struct request *rq;
+	struct rbd_img_request *img_request;
+	u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
+	u64 length = blk_rq_bytes(rq);
+	bool wr = rq_data_dir(rq) == WRITE;
 	int result;
 
-	while ((rq = blk_fetch_request(q))) {
-		bool write_request = rq_data_dir(rq) == WRITE;
-		struct rbd_img_request *img_request;
-		u64 offset;
-		u64 length;
+	/* Ignore/skip any zero-length requests */
 
-		/* Ignore any non-FS requests that filter through. */
+	if (!length) {
+		dout("%s: zero-length request\n", __func__);
+		result = 0;
+		goto err_rq;
+	}
 
-		if (rq->cmd_type != REQ_TYPE_FS) {
-			dout("%s: non-fs request type %d\n", __func__,
-				(int) rq->cmd_type);
-			__blk_end_request_all(rq, 0);
-			continue;
+	/* Disallow writes to a read-only device */
+
+	if (wr) {
+		if (rbd_dev->mapping.read_only) {
+			result = -EROFS;
+			goto err_rq;
 		}
+		rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
+	}
 
-		/* Ignore/skip any zero-length requests */
+	/*
+	 * Quit early if the mapped snapshot no longer exists.  It's
+	 * still possible the snapshot will have disappeared by the
+	 * time our request arrives at the osd, but there's no sense in
+	 * sending it if we already know.
+	 */
+	if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
+		dout("request for non-existent snapshot");
+		rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
+		result = -ENXIO;
+		goto err_rq;
+	}
 
-		offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT;
-		length = (u64) blk_rq_bytes(rq);
+	if (offset && length > U64_MAX - offset + 1) {
+		rbd_warn(rbd_dev, "bad request range (%llu~%llu)", offset,
+			 length);
+		result = -EINVAL;
+		goto err_rq;	/* Shouldn't happen */
+	}
 
-		if (!length) {
-			dout("%s: zero-length request\n", __func__);
-			__blk_end_request_all(rq, 0);
-			continue;
-		}
+	if (offset + length > rbd_dev->mapping.size) {
+		rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset,
+			 length, rbd_dev->mapping.size);
+		result = -EIO;
+		goto err_rq;
+	}
 
-		spin_unlock_irq(q->queue_lock);
+	img_request = rbd_img_request_create(rbd_dev, offset, length, wr);
+	if (!img_request) {
+		result = -ENOMEM;
+		goto err_rq;
+	}
+	img_request->rq = rq;
 
-		/* Disallow writes to a read-only device */
+	result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, rq->bio);
+	if (result)
+		goto err_img_request;
 
-		if (write_request) {
-			result = -EROFS;
-			if (rbd_dev->mapping.read_only)
-				goto end_request;
-			rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP);
-		}
+	result = rbd_img_request_submit(img_request);
+	if (result)
+		goto err_img_request;
 
-		/*
-		 * Quit early if the mapped snapshot no longer
-		 * exists.  It's still possible the snapshot will
-		 * have disappeared by the time our request arrives
-		 * at the osd, but there's no sense in sending it if
-		 * we already know.
-		 */
-		if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) {
-			dout("request for non-existent snapshot");
-			rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP);
-			result = -ENXIO;
-			goto end_request;
-		}
+	return;
 
-		result = -EINVAL;
-		if (offset && length > U64_MAX - offset + 1) {
-			rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n",
-				offset, length);
-			goto end_request;	/* Shouldn't happen */
-		}
+err_img_request:
+	rbd_img_request_put(img_request);
+err_rq:
+	if (result)
+		rbd_warn(rbd_dev, "%s %llx at %llx result %d",
+			 wr ? "write" : "read", length, offset, result);
+	blk_end_request_all(rq, result);
+}
 
-		result = -EIO;
-		if (offset + length > rbd_dev->mapping.size) {
-			rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
-				offset, length, rbd_dev->mapping.size);
-			goto end_request;
-		}
+static void rbd_request_workfn(struct work_struct *work)
+{
+	struct rbd_device *rbd_dev =
+	    container_of(work, struct rbd_device, rq_work);
+	struct request *rq, *next;
+	LIST_HEAD(requests);
 
-		result = -ENOMEM;
-		img_request = rbd_img_request_create(rbd_dev, offset, length,
-							write_request);
-		if (!img_request)
-			goto end_request;
+	spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */
+	list_splice_init(&rbd_dev->rq_queue, &requests);
+	spin_unlock_irq(&rbd_dev->lock);
 
-		img_request->rq = rq;
+	list_for_each_entry_safe(rq, next, &requests, queuelist) {
+		list_del_init(&rq->queuelist);
+		rbd_handle_request(rbd_dev, rq);
+	}
+}
 
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-						rq->bio);
-		if (!result)
-			result = rbd_img_request_submit(img_request);
-		if (result)
-			rbd_img_request_put(img_request);
-end_request:
-		spin_lock_irq(q->queue_lock);
-		if (result < 0) {
-			rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
-				write_request ? "write" : "read",
-				length, offset, result);
-
-			__blk_end_request_all(rq, result);
+/*
+ * Called with q->queue_lock held and interrupts disabled, possibly on
+ * the way to schedule().  Do not sleep here!
+ */
+static void rbd_request_fn(struct request_queue *q)
+{
+	struct rbd_device *rbd_dev = q->queuedata;
+	struct request *rq;
+	int queued = 0;
+
+	rbd_assert(rbd_dev);
+
+	while ((rq = blk_fetch_request(q))) {
+		/* Ignore any non-FS requests that filter through. */
+		if (rq->cmd_type != REQ_TYPE_FS) {
+			dout("%s: non-fs request type %d\n", __func__,
+				(int) rq->cmd_type);
+			__blk_end_request_all(rq, 0);
+			continue;
 		}
+
+		list_add_tail(&rq->queuelist, &rbd_dev->rq_queue);
+		queued++;
 	}
+
+	if (queued)
+		queue_work(rbd_dev->rq_wq, &rbd_dev->rq_work);
 }
 
 /*
@@ -3517,24 +3541,37 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev)
 	u64 mapping_size;
 	int ret;
 
-	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
 	down_write(&rbd_dev->header_rwsem);
 	mapping_size = rbd_dev->mapping.size;
-	if (rbd_dev->image_format == 1)
-		ret = rbd_dev_v1_header_info(rbd_dev);
-	else
-		ret = rbd_dev_v2_header_info(rbd_dev);
 
-	/* If it's a mapped snapshot, validate its EXISTS flag */
+	ret = rbd_dev_header_info(rbd_dev);
+	if (ret)
+		return ret;
+
+	/*
+	 * If there is a parent, see if it has disappeared due to the
+	 * mapped image getting flattened.
+	 */
+	if (rbd_dev->parent) {
+		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret)
+			return ret;
+	}
+
+	if (rbd_dev->spec->snap_id == CEPH_NOSNAP) {
+		if (rbd_dev->mapping.size != rbd_dev->header.image_size)
+			rbd_dev->mapping.size = rbd_dev->header.image_size;
+	} else {
+		/* validate mapped snapshot's EXISTS flag */
+		rbd_exists_validate(rbd_dev);
+	}
 
-	rbd_exists_validate(rbd_dev);
 	up_write(&rbd_dev->header_rwsem);
 
-	if (mapping_size != rbd_dev->mapping.size) {
+	if (mapping_size != rbd_dev->mapping.size)
 		rbd_dev_update_size(rbd_dev);
-	}
 
-	return ret;
+	return 0;
 }
 
 static int rbd_init_disk(struct rbd_device *rbd_dev)
@@ -3696,46 +3733,36 @@ static ssize_t rbd_snap_show(struct device *dev,
 }
 
 /*
- * For an rbd v2 image, shows the pool id, image id, and snapshot id
- * for the parent image.  If there is no parent, simply shows
- * "(no parent image)".
+ * For a v2 image, shows the chain of parent images, separated by empty
+ * lines.  For v1 images or if there is no parent, shows "(no parent
+ * image)".
  */
 static ssize_t rbd_parent_show(struct device *dev,
-			     struct device_attribute *attr,
-			     char *buf)
+			       struct device_attribute *attr,
+			       char *buf)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
-	struct rbd_spec *spec = rbd_dev->parent_spec;
-	int count;
-	char *bufp = buf;
+	ssize_t count = 0;
 
-	if (!spec)
+	if (!rbd_dev->parent)
 		return sprintf(buf, "(no parent image)\n");
 
-	count = sprintf(bufp, "pool_id %llu\npool_name %s\n",
-			(unsigned long long) spec->pool_id, spec->pool_name);
-	if (count < 0)
-		return count;
-	bufp += count;
-
-	count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id,
-			spec->image_name ? spec->image_name : "(unknown)");
-	if (count < 0)
-		return count;
-	bufp += count;
-
-	count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n",
-			(unsigned long long) spec->snap_id, spec->snap_name);
-	if (count < 0)
-		return count;
-	bufp += count;
-
-	count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap);
-	if (count < 0)
-		return count;
-	bufp += count;
+	for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) {
+		struct rbd_spec *spec = rbd_dev->parent_spec;
+
+		count += sprintf(&buf[count], "%s"
+			    "pool_id %llu\npool_name %s\n"
+			    "image_id %s\nimage_name %s\n"
+			    "snap_id %llu\nsnap_name %s\n"
+			    "overlap %llu\n",
+			    !count ? "" : "\n", /* first? */
+			    spec->pool_id, spec->pool_name,
+			    spec->image_id, spec->image_name ?: "(unknown)",
+			    spec->snap_id, spec->snap_name,
+			    rbd_dev->parent_overlap);
+	}
 
-	return (ssize_t) (bufp - buf);
+	return count;
 }
 
 static ssize_t rbd_image_refresh(struct device *dev,
@@ -3748,9 +3775,9 @@ static ssize_t rbd_image_refresh(struct device *dev,
 
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
-		rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret);
+		return ret;
 
-	return ret < 0 ? ret : size;
+	return size;
 }
 
 static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
@@ -3822,6 +3849,9 @@ static struct rbd_spec *rbd_spec_alloc(void)
 	spec = kzalloc(sizeof (*spec), GFP_KERNEL);
 	if (!spec)
 		return NULL;
+
+	spec->pool_id = CEPH_NOPOOL;
+	spec->snap_id = CEPH_NOSNAP;
 	kref_init(&spec->kref);
 
 	return spec;
@@ -3848,6 +3878,8 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
 		return NULL;
 
 	spin_lock_init(&rbd_dev->lock);
+	INIT_LIST_HEAD(&rbd_dev->rq_queue);
+	INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn);
 	rbd_dev->flags = 0;
 	atomic_set(&rbd_dev->parent_ref, 0);
 	INIT_LIST_HEAD(&rbd_dev->node);
@@ -4021,7 +4053,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 		goto out_err;
 	}
 
-	snapid = cpu_to_le64(CEPH_NOSNAP);
+	snapid = cpu_to_le64(rbd_dev->spec->snap_id);
 	ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name,
 				"rbd", "get_parent",
 				&snapid, sizeof (snapid),
@@ -4059,7 +4091,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 
 	ret = -EIO;
 	if (pool_id > (u64)U32_MAX) {
-		rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
+		rbd_warn(NULL, "parent pool id too large (%llu > %u)",
 			(unsigned long long)pool_id, U32_MAX);
 		goto out_err;
 	}
@@ -4083,6 +4115,8 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 		parent_spec->snap_id = snap_id;
 		rbd_dev->parent_spec = parent_spec;
 		parent_spec = NULL;	/* rbd_dev now owns this */
+	} else {
+		kfree(image_id);
 	}
 
 	/*
@@ -4110,8 +4144,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 			 * overlap is zero we just pretend there was
 			 * no parent image.
 			 */
-			rbd_warn(rbd_dev, "ignoring parent of "
-						"clone with overlap 0\n");
+			rbd_warn(rbd_dev, "ignoring parent with overlap 0");
 		}
 	}
 out:
@@ -4279,18 +4312,38 @@ static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name)
 }
 
 /*
- * When an rbd image has a parent image, it is identified by the
- * pool, image, and snapshot ids (not names).  This function fills
- * in the names for those ids.  (It's OK if we can't figure out the
- * name for an image id, but the pool and snapshot ids should always
- * exist and have names.)  All names in an rbd spec are dynamically
- * allocated.
+ * An image being mapped will have everything but the snap id.
+ */
+static int rbd_spec_fill_snap_id(struct rbd_device *rbd_dev)
+{
+	struct rbd_spec *spec = rbd_dev->spec;
+
+	rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name);
+	rbd_assert(spec->image_id && spec->image_name);
+	rbd_assert(spec->snap_name);
+
+	if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
+		u64 snap_id;
+
+		snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
+		if (snap_id == CEPH_NOSNAP)
+			return -ENOENT;
+
+		spec->snap_id = snap_id;
+	} else {
+		spec->snap_id = CEPH_NOSNAP;
+	}
+
+	return 0;
+}
+
+/*
+ * A parent image will have all ids but none of the names.
  *
- * When an image being mapped (not a parent) is probed, we have the
- * pool name and pool id, image name and image id, and the snapshot
- * name.  The only thing we're missing is the snapshot id.
+ * All names in an rbd spec are dynamically allocated.  It's OK if we
+ * can't figure out the name for an image id.
  */
-static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
+static int rbd_spec_fill_names(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_spec *spec = rbd_dev->spec;
@@ -4299,24 +4352,9 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
 	const char *snap_name;
 	int ret;
 
-	/*
-	 * An image being mapped will have the pool name (etc.), but
-	 * we need to look up the snapshot id.
-	 */
-	if (spec->pool_name) {
-		if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) {
-			u64 snap_id;
-
-			snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name);
-			if (snap_id == CEPH_NOSNAP)
-				return -ENOENT;
-			spec->snap_id = snap_id;
-		} else {
-			spec->snap_id = CEPH_NOSNAP;
-		}
-
-		return 0;
-	}
+	rbd_assert(spec->pool_id != CEPH_NOPOOL);
+	rbd_assert(spec->image_id);
+	rbd_assert(spec->snap_id != CEPH_NOSNAP);
 
 	/* Get the pool name; we have to make our own copy of this */
 
@@ -4335,7 +4373,7 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
 	if (!image_name)
 		rbd_warn(rbd_dev, "unable to get image name");
 
-	/* Look up the snapshot name, and make a copy */
+	/* Fetch the snapshot name */
 
 	snap_name = rbd_snap_name(rbd_dev, spec->snap_id);
 	if (IS_ERR(snap_name)) {
@@ -4348,10 +4386,10 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev)
 	spec->snap_name = snap_name;
 
 	return 0;
+
 out_err:
 	kfree(image_name);
 	kfree(pool_name);
-
 	return ret;
 }
 
@@ -4483,43 +4521,22 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
 			return ret;
 	}
 
-	/*
-	 * If the image supports layering, get the parent info.  We
-	 * need to probe the first time regardless.  Thereafter we
-	 * only need to if there's a parent, to see if it has
-	 * disappeared due to the mapped image getting flattened.
-	 */
-	if (rbd_dev->header.features & RBD_FEATURE_LAYERING &&
-			(first_time || rbd_dev->parent_spec)) {
-		bool warn;
-
-		ret = rbd_dev_v2_parent_info(rbd_dev);
-		if (ret)
-			return ret;
-
-		/*
-		 * Print a warning if this is the initial probe and
-		 * the image has a parent.  Don't print it if the
-		 * image now being probed is itself a parent.  We
-		 * can tell at this point because we won't know its
-		 * pool name yet (just its pool id).
-		 */
-		warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name;
-		if (first_time && warn)
-			rbd_warn(rbd_dev, "WARNING: kernel layering "
-					"is EXPERIMENTAL!");
-	}
-
-	if (rbd_dev->spec->snap_id == CEPH_NOSNAP)
-		if (rbd_dev->mapping.size != rbd_dev->header.image_size)
-			rbd_dev->mapping.size = rbd_dev->header.image_size;
-
 	ret = rbd_dev_v2_snap_context(rbd_dev);
 	dout("rbd_dev_v2_snap_context returned %d\n", ret);
 
 	return ret;
 }
 
+static int rbd_dev_header_info(struct rbd_device *rbd_dev)
+{
+	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+
+	if (rbd_dev->image_format == 1)
+		return rbd_dev_v1_header_info(rbd_dev);
+
+	return rbd_dev_v2_header_info(rbd_dev);
+}
+
 static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
 {
 	struct device *dev;
@@ -5066,12 +5083,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 	ret = rbd_dev_mapping_set(rbd_dev);
 	if (ret)
 		goto err_out_disk;
+
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
 	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
+	rbd_dev->rq_wq = alloc_workqueue(rbd_dev->disk->disk_name, 0, 0);
+	if (!rbd_dev->rq_wq)
+		goto err_out_mapping;
+
 	ret = rbd_bus_add_dev(rbd_dev);
 	if (ret)
-		goto err_out_mapping;
+		goto err_out_workqueue;
 
 	/* Everything's ready.  Announce the disk to the world. */
 
@@ -5083,6 +5105,9 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 
 	return ret;
 
+err_out_workqueue:
+	destroy_workqueue(rbd_dev->rq_wq);
+	rbd_dev->rq_wq = NULL;
 err_out_mapping:
 	rbd_dev_mapping_clear(rbd_dev);
 err_out_disk:
@@ -5155,8 +5180,6 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
 	ret = rbd_dev_image_id(rbd_dev);
 	if (ret)
 		return ret;
-	rbd_assert(rbd_dev->spec->image_id);
-	rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
 
 	ret = rbd_dev_header_name(rbd_dev);
 	if (ret)
@@ -5168,25 +5191,45 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
 			goto out_header_name;
 	}
 
-	if (rbd_dev->image_format == 1)
-		ret = rbd_dev_v1_header_info(rbd_dev);
-	else
-		ret = rbd_dev_v2_header_info(rbd_dev);
+	ret = rbd_dev_header_info(rbd_dev);
 	if (ret)
 		goto err_out_watch;
 
-	ret = rbd_dev_spec_update(rbd_dev);
+	/*
+	 * If this image is the one being mapped, we have pool name and
+	 * id, image name and id, and snap name - need to fill snap id.
+	 * Otherwise this is a parent image, identified by pool, image
+	 * and snap ids - need to fill in names for those ids.
+	 */
+	if (mapping)
+		ret = rbd_spec_fill_snap_id(rbd_dev);
+	else
+		ret = rbd_spec_fill_names(rbd_dev);
 	if (ret)
 		goto err_out_probe;
 
+	if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
+		ret = rbd_dev_v2_parent_info(rbd_dev);
+		if (ret)
+			goto err_out_probe;
+
+		/*
+		 * Need to warn users if this image is the one being
+		 * mapped and has a parent.
+		 */
+		if (mapping && rbd_dev->parent_spec)
+			rbd_warn(rbd_dev,
+				 "WARNING: kernel layering is EXPERIMENTAL!");
+	}
+
 	ret = rbd_dev_probe_parent(rbd_dev);
 	if (ret)
 		goto err_out_probe;
 
 	dout("discovered format %u image, header name is %s\n",
 		rbd_dev->image_format, rbd_dev->header_name);
-
 	return 0;
+
 err_out_probe:
 	rbd_dev_unprobe(rbd_dev);
 err_out_watch:
@@ -5199,9 +5242,6 @@ err_out_format:
 	rbd_dev->image_format = 0;
 	kfree(rbd_dev->spec->image_id);
 	rbd_dev->spec->image_id = NULL;
-
-	dout("probe failed, returning %d\n", ret);
-
 	return ret;
 }
 
@@ -5243,7 +5283,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	/* The ceph file layout needs to fit pool id in 32 bits */
 
 	if (spec->pool_id > (u64)U32_MAX) {
-		rbd_warn(NULL, "pool id too large (%llu > %u)\n",
+		rbd_warn(NULL, "pool id too large (%llu > %u)",
 				(unsigned long long)spec->pool_id, U32_MAX);
 		rc = -EIO;
 		goto err_out_client;
@@ -5314,6 +5354,7 @@ static void rbd_dev_device_release(struct device *dev)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
 
+	destroy_workqueue(rbd_dev->rq_wq);
 	rbd_free_disk(rbd_dev);
 	clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
 	rbd_dev_mapping_clear(rbd_dev);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f63d358..0a58140 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -15,17 +15,22 @@
 #include <linux/numa.h>
 
 #define PART_BITS 4
+#define VQ_NAME_LEN 16
 
 static int major;
 static DEFINE_IDA(vd_index_ida);
 
 static struct workqueue_struct *virtblk_wq;
 
+struct virtio_blk_vq {
+	struct virtqueue *vq;
+	spinlock_t lock;
+	char name[VQ_NAME_LEN];
+} ____cacheline_aligned_in_smp;
+
 struct virtio_blk
 {
 	struct virtio_device *vdev;
-	struct virtqueue *vq;
-	spinlock_t vq_lock;
 
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
@@ -47,6 +52,10 @@ struct virtio_blk
 
 	/* Ida index - used to track minor number allocations. */
 	int index;
+
+	/* num of vqs */
+	int num_vqs;
+	struct virtio_blk_vq *vqs;
 };
 
 struct virtblk_req
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq)
 {
 	struct virtio_blk *vblk = vq->vdev->priv;
 	bool req_done = false;
+	int qid = vq->index;
 	struct virtblk_req *vbr;
 	unsigned long flags;
 	unsigned int len;
 
-	spin_lock_irqsave(&vblk->vq_lock, flags);
+	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
 	do {
 		virtqueue_disable_cb(vq);
-		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
+		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
 			blk_mq_complete_request(vbr->req);
 			req_done = true;
 		}
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq)
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
 		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
-	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
 static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
+	int qid = hctx->queue_num;
 	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
 	bool notify = false;
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
-	spin_lock_irqsave(&vblk->vq_lock, flags);
-	err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
+	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
+	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
 	if (err) {
-		virtqueue_kick(vblk->vq);
+		virtqueue_kick(vblk->vqs[qid].vq);
 		blk_mq_stop_hw_queue(hctx);
-		spin_unlock_irqrestore(&vblk->vq_lock, flags);
+		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last && virtqueue_kick_prepare(vblk->vq))
+	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
 		notify = true;
-	spin_unlock_irqrestore(&vblk->vq_lock, flags);
+	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 
 	if (notify)
-		virtqueue_notify(vblk->vq);
+		virtqueue_notify(vblk->vqs[qid].vq);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev)
 static int init_vq(struct virtio_blk *vblk)
 {
 	int err = 0;
+	int i;
+	vq_callback_t **callbacks;
+	const char **names;
+	struct virtqueue **vqs;
+	unsigned short num_vqs;
+	struct virtio_device *vdev = vblk->vdev;
+
+	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
+				   struct virtio_blk_config, num_queues,
+				   &num_vqs);
+	if (err)
+		num_vqs = 1;
+
+	vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+	if (!vblk->vqs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+	if (!names)
+		goto err_names;
+
+	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+	if (!callbacks)
+		goto err_callbacks;
+
+	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+	if (!vqs)
+		goto err_vqs;
 
-	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
-	if (IS_ERR(vblk->vq))
-		err = PTR_ERR(vblk->vq);
+	for (i = 0; i < num_vqs; i++) {
+		callbacks[i] = virtblk_done;
+		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
+		names[i] = vblk->vqs[i].name;
+	}
+
+	/* Discover virtqueues and write information to configuration.  */
+	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
+	if (err)
+		goto err_find_vqs;
 
+	for (i = 0; i < num_vqs; i++) {
+		spin_lock_init(&vblk->vqs[i].lock);
+		vblk->vqs[i].vq = vqs[i];
+	}
+	vblk->num_vqs = num_vqs;
+
+ err_find_vqs:
+	kfree(vqs);
+ err_vqs:
+	kfree(callbacks);
+ err_callbacks:
+	kfree(names);
+ err_names:
+	if (err)
+		kfree(vblk->vqs);
+ out:
 	return err;
 }
 
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 	err = init_vq(vblk);
 	if (err)
 		goto out_free_vblk;
-	spin_lock_init(&vblk->vq_lock);
 
 	/* FIXME: How many partitions?  How long is a piece of string? */
 	vblk->disk = alloc_disk(1 << PART_BITS);
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	/* Default queue sizing is to fill the ring. */
 	if (!virtblk_queue_depth) {
-		virtblk_queue_depth = vblk->vq->num_free;
+		virtblk_queue_depth = vblk->vqs[0].vq->num_free;
 		/* ... but without indirect descs, we use 2 descs per req */
 		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
 			virtblk_queue_depth /= 2;
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
 	vblk->tag_set.ops = &virtio_mq_ops;
-	vblk->tag_set.nr_hw_queues = 1;
 	vblk->tag_set.queue_depth = virtblk_queue_depth;
 	vblk->tag_set.numa_node = NUMA_NO_NODE;
 	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		sizeof(struct virtblk_req) +
 		sizeof(struct scatterlist) * sg_elems;
 	vblk->tag_set.driver_data = vblk;
+	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
 
 	err = blk_mq_alloc_tag_set(&vblk->tag_set);
 	if (err)
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev)
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
 	vdev->config->del_vqs(vdev);
+	kfree(vblk->vqs);
 	kfree(vblk);
 
 	/* Only free device id if we don't have any users */
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = {
 static unsigned int features[] = {
 	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
-	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
+	VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
+	VIRTIO_BLK_F_MQ,
 };
 
 static struct virtio_driver virtio_blk = {
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 8bc4229..4ff8687 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -499,8 +499,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
 	}
 
 	/* Lookup the i2c hwclock */
-	for (hwclock = NULL;
-	     (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
+	for_each_node_by_name(hwclock, "i2c-hwclock") {
 		const char *loc = of_get_property(hwclock,
 				"hwctrl-location", NULL);
 		if (loc == NULL)
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 544f6d3..061407d 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop)
 		goto error_out;
 	}
 
-	/* Set ptr to new property if provided */
-	if (new_prop) {
-		/* Single property */
-		if (!strncmp(new_prop->name, "status", new_prop->length)) {
-			status = new_prop;
-
-		} else if (!strncmp(new_prop->name, "ibm,max-sg-len",
-					new_prop->length)) {
-			maxsglen = new_prop;
-
-		} else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
-					new_prop->length)) {
-			maxsyncop = new_prop;
-
-		} else {
-			/*
-			 * Skip the update, the property being updated
-			 * has no impact.
-			 */
-			goto out;
-		}
-	}
+	/*
+	 * If this is a property update, there are only certain properties that
+	 * we care about. Bail if it isn't in the below list
+	 */
+	if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+		goto out;
 
 	/* Perform property updates */
 	ret = nx842_OF_upd_status(new_devdata, status);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 8f6afbf..9b1ea0e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -393,6 +393,22 @@ config XILINX_VDMA
 	  channels, Memory Mapped to Stream (MM2S) and Stream to
 	  Memory Mapped (S2MM) for the data transfers.
 
+config DMA_SUN6I
+	tristate "Allwinner A31 SoCs DMA support"
+	depends on MACH_SUN6I || COMPILE_TEST
+	depends on RESET_CONTROLLER
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the DMA engine for Allwinner A31 SoCs.
+
+config NBPFAXI_DMA
+	tristate "Renesas Type-AXI NBPF DMA support"
+	select DMA_ENGINE
+	depends on ARM || COMPILE_TEST
+	help
+	  Support for "Type-AXI" NBPF DMA IPs from Renesas
+
 config DMA_ENGINE
 	bool
 
@@ -406,6 +422,7 @@ config DMA_ACPI
 config DMA_OF
 	def_bool y
 	depends on OF
+	select DMA_ENGINE
 
 comment "DMA Clients"
 	depends on DMA_ENGINE
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index bd9e7fa..c6adb92 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,5 +1,5 @@
-ccflags-$(CONFIG_DMADEVICES_DEBUG)  := -DDEBUG
-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
+subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG)  := -DDEBUG
+subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
 
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
@@ -48,3 +48,5 @@ obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
 obj-y += xilinx/
 obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
+obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
+obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
diff --git a/drivers/dma/TODO b/drivers/dma/TODO
index 734ed02..b8045cd 100644
--- a/drivers/dma/TODO
+++ b/drivers/dma/TODO
@@ -7,7 +7,6 @@ TODO for slave dma
 	- imx-dma
 	- imx-sdma
 	- mxs-dma.c
-	- dw_dmac
 	- intel_mid_dma
 4. Check other subsystems for dma drivers and merge/move to dmaengine
 5. Remove dma_slave_config's dma direction.
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 8114731..e34024b 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1040,7 +1040,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 
 		if (early_bytes) {
 			dev_vdbg(&pl08x->adev->dev,
-				"%s byte width LLIs (remain 0x%08x)\n",
+				"%s byte width LLIs (remain 0x%08zx)\n",
 				__func__, bd.remainder);
 			prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes,
 				num_llis++, &total_bytes);
@@ -1653,7 +1653,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
@@ -1662,7 +1662,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
 	dma_addr_t slave_addr;
 
 	dev_dbg(&pl08x->adev->dev,
-		"%s prepare cyclic transaction of %d/%d bytes %s %s\n",
+		"%s prepare cyclic transaction of %zd/%zd bytes %s %s\n",
 		__func__, period_len, buf_len,
 		direction == DMA_MEM_TO_DEV ? "to" : "from",
 		plchan->name);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c13a3bb..ca9dd26 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -294,14 +294,16 @@ static int atc_get_bytes_left(struct dma_chan *chan)
 			ret = -EINVAL;
 			goto out;
 		}
-		atchan->remain_desc -= (desc_cur->lli.ctrla & ATC_BTSIZE_MAX)
-						<< (desc_first->tx_width);
-		if (atchan->remain_desc < 0) {
+
+		count = (desc_cur->lli.ctrla & ATC_BTSIZE_MAX)
+			<< desc_first->tx_width;
+		if (atchan->remain_desc < count) {
 			ret = -EINVAL;
 			goto out;
-		} else {
-			ret = atchan->remain_desc;
 		}
+
+		atchan->remain_desc -= count;
+		ret = atchan->remain_desc;
 	} else {
 		/*
 		 * Get residual bytes when current
@@ -893,12 +895,11 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
  * @period_len: number of bytes for each period
  * @direction: transfer direction, to or from device
  * @flags: tx descriptor status flags
- * @context: transfer context (ignored)
  */
 static struct dma_async_tx_descriptor *
 atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index a036021..6800797 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -335,7 +335,7 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan)
 static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
 	enum dma_slave_buswidth dev_width;
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index 94c380f..6a9d89c 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -433,7 +433,7 @@ static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg(
 static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic(
 	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
 	struct jz4740_dma_desc *desc;
@@ -614,4 +614,4 @@ module_platform_driver(jz4740_dma_driver);
 
 MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
 MODULE_DESCRIPTION("JZ4740 DMA driver");
-MODULE_LICENSE("GPLv2");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index a27ded5..1af731b 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -279,6 +279,19 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 	channel_set_bit(dw, CH_EN, dwc->mask);
 }
 
+static void dwc_dostart_first_queued(struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc;
+
+	if (list_empty(&dwc->queue))
+		return;
+
+	list_move(dwc->queue.next, &dwc->active_list);
+	desc = dwc_first_active(dwc);
+	dev_vdbg(chan2dev(&dwc->chan), "%s: started %u\n", __func__, desc->txd.cookie);
+	dwc_dostart(dwc, desc);
+}
+
 /*----------------------------------------------------------------------*/
 
 static void
@@ -335,10 +348,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	 * the completed ones.
 	 */
 	list_splice_init(&dwc->active_list, &list);
-	if (!list_empty(&dwc->queue)) {
-		list_move(dwc->queue.next, &dwc->active_list);
-		dwc_dostart(dwc, dwc_first_active(dwc));
-	}
+	dwc_dostart_first_queued(dwc);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -467,10 +477,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
 	/* Try to continue after resetting the channel... */
 	dwc_chan_disable(dw, dwc);
 
-	if (!list_empty(&dwc->queue)) {
-		list_move(dwc->queue.next, &dwc->active_list);
-		dwc_dostart(dwc, dwc_first_active(dwc));
-	}
+	dwc_dostart_first_queued(dwc);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 
@@ -677,17 +684,9 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
 	 * possible, perhaps even appending to those already submitted
 	 * for DMA. But this is hard to do in a race-free manner.
 	 */
-	if (list_empty(&dwc->active_list)) {
-		dev_vdbg(chan2dev(tx->chan), "%s: started %u\n", __func__,
-				desc->txd.cookie);
-		list_add_tail(&desc->desc_node, &dwc->active_list);
-		dwc_dostart(dwc, dwc_first_active(dwc));
-	} else {
-		dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__,
-				desc->txd.cookie);
 
-		list_add_tail(&desc->desc_node, &dwc->queue);
-	}
+	dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie);
+	list_add_tail(&desc->desc_node, &dwc->queue);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -1092,9 +1091,12 @@ dwc_tx_status(struct dma_chan *chan,
 static void dwc_issue_pending(struct dma_chan *chan)
 {
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	unsigned long		flags;
 
-	if (!list_empty(&dwc->queue))
-		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+	spin_lock_irqsave(&dwc->lock, flags);
+	if (list_empty(&dwc->active_list))
+		dwc_dostart_first_queued(dwc);
+	spin_unlock_irqrestore(&dwc->lock, flags);
 }
 
 static int dwc_alloc_chan_resources(struct dma_chan *chan)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index b512caf..7b65633 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -23,6 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/of.h>
 
 #include <linux/platform_data/edma.h>
 
@@ -256,8 +257,13 @@ static int edma_terminate_all(struct edma_chan *echan)
 	 * echan->edesc is NULL and exit.)
 	 */
 	if (echan->edesc) {
+		int cyclic = echan->edesc->cyclic;
 		echan->edesc = NULL;
 		edma_stop(echan->ch_num);
+		/* Move the cyclic channel back to default queue */
+		if (cyclic)
+			edma_assign_channel_eventq(echan->ch_num,
+						   EVENTQ_DEFAULT);
 	}
 
 	vchan_get_all_descriptors(&echan->vchan, &head);
@@ -592,7 +598,7 @@ struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
 static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long tx_flags, void *context)
+	unsigned long tx_flags)
 {
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
@@ -718,12 +724,15 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
 		edesc->absync = ret;
 
 		/*
-		 * Enable interrupts for every period because callback
-		 * has to be called for every period.
+		 * Enable period interrupt only if it is requested
 		 */
-		edesc->pset[i].param.opt |= TCINTEN;
+		if (tx_flags & DMA_PREP_INTERRUPT)
+			edesc->pset[i].param.opt |= TCINTEN;
 	}
 
+	/* Place the cyclic channel to highest priority queue */
+	edma_assign_channel_eventq(echan->ch_num, EVENTQ_0);
+
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
 }
 
@@ -993,7 +1002,7 @@ static int edma_dma_device_slave_caps(struct dma_chan *dchan,
 	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	caps->cmd_pause = true;
 	caps->cmd_terminate = true;
-	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 
 	return 0;
 }
@@ -1040,7 +1049,7 @@ static int edma_probe(struct platform_device *pdev)
 	ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY);
 	if (ecc->dummy_slot < 0) {
 		dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n");
-		return -EIO;
+		return ecc->dummy_slot;
 	}
 
 	dma_cap_zero(ecc->dma_slave.cap_mask);
@@ -1125,7 +1134,7 @@ static int edma_init(void)
 		}
 	}
 
-	if (EDMA_CTLRS == 2) {
+	if (!of_have_populated_dt() && EDMA_CTLRS == 2) {
 		pdev1 = platform_device_register_full(&edma_dev_info1);
 		if (IS_ERR(pdev1)) {
 			platform_driver_unregister(&edma_driver);
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index cb4bf68..7650470 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -1092,7 +1092,6 @@ fail:
  * @period_len: length of a single period
  * @dir: direction of the operation
  * @flags: tx descriptor status flags
- * @context: operation context (ignored)
  *
  * Prepares a descriptor for cyclic DMA operation. This means that once the
  * descriptor is submitted, we will be submitting in a @period_len sized
@@ -1105,8 +1104,7 @@ fail:
 static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 			   size_t buf_len, size_t period_len,
-			   enum dma_transfer_direction dir, unsigned long flags,
-			   void *context)
+			   enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
 	struct ep93xx_dma_desc *desc, *first;
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index b396a7f..3c5711d 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -248,11 +248,12 @@ static void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
 			unsigned int slot, bool enable)
 {
 	u32 ch = fsl_chan->vchan.chan.chan_id;
-	void __iomem *muxaddr = fsl_chan->edma->muxbase[ch / DMAMUX_NR];
+	void __iomem *muxaddr;
 	unsigned chans_per_mux, ch_off;
 
 	chans_per_mux = fsl_chan->edma->n_chans / DMAMUX_NR;
 	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
+	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
 
 	if (enable)
 		edma_writeb(fsl_chan->edma,
@@ -516,7 +517,7 @@ err:
 static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
 	struct fsl_edma_desc *fsl_desc;
@@ -724,6 +725,7 @@ static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec,
 {
 	struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data;
 	struct dma_chan *chan, *_chan;
+	unsigned long chans_per_mux = fsl_edma->n_chans / DMAMUX_NR;
 
 	if (dma_spec->args_count != 2)
 		return NULL;
@@ -732,7 +734,7 @@ static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec,
 	list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, device_node) {
 		if (chan->client_count)
 			continue;
-		if ((chan->chan_id / DMAMUX_NR) == dma_spec->args[0]) {
+		if ((chan->chan_id / chans_per_mux) == dma_spec->args[0]) {
 			chan = dma_get_slave_channel(chan);
 			if (chan) {
 				chan->device->privatecnt++;
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index e0fec68..d5d6885 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -396,10 +396,17 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
 	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
 	struct fsl_desc_sw *child;
-	unsigned long flags;
 	dma_cookie_t cookie = -EINVAL;
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
+
+#ifdef CONFIG_PM
+	if (unlikely(chan->pm_state != RUNNING)) {
+		chan_dbg(chan, "cannot submit due to suspend\n");
+		spin_unlock_bh(&chan->desc_lock);
+		return -1;
+	}
+#endif
 
 	/*
 	 * assign cookies to all of the software descriptors
@@ -412,7 +419,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	/* put this transaction onto the tail of the pending queue */
 	append_ld_queue(chan, desc);
 
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);
 
 	return cookie;
 }
@@ -459,6 +466,88 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
 }
 
 /**
+ * fsldma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ * All descriptors should only be freed in this function.
+ */
+static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
+		if (async_tx_test_ack(&desc->async_tx))
+			fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsldma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ * @cookie: Freescale DMA transaction identifier
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	dma_cookie_t ret = cookie;
+
+	BUG_ON(txd->cookie < 0);
+
+	if (txd->cookie > 0) {
+		ret = txd->cookie;
+
+		/* Run the link descriptor callback function */
+		if (txd->callback) {
+			chan_dbg(chan, "LD %p callback\n", desc);
+			txd->callback(txd->callback_param);
+		}
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	return ret;
+}
+
+/**
+ * fsldma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: Freescale DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api, or move it to
+ * queue ld_completed.
+ */
+static void fsldma_clean_running_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	/* Remove from the list of transactions */
+	list_del(&desc->node);
+
+	/*
+	 * the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/*
+		 * Move this descriptor to the list of descriptors which is
+		 * completed, but still awaiting the 'ack' bit to be set.
+		 */
+		list_add_tail(&desc->node, &chan->ld_completed);
+		return;
+	}
+
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
  * fsl_chan_xfer_ld_queue - transfer any pending transactions
  * @chan : Freescale DMA channel
  *
@@ -526,31 +615,58 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
 }
 
 /**
- * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
+ * fsldma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
  * @chan: Freescale DMA channel
- * @desc: descriptor to cleanup and free
  *
- * This function is used on a descriptor which has been executed by the DMA
- * controller. It will run any callbacks, submit any dependencies, and then
- * free the descriptor.
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
  */
-static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
-				      struct fsl_desc_sw *desc)
+static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
 {
-	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	struct fsl_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	dma_addr_t curr_phys = get_cdar(chan);
+	int seen_current = 0;
+
+	fsldma_clean_completed_descriptor(chan);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+		/*
+		 * do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/*
+		 * stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (desc->async_tx.phys == curr_phys) {
+			seen_current = 1;
+			if (!dma_is_idle(chan))
+				break;
+		}
+
+		cookie = fsldma_run_tx_complete_actions(chan, desc, cookie);
 
-	/* Run the link descriptor callback function */
-	if (txd->callback) {
-		chan_dbg(chan, "LD %p callback\n", desc);
-		txd->callback(txd->callback_param);
+		fsldma_clean_running_descriptor(chan, desc);
 	}
 
-	/* Run any dependencies */
-	dma_run_dependencies(txd);
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
 
-	dma_descriptor_unmap(txd);
-	chan_dbg(chan, "LD %p free\n", desc);
-	dma_pool_free(chan->desc_pool, desc, txd->phys);
+	if (cookie > 0)
+		chan->common.completed_cookie = cookie;
 }
 
 /**
@@ -617,13 +733,14 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
 static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;
 
 	chan_dbg(chan, "free all channel resources\n");
-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
+	fsldma_cleanup_descriptors(chan);
 	fsldma_free_desc_list(chan, &chan->ld_pending);
 	fsldma_free_desc_list(chan, &chan->ld_running);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	fsldma_free_desc_list(chan, &chan->ld_completed);
+	spin_unlock_bh(&chan->desc_lock);
 
 	dma_pool_destroy(chan->desc_pool);
 	chan->desc_pool = NULL;
@@ -842,7 +959,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 {
 	struct dma_slave_config *config;
 	struct fsldma_chan *chan;
-	unsigned long flags;
 	int size;
 
 	if (!dchan)
@@ -852,7 +968,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
-		spin_lock_irqsave(&chan->desc_lock, flags);
+		spin_lock_bh(&chan->desc_lock);
 
 		/* Halt the DMA engine */
 		dma_halt(chan);
@@ -860,9 +976,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 		/* Remove and free all of the descriptors in the LD queue */
 		fsldma_free_desc_list(chan, &chan->ld_pending);
 		fsldma_free_desc_list(chan, &chan->ld_running);
+		fsldma_free_desc_list(chan, &chan->ld_completed);
 		chan->idle = true;
 
-		spin_unlock_irqrestore(&chan->desc_lock, flags);
+		spin_unlock_bh(&chan->desc_lock);
 		return 0;
 
 	case DMA_SLAVE_CONFIG:
@@ -904,11 +1021,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
 static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
 {
 	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	unsigned long flags;
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
+	spin_lock_bh(&chan->desc_lock);
 	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	spin_unlock_bh(&chan->desc_lock);
 }
 
 /**
@@ -919,6 +1035,17 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
 					dma_cookie_t cookie,
 					struct dma_tx_state *txstate)
 {
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_bh(&chan->desc_lock);
+	fsldma_cleanup_descriptors(chan);
+	spin_unlock_bh(&chan->desc_lock);
+
 	return dma_cookie_status(dchan, cookie, txstate);
 }
 
@@ -996,52 +1123,18 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
 static void dma_do_tasklet(unsigned long data)
 {
 	struct fsldma_chan *chan = (struct fsldma_chan *)data;
-	struct fsl_desc_sw *desc, *_desc;
-	LIST_HEAD(ld_cleanup);
-	unsigned long flags;
 
 	chan_dbg(chan, "tasklet entry\n");
 
-	spin_lock_irqsave(&chan->desc_lock, flags);
-
-	/* update the cookie if we have some descriptors to cleanup */
-	if (!list_empty(&chan->ld_running)) {
-		dma_cookie_t cookie;
-
-		desc = to_fsl_desc(chan->ld_running.prev);
-		cookie = desc->async_tx.cookie;
-		dma_cookie_complete(&desc->async_tx);
-
-		chan_dbg(chan, "completed_cookie=%d\n", cookie);
-	}
-
-	/*
-	 * move the descriptors to a temporary list so we can drop the lock
-	 * during the entire cleanup operation
-	 */
-	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
+	spin_lock_bh(&chan->desc_lock);
 
 	/* the hardware is now idle and ready for more */
 	chan->idle = true;
 
-	/*
-	 * Start any pending transactions automatically
-	 *
-	 * In the ideal case, we keep the DMA controller busy while we go
-	 * ahead and free the descriptors below.
-	 */
-	fsl_chan_xfer_ld_queue(chan);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	/* Run the callback for each descriptor, in order */
-	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
-
-		/* Remove from the list of transactions */
-		list_del(&desc->node);
+	/* Run all cleanup for descriptors which have been completed */
+	fsldma_cleanup_descriptors(chan);
 
-		/* Run all cleanup for this descriptor */
-		fsldma_cleanup_descriptor(chan, desc);
-	}
+	spin_unlock_bh(&chan->desc_lock);
 
 	chan_dbg(chan, "tasklet exit\n");
 }
@@ -1225,7 +1318,11 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
 	spin_lock_init(&chan->desc_lock);
 	INIT_LIST_HEAD(&chan->ld_pending);
 	INIT_LIST_HEAD(&chan->ld_running);
+	INIT_LIST_HEAD(&chan->ld_completed);
 	chan->idle = true;
+#ifdef CONFIG_PM
+	chan->pm_state = RUNNING;
+#endif
 
 	chan->common.device = &fdev->common;
 	dma_cookie_init(&chan->common);
@@ -1365,6 +1462,69 @@ static int fsldma_of_remove(struct platform_device *op)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int fsldma_suspend_late(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fsldma_device *fdev = platform_get_drvdata(pdev);
+	struct fsldma_chan *chan;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		if (unlikely(!chan->idle))
+			goto out;
+		chan->regs_save.mr = get_mr(chan);
+		chan->pm_state = SUSPENDED;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return 0;
+
+out:
+	for (; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+		chan->pm_state = RUNNING;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return -EBUSY;
+}
+
+static int fsldma_resume_early(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct fsldma_device *fdev = platform_get_drvdata(pdev);
+	struct fsldma_chan *chan;
+	u32 mode;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		mode = chan->regs_save.mr
+			& ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA;
+		set_mr(chan, mode);
+		chan->pm_state = RUNNING;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops fsldma_pm_ops = {
+	.suspend_late	= fsldma_suspend_late,
+	.resume_early	= fsldma_resume_early,
+};
+#endif
+
 static const struct of_device_id fsldma_of_ids[] = {
 	{ .compatible = "fsl,elo3-dma", },
 	{ .compatible = "fsl,eloplus-dma", },
@@ -1377,6 +1537,9 @@ static struct platform_driver fsldma_of_driver = {
 		.name = "fsl-elo-dma",
 		.owner = THIS_MODULE,
 		.of_match_table = fsldma_of_ids,
+#ifdef CONFIG_PM
+		.pm = &fsldma_pm_ops,
+#endif
 	},
 	.probe = fsldma_of_probe,
 	.remove = fsldma_of_remove,
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index d56e835..239c20c 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -134,12 +134,36 @@ struct fsldma_device {
 #define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
 #define FSL_DMA_CHAN_START_EXT	0x00002000
 
+#ifdef CONFIG_PM
+struct fsldma_chan_regs_save {
+	u32 mr;
+};
+
+enum fsldma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
+#endif
+
 struct fsldma_chan {
 	char name[8];			/* Channel name */
 	struct fsldma_chan_regs __iomem *regs;
 	spinlock_t desc_lock;		/* Descriptor operation lock */
-	struct list_head ld_pending;	/* Link descriptors queue */
-	struct list_head ld_running;	/* Link descriptors queue */
+	/*
+	 * Descriptors which are queued to run, but have not yet been
+	 * submitted to the hardware for execution
+	 */
+	struct list_head ld_pending;
+	/*
+	 * Descriptors which are currently being executed by the hardware
+	 */
+	struct list_head ld_running;
+	/*
+	 * Descriptors which have finished execution by the hardware. These
+	 * descriptors have already had their cleanup actions run. They are
+	 * waiting for the ACK bit to be set by the async_tx API.
+	 */
+	struct list_head ld_completed;	/* Link descriptors queue */
 	struct dma_chan common;		/* DMA common channel */
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 	struct device *dev;		/* Channel device */
@@ -148,6 +172,10 @@ struct fsldma_chan {
 	struct tasklet_struct tasklet;
 	u32 feature;
 	bool idle;			/* DMA controller is idle */
+#ifdef CONFIG_PM
+	struct fsldma_chan_regs_save regs_save;
+	enum fsldma_pm_state pm_state;
+#endif
 
 	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
 	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 286660a..9d2c9e7 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -866,7 +866,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
 static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 14867e3..f7626e3 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -271,6 +271,7 @@ struct sdma_channel {
 	unsigned int			chn_count;
 	unsigned int			chn_real_count;
 	struct tasklet_struct		tasklet;
+	struct imx_dma_data		data;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -749,6 +750,11 @@ static void sdma_get_pc(struct sdma_channel *sdmac,
 		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
 		per_2_per = sdma->script_addrs->per_2_per_addr;
 		break;
+	case IMX_DMATYPE_ASRC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
 	case IMX_DMATYPE_MSHC:
 		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
 		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
@@ -911,14 +917,13 @@ static int sdma_request_channel(struct sdma_channel *sdmac)
 	int channel = sdmac->channel;
 	int ret = -EBUSY;
 
-	sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL);
+	sdmac->bd = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys,
+					GFP_KERNEL);
 	if (!sdmac->bd) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	memset(sdmac->bd, 0, PAGE_SIZE);
-
 	sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys;
 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
 
@@ -1120,7 +1125,7 @@ err_out:
 static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1414,12 +1419,14 @@ err_dma_alloc:
 
 static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param)
 {
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct imx_dma_data *data = fn_param;
 
 	if (!imx_dma_is_general_purpose(chan))
 		return false;
 
-	chan->private = data;
+	sdmac->data = *data;
+	chan->private = &sdmac->data;
 
 	return true;
 }
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index 128ca14..bbf6292 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1532,11 +1532,17 @@ static int idmac_alloc_chan_resources(struct dma_chan *chan)
 #ifdef DEBUG
 	if (chan->chan_id == IDMAC_IC_7) {
 		ic_sof = ipu_irq_map(69);
-		if (ic_sof > 0)
-			request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+		if (ic_sof > 0) {
+			ret = request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+			if (ret)
+				dev_err(&chan->dev->device, "request irq failed for IC SOF");
+		}
 		ic_eof = ipu_irq_map(70);
-		if (ic_eof > 0)
-			request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+		if (ic_eof > 0) {
+			ret = request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+			if (ret)
+				dev_err(&chan->dev->device, "request irq failed for IC EOF");
+		}
 	}
 #endif
 
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index a7b186d..a1a4db5 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -601,7 +601,7 @@ static struct dma_async_tx_descriptor *
 mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan,
 			 dma_addr_t buf_addr, size_t len, size_t period_len,
 			 enum dma_transfer_direction direction,
-			 unsigned long flags, void *context)
+			 unsigned long flags)
 {
 	struct mmp_pdma_chan *chan;
 	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 724f7f4..6ad30e2 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -389,7 +389,7 @@ struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac)
 static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
 	struct mmp_tdma_desc *desc;
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 2ad4373..881db2b 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -53,6 +53,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
+#include <linux/of_dma.h>
 #include <linux/of_platform.h>
 
 #include <linux/random.h>
@@ -1036,7 +1037,15 @@ static int mpc_dma_probe(struct platform_device *op)
 	if (retval)
 		goto err_free2;
 
-	return retval;
+	/* Register with OF helpers for DMA lookups (nonfatal) */
+	if (dev->of_node) {
+		retval = of_dma_controller_register(dev->of_node,
+						of_dma_xlate_by_chan_id, mdma);
+		if (retval)
+			dev_warn(dev, "Could not register for OF lookup\n");
+	}
+
+	return 0;
 
 err_free2:
 	if (mdma->is_mpc8308)
@@ -1057,6 +1066,8 @@ static int mpc_dma_remove(struct platform_device *op)
 	struct device *dev = &op->dev;
 	struct mpc_dma *mdma = dev_get_drvdata(dev);
 
+	if (dev->of_node)
+		of_dma_controller_free(dev->of_node);
 	dma_async_device_unregister(&mdma->dma);
 	if (mdma->is_mpc8308) {
 		free_irq(mdma->irq2, mdma);
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index ead4913..5ea6120 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -413,16 +413,14 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
 	int ret;
 
-	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
-				CCW_BLOCK_SIZE, &mxs_chan->ccw_phys,
-				GFP_KERNEL);
+	mxs_chan->ccw = dma_zalloc_coherent(mxs_dma->dma_device.dev,
+					    CCW_BLOCK_SIZE,
+					    &mxs_chan->ccw_phys, GFP_KERNEL);
 	if (!mxs_chan->ccw) {
 		ret = -ENOMEM;
 		goto err_alloc;
 	}
 
-	memset(mxs_chan->ccw, 0, CCW_BLOCK_SIZE);
-
 	if (mxs_chan->chan_irq != NO_IRQ) {
 		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
 					0, "mxs-dma", mxs_dma);
@@ -591,7 +589,7 @@ err_out:
 static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
new file mode 100644
index 0000000..5aeada5
--- /dev/null
+++ b/drivers/dma/nbpfaxi.c
@@ -0,0 +1,1517 @@
+/*
+ * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/dma/nbpfaxi.h>
+
+#include "dmaengine.h"
+
+#define NBPF_REG_CHAN_OFFSET	0
+#define NBPF_REG_CHAN_SIZE	0x40
+
+/* Channel Current Transaction Byte register */
+#define NBPF_CHAN_CUR_TR_BYTE	0x20
+
+/* Channel Status register */
+#define NBPF_CHAN_STAT	0x24
+#define NBPF_CHAN_STAT_EN	1
+#define NBPF_CHAN_STAT_TACT	4
+#define NBPF_CHAN_STAT_ERR	0x10
+#define NBPF_CHAN_STAT_END	0x20
+#define NBPF_CHAN_STAT_TC	0x40
+#define NBPF_CHAN_STAT_DER	0x400
+
+/* Channel Control register */
+#define NBPF_CHAN_CTRL	0x28
+#define NBPF_CHAN_CTRL_SETEN	1
+#define NBPF_CHAN_CTRL_CLREN	2
+#define NBPF_CHAN_CTRL_STG	4
+#define NBPF_CHAN_CTRL_SWRST	8
+#define NBPF_CHAN_CTRL_CLRRQ	0x10
+#define NBPF_CHAN_CTRL_CLREND	0x20
+#define NBPF_CHAN_CTRL_CLRTC	0x40
+#define NBPF_CHAN_CTRL_SETSUS	0x100
+#define NBPF_CHAN_CTRL_CLRSUS	0x200
+
+/* Channel Configuration register */
+#define NBPF_CHAN_CFG	0x2c
+#define NBPF_CHAN_CFG_SEL	7		/* terminal SELect: 0..7 */
+#define NBPF_CHAN_CFG_REQD	8		/* REQuest Direction: DMAREQ is 0: input, 1: output */
+#define NBPF_CHAN_CFG_LOEN	0x10		/* LOw ENable: low DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_HIEN	0x20		/* HIgh ENable: high DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_LVL	0x40		/* LeVeL: DMA request line is sensed as 0: edge, 1: level */
+#define NBPF_CHAN_CFG_AM	0x700		/* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */
+#define NBPF_CHAN_CFG_SDS	0xf000		/* Source Data Size: 0: 8 bits,... , 7: 1024 bits */
+#define NBPF_CHAN_CFG_DDS	0xf0000		/* Destination Data Size: as above */
+#define NBPF_CHAN_CFG_SAD	0x100000	/* Source ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_DAD	0x200000	/* Destination ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_TM	0x400000	/* Transfer Mode: 0: single, 1: block TM */
+#define NBPF_CHAN_CFG_DEM	0x1000000	/* DMAEND interrupt Mask */
+#define NBPF_CHAN_CFG_TCM	0x2000000	/* DMATCO interrupt Mask */
+#define NBPF_CHAN_CFG_SBE	0x8000000	/* Sweep Buffer Enable */
+#define NBPF_CHAN_CFG_RSEL	0x10000000	/* RM: Register Set sELect */
+#define NBPF_CHAN_CFG_RSW	0x20000000	/* RM: Register Select sWitch */
+#define NBPF_CHAN_CFG_REN	0x40000000	/* RM: Register Set Enable */
+#define NBPF_CHAN_CFG_DMS	0x80000000	/* 0: register mode (RM), 1: link mode (LM) */
+
+#define NBPF_CHAN_NXLA	0x38
+#define NBPF_CHAN_CRLA	0x3c
+
+/* Link Header field */
+#define NBPF_HEADER_LV	1
+#define NBPF_HEADER_LE	2
+#define NBPF_HEADER_WBD	4
+#define NBPF_HEADER_DIM	8
+
+#define NBPF_CTRL	0x300
+#define NBPF_CTRL_PR	1		/* 0: fixed priority, 1: round robin */
+#define NBPF_CTRL_LVINT	2		/* DMAEND and DMAERR signalling: 0: pulse, 1: level */
+
+#define NBPF_DSTAT_ER	0x314
+#define NBPF_DSTAT_END	0x318
+
+#define NBPF_DMA_BUSWIDTHS \
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct nbpf_config {
+	int num_channels;
+	int buffer_size;
+};
+
+/*
+ * We've got 3 types of objects, used to describe DMA transfers:
+ * 1. high-level descriptor, containing a struct dma_async_tx_descriptor object
+ *	in it, used to communicate with the user
+ * 2. hardware DMA link descriptors, that we pass to DMAC for DMA transfer
+ *	queuing, these must be DMAable, using either the streaming DMA API or
+ *	allocated from coherent memory - one per SG segment
+ * 3. one per SG segment descriptors, used to manage HW link descriptors from
+ *	(2). They do not have to be DMAable. They can either be (a) allocated
+ *	together with link descriptors as mixed (DMA / CPU) objects, or (b)
+ *	separately. Even if allocated separately it would be best to link them
+ *	to link descriptors once during channel resource allocation and always
+ *	use them as a single object.
+ * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall be
+ * treated as a single SG segment descriptor.
+ */
+
+struct nbpf_link_reg {
+	u32	header;
+	u32	src_addr;
+	u32	dst_addr;
+	u32	transaction_size;
+	u32	config;
+	u32	interval;
+	u32	extension;
+	u32	next;
+} __packed;
+
+struct nbpf_device;
+struct nbpf_channel;
+struct nbpf_desc;
+
+struct nbpf_link_desc {
+	struct nbpf_link_reg *hwdesc;
+	dma_addr_t hwdesc_dma_addr;
+	struct nbpf_desc *desc;
+	struct list_head node;
+};
+
+/**
+ * struct nbpf_desc - DMA transfer descriptor
+ * @async_tx:	dmaengine object
+ * @user_wait:	waiting for a user ack
+ * @length:	total transfer length
+ * @sg:		list of hardware descriptors, represented by struct nbpf_link_desc
+ * @node:	member in channel descriptor lists
+ */
+struct nbpf_desc {
+	struct dma_async_tx_descriptor async_tx;
+	bool user_wait;
+	size_t length;
+	struct nbpf_channel *chan;
+	struct list_head sg;
+	struct list_head node;
+};
+
+/* Take a wild guess: allocate 4 segments per descriptor */
+#define NBPF_SEGMENTS_PER_DESC 4
+#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) /	\
+	(sizeof(struct nbpf_desc) +					\
+	 NBPF_SEGMENTS_PER_DESC *					\
+	 (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg))))
+#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE)
+
+struct nbpf_desc_page {
+	struct list_head node;
+	struct nbpf_desc desc[NBPF_DESCS_PER_PAGE];
+	struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE];
+	struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE];
+};
+
+/**
+ * struct nbpf_channel - one DMAC channel
+ * @dma_chan:	standard dmaengine channel object
+ * @base:	register address base
+ * @nbpf:	DMAC
+ * @name:	IRQ name
+ * @irq:	IRQ number
+ * @slave_addr:	address for slave DMA
+ * @slave_width:slave data size in bytes
+ * @slave_burst:maximum slave burst size in bytes
+ * @terminal:	DMA terminal, assigned to this channel
+ * @dmarq_cfg:	DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG
+ * @flags:	configuration flags from DT
+ * @lock:	protect descriptor lists
+ * @free_links:	list of free link descriptors
+ * @free:	list of free descriptors
+ * @queued:	list of queued descriptors
+ * @active:	list of descriptors, scheduled for processing
+ * @done:	list of completed descriptors, waiting post-processing
+ * @desc_page:	list of additionally allocated descriptor pages - if any
+ */
+struct nbpf_channel {
+	struct dma_chan dma_chan;
+	struct tasklet_struct tasklet;
+	void __iomem *base;
+	struct nbpf_device *nbpf;
+	char name[16];
+	int irq;
+	dma_addr_t slave_src_addr;
+	size_t slave_src_width;
+	size_t slave_src_burst;
+	dma_addr_t slave_dst_addr;
+	size_t slave_dst_width;
+	size_t slave_dst_burst;
+	unsigned int terminal;
+	u32 dmarq_cfg;
+	unsigned long flags;
+	spinlock_t lock;
+	struct list_head free_links;
+	struct list_head free;
+	struct list_head queued;
+	struct list_head active;
+	struct list_head done;
+	struct list_head desc_page;
+	struct nbpf_desc *running;
+	bool paused;
+};
+
+struct nbpf_device {
+	struct dma_device dma_dev;
+	void __iomem *base;
+	struct clk *clk;
+	const struct nbpf_config *config;
+	struct nbpf_channel chan[];
+};
+
+enum nbpf_model {
+	NBPF1B4,
+	NBPF1B8,
+	NBPF1B16,
+	NBPF4B4,
+	NBPF4B8,
+	NBPF4B16,
+	NBPF8B4,
+	NBPF8B8,
+	NBPF8B16,
+};
+
+static struct nbpf_config nbpf_cfg[] = {
+	[NBPF1B4] = {
+		.num_channels = 1,
+		.buffer_size = 4,
+	},
+	[NBPF1B8] = {
+		.num_channels = 1,
+		.buffer_size = 8,
+	},
+	[NBPF1B16] = {
+		.num_channels = 1,
+		.buffer_size = 16,
+	},
+	[NBPF4B4] = {
+		.num_channels = 4,
+		.buffer_size = 4,
+	},
+	[NBPF4B8] = {
+		.num_channels = 4,
+		.buffer_size = 8,
+	},
+	[NBPF4B16] = {
+		.num_channels = 4,
+		.buffer_size = 16,
+	},
+	[NBPF8B4] = {
+		.num_channels = 8,
+		.buffer_size = 4,
+	},
+	[NBPF8B8] = {
+		.num_channels = 8,
+		.buffer_size = 8,
+	},
+	[NBPF8B16] = {
+		.num_channels = 8,
+		.buffer_size = 16,
+	},
+};
+
+#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan)
+
+/*
+ * dmaengine drivers seem to have a lot in common and instead of sharing more
+ * code, they reimplement those common algorithms independently. In this driver
+ * we try to separate the hardware-specific part from the (largely) generic
+ * part. This improves code readability and makes it possible in the future to
+ * reuse the generic code in form of a helper library. That generic code should
+ * be suitable for various DMA controllers, using transfer descriptors in RAM
+ * and pushing one SG list at a time to the DMA controller.
+ */
+
+/*		Hardware-specific part		*/
+
+static inline u32 nbpf_chan_read(struct nbpf_channel *chan,
+				 unsigned int offset)
+{
+	u32 data = ioread32(chan->base + offset);
+	dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, chan->base, offset, data);
+	return data;
+}
+
+static inline void nbpf_chan_write(struct nbpf_channel *chan,
+				   unsigned int offset, u32 data)
+{
+	iowrite32(data, chan->base + offset);
+	dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, chan->base, offset, data);
+}
+
+static inline u32 nbpf_read(struct nbpf_device *nbpf,
+			    unsigned int offset)
+{
+	u32 data = ioread32(nbpf->base + offset);
+	dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, nbpf->base, offset, data);
+	return data;
+}
+
+static inline void nbpf_write(struct nbpf_device *nbpf,
+			      unsigned int offset, u32 data)
+{
+	iowrite32(data, nbpf->base + offset);
+	dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, nbpf->base, offset, data);
+}
+
+static void nbpf_chan_halt(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+}
+
+static bool nbpf_status_get(struct nbpf_channel *chan)
+{
+	u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END);
+
+	return status & BIT(chan - chan->nbpf->chan);
+}
+
+static void nbpf_status_ack(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND);
+}
+
+static u32 nbpf_error_get(struct nbpf_device *nbpf)
+{
+	return nbpf_read(nbpf, NBPF_DSTAT_ER);
+}
+
+static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error)
+{
+	return nbpf->chan + __ffs(error);
+}
+
+static void nbpf_error_clear(struct nbpf_channel *chan)
+{
+	u32 status;
+	int i;
+
+	/* Stop the channel, make sure DMA has been aborted */
+	nbpf_chan_halt(chan);
+
+	for (i = 1000; i; i--) {
+		status = nbpf_chan_read(chan, NBPF_CHAN_STAT);
+		if (!(status & NBPF_CHAN_STAT_TACT))
+			break;
+		cpu_relax();
+	}
+
+	if (!i)
+		dev_err(chan->dma_chan.device->dev,
+			"%s(): abort timeout, channel status 0x%x\n", __func__, status);
+
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST);
+}
+
+static int nbpf_start(struct nbpf_desc *desc)
+{
+	struct nbpf_channel *chan = desc->chan;
+	struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node);
+
+	nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr);
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS);
+	chan->paused = false;
+
+	/* Software trigger MEMCPY - only MEMCPY uses the block mode */
+	if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM)
+		nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG);
+
+	dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__,
+		nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA));
+
+	return 0;
+}
+
+static void nbpf_chan_prepare(struct nbpf_channel *chan)
+{
+	chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ? NBPF_CHAN_CFG_HIEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LEVEL ?
+		 NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) |
+		chan->terminal;
+}
+
+static void nbpf_chan_prepare_default(struct nbpf_channel *chan)
+{
+	/* Don't output DMAACK */
+	chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400;
+	chan->terminal = 0;
+	chan->flags = 0;
+}
+
+static void nbpf_chan_configure(struct nbpf_channel *chan)
+{
+	/*
+	 * We assume, that only the link mode and DMA request line configuration
+	 * have to be set in the configuration register manually. Dynamic
+	 * per-transfer configuration will be loaded from transfer descriptors.
+	 */
+	nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg);
+}
+
+static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size)
+{
+	/* Maximum supported bursts depend on the buffer size */
+	return min_t(int, __ffs(size), ilog2(nbpf->config->buffer_size * 8));
+}
+
+static size_t nbpf_xfer_size(struct nbpf_device *nbpf,
+			     enum dma_slave_buswidth width, u32 burst)
+{
+	size_t size;
+
+	if (!burst)
+		burst = 1;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		size = 8 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		size = 4 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		size = 2 * burst;
+		break;
+
+	default:
+		pr_warn("%s(): invalid bus width %u\n", __func__, width);
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		size = burst;
+	}
+
+	return nbpf_xfer_ds(nbpf, size);
+}
+
+/*
+ * We need a way to recognise slaves, whose data is sent "raw" over the bus,
+ * i.e. it isn't known in advance how many bytes will be received. Therefore
+ * the slave driver has to provide a "large enough" buffer and either read the
+ * buffer, when it is full, or detect, that some data has arrived, then wait for
+ * a timeout, if no more data arrives - receive what's already there. We want to
+ * handle such slaves in a special way to allow an optimised mode for other
+ * users, for whom the amount of data is known in advance. So far there's no way
+ * to recognise such slaves. We use a data-width check to distinguish between
+ * the SD host and the PL011 UART.
+ */
+
+static int nbpf_prep_one(struct nbpf_link_desc *ldesc,
+			 enum dma_transfer_direction direction,
+			 dma_addr_t src, dma_addr_t dst, size_t size, bool last)
+{
+	struct nbpf_link_reg *hwdesc = ldesc->hwdesc;
+	struct nbpf_desc *desc = ldesc->desc;
+	struct nbpf_channel *chan = desc->chan;
+	struct device *dev = chan->dma_chan.device->dev;
+	size_t mem_xfer, slave_xfer;
+	bool can_burst;
+
+	hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV |
+		(last ? NBPF_HEADER_LE : 0);
+
+	hwdesc->src_addr = src;
+	hwdesc->dst_addr = dst;
+	hwdesc->transaction_size = size;
+
+	/*
+	 * set config: SAD, DAD, DDS, SDS, etc.
+	 * Note on transfer sizes: the DMAC can perform unaligned DMA transfers,
+	 * but it is important to have transaction size a multiple of both
+	 * receiver and transmitter transfer sizes. It is also possible to use
+	 * different RAM and device transfer sizes, and it does work well with
+	 * some devices, e.g. with V08R07S01E SD host controllers, which can use
+	 * 128 byte transfers. But this doesn't work with other devices,
+	 * especially when the transaction size is unknown. This is the case,
+	 * e.g. with serial drivers like amba-pl011.c. For reception it sets up
+	 * the transaction size of 4K and if fewer bytes are received, it
+	 * pauses DMA and reads out data received via DMA as well as those left
+	 * in the Rx FIFO. For this to work with the RAM side using burst
+	 * transfers we enable the SBE bit and terminate the transfer in our
+	 * DMA_PAUSE handler.
+	 */
+	mem_xfer = nbpf_xfer_ds(chan->nbpf, size);
+
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		can_burst = chan->slave_src_width >= 3;
+		slave_xfer = min(mem_xfer, can_burst ?
+				 chan->slave_src_burst : chan->slave_src_width);
+		/*
+		 * Is the slave narrower than 64 bits, i.e. isn't using the full
+		 * bus width and cannot use bursts?
+		 */
+		if (mem_xfer > chan->slave_src_burst && !can_burst)
+			mem_xfer = chan->slave_src_burst;
+		/* Device-to-RAM DMA is unreliable without REQD set */
+		hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) |
+			(NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD |
+			NBPF_CHAN_CFG_SBE;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ?
+				 chan->slave_dst_burst : chan->slave_dst_width);
+		hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+			(NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD;
+		break;
+
+	case DMA_MEM_TO_MEM:
+		hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM |
+			(NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+			(NBPF_CHAN_CFG_DDS & (mem_xfer << 16));
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	hwdesc->config |= chan->dmarq_cfg | (last ? 0 : NBPF_CHAN_CFG_DEM) |
+		NBPF_CHAN_CFG_DMS;
+
+	dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n",
+		__func__, &ldesc->hwdesc_dma_addr, hwdesc->header,
+		hwdesc->config, size, &src, &dst);
+
+	dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc),
+				   DMA_TO_DEVICE);
+
+	return 0;
+}
+
+static size_t nbpf_bytes_left(struct nbpf_channel *chan)
+{
+	return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE);
+}
+
+static void nbpf_configure(struct nbpf_device *nbpf)
+{
+	nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT);
+}
+
+static void nbpf_pause(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS);
+	/* See comment in nbpf_prep_one() */
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+}
+
+/*		Generic part			*/
+
+/* DMA ENGINE functions */
+static void nbpf_issue_pending(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	unsigned long flags;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (list_empty(&chan->queued))
+		goto unlock;
+
+	list_splice_tail_init(&chan->queued, &chan->active);
+
+	if (!chan->running) {
+		struct nbpf_desc *desc = list_first_entry(&chan->active,
+						struct nbpf_desc, node);
+		if (!nbpf_start(desc))
+			chan->running = desc;
+	}
+
+unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static enum dma_status nbpf_tx_status(struct dma_chan *dchan,
+		dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	enum dma_status status = dma_cookie_status(dchan, cookie, state);
+
+	if (state) {
+		dma_cookie_t running;
+		unsigned long flags;
+
+		spin_lock_irqsave(&chan->lock, flags);
+		running = chan->running ? chan->running->async_tx.cookie : -EINVAL;
+
+		if (cookie == running) {
+			state->residue = nbpf_bytes_left(chan);
+			dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__,
+				state->residue);
+		} else if (status == DMA_IN_PROGRESS) {
+			struct nbpf_desc *desc;
+			bool found = false;
+
+			list_for_each_entry(desc, &chan->active, node)
+				if (desc->async_tx.cookie == cookie) {
+					found = true;
+					break;
+				}
+
+			if (!found)
+				list_for_each_entry(desc, &chan->queued, node)
+					if (desc->async_tx.cookie == cookie) {
+						found = true;
+						break;
+
+					}
+
+			state->residue = found ? desc->length : 0;
+		}
+
+		spin_unlock_irqrestore(&chan->lock, flags);
+	}
+
+	if (chan->paused)
+		status = DMA_PAUSED;
+
+	return status;
+}
+
+static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx);
+	struct nbpf_channel *chan = desc->chan;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	cookie = dma_cookie_assign(tx);
+	list_add_tail(&desc->node, &chan->queued);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie);
+
+	return cookie;
+}
+
+static int nbpf_desc_page_alloc(struct nbpf_channel *chan)
+{
+	struct dma_chan *dchan = &chan->dma_chan;
+	struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	struct nbpf_link_desc *ldesc;
+	struct nbpf_link_reg *hwdesc;
+	struct nbpf_desc *desc;
+	LIST_HEAD(head);
+	LIST_HEAD(lhead);
+	int i;
+	struct device *dev = dchan->device->dev;
+
+	if (!dpage)
+		return -ENOMEM;
+
+	dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n",
+		__func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage));
+
+	for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc;
+	     i < ARRAY_SIZE(dpage->ldesc);
+	     i++, ldesc++, hwdesc++) {
+		ldesc->hwdesc = hwdesc;
+		list_add_tail(&ldesc->node, &lhead);
+		ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev,
+					hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE);
+
+		dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__,
+			hwdesc, &ldesc->hwdesc_dma_addr);
+	}
+
+	for (i = 0, desc = dpage->desc;
+	     i < ARRAY_SIZE(dpage->desc);
+	     i++, desc++) {
+		dma_async_tx_descriptor_init(&desc->async_tx, dchan);
+		desc->async_tx.tx_submit = nbpf_tx_submit;
+		desc->chan = chan;
+		INIT_LIST_HEAD(&desc->sg);
+		list_add_tail(&desc->node, &head);
+	}
+
+	/*
+	 * This function cannot be called from interrupt context, so, no need to
+	 * save flags
+	 */
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&lhead, &chan->free_links);
+	list_splice_tail(&head, &chan->free);
+	list_add(&dpage->node, &chan->desc_page);
+	spin_unlock_irq(&chan->lock);
+
+	return ARRAY_SIZE(dpage->desc);
+}
+
+static void nbpf_desc_put(struct nbpf_desc *desc)
+{
+	struct nbpf_channel *chan = desc->chan;
+	struct nbpf_link_desc *ldesc, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_for_each_entry_safe(ldesc, tmp, &desc->sg, node)
+		list_move(&ldesc->node, &chan->free_links);
+
+	list_add(&desc->node, &chan->free);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void nbpf_scan_acked(struct nbpf_channel *chan)
+{
+	struct nbpf_desc *desc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_for_each_entry_safe(desc, tmp, &chan->done, node)
+		if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) {
+			list_move(&desc->node, &head);
+			desc->user_wait = false;
+		}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, tmp, &head, node) {
+		list_del(&desc->node);
+		nbpf_desc_put(desc);
+	}
+}
+
+/*
+ * We have to allocate descriptors with the channel lock dropped. This means,
+ * before we re-acquire the lock buffers can be taken already, so we have to
+ * re-check after re-acquiring the lock and possibly retry, if buffers are gone
+ * again.
+ */
+static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len)
+{
+	struct nbpf_desc *desc = NULL;
+	struct nbpf_link_desc *ldesc, *prev = NULL;
+
+	nbpf_scan_acked(chan);
+
+	spin_lock_irq(&chan->lock);
+
+	do {
+		int i = 0, ret;
+
+		if (list_empty(&chan->free)) {
+			/* No more free descriptors */
+			spin_unlock_irq(&chan->lock);
+			ret = nbpf_desc_page_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+		desc = list_first_entry(&chan->free, struct nbpf_desc, node);
+		list_del(&desc->node);
+
+		do {
+			if (list_empty(&chan->free_links)) {
+				/* No more free link descriptors */
+				spin_unlock_irq(&chan->lock);
+				ret = nbpf_desc_page_alloc(chan);
+				if (ret < 0) {
+					nbpf_desc_put(desc);
+					return NULL;
+				}
+				spin_lock_irq(&chan->lock);
+				continue;
+			}
+
+			ldesc = list_first_entry(&chan->free_links,
+						 struct nbpf_link_desc, node);
+			ldesc->desc = desc;
+			if (prev)
+				prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr;
+
+			prev = ldesc;
+			list_move_tail(&ldesc->node, &desc->sg);
+
+			i++;
+		} while (i < len);
+	} while (!desc);
+
+	prev->hwdesc->next = 0;
+
+	spin_unlock_irq(&chan->lock);
+
+	return desc;
+}
+
+static void nbpf_chan_idle(struct nbpf_channel *chan)
+{
+	struct nbpf_desc *desc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_splice_init(&chan->done, &head);
+	list_splice_init(&chan->active, &head);
+	list_splice_init(&chan->queued, &head);
+
+	chan->running = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, tmp, &head, node) {
+		dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n",
+			__func__, desc, desc->async_tx.cookie);
+		list_del(&desc->node);
+		nbpf_desc_put(desc);
+	}
+}
+
+static int nbpf_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd,
+			unsigned long arg)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct dma_slave_config *config;
+
+	dev_dbg(dchan->device->dev, "Entry %s(%d)\n", __func__, cmd);
+
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		dev_dbg(dchan->device->dev, "Terminating\n");
+		nbpf_chan_halt(chan);
+		nbpf_chan_idle(chan);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		if (!arg)
+			return -EINVAL;
+		config = (struct dma_slave_config *)arg;
+
+		/*
+		 * We could check config->slave_id to match chan->terminal here,
+		 * but with DT they would be coming from the same source, so
+		 * such a check would be superflous
+		 */
+
+		chan->slave_dst_addr = config->dst_addr;
+		chan->slave_dst_width = nbpf_xfer_size(chan->nbpf,
+						       config->dst_addr_width, 1);
+		chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf,
+						       config->dst_addr_width,
+						       config->dst_maxburst);
+		chan->slave_src_addr = config->src_addr;
+		chan->slave_src_width = nbpf_xfer_size(chan->nbpf,
+						       config->src_addr_width, 1);
+		chan->slave_src_burst = nbpf_xfer_size(chan->nbpf,
+						       config->src_addr_width,
+						       config->src_maxburst);
+		break;
+
+	case DMA_PAUSE:
+		chan->paused = true;
+		nbpf_pause(chan);
+		break;
+
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan,
+		struct scatterlist *src_sg, struct scatterlist *dst_sg,
+		size_t len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct nbpf_link_desc *ldesc;
+	struct scatterlist *mem_sg;
+	struct nbpf_desc *desc;
+	bool inc_src, inc_dst;
+	size_t data_len = 0;
+	int i = 0;
+
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		mem_sg = dst_sg;
+		inc_src = false;
+		inc_dst = true;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = false;
+		break;
+
+	default:
+	case DMA_MEM_TO_MEM:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = true;
+	}
+
+	desc = nbpf_desc_get(chan, len);
+	if (!desc)
+		return NULL;
+
+	desc->async_tx.flags = flags;
+	desc->async_tx.cookie = -EBUSY;
+	desc->user_wait = false;
+
+	/*
+	 * This is a private descriptor list, and we own the descriptor. No need
+	 * to lock.
+	 */
+	list_for_each_entry(ldesc, &desc->sg, node) {
+		int ret = nbpf_prep_one(ldesc, direction,
+					sg_dma_address(src_sg),
+					sg_dma_address(dst_sg),
+					sg_dma_len(mem_sg),
+					i == len - 1);
+		if (ret < 0) {
+			nbpf_desc_put(desc);
+			return NULL;
+		}
+		data_len += sg_dma_len(mem_sg);
+		if (inc_src)
+			src_sg = sg_next(src_sg);
+		if (inc_dst)
+			dst_sg = sg_next(dst_sg);
+		mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg;
+		i++;
+	}
+
+	desc->length = data_len;
+
+	/* The user has to return the descriptor to us ASAP via .tx_submit() */
+	return &desc->async_tx;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_memcpy(
+	struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
+
+	sg_init_table(&dst_sg, 1);
+	sg_init_table(&src_sg, 1);
+
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_address(&src_sg) = src;
+
+	sg_dma_len(&dst_sg) = len;
+	sg_dma_len(&src_sg) = len;
+
+	dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n",
+		__func__, len, &src, &dst);
+
+	return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1,
+			    DMA_MEM_TO_MEM, flags);
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_memcpy_sg(
+	struct dma_chan *dchan,
+	struct scatterlist *dst_sg, unsigned int dst_nents,
+	struct scatterlist *src_sg, unsigned int src_nents,
+	unsigned long flags)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+	if (dst_nents != src_nents)
+		return NULL;
+
+	return nbpf_prep_sg(chan, src_sg, dst_sg, src_nents,
+			    DMA_MEM_TO_MEM, flags);
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_slave_sg(
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct scatterlist slave_sg;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	sg_init_table(&slave_sg, 1);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		sg_dma_address(&slave_sg) = chan->slave_dst_addr;
+		return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len,
+				    direction, flags);
+
+	case DMA_DEV_TO_MEM:
+		sg_dma_address(&slave_sg) = chan->slave_src_addr;
+		return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len,
+				    direction, flags);
+
+	default:
+		return NULL;
+	}
+}
+
+static int nbpf_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	int ret;
+
+	INIT_LIST_HEAD(&chan->free);
+	INIT_LIST_HEAD(&chan->free_links);
+	INIT_LIST_HEAD(&chan->queued);
+	INIT_LIST_HEAD(&chan->active);
+	INIT_LIST_HEAD(&chan->done);
+
+	ret = nbpf_desc_page_alloc(chan);
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__,
+		chan->terminal);
+
+	nbpf_chan_configure(chan);
+
+	return ret;
+}
+
+static void nbpf_free_chan_resources(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct nbpf_desc_page *dpage, *tmp;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	nbpf_chan_halt(chan);
+	nbpf_chan_idle(chan);
+	/* Clean up for if a channel is re-used for MEMCPY after slave DMA */
+	nbpf_chan_prepare_default(chan);
+
+	list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) {
+		struct nbpf_link_desc *ldesc;
+		int i;
+		list_del(&dpage->node);
+		for (i = 0, ldesc = dpage->ldesc;
+		     i < ARRAY_SIZE(dpage->ldesc);
+		     i++, ldesc++)
+			dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr,
+					 sizeof(*ldesc->hwdesc), DMA_TO_DEVICE);
+		free_page((unsigned long)dpage);
+	}
+}
+
+static int nbpf_slave_caps(struct dma_chan *dchan,
+			   struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = NBPF_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = NBPF_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = false;
+	caps->cmd_terminate = true;
+
+	return 0;
+}
+
+static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct nbpf_device *nbpf = ofdma->of_dma_data;
+	struct dma_chan *dchan;
+	struct nbpf_channel *chan;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
+	dchan = dma_get_any_slave_channel(&nbpf->dma_dev);
+	if (!dchan)
+		return NULL;
+
+	dev_dbg(dchan->device->dev, "Entry %s(%s)\n", __func__,
+		dma_spec->np->name);
+
+	chan = nbpf_to_chan(dchan);
+
+	chan->terminal = dma_spec->args[0];
+	chan->flags = dma_spec->args[1];
+
+	nbpf_chan_prepare(chan);
+	nbpf_chan_configure(chan);
+
+	return dchan;
+}
+
+static void nbpf_chan_tasklet(unsigned long data)
+{
+	struct nbpf_channel *chan = (struct nbpf_channel *)data;
+	struct nbpf_desc *desc, *tmp;
+	dma_async_tx_callback callback;
+	void *param;
+
+	while (!list_empty(&chan->done)) {
+		bool found = false, must_put, recycling = false;
+
+		spin_lock_irq(&chan->lock);
+
+		list_for_each_entry_safe(desc, tmp, &chan->done, node) {
+			if (!desc->user_wait) {
+				/* Newly completed descriptor, have to process */
+				found = true;
+				break;
+			} else if (async_tx_test_ack(&desc->async_tx)) {
+				/*
+				 * This descriptor was waiting for a user ACK,
+				 * it can be recycled now.
+				 */
+				list_del(&desc->node);
+				spin_unlock_irq(&chan->lock);
+				nbpf_desc_put(desc);
+				recycling = true;
+				break;
+			}
+		}
+
+		if (recycling)
+			continue;
+
+		if (!found) {
+			/* This can happen if TERMINATE_ALL has been called */
+			spin_unlock_irq(&chan->lock);
+			break;
+		}
+
+		dma_cookie_complete(&desc->async_tx);
+
+		/*
+		 * With released lock we cannot dereference desc, maybe it's
+		 * still on the "done" list
+		 */
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			must_put = true;
+		} else {
+			desc->user_wait = true;
+			must_put = false;
+		}
+
+		callback = desc->async_tx.callback;
+		param = desc->async_tx.callback_param;
+
+		/* ack and callback completed descriptor */
+		spin_unlock_irq(&chan->lock);
+
+		if (callback)
+			callback(param);
+
+		if (must_put)
+			nbpf_desc_put(desc);
+	}
+}
+
+static irqreturn_t nbpf_chan_irq(int irq, void *dev)
+{
+	struct nbpf_channel *chan = dev;
+	bool done = nbpf_status_get(chan);
+	struct nbpf_desc *desc;
+	irqreturn_t ret;
+	bool bh = false;
+
+	if (!done)
+		return IRQ_NONE;
+
+	nbpf_status_ack(chan);
+
+	dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__);
+
+	spin_lock(&chan->lock);
+	desc = chan->running;
+	if (WARN_ON(!desc)) {
+		ret = IRQ_NONE;
+		goto unlock;
+	} else {
+		ret = IRQ_HANDLED;
+		bh = true;
+	}
+
+	list_move_tail(&desc->node, &chan->done);
+	chan->running = NULL;
+
+	if (!list_empty(&chan->active)) {
+		desc = list_first_entry(&chan->active,
+					struct nbpf_desc, node);
+		if (!nbpf_start(desc))
+			chan->running = desc;
+	}
+
+unlock:
+	spin_unlock(&chan->lock);
+
+	if (bh)
+		tasklet_schedule(&chan->tasklet);
+
+	return ret;
+}
+
+static irqreturn_t nbpf_err_irq(int irq, void *dev)
+{
+	struct nbpf_device *nbpf = dev;
+	u32 error = nbpf_error_get(nbpf);
+
+	dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq);
+
+	if (!error)
+		return IRQ_NONE;
+
+	do {
+		struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error);
+		/* On error: abort all queued transfers, no callback */
+		nbpf_error_clear(chan);
+		nbpf_chan_idle(chan);
+		error = nbpf_error_get(nbpf);
+	} while (error);
+
+	return IRQ_HANDLED;
+}
+
+static int nbpf_chan_probe(struct nbpf_device *nbpf, int n)
+{
+	struct dma_device *dma_dev = &nbpf->dma_dev;
+	struct nbpf_channel *chan = nbpf->chan + n;
+	int ret;
+
+	chan->nbpf = nbpf;
+	chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n;
+	INIT_LIST_HEAD(&chan->desc_page);
+	spin_lock_init(&chan->lock);
+	chan->dma_chan.device = dma_dev;
+	dma_cookie_init(&chan->dma_chan);
+	nbpf_chan_prepare_default(chan);
+
+	dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base);
+
+	snprintf(chan->name, sizeof(chan->name), "nbpf %d", n);
+
+	tasklet_init(&chan->tasklet, nbpf_chan_tasklet, (unsigned long)chan);
+	ret = devm_request_irq(dma_dev->dev, chan->irq,
+			nbpf_chan_irq, IRQF_SHARED,
+			chan->name, chan);
+	if (ret < 0)
+		return ret;
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->dma_chan.device_node,
+		      &dma_dev->channels);
+
+	return 0;
+}
+
+static const struct of_device_id nbpf_match[] = {
+	{.compatible = "renesas,nbpfaxi64dmac1b4",	.data = &nbpf_cfg[NBPF1B4]},
+	{.compatible = "renesas,nbpfaxi64dmac1b8",	.data = &nbpf_cfg[NBPF1B8]},
+	{.compatible = "renesas,nbpfaxi64dmac1b16",	.data = &nbpf_cfg[NBPF1B16]},
+	{.compatible = "renesas,nbpfaxi64dmac4b4",	.data = &nbpf_cfg[NBPF4B4]},
+	{.compatible = "renesas,nbpfaxi64dmac4b8",	.data = &nbpf_cfg[NBPF4B8]},
+	{.compatible = "renesas,nbpfaxi64dmac4b16",	.data = &nbpf_cfg[NBPF4B16]},
+	{.compatible = "renesas,nbpfaxi64dmac8b4",	.data = &nbpf_cfg[NBPF8B4]},
+	{.compatible = "renesas,nbpfaxi64dmac8b8",	.data = &nbpf_cfg[NBPF8B8]},
+	{.compatible = "renesas,nbpfaxi64dmac8b16",	.data = &nbpf_cfg[NBPF8B16]},
+	{}
+};
+MODULE_DEVICE_TABLE(of, nbpf_match);
+
+static int nbpf_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id = of_match_device(nbpf_match, dev);
+	struct device_node *np = dev->of_node;
+	struct nbpf_device *nbpf;
+	struct dma_device *dma_dev;
+	struct resource *iomem, *irq_res;
+	const struct nbpf_config *cfg;
+	int num_channels;
+	int ret, irq, eirq, i;
+	int irqbuf[9] /* maximum 8 channels + error IRQ */;
+	unsigned int irqs = 0;
+
+	BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE);
+
+	/* DT only */
+	if (!np || !of_id || !of_id->data)
+		return -ENODEV;
+
+	cfg = of_id->data;
+	num_channels = cfg->num_channels;
+
+	nbpf = devm_kzalloc(dev, sizeof(*nbpf) + num_channels *
+			    sizeof(nbpf->chan[0]), GFP_KERNEL);
+	if (!nbpf) {
+		dev_err(dev, "Memory allocation failed\n");
+		return -ENOMEM;
+	}
+	dma_dev = &nbpf->dma_dev;
+	dma_dev->dev = dev;
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nbpf->base = devm_ioremap_resource(dev, iomem);
+	if (IS_ERR(nbpf->base))
+		return PTR_ERR(nbpf->base);
+
+	nbpf->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(nbpf->clk))
+		return PTR_ERR(nbpf->clk);
+
+	nbpf->config = cfg;
+
+	for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) {
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res)
+			break;
+
+		for (irq = irq_res->start; irq <= irq_res->end;
+		     irq++, irqs++)
+			irqbuf[irqs] = irq;
+	}
+
+	/*
+	 * 3 IRQ resource schemes are supported:
+	 * 1. 1 shared IRQ for error and all channels
+	 * 2. 2 IRQs: one for error and one shared for all channels
+	 * 3. 1 IRQ for error and an own IRQ for each channel
+	 */
+	if (irqs != 1 && irqs != 2 && irqs != num_channels + 1)
+		return -ENXIO;
+
+	if (irqs == 1) {
+		eirq = irqbuf[0];
+
+		for (i = 0; i <= num_channels; i++)
+			nbpf->chan[i].irq = irqbuf[0];
+	} else {
+		eirq = platform_get_irq_byname(pdev, "error");
+		if (eirq < 0)
+			return eirq;
+
+		if (irqs == num_channels + 1) {
+			struct nbpf_channel *chan;
+
+			for (i = 0, chan = nbpf->chan; i <= num_channels;
+			     i++, chan++) {
+				/* Skip the error IRQ */
+				if (irqbuf[i] == eirq)
+					i++;
+				chan->irq = irqbuf[i];
+			}
+
+			if (chan != nbpf->chan + num_channels)
+				return -EINVAL;
+		} else {
+			/* 2 IRQs and more than one channel */
+			if (irqbuf[0] == eirq)
+				irq = irqbuf[1];
+			else
+				irq = irqbuf[0];
+
+			for (i = 0; i <= num_channels; i++)
+				nbpf->chan[i].irq = irq;
+		}
+	}
+
+	ret = devm_request_irq(dev, eirq, nbpf_err_irq,
+			       IRQF_SHARED, "dma error", nbpf);
+	if (ret < 0)
+		return ret;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Create DMA Channel */
+	for (i = 0; i < num_channels; i++) {
+		ret = nbpf_chan_probe(nbpf, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+	dma_cap_set(DMA_SG, dma_dev->cap_mask);
+
+	/* Common and MEMCPY operations */
+	dma_dev->device_alloc_chan_resources
+		= nbpf_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = nbpf_free_chan_resources;
+	dma_dev->device_prep_dma_sg = nbpf_prep_memcpy_sg;
+	dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy;
+	dma_dev->device_tx_status = nbpf_tx_status;
+	dma_dev->device_issue_pending = nbpf_issue_pending;
+	dma_dev->device_slave_caps = nbpf_slave_caps;
+
+	/*
+	 * If we drop support for unaligned MEMCPY buffer addresses and / or
+	 * lengths by setting
+	 * dma_dev->copy_align = 4;
+	 * then we can set transfer length to 4 bytes in nbpf_prep_one() for
+	 * DMA_MEM_TO_MEM
+	 */
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg;
+	dma_dev->device_control = nbpf_control;
+
+	platform_set_drvdata(pdev, nbpf);
+
+	ret = clk_prepare_enable(nbpf->clk);
+	if (ret < 0)
+		return ret;
+
+	nbpf_configure(nbpf);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret < 0)
+		goto e_clk_off;
+
+	ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf);
+	if (ret < 0)
+		goto e_dma_dev_unreg;
+
+	return 0;
+
+e_dma_dev_unreg:
+	dma_async_device_unregister(dma_dev);
+e_clk_off:
+	clk_disable_unprepare(nbpf->clk);
+
+	return ret;
+}
+
+static int nbpf_remove(struct platform_device *pdev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&nbpf->dma_dev);
+	clk_disable_unprepare(nbpf->clk);
+
+	return 0;
+}
+
+static struct platform_device_id nbpf_ids[] = {
+	{"nbpfaxi64dmac1b4",	(kernel_ulong_t)&nbpf_cfg[NBPF1B4]},
+	{"nbpfaxi64dmac1b8",	(kernel_ulong_t)&nbpf_cfg[NBPF1B8]},
+	{"nbpfaxi64dmac1b16",	(kernel_ulong_t)&nbpf_cfg[NBPF1B16]},
+	{"nbpfaxi64dmac4b4",	(kernel_ulong_t)&nbpf_cfg[NBPF4B4]},
+	{"nbpfaxi64dmac4b8",	(kernel_ulong_t)&nbpf_cfg[NBPF4B8]},
+	{"nbpfaxi64dmac4b16",	(kernel_ulong_t)&nbpf_cfg[NBPF4B16]},
+	{"nbpfaxi64dmac8b4",	(kernel_ulong_t)&nbpf_cfg[NBPF8B4]},
+	{"nbpfaxi64dmac8b8",	(kernel_ulong_t)&nbpf_cfg[NBPF8B8]},
+	{"nbpfaxi64dmac8b16",	(kernel_ulong_t)&nbpf_cfg[NBPF8B16]},
+	{},
+};
+MODULE_DEVICE_TABLE(platform, nbpf_ids);
+
+#ifdef CONFIG_PM_RUNTIME
+static int nbpf_runtime_suspend(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	clk_disable_unprepare(nbpf->clk);
+	return 0;
+}
+
+static int nbpf_runtime_resume(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	return clk_prepare_enable(nbpf->clk);
+}
+#endif
+
+static const struct dev_pm_ops nbpf_pm_ops = {
+	SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL)
+};
+
+static struct platform_driver nbpf_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "dma-nbpf",
+		.of_match_table = nbpf_match,
+		.pm = &nbpf_pm_ops,
+	},
+	.id_table = nbpf_ids,
+	.probe = nbpf_probe,
+	.remove = nbpf_remove,
+};
+
+module_platform_driver(nbpf_driver);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index e8fe9dc..d5fbeaa 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 			&dma_spec->args[0]);
 }
 EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for DMA driver which wants
+ * to match the channel based on the channel id. When using this xlate function
+ * the #dma-cells propety of the DMA controller dt node needs to be set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dma_device *dev = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate = NULL;
+
+	if (!dev || dma_spec->args_count != 1)
+		return NULL;
+
+	list_for_each_entry(chan, &dev->channels, device_node)
+		if (chan->chan_id == dma_spec->args[0]) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index b19f04f..4cf7d9a 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -853,8 +853,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
-	size_t period_len, enum dma_transfer_direction dir, unsigned long flags,
-	void *context)
+	size_t period_len, enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
 	struct omap_chan *c = to_omap_dma_chan(chan);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 73fa9b7..d5149aa 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -33,26 +33,15 @@
 #define PL330_MAX_IRQS		32
 #define PL330_MAX_PERI		32
 
-enum pl330_srccachectrl {
-	SCCTRL0,	/* Noncacheable and nonbufferable */
-	SCCTRL1,	/* Bufferable only */
-	SCCTRL2,	/* Cacheable, but do not allocate */
-	SCCTRL3,	/* Cacheable and bufferable, but do not allocate */
-	SINVALID1,
-	SINVALID2,
-	SCCTRL6,	/* Cacheable write-through, allocate on reads only */
-	SCCTRL7,	/* Cacheable write-back, allocate on reads only */
-};
-
-enum pl330_dstcachectrl {
-	DCCTRL0,	/* Noncacheable and nonbufferable */
-	DCCTRL1,	/* Bufferable only */
-	DCCTRL2,	/* Cacheable, but do not allocate */
-	DCCTRL3,	/* Cacheable and bufferable, but do not allocate */
-	DINVALID1,	/* AWCACHE = 0x1000 */
-	DINVALID2,
-	DCCTRL6,	/* Cacheable write-through, allocate on writes only */
-	DCCTRL7,	/* Cacheable write-back, allocate on writes only */
+enum pl330_cachectrl {
+	CCTRL0,		/* Noncacheable and nonbufferable */
+	CCTRL1,		/* Bufferable only */
+	CCTRL2,		/* Cacheable, but do not allocate */
+	CCTRL3,		/* Cacheable and bufferable, but do not allocate */
+	INVALID1,	/* AWCACHE = 0x1000 */
+	INVALID2,
+	CCTRL6,		/* Cacheable write-through, allocate on writes only */
+	CCTRL7,		/* Cacheable write-back, allocate on writes only */
 };
 
 enum pl330_byteswap {
@@ -63,13 +52,6 @@ enum pl330_byteswap {
 	SWAP_16,
 };
 
-enum pl330_reqtype {
-	MEMTOMEM,
-	MEMTODEV,
-	DEVTOMEM,
-	DEVTODEV,
-};
-
 /* Register and Bit field Definitions */
 #define DS			0x0
 #define DS_ST_STOP		0x0
@@ -263,9 +245,6 @@ enum pl330_reqtype {
  */
 #define MCODE_BUFF_PER_REQ	256
 
-/* If the _pl330_req is available to the client */
-#define IS_FREE(req)	(*((u8 *)((req)->mc_cpu)) == CMD_DMAEND)
-
 /* Use this _only_ to wait on transient states */
 #define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
 
@@ -300,27 +279,6 @@ struct pl330_config {
 	u32		irq_ns;
 };
 
-/* Handle to the DMAC provided to the PL330 core */
-struct pl330_info {
-	/* Owning device */
-	struct device *dev;
-	/* Size of MicroCode buffers for each channel. */
-	unsigned mcbufsz;
-	/* ioremap'ed address of PL330 registers. */
-	void __iomem	*base;
-	/* Client can freely use it. */
-	void	*client_data;
-	/* PL330 core data, Client must not touch it. */
-	void	*pl330_data;
-	/* Populated by the PL330 core driver during pl330_add */
-	struct pl330_config	pcfg;
-	/*
-	 * If the DMAC has some reset mechanism, then the
-	 * client may want to provide pointer to the method.
-	 */
-	void (*dmac_reset)(struct pl330_info *pi);
-};
-
 /**
  * Request Configuration.
  * The PL330 core does not modify this and uses the last
@@ -344,8 +302,8 @@ struct pl330_reqcfg {
 	unsigned brst_len:5;
 	unsigned brst_size:3; /* in power of 2 */
 
-	enum pl330_dstcachectrl dcctl;
-	enum pl330_srccachectrl scctl;
+	enum pl330_cachectrl dcctl;
+	enum pl330_cachectrl scctl;
 	enum pl330_byteswap swap;
 	struct pl330_config *pcfg;
 };
@@ -359,11 +317,6 @@ struct pl330_xfer {
 	u32 dst_addr;
 	/* Size to xfer */
 	u32 bytes;
-	/*
-	 * Pointer to next xfer in the list.
-	 * The last xfer in the req must point to NULL.
-	 */
-	struct pl330_xfer *next;
 };
 
 /* The xfer callbacks are made with one of these arguments. */
@@ -376,67 +329,6 @@ enum pl330_op_err {
 	PL330_ERR_FAIL,
 };
 
-/* A request defining Scatter-Gather List ending with NULL xfer. */
-struct pl330_req {
-	enum pl330_reqtype rqtype;
-	/* Index of peripheral for the xfer. */
-	unsigned peri:5;
-	/* Unique token for this xfer, set by the client. */
-	void *token;
-	/* Callback to be called after xfer. */
-	void (*xfer_cb)(void *token, enum pl330_op_err err);
-	/* If NULL, req will be done at last set parameters. */
-	struct pl330_reqcfg *cfg;
-	/* Pointer to first xfer in the request. */
-	struct pl330_xfer *x;
-	/* Hook to attach to DMAC's list of reqs with due callback */
-	struct list_head rqd;
-};
-
-/*
- * To know the status of the channel and DMAC, the client
- * provides a pointer to this structure. The PL330 core
- * fills it with current information.
- */
-struct pl330_chanstatus {
-	/*
-	 * If the DMAC engine halted due to some error,
-	 * the client should remove-add DMAC.
-	 */
-	bool dmac_halted;
-	/*
-	 * If channel is halted due to some error,
-	 * the client should ABORT/FLUSH and START the channel.
-	 */
-	bool faulting;
-	/* Location of last load */
-	u32 src_addr;
-	/* Location of last store */
-	u32 dst_addr;
-	/*
-	 * Pointer to the currently active req, NULL if channel is
-	 * inactive, even though the requests may be present.
-	 */
-	struct pl330_req *top_req;
-	/* Pointer to req waiting second in the queue if any. */
-	struct pl330_req *wait_req;
-};
-
-enum pl330_chan_op {
-	/* Start the channel */
-	PL330_OP_START,
-	/* Abort the active xfer */
-	PL330_OP_ABORT,
-	/* Stop xfer and flush queue */
-	PL330_OP_FLUSH,
-};
-
-struct _xfer_spec {
-	u32 ccr;
-	struct pl330_req *r;
-	struct pl330_xfer *x;
-};
-
 enum dmamov_dst {
 	SAR = 0,
 	CCR,
@@ -454,12 +346,12 @@ enum pl330_cond {
 	ALWAYS,
 };
 
+struct dma_pl330_desc;
+
 struct _pl330_req {
 	u32 mc_bus;
 	void *mc_cpu;
-	/* Number of bytes taken to setup MC for the req */
-	u32 mc_len;
-	struct pl330_req *r;
+	struct dma_pl330_desc *desc;
 };
 
 /* ToBeDone for tasklet */
@@ -491,30 +383,6 @@ enum pl330_dmac_state {
 	DYING,
 };
 
-/* A DMAC */
-struct pl330_dmac {
-	spinlock_t		lock;
-	/* Holds list of reqs with due callbacks */
-	struct list_head	req_done;
-	/* Pointer to platform specific stuff */
-	struct pl330_info	*pinfo;
-	/* Maximum possible events/irqs */
-	int			events[32];
-	/* BUS address of MicroCode buffer */
-	dma_addr_t		mcode_bus;
-	/* CPU address of MicroCode buffer */
-	void			*mcode_cpu;
-	/* List of all Channel threads */
-	struct pl330_thread	*channels;
-	/* Pointer to the MANAGER thread */
-	struct pl330_thread	*manager;
-	/* To handle bad news in interrupt */
-	struct tasklet_struct	tasks;
-	struct _pl330_tbd	dmac_tbd;
-	/* State of DMAC operation */
-	enum pl330_dmac_state	state;
-};
-
 enum desc_status {
 	/* In the DMAC pool */
 	FREE,
@@ -555,15 +423,16 @@ struct dma_pl330_chan {
 	 * As the parent, this DMAC also provides descriptors
 	 * to the channel.
 	 */
-	struct dma_pl330_dmac *dmac;
+	struct pl330_dmac *dmac;
 
 	/* To protect channel manipulation */
 	spinlock_t lock;
 
-	/* Token of a hardware channel thread of PL330 DMAC
-	 * NULL if the channel is available to be acquired.
+	/*
+	 * Hardware channel thread of PL330 DMAC. NULL if the channel is
+	 * available.
 	 */
-	void *pl330_chid;
+	struct pl330_thread *thread;
 
 	/* For D-to-M and M-to-D channels */
 	int burst_sz; /* the peripheral fifo width */
@@ -574,9 +443,7 @@ struct dma_pl330_chan {
 	bool cyclic;
 };
 
-struct dma_pl330_dmac {
-	struct pl330_info pif;
-
+struct pl330_dmac {
 	/* DMA-Engine Device */
 	struct dma_device ddma;
 
@@ -588,6 +455,32 @@ struct dma_pl330_dmac {
 	/* To protect desc_pool manipulation */
 	spinlock_t pool_lock;
 
+	/* Size of MicroCode buffers for each channel. */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers. */
+	void __iomem	*base;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+
+	spinlock_t		lock;
+	/* Maximum possible events/irqs */
+	int			events[32];
+	/* BUS address of MicroCode buffer */
+	dma_addr_t		mcode_bus;
+	/* CPU address of MicroCode buffer */
+	void			*mcode_cpu;
+	/* List of all Channel threads */
+	struct pl330_thread	*channels;
+	/* Pointer to the MANAGER thread */
+	struct pl330_thread	*manager;
+	/* To handle bad news in interrupt */
+	struct tasklet_struct	tasks;
+	struct _pl330_tbd	dmac_tbd;
+	/* State of DMAC operation */
+	enum pl330_dmac_state	state;
+	/* Holds list of reqs with due callbacks */
+	struct list_head        req_done;
+
 	/* Peripheral channels connected to this DMAC */
 	unsigned int num_peripherals;
 	struct dma_pl330_chan *peripherals; /* keep at end */
@@ -604,49 +497,43 @@ struct dma_pl330_desc {
 	struct pl330_xfer px;
 
 	struct pl330_reqcfg rqcfg;
-	struct pl330_req req;
 
 	enum desc_status status;
 
 	/* The channel which currently holds this desc */
 	struct dma_pl330_chan *pchan;
+
+	enum dma_transfer_direction rqtype;
+	/* Index of peripheral for the xfer. */
+	unsigned peri:5;
+	/* Hook to attach to DMAC's list of reqs with due callback */
+	struct list_head rqd;
 };
 
-static inline void _callback(struct pl330_req *r, enum pl330_op_err err)
-{
-	if (r && r->xfer_cb)
-		r->xfer_cb(r->token, err);
-}
+struct _xfer_spec {
+	u32 ccr;
+	struct dma_pl330_desc *desc;
+};
 
 static inline bool _queue_empty(struct pl330_thread *thrd)
 {
-	return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1]))
-		? true : false;
+	return thrd->req[0].desc == NULL && thrd->req[1].desc == NULL;
 }
 
 static inline bool _queue_full(struct pl330_thread *thrd)
 {
-	return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1]))
-		? false : true;
+	return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL;
 }
 
 static inline bool is_manager(struct pl330_thread *thrd)
 {
-	struct pl330_dmac *pl330 = thrd->dmac;
-
-	/* MANAGER is indexed at the end */
-	if (thrd->id == pl330->pinfo->pcfg.num_chan)
-		return true;
-	else
-		return false;
+	return thrd->dmac->manager == thrd;
 }
 
 /* If manager of the thread is in Non-Secure mode */
 static inline bool _manager_ns(struct pl330_thread *thrd)
 {
-	struct pl330_dmac *pl330 = thrd->dmac;
-
-	return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false;
+	return (thrd->dmac->pcfg.mode & DMAC_MODE_NS) ? true : false;
 }
 
 static inline u32 get_revision(u32 periph_id)
@@ -1004,7 +891,7 @@ static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
 /* Returns Time-Out */
 static bool _until_dmac_idle(struct pl330_thread *thrd)
 {
-	void __iomem *regs = thrd->dmac->pinfo->base;
+	void __iomem *regs = thrd->dmac->base;
 	unsigned long loops = msecs_to_loops(5);
 
 	do {
@@ -1024,7 +911,7 @@ static bool _until_dmac_idle(struct pl330_thread *thrd)
 static inline void _execute_DBGINSN(struct pl330_thread *thrd,
 		u8 insn[], bool as_manager)
 {
-	void __iomem *regs = thrd->dmac->pinfo->base;
+	void __iomem *regs = thrd->dmac->base;
 	u32 val;
 
 	val = (insn[0] << 16) | (insn[1] << 24);
@@ -1039,7 +926,7 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
 
 	/* If timed out due to halted state-machine */
 	if (_until_dmac_idle(thrd)) {
-		dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n");
+		dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n");
 		return;
 	}
 
@@ -1047,25 +934,9 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
 	writel(0, regs + DBGCMD);
 }
 
-/*
- * Mark a _pl330_req as free.
- * We do it by writing DMAEND as the first instruction
- * because no valid request is going to have DMAEND as
- * its first instruction to execute.
- */
-static void mark_free(struct pl330_thread *thrd, int idx)
-{
-	struct _pl330_req *req = &thrd->req[idx];
-
-	_emit_END(0, req->mc_cpu);
-	req->mc_len = 0;
-
-	thrd->req_running = -1;
-}
-
 static inline u32 _state(struct pl330_thread *thrd)
 {
-	void __iomem *regs = thrd->dmac->pinfo->base;
+	void __iomem *regs = thrd->dmac->base;
 	u32 val;
 
 	if (is_manager(thrd))
@@ -1123,7 +994,7 @@ static inline u32 _state(struct pl330_thread *thrd)
 
 static void _stop(struct pl330_thread *thrd)
 {
-	void __iomem *regs = thrd->dmac->pinfo->base;
+	void __iomem *regs = thrd->dmac->base;
 	u8 insn[6] = {0, 0, 0, 0, 0, 0};
 
 	if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
@@ -1146,9 +1017,9 @@ static void _stop(struct pl330_thread *thrd)
 /* Start doing req 'idx' of thread 'thrd' */
 static bool _trigger(struct pl330_thread *thrd)
 {
-	void __iomem *regs = thrd->dmac->pinfo->base;
+	void __iomem *regs = thrd->dmac->base;
 	struct _pl330_req *req;
-	struct pl330_req *r;
+	struct dma_pl330_desc *desc;
 	struct _arg_GO go;
 	unsigned ns;
 	u8 insn[6] = {0, 0, 0, 0, 0, 0};
@@ -1159,32 +1030,27 @@ static bool _trigger(struct pl330_thread *thrd)
 		return true;
 
 	idx = 1 - thrd->lstenq;
-	if (!IS_FREE(&thrd->req[idx]))
+	if (thrd->req[idx].desc != NULL) {
 		req = &thrd->req[idx];
-	else {
+	} else {
 		idx = thrd->lstenq;
-		if (!IS_FREE(&thrd->req[idx]))
+		if (thrd->req[idx].desc != NULL)
 			req = &thrd->req[idx];
 		else
 			req = NULL;
 	}
 
 	/* Return if no request */
-	if (!req || !req->r)
+	if (!req)
 		return true;
 
-	r = req->r;
+	desc = req->desc;
 
-	if (r->cfg)
-		ns = r->cfg->nonsecure ? 1 : 0;
-	else if (readl(regs + CS(thrd->id)) & CS_CNS)
-		ns = 1;
-	else
-		ns = 0;
+	ns = desc->rqcfg.nonsecure ? 1 : 0;
 
 	/* See 'Abort Sources' point-4 at Page 2-25 */
 	if (_manager_ns(thrd) && !ns)
-		dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n",
+		dev_info(thrd->dmac->ddma.dev, "%s:%d Recipe for ABORT!\n",
 			__func__, __LINE__);
 
 	go.chan = thrd->id;
@@ -1240,7 +1106,7 @@ static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
 		const struct _xfer_spec *pxs, int cyc)
 {
 	int off = 0;
-	struct pl330_config *pcfg = pxs->r->cfg->pcfg;
+	struct pl330_config *pcfg = pxs->desc->rqcfg.pcfg;
 
 	/* check lock-up free version */
 	if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
@@ -1266,10 +1132,10 @@ static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
 	int off = 0;
 
 	while (cyc--) {
-		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
-		off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->desc->peri);
+		off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->desc->peri);
 		off += _emit_ST(dry_run, &buf[off], ALWAYS);
-		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri);
 	}
 
 	return off;
@@ -1281,10 +1147,10 @@ static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
 	int off = 0;
 
 	while (cyc--) {
-		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri);
+		off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->desc->peri);
 		off += _emit_LD(dry_run, &buf[off], ALWAYS);
-		off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri);
-		off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+		off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->desc->peri);
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri);
 	}
 
 	return off;
@@ -1295,14 +1161,14 @@ static int _bursts(unsigned dry_run, u8 buf[],
 {
 	int off = 0;
 
-	switch (pxs->r->rqtype) {
-	case MEMTODEV:
+	switch (pxs->desc->rqtype) {
+	case DMA_MEM_TO_DEV:
 		off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc);
 		break;
-	case DEVTOMEM:
+	case DMA_DEV_TO_MEM:
 		off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc);
 		break;
-	case MEMTOMEM:
+	case DMA_MEM_TO_MEM:
 		off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
 		break;
 	default:
@@ -1395,7 +1261,7 @@ static inline int _loop(unsigned dry_run, u8 buf[],
 static inline int _setup_loops(unsigned dry_run, u8 buf[],
 		const struct _xfer_spec *pxs)
 {
-	struct pl330_xfer *x = pxs->x;
+	struct pl330_xfer *x = &pxs->desc->px;
 	u32 ccr = pxs->ccr;
 	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
 	int off = 0;
@@ -1412,7 +1278,7 @@ static inline int _setup_loops(unsigned dry_run, u8 buf[],
 static inline int _setup_xfer(unsigned dry_run, u8 buf[],
 		const struct _xfer_spec *pxs)
 {
-	struct pl330_xfer *x = pxs->x;
+	struct pl330_xfer *x = &pxs->desc->px;
 	int off = 0;
 
 	/* DMAMOV SAR, x->src_addr */
@@ -1443,17 +1309,12 @@ static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
 	/* DMAMOV CCR, ccr */
 	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
 
-	x = pxs->r->x;
-	do {
-		/* Error if xfer length is not aligned at burst size */
-		if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
-			return -EINVAL;
-
-		pxs->x = x;
-		off += _setup_xfer(dry_run, &buf[off], pxs);
+	x = &pxs->desc->px;
+	/* Error if xfer length is not aligned at burst size */
+	if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
+		return -EINVAL;
 
-		x = x->next;
-	} while (x);
+	off += _setup_xfer(dry_run, &buf[off], pxs);
 
 	/* DMASEV peripheral/event */
 	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
@@ -1495,31 +1356,15 @@ static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
 	return ccr;
 }
 
-static inline bool _is_valid(u32 ccr)
-{
-	enum pl330_dstcachectrl dcctl;
-	enum pl330_srccachectrl scctl;
-
-	dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK;
-	scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
-
-	if (dcctl == DINVALID1 || dcctl == DINVALID2
-			|| scctl == SINVALID1 || scctl == SINVALID2)
-		return false;
-	else
-		return true;
-}
-
 /*
  * Submit a list of xfers after which the client wants notification.
  * Client is not notified after each xfer unit, just once after all
  * xfer units are done or some error occurs.
  */
-static int pl330_submit_req(void *ch_id, struct pl330_req *r)
+static int pl330_submit_req(struct pl330_thread *thrd,
+	struct dma_pl330_desc *desc)
 {
-	struct pl330_thread *thrd = ch_id;
-	struct pl330_dmac *pl330;
-	struct pl330_info *pi;
+	struct pl330_dmac *pl330 = thrd->dmac;
 	struct _xfer_spec xs;
 	unsigned long flags;
 	void __iomem *regs;
@@ -1528,25 +1373,24 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r)
 	int ret = 0;
 
 	/* No Req or Unacquired Channel or DMAC */
-	if (!r || !thrd || thrd->free)
+	if (!desc || !thrd || thrd->free)
 		return -EINVAL;
 
-	pl330 = thrd->dmac;
-	pi = pl330->pinfo;
-	regs = pi->base;
+	regs = thrd->dmac->base;
 
 	if (pl330->state == DYING
 		|| pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
-		dev_info(thrd->dmac->pinfo->dev, "%s:%d\n",
+		dev_info(thrd->dmac->ddma.dev, "%s:%d\n",
 			__func__, __LINE__);
 		return -EAGAIN;
 	}
 
 	/* If request for non-existing peripheral */
-	if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) {
-		dev_info(thrd->dmac->pinfo->dev,
+	if (desc->rqtype != DMA_MEM_TO_MEM &&
+	    desc->peri >= pl330->pcfg.num_peri) {
+		dev_info(thrd->dmac->ddma.dev,
 				"%s:%d Invalid peripheral(%u)!\n",
-				__func__, __LINE__, r->peri);
+				__func__, __LINE__, desc->peri);
 		return -EINVAL;
 	}
 
@@ -1557,41 +1401,26 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r)
 		goto xfer_exit;
 	}
 
+	/* Prefer Secure Channel */
+	if (!_manager_ns(thrd))
+		desc->rqcfg.nonsecure = 0;
+	else
+		desc->rqcfg.nonsecure = 1;
 
-	/* Use last settings, if not provided */
-	if (r->cfg) {
-		/* Prefer Secure Channel */
-		if (!_manager_ns(thrd))
-			r->cfg->nonsecure = 0;
-		else
-			r->cfg->nonsecure = 1;
-
-		ccr = _prepare_ccr(r->cfg);
-	} else {
-		ccr = readl(regs + CC(thrd->id));
-	}
-
-	/* If this req doesn't have valid xfer settings */
-	if (!_is_valid(ccr)) {
-		ret = -EINVAL;
-		dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n",
-			__func__, __LINE__, ccr);
-		goto xfer_exit;
-	}
+	ccr = _prepare_ccr(&desc->rqcfg);
 
-	idx = IS_FREE(&thrd->req[0]) ? 0 : 1;
+	idx = thrd->req[0].desc == NULL ? 0 : 1;
 
 	xs.ccr = ccr;
-	xs.r = r;
+	xs.desc = desc;
 
 	/* First dry run to check if req is acceptable */
 	ret = _setup_req(1, thrd, idx, &xs);
 	if (ret < 0)
 		goto xfer_exit;
 
-	if (ret > pi->mcbufsz / 2) {
-		dev_info(thrd->dmac->pinfo->dev,
-			"%s:%d Trying increasing mcbufsz\n",
+	if (ret > pl330->mcbufsz / 2) {
+		dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n",
 				__func__, __LINE__);
 		ret = -ENOMEM;
 		goto xfer_exit;
@@ -1599,8 +1428,8 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r)
 
 	/* Hook the request */
 	thrd->lstenq = idx;
-	thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs);
-	thrd->req[idx].r = r;
+	thrd->req[idx].desc = desc;
+	_setup_req(0, thrd, idx, &xs);
 
 	ret = 0;
 
@@ -1610,10 +1439,32 @@ xfer_exit:
 	return ret;
 }
 
+static void dma_pl330_rqcb(struct dma_pl330_desc *desc, enum pl330_op_err err)
+{
+	struct dma_pl330_chan *pch;
+	unsigned long flags;
+
+	if (!desc)
+		return;
+
+	pch = desc->pchan;
+
+	/* If desc aborted */
+	if (!pch)
+		return;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	desc->status = DONE;
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	tasklet_schedule(&pch->task);
+}
+
 static void pl330_dotask(unsigned long data)
 {
 	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
-	struct pl330_info *pi = pl330->pinfo;
 	unsigned long flags;
 	int i;
 
@@ -1631,16 +1482,16 @@ static void pl330_dotask(unsigned long data)
 	if (pl330->dmac_tbd.reset_mngr) {
 		_stop(pl330->manager);
 		/* Reset all channels */
-		pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1;
+		pl330->dmac_tbd.reset_chan = (1 << pl330->pcfg.num_chan) - 1;
 		/* Clear the reset flag */
 		pl330->dmac_tbd.reset_mngr = false;
 	}
 
-	for (i = 0; i < pi->pcfg.num_chan; i++) {
+	for (i = 0; i < pl330->pcfg.num_chan; i++) {
 
 		if (pl330->dmac_tbd.reset_chan & (1 << i)) {
 			struct pl330_thread *thrd = &pl330->channels[i];
-			void __iomem *regs = pi->base;
+			void __iomem *regs = pl330->base;
 			enum pl330_op_err err;
 
 			_stop(thrd);
@@ -1651,16 +1502,13 @@ static void pl330_dotask(unsigned long data)
 				err = PL330_ERR_ABORT;
 
 			spin_unlock_irqrestore(&pl330->lock, flags);
-
-			_callback(thrd->req[1 - thrd->lstenq].r, err);
-			_callback(thrd->req[thrd->lstenq].r, err);
-
+			dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, err);
+			dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, err);
 			spin_lock_irqsave(&pl330->lock, flags);
 
-			thrd->req[0].r = NULL;
-			thrd->req[1].r = NULL;
-			mark_free(thrd, 0);
-			mark_free(thrd, 1);
+			thrd->req[0].desc = NULL;
+			thrd->req[1].desc = NULL;
+			thrd->req_running = -1;
 
 			/* Clear the reset flag */
 			pl330->dmac_tbd.reset_chan &= ~(1 << i);
@@ -1673,20 +1521,15 @@ static void pl330_dotask(unsigned long data)
 }
 
 /* Returns 1 if state was updated, 0 otherwise */
-static int pl330_update(const struct pl330_info *pi)
+static int pl330_update(struct pl330_dmac *pl330)
 {
-	struct pl330_req *rqdone, *tmp;
-	struct pl330_dmac *pl330;
+	struct dma_pl330_desc *descdone, *tmp;
 	unsigned long flags;
 	void __iomem *regs;
 	u32 val;
 	int id, ev, ret = 0;
 
-	if (!pi || !pi->pl330_data)
-		return 0;
-
-	regs = pi->base;
-	pl330 = pi->pl330_data;
+	regs = pl330->base;
 
 	spin_lock_irqsave(&pl330->lock, flags);
 
@@ -1696,13 +1539,13 @@ static int pl330_update(const struct pl330_info *pi)
 	else
 		pl330->dmac_tbd.reset_mngr = false;
 
-	val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
+	val = readl(regs + FSC) & ((1 << pl330->pcfg.num_chan) - 1);
 	pl330->dmac_tbd.reset_chan |= val;
 	if (val) {
 		int i = 0;
-		while (i < pi->pcfg.num_chan) {
+		while (i < pl330->pcfg.num_chan) {
 			if (val & (1 << i)) {
-				dev_info(pi->dev,
+				dev_info(pl330->ddma.dev,
 					"Reset Channel-%d\t CS-%x FTC-%x\n",
 						i, readl(regs + CS(i)),
 						readl(regs + FTC(i)));
@@ -1714,15 +1557,16 @@ static int pl330_update(const struct pl330_info *pi)
 
 	/* Check which event happened i.e, thread notified */
 	val = readl(regs + ES);
-	if (pi->pcfg.num_events < 32
-			&& val & ~((1 << pi->pcfg.num_events) - 1)) {
+	if (pl330->pcfg.num_events < 32
+			&& val & ~((1 << pl330->pcfg.num_events) - 1)) {
 		pl330->dmac_tbd.reset_dmac = true;
-		dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__);
+		dev_err(pl330->ddma.dev, "%s:%d Unexpected!\n", __func__,
+			__LINE__);
 		ret = 1;
 		goto updt_exit;
 	}
 
-	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+	for (ev = 0; ev < pl330->pcfg.num_events; ev++) {
 		if (val & (1 << ev)) { /* Event occurred */
 			struct pl330_thread *thrd;
 			u32 inten = readl(regs + INTEN);
@@ -1743,25 +1587,22 @@ static int pl330_update(const struct pl330_info *pi)
 				continue;
 
 			/* Detach the req */
-			rqdone = thrd->req[active].r;
-			thrd->req[active].r = NULL;
-
-			mark_free(thrd, active);
+			descdone = thrd->req[active].desc;
+			thrd->req[active].desc = NULL;
 
 			/* Get going again ASAP */
 			_start(thrd);
 
 			/* For now, just make a list of callbacks to be done */
-			list_add_tail(&rqdone->rqd, &pl330->req_done);
+			list_add_tail(&descdone->rqd, &pl330->req_done);
 		}
 	}
 
 	/* Now that we are in no hurry, do the callbacks */
-	list_for_each_entry_safe(rqdone, tmp, &pl330->req_done, rqd) {
-		list_del(&rqdone->rqd);
-
+	list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) {
+		list_del(&descdone->rqd);
 		spin_unlock_irqrestore(&pl330->lock, flags);
-		_callback(rqdone, PL330_ERR_NONE);
+		dma_pl330_rqcb(descdone, PL330_ERR_NONE);
 		spin_lock_irqsave(&pl330->lock, flags);
 	}
 
@@ -1778,65 +1619,13 @@ updt_exit:
 	return ret;
 }
 
-static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
-{
-	struct pl330_thread *thrd = ch_id;
-	struct pl330_dmac *pl330;
-	unsigned long flags;
-	int ret = 0, active;
-
-	if (!thrd || thrd->free || thrd->dmac->state == DYING)
-		return -EINVAL;
-
-	pl330 = thrd->dmac;
-	active = thrd->req_running;
-
-	spin_lock_irqsave(&pl330->lock, flags);
-
-	switch (op) {
-	case PL330_OP_FLUSH:
-		/* Make sure the channel is stopped */
-		_stop(thrd);
-
-		thrd->req[0].r = NULL;
-		thrd->req[1].r = NULL;
-		mark_free(thrd, 0);
-		mark_free(thrd, 1);
-		break;
-
-	case PL330_OP_ABORT:
-		/* Make sure the channel is stopped */
-		_stop(thrd);
-
-		/* ABORT is only for the active req */
-		if (active == -1)
-			break;
-
-		thrd->req[active].r = NULL;
-		mark_free(thrd, active);
-
-		/* Start the next */
-	case PL330_OP_START:
-		if ((active == -1) && !_start(thrd))
-			ret = -EIO;
-		break;
-
-	default:
-		ret = -EINVAL;
-	}
-
-	spin_unlock_irqrestore(&pl330->lock, flags);
-	return ret;
-}
-
 /* Reserve an event */
 static inline int _alloc_event(struct pl330_thread *thrd)
 {
 	struct pl330_dmac *pl330 = thrd->dmac;
-	struct pl330_info *pi = pl330->pinfo;
 	int ev;
 
-	for (ev = 0; ev < pi->pcfg.num_events; ev++)
+	for (ev = 0; ev < pl330->pcfg.num_events; ev++)
 		if (pl330->events[ev] == -1) {
 			pl330->events[ev] = thrd->id;
 			return ev;
@@ -1845,45 +1634,38 @@ static inline int _alloc_event(struct pl330_thread *thrd)
 	return -1;
 }
 
-static bool _chan_ns(const struct pl330_info *pi, int i)
+static bool _chan_ns(const struct pl330_dmac *pl330, int i)
 {
-	return pi->pcfg.irq_ns & (1 << i);
+	return pl330->pcfg.irq_ns & (1 << i);
 }
 
 /* Upon success, returns IdentityToken for the
  * allocated channel, NULL otherwise.
  */
-static void *pl330_request_channel(const struct pl330_info *pi)
+static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
 {
 	struct pl330_thread *thrd = NULL;
-	struct pl330_dmac *pl330;
 	unsigned long flags;
 	int chans, i;
 
-	if (!pi || !pi->pl330_data)
-		return NULL;
-
-	pl330 = pi->pl330_data;
-
 	if (pl330->state == DYING)
 		return NULL;
 
-	chans = pi->pcfg.num_chan;
+	chans = pl330->pcfg.num_chan;
 
 	spin_lock_irqsave(&pl330->lock, flags);
 
 	for (i = 0; i < chans; i++) {
 		thrd = &pl330->channels[i];
 		if ((thrd->free) && (!_manager_ns(thrd) ||
-					_chan_ns(pi, i))) {
+					_chan_ns(pl330, i))) {
 			thrd->ev = _alloc_event(thrd);
 			if (thrd->ev >= 0) {
 				thrd->free = false;
 				thrd->lstenq = 1;
-				thrd->req[0].r = NULL;
-				mark_free(thrd, 0);
-				thrd->req[1].r = NULL;
-				mark_free(thrd, 1);
+				thrd->req[0].desc = NULL;
+				thrd->req[1].desc = NULL;
+				thrd->req_running = -1;
 				break;
 			}
 		}
@@ -1899,17 +1681,15 @@ static void *pl330_request_channel(const struct pl330_info *pi)
 static inline void _free_event(struct pl330_thread *thrd, int ev)
 {
 	struct pl330_dmac *pl330 = thrd->dmac;
-	struct pl330_info *pi = pl330->pinfo;
 
 	/* If the event is valid and was held by the thread */
-	if (ev >= 0 && ev < pi->pcfg.num_events
+	if (ev >= 0 && ev < pl330->pcfg.num_events
 			&& pl330->events[ev] == thrd->id)
 		pl330->events[ev] = -1;
 }
 
-static void pl330_release_channel(void *ch_id)
+static void pl330_release_channel(struct pl330_thread *thrd)
 {
-	struct pl330_thread *thrd = ch_id;
 	struct pl330_dmac *pl330;
 	unsigned long flags;
 
@@ -1918,8 +1698,8 @@ static void pl330_release_channel(void *ch_id)
 
 	_stop(thrd);
 
-	_callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT);
-	_callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT);
+	dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, PL330_ERR_ABORT);
+	dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, PL330_ERR_ABORT);
 
 	pl330 = thrd->dmac;
 
@@ -1932,72 +1712,70 @@ static void pl330_release_channel(void *ch_id)
 /* Initialize the structure for PL330 configuration, that can be used
  * by the client driver the make best use of the DMAC
  */
-static void read_dmac_config(struct pl330_info *pi)
+static void read_dmac_config(struct pl330_dmac *pl330)
 {
-	void __iomem *regs = pi->base;
+	void __iomem *regs = pl330->base;
 	u32 val;
 
 	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
 	val &= CRD_DATA_WIDTH_MASK;
-	pi->pcfg.data_bus_width = 8 * (1 << val);
+	pl330->pcfg.data_bus_width = 8 * (1 << val);
 
 	val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
 	val &= CRD_DATA_BUFF_MASK;
-	pi->pcfg.data_buf_dep = val + 1;
+	pl330->pcfg.data_buf_dep = val + 1;
 
 	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
 	val &= CR0_NUM_CHANS_MASK;
 	val += 1;
-	pi->pcfg.num_chan = val;
+	pl330->pcfg.num_chan = val;
 
 	val = readl(regs + CR0);
 	if (val & CR0_PERIPH_REQ_SET) {
 		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
 		val += 1;
-		pi->pcfg.num_peri = val;
-		pi->pcfg.peri_ns = readl(regs + CR4);
+		pl330->pcfg.num_peri = val;
+		pl330->pcfg.peri_ns = readl(regs + CR4);
 	} else {
-		pi->pcfg.num_peri = 0;
+		pl330->pcfg.num_peri = 0;
 	}
 
 	val = readl(regs + CR0);
 	if (val & CR0_BOOT_MAN_NS)
-		pi->pcfg.mode |= DMAC_MODE_NS;
+		pl330->pcfg.mode |= DMAC_MODE_NS;
 	else
-		pi->pcfg.mode &= ~DMAC_MODE_NS;
+		pl330->pcfg.mode &= ~DMAC_MODE_NS;
 
 	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
 	val &= CR0_NUM_EVENTS_MASK;
 	val += 1;
-	pi->pcfg.num_events = val;
+	pl330->pcfg.num_events = val;
 
-	pi->pcfg.irq_ns = readl(regs + CR3);
+	pl330->pcfg.irq_ns = readl(regs + CR3);
 }
 
 static inline void _reset_thread(struct pl330_thread *thrd)
 {
 	struct pl330_dmac *pl330 = thrd->dmac;
-	struct pl330_info *pi = pl330->pinfo;
 
 	thrd->req[0].mc_cpu = pl330->mcode_cpu
-				+ (thrd->id * pi->mcbufsz);
+				+ (thrd->id * pl330->mcbufsz);
 	thrd->req[0].mc_bus = pl330->mcode_bus
-				+ (thrd->id * pi->mcbufsz);
-	thrd->req[0].r = NULL;
-	mark_free(thrd, 0);
+				+ (thrd->id * pl330->mcbufsz);
+	thrd->req[0].desc = NULL;
 
 	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
-				+ pi->mcbufsz / 2;
+				+ pl330->mcbufsz / 2;
 	thrd->req[1].mc_bus = thrd->req[0].mc_bus
-				+ pi->mcbufsz / 2;
-	thrd->req[1].r = NULL;
-	mark_free(thrd, 1);
+				+ pl330->mcbufsz / 2;
+	thrd->req[1].desc = NULL;
+
+	thrd->req_running = -1;
 }
 
 static int dmac_alloc_threads(struct pl330_dmac *pl330)
 {
-	struct pl330_info *pi = pl330->pinfo;
-	int chans = pi->pcfg.num_chan;
+	int chans = pl330->pcfg.num_chan;
 	struct pl330_thread *thrd;
 	int i;
 
@@ -2028,29 +1806,28 @@ static int dmac_alloc_threads(struct pl330_dmac *pl330)
 
 static int dmac_alloc_resources(struct pl330_dmac *pl330)
 {
-	struct pl330_info *pi = pl330->pinfo;
-	int chans = pi->pcfg.num_chan;
+	int chans = pl330->pcfg.num_chan;
 	int ret;
 
 	/*
 	 * Alloc MicroCode buffer for 'chans' Channel threads.
 	 * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
 	 */
-	pl330->mcode_cpu = dma_alloc_coherent(pi->dev,
-				chans * pi->mcbufsz,
+	pl330->mcode_cpu = dma_alloc_coherent(pl330->ddma.dev,
+				chans * pl330->mcbufsz,
 				&pl330->mcode_bus, GFP_KERNEL);
 	if (!pl330->mcode_cpu) {
-		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
+		dev_err(pl330->ddma.dev, "%s:%d Can't allocate memory!\n",
 			__func__, __LINE__);
 		return -ENOMEM;
 	}
 
 	ret = dmac_alloc_threads(pl330);
 	if (ret) {
-		dev_err(pi->dev, "%s:%d Can't to create channels for DMAC!\n",
+		dev_err(pl330->ddma.dev, "%s:%d Can't to create channels for DMAC!\n",
 			__func__, __LINE__);
-		dma_free_coherent(pi->dev,
-				chans * pi->mcbufsz,
+		dma_free_coherent(pl330->ddma.dev,
+				chans * pl330->mcbufsz,
 				pl330->mcode_cpu, pl330->mcode_bus);
 		return ret;
 	}
@@ -2058,71 +1835,45 @@ static int dmac_alloc_resources(struct pl330_dmac *pl330)
 	return 0;
 }
 
-static int pl330_add(struct pl330_info *pi)
+static int pl330_add(struct pl330_dmac *pl330)
 {
-	struct pl330_dmac *pl330;
 	void __iomem *regs;
 	int i, ret;
 
-	if (!pi || !pi->dev)
-		return -EINVAL;
-
-	/* If already added */
-	if (pi->pl330_data)
-		return -EINVAL;
-
-	/*
-	 * If the SoC can perform reset on the DMAC, then do it
-	 * before reading its configuration.
-	 */
-	if (pi->dmac_reset)
-		pi->dmac_reset(pi);
-
-	regs = pi->base;
+	regs = pl330->base;
 
 	/* Check if we can handle this DMAC */
-	if ((pi->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) {
-		dev_err(pi->dev, "PERIPH_ID 0x%x !\n", pi->pcfg.periph_id);
+	if ((pl330->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) {
+		dev_err(pl330->ddma.dev, "PERIPH_ID 0x%x !\n",
+			pl330->pcfg.periph_id);
 		return -EINVAL;
 	}
 
 	/* Read the configuration of the DMAC */
-	read_dmac_config(pi);
+	read_dmac_config(pl330);
 
-	if (pi->pcfg.num_events == 0) {
-		dev_err(pi->dev, "%s:%d Can't work without events!\n",
+	if (pl330->pcfg.num_events == 0) {
+		dev_err(pl330->ddma.dev, "%s:%d Can't work without events!\n",
 			__func__, __LINE__);
 		return -EINVAL;
 	}
 
-	pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
-	if (!pl330) {
-		dev_err(pi->dev, "%s:%d Can't allocate memory!\n",
-			__func__, __LINE__);
-		return -ENOMEM;
-	}
-
-	/* Assign the info structure and private data */
-	pl330->pinfo = pi;
-	pi->pl330_data = pl330;
-
 	spin_lock_init(&pl330->lock);
 
 	INIT_LIST_HEAD(&pl330->req_done);
 
 	/* Use default MC buffer size if not provided */
-	if (!pi->mcbufsz)
-		pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+	if (!pl330->mcbufsz)
+		pl330->mcbufsz = MCODE_BUFF_PER_REQ * 2;
 
 	/* Mark all events as free */
-	for (i = 0; i < pi->pcfg.num_events; i++)
+	for (i = 0; i < pl330->pcfg.num_events; i++)
 		pl330->events[i] = -1;
 
 	/* Allocate resources needed by the DMAC */
 	ret = dmac_alloc_resources(pl330);
 	if (ret) {
-		dev_err(pi->dev, "Unable to create channels for DMAC\n");
-		kfree(pl330);
+		dev_err(pl330->ddma.dev, "Unable to create channels for DMAC\n");
 		return ret;
 	}
 
@@ -2135,15 +1886,13 @@ static int pl330_add(struct pl330_info *pi)
 
 static int dmac_free_threads(struct pl330_dmac *pl330)
 {
-	struct pl330_info *pi = pl330->pinfo;
-	int chans = pi->pcfg.num_chan;
 	struct pl330_thread *thrd;
 	int i;
 
 	/* Release Channel threads */
-	for (i = 0; i < chans; i++) {
+	for (i = 0; i < pl330->pcfg.num_chan; i++) {
 		thrd = &pl330->channels[i];
-		pl330_release_channel((void *)thrd);
+		pl330_release_channel(thrd);
 	}
 
 	/* Free memory */
@@ -2152,35 +1901,18 @@ static int dmac_free_threads(struct pl330_dmac *pl330)
 	return 0;
 }
 
-static void dmac_free_resources(struct pl330_dmac *pl330)
+static void pl330_del(struct pl330_dmac *pl330)
 {
-	struct pl330_info *pi = pl330->pinfo;
-	int chans = pi->pcfg.num_chan;
-
-	dmac_free_threads(pl330);
-
-	dma_free_coherent(pi->dev, chans * pi->mcbufsz,
-				pl330->mcode_cpu, pl330->mcode_bus);
-}
-
-static void pl330_del(struct pl330_info *pi)
-{
-	struct pl330_dmac *pl330;
-
-	if (!pi || !pi->pl330_data)
-		return;
-
-	pl330 = pi->pl330_data;
-
 	pl330->state = UNINIT;
 
 	tasklet_kill(&pl330->tasks);
 
 	/* Free DMAC resources */
-	dmac_free_resources(pl330);
+	dmac_free_threads(pl330);
 
-	kfree(pl330);
-	pi->pl330_data = NULL;
+	dma_free_coherent(pl330->ddma.dev,
+		pl330->pcfg.num_chan * pl330->mcbufsz, pl330->mcode_cpu,
+		pl330->mcode_bus);
 }
 
 /* forward declaration */
@@ -2212,8 +1944,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
 		if (desc->status == BUSY)
 			continue;
 
-		ret = pl330_submit_req(pch->pl330_chid,
-						&desc->req);
+		ret = pl330_submit_req(pch->thread, desc);
 		if (!ret) {
 			desc->status = BUSY;
 		} else if (ret == -EAGAIN) {
@@ -2222,7 +1953,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
 		} else {
 			/* Unacceptable request */
 			desc->status = DONE;
-			dev_err(pch->dmac->pif.dev, "%s:%d Bad Desc(%d)\n",
+			dev_err(pch->dmac->ddma.dev, "%s:%d Bad Desc(%d)\n",
 					__func__, __LINE__, desc->txd.cookie);
 			tasklet_schedule(&pch->task);
 		}
@@ -2249,7 +1980,9 @@ static void pl330_tasklet(unsigned long data)
 	fill_queue(pch);
 
 	/* Make sure the PL330 Channel thread is active */
-	pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START);
+	spin_lock(&pch->thread->dmac->lock);
+	_start(pch->thread);
+	spin_unlock(&pch->thread->dmac->lock);
 
 	while (!list_empty(&pch->completed_list)) {
 		dma_async_tx_callback callback;
@@ -2280,25 +2013,6 @@ static void pl330_tasklet(unsigned long data)
 	spin_unlock_irqrestore(&pch->lock, flags);
 }
 
-static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
-{
-	struct dma_pl330_desc *desc = token;
-	struct dma_pl330_chan *pch = desc->pchan;
-	unsigned long flags;
-
-	/* If desc aborted */
-	if (!pch)
-		return;
-
-	spin_lock_irqsave(&pch->lock, flags);
-
-	desc->status = DONE;
-
-	spin_unlock_irqrestore(&pch->lock, flags);
-
-	tasklet_schedule(&pch->task);
-}
-
 bool pl330_filter(struct dma_chan *chan, void *param)
 {
 	u8 *peri_id;
@@ -2315,23 +2029,26 @@ static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
 						struct of_dma *ofdma)
 {
 	int count = dma_spec->args_count;
-	struct dma_pl330_dmac *pdmac = ofdma->of_dma_data;
+	struct pl330_dmac *pl330 = ofdma->of_dma_data;
 	unsigned int chan_id;
 
+	if (!pl330)
+		return NULL;
+
 	if (count != 1)
 		return NULL;
 
 	chan_id = dma_spec->args[0];
-	if (chan_id >= pdmac->num_peripherals)
+	if (chan_id >= pl330->num_peripherals)
 		return NULL;
 
-	return dma_get_slave_channel(&pdmac->peripherals[chan_id].chan);
+	return dma_get_slave_channel(&pl330->peripherals[chan_id].chan);
 }
 
 static int pl330_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_dmac *pdmac = pch->dmac;
+	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pch->lock, flags);
@@ -2339,8 +2056,8 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan)
 	dma_cookie_init(chan);
 	pch->cyclic = false;
 
-	pch->pl330_chid = pl330_request_channel(&pdmac->pif);
-	if (!pch->pl330_chid) {
+	pch->thread = pl330_request_channel(pl330);
+	if (!pch->thread) {
 		spin_unlock_irqrestore(&pch->lock, flags);
 		return -ENOMEM;
 	}
@@ -2357,7 +2074,7 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 	struct dma_pl330_chan *pch = to_pchan(chan);
 	struct dma_pl330_desc *desc;
 	unsigned long flags;
-	struct dma_pl330_dmac *pdmac = pch->dmac;
+	struct pl330_dmac *pl330 = pch->dmac;
 	struct dma_slave_config *slave_config;
 	LIST_HEAD(list);
 
@@ -2365,8 +2082,13 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 	case DMA_TERMINATE_ALL:
 		spin_lock_irqsave(&pch->lock, flags);
 
-		/* FLUSH the PL330 Channel thread */
-		pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH);
+		spin_lock(&pl330->lock);
+		_stop(pch->thread);
+		spin_unlock(&pl330->lock);
+
+		pch->thread->req[0].desc = NULL;
+		pch->thread->req[1].desc = NULL;
+		pch->thread->req_running = -1;
 
 		/* Mark all desc done */
 		list_for_each_entry(desc, &pch->submitted_list, node) {
@@ -2384,9 +2106,9 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 			dma_cookie_complete(&desc->txd);
 		}
 
-		list_splice_tail_init(&pch->submitted_list, &pdmac->desc_pool);
-		list_splice_tail_init(&pch->work_list, &pdmac->desc_pool);
-		list_splice_tail_init(&pch->completed_list, &pdmac->desc_pool);
+		list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool);
+		list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
+		list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
 		spin_unlock_irqrestore(&pch->lock, flags);
 		break;
 	case DMA_SLAVE_CONFIG:
@@ -2409,7 +2131,7 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned
 		}
 		break;
 	default:
-		dev_err(pch->dmac->pif.dev, "Not supported command.\n");
+		dev_err(pch->dmac->ddma.dev, "Not supported command.\n");
 		return -ENXIO;
 	}
 
@@ -2425,8 +2147,8 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
 
 	spin_lock_irqsave(&pch->lock, flags);
 
-	pl330_release_channel(pch->pl330_chid);
-	pch->pl330_chid = NULL;
+	pl330_release_channel(pch->thread);
+	pch->thread = NULL;
 
 	if (pch->cyclic)
 		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
@@ -2489,57 +2211,46 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
 
 static inline void _init_desc(struct dma_pl330_desc *desc)
 {
-	desc->req.x = &desc->px;
-	desc->req.token = desc;
 	desc->rqcfg.swap = SWAP_NO;
-	desc->rqcfg.scctl = SCCTRL0;
-	desc->rqcfg.dcctl = DCCTRL0;
-	desc->req.cfg = &desc->rqcfg;
-	desc->req.xfer_cb = dma_pl330_rqcb;
+	desc->rqcfg.scctl = CCTRL0;
+	desc->rqcfg.dcctl = CCTRL0;
 	desc->txd.tx_submit = pl330_tx_submit;
 
 	INIT_LIST_HEAD(&desc->node);
 }
 
 /* Returns the number of descriptors added to the DMAC pool */
-static int add_desc(struct dma_pl330_dmac *pdmac, gfp_t flg, int count)
+static int add_desc(struct pl330_dmac *pl330, gfp_t flg, int count)
 {
 	struct dma_pl330_desc *desc;
 	unsigned long flags;
 	int i;
 
-	if (!pdmac)
-		return 0;
-
 	desc = kcalloc(count, sizeof(*desc), flg);
 	if (!desc)
 		return 0;
 
-	spin_lock_irqsave(&pdmac->pool_lock, flags);
+	spin_lock_irqsave(&pl330->pool_lock, flags);
 
 	for (i = 0; i < count; i++) {
 		_init_desc(&desc[i]);
-		list_add_tail(&desc[i].node, &pdmac->desc_pool);
+		list_add_tail(&desc[i].node, &pl330->desc_pool);
 	}
 
-	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+	spin_unlock_irqrestore(&pl330->pool_lock, flags);
 
 	return count;
 }
 
-static struct dma_pl330_desc *
-pluck_desc(struct dma_pl330_dmac *pdmac)
+static struct dma_pl330_desc *pluck_desc(struct pl330_dmac *pl330)
 {
 	struct dma_pl330_desc *desc = NULL;
 	unsigned long flags;
 
-	if (!pdmac)
-		return NULL;
-
-	spin_lock_irqsave(&pdmac->pool_lock, flags);
+	spin_lock_irqsave(&pl330->pool_lock, flags);
 
-	if (!list_empty(&pdmac->desc_pool)) {
-		desc = list_entry(pdmac->desc_pool.next,
+	if (!list_empty(&pl330->desc_pool)) {
+		desc = list_entry(pl330->desc_pool.next,
 				struct dma_pl330_desc, node);
 
 		list_del_init(&desc->node);
@@ -2548,29 +2259,29 @@ pluck_desc(struct dma_pl330_dmac *pdmac)
 		desc->txd.callback = NULL;
 	}
 
-	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+	spin_unlock_irqrestore(&pl330->pool_lock, flags);
 
 	return desc;
 }
 
 static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 {
-	struct dma_pl330_dmac *pdmac = pch->dmac;
+	struct pl330_dmac *pl330 = pch->dmac;
 	u8 *peri_id = pch->chan.private;
 	struct dma_pl330_desc *desc;
 
 	/* Pluck one desc from the pool of DMAC */
-	desc = pluck_desc(pdmac);
+	desc = pluck_desc(pl330);
 
 	/* If the DMAC pool is empty, alloc new */
 	if (!desc) {
-		if (!add_desc(pdmac, GFP_ATOMIC, 1))
+		if (!add_desc(pl330, GFP_ATOMIC, 1))
 			return NULL;
 
 		/* Try again */
-		desc = pluck_desc(pdmac);
+		desc = pluck_desc(pl330);
 		if (!desc) {
-			dev_err(pch->dmac->pif.dev,
+			dev_err(pch->dmac->ddma.dev,
 				"%s:%d ALERT!\n", __func__, __LINE__);
 			return NULL;
 		}
@@ -2581,8 +2292,8 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 	desc->txd.cookie = 0;
 	async_tx_ack(&desc->txd);
 
-	desc->req.peri = peri_id ? pch->chan.chan_id : 0;
-	desc->rqcfg.pcfg = &pch->dmac->pif.pcfg;
+	desc->peri = peri_id ? pch->chan.chan_id : 0;
+	desc->rqcfg.pcfg = &pch->dmac->pcfg;
 
 	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
 
@@ -2592,7 +2303,6 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 static inline void fill_px(struct pl330_xfer *px,
 		dma_addr_t dst, dma_addr_t src, size_t len)
 {
-	px->next = NULL;
 	px->bytes = len;
 	px->dst_addr = dst;
 	px->src_addr = src;
@@ -2605,7 +2315,7 @@ __pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst,
 	struct dma_pl330_desc *desc = pl330_get_desc(pch);
 
 	if (!desc) {
-		dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
+		dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
 			__func__, __LINE__);
 		return NULL;
 	}
@@ -2629,11 +2339,11 @@ __pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst,
 static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 {
 	struct dma_pl330_chan *pch = desc->pchan;
-	struct pl330_info *pi = &pch->dmac->pif;
+	struct pl330_dmac *pl330 = pch->dmac;
 	int burst_len;
 
-	burst_len = pi->pcfg.data_bus_width / 8;
-	burst_len *= pi->pcfg.data_buf_dep;
+	burst_len = pl330->pcfg.data_bus_width / 8;
+	burst_len *= pl330->pcfg.data_buf_dep;
 	burst_len >>= desc->rqcfg.brst_size;
 
 	/* src/dst_burst_len can't be more than 16 */
@@ -2652,11 +2362,11 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
 static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
 		size_t period_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+		unsigned long flags)
 {
 	struct dma_pl330_desc *desc = NULL, *first = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_dmac *pdmac = pch->dmac;
+	struct pl330_dmac *pl330 = pch->dmac;
 	unsigned int i;
 	dma_addr_t dst;
 	dma_addr_t src;
@@ -2665,7 +2375,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		return NULL;
 
 	if (!is_slave_direction(direction)) {
-		dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n",
+		dev_err(pch->dmac->ddma.dev, "%s:%d Invalid dma direction\n",
 		__func__, __LINE__);
 		return NULL;
 	}
@@ -2673,23 +2383,23 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	for (i = 0; i < len / period_len; i++) {
 		desc = pl330_get_desc(pch);
 		if (!desc) {
-			dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
+			dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
 
 			if (!first)
 				return NULL;
 
-			spin_lock_irqsave(&pdmac->pool_lock, flags);
+			spin_lock_irqsave(&pl330->pool_lock, flags);
 
 			while (!list_empty(&first->node)) {
 				desc = list_entry(first->node.next,
 						struct dma_pl330_desc, node);
-				list_move_tail(&desc->node, &pdmac->desc_pool);
+				list_move_tail(&desc->node, &pl330->desc_pool);
 			}
 
-			list_move_tail(&first->node, &pdmac->desc_pool);
+			list_move_tail(&first->node, &pl330->desc_pool);
 
-			spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+			spin_unlock_irqrestore(&pl330->pool_lock, flags);
 
 			return NULL;
 		}
@@ -2698,14 +2408,12 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 		case DMA_MEM_TO_DEV:
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
-			desc->req.rqtype = MEMTODEV;
 			src = dma_addr;
 			dst = pch->fifo_addr;
 			break;
 		case DMA_DEV_TO_MEM:
 			desc->rqcfg.src_inc = 0;
 			desc->rqcfg.dst_inc = 1;
-			desc->req.rqtype = DEVTOMEM;
 			src = pch->fifo_addr;
 			dst = dma_addr;
 			break;
@@ -2713,6 +2421,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 			break;
 		}
 
+		desc->rqtype = direction;
 		desc->rqcfg.brst_size = pch->burst_sz;
 		desc->rqcfg.brst_len = 1;
 		fill_px(&desc->px, dst, src, period_len);
@@ -2740,24 +2449,22 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 {
 	struct dma_pl330_desc *desc;
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct pl330_info *pi;
+	struct pl330_dmac *pl330 = pch->dmac;
 	int burst;
 
 	if (unlikely(!pch || !len))
 		return NULL;
 
-	pi = &pch->dmac->pif;
-
 	desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
 	if (!desc)
 		return NULL;
 
 	desc->rqcfg.src_inc = 1;
 	desc->rqcfg.dst_inc = 1;
-	desc->req.rqtype = MEMTOMEM;
+	desc->rqtype = DMA_MEM_TO_MEM;
 
 	/* Select max possible burst size */
-	burst = pi->pcfg.data_bus_width / 8;
+	burst = pl330->pcfg.data_bus_width / 8;
 
 	while (burst > 1) {
 		if (!(len % burst))
@@ -2776,7 +2483,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 	return &desc->txd;
 }
 
-static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac,
+static void __pl330_giveback_desc(struct pl330_dmac *pl330,
 				  struct dma_pl330_desc *first)
 {
 	unsigned long flags;
@@ -2785,17 +2492,17 @@ static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac,
 	if (!first)
 		return;
 
-	spin_lock_irqsave(&pdmac->pool_lock, flags);
+	spin_lock_irqsave(&pl330->pool_lock, flags);
 
 	while (!list_empty(&first->node)) {
 		desc = list_entry(first->node.next,
 				struct dma_pl330_desc, node);
-		list_move_tail(&desc->node, &pdmac->desc_pool);
+		list_move_tail(&desc->node, &pl330->desc_pool);
 	}
 
-	list_move_tail(&first->node, &pdmac->desc_pool);
+	list_move_tail(&first->node, &pl330->desc_pool);
 
-	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+	spin_unlock_irqrestore(&pl330->pool_lock, flags);
 }
 
 static struct dma_async_tx_descriptor *
@@ -2820,12 +2527,12 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 		desc = pl330_get_desc(pch);
 		if (!desc) {
-			struct dma_pl330_dmac *pdmac = pch->dmac;
+			struct pl330_dmac *pl330 = pch->dmac;
 
-			dev_err(pch->dmac->pif.dev,
+			dev_err(pch->dmac->ddma.dev,
 				"%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
-			__pl330_giveback_desc(pdmac, first);
+			__pl330_giveback_desc(pl330, first);
 
 			return NULL;
 		}
@@ -2838,19 +2545,18 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		if (direction == DMA_MEM_TO_DEV) {
 			desc->rqcfg.src_inc = 1;
 			desc->rqcfg.dst_inc = 0;
-			desc->req.rqtype = MEMTODEV;
 			fill_px(&desc->px,
 				addr, sg_dma_address(sg), sg_dma_len(sg));
 		} else {
 			desc->rqcfg.src_inc = 0;
 			desc->rqcfg.dst_inc = 1;
-			desc->req.rqtype = DEVTOMEM;
 			fill_px(&desc->px,
 				sg_dma_address(sg), addr, sg_dma_len(sg));
 		}
 
 		desc->rqcfg.brst_size = pch->burst_sz;
 		desc->rqcfg.brst_len = 1;
+		desc->rqtype = direction;
 	}
 
 	/* Return the last desc in the chain */
@@ -2890,9 +2596,9 @@ static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
 	struct dma_pl330_platdata *pdat;
-	struct dma_pl330_dmac *pdmac;
+	struct pl330_config *pcfg;
+	struct pl330_dmac *pl330;
 	struct dma_pl330_chan *pch, *_p;
-	struct pl330_info *pi;
 	struct dma_device *pd;
 	struct resource *res;
 	int i, ret, irq;
@@ -2905,30 +2611,27 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		return ret;
 
 	/* Allocate a new DMAC and its Channels */
-	pdmac = devm_kzalloc(&adev->dev, sizeof(*pdmac), GFP_KERNEL);
-	if (!pdmac) {
+	pl330 = devm_kzalloc(&adev->dev, sizeof(*pl330), GFP_KERNEL);
+	if (!pl330) {
 		dev_err(&adev->dev, "unable to allocate mem\n");
 		return -ENOMEM;
 	}
 
-	pi = &pdmac->pif;
-	pi->dev = &adev->dev;
-	pi->pl330_data = NULL;
-	pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
+	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
 
 	res = &adev->res;
-	pi->base = devm_ioremap_resource(&adev->dev, res);
-	if (IS_ERR(pi->base))
-		return PTR_ERR(pi->base);
+	pl330->base = devm_ioremap_resource(&adev->dev, res);
+	if (IS_ERR(pl330->base))
+		return PTR_ERR(pl330->base);
 
-	amba_set_drvdata(adev, pdmac);
+	amba_set_drvdata(adev, pl330);
 
 	for (i = 0; i < AMBA_NR_IRQS; i++) {
 		irq = adev->irq[i];
 		if (irq) {
 			ret = devm_request_irq(&adev->dev, irq,
 					       pl330_irq_handler, 0,
-					       dev_name(&adev->dev), pi);
+					       dev_name(&adev->dev), pl330);
 			if (ret)
 				return ret;
 		} else {
@@ -2936,38 +2639,40 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		}
 	}
 
-	pi->pcfg.periph_id = adev->periphid;
-	ret = pl330_add(pi);
+	pcfg = &pl330->pcfg;
+
+	pcfg->periph_id = adev->periphid;
+	ret = pl330_add(pl330);
 	if (ret)
 		return ret;
 
-	INIT_LIST_HEAD(&pdmac->desc_pool);
-	spin_lock_init(&pdmac->pool_lock);
+	INIT_LIST_HEAD(&pl330->desc_pool);
+	spin_lock_init(&pl330->pool_lock);
 
 	/* Create a descriptor pool of default size */
-	if (!add_desc(pdmac, GFP_KERNEL, NR_DEFAULT_DESC))
+	if (!add_desc(pl330, GFP_KERNEL, NR_DEFAULT_DESC))
 		dev_warn(&adev->dev, "unable to allocate desc\n");
 
-	pd = &pdmac->ddma;
+	pd = &pl330->ddma;
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
 	if (pdat)
-		num_chan = max_t(int, pdat->nr_valid_peri, pi->pcfg.num_chan);
+		num_chan = max_t(int, pdat->nr_valid_peri, pcfg->num_chan);
 	else
-		num_chan = max_t(int, pi->pcfg.num_peri, pi->pcfg.num_chan);
+		num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
 
-	pdmac->num_peripherals = num_chan;
+	pl330->num_peripherals = num_chan;
 
-	pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL);
-	if (!pdmac->peripherals) {
+	pl330->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL);
+	if (!pl330->peripherals) {
 		ret = -ENOMEM;
-		dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n");
+		dev_err(&adev->dev, "unable to allocate pl330->peripherals\n");
 		goto probe_err2;
 	}
 
 	for (i = 0; i < num_chan; i++) {
-		pch = &pdmac->peripherals[i];
+		pch = &pl330->peripherals[i];
 		if (!adev->dev.of_node)
 			pch->chan.private = pdat ? &pdat->peri_id[i] : NULL;
 		else
@@ -2977,9 +2682,9 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		INIT_LIST_HEAD(&pch->work_list);
 		INIT_LIST_HEAD(&pch->completed_list);
 		spin_lock_init(&pch->lock);
-		pch->pl330_chid = NULL;
+		pch->thread = NULL;
 		pch->chan.device = pd;
-		pch->dmac = pdmac;
+		pch->dmac = pl330;
 
 		/* Add the channel to the DMAC list */
 		list_add_tail(&pch->chan.device_node, &pd->channels);
@@ -2990,7 +2695,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		pd->cap_mask = pdat->cap_mask;
 	} else {
 		dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-		if (pi->pcfg.num_peri) {
+		if (pcfg->num_peri) {
 			dma_cap_set(DMA_SLAVE, pd->cap_mask);
 			dma_cap_set(DMA_CYCLIC, pd->cap_mask);
 			dma_cap_set(DMA_PRIVATE, pd->cap_mask);
@@ -3015,14 +2720,14 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 
 	if (adev->dev.of_node) {
 		ret = of_dma_controller_register(adev->dev.of_node,
-					 of_dma_pl330_xlate, pdmac);
+					 of_dma_pl330_xlate, pl330);
 		if (ret) {
 			dev_err(&adev->dev,
 			"unable to register DMA to the generic DT DMA helpers\n");
 		}
 	}
 
-	adev->dev.dma_parms = &pdmac->dma_parms;
+	adev->dev.dma_parms = &pl330->dma_parms;
 
 	/*
 	 * This is the limit for transfers with a buswidth of 1, larger
@@ -3037,14 +2742,13 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		"Loaded driver for PL330 DMAC-%d\n", adev->periphid);
 	dev_info(&adev->dev,
 		"\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n",
-		pi->pcfg.data_buf_dep,
-		pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan,
-		pi->pcfg.num_peri, pi->pcfg.num_events);
+		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
+		pcfg->num_peri, pcfg->num_events);
 
 	return 0;
 probe_err3:
 	/* Idle the DMAC */
-	list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
+	list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
 			chan.device_node) {
 
 		/* Remove the channel */
@@ -3055,27 +2759,23 @@ probe_err3:
 		pl330_free_chan_resources(&pch->chan);
 	}
 probe_err2:
-	pl330_del(pi);
+	pl330_del(pl330);
 
 	return ret;
 }
 
 static int pl330_remove(struct amba_device *adev)
 {
-	struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
+	struct pl330_dmac *pl330 = amba_get_drvdata(adev);
 	struct dma_pl330_chan *pch, *_p;
-	struct pl330_info *pi;
-
-	if (!pdmac)
-		return 0;
 
 	if (adev->dev.of_node)
 		of_dma_controller_free(adev->dev.of_node);
 
-	dma_async_device_unregister(&pdmac->ddma);
+	dma_async_device_unregister(&pl330->ddma);
 
 	/* Idle the DMAC */
-	list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
+	list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
 			chan.device_node) {
 
 		/* Remove the channel */
@@ -3086,9 +2786,7 @@ static int pl330_remove(struct amba_device *adev)
 		pl330_free_chan_resources(&pch->chan);
 	}
 
-	pi = &pdmac->pif;
-
-	pl330_del(pi);
+	pl330_del(pl330);
 
 	return 0;
 }
diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c
index 82c9231..7a4bbb0 100644
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c
@@ -61,12 +61,17 @@ struct bam_desc_hw {
 #define DESC_FLAG_INT BIT(15)
 #define DESC_FLAG_EOT BIT(14)
 #define DESC_FLAG_EOB BIT(13)
+#define DESC_FLAG_NWD BIT(12)
 
 struct bam_async_desc {
 	struct virt_dma_desc vd;
 
 	u32 num_desc;
 	u32 xfer_len;
+
+	/* transaction flags, EOT|EOB|NWD */
+	u16 flags;
+
 	struct bam_desc_hw *curr_desc;
 
 	enum dma_transfer_direction dir;
@@ -490,6 +495,14 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
 	if (!async_desc)
 		goto err_out;
 
+	if (flags & DMA_PREP_FENCE)
+		async_desc->flags |= DESC_FLAG_NWD;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		async_desc->flags |= DESC_FLAG_EOT;
+	else
+		async_desc->flags |= DESC_FLAG_INT;
+
 	async_desc->num_desc = num_alloc;
 	async_desc->curr_desc = async_desc->desc;
 	async_desc->dir = direction;
@@ -793,8 +806,11 @@ static void bam_start_dma(struct bam_chan *bchan)
 	else
 		async_desc->xfer_len = async_desc->num_desc;
 
-	/* set INT on last descriptor */
-	desc[async_desc->xfer_len - 1].flags |= DESC_FLAG_INT;
+	/* set any special flags on the last descriptor */
+	if (async_desc->num_desc == async_desc->xfer_len)
+		desc[async_desc->xfer_len - 1].flags = async_desc->flags;
+	else
+		desc[async_desc->xfer_len - 1].flags |= DESC_FLAG_INT;
 
 	if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
 		u32 partial = MAX_DESCRIPTORS - bchan->tail;
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
index 012520c..7416572 100644
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -889,8 +889,7 @@ static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
 
 static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
-	enum dma_transfer_direction direction, unsigned long flags,
-	void *context)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
 	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 5ebdfbc..4b0ef04 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -612,7 +612,7 @@ static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg(
 
 static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
-	enum dma_transfer_direction dir, unsigned long flags, void *context)
+	enum dma_transfer_direction dir, unsigned long flags)
 {
 	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
 	struct sa11x0_dma_desc *txd;
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 0f719816..0349125 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -2,21 +2,39 @@
 # DMA engine configuration for sh
 #
 
+#
+# DMA Engine Helpers
+#
+
 config SH_DMAE_BASE
 	bool "Renesas SuperH DMA Engine support"
-	depends on (SUPERH && SH_DMA) || ARCH_SHMOBILE || COMPILE_TEST
+	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
+	depends on !SUPERH || SH_DMA
 	depends on !SH_DMA_API
 	default y
 	select DMA_ENGINE
 	help
 	  Enable support for the Renesas SuperH DMA controllers.
 
+#
+# DMA Controllers
+#
+
 config SH_DMAE
 	tristate "Renesas SuperH DMAC support"
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas SuperH DMA controllers.
 
+if SH_DMAE
+
+config SH_DMAE_R8A73A4
+	def_bool y
+	depends on ARCH_R8A73A4
+	depends on OF
+
+endif
+
 config SUDMAC
 	tristate "Renesas SUDMAC support"
 	depends on SH_DMAE_BASE
@@ -34,7 +52,3 @@ config RCAR_AUDMAC_PP
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
-
-config SHDMA_R8A73A4
-	def_bool y
-	depends on ARCH_R8A73A4 && SH_DMAE != n
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index 1ce88b2..0a5cfdb 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -1,10 +1,18 @@
+#
+# DMA Engine Helpers
+#
+
 obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o
-obj-$(CONFIG_SH_DMAE) += shdma.o
+
+#
+# DMA Controllers
+#
+
 shdma-y := shdmac.o
-ifeq ($(CONFIG_OF),y)
-shdma-$(CONFIG_SHDMA_R8A73A4) += shdma-r8a73a4.o
-endif
+shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o
 shdma-objs := $(shdma-y)
+obj-$(CONFIG_SH_DMAE) += shdma.o
+
 obj-$(CONFIG_SUDMAC) += sudmac.o
 obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
 obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o
diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c
index 2de7728..dabbf0a 100644
--- a/drivers/dma/sh/rcar-audmapp.c
+++ b/drivers/dma/sh/rcar-audmapp.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/dmaengine.h>
+#include <linux/of_dma.h>
 #include <linux/platform_data/dma-rcar-audmapp.h>
 #include <linux/platform_device.h>
 #include <linux/shdma-base.h>
@@ -45,8 +46,9 @@
 
 struct audmapp_chan {
 	struct shdma_chan shdma_chan;
-	struct audmapp_slave_config *config;
 	void __iomem *base;
+	dma_addr_t slave_addr;
+	u32 chcr;
 };
 
 struct audmapp_device {
@@ -56,7 +58,16 @@ struct audmapp_device {
 	void __iomem *chan_reg;
 };
 
+struct audmapp_desc {
+	struct shdma_desc shdma_desc;
+	dma_addr_t src;
+	dma_addr_t dst;
+};
+
+#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
+
 #define to_chan(chan) container_of(chan, struct audmapp_chan, shdma_chan)
+#define to_desc(sdesc) container_of(sdesc, struct audmapp_desc, shdma_desc)
 #define to_dev(chan) container_of(chan->shdma_chan.dma_chan.device,	\
 				  struct audmapp_device, shdma_dev.dma_dev)
 
@@ -90,70 +101,82 @@ static void audmapp_halt(struct shdma_chan *schan)
 }
 
 static void audmapp_start_xfer(struct shdma_chan *schan,
-			       struct shdma_desc *sdecs)
+			       struct shdma_desc *sdesc)
 {
 	struct audmapp_chan *auchan = to_chan(schan);
 	struct audmapp_device *audev = to_dev(auchan);
-	struct audmapp_slave_config *cfg = auchan->config;
+	struct audmapp_desc *desc = to_desc(sdesc);
 	struct device *dev = audev->dev;
-	u32 chcr = cfg->chcr | PDMACHCR_DE;
+	u32 chcr = auchan->chcr | PDMACHCR_DE;
 
-	dev_dbg(dev, "src/dst/chcr = %pad/%pad/%x\n",
-		&cfg->src, &cfg->dst, cfg->chcr);
+	dev_dbg(dev, "src/dst/chcr = %pad/%pad/%08x\n",
+		&desc->src, &desc->dst, chcr);
 
-	audmapp_write(auchan, cfg->src,	PDMASAR);
-	audmapp_write(auchan, cfg->dst,	PDMADAR);
+	audmapp_write(auchan, desc->src,	PDMASAR);
+	audmapp_write(auchan, desc->dst,	PDMADAR);
 	audmapp_write(auchan, chcr,	PDMACHCR);
 }
 
-static struct audmapp_slave_config *
-audmapp_find_slave(struct audmapp_chan *auchan, int slave_id)
+static void audmapp_get_config(struct audmapp_chan *auchan, int slave_id,
+			      u32 *chcr, dma_addr_t *dst)
 {
 	struct audmapp_device *audev = to_dev(auchan);
 	struct audmapp_pdata *pdata = audev->pdata;
 	struct audmapp_slave_config *cfg;
 	int i;
 
+	*chcr	= 0;
+	*dst	= 0;
+
+	if (!pdata) { /* DT */
+		*chcr = ((u32)slave_id) << 16;
+		auchan->shdma_chan.slave_id = (slave_id) >> 8;
+		return;
+	}
+
+	/* non-DT */
+
 	if (slave_id >= AUDMAPP_SLAVE_NUMBER)
-		return NULL;
+		return;
 
 	for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
-		if (cfg->slave_id == slave_id)
-			return cfg;
-
-	return NULL;
+		if (cfg->slave_id == slave_id) {
+			*chcr	= cfg->chcr;
+			*dst	= cfg->dst;
+			break;
+		}
 }
 
 static int audmapp_set_slave(struct shdma_chan *schan, int slave_id,
 			     dma_addr_t slave_addr, bool try)
 {
 	struct audmapp_chan *auchan = to_chan(schan);
-	struct audmapp_slave_config *cfg =
-		audmapp_find_slave(auchan, slave_id);
+	u32 chcr;
+	dma_addr_t dst;
+
+	audmapp_get_config(auchan, slave_id, &chcr, &dst);
 
-	if (!cfg)
-		return -ENODEV;
 	if (try)
 		return 0;
 
-	auchan->config	= cfg;
+	auchan->chcr		= chcr;
+	auchan->slave_addr	= slave_addr ? : dst;
 
 	return 0;
 }
 
 static int audmapp_desc_setup(struct shdma_chan *schan,
-			      struct shdma_desc *sdecs,
+			      struct shdma_desc *sdesc,
 			      dma_addr_t src, dma_addr_t dst, size_t *len)
 {
-	struct audmapp_chan *auchan = to_chan(schan);
-	struct audmapp_slave_config *cfg = auchan->config;
-
-	if (!cfg)
-		return -ENODEV;
+	struct audmapp_desc *desc = to_desc(sdesc);
 
 	if (*len > (size_t)AUDMAPP_LEN_MAX)
 		*len = (size_t)AUDMAPP_LEN_MAX;
 
+	desc->src = src;
+	desc->dst = dst;
+
 	return 0;
 }
 
@@ -164,7 +187,9 @@ static void audmapp_setup_xfer(struct shdma_chan *schan,
 
 static dma_addr_t audmapp_slave_addr(struct shdma_chan *schan)
 {
-	return 0; /* always fixed address */
+	struct audmapp_chan *auchan = to_chan(schan);
+
+	return auchan->slave_addr;
 }
 
 static bool audmapp_channel_busy(struct shdma_chan *schan)
@@ -183,7 +208,7 @@ static bool audmapp_desc_completed(struct shdma_chan *schan,
 
 static struct shdma_desc *audmapp_embedded_desc(void *buf, int i)
 {
-	return &((struct shdma_desc *)buf)[i];
+	return &((struct audmapp_desc *)buf)[i].shdma_desc;
 }
 
 static const struct shdma_ops audmapp_shdma_ops = {
@@ -234,16 +259,39 @@ static void audmapp_chan_remove(struct audmapp_device *audev)
 	dma_dev->chancnt = 0;
 }
 
+static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+	u32 chcr = dma_spec->args[0];
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, shdma_chan_filter, NULL);
+	if (chan)
+		to_shdma_chan(chan)->hw_req = chcr;
+
+	return chan;
+}
+
 static int audmapp_probe(struct platform_device *pdev)
 {
 	struct audmapp_pdata *pdata = pdev->dev.platform_data;
+	struct device_node *np = pdev->dev.of_node;
 	struct audmapp_device *audev;
 	struct shdma_dev *sdev;
 	struct dma_device *dma_dev;
 	struct resource *res;
 	int err, i;
 
-	if (!pdata)
+	if (np)
+		of_dma_controller_register(np, audmapp_of_xlate, pdev);
+	else if (!pdata)
 		return -ENODEV;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -260,7 +308,7 @@ static int audmapp_probe(struct platform_device *pdev)
 
 	sdev		= &audev->shdma_dev;
 	sdev->ops	= &audmapp_shdma_ops;
-	sdev->desc_size	= sizeof(struct shdma_desc);
+	sdev->desc_size	= sizeof(struct audmapp_desc);
 
 	dma_dev			= &sdev->dma_dev;
 	dma_dev->copy_align	= LOG2_DEFAULT_XFER_SIZE;
@@ -305,12 +353,18 @@ static int audmapp_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct of_device_id audmapp_of_match[] = {
+	{ .compatible = "renesas,rcar-audmapp", },
+	{},
+};
+
 static struct platform_driver audmapp_driver = {
 	.probe		= audmapp_probe,
 	.remove		= audmapp_remove,
 	.driver		= {
 		.owner	= THIS_MODULE,
 		.name	= "rcar-audmapp-engine",
+		.of_match_table = audmapp_of_match,
 	},
 };
 module_platform_driver(audmapp_driver);
diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h
index a2b8258..a1b0ef4 100644
--- a/drivers/dma/sh/shdma-arm.h
+++ b/drivers/dma/sh/shdma-arm.h
@@ -45,7 +45,7 @@ enum {
 	((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\
 	 (((i) & TS_HI_BIT)  << TS_HI_SHIFT))
 
-#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL((xmit_sz)))
-#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL((xmit_sz)))
+#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz)))
+#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz)))
 
 #endif
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index b35007e..42d4974 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -206,45 +206,6 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
 	return 0;
 }
 
-/*
- * This is the standard shdma filter function to be used as a replacement to the
- * "old" method, using the .private pointer. If for some reason you allocate a
- * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter
- * parameter. If this filter is used, the slave driver, after calling
- * dma_request_channel(), will also have to call dmaengine_slave_config() with
- * .slave_id, .direction, and either .src_addr or .dst_addr set.
- * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
- * capability! If this becomes a requirement, hardware glue drivers, using this
- * services would have to provide their own filters, which first would check
- * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do
- * this, and only then, in case of a match, call this common filter.
- * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate().
- * In that case the MID-RID value is used for slave channel filtering and is
- * passed to this function in the "arg" parameter.
- */
-bool shdma_chan_filter(struct dma_chan *chan, void *arg)
-{
-	struct shdma_chan *schan = to_shdma_chan(chan);
-	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
-	const struct shdma_ops *ops = sdev->ops;
-	int match = (long)arg;
-	int ret;
-
-	if (match < 0)
-		/* No slave requested - arbitrary channel */
-		return true;
-
-	if (!schan->dev->of_node && match >= slave_num)
-		return false;
-
-	ret = ops->set_slave(schan, match, 0, true);
-	if (ret < 0)
-		return false;
-
-	return true;
-}
-EXPORT_SYMBOL(shdma_chan_filter);
-
 static int shdma_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct shdma_chan *schan = to_shdma_chan(chan);
@@ -295,6 +256,51 @@ esetslave:
 	return ret;
 }
 
+/*
+ * This is the standard shdma filter function to be used as a replacement to the
+ * "old" method, using the .private pointer. If for some reason you allocate a
+ * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter
+ * parameter. If this filter is used, the slave driver, after calling
+ * dma_request_channel(), will also have to call dmaengine_slave_config() with
+ * .slave_id, .direction, and either .src_addr or .dst_addr set.
+ * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
+ * capability! If this becomes a requirement, hardware glue drivers, using this
+ * services would have to provide their own filters, which first would check
+ * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do
+ * this, and only then, in case of a match, call this common filter.
+ * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate().
+ * In that case the MID-RID value is used for slave channel filtering and is
+ * passed to this function in the "arg" parameter.
+ */
+bool shdma_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct shdma_chan *schan;
+	struct shdma_dev *sdev;
+	int match = (long)arg;
+	int ret;
+
+	/* Only support channels handled by this driver. */
+	if (chan->device->device_alloc_chan_resources !=
+	    shdma_alloc_chan_resources)
+		return false;
+
+	if (match < 0)
+		/* No slave requested - arbitrary channel */
+		return true;
+
+	schan = to_shdma_chan(chan);
+	if (!schan->dev->of_node && match >= slave_num)
+		return false;
+
+	sdev = to_shdma_dev(schan->dma_chan.device);
+	ret = sdev->ops->set_slave(schan, match, 0, true);
+	if (ret < 0)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL(shdma_chan_filter);
+
 static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 {
 	struct shdma_desc *desc, *_desc;
@@ -662,15 +668,16 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
 static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
 	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct shdma_chan *schan = to_shdma_chan(chan);
 	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	struct dma_async_tx_descriptor *desc;
 	const struct shdma_ops *ops = sdev->ops;
 	unsigned int sg_len = buf_len / period_len;
 	int slave_id = schan->slave_id;
 	dma_addr_t slave_addr;
-	struct scatterlist sgl[SHDMA_MAX_SG_LEN];
+	struct scatterlist *sgl;
 	int i;
 
 	if (!chan)
@@ -694,7 +701,16 @@ static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
 
 	slave_addr = ops->slave_addr(schan);
 
+	/*
+	 * Allocate the sg list dynamically as it would consumer too much stack
+	 * space.
+	 */
+	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL);
+	if (!sgl)
+		return NULL;
+
 	sg_init_table(sgl, sg_len);
+
 	for (i = 0; i < sg_len; i++) {
 		dma_addr_t src = buf_addr + (period_len * i);
 
@@ -704,8 +720,11 @@ static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
 		sg_dma_len(&sgl[i]) = period_len;
 	}
 
-	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+	desc = shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
 			     direction, flags, true);
+
+	kfree(sgl);
+	return desc;
 }
 
 static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h
index 758a57b..2c0a969 100644
--- a/drivers/dma/sh/shdma.h
+++ b/drivers/dma/sh/shdma.h
@@ -62,7 +62,7 @@ struct sh_dmae_desc {
 #define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\
 				     struct sh_dmae_device, shdma_dev.dma_dev)
 
-#ifdef CONFIG_SHDMA_R8A73A4
+#ifdef CONFIG_SH_DMAE_R8A73A4
 extern const struct sh_dmae_pdata r8a73a4_dma_pdata;
 #define r8a73a4_shdma_devid (&r8a73a4_dma_pdata)
 #else
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 146d5df..58eb857 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -38,12 +38,12 @@
 #include "../dmaengine.h"
 #include "shdma.h"
 
-/* DMA register */
-#define SAR	0x00
-#define DAR	0x04
-#define TCR	0x08
-#define CHCR	0x0C
-#define DMAOR	0x40
+/* DMA registers */
+#define SAR	0x00	/* Source Address Register */
+#define DAR	0x04	/* Destination Address Register */
+#define TCR	0x08	/* Transfer Count Register */
+#define CHCR	0x0C	/* Channel Control Register */
+#define DMAOR	0x40	/* DMA Operation Register */
 
 #define TEND	0x18 /* USB-DMAC */
 
@@ -239,9 +239,8 @@ static void dmae_init(struct sh_dmae_chan *sh_chan)
 {
 	/*
 	 * Default configuration for dual address memory-memory transfer.
-	 * 0x400 represents auto-request.
 	 */
-	u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan,
+	u32 chcr = DM_INC | SM_INC | RS_AUTO | log2size_to_chcr(sh_chan,
 						   LOG2_DEFAULT_XFER_SIZE);
 	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
 	chcr_write(sh_chan, chcr);
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 03f7820..aac03ab 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -580,7 +580,7 @@ err_dir:
 static struct dma_async_tx_descriptor *
 sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
 	size_t buf_len, size_t period_len,
-	enum dma_transfer_direction direction, unsigned long flags, void *context)
+	enum dma_transfer_direction direction, unsigned long flags)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
 	struct sirfsoc_dma_desc *sdesc = NULL;
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index c798445..5fe5933 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2531,8 +2531,7 @@ d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 static struct dma_async_tx_descriptor *
 dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 		     size_t buf_len, size_t period_len,
-		     enum dma_transfer_direction direction, unsigned long flags,
-		     void *context)
+		     enum dma_transfer_direction direction, unsigned long flags)
 {
 	unsigned int periods = buf_len / period_len;
 	struct dma_async_tx_descriptor *txd;
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
new file mode 100644
index 0000000..1f92a56
--- /dev/null
+++ b/drivers/dma/sun6i-dma.c
@@ -0,0 +1,1053 @@
+/*
+ * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd
+ * Author: Sugar <shuge@allwinnertech.com>
+ *
+ * Copyright (C) 2014 Maxime Ripard
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "virt-dma.h"
+
+/*
+ * There's 16 physical channels that can work in parallel.
+ *
+ * However we have 30 different endpoints for our requests.
+ *
+ * Since the channels are able to handle only an unidirectional
+ * transfer, we need to allocate more virtual channels so that
+ * everyone can grab one channel.
+ *
+ * Some devices can't work in both direction (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels than
+ * 2 channels per endpoints.
+ */
+
+#define NR_MAX_CHANNELS		16
+#define NR_MAX_REQUESTS		30
+#define NR_MAX_VCHANS		53
+
+/*
+ * Common registers
+ */
+#define DMA_IRQ_EN(x)		((x) * 0x04)
+#define DMA_IRQ_HALF			BIT(0)
+#define DMA_IRQ_PKG			BIT(1)
+#define DMA_IRQ_QUEUE			BIT(2)
+
+#define DMA_IRQ_CHAN_NR			8
+#define DMA_IRQ_CHAN_WIDTH		4
+
+
+#define DMA_IRQ_STAT(x)		((x) * 0x04 + 0x10)
+
+#define DMA_STAT		0x30
+
+/*
+ * Channels specific registers
+ */
+#define DMA_CHAN_ENABLE		0x00
+#define DMA_CHAN_ENABLE_START		BIT(0)
+#define DMA_CHAN_ENABLE_STOP		0
+
+#define DMA_CHAN_PAUSE		0x04
+#define DMA_CHAN_PAUSE_PAUSE		BIT(1)
+#define DMA_CHAN_PAUSE_RESUME		0
+
+#define DMA_CHAN_LLI_ADDR	0x08
+
+#define DMA_CHAN_CUR_CFG	0x0c
+#define DMA_CHAN_CFG_SRC_DRQ(x)		((x) & 0x1f)
+#define DMA_CHAN_CFG_SRC_IO_MODE	BIT(5)
+#define DMA_CHAN_CFG_SRC_LINEAR_MODE	(0 << 5)
+#define DMA_CHAN_CFG_SRC_BURST(x)	(((x) & 0x3) << 7)
+#define DMA_CHAN_CFG_SRC_WIDTH(x)	(((x) & 0x3) << 9)
+
+#define DMA_CHAN_CFG_DST_DRQ(x)		(DMA_CHAN_CFG_SRC_DRQ(x) << 16)
+#define DMA_CHAN_CFG_DST_IO_MODE	(DMA_CHAN_CFG_SRC_IO_MODE << 16)
+#define DMA_CHAN_CFG_DST_LINEAR_MODE	(DMA_CHAN_CFG_SRC_LINEAR_MODE << 16)
+#define DMA_CHAN_CFG_DST_BURST(x)	(DMA_CHAN_CFG_SRC_BURST(x) << 16)
+#define DMA_CHAN_CFG_DST_WIDTH(x)	(DMA_CHAN_CFG_SRC_WIDTH(x) << 16)
+
+#define DMA_CHAN_CUR_SRC	0x10
+
+#define DMA_CHAN_CUR_DST	0x14
+
+#define DMA_CHAN_CUR_CNT	0x18
+
+#define DMA_CHAN_CUR_PARA	0x1c
+
+
+/*
+ * Various hardware related defines
+ */
+#define LLI_LAST_ITEM	0xfffff800
+#define NORMAL_WAIT	8
+#define DRQ_SDRAM	1
+
+/*
+ * Hardware representation of the LLI
+ *
+ * The hardware will be fed the physical address of this structure,
+ * and read its content in order to start the transfer.
+ */
+struct sun6i_dma_lli {
+	u32			cfg;
+	u32			src;
+	u32			dst;
+	u32			len;
+	u32			para;
+	u32			p_lli_next;
+
+	/*
+	 * This field is not used by the DMA controller, but will be
+	 * used by the CPU to go through the list (mostly for dumping
+	 * or freeing it).
+	 */
+	struct sun6i_dma_lli	*v_lli_next;
+};
+
+
+struct sun6i_desc {
+	struct virt_dma_desc	vd;
+	dma_addr_t		p_lli;
+	struct sun6i_dma_lli	*v_lli;
+};
+
+struct sun6i_pchan {
+	u32			idx;
+	void __iomem		*base;
+	struct sun6i_vchan	*vchan;
+	struct sun6i_desc	*desc;
+	struct sun6i_desc	*done;
+};
+
+struct sun6i_vchan {
+	struct virt_dma_chan	vc;
+	struct list_head	node;
+	struct dma_slave_config	cfg;
+	struct sun6i_pchan	*phy;
+	u8			port;
+};
+
+struct sun6i_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	struct clk		*clk;
+	int			irq;
+	spinlock_t		lock;
+	struct reset_control	*rstc;
+	struct tasklet_struct	task;
+	atomic_t		tasklet_shutdown;
+	struct list_head	pending;
+	struct dma_pool		*pool;
+	struct sun6i_pchan	*pchans;
+	struct sun6i_vchan	*vchans;
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d)
+{
+	return container_of(d, struct sun6i_dma_dev, slave);
+}
+
+static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sun6i_vchan, vc.chan);
+}
+
+static inline struct sun6i_desc *
+to_sun6i_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct sun6i_desc, vd.tx);
+}
+
+static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev)
+{
+	dev_dbg(sdev->slave.dev, "Common register:\n"
+		"\tmask0(%04x): 0x%08x\n"
+		"\tmask1(%04x): 0x%08x\n"
+		"\tpend0(%04x): 0x%08x\n"
+		"\tpend1(%04x): 0x%08x\n"
+		"\tstats(%04x): 0x%08x\n",
+		DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)),
+		DMA_IRQ_EN(1), readl(sdev->base + DMA_IRQ_EN(1)),
+		DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)),
+		DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)),
+		DMA_STAT, readl(sdev->base + DMA_STAT));
+}
+
+static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev,
+					    struct sun6i_pchan *pchan)
+{
+	phys_addr_t reg = virt_to_phys(pchan->base);
+
+	dev_dbg(sdev->slave.dev, "Chan %d reg: %pa\n"
+		"\t___en(%04x): \t0x%08x\n"
+		"\tpause(%04x): \t0x%08x\n"
+		"\tstart(%04x): \t0x%08x\n"
+		"\t__cfg(%04x): \t0x%08x\n"
+		"\t__src(%04x): \t0x%08x\n"
+		"\t__dst(%04x): \t0x%08x\n"
+		"\tcount(%04x): \t0x%08x\n"
+		"\t_para(%04x): \t0x%08x\n\n",
+		pchan->idx, &reg,
+		DMA_CHAN_ENABLE,
+		readl(pchan->base + DMA_CHAN_ENABLE),
+		DMA_CHAN_PAUSE,
+		readl(pchan->base + DMA_CHAN_PAUSE),
+		DMA_CHAN_LLI_ADDR,
+		readl(pchan->base + DMA_CHAN_LLI_ADDR),
+		DMA_CHAN_CUR_CFG,
+		readl(pchan->base + DMA_CHAN_CUR_CFG),
+		DMA_CHAN_CUR_SRC,
+		readl(pchan->base + DMA_CHAN_CUR_SRC),
+		DMA_CHAN_CUR_DST,
+		readl(pchan->base + DMA_CHAN_CUR_DST),
+		DMA_CHAN_CUR_CNT,
+		readl(pchan->base + DMA_CHAN_CUR_CNT),
+		DMA_CHAN_CUR_PARA,
+		readl(pchan->base + DMA_CHAN_CUR_PARA));
+}
+
+static inline int convert_burst(u32 maxburst, u8 *burst)
+{
+	switch (maxburst) {
+	case 1:
+		*burst = 0;
+		break;
+	case 8:
+		*burst = 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline int convert_buswidth(enum dma_slave_buswidth addr_width, u8 *width)
+{
+	if ((addr_width < DMA_SLAVE_BUSWIDTH_1_BYTE) ||
+	    (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	*width = addr_width >> 1;
+	return 0;
+}
+
+static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev,
+			       struct sun6i_dma_lli *next,
+			       dma_addr_t next_phy,
+			       struct sun6i_desc *txd)
+{
+	if ((!prev && !txd) || !next)
+		return NULL;
+
+	if (!prev) {
+		txd->p_lli = next_phy;
+		txd->v_lli = next;
+	} else {
+		prev->p_lli_next = next_phy;
+		prev->v_lli_next = next;
+	}
+
+	next->p_lli_next = LLI_LAST_ITEM;
+	next->v_lli_next = NULL;
+
+	return next;
+}
+
+static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli,
+				    dma_addr_t src,
+				    dma_addr_t dst, u32 len,
+				    struct dma_slave_config *config)
+{
+	u8 src_width, dst_width, src_burst, dst_burst;
+	int ret;
+
+	if (!config)
+		return -EINVAL;
+
+	ret = convert_burst(config->src_maxburst, &src_burst);
+	if (ret)
+		return ret;
+
+	ret = convert_burst(config->dst_maxburst, &dst_burst);
+	if (ret)
+		return ret;
+
+	ret = convert_buswidth(config->src_addr_width, &src_width);
+	if (ret)
+		return ret;
+
+	ret = convert_buswidth(config->dst_addr_width, &dst_width);
+	if (ret)
+		return ret;
+
+	lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) |
+		DMA_CHAN_CFG_SRC_WIDTH(src_width) |
+		DMA_CHAN_CFG_DST_BURST(dst_burst) |
+		DMA_CHAN_CFG_DST_WIDTH(dst_width);
+
+	lli->src = src;
+	lli->dst = dst;
+	lli->len = len;
+	lli->para = NORMAL_WAIT;
+
+	return 0;
+}
+
+static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan,
+				      struct sun6i_dma_lli *lli)
+{
+	phys_addr_t p_lli = virt_to_phys(lli);
+
+	dev_dbg(chan2dev(&vchan->vc.chan),
+		"\n\tdesc:   p - %pa v - 0x%p\n"
+		"\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n"
+		"\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n",
+		&p_lli, lli,
+		lli->cfg, lli->src, lli->dst,
+		lli->len, lli->para, lli->p_lli_next);
+}
+
+static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
+	struct sun6i_dma_lli *v_lli, *v_next;
+	dma_addr_t p_lli, p_next;
+
+	if (unlikely(!txd))
+		return;
+
+	p_lli = txd->p_lli;
+	v_lli = txd->v_lli;
+
+	while (v_lli) {
+		v_next = v_lli->v_lli_next;
+		p_next = v_lli->p_lli_next;
+
+		dma_pool_free(sdev->pool, v_lli, p_lli);
+
+		v_lli = v_next;
+		p_lli = p_next;
+	}
+
+	kfree(txd);
+}
+
+static int sun6i_dma_terminate_all(struct sun6i_vchan *vchan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device);
+	struct sun6i_pchan *pchan = vchan->phy;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock(&sdev->lock);
+	list_del_init(&vchan->node);
+	spin_unlock(&sdev->lock);
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	vchan_get_all_descriptors(&vchan->vc, &head);
+
+	if (pchan) {
+		writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE);
+		writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE);
+
+		vchan->phy = NULL;
+		pchan->vchan = NULL;
+		pchan->desc = NULL;
+		pchan->done = NULL;
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
+	return 0;
+}
+
+static int sun6i_dma_start_desc(struct sun6i_vchan *vchan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device);
+	struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc);
+	struct sun6i_pchan *pchan = vchan->phy;
+	u32 irq_val, irq_reg, irq_offset;
+
+	if (!pchan)
+		return -EAGAIN;
+
+	if (!desc) {
+		pchan->desc = NULL;
+		pchan->done = NULL;
+		return -EAGAIN;
+	}
+
+	list_del(&desc->node);
+
+	pchan->desc = to_sun6i_desc(&desc->tx);
+	pchan->done = NULL;
+
+	sun6i_dma_dump_lli(vchan, pchan->desc->v_lli);
+
+	irq_reg = pchan->idx / DMA_IRQ_CHAN_NR;
+	irq_offset = pchan->idx % DMA_IRQ_CHAN_NR;
+
+	irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset));
+	irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH);
+	writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset));
+
+	writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR);
+	writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE);
+
+	sun6i_dma_dump_com_regs(sdev);
+	sun6i_dma_dump_chan_regs(sdev, pchan);
+
+	return 0;
+}
+
+static void sun6i_dma_tasklet(unsigned long data)
+{
+	struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data;
+	struct sun6i_vchan *vchan;
+	struct sun6i_pchan *pchan;
+	unsigned int pchan_alloc = 0;
+	unsigned int pchan_idx;
+
+	list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&vchan->vc.lock);
+
+		pchan = vchan->phy;
+
+		if (pchan && pchan->done) {
+			if (sun6i_dma_start_desc(vchan)) {
+				/*
+				 * No current txd associated with this channel
+				 */
+				dev_dbg(sdev->slave.dev, "pchan %u: free\n",
+					pchan->idx);
+
+				/* Mark this channel free */
+				vchan->phy = NULL;
+				pchan->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&vchan->vc.lock);
+	}
+
+	spin_lock_irq(&sdev->lock);
+	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+		pchan = &sdev->pchans[pchan_idx];
+
+		if (pchan->vchan || list_empty(&sdev->pending))
+			continue;
+
+		vchan = list_first_entry(&sdev->pending,
+					 struct sun6i_vchan, node);
+
+		/* Remove from pending channels */
+		list_del_init(&vchan->node);
+		pchan_alloc |= BIT(pchan_idx);
+
+		/* Mark this channel allocated */
+		pchan->vchan = vchan;
+		vchan->phy = pchan;
+		dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n",
+			pchan->idx, &vchan->vc);
+	}
+	spin_unlock_irq(&sdev->lock);
+
+	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+		if (!(pchan_alloc & BIT(pchan_idx)))
+			continue;
+
+		pchan = sdev->pchans + pchan_idx;
+		vchan = pchan->vchan;
+		if (vchan) {
+			spin_lock_irq(&vchan->vc.lock);
+			sun6i_dma_start_desc(vchan);
+			spin_unlock_irq(&vchan->vc.lock);
+		}
+	}
+}
+
+static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
+{
+	struct sun6i_dma_dev *sdev = dev_id;
+	struct sun6i_vchan *vchan;
+	struct sun6i_pchan *pchan;
+	int i, j, ret = IRQ_NONE;
+	u32 status;
+
+	for (i = 0; i < 2; i++) {
+		status = readl(sdev->base + DMA_IRQ_STAT(i));
+		if (!status)
+			continue;
+
+		dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n",
+			i ? "high" : "low", status);
+
+		writel(status, sdev->base + DMA_IRQ_STAT(i));
+
+		for (j = 0; (j < 8) && status; j++) {
+			if (status & DMA_IRQ_QUEUE) {
+				pchan = sdev->pchans + j;
+				vchan = pchan->vchan;
+
+				if (vchan) {
+					spin_lock(&vchan->vc.lock);
+					vchan_cookie_complete(&pchan->desc->vd);
+					pchan->done = pchan->desc;
+					spin_unlock(&vchan->vc.lock);
+				}
+			}
+
+			status = status >> 4;
+		}
+
+		if (!atomic_read(&sdev->tasklet_shutdown))
+			tasklet_schedule(&sdev->task);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun6i_dma_lli *v_lli;
+	struct sun6i_desc *txd;
+	dma_addr_t p_lli;
+	int ret;
+
+	dev_dbg(chan2dev(chan),
+		"%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n",
+		__func__, vchan->vc.chan.chan_id, &dest, &src, len, flags);
+
+	if (!len)
+		return NULL;
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+	if (!v_lli) {
+		dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+		goto err_txd_free;
+	}
+
+	ret = sun6i_dma_cfg_lli(v_lli, src, dest, len, sconfig);
+	if (ret)
+		goto err_dma_free;
+
+	v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+		DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+		DMA_CHAN_CFG_DST_LINEAR_MODE |
+		DMA_CHAN_CFG_SRC_LINEAR_MODE;
+
+	sun6i_dma_lli_add(NULL, v_lli, p_lli, txd);
+
+	sun6i_dma_dump_lli(vchan, v_lli);
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_dma_free:
+	dma_pool_free(sdev->pool, v_lli, p_lli);
+err_txd_free:
+	kfree(txd);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction dir,
+		unsigned long flags, void *context)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun6i_dma_lli *v_lli, *prev = NULL;
+	struct sun6i_desc *txd;
+	struct scatterlist *sg;
+	dma_addr_t p_lli;
+	int i, ret;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(dir)) {
+		dev_err(chan2dev(chan), "Invalid DMA direction\n");
+		return NULL;
+	}
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+		if (!v_lli)
+			goto err_lli_free;
+
+		if (dir == DMA_MEM_TO_DEV) {
+			ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg),
+						sconfig->dst_addr, sg_dma_len(sg),
+						sconfig);
+			if (ret)
+				goto err_cur_lli_free;
+
+			v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE |
+				DMA_CHAN_CFG_SRC_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_DST_DRQ(vchan->port);
+
+			dev_dbg(chan2dev(chan),
+				"%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+				__func__, vchan->vc.chan.chan_id,
+				&sconfig->dst_addr, &sg_dma_address(sg),
+				sg_dma_len(sg), flags);
+
+		} else {
+			ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr,
+						sg_dma_address(sg), sg_dma_len(sg),
+						sconfig);
+			if (ret)
+				goto err_cur_lli_free;
+
+			v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_IO_MODE |
+				DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_SRC_DRQ(vchan->port);
+
+			dev_dbg(chan2dev(chan),
+				"%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+				__func__, vchan->vc.chan.chan_id,
+				&sg_dma_address(sg), &sconfig->src_addr,
+				sg_dma_len(sg), flags);
+		}
+
+		prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+	}
+
+	dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli);
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		sun6i_dma_dump_lli(vchan, prev);
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_cur_lli_free:
+	dma_pool_free(sdev->pool, v_lli, p_lli);
+err_lli_free:
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		dma_pool_free(sdev->pool, prev, virt_to_phys(prev));
+	kfree(txd);
+	return NULL;
+}
+
+static int sun6i_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+		       unsigned long arg)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+	unsigned long flags;
+	int ret = 0;
+
+	switch (cmd) {
+	case DMA_RESUME:
+		dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc);
+
+		spin_lock_irqsave(&vchan->vc.lock, flags);
+
+		if (pchan) {
+			writel(DMA_CHAN_PAUSE_RESUME,
+			       pchan->base + DMA_CHAN_PAUSE);
+		} else if (!list_empty(&vchan->vc.desc_issued)) {
+			spin_lock(&sdev->lock);
+			list_add_tail(&vchan->node, &sdev->pending);
+			spin_unlock(&sdev->lock);
+		}
+
+		spin_unlock_irqrestore(&vchan->vc.lock, flags);
+		break;
+
+	case DMA_PAUSE:
+		dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc);
+
+		if (pchan) {
+			writel(DMA_CHAN_PAUSE_PAUSE,
+			       pchan->base + DMA_CHAN_PAUSE);
+		} else {
+			spin_lock(&sdev->lock);
+			list_del_init(&vchan->node);
+			spin_unlock(&sdev->lock);
+		}
+		break;
+
+	case DMA_TERMINATE_ALL:
+		ret = sun6i_dma_terminate_all(vchan);
+		break;
+	case DMA_SLAVE_CONFIG:
+		memcpy(&vchan->cfg, (void *)arg, sizeof(struct dma_slave_config));
+		break;
+	default:
+		ret = -ENXIO;
+		break;
+	}
+	return ret;
+}
+
+static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *state)
+{
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+	struct sun6i_dma_lli *lli;
+	struct virt_dma_desc *vd;
+	struct sun6i_desc *txd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	vd = vchan_find_desc(&vchan->vc, cookie);
+	txd = to_sun6i_desc(&vd->tx);
+
+	if (vd) {
+		for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next)
+			bytes += lli->len;
+	} else if (!pchan || !pchan->desc) {
+		bytes = 0;
+	} else {
+		bytes = readl(pchan->base + DMA_CHAN_CUR_CNT);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	dma_set_residue(state, bytes);
+
+	return ret;
+}
+
+static void sun6i_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (vchan_issue_pending(&vchan->vc)) {
+		spin_lock(&sdev->lock);
+
+		if (!vchan->phy && list_empty(&vchan->node)) {
+			list_add_tail(&vchan->node, &sdev->pending);
+			tasklet_schedule(&sdev->task);
+			dev_dbg(chan2dev(chan), "vchan %p: issued\n",
+				&vchan->vc);
+		}
+
+		spin_unlock(&sdev->lock);
+	} else {
+		dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n",
+			&vchan->vc);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static int sun6i_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void sun6i_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdev->lock, flags);
+	list_del_init(&vchan->node);
+	spin_unlock_irqrestore(&sdev->lock, flags);
+
+	vchan_free_chan_resources(&vchan->vc);
+}
+
+static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct sun6i_dma_dev *sdev = ofdma->of_dma_data;
+	struct sun6i_vchan *vchan;
+	struct dma_chan *chan;
+	u8 port = dma_spec->args[0];
+
+	if (port > NR_MAX_REQUESTS)
+		return NULL;
+
+	chan = dma_get_any_slave_channel(&sdev->slave);
+	if (!chan)
+		return NULL;
+
+	vchan = to_sun6i_vchan(chan);
+	vchan->port = port;
+
+	return chan;
+}
+
+static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev)
+{
+	/* Disable all interrupts from DMA */
+	writel(0, sdev->base + DMA_IRQ_EN(0));
+	writel(0, sdev->base + DMA_IRQ_EN(1));
+
+	/* Prevent spurious interrupts from scheduling the tasklet */
+	atomic_inc(&sdev->tasklet_shutdown);
+
+	/* Make sure we won't have any further interrupts */
+	devm_free_irq(sdev->slave.dev, sdev->irq, sdev);
+
+	/* Actually prevent the tasklet from being scheduled */
+	tasklet_kill(&sdev->task);
+}
+
+static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev)
+{
+	int i;
+
+	for (i = 0; i < NR_MAX_VCHANS; i++) {
+		struct sun6i_vchan *vchan = &sdev->vchans[i];
+
+		list_del(&vchan->vc.chan.device_node);
+		tasklet_kill(&vchan->vc.task);
+	}
+}
+
+static int sun6i_dma_probe(struct platform_device *pdev)
+{
+	struct sun6i_dma_dev *sdc;
+	struct resource *res;
+	struct clk *mux, *pll6;
+	int ret, i;
+
+	sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL);
+	if (!sdc)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sdc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sdc->base))
+		return PTR_ERR(sdc->base);
+
+	sdc->irq = platform_get_irq(pdev, 0);
+	if (sdc->irq < 0) {
+		dev_err(&pdev->dev, "Cannot claim IRQ\n");
+		return sdc->irq;
+	}
+
+	sdc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sdc->clk)) {
+		dev_err(&pdev->dev, "No clock specified\n");
+		return PTR_ERR(sdc->clk);
+	}
+
+	mux = clk_get(NULL, "ahb1_mux");
+	if (IS_ERR(mux)) {
+		dev_err(&pdev->dev, "Couldn't get AHB1 Mux\n");
+		return PTR_ERR(mux);
+	}
+
+	pll6 = clk_get(NULL, "pll6");
+	if (IS_ERR(pll6)) {
+		dev_err(&pdev->dev, "Couldn't get PLL6\n");
+		clk_put(mux);
+		return PTR_ERR(pll6);
+	}
+
+	ret = clk_set_parent(mux, pll6);
+	clk_put(pll6);
+	clk_put(mux);
+
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't reparent AHB1 on PLL6\n");
+		return ret;
+	}
+
+	sdc->rstc = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(sdc->rstc)) {
+		dev_err(&pdev->dev, "No reset controller specified\n");
+		return PTR_ERR(sdc->rstc);
+	}
+
+	sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+				     sizeof(struct sun6i_dma_lli), 4, 0);
+	if (!sdc->pool) {
+		dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, sdc);
+	INIT_LIST_HEAD(&sdc->pending);
+	spin_lock_init(&sdc->lock);
+
+	dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
+
+	INIT_LIST_HEAD(&sdc->slave.channels);
+	sdc->slave.device_alloc_chan_resources	= sun6i_dma_alloc_chan_resources;
+	sdc->slave.device_free_chan_resources	= sun6i_dma_free_chan_resources;
+	sdc->slave.device_tx_status		= sun6i_dma_tx_status;
+	sdc->slave.device_issue_pending		= sun6i_dma_issue_pending;
+	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
+	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
+	sdc->slave.device_control		= sun6i_dma_control;
+	sdc->slave.chancnt			= NR_MAX_VCHANS;
+
+	sdc->slave.dev = &pdev->dev;
+
+	sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS,
+				   sizeof(struct sun6i_pchan), GFP_KERNEL);
+	if (!sdc->pchans)
+		return -ENOMEM;
+
+	sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS,
+				   sizeof(struct sun6i_vchan), GFP_KERNEL);
+	if (!sdc->vchans)
+		return -ENOMEM;
+
+	tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc);
+
+	for (i = 0; i < NR_MAX_CHANNELS; i++) {
+		struct sun6i_pchan *pchan = &sdc->pchans[i];
+
+		pchan->idx = i;
+		pchan->base = sdc->base + 0x100 + i * 0x40;
+	}
+
+	for (i = 0; i < NR_MAX_VCHANS; i++) {
+		struct sun6i_vchan *vchan = &sdc->vchans[i];
+
+		INIT_LIST_HEAD(&vchan->node);
+		vchan->vc.desc_free = sun6i_dma_free_desc;
+		vchan_init(&vchan->vc, &sdc->slave);
+	}
+
+	ret = reset_control_deassert(sdc->rstc);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't deassert the device from reset\n");
+		goto err_chan_free;
+	}
+
+	ret = clk_prepare_enable(sdc->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't enable the clock\n");
+		goto err_reset_assert;
+	}
+
+	ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0,
+			       dev_name(&pdev->dev), sdc);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot request IRQ\n");
+		goto err_clk_disable;
+	}
+
+	ret = dma_async_device_register(&sdc->slave);
+	if (ret) {
+		dev_warn(&pdev->dev, "Failed to register DMA engine device\n");
+		goto err_irq_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate,
+					 sdc);
+	if (ret) {
+		dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+		goto err_dma_unregister;
+	}
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&sdc->slave);
+err_irq_disable:
+	sun6i_kill_tasklet(sdc);
+err_clk_disable:
+	clk_disable_unprepare(sdc->clk);
+err_reset_assert:
+	reset_control_assert(sdc->rstc);
+err_chan_free:
+	sun6i_dma_free(sdc);
+	return ret;
+}
+
+static int sun6i_dma_remove(struct platform_device *pdev)
+{
+	struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&sdc->slave);
+
+	sun6i_kill_tasklet(sdc);
+
+	clk_disable_unprepare(sdc->clk);
+	reset_control_assert(sdc->rstc);
+
+	sun6i_dma_free(sdc);
+
+	return 0;
+}
+
+static struct of_device_id sun6i_dma_match[] = {
+	{ .compatible = "allwinner,sun6i-a31-dma" },
+	{ /* sentinel */ }
+};
+
+static struct platform_driver sun6i_dma_driver = {
+	.probe		= sun6i_dma_probe,
+	.remove		= sun6i_dma_remove,
+	.driver = {
+		.name		= "sun6i-dma",
+		.of_match_table	= sun6i_dma_match,
+	},
+};
+module_platform_driver(sun6i_dma_driver);
+
+MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver");
+MODULE_AUTHOR("Sugar <shuge@allwinnertech.com>");
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 03ad64e..16efa60 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -1055,7 +1055,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
 static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
-	unsigned long flags, void *context)
+	unsigned long flags)
 {
 	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
 	struct tegra_dma_desc *dma_desc = NULL;
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 374b57f..a12c855 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -134,8 +134,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci)
 	int				j;
 	u32				nr_pages;
 
-	for (np = NULL;
-	     (np = of_find_node_by_name(np, "memory")) != NULL;) {
+	for_each_node_by_name(np, "memory") {
 		struct resource r;
 
 		/* We "know" that the Cell firmware only creates one entry
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index d3d0e8c..d6c767a 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -382,6 +382,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index ca8430f..e67b9a5 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -1085,6 +1085,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 22e0c92..1265164 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
 				dev_err(&client->dev,
 					"invalid gain on %s\n",
 					node->full_name);
+				return -EINVAL;
 			}
 		}
 
@@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
 				dev_err(&client->dev,
 					"invalid data_rate on %s\n",
 					node->full_name);
+				return -EINVAL;
 			}
 		}
 
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index f960636..272fcc8 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -510,6 +510,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
+
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4ae3fff1..bea0a34 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -247,8 +247,8 @@ struct dme1737_data {
 	u8  pwm_acz[3];
 	u8  pwm_freq[6];
 	u8  pwm_rr[2];
-	u8  zone_low[3];
-	u8  zone_abs[3];
+	s8  zone_low[3];
+	s8  zone_abs[3];
 	u8  zone_hyst[2];
 	u32 alarms;
 };
@@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res)
 	return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2));
 }
 
-static inline int IN_TO_REG(int val, int nominal)
+static inline int IN_TO_REG(long val, int nominal)
 {
 	return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255);
 }
@@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res)
 	return (reg * 1000) >> (res - 8);
 }
 
-static inline int TEMP_TO_REG(int val)
+static inline int TEMP_TO_REG(long val)
 {
 	return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127);
 }
@@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg)
 	return TEMP_RANGE[(reg >> 4) & 0x0f];
 }
 
-static int TEMP_RANGE_TO_REG(int val, int reg)
+static int TEMP_RANGE_TO_REG(long val, int reg)
 {
 	int i;
 
@@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix)
 	return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000;
 }
 
-static inline int TEMP_HYST_TO_REG(int val, int ix, int reg)
+static inline int TEMP_HYST_TO_REG(long val, int ix, int reg)
 {
 	int hyst = clamp_val((val + 500) / 1000, 0, 15);
 
@@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc)
 		return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg;
 }
 
-static inline int FAN_TO_REG(int val, int tpc)
+static inline int FAN_TO_REG(long val, int tpc)
 {
 	if (tpc) {
 		return clamp_val(val / tpc, 0, 0xffff);
@@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg)
 	return (edge > 0) ? 1 << (edge - 1) : 0;
 }
 
-static inline int FAN_TYPE_TO_REG(int val, int reg)
+static inline int FAN_TYPE_TO_REG(long val, int reg)
 {
 	int edge = (val == 4) ? 3 : val;
 
@@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg)
 	return 1000 + i * 500;
 }
 
-static int FAN_MAX_TO_REG(int val)
+static int FAN_MAX_TO_REG(long val)
 {
 	int i;
 
@@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg)
 	return acz[(reg >> 5) & 0x07];
 }
 
-static inline int PWM_ACZ_TO_REG(int val, int reg)
+static inline int PWM_ACZ_TO_REG(long val, int reg)
 {
 	int acz = (val == 4) ? 2 : val - 1;
 
@@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg)
 	return PWM_FREQ[reg & 0x0f];
 }
 
-static int PWM_FREQ_TO_REG(int val, int reg)
+static int PWM_FREQ_TO_REG(long val, int reg)
 {
 	int i;
 
@@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix)
 	return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0;
 }
 
-static int PWM_RR_TO_REG(int val, int ix, int reg)
+static int PWM_RR_TO_REG(long val, int ix, int reg)
 {
 	int i;
 
@@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix)
 	return PWM_RR_FROM_REG(reg, ix) ? 1 : 0;
 }
 
-static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg)
+static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg)
 {
 	int en = (ix == 1) ? 0x80 : 0x08;
 
@@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 		       const char *buf, size_t count)
 {
 	struct dme1737_data *data = dev_get_drvdata(dev);
-	long val;
+	unsigned long val;
 	int err;
 
-	err = kstrtol(buf, 10, &val);
+	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c
index e87da90..ada9071 100644
--- a/drivers/hwmon/emc6w201.c
+++ b/drivers/hwmon/emc6w201.c
@@ -252,12 +252,12 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *devattr,
 	if (err < 0)
 		return err;
 
-	val /= 1000;
+	val = DIV_ROUND_CLOSEST(val, 1000);
 	reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr)
 			  : EMC6W201_REG_TEMP_HIGH(nr);
 
 	mutex_lock(&data->update_lock);
-	data->temp[sf][nr] = clamp_val(val, -127, 128);
+	data->temp[sf][nr] = clamp_val(val, -127, 127);
 	err = emc6w201_write8(client, reg, data->temp[sf][nr]);
 	mutex_unlock(&data->update_lock);
 
diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c
index 0e01c4e..7b73d20 100644
--- a/drivers/hwmon/hih6130.c
+++ b/drivers/hwmon/hih6130.c
@@ -238,6 +238,9 @@ static int hih6130_probe(struct i2c_client *client,
 	hih6130->client = client;
 	mutex_init(&hih6130->lock);
 
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_QUICK))
+		hih6130->write_length = 1;
+
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							   hih6130,
 							   hih6130_groups);
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index ba1d83d..a5e2958 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -617,6 +617,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
+
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index d2060e2..cfaf70b 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -74,12 +74,9 @@ static inline int TEMP_FROM_REG(s16 reg)
 	return reg / 8 * 625 / 10;
 }
 
-static inline s16 TEMP_TO_REG(int val)
+static inline s16 TEMP_TO_REG(long val)
 {
-	if (val <= -60000)
-		return -60000 * 10 / 625 * 8;
-	if (val >= 160000)
-		return 160000 * 10 / 625 * 8;
+	val = clamp_val(val, -60000, 160000);
 	return val * 10 / 625 * 8;
 }
 
@@ -206,10 +203,12 @@ static ssize_t set_temp_hyst(struct device *dev,
 	if (err)
 		return err;
 
+	val = clamp_val(val, -120000, 220000);
 	mutex_lock(&data->update_lock);
-	data->temp[t_hyst] = TEMP_FROM_REG(data->temp[attr->index]) - val;
+	 data->temp[t_hyst] =
+		TEMP_TO_REG(TEMP_FROM_REG(data->temp[attr->index]) - val);
 	i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST,
-				     TEMP_TO_REG(data->temp[t_hyst]));
+				     data->temp[t_hyst]);
 	mutex_unlock(&data->update_lock);
 	return count;
 }
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 988181e..145f674 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -615,6 +615,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c
index c74d2da..e42964f 100644
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -131,13 +131,6 @@ static int tmp103_probe(struct i2c_client *client,
 	struct regmap *regmap;
 	int ret;
 
-	if (!i2c_check_functionality(client->adapter,
-				     I2C_FUNC_SMBUS_BYTE_DATA)) {
-		dev_err(&client->dev,
-			"adapter doesn't support SMBus byte transactions\n");
-		return -ENODEV;
-	}
-
 	regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config);
 	if (IS_ERR(regmap)) {
 		dev_err(dev, "failed to allocate register map\n");
diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c
index 344b22e..3ea57c3 100644
--- a/drivers/hwmon/vt1211.c
+++ b/drivers/hwmon/vt1211.c
@@ -879,6 +879,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 
 	return count;
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index c1726be..2f55973 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -820,6 +820,9 @@ store_vrm_reg(struct device *dev, struct device_attribute *attr, const char *buf
 	err = kstrtoul(buf, 10, &val);
 	if (err)
 		return err;
+
+	if (val > 255)
+		return -EINVAL;
 	data->vrm = val;
 
 	return count;
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index cb3765f..001df85 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -1181,6 +1181,9 @@ static ssize_t store_vrm_reg(struct device *dev,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 9d63d71..816aa6c 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -353,6 +353,9 @@ store_vrm(struct device *dev, struct device_attribute *attr,
 	if (err)
 		return err;
 
+	if (val > 255)
+		return -EINVAL;
+
 	data->vrm = val;
 	return count;
 }
diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index 70637d2..3612cb5 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig
@@ -10,7 +10,7 @@ menu "Hardware Spinlock drivers"
 
 config HWSPINLOCK_OMAP
 	tristate "OMAP Hardware Spinlock device"
-	depends on ARCH_OMAP4 || SOC_OMAP5
+	depends on ARCH_OMAP4 || SOC_OMAP5 || SOC_DRA7XX || SOC_AM33XX || SOC_AM43XX
 	select HWSPINLOCK
 	help
 	  Say y here to support the OMAP Hardware Spinlock device (firstly
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index 292869c..c1e2cd4 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -98,10 +98,29 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
 	if (!io_base)
 		return -ENOMEM;
 
+	/*
+	 * make sure the module is enabled and clocked before reading
+	 * the module SYSSTATUS register
+	 */
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(&pdev->dev);
+		goto iounmap_base;
+	}
+
 	/* Determine number of locks */
 	i = readl(io_base + SYSSTATUS_OFFSET);
 	i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET;
 
+	/*
+	 * runtime PM will make sure the clock of this module is
+	 * enabled again iff at least one lock is requested
+	 */
+	ret = pm_runtime_put(&pdev->dev);
+	if (ret < 0)
+		goto iounmap_base;
+
 	/* one of the four lsb's must be set, and nothing else */
 	if (hweight_long(i & 0xf) != 1 || i > 8) {
 		ret = -EINVAL;
@@ -121,12 +140,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
 	for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
 		hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
 
-	/*
-	 * runtime PM will make sure the clock of this module is
-	 * enabled iff at least one lock is requested
-	 */
-	pm_runtime_enable(&pdev->dev);
-
 	ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
 						pdata->base_id, num_locks);
 	if (ret)
@@ -135,9 +148,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
 	return 0;
 
 reg_fail:
-	pm_runtime_disable(&pdev->dev);
 	kfree(bank);
 iounmap_base:
+	pm_runtime_disable(&pdev->dev);
 	iounmap(io_base);
 	return ret;
 }
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5a..f6d2961 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 		port_priv = ib_get_agent_port(device, port_num);
 
 	if (!port_priv) {
-		printk(KERN_ERR SPFX "Unable to find port agent\n");
+		dev_err(&device->dev, "Unable to find port agent\n");
 		return;
 	}
 
 	agent = port_priv->agent[qpn];
 	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
 	if (IS_ERR(ah)) {
-		printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n",
+		dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
 			PTR_ERR(ah));
 		return;
 	}
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 				      GFP_KERNEL);
 	if (IS_ERR(send_buf)) {
-		printk(KERN_ERR SPFX "ib_create_send_mad error\n");
+		dev_err(&device->dev, "ib_create_send_mad error\n");
 		goto err1;
 	}
 
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 	}
 
 	if (ib_post_send_mad(send_buf, NULL)) {
-		printk(KERN_ERR SPFX "ib_post_send_mad error\n");
+		dev_err(&device->dev, "ib_post_send_mad error\n");
 		goto err2;
 	}
 	return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
 	if (!port_priv) {
-		printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n");
+		dev_err(&device->dev, "No memory for ib_agent_port_private\n");
 		ret = -ENOMEM;
 		goto error1;
 	}
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 		port_priv->agent[0] = ib_register_mad_agent(device, port_num,
 							    IB_QPT_SMI, NULL, 0,
 							    &agent_send_handler,
-							    NULL, NULL);
+							    NULL, NULL, 0);
 		if (IS_ERR(port_priv->agent[0])) {
 			ret = PTR_ERR(port_priv->agent[0]);
 			goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
 	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
 						    IB_QPT_GSI, NULL, 0,
 						    &agent_send_handler,
-						    NULL, NULL);
+						    NULL, NULL, 0);
 	if (IS_ERR(port_priv->agent[1])) {
 		ret = PTR_ERR(port_priv->agent[1]);
 		goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
 	port_priv = __ib_get_agent_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-		printk(KERN_ERR SPFX "Port %d not found\n", port_num);
+		dev_err(&device->dev, "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
 	list_del(&port_priv->port_list);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c323917..e28a494 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
 	struct cm_port *port;
 	struct ib_mad_reg_req reg_req = {
 		.mgmt_class = IB_MGMT_CLASS_CM,
-		.mgmt_class_version = IB_CM_CLASS_VERSION
+		.mgmt_class_version = IB_CM_CLASS_VERSION,
 	};
 	struct ib_port_modify port_modify = {
 		.set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
 							0,
 							cm_send_handler,
 							cm_recv_handler,
-							port);
+							port,
+							0);
 		if (IS_ERR(port->mad_agent))
 			goto error2;
 
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489..ff9163d 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/sysctl.h>
 
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
 	struct list_head free_list;
 };
 
+static unsigned int default_backlog = 256;
+
+static struct ctl_table_header *iwcm_ctl_table_hdr;
+static struct ctl_table iwcm_ctl_table[] = {
+	{
+		.procname	= "default_backlog",
+		.data		= &default_backlog,
+		.maxlen		= sizeof(default_backlog),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{ }
+};
+
 /*
  * The following services provide a mechanism for pre-allocating iwcm_work
  * elements.  The design pre-allocates them  based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
+	if (!backlog)
+		backlog = default_backlog;
+
 	ret = alloc_work_entries(cm_id_priv, backlog);
 	if (ret)
 		return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
 	if (!iwcm_wq)
 		return -ENOMEM;
 
+	iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
+						 iwcm_ctl_table);
+	if (!iwcm_ctl_table_hdr) {
+		pr_err("iw_cm: couldn't register sysctl paths\n");
+		destroy_workqueue(iwcm_wq);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 static void __exit iw_cm_cleanup(void)
 {
+	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
 	destroy_workqueue(iwcm_wq);
 }
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f13..74c30f4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
  * SOFTWARE.
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 rmpp_version,
 					   ib_mad_send_handler send_handler,
 					   ib_mad_recv_handler recv_handler,
-					   void *context)
+					   void *context,
+					   u32 registration_flags)
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 
 	/* Validate parameters */
 	qpn = get_spl_qp_index(qp_type);
-	if (qpn == -1)
+	if (qpn == -1) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: invalid QP Type %d\n",
+			   qp_type);
 		goto error1;
+	}
 
-	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
+	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: invalid RMPP Version %u\n",
+			   rmpp_version);
 		goto error1;
+	}
 
 	/* Validate MAD registration request if supplied */
 	if (mad_reg_req) {
-		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
+		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: invalid Class Version %u\n",
+				   mad_reg_req->mgmt_class_version);
 			goto error1;
-		if (!recv_handler)
+		}
+		if (!recv_handler) {
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: no recv_handler\n");
 			goto error1;
+		}
 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
 			/*
 			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
 			 * one in this range currently allowed
 			 */
 			if (mad_reg_req->mgmt_class !=
-			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		} else if (mad_reg_req->mgmt_class == 0) {
 			/*
 			 * Class 0 is reserved in IBA and is used for
 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
 			 */
+			dev_notice(&device->dev,
+				   "ib_register_mad_agent: Invalid Mgmt Class 0\n");
 			goto error1;
 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
 			/*
 			 * If class is in "new" vendor range,
 			 * ensure supplied OUI is not zero
 			 */
-			if (!is_vendor_oui(mad_reg_req->oui))
+			if (!is_vendor_oui(mad_reg_req->oui)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: No OUI specified for class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
 		/* Make sure class supplied is consistent with RMPP */
 		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
-			if (rmpp_version)
+			if (rmpp_version) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
+
 		/* Make sure class supplied is consistent with QP type */
 		if (qp_type == IB_QPT_SMI) {
 			if ((mad_reg_req->mgmt_class !=
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
 			    (mad_reg_req->mgmt_class !=
-					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		} else {
 			if ((mad_reg_req->mgmt_class ==
 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
 			    (mad_reg_req->mgmt_class ==
-					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
+					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
+				dev_notice(&device->dev,
+					   "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
+					   mad_reg_req->mgmt_class);
 				goto error1;
+			}
 		}
 	} else {
 		/* No registration request supplied */
 		if (!send_handler)
 			goto error1;
+		if (registration_flags & IB_MAD_USER_RMPP)
+			goto error1;
 	}
 
 	/* Validate device and port */
 	port_priv = ib_get_mad_port(device, port_num);
 	if (!port_priv) {
+		dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
 		ret = ERR_PTR(-ENODEV);
 		goto error1;
 	}
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	/* Verify the QP requested is supported.  For example, Ethernet devices
 	 * will not have QP0 */
 	if (!port_priv->qp_info[qpn].qp) {
+		dev_notice(&device->dev,
+			   "ib_register_mad_agent: QP %d not supported\n", qpn);
 		ret = ERR_PTR(-EPROTONOSUPPORT);
 		goto error1;
 	}
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	mad_agent_priv->agent.context = context;
 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
 	mad_agent_priv->agent.port_num = port_num;
+	mad_agent_priv->agent.flags = registration_flags;
 	spin_lock_init(&mad_agent_priv->lock);
 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	     smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
 	     IB_SMI_DISCARD) {
 		ret = -EINVAL;
-		printk(KERN_ERR PFX "Invalid directed route\n");
+		dev_err(&device->dev, "Invalid directed route\n");
 		goto out;
 	}
 
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	local = kmalloc(sizeof *local, GFP_ATOMIC);
 	if (!local) {
 		ret = -ENOMEM;
-		printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
+		dev_err(&device->dev, "No memory for ib_mad_local_private\n");
 		goto out;
 	}
 	local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
 	if (!mad_priv) {
 		ret = -ENOMEM;
-		printk(KERN_ERR PFX "No memory for local response MAD\n");
+		dev_err(&device->dev, "No memory for local response MAD\n");
 		kfree(local);
 		goto out;
 	}
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
 		if (!seg) {
-			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
-			       "alloc failed for len %zd, gfp %#x\n",
-			       sizeof (*seg) + seg_size, gfp_mask);
+			dev_err(&send_buf->mad_agent->device->dev,
+				"alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
+				sizeof (*seg) + seg_size, gfp_mask);
 			free_send_rmpp_list(send_wr);
 			return -ENOMEM;
 		}
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
 	return 0;
 }
 
+int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
+{
+	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
+}
+EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
+
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
 					    int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 	pad = get_pad_size(hdr_len, data_len);
 	message_size = hdr_len + data_len + pad;
 
-	if ((!mad_agent->rmpp_version &&
-	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
-	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
-		return ERR_PTR(-EINVAL);
+	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
+		if (!rmpp_active && message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
+	} else
+		if (rmpp_active || message_size > sizeof(struct ib_mad))
+			return ERR_PTR(-EINVAL);
 
 	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
@@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
 			      &mad_agent_priv->send_list);
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
-		if (mad_agent_priv->agent.rmpp_version) {
+		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 			ret = ib_send_rmpp_mad(mad_send_wr);
 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
 				ret = ib_send_mad(mad_send_wr);
@@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp);
 int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
 		      struct ib_wc *wc)
 {
-	printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
+	dev_err(&mad_agent->device->dev,
+		"ib_process_mad_wc() not implemented yet\n");
 	return 0;
 }
 EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
 
 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
 		if ((*method)->agent[i]) {
-			printk(KERN_ERR PFX "Method %d already in use\n", i);
+			pr_err("Method %d already in use\n", i);
 			return -EINVAL;
 		}
 	}
@@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
 	/* Allocate management method table */
 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
 	if (!*method) {
-		printk(KERN_ERR PFX "No memory for "
-		       "ib_mad_mgmt_method_table\n");
+		pr_err("No memory for ib_mad_mgmt_method_table\n");
 		return -ENOMEM;
 	}
 
@@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 		/* Allocate management class table for "new" class version */
 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
 		if (!*class) {
-			printk(KERN_ERR PFX "No memory for "
-			       "ib_mad_mgmt_class_table\n");
+			dev_err(&agent_priv->agent.device->dev,
+				"No memory for ib_mad_mgmt_class_table\n");
 			ret = -ENOMEM;
 			goto error1;
 		}
@@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 		/* Allocate mgmt vendor class table for "new" class version */
 		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
 		if (!vendor) {
-			printk(KERN_ERR PFX "No memory for "
-			       "ib_mad_mgmt_vendor_class_table\n");
+			dev_err(&agent_priv->agent.device->dev,
+				"No memory for ib_mad_mgmt_vendor_class_table\n");
 			goto error1;
 		}
 
@@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 		/* Allocate table for this management vendor class */
 		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
 		if (!vendor_class) {
-			printk(KERN_ERR PFX "No memory for "
-			       "ib_mad_mgmt_vendor_class\n");
+			dev_err(&agent_priv->agent.device->dev,
+				"No memory for ib_mad_mgmt_vendor_class\n");
 			goto error2;
 		}
 
@@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			goto check_in_use;
 		}
 	}
-	printk(KERN_ERR PFX "All OUI slots in use\n");
+	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
 	goto error3;
 
 check_in_use:
@@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
 		if (mad_agent->agent.recv_handler)
 			atomic_inc(&mad_agent->refcount);
 		else {
-			printk(KERN_NOTICE PFX "No receive handler for client "
-			       "%p on port %d\n",
-			       &mad_agent->agent, port_priv->port_num);
+			dev_notice(&port_priv->device->dev,
+				   "No receive handler for client %p on port %d\n",
+				   &mad_agent->agent, port_priv->port_num);
 			mad_agent = NULL;
 		}
 	}
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
 
 	/* Make sure MAD base version is understood */
 	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
-		printk(KERN_ERR PFX "MAD received with unsupported base "
-		       "version %d\n", mad->mad_hdr.base_version);
+		pr_err("MAD received with unsupported base version %d\n",
+			mad->mad_hdr.base_version);
 		goto out;
 	}
 
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
 
 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
 	return !mad_agent_priv->agent.rmpp_version ||
+		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
 						      mad_recv_wc);
 		if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
-			ib_free_recv_mad(mad_recv_wc);
-			deref_mad_agent(mad_agent_priv);
-			return;
-		}
-		ib_mark_mad_done(mad_send_wr);
-		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
+			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
+			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
+			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
+					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
+				/* user rmpp is in effect
+				 * and this is an active RMPP MAD
+				 */
+				mad_recv_wc->wc->wr_id = 0;
+				mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+								   mad_recv_wc);
+				atomic_dec(&mad_agent_priv->refcount);
+			} else {
+				/* not user rmpp, revert to normal behavior and
+				 * drop the mad */
+				ib_free_recv_mad(mad_recv_wc);
+				deref_mad_agent(mad_agent_priv);
+				return;
+			}
+		} else {
+			ib_mark_mad_done(mad_send_wr);
+			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
-		/* Defined behavior is to complete response before request */
-		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
-		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
-						   mad_recv_wc);
-		atomic_dec(&mad_agent_priv->refcount);
+			/* Defined behavior is to complete response before request */
+			mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
+			mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+							   mad_recv_wc);
+			atomic_dec(&mad_agent_priv->refcount);
 
-		mad_send_wc.status = IB_WC_SUCCESS;
-		mad_send_wc.vendor_err = 0;
-		mad_send_wc.send_buf = &mad_send_wr->send_buf;
-		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+			mad_send_wc.status = IB_WC_SUCCESS;
+			mad_send_wc.vendor_err = 0;
+			mad_send_wc.send_buf = &mad_send_wr->send_buf;
+			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
+		}
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 
 	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
 	if (!response) {
-		printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
-		       "for response buffer\n");
+		dev_err(&port_priv->device->dev,
+			"ib_mad_recv_done_handler no memory for response buffer\n");
 		goto out;
 	}
 
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 
 	mad_agent_priv = mad_send_wr->mad_agent_priv;
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
-	if (mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
 		if (ret == IB_RMPP_RESULT_CONSUMED)
 			goto done;
@@ -2176,7 +2249,8 @@ retry:
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
 				   &bad_send_wr);
 		if (ret) {
-			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
+			dev_err(&port_priv->device->dev,
+				"ib_post_send failed: %d\n", ret);
 			mad_send_wr = queued_send_wr;
 			wc->status = IB_WC_LOC_QP_OP_ERR;
 			goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
 					   IB_QP_STATE | IB_QP_CUR_STATE);
 			kfree(attr);
 			if (ret)
-				printk(KERN_ERR PFX "mad_error_handler - "
-				       "ib_modify_qp to RTS : %d\n", ret);
+				dev_err(&port_priv->device->dev,
+					"mad_error_handler - ib_modify_qp to RTS : %d\n",
+					ret);
 			else
 				mark_sends_for_retry(qp_info);
 		}
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
 		if (local->mad_priv) {
 			recv_mad_agent = local->recv_mad_agent;
 			if (!recv_mad_agent) {
-				printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
+				dev_err(&mad_agent_priv->agent.device->dev,
+					"No receive MAD agent for local completion\n");
 				free_mad = 1;
 				goto local_send_completion;
 			}
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
 
 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 
-	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
+	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
 		ret = ib_retry_rmpp(mad_send_wr);
 		switch (ret) {
 		case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 		} else {
 			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
 			if (!mad_priv) {
-				printk(KERN_ERR PFX "No memory for receive buffer\n");
+				dev_err(&qp_info->port_priv->device->dev,
+					"No memory for receive buffer\n");
 				ret = -ENOMEM;
 				break;
 			}
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 					      sizeof mad_priv->header,
 					    DMA_FROM_DEVICE);
 			kmem_cache_free(ib_mad_cache, mad_priv);
-			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
+			dev_err(&qp_info->port_priv->device->dev,
+				"ib_post_recv failed: %d\n", ret);
 			break;
 		}
 	} while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr) {
-		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
+		dev_err(&port_priv->device->dev,
+			"Couldn't kmalloc ib_qp_attr\n");
 		return -ENOMEM;
 	}
 
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "INIT: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to INIT: %d\n",
+				i, ret);
 			goto out;
 		}
 
 		attr->qp_state = IB_QPS_RTR;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "RTR: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to RTR: %d\n",
+				i, ret);
 			goto out;
 		}
 
@@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't change QP%d state to "
-			       "RTS: %d\n", i, ret);
+			dev_err(&port_priv->device->dev,
+				"Couldn't change QP%d state to RTS: %d\n",
+				i, ret);
 			goto out;
 		}
 	}
 
 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 	if (ret) {
-		printk(KERN_ERR PFX "Failed to request completion "
-		       "notification: %d\n", ret);
+		dev_err(&port_priv->device->dev,
+			"Failed to request completion notification: %d\n",
+			ret);
 		goto out;
 	}
 
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
 
 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
 		if (ret) {
-			printk(KERN_ERR PFX "Couldn't post receive WRs\n");
+			dev_err(&port_priv->device->dev,
+				"Couldn't post receive WRs\n");
 			goto out;
 		}
 	}
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
 	struct ib_mad_qp_info	*qp_info = qp_context;
 
 	/* It's worse than that! He's dead, Jim! */
-	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+	dev_err(&qp_info->port_priv->device->dev,
+		"Fatal error (%d) on MAD QP (%d)\n",
 		event->event, qp_info->qp->qp_num);
 }
 
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
 	qp_init_attr.event_handler = qp_event_handler;
 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
 	if (IS_ERR(qp_info->qp)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
-		       get_spl_qp_index(qp_type));
+		dev_err(&qp_info->port_priv->device->dev,
+			"Couldn't create ib_mad QP%d\n",
+			get_spl_qp_index(qp_type));
 		ret = PTR_ERR(qp_info->qp);
 		goto error;
 	}
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
 	/* Create new device info */
 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
 	if (!port_priv) {
-		printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
+		dev_err(&device->dev, "No memory for ib_mad_port_private\n");
 		return -ENOMEM;
 	}
 
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
 				     ib_mad_thread_completion_handler,
 				     NULL, port_priv, cq_size, 0);
 	if (IS_ERR(port_priv->cq)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
+		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
 		ret = PTR_ERR(port_priv->cq);
 		goto error3;
 	}
 
 	port_priv->pd = ib_alloc_pd(device);
 	if (IS_ERR(port_priv->pd)) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
+		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
 		ret = PTR_ERR(port_priv->pd);
 		goto error4;
 	}
 
 	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(port_priv->mr)) {
-		printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
+		dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
 		ret = PTR_ERR(port_priv->mr);
 		goto error5;
 	}
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
 
 	ret = ib_mad_port_start(port_priv);
 	if (ret) {
-		printk(KERN_ERR PFX "Couldn't start port\n");
+		dev_err(&device->dev, "Couldn't start port\n");
 		goto error9;
 	}
 
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
 	port_priv = __ib_get_mad_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-		printk(KERN_ERR PFX "Port %d not found\n", port_num);
+		dev_err(&device->dev, "Port %d not found\n", port_num);
 		return -ENODEV;
 	}
 	list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
 
 	for (i = start; i <= end; i++) {
 		if (ib_mad_port_open(device, i)) {
-			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
-			       device->name, i);
+			dev_err(&device->dev, "Couldn't open port %d\n", i);
 			goto error;
 		}
 		if (ib_agent_port_open(device, i)) {
-			printk(KERN_ERR PFX "Couldn't open %s port %d "
-			       "for agents\n",
-			       device->name, i);
+			dev_err(&device->dev,
+				"Couldn't open port %d for agents\n", i);
 			goto error_agent;
 		}
 	}
@@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device)
 
 error_agent:
 	if (ib_mad_port_close(device, i))
-		printk(KERN_ERR PFX "Couldn't close %s port %d\n",
-		       device->name, i);
+		dev_err(&device->dev, "Couldn't close port %d\n", i);
 
 error:
 	i--;
 
 	while (i >= start) {
 		if (ib_agent_port_close(device, i))
-			printk(KERN_ERR PFX "Couldn't close %s port %d "
-			       "for agents\n",
-			       device->name, i);
+			dev_err(&device->dev,
+				"Couldn't close port %d for agents\n", i);
 		if (ib_mad_port_close(device, i))
-			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
-			       device->name, i);
+			dev_err(&device->dev, "Couldn't close port %d\n", i);
 		i--;
 	}
 }
@@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device)
 	}
 	for (i = 0; i < num_ports; i++, cur_port++) {
 		if (ib_agent_port_close(device, cur_port))
-			printk(KERN_ERR PFX "Couldn't close %s port %d "
-			       "for agents\n",
-			       device->name, cur_port);
+			dev_err(&device->dev,
+				"Couldn't close port %d for agents\n",
+				cur_port);
 		if (ib_mad_port_close(device, cur_port))
-			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
-			       device->name, cur_port);
+			dev_err(&device->dev, "Couldn't close port %d\n",
+				cur_port);
 	}
 }
 
@@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void)
 					 SLAB_HWCACHE_ALIGN,
 					 NULL);
 	if (!ib_mad_cache) {
-		printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
+		pr_err("Couldn't create ib_mad cache\n");
 		ret = -ENOMEM;
 		goto error1;
 	}
@@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void)
 	INIT_LIST_HEAD(&ib_mad_port_list);
 
 	if (ib_register_client(&mad_client)) {
-		printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
+		pr_err("Couldn't register ib_mad client\n");
 		ret = -EINVAL;
 		goto error2;
 	}
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4..d1a0b0e 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -42,9 +42,6 @@
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
 
-
-#define PFX "ib_mad: "
-
 #define IB_MAD_QPS_CORE		2 /* Always QP0 and QP1 as a minimum */
 
 /* QP and CQ parameters */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf5..c38f030 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
 		sa_dev->port[i].agent =
 			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
 					      NULL, 0, send_handler,
-					      recv_handler, sa_dev);
+					      recv_handler, sa_dev, 0);
 		if (IS_ERR(sa_dev->port[i].agent))
 			goto err;
 
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb991..928cdd2 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
  * SOFTWARE.
  */
 
+#define pr_fmt(fmt) "user_mad: " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 
 	rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
 	hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
-	if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) {
-		copy_offset = IB_MGMT_MAD_HDR;
-		rmpp_active = 0;
-	} else {
+
+	if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	    && ib_mad_kernel_rmpp_agent(agent)) {
 		copy_offset = IB_MGMT_RMPP_HDR;
 		rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
-			      IB_MGMT_RMPP_FLAG_ACTIVE;
+						IB_MGMT_RMPP_FLAG_ACTIVE;
+	} else {
+		copy_offset = IB_MGMT_MAD_HDR;
+		rmpp_active = 0;
 	}
 
 	data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
 		rmpp_mad->mad_hdr.tid = *tid;
 	}
 
-	spin_lock_irq(&file->send_lock);
-	ret = is_duplicate(file, packet);
-	if (!ret)
+	if (!ib_mad_kernel_rmpp_agent(agent)
+	   && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
+	   && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
+		spin_lock_irq(&file->send_lock);
 		list_add_tail(&packet->list, &file->send_list);
-	spin_unlock_irq(&file->send_lock);
-	if (ret) {
-		ret = -EINVAL;
-		goto err_msg;
+		spin_unlock_irq(&file->send_lock);
+	} else {
+		spin_lock_irq(&file->send_lock);
+		ret = is_duplicate(file, packet);
+		if (!ret)
+			list_add_tail(&packet->list, &file->send_list);
+		spin_unlock_irq(&file->send_lock);
+		if (ret) {
+			ret = -EINVAL;
+			goto err_msg;
+		}
 	}
 
 	ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 	mutex_lock(&file->mutex);
 
 	if (!file->port->ib_dev) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent: invalid device\n");
 		ret = -EPIPE;
 		goto out;
 	}
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 	}
 
 	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent: invalid QPN %d specified\n",
+			   ureq.qpn);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
 		if (!__get_agent(file, agent_id))
 			goto found;
 
+	dev_notice(file->port->dev,
+		   "ib_umad_reg_agent: Max Agents (%u) reached\n",
+		   IB_UMAD_MAX_AGENTS);
 	ret = -ENOMEM;
 	goto out;
 
 found:
 	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
 		req.mgmt_class         = ureq.mgmt_class;
 		req.mgmt_class_version = ureq.mgmt_class_version;
 		memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
 				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
 				      ureq.mgmt_class ? &req : NULL,
 				      ureq.rmpp_version,
-				      send_handler, recv_handler, file);
+				      send_handler, recv_handler, file, 0);
 	if (IS_ERR(agent)) {
 		ret = PTR_ERR(agent);
 		agent = NULL;
@@ -673,10 +694,11 @@ found:
 	if (!file->already_used) {
 		file->already_used = 1;
 		if (!file->use_pkey_index) {
-			printk(KERN_WARNING "user_mad: process %s did not enable "
-			       "P_Key index support.\n", current->comm);
-			printk(KERN_WARNING "user_mad:   Documentation/infiniband/user_mad.txt "
-			       "has info on the new ABI.\n");
+			dev_warn(file->port->dev,
+				"process %s did not enable P_Key index support.\n",
+				current->comm);
+			dev_warn(file->port->dev,
+				"   Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
 		}
 	}
 
@@ -694,6 +716,119 @@ out:
 	return ret;
 }
 
+static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
+{
+	struct ib_user_mad_reg_req2 ureq;
+	struct ib_mad_reg_req req;
+	struct ib_mad_agent *agent = NULL;
+	int agent_id;
+	int ret;
+
+	mutex_lock(&file->port->file_mutex);
+	mutex_lock(&file->mutex);
+
+	if (!file->port->ib_dev) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid device\n");
+		ret = -EPIPE;
+		goto out;
+	}
+
+	if (copy_from_user(&ureq, arg, sizeof(ureq))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (ureq.qpn != 0 && ureq.qpn != 1) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2: invalid QPN %d specified\n",
+			   ureq.qpn);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
+		dev_notice(file->port->dev,
+			   "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
+			   ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
+		ret = -EINVAL;
+
+		if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
+				(u32 __user *) (arg + offsetof(struct
+				ib_user_mad_reg_req2, flags))))
+			ret = -EFAULT;
+
+		goto out;
+	}
+
+	for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
+		if (!__get_agent(file, agent_id))
+			goto found;
+
+	dev_notice(file->port->dev,
+		   "ib_umad_reg_agent2: Max Agents (%u) reached\n",
+		   IB_UMAD_MAX_AGENTS);
+	ret = -ENOMEM;
+	goto out;
+
+found:
+	if (ureq.mgmt_class) {
+		memset(&req, 0, sizeof(req));
+		req.mgmt_class         = ureq.mgmt_class;
+		req.mgmt_class_version = ureq.mgmt_class_version;
+		if (ureq.oui & 0xff000000) {
+			dev_notice(file->port->dev,
+				   "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
+				   ureq.oui);
+			ret = -EINVAL;
+			goto out;
+		}
+		req.oui[2] =  ureq.oui & 0x0000ff;
+		req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
+		req.oui[0] = (ureq.oui & 0xff0000) >> 16;
+		memcpy(req.method_mask, ureq.method_mask,
+			sizeof(req.method_mask));
+	}
+
+	agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
+				      ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
+				      ureq.mgmt_class ? &req : NULL,
+				      ureq.rmpp_version,
+				      send_handler, recv_handler, file,
+				      ureq.flags);
+	if (IS_ERR(agent)) {
+		ret = PTR_ERR(agent);
+		agent = NULL;
+		goto out;
+	}
+
+	if (put_user(agent_id,
+		     (u32 __user *)(arg +
+				offsetof(struct ib_user_mad_reg_req2, id)))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (!file->already_used) {
+		file->already_used = 1;
+		file->use_pkey_index = 1;
+	}
+
+	file->agent[agent_id] = agent;
+	ret = 0;
+
+out:
+	mutex_unlock(&file->mutex);
+
+	if (ret && agent)
+		ib_unregister_mad_agent(agent);
+
+	mutex_unlock(&file->port->file_mutex);
+
+	return ret;
+}
+
+
 static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
 {
 	struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
 		return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
 	case IB_USER_MAD_ENABLE_PKEY:
 		return ib_umad_enable_pkey(filp->private_data);
+	case IB_USER_MAD_REGISTER_AGENT2:
+		return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
 	default:
 		return -ENOIOCTLCMD;
 	}
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 
 static dev_t overflow_maj;
 static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
-static int find_overflow_devnum(void)
+static int find_overflow_devnum(struct ib_device *device)
 {
 	int ret;
 
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
 					  "infiniband_mad");
 		if (ret) {
-			printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+			dev_err(&device->dev,
+				"couldn't register dynamic device number\n");
 			return ret;
 		}
 	}
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
 	devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
 	if (devnum >= IB_UMAD_MAX_PORTS) {
 		spin_unlock(&port_lock);
-		devnum = find_overflow_devnum();
+		devnum = find_overflow_devnum(device);
 		if (devnum < 0)
 			return -1;
 
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
 	ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
 				     "infiniband_mad");
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't register device number\n");
+		pr_err("couldn't register device number\n");
 		goto out;
 	}
 
 	umad_class = class_create(THIS_MODULE, "infiniband_mad");
 	if (IS_ERR(umad_class)) {
 		ret = PTR_ERR(umad_class);
-		printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n");
+		pr_err("couldn't create class infiniband_mad\n");
 		goto out_chrdev;
 	}
 
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
 
 	ret = class_create_file(umad_class, &class_attr_abi_version.attr);
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n");
+		pr_err("couldn't create abi_version attribute\n");
 		goto out_class;
 	}
 
 	ret = ib_register_client(&umad_client);
 	if (ret) {
-		printk(KERN_ERR "user_mad: couldn't register ib_umad client\n");
+		pr_err("couldn't register ib_umad client\n");
 		goto out_class;
 	}
 
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274..643c08a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
 IB_UVERBS_DECLARE_CMD(alloc_pd);
 IB_UVERBS_DECLARE_CMD(dealloc_pd);
 IB_UVERBS_DECLARE_CMD(reg_mr);
+IB_UVERBS_DECLARE_CMD(rereg_mr);
 IB_UVERBS_DECLARE_CMD(dereg_mr);
 IB_UVERBS_DECLARE_CMD(alloc_mw);
 IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203e..0600c50 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
 	return ret;
 }
 
+ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
+			   const char __user *buf, int in_len,
+			   int out_len)
+{
+	struct ib_uverbs_rereg_mr      cmd;
+	struct ib_uverbs_rereg_mr_resp resp;
+	struct ib_udata              udata;
+	struct ib_pd                *pd = NULL;
+	struct ib_mr                *mr;
+	struct ib_pd		    *old_pd;
+	int                          ret;
+	struct ib_uobject	    *uobj;
+
+	if (out_len < sizeof(resp))
+		return -ENOSPC;
+
+	if (copy_from_user(&cmd, buf, sizeof(cmd)))
+		return -EFAULT;
+
+	INIT_UDATA(&udata, buf + sizeof(cmd),
+		   (unsigned long) cmd.response + sizeof(resp),
+		   in_len - sizeof(cmd), out_len - sizeof(resp));
+
+	if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
+		return -EINVAL;
+
+	if ((cmd.flags & IB_MR_REREG_TRANS) &&
+	    (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
+	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
+			return -EINVAL;
+
+	uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
+			      file->ucontext);
+
+	if (!uobj)
+		return -EINVAL;
+
+	mr = uobj->object;
+
+	if (cmd.flags & IB_MR_REREG_ACCESS) {
+		ret = ib_check_mr_access(cmd.access_flags);
+		if (ret)
+			goto put_uobjs;
+	}
+
+	if (cmd.flags & IB_MR_REREG_PD) {
+		pd = idr_read_pd(cmd.pd_handle, file->ucontext);
+		if (!pd) {
+			ret = -EINVAL;
+			goto put_uobjs;
+		}
+	}
+
+	if (atomic_read(&mr->usecnt)) {
+		ret = -EBUSY;
+		goto put_uobj_pd;
+	}
+
+	old_pd = mr->pd;
+	ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
+					cmd.length, cmd.hca_va,
+					cmd.access_flags, pd, &udata);
+	if (!ret) {
+		if (cmd.flags & IB_MR_REREG_PD) {
+			atomic_inc(&pd->usecnt);
+			mr->pd = pd;
+			atomic_dec(&old_pd->usecnt);
+		}
+	} else {
+		goto put_uobj_pd;
+	}
+
+	memset(&resp, 0, sizeof(resp));
+	resp.lkey      = mr->lkey;
+	resp.rkey      = mr->rkey;
+
+	if (copy_to_user((void __user *)(unsigned long)cmd.response,
+			 &resp, sizeof(resp)))
+		ret = -EFAULT;
+	else
+		ret = in_len;
+
+put_uobj_pd:
+	if (cmd.flags & IB_MR_REREG_PD)
+		put_pd_read(pd);
+
+put_uobjs:
+
+	put_uobj_write(mr->uobject);
+
+	return ret;
+}
+
 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
 			   const char __user *buf, int in_len,
 			   int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb..c73b22a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 	[IB_USER_VERBS_CMD_ALLOC_PD]		= ib_uverbs_alloc_pd,
 	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,
 	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,
+	[IB_USER_VERBS_CMD_REREG_MR]		= ib_uverbs_rereg_mr,
 	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr,
 	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw,
 	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e85..1b63185 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
 			  mq->msg_pool.host, dma_unmap_addr(mq, mapping));
 }
 
-static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size,
-			   int msg_size)
+static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
+			   size_t q_size, size_t msg_size)
 {
 	u8 *pool_start;
 
+	if (q_size > SIZE_MAX / msg_size)
+		return -EINVAL;
+
 	pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
 					&mq->host_dma, GFP_KERNEL);
 	if (!pool_start)
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051..c9df054 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
 
 	chp = get_chp(dev, qid);
 	if (chp) {
+		t4_clear_cq_armed(&chp->cq);
 		spin_lock_irqsave(&chp->comp_handler_lock, flag);
 		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
 		spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc..41cd688 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		       struct c4iw_cq *schp)
 {
 	int count;
-	int flushed;
+	int rq_flushed, sq_flushed;
 	unsigned long flag;
 
 	PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 
 	c4iw_flush_hw_cq(rchp);
 	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&rchp->lock, flag);
-	if (flushed) {
-		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
-		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
-		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
-	}
 
 	/* locking hierarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, flag);
 	spin_lock(&qhp->lock);
 	if (schp != rchp)
 		c4iw_flush_hw_cq(schp);
-	flushed = c4iw_flush_sq(qhp);
+	sq_flushed = c4iw_flush_sq(qhp);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, flag);
-	if (flushed) {
-		spin_lock_irqsave(&schp->comp_handler_lock, flag);
-		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
-		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+
+	if (schp == rchp) {
+		if (t4_clear_cq_armed(&rchp->cq) &&
+		    (rq_flushed || sq_flushed)) {
+			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
+						   rchp->ibcq.cq_context);
+			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+		}
+	} else {
+		if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
+			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
+						   rchp->ibcq.cq_context);
+			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+		}
+		if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
+			spin_lock_irqsave(&schp->comp_handler_lock, flag);
+			(*schp->ibcq.comp_handler)(&schp->ibcq,
+						   schp->ibcq.cq_context);
+			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+		}
 	}
 }
 
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa..c04e513 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
 	return !wq->rq.queue[wq->rq.size].status.db_off;
 }
 
+enum t4_cq_flags {
+	CQ_ARMED	= 1,
+};
+
 struct t4_cq {
 	struct t4_cqe *queue;
 	dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
 	u16 cidx_inc;
 	u8 gen;
 	u8 error;
+	unsigned long flags;
 };
 
+static inline int t4_clear_cq_armed(struct t4_cq *cq)
+{
+	return test_and_clear_bit(CQ_ARMED, &cq->flags);
+}
+
 static inline int t4_arm_cq(struct t4_cq *cq, int se)
 {
 	u32 val;
 
+	set_bit(CQ_ARMED, &cq->flags);
 	while (cq->cidx_inc > CIDXINC_MASK) {
 		val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
 		      INGRESSQID(cq->cqid);
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d04..e890e5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
  * @dd: the infinipath device
  * @pkeys: the PKEY table
  */
-static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
+static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
 {
 	struct ipath_portdata *pd;
 	int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
 	}
 	if (changed) {
 		u64 pkey;
+		struct ib_event event;
 
 		pkey = (u64) dd->ipath_pkeys[0] |
 			((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
 			   (unsigned long long) pkey);
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
 				 pkey);
+
+		event.event = IB_EVENT_PKEY_CHANGE;
+		event.device = &dd->verbs_dev->ibdev;
+		event.element.port_num = port;
+		ib_dispatch_event(&event);
 	}
 	return 0;
 }
 
 static int recv_subn_set_pkeytable(struct ib_smp *smp,
-				   struct ib_device *ibdev)
+				   struct ib_device *ibdev, u8 port)
 {
 	u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
 	__be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
 	for (i = 0; i < n; i++)
 		q[i] = be16_to_cpu(p[i]);
 
-	if (startpx != 0 || set_pkeys(dev->dd, q) != 0)
+	if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
 		smp->status |= IB_SMP_INVALID_FIELD;
 
 	return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 			ret = recv_subn_set_portinfo(smp, ibdev, port_num);
 			goto bail;
 		case IB_SMP_ATTR_PKEY_TABLE:
-			ret = recv_subn_set_pkeytable(smp, ibdev);
+			ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
 			if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad05..82a7dd8 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
 				agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 							      q ? IB_QPT_GSI : IB_QPT_SMI,
 							      NULL, 0, send_handler,
-							      NULL, NULL);
+							      NULL, NULL, 0);
 				if (IS_ERR(agent)) {
 					ret = PTR_ERR(agent);
 					goto err;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0f7027e..e1e558a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
 	const struct default_rules *pdefault_rules = default_table;
 	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
 
-	for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
-	     pdefault_rules++) {
+	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
 		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
 		memset(&field_types, 0, sizeof(field_types));
 
@@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules(
 	int size = 0;
 	int i;
 
-	for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
-			sizeof(pdefault_rules->rules_create_list[0]); i++) {
+	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
 		int ret;
 		union ib_flow_spec ib_spec;
 		switch (pdefault_rules->rules_create_list[i]) {
@@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
 		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
 		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
@@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
 	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
 	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
+	ibdev->ib_dev.rereg_user_mr	= mlx4_ib_rereg_user_mr;
 	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
 	ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
 	ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 369da3c..e8cad39 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 			 int is_attach);
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+			  u64 start, u64 length, u64 virt_addr,
+			  int mr_access_flags, struct ib_pd *pd,
+			  struct ib_udata *udata);
 
 #endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index cb2a872..9b0e80e 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
+	/* Force registering the memory as writable. */
+	/* Used for memory re-registeration. HCA protects the access */
 	mr->umem = ib_umem_get(pd->uobject->context, start, length,
-			       access_flags, 0);
+			       access_flags | IB_ACCESS_LOCAL_WRITE, 0);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err_free;
@@ -183,6 +185,90 @@ err_free:
 	return ERR_PTR(err);
 }
 
+int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
+			  u64 start, u64 length, u64 virt_addr,
+			  int mr_access_flags, struct ib_pd *pd,
+			  struct ib_udata *udata)
+{
+	struct mlx4_ib_dev *dev = to_mdev(mr->device);
+	struct mlx4_ib_mr *mmr = to_mmr(mr);
+	struct mlx4_mpt_entry *mpt_entry;
+	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
+	int err;
+
+	/* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
+	 * we assume that the calls can't run concurrently. Otherwise, a
+	 * race exists.
+	 */
+	err =  mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
+
+	if (err)
+		return err;
+
+	if (flags & IB_MR_REREG_PD) {
+		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
+					   to_mpd(pd)->pdn);
+
+		if (err)
+			goto release_mpt_entry;
+	}
+
+	if (flags & IB_MR_REREG_ACCESS) {
+		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
+					       convert_access(mr_access_flags));
+
+		if (err)
+			goto release_mpt_entry;
+	}
+
+	if (flags & IB_MR_REREG_TRANS) {
+		int shift;
+		int err;
+		int n;
+
+		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+		ib_umem_release(mmr->umem);
+		mmr->umem = ib_umem_get(mr->uobject->context, start, length,
+					mr_access_flags |
+					IB_ACCESS_LOCAL_WRITE,
+					0);
+		if (IS_ERR(mmr->umem)) {
+			err = PTR_ERR(mmr->umem);
+			mmr->umem = NULL;
+			goto release_mpt_entry;
+		}
+		n = ib_umem_page_count(mmr->umem);
+		shift = ilog2(mmr->umem->page_size);
+
+		mmr->mmr.iova       = virt_addr;
+		mmr->mmr.size       = length;
+		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
+					      virt_addr, length, n, shift,
+					      *pmpt_entry);
+		if (err) {
+			ib_umem_release(mmr->umem);
+			goto release_mpt_entry;
+		}
+
+		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
+		if (err) {
+			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
+			ib_umem_release(mmr->umem);
+			goto release_mpt_entry;
+		}
+	}
+
+	/* If we couldn't transfer the MR to the HCA, just remember to
+	 * return a failure. But dereg_mr will free the resources.
+	 */
+	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
+
+release_mpt_entry:
+	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
+
+	return err;
+}
+
 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 {
 	struct mlx4_ib_mr *mr = to_mmr(ibmr);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7efe6e3..8c574b6 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
-		if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
+		if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
 			mlx5_ib_warn(dev, "\n");
 			err = -EINVAL;
 			*bad_wr = wr;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f45..8881fa3 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
 			agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
 						      q ? IB_QPT_GSI : IB_QPT_SMI,
 						      NULL, 0, send_handler,
-						      NULL, NULL);
+						      NULL, NULL, 0);
 			if (IS_ERR(agent)) {
 				ret = PTR_ERR(agent);
 				goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 19011db..b43456a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -40,7 +40,7 @@
 #include <be_roce.h>
 #include "ocrdma_sli.h"
 
-#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u"
+#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u"
 
 #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -137,6 +137,7 @@ struct mqe_ctx {
 	u16 cqe_status;
 	u16 ext_status;
 	bool cmd_done;
+	bool fw_error_state;
 };
 
 struct ocrdma_hw_mr {
@@ -235,7 +236,10 @@ struct ocrdma_dev {
 	struct list_head entry;
 	struct rcu_head rcu;
 	int id;
-	u64 stag_arr[OCRDMA_MAX_STAG];
+	u64 *stag_arr;
+	u8 sl; /* service level */
+	bool pfc_state;
+	atomic_t update_sl;
 	u16 pvid;
 	u32 asic_id;
 
@@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
 				OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
 }
 
+static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio)
+{
+	return *(pfc + prio);
+}
+
+static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio)
+{
+	return *(app_prio + prio);
+}
+
+static inline u8 ocrdma_is_enabled_and_synced(u32 state)
+{	/* May also be used to interpret TC-state, QCN-state
+	 * Appl-state and Logical-link-state in future.
+	 */
+	return (state & OCRDMA_STATE_FLAG_ENABLED) &&
+		(state & OCRDMA_STATE_FLAG_SYNC);
+}
+
 #endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index d4cc01f..40f8536 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,6 +35,8 @@
 #include "ocrdma_ah.h"
 #include "ocrdma_hw.h"
 
+#define OCRDMA_VID_PCP_SHIFT	0xD
+
 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 				struct ib_ah_attr *attr, int pdid)
 {
@@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 	if (vlan_tag && (vlan_tag < 0x1000)) {
 		eth.eth_type = cpu_to_be16(0x8100);
 		eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
-		vlan_tag |= (attr->sl & 7) << 13;
+		vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
 		eth.vlan_tag = cpu_to_be16(vlan_tag);
 		eth_sz = sizeof(struct ocrdma_eth_vlan);
 		vlan_enabled = true;
@@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 	if (!(attr->ah_flags & IB_AH_GRH))
 		return ERR_PTR(-EINVAL);
 
+	if (atomic_cmpxchg(&dev->update_sl, 1, 0))
+		ocrdma_init_service_level(dev);
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 3bbf201..dd35ae5 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
 
 	cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
 	cmd->eqn = eq->id;
-	cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe);
+	cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe);
 
 	ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
 			     cq->dma, PAGE_SIZE_4K);
@@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 {
 	struct ocrdma_qp *qp = NULL;
 	struct ocrdma_cq *cq = NULL;
-	struct ib_event ib_evt = { 0 };
+	struct ib_event ib_evt;
 	int cq_event = 0;
 	int qp_event = 1;
 	int srq_event = 0;
@@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
 	if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
 		cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
 
+	memset(&ib_evt, 0, sizeof(ib_evt));
+
 	ib_evt.device = &dev->ibdev;
 
 	switch (type) {
@@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
 					OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
 					OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
 		break;
+
+	case OCRDMA_ASYNC_EVENT_COS_VALUE:
+		atomic_set(&dev->update_sl, 1);
+		break;
 	default:
 		/* Not interested evts. */
 		break;
@@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
 				    msecs_to_jiffies(30000));
 	if (status)
 		return 0;
-	else
+	else {
+		dev->mqe_ctx.fw_error_state = true;
+		pr_err("%s(%d) mailbox timeout: fw not responding\n",
+		       __func__, dev->id);
 		return -1;
+	}
 }
 
 /* issue a mailbox command on the MQ */
@@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
 	struct ocrdma_mbx_rsp *rsp = NULL;
 
 	mutex_lock(&dev->mqe_ctx.lock);
+	if (dev->mqe_ctx.fw_error_state)
+		goto mbx_err;
 	ocrdma_post_mqe(dev, mqe);
 	status = ocrdma_wait_mqe_cmpl(dev);
 	if (status)
@@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
 	    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
 	attr->max_mw = rsp->max_mw;
 	attr->max_mr = rsp->max_mr;
-	attr->max_mr_size = ~0ull;
+	attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
+			      rsp->max_mr_size_lo;
 	attr->max_fmr = 0;
 	attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
 	attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
@@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
 		ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
 		hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
 
-		dev->hba_port_num = hba_attribs->phy_port;
+		dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
+					OCRDMA_HBA_ATTRB_PTNUM_MASK)
+					>> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
 		strncpy(dev->model_number,
 			hba_attribs->controller_model_number, 31);
 	}
@@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
 		goto mbx_err;
 
 	rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
-	*lnk_speed = rsp->phys_port_speed;
+	*lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+			>> OCRDMA_PHY_PS_SHIFT;
 
 mbx_err:
 	kfree(cmd);
@@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
 		goto mbx_err;
 
 	rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
-	dev->phy.phy_type = le16_to_cpu(rsp->phy_type);
+	dev->phy.phy_type =
+			(rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK);
+	dev->phy.interface_type =
+			(rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK)
+				>> OCRDMA_IF_TYPE_SHIFT;
 	dev->phy.auto_speeds_supported  =
-			le16_to_cpu(rsp->auto_speeds_supported);
+			(rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK);
 	dev->phy.fixed_speeds_supported =
-			le16_to_cpu(rsp->fixed_speeds_supported);
+			(rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK)
+				>> OCRDMA_FSPEED_SUPP_SHIFT;
 mbx_err:
 	kfree(cmd);
 	return status;
@@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
 
 	pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
 	for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
-		pbes[i].pa_lo = (u32) (pa & 0xffffffff);
-		pbes[i].pa_hi = (u32) upper_32_bits(pa);
+		pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff);
+		pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa));
 		pa += PAGE_SIZE;
 	}
 	cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
@@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
 	ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
 	dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
 			  dev->av_tbl.pa);
+	dev->av_tbl.va = NULL;
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
 			  dev->av_tbl.pbl.pa);
 	kfree(cmd);
@@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
 			cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
 				OCRDMA_CREATE_CQ_TYPE_SHIFT;
 		cq->phase_change = false;
-		cmd->cmd.cqe_count = (cq->len / cqe_size);
+		cmd->cmd.pdid_cqecnt = (cq->len / cqe_size);
 	} else {
-		cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
+		cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1;
 		cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
 		cq->phase_change = true;
 	}
 
-	cmd->cmd.pd_id = pd_id; /* valid only for v3 */
+	/* pd_id valid only for v3 */
+	cmd->cmd.pdid_cqecnt |= (pd_id <<
+		OCRDMA_CREATE_CQ_CMD_PDID_SHIFT);
 	ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
 	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
 	if (status)
@@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
 				OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
 	qp->rq_cq = cq;
 
-	if (pd->dpp_enabled && pd->num_dpp_qp) {
+	if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
+	    (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
 		ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
 					     dpp_cq_id);
 	}
@@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 
 	if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
 		return -EINVAL;
+	if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0))
+		ocrdma_init_service_level(qp->dev);
 	cmd->params.tclass_sq_psn |=
 	    (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
 	cmd->params.rnt_rc_sl_fl |=
@@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 		cmd->params.vlan_dmac_b4_to_b5 |=
 		    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
 		cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
+		cmd->params.rnt_rc_sl_fl |=
+			(qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
 	}
 	return 0;
 }
@@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
 	return status;
 }
 
+static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
+				      struct ocrdma_dcbx_cfg *dcbxcfg)
+{
+	int status = 0;
+	dma_addr_t pa;
+	struct ocrdma_mqe cmd;
+
+	struct ocrdma_get_dcbx_cfg_req *req = NULL;
+	struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL;
+	struct pci_dev *pdev = dev->nic_info.pdev;
+	struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge;
+
+	memset(&cmd, 0, sizeof(struct ocrdma_mqe));
+	cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp),
+					sizeof(struct ocrdma_get_dcbx_cfg_req));
+	req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL);
+	if (!req) {
+		status = -ENOMEM;
+		goto mem_err;
+	}
+
+	cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
+					OCRDMA_MQE_HDR_SGE_CNT_MASK;
+	mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL);
+	mqe_sge->pa_hi = (u32) upper_32_bits(pa);
+	mqe_sge->len = cmd.hdr.pyld_len;
+
+	memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
+	ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
+			OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
+	req->param_type = ptype;
+
+	status = ocrdma_mbx_cmd(dev, &cmd);
+	if (status)
+		goto mbx_err;
+
+	rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req;
+	ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp));
+	memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg));
+
+mbx_err:
+	dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa);
+mem_err:
+	return status;
+}
+
+#define OCRDMA_MAX_SERVICE_LEVEL_INDEX	0x08
+#define OCRDMA_DEFAULT_SERVICE_LEVEL	0x05
+
+static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
+				    struct ocrdma_dcbx_cfg *dcbxcfg,
+				    u8 *srvc_lvl)
+{
+	int status = -EINVAL, indx, slindx;
+	int ventry_cnt;
+	struct ocrdma_app_parameter *app_param;
+	u8 valid, proto_sel;
+	u8 app_prio, pfc_prio;
+	u16 proto;
+
+	if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) {
+		pr_info("%s ocrdma%d DCBX is disabled\n",
+			dev_name(&dev->nic_info.pdev->dev), dev->id);
+		goto out;
+	}
+
+	if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) {
+		pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n",
+			dev_name(&dev->nic_info.pdev->dev), dev->id,
+			(ptype > 0 ? "operational" : "admin"),
+			(dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ?
+			"enabled" : "disabled",
+			(dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ?
+			"" : ", not sync'ed");
+		goto out;
+	} else {
+		pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n",
+			dev_name(&dev->nic_info.pdev->dev), dev->id);
+	}
+
+	ventry_cnt = (dcbxcfg->tcv_aev_opv_st >>
+				OCRDMA_DCBX_APP_ENTRY_SHIFT)
+				& OCRDMA_DCBX_STATE_MASK;
+
+	for (indx = 0; indx < ventry_cnt; indx++) {
+		app_param = &dcbxcfg->app_param[indx];
+		valid = (app_param->valid_proto_app >>
+				OCRDMA_APP_PARAM_VALID_SHIFT)
+				& OCRDMA_APP_PARAM_VALID_MASK;
+		proto_sel = (app_param->valid_proto_app
+				>>  OCRDMA_APP_PARAM_PROTO_SEL_SHIFT)
+				& OCRDMA_APP_PARAM_PROTO_SEL_MASK;
+		proto = app_param->valid_proto_app &
+				OCRDMA_APP_PARAM_APP_PROTO_MASK;
+
+		if (
+			valid && proto == OCRDMA_APP_PROTO_ROCE &&
+			proto_sel == OCRDMA_PROTO_SELECT_L2) {
+			for (slindx = 0; slindx <
+				OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
+				app_prio = ocrdma_get_app_prio(
+						(u8 *)app_param->app_prio,
+						slindx);
+				pfc_prio = ocrdma_get_pfc_prio(
+						(u8 *)dcbxcfg->pfc_prio,
+						slindx);
+
+				if (app_prio && pfc_prio) {
+					*srvc_lvl = slindx;
+					status = 0;
+					goto out;
+				}
+			}
+			if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) {
+				pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n",
+					dev_name(&dev->nic_info.pdev->dev),
+					dev->id, proto);
+			}
+		}
+	}
+
+out:
+	return status;
+}
+
+void ocrdma_init_service_level(struct ocrdma_dev *dev)
+{
+	int status = 0, indx;
+	struct ocrdma_dcbx_cfg dcbxcfg;
+	u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL;
+	int ptype = OCRDMA_PARAMETER_TYPE_OPER;
+
+	for (indx = 0; indx < 2; indx++) {
+		status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg);
+		if (status) {
+			pr_err("%s(): status=%d\n", __func__, status);
+			ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+			continue;
+		}
+
+		status = ocrdma_parse_dcbxcfg_rsp(dev, ptype,
+						  &dcbxcfg, &srvc_lvl);
+		if (status) {
+			ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
+			continue;
+		}
+
+		break;
+	}
+
+	if (status)
+		pr_info("%s ocrdma%d service level default\n",
+			dev_name(&dev->nic_info.pdev->dev), dev->id);
+	else
+		pr_info("%s ocrdma%d service level %d\n",
+			dev_name(&dev->nic_info.pdev->dev), dev->id,
+			srvc_lvl);
+
+	dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state);
+	dev->sl = srvc_lvl;
+}
+
 int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
 {
 	int i;
@@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
 		goto conf_err;
 	status = ocrdma_mbx_get_phy_info(dev);
 	if (status)
-		goto conf_err;
+		goto info_attrb_err;
 	status = ocrdma_mbx_get_ctrl_attribs(dev);
 	if (status)
-		goto conf_err;
+		goto info_attrb_err;
 
 	return 0;
 
+info_attrb_err:
+	ocrdma_mbx_delete_ah_tbl(dev);
 conf_err:
 	ocrdma_destroy_mq(dev);
 mq_err:
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index e513f72..6eed8f19 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
 
 int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
 char *port_speed_string(struct ocrdma_dev *dev);
+void ocrdma_init_service_level(struct ocrdma_dev *);
+
 #endif				/* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7c504e0..256a06b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
 		if (!dev->qp_tbl)
 			goto alloc_err;
 	}
+
+	dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
+	if (dev->stag_arr == NULL)
+		goto alloc_err;
+
 	spin_lock_init(&dev->av_tbl.lock);
 	spin_lock_init(&dev->flush_q_lock);
 	return 0;
@@ -334,6 +339,7 @@ alloc_err:
 
 static void ocrdma_free_resources(struct ocrdma_dev *dev)
 {
+	kfree(dev->stag_arr);
 	kfree(dev->qp_tbl);
 	kfree(dev->cq_tbl);
 	kfree(dev->sgid_tbl);
@@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
 {
 	struct ocrdma_dev *dev = dev_get_drvdata(device);
 
-	return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]);
+	return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
+}
+
+static ssize_t show_hca_type(struct device *device,
+			     struct device_attribute *attr, char *buf)
+{
+	struct ocrdma_dev *dev = dev_get_drvdata(device);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
 }
 
 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
 
 static struct device_attribute *ocrdma_attributes[] = {
 	&dev_attr_hw_rev,
-	&dev_attr_fw_ver
+	&dev_attr_fw_ver,
+	&dev_attr_hca_type
 };
 
 static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
@@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
 		device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
 }
 
+static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
+				  struct net_device *net)
+{
+	struct in_device *in_dev;
+	union ib_gid gid;
+	in_dev = in_dev_get(net);
+	if (in_dev) {
+		for_ifa(in_dev) {
+			ipv6_addr_set_v4mapped(ifa->ifa_address,
+					       (struct in6_addr *)&gid);
+			ocrdma_add_sgid(dev, &gid);
+		}
+		endfor_ifa(in_dev);
+		in_dev_put(in_dev);
+	}
+}
+
+static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev,
+				  struct net_device *net)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct inet6_dev *in6_dev;
+	union ib_gid  *pgid;
+	struct inet6_ifaddr *ifp;
+	in6_dev = in6_dev_get(net);
+	if (in6_dev) {
+		read_lock_bh(&in6_dev->lock);
+		list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+			pgid = (union ib_gid *)&ifp->addr;
+			ocrdma_add_sgid(dev, pgid);
+		}
+		read_unlock_bh(&in6_dev->lock);
+		in6_dev_put(in6_dev);
+	}
+#endif
+}
+
+static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
+{
+	struct  net_device *net_dev;
+
+	for_each_netdev(&init_net, net_dev) {
+		struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ?
+				rdma_vlan_dev_real_dev(net_dev) : net_dev;
+
+		if (real_dev == dev->nic_info.netdev) {
+			ocrdma_init_ipv4_gids(dev, net_dev);
+			ocrdma_init_ipv6_gids(dev, net_dev);
+		}
+	}
+}
+
 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 {
 	int status = 0, i;
@@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 	if (status)
 		goto alloc_err;
 
+	ocrdma_init_service_level(dev);
+	ocrdma_init_gid_table(dev);
 	status = ocrdma_register_device(dev);
 	if (status)
 		goto alloc_err;
@@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev)
 	return 0;
 }
 
+static void ocrdma_shutdown(struct ocrdma_dev *dev)
+{
+	ocrdma_close(dev);
+	ocrdma_remove(dev);
+}
+
 /* event handling via NIC driver ensures that all the NIC specific
  * initialization done before RoCE driver notifies
  * event to stack.
@@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
 	case BE_DEV_DOWN:
 		ocrdma_close(dev);
 		break;
+	case BE_DEV_SHUTDOWN:
+		ocrdma_shutdown(dev);
+		break;
 	}
 }
 
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 96c9ee6..904989e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -44,35 +44,39 @@ enum {
 #define OCRDMA_SUBSYS_ROCE 10
 enum {
 	OCRDMA_CMD_QUERY_CONFIG = 1,
-	OCRDMA_CMD_ALLOC_PD,
-	OCRDMA_CMD_DEALLOC_PD,
-
-	OCRDMA_CMD_CREATE_AH_TBL,
-	OCRDMA_CMD_DELETE_AH_TBL,
-
-	OCRDMA_CMD_CREATE_QP,
-	OCRDMA_CMD_QUERY_QP,
-	OCRDMA_CMD_MODIFY_QP,
-	OCRDMA_CMD_DELETE_QP,
-
-	OCRDMA_CMD_RSVD1,
-	OCRDMA_CMD_ALLOC_LKEY,
-	OCRDMA_CMD_DEALLOC_LKEY,
-	OCRDMA_CMD_REGISTER_NSMR,
-	OCRDMA_CMD_REREGISTER_NSMR,
-	OCRDMA_CMD_REGISTER_NSMR_CONT,
-	OCRDMA_CMD_QUERY_NSMR,
-	OCRDMA_CMD_ALLOC_MW,
-	OCRDMA_CMD_QUERY_MW,
-
-	OCRDMA_CMD_CREATE_SRQ,
-	OCRDMA_CMD_QUERY_SRQ,
-	OCRDMA_CMD_MODIFY_SRQ,
-	OCRDMA_CMD_DELETE_SRQ,
-
-	OCRDMA_CMD_ATTACH_MCAST,
-	OCRDMA_CMD_DETACH_MCAST,
-	OCRDMA_CMD_GET_RDMA_STATS,
+	OCRDMA_CMD_ALLOC_PD = 2,
+	OCRDMA_CMD_DEALLOC_PD = 3,
+
+	OCRDMA_CMD_CREATE_AH_TBL = 4,
+	OCRDMA_CMD_DELETE_AH_TBL = 5,
+
+	OCRDMA_CMD_CREATE_QP = 6,
+	OCRDMA_CMD_QUERY_QP = 7,
+	OCRDMA_CMD_MODIFY_QP = 8 ,
+	OCRDMA_CMD_DELETE_QP = 9,
+
+	OCRDMA_CMD_RSVD1 = 10,
+	OCRDMA_CMD_ALLOC_LKEY = 11,
+	OCRDMA_CMD_DEALLOC_LKEY = 12,
+	OCRDMA_CMD_REGISTER_NSMR = 13,
+	OCRDMA_CMD_REREGISTER_NSMR = 14,
+	OCRDMA_CMD_REGISTER_NSMR_CONT = 15,
+	OCRDMA_CMD_QUERY_NSMR = 16,
+	OCRDMA_CMD_ALLOC_MW = 17,
+	OCRDMA_CMD_QUERY_MW = 18,
+
+	OCRDMA_CMD_CREATE_SRQ = 19,
+	OCRDMA_CMD_QUERY_SRQ = 20,
+	OCRDMA_CMD_MODIFY_SRQ = 21,
+	OCRDMA_CMD_DELETE_SRQ = 22,
+
+	OCRDMA_CMD_ATTACH_MCAST = 23,
+	OCRDMA_CMD_DETACH_MCAST = 24,
+
+	OCRDMA_CMD_CREATE_RBQ = 25,
+	OCRDMA_CMD_DESTROY_RBQ = 26,
+
+	OCRDMA_CMD_GET_RDMA_STATS = 27,
 
 	OCRDMA_CMD_MAX
 };
@@ -103,7 +107,7 @@ enum {
 
 #define OCRDMA_MAX_QP    2048
 #define OCRDMA_MAX_CQ    2048
-#define OCRDMA_MAX_STAG  8192
+#define OCRDMA_MAX_STAG 16384
 
 enum {
 	OCRDMA_DB_RQ_OFFSET		= 0xE0,
@@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe {
 
 #define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
 #define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
-#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3
+
+enum ocrdma_async_grp5_events {
+	OCRDMA_ASYNC_EVENT_QOS_VALUE	= 0x01,
+	OCRDMA_ASYNC_EVENT_COS_VALUE	= 0x02,
+	OCRDMA_ASYNC_EVENT_PVID_STATE	= 0x03
+};
 
 enum OCRDMA_ASYNC_EVENT_TYPE {
 	OCRDMA_CQ_ERROR			= 0x00,
@@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config {
 	u32 max_ird_ord_per_qp;
 	u32 max_shared_ird_ord;
 	u32 max_mr;
-	u32 max_mr_size_lo;
 	u32 max_mr_size_hi;
+	u32 max_mr_size_lo;
 	u32 max_num_mr_pbl;
 	u32 max_mw;
 	u32 max_fmr;
@@ -580,17 +589,26 @@ enum {
 	OCRDMA_FN_MODE_RDMA	= 0x4
 };
 
+enum {
+	OCRDMA_IF_TYPE_MASK		= 0xFFFF0000,
+	OCRDMA_IF_TYPE_SHIFT		= 0x10,
+	OCRDMA_PHY_TYPE_MASK		= 0x0000FFFF,
+	OCRDMA_FUTURE_DETAILS_MASK	= 0xFFFF0000,
+	OCRDMA_FUTURE_DETAILS_SHIFT	= 0x10,
+	OCRDMA_EX_PHY_DETAILS_MASK	= 0x0000FFFF,
+	OCRDMA_FSPEED_SUPP_MASK		= 0xFFFF0000,
+	OCRDMA_FSPEED_SUPP_SHIFT	= 0x10,
+	OCRDMA_ASPEED_SUPP_MASK		= 0x0000FFFF
+};
+
 struct ocrdma_get_phy_info_rsp {
 	struct ocrdma_mqe_hdr hdr;
 	struct ocrdma_mbx_rsp rsp;
 
-	u16 phy_type;
-	u16 interface_type;
+	u32 ityp_ptyp;
 	u32 misc_params;
-	u16 ext_phy_details;
-	u16 rsvd;
-	u16 auto_speeds_supported;
-	u16 fixed_speeds_supported;
+	u32 ftrdtl_exphydtl;
+	u32 fspeed_aspeed;
 	u32 future_use[2];
 };
 
@@ -603,19 +621,34 @@ enum {
 	OCRDMA_PHY_SPEED_40GBPS = 0x20
 };
 
+enum {
+	OCRDMA_PORT_NUM_MASK	= 0x3F,
+	OCRDMA_PT_MASK		= 0xC0,
+	OCRDMA_PT_SHIFT		= 0x6,
+	OCRDMA_LINK_DUP_MASK	= 0x0000FF00,
+	OCRDMA_LINK_DUP_SHIFT	= 0x8,
+	OCRDMA_PHY_PS_MASK	= 0x00FF0000,
+	OCRDMA_PHY_PS_SHIFT	= 0x10,
+	OCRDMA_PHY_PFLT_MASK	= 0xFF000000,
+	OCRDMA_PHY_PFLT_SHIFT	= 0x18,
+	OCRDMA_QOS_LNKSP_MASK	= 0xFFFF0000,
+	OCRDMA_QOS_LNKSP_SHIFT	= 0x10,
+	OCRDMA_LLST_MASK	= 0xFF,
+	OCRDMA_PLFC_MASK	= 0x00000400,
+	OCRDMA_PLFC_SHIFT	= 0x8,
+	OCRDMA_PLRFC_MASK	= 0x00000200,
+	OCRDMA_PLRFC_SHIFT	= 0x8,
+	OCRDMA_PLTFC_MASK	= 0x00000100,
+	OCRDMA_PLTFC_SHIFT	= 0x8
+};
 
 struct ocrdma_get_link_speed_rsp {
 	struct ocrdma_mqe_hdr hdr;
 	struct ocrdma_mbx_rsp rsp;
 
-	u8 pt_port_num;
-	u8 link_duplex;
-	u8 phys_port_speed;
-	u8 phys_port_fault;
-	u16 rsvd1;
-	u16 qos_lnk_speed;
-	u8 logical_lnk_status;
-	u8 rsvd2[3];
+	u32 pflt_pps_ld_pnum;
+	u32 qos_lsp;
+	u32 res_lls;
 };
 
 enum {
@@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd {
 	u32 pgsz_pgcnt;
 	u32 ev_cnt_flags;
 	u32 eqn;
-	u16 cqe_count;
-	u16 pd_id;
+	u32 pdid_cqecnt;
 	u32 rsvd6;
 	struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
 };
@@ -678,6 +710,10 @@ struct ocrdma_create_cq {
 };
 
 enum {
+	OCRDMA_CREATE_CQ_CMD_PDID_SHIFT	= 0x10
+};
+
+enum {
 	OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK	= 0xFFFF
 };
 
@@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq {
 
 enum {
 	OCRDMA_ALLOC_PD_ENABLE_DPP	= BIT(16),
-	OCRDMA_PD_MAX_DPP_ENABLED_QP	= 8,
 	OCRDMA_DPP_PAGE_SIZE		= 4096
 };
 
@@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp {
 	struct ocrdma_rx_dbg_stats	rx_dbg_stats;
 } __packed;
 
+enum {
+	OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK	= 0xFF,
+	OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK	= 0xFF00,
+	OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT	= 0x08,
+	OCRDMA_HBA_ATTRB_CDBLEN_MASK		= 0xFFFF,
+	OCRDMA_HBA_ATTRB_ASIC_REV_MASK		= 0xFF0000,
+	OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT		= 0x10,
+	OCRDMA_HBA_ATTRB_GUID0_MASK		= 0xFF000000,
+	OCRDMA_HBA_ATTRB_GUID0_SHIFT		= 0x18,
+	OCRDMA_HBA_ATTRB_GUID13_MASK		= 0xFF,
+	OCRDMA_HBA_ATTRB_GUID14_MASK		= 0xFF00,
+	OCRDMA_HBA_ATTRB_GUID14_SHIFT		= 0x08,
+	OCRDMA_HBA_ATTRB_GUID15_MASK		= 0xFF0000,
+	OCRDMA_HBA_ATTRB_GUID15_SHIFT		= 0x10,
+	OCRDMA_HBA_ATTRB_PCNT_MASK		= 0xFF000000,
+	OCRDMA_HBA_ATTRB_PCNT_SHIFT		= 0x18,
+	OCRDMA_HBA_ATTRB_LDTOUT_MASK		= 0xFFFF,
+	OCRDMA_HBA_ATTRB_ISCSI_VER_MASK		= 0xFF0000,
+	OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT	= 0x10,
+	OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK		= 0xFF000000,
+	OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT	= 0x18,
+	OCRDMA_HBA_ATTRB_CV_MASK		= 0xFF,
+	OCRDMA_HBA_ATTRB_HBA_ST_MASK		= 0xFF00,
+	OCRDMA_HBA_ATTRB_HBA_ST_SHIFT		= 0x08,
+	OCRDMA_HBA_ATTRB_MAX_DOMS_MASK		= 0xFF0000,
+	OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT		= 0x10,
+	OCRDMA_HBA_ATTRB_PTNUM_MASK		= 0x3F000000,
+	OCRDMA_HBA_ATTRB_PTNUM_SHIFT		= 0x18,
+	OCRDMA_HBA_ATTRB_PT_MASK		= 0xC0000000,
+	OCRDMA_HBA_ATTRB_PT_SHIFT		= 0x1E,
+	OCRDMA_HBA_ATTRB_ISCSI_FET_MASK		= 0xFF,
+	OCRDMA_HBA_ATTRB_ASIC_GEN_MASK		= 0xFF00,
+	OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT		= 0x08,
+	OCRDMA_HBA_ATTRB_PCI_VID_MASK		= 0xFFFF,
+	OCRDMA_HBA_ATTRB_PCI_DID_MASK		= 0xFFFF0000,
+	OCRDMA_HBA_ATTRB_PCI_DID_SHIFT		= 0x10,
+	OCRDMA_HBA_ATTRB_PCI_SVID_MASK		= 0xFFFF,
+	OCRDMA_HBA_ATTRB_PCI_SSID_MASK		= 0xFFFF0000,
+	OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT		= 0x10,
+	OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK	= 0xFF,
+	OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK	= 0xFF00,
+	OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT	= 0x08,
+	OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK	= 0xFF0000,
+	OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT	= 0x10,
+	OCRDMA_HBA_ATTRB_IF_TYPE_MASK		= 0xFF000000,
+	OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT		= 0x18,
+	OCRDMA_HBA_ATTRB_NETFIL_MASK		=0xFF
+};
 
 struct mgmt_hba_attribs {
 	u8 flashrom_version_string[32];
 	u8 manufacturer_name[32];
 	u32 supported_modes;
-	u32 rsvd0[3];
+	u32 rsvd_eprom_verhi_verlo;
+	u32 mbx_ds_ver;
+	u32 epfw_ds_ver;
 	u8 ncsi_ver_string[12];
 	u32 default_extended_timeout;
 	u8 controller_model_number[32];
@@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs {
 	u8 driver_version_string[32];
 	u8 fw_on_flash_version_string[32];
 	u32 functionalities_supported;
-	u16 max_cdblength;
-	u8 asic_revision;
-	u8 generational_guid[16];
-	u8 hba_port_count;
-	u16 default_link_down_timeout;
-	u8 iscsi_ver_min_max;
-	u8 multifunction_device;
-	u8 cache_valid;
-	u8 hba_status;
-	u8 max_domains_supported;
-	u8 phy_port;
+	u32 guid0_asicrev_cdblen;
+	u8 generational_guid[12];
+	u32 portcnt_guid15;
+	u32 mfuncdev_iscsi_ldtout;
+	u32 ptpnum_maxdoms_hbast_cv;
 	u32 firmware_post_status;
 	u32 hba_mtu[8];
-	u32 rsvd1[4];
+	u32 res_asicgen_iscsi_feaures;
+	u32 rsvd1[3];
 };
 
 struct mgmt_controller_attrib {
 	struct mgmt_hba_attribs hba_attribs;
-	u16 pci_vendor_id;
-	u16 pci_device_id;
-	u16 pci_sub_vendor_id;
-	u16 pci_sub_system_id;
-	u8 pci_bus_number;
-	u8 pci_device_number;
-	u8 pci_function_number;
-	u8 interface_type;
-	u64 unique_identifier;
-	u32 rsvd0[5];
+	u32 pci_did_vid;
+	u32 pci_ssid_svid;
+	u32 ityp_fnum_devnum_bnum;
+	u32 uid_hi;
+	u32 uid_lo;
+	u32 res_nnetfil;
+	u32 rsvd0[4];
 };
 
 struct ocrdma_get_ctrl_attribs_rsp {
@@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp {
 	struct mgmt_controller_attrib ctrl_attribs;
 };
 
+#define OCRDMA_SUBSYS_DCBX 0x10
+
+enum OCRDMA_DCBX_OPCODE {
+	OCRDMA_CMD_GET_DCBX_CONFIG = 0x01
+};
+
+enum OCRDMA_DCBX_PARAM_TYPE {
+	OCRDMA_PARAMETER_TYPE_ADMIN	= 0x00,
+	OCRDMA_PARAMETER_TYPE_OPER	= 0x01,
+	OCRDMA_PARAMETER_TYPE_PEER	= 0x02
+};
+
+enum OCRDMA_DCBX_APP_PROTO {
+	OCRDMA_APP_PROTO_ROCE	= 0x8915
+};
+
+enum OCRDMA_DCBX_PROTO {
+	OCRDMA_PROTO_SELECT_L2	= 0x00,
+	OCRDMA_PROTO_SELECT_L4	= 0x01
+};
+
+enum OCRDMA_DCBX_APP_PARAM {
+	OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF,
+	OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF,
+	OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10,
+	OCRDMA_APP_PARAM_VALID_MASK	= 0xFF,
+	OCRDMA_APP_PARAM_VALID_SHIFT	= 0x18
+};
+
+enum OCRDMA_DCBX_STATE_FLAGS {
+	OCRDMA_STATE_FLAG_ENABLED	= 0x01,
+	OCRDMA_STATE_FLAG_ADDVERTISED	= 0x02,
+	OCRDMA_STATE_FLAG_WILLING	= 0x04,
+	OCRDMA_STATE_FLAG_SYNC		= 0x08,
+	OCRDMA_STATE_FLAG_UNSUPPORTED	= 0x40000000,
+	OCRDMA_STATE_FLAG_NEG_FAILD	= 0x80000000
+};
+
+enum OCRDMA_TCV_AEV_OPV_ST {
+	OCRDMA_DCBX_TC_SUPPORT_MASK	= 0xFF,
+	OCRDMA_DCBX_TC_SUPPORT_SHIFT	= 0x18,
+	OCRDMA_DCBX_APP_ENTRY_SHIFT	= 0x10,
+	OCRDMA_DCBX_OP_PARAM_SHIFT	= 0x08,
+	OCRDMA_DCBX_STATE_MASK		= 0xFF
+};
+
+struct ocrdma_app_parameter {
+	u32 valid_proto_app;
+	u32 oui;
+	u32 app_prio[2];
+};
+
+struct ocrdma_dcbx_cfg {
+	u32 tcv_aev_opv_st;
+	u32 tc_state;
+	u32 pfc_state;
+	u32 qcn_state;
+	u32 appl_state;
+	u32 ll_state;
+	u32 tc_bw[2];
+	u32 tc_prio[8];
+	u32 pfc_prio[2];
+	struct ocrdma_app_parameter app_param[15];
+};
+
+struct ocrdma_get_dcbx_cfg_req {
+	struct ocrdma_mbx_hdr hdr;
+	u32 param_type;
+} __packed;
+
+struct ocrdma_get_dcbx_cfg_rsp {
+	struct ocrdma_mbx_rsp hdr;
+	struct ocrdma_dcbx_cfg cfg;
+} __packed;
 
 #endif				/* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index edf6211..acb434d 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
 	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
 	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
 	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
-	attr->max_mr_size = ~0ull;
+	attr->max_mr_size = dev->attr.max_mr_size;
 	attr->page_size_cap = 0xffff000;
 	attr->vendor_id = dev->nic_info.pdev->vendor;
 	attr->vendor_part_id = dev->nic_info.pdev->device;
-	attr->hw_ver = 0;
+	attr->hw_ver = dev->asic_id;
 	attr->max_qp = dev->attr.max_qp;
 	attr->max_ah = OCRDMA_MAX_AH;
 	attr->max_qp_wr = dev->attr.max_wqe;
@@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
 		pd->dpp_enabled =
 			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
 		pd->num_dpp_qp =
-			pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
+			pd->dpp_enabled ? (dev->nic_info.db_page_size /
+					   dev->attr.wqe_size) : 0;
 	}
 
 retry:
@@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 	struct ocrdma_pd *pd = uctx->cntxt_pd;
 	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
 
-	BUG_ON(uctx->pd_in_use);
+	if (uctx->pd_in_use) {
+		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+		       __func__, dev->id, pd->id);
+	}
 	uctx->cntxt_pd = NULL;
 	status = _ocrdma_dealloc_pd(dev, pd);
 	return status;
@@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 	if (mr->umem)
 		ib_umem_release(mr->umem);
 	kfree(mr);
+
+	/* Don't stop cleanup, in case FW is unresponsive */
+	if (dev->mqe_ctx.fw_error_state) {
+		status = 0;
+		pr_err("%s(%d) fw not responding.\n",
+		       __func__, dev->id);
+	}
 	return status;
 }
 
@@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	}
 
 	while (wr) {
+		if (qp->qp_type == IB_QPT_UD &&
+		    (wr->opcode != IB_WR_SEND &&
+		     wr->opcode != IB_WR_SEND_WITH_IMM)) {
+			*bad_wr = wr;
+			status = -EINVAL;
+			break;
+		}
 		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
 		    wr->num_sge > qp->sq.max_sges) {
 			*bad_wr = wr;
@@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
 			*stop = true;
 			expand = false;
 		}
+	} else if (is_hw_sq_empty(qp)) {
+		/* Do nothing */
+		expand = false;
+		*polled = false;
+		*stop = false;
 	} else {
 		*polled = true;
 		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
@@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
 			*stop = true;
 			expand = false;
 		}
+	} else if (is_hw_rq_empty(qp)) {
+		/* Do nothing */
+		expand = false;
+		*polled = false;
+		*stop = false;
 	} else {
 		*polled = true;
 		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e..636be11 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
 		ibp = &dd->pport[p].ibport_data;
 		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
 					      NULL, 0, send_handler,
-					      NULL, NULL);
+					      NULL, NULL, 0);
 		if (IS_ERR(agent)) {
 			ret = PTR_ERR(agent);
 			goto err;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c639f90..3edce61 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,7 +86,6 @@ enum {
 	IPOIB_FLAG_INITIALIZED	  = 1,
 	IPOIB_FLAG_ADMIN_UP	  = 2,
 	IPOIB_PKEY_ASSIGNED	  = 3,
-	IPOIB_PKEY_STOP		  = 4,
 	IPOIB_FLAG_SUBINTERFACE	  = 5,
 	IPOIB_MCAST_RUN		  = 6,
 	IPOIB_STOP_REAPER	  = 7,
@@ -312,7 +311,6 @@ struct ipoib_dev_priv {
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
-	struct delayed_work pkey_poll_task;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
 	struct work_struct flush_light;
@@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev);
+int ipoib_ib_dev_open(struct net_device *dev, int flush);
 int ipoib_ib_dev_up(struct net_device *dev);
 int ipoib_ib_dev_down(struct net_device *dev, int flush);
 int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
 void ipoib_dev_cleanup(struct net_device *dev);
@@ -532,8 +531,7 @@ int  ipoib_set_mode(struct net_device *dev, const char *buf);
 
 void ipoib_setup(struct net_device *dev);
 
-void ipoib_pkey_poll(struct work_struct *work);
-int ipoib_pkey_dev_delay_open(struct net_device *dev);
+void ipoib_pkey_open(struct ipoib_dev_priv *priv);
 void ipoib_drain_cq(struct net_device *dev);
 
 void ipoib_set_ethtool_ops(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 5006185..6bd5740 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (priv->mcg_dentry)
-		debugfs_remove(priv->mcg_dentry);
-	if (priv->path_dentry)
-		debugfs_remove(priv->path_dentry);
+	debugfs_remove(priv->mcg_dentry);
+	debugfs_remove(priv->path_dentry);
 }
 
 int ipoib_register_debugfs(void)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6a7003d..72626c34 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
 	drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev)
+int ipoib_ib_dev_open(struct net_device *dev, int flush)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
 
-	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
-		ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey);
-		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+	ipoib_pkey_dev_check_presence(dev);
+
+	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
+		ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
+			   (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
 		return -1;
 	}
-	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 
 	ret = ipoib_init_qp(dev);
 	if (ret) {
@@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev)
 dev_stop:
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
 		napi_enable(&priv->napi);
-	ipoib_ib_dev_stop(dev, 1);
+	ipoib_ib_dev_stop(dev, flush);
 	return -1;
 }
 
-static void ipoib_pkey_dev_check_presence(struct net_device *dev)
+void ipoib_pkey_dev_check_presence(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	u16 pkey_index = 0;
 
-	if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+	if (!(priv->pkey & 0x7fff) ||
+	    ib_find_pkey(priv->ca, priv->port, priv->pkey,
+			 &priv->pkey_index))
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	else
 		set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
 	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 	netif_carrier_off(dev);
 
-	/* Shutdown the P_Key thread if still active */
-	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
-		mutex_lock(&pkey_mutex);
-		set_bit(IPOIB_PKEY_STOP, &priv->flags);
-		cancel_delayed_work_sync(&priv->pkey_poll_task);
-		mutex_unlock(&pkey_mutex);
-	}
-
 	ipoib_mcast_stop_thread(dev, flush);
 	ipoib_mcast_dev_flush(dev);
 
@@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		    (unsigned long) dev);
 
 	if (dev->flags & IFF_UP) {
-		if (ipoib_ib_dev_open(dev)) {
+		if (ipoib_ib_dev_open(dev, 1)) {
 			ipoib_transport_dev_cleanup(dev);
 			return -ENODEV;
 		}
@@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
 
 	return 1;
 }
+/*
+ * returns 0 if pkey value was found in a different slot.
+ */
+static inline int update_child_pkey(struct ipoib_dev_priv *priv)
+{
+	u16 old_index = priv->pkey_index;
+
+	priv->pkey_index = 0;
+	ipoib_pkey_dev_check_presence(priv->dev);
+
+	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+	    (old_index == priv->pkey_index))
+		return 1;
+	return 0;
+}
 
 static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 				enum ipoib_flush_level level)
 {
 	struct ipoib_dev_priv *cpriv;
 	struct net_device *dev = priv->dev;
-	u16 new_index;
 	int result;
 
 	down_read(&priv->vlan_rwsem);
@@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 
 	up_read(&priv->vlan_rwsem);
 
-	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
-		/* for non-child devices must check/update the pkey value here */
-		if (level == IPOIB_FLUSH_HEAVY &&
-		    !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
-			update_parent_pkey(priv);
+	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
+	    level != IPOIB_FLUSH_HEAVY) {
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
 
 	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
+		/* interface is down. update pkey and leave. */
+		if (level == IPOIB_FLUSH_HEAVY) {
+			if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+				update_parent_pkey(priv);
+			else
+				update_child_pkey(priv);
+		}
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
 		return;
 	}
@@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 		 * (parent) devices should always takes what present in pkey index 0
 		 */
 		if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
-			if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
-				clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-				ipoib_ib_dev_down(dev, 0);
-				ipoib_ib_dev_stop(dev, 0);
-				if (ipoib_pkey_dev_delay_open(dev))
-					return;
-			}
-			/* restart QP only if P_Key index is changed */
-			if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
-			    new_index == priv->pkey_index) {
+			result = update_child_pkey(priv);
+			if (result) {
+				/* restart QP only if P_Key index is changed */
 				ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
 				return;
 			}
-			priv->pkey_index = new_index;
+
 		} else {
 			result = update_parent_pkey(priv);
 			/* restart QP only if P_Key value changed */
@@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 		ipoib_ib_dev_down(dev, 0);
 
 	if (level == IPOIB_FLUSH_HEAVY) {
-		ipoib_ib_dev_stop(dev, 0);
-		ipoib_ib_dev_open(dev);
+		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
+			ipoib_ib_dev_stop(dev, 0);
+		if (ipoib_ib_dev_open(dev, 0) != 0)
+			return;
+		if (netif_queue_stopped(dev))
+			netif_start_queue(dev);
 	}
 
 	/*
@@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
 	ipoib_transport_dev_cleanup(dev);
 }
 
-/*
- * Delayed P_Key Assigment Interim Support
- *
- * The following is initial implementation of delayed P_Key assigment
- * mechanism. It is using the same approach implemented for the multicast
- * group join. The single goal of this implementation is to quickly address
- * Bug #2507. This implementation will probably be removed when the P_Key
- * change async notification is available.
- */
-
-void ipoib_pkey_poll(struct work_struct *work)
-{
-	struct ipoib_dev_priv *priv =
-		container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
-	struct net_device *dev = priv->dev;
-
-	ipoib_pkey_dev_check_presence(dev);
-
-	if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
-		ipoib_open(dev);
-	else {
-		mutex_lock(&pkey_mutex);
-		if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->pkey_poll_task,
-					   HZ);
-		mutex_unlock(&pkey_mutex);
-	}
-}
-
-int ipoib_pkey_dev_delay_open(struct net_device *dev)
-{
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-
-	/* Look for the interface pkey value in the IB Port P_Key table and */
-	/* set the interface pkey assigment flag                            */
-	ipoib_pkey_dev_check_presence(dev);
 
-	/* P_Key value not assigned yet - start polling */
-	if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
-		mutex_lock(&pkey_mutex);
-		clear_bit(IPOIB_PKEY_STOP, &priv->flags);
-		queue_delayed_work(ipoib_workqueue,
-				   &priv->pkey_poll_task,
-				   HZ);
-		mutex_unlock(&pkey_mutex);
-		return 1;
-	}
-
-	return 0;
-}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4e675f4..1310acf 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev)
 
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-	if (ipoib_pkey_dev_delay_open(dev))
-		return 0;
-
-	if (ipoib_ib_dev_open(dev))
+	if (ipoib_ib_dev_open(dev, 1)) {
+		if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
+			return 0;
 		goto err_disable;
+	}
 
 	if (ipoib_ib_dev_up(dev))
 		goto err_stop;
@@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev)
 	INIT_LIST_HEAD(&priv->dead_ahs);
 	INIT_LIST_HEAD(&priv->multicast_list);
 
-	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
 	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
 	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index eb79739..61ee91d 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
 	struct iser_conn *ib_conn;
 	struct iscsi_endpoint *ep;
 
-	ep = iscsi_create_endpoint(sizeof(*ib_conn));
+	ep = iscsi_create_endpoint(0);
 	if (!ep)
 		return ERR_PTR(-ENOMEM);
 
-	ib_conn = ep->dd_data;
+	ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
+	if (!ib_conn) {
+		err = -ENOMEM;
+		goto failure;
+	}
+
+	ep->dd_data = ib_conn;
 	ib_conn->ep = ep;
 	iser_conn_init(ib_conn);
 
-	err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
-			   non_blocking);
+	err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
 	if (err)
-		return ERR_PTR(err);
+		goto failure;
 
 	return ep;
+failure:
+	iscsi_destroy_endpoint(ep);
+	return ERR_PTR(err);
 }
 
 static int
@@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
 	int rc;
 
 	ib_conn = ep->dd_data;
-	rc = wait_event_interruptible_timeout(ib_conn->wait,
-			     ib_conn->state == ISER_CONN_UP,
-			     msecs_to_jiffies(timeout_ms));
-
+	rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
+						       msecs_to_jiffies(timeout_ms));
 	/* if conn establishment failed, return error code to iscsi */
-	if (!rc &&
-	    (ib_conn->state == ISER_CONN_TERMINATING ||
-	     ib_conn->state == ISER_CONN_DOWN))
-		rc = -1;
+	if (rc == 0) {
+		mutex_lock(&ib_conn->state_mutex);
+		if (ib_conn->state == ISER_CONN_TERMINATING ||
+		    ib_conn->state == ISER_CONN_DOWN)
+			rc = -1;
+		mutex_unlock(&ib_conn->state_mutex);
+	}
 
 	iser_info("ib conn %p rc = %d\n", ib_conn, rc);
 
@@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
 
 	ib_conn = ep->dd_data;
 	iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
+	mutex_lock(&ib_conn->state_mutex);
 	iser_conn_terminate(ib_conn);
 
 	/*
-	 * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
-	 * call and ISER_CONN_DOWN state before freeing the iser resources.
-	 * otherwise we are safe to free resources immediately.
+	 * if iser_conn and iscsi_conn are bound, we must wait for
+	 * iscsi_conn_stop and flush errors completion before freeing
+	 * the iser resources. Otherwise we are safe to free resources
+	 * immediately.
 	 */
 	if (ib_conn->iscsi_conn) {
 		INIT_WORK(&ib_conn->release_work, iser_release_work);
 		queue_work(release_wq, &ib_conn->release_work);
+		mutex_unlock(&ib_conn->state_mutex);
 	} else {
+		ib_conn->state = ISER_CONN_DOWN;
+		mutex_unlock(&ib_conn->state_mutex);
 		iser_conn_release(ib_conn);
 	}
+	iscsi_destroy_endpoint(ep);
 }
 
 static umode_t iser_attr_is_visible(int param_type, int param)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 97cd385..c877dad 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -326,7 +326,6 @@ struct iser_conn {
 	struct iser_device           *device;       /* device context          */
 	struct rdma_cm_id            *cma_id;       /* CMA ID		       */
 	struct ib_qp	             *qp;           /* QP 		       */
-	wait_queue_head_t	     wait;          /* waitq for conn/disconn  */
 	unsigned		     qp_max_recv_dtos; /* num of rx buffers */
 	unsigned		     qp_max_recv_dtos_mask; /* above minus 1 */
 	unsigned		     min_posted_rx; /* qp_max_recv_dtos >> 2 */
@@ -335,6 +334,9 @@ struct iser_conn {
 	char 			     name[ISER_OBJECT_NAME_SIZE];
 	struct work_struct	     release_work;
 	struct completion	     stop_completion;
+	struct mutex		     state_mutex;
+	struct completion	     flush_completion;
+	struct completion	     up_completion;
 	struct list_head	     conn_list;       /* entry in ig conn list */
 
 	char  			     *login_buf;
@@ -448,8 +450,8 @@ int  iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
 			       enum iser_data_dir cmd_dir);
 
 int  iser_connect(struct iser_conn   *ib_conn,
-		  struct sockaddr_in *src_addr,
-		  struct sockaddr_in *dst_addr,
+		  struct sockaddr    *src_addr,
+		  struct sockaddr    *dst_addr,
 		  int                non_blocking);
 
 int  iser_reg_page_vec(struct iser_conn     *ib_conn,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ea01075..3ef167f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -491,10 +491,9 @@ out_err:
 }
 
 /**
- * releases the QP objects, returns 0 on success,
- * -1 on failure
+ * releases the QP object
  */
-static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
+static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
 {
 	int cq_index;
 	BUG_ON(ib_conn == NULL);
@@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
 	}
 
 	ib_conn->qp	  = NULL;
-
-	return 0;
 }
 
 /**
@@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device)
 	mutex_unlock(&ig.device_list_mutex);
 }
 
+/**
+ * Called with state mutex held
+ **/
 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
 				     enum iser_ib_conn_state comp,
 				     enum iser_ib_conn_state exch)
 {
 	int ret;
 
-	spin_lock_bh(&ib_conn->lock);
 	if ((ret = (ib_conn->state == comp)))
 		ib_conn->state = exch;
-	spin_unlock_bh(&ib_conn->lock);
 	return ret;
 }
 
 void iser_release_work(struct work_struct *work)
 {
 	struct iser_conn *ib_conn;
+	int rc;
 
 	ib_conn = container_of(work, struct iser_conn, release_work);
 
 	/* wait for .conn_stop callback */
-	wait_for_completion(&ib_conn->stop_completion);
+	rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
+	WARN_ON(rc == 0);
 
 	/* wait for the qp`s post send and post receive buffers to empty */
-	wait_event_interruptible(ib_conn->wait,
-				 ib_conn->state == ISER_CONN_DOWN);
+	rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
+	WARN_ON(rc == 0);
+
+	ib_conn->state = ISER_CONN_DOWN;
+
+	mutex_lock(&ib_conn->state_mutex);
+	ib_conn->state = ISER_CONN_DOWN;
+	mutex_unlock(&ib_conn->state_mutex);
 
 	iser_conn_release(ib_conn);
 }
@@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn)
 {
 	struct iser_device  *device = ib_conn->device;
 
-	BUG_ON(ib_conn->state == ISER_CONN_UP);
-
 	mutex_lock(&ig.connlist_mutex);
 	list_del(&ib_conn->conn_list);
 	mutex_unlock(&ig.connlist_mutex);
+
+	mutex_lock(&ib_conn->state_mutex);
+	BUG_ON(ib_conn->state != ISER_CONN_DOWN);
+
 	iser_free_rx_descriptors(ib_conn);
 	iser_free_ib_conn_res(ib_conn);
 	ib_conn->device = NULL;
 	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
 	if (device != NULL)
 		iser_device_try_release(device);
+	mutex_unlock(&ib_conn->state_mutex);
+
 	/* if cma handler context, the caller actually destroy the id */
 	if (ib_conn->cma_id != NULL) {
 		rdma_destroy_id(ib_conn->cma_id);
 		ib_conn->cma_id = NULL;
 	}
-	iscsi_destroy_endpoint(ib_conn->ep);
+	kfree(ib_conn);
 }
 
 /**
@@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
 			 ib_conn,err);
 }
 
+/**
+ * Called with state mutex held
+ **/
 static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
 	struct iser_conn *ib_conn;
 
 	ib_conn = (struct iser_conn *)cma_id->context;
-
 	ib_conn->state = ISER_CONN_DOWN;
-	wake_up_interruptible(&ib_conn->wait);
 }
 
+/**
+ * Called with state mutex held
+ **/
 static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
 	struct iser_device *device;
 	struct iser_conn   *ib_conn;
 	int    ret;
 
+	ib_conn = (struct iser_conn *)cma_id->context;
+	if (ib_conn->state != ISER_CONN_PENDING)
+		/* bailout */
+		return;
+
 	device = iser_device_find_by_ib_device(cma_id);
 	if (!device) {
 		iser_err("device lookup/creation failed\n");
@@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 		return;
 	}
 
-	ib_conn = (struct iser_conn *)cma_id->context;
 	ib_conn->device = device;
 
 	/* connection T10-PI support */
@@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
 	}
 }
 
+/**
+ * Called with state mutex held
+ **/
 static void iser_route_handler(struct rdma_cm_id *cma_id)
 {
 	struct rdma_conn_param conn_param;
 	int    ret;
 	struct iser_cm_hdr req_hdr;
+	struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
+	struct iser_device *device = ib_conn->device;
+
+	if (ib_conn->state != ISER_CONN_PENDING)
+		/* bailout */
+		return;
 
 	ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
 	if (ret)
 		goto failure;
 
 	memset(&conn_param, 0, sizeof conn_param);
-	conn_param.responder_resources = 4;
+	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
 	conn_param.initiator_depth     = 1;
 	conn_param.retry_count	       = 7;
 	conn_param.rnr_retry_count     = 6;
@@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
 	struct ib_qp_attr attr;
 	struct ib_qp_init_attr init_attr;
 
+	ib_conn = (struct iser_conn *)cma_id->context;
+	if (ib_conn->state != ISER_CONN_PENDING)
+		/* bailout */
+		return;
+
 	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
 	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
 
-	ib_conn = (struct iser_conn *)cma_id->context;
-	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
-		wake_up_interruptible(&ib_conn->wait);
+	ib_conn->state = ISER_CONN_UP;
+	complete(&ib_conn->up_completion);
 }
 
 static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
@@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 			iser_err("iscsi_iser connection isn't bound\n");
 	}
 
-	/* Complete the termination process if no posts are pending */
+	/* Complete the termination process if no posts are pending. This code
+	 * block also exists in iser_handle_comp_error(), but it is needed here
+	 * for cases of no flushes at all, e.g. discovery over rdma.
+	 */
 	if (ib_conn->post_recv_buf_count == 0 &&
 	    (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
-		ib_conn->state = ISER_CONN_DOWN;
-		wake_up_interruptible(&ib_conn->wait);
+		complete(&ib_conn->flush_completion);
 	}
 }
 
 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
+	struct iser_conn *ib_conn;
+
+	ib_conn = (struct iser_conn *)cma_id->context;
 	iser_info("event %d status %d conn %p id %p\n",
 		  event->event, event->status, cma_id->context, cma_id);
 
+	mutex_lock(&ib_conn->state_mutex);
 	switch (event->event) {
 	case RDMA_CM_EVENT_ADDR_RESOLVED:
 		iser_addr_handler(cma_id);
@@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
 		iser_disconnected_handler(cma_id);
 		break;
 	default:
 		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
 		break;
 	}
+	mutex_unlock(&ib_conn->state_mutex);
 	return 0;
 }
 
 void iser_conn_init(struct iser_conn *ib_conn)
 {
 	ib_conn->state = ISER_CONN_INIT;
-	init_waitqueue_head(&ib_conn->wait);
 	ib_conn->post_recv_buf_count = 0;
 	atomic_set(&ib_conn->post_send_buf_count, 0);
 	init_completion(&ib_conn->stop_completion);
+	init_completion(&ib_conn->flush_completion);
+	init_completion(&ib_conn->up_completion);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
+	mutex_init(&ib_conn->state_mutex);
 }
 
  /**
@@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn)
  * sleeps until the connection is established or rejected
  */
 int iser_connect(struct iser_conn   *ib_conn,
-		 struct sockaddr_in *src_addr,
-		 struct sockaddr_in *dst_addr,
+		 struct sockaddr    *src_addr,
+		 struct sockaddr    *dst_addr,
 		 int                 non_blocking)
 {
-	struct sockaddr *src, *dst;
 	int err = 0;
 
-	sprintf(ib_conn->name, "%pI4:%d",
-		&dst_addr->sin_addr.s_addr, dst_addr->sin_port);
+	mutex_lock(&ib_conn->state_mutex);
+
+	sprintf(ib_conn->name, "%pISp", dst_addr);
+
+	iser_info("connecting to: %s\n", ib_conn->name);
 
 	/* the device is known only --after-- address resolution */
 	ib_conn->device = NULL;
 
-	iser_info("connecting to: %pI4, port 0x%x\n",
-		  &dst_addr->sin_addr, dst_addr->sin_port);
-
 	ib_conn->state = ISER_CONN_PENDING;
 
 	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
@@ -837,23 +877,21 @@ int iser_connect(struct iser_conn   *ib_conn,
 		goto id_failure;
 	}
 
-	src = (struct sockaddr *)src_addr;
-	dst = (struct sockaddr *)dst_addr;
-	err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
+	err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
 	if (err) {
 		iser_err("rdma_resolve_addr failed: %d\n", err);
 		goto addr_failure;
 	}
 
 	if (!non_blocking) {
-		wait_event_interruptible(ib_conn->wait,
-					 (ib_conn->state != ISER_CONN_PENDING));
+		wait_for_completion_interruptible(&ib_conn->up_completion);
 
 		if (ib_conn->state != ISER_CONN_UP) {
 			err =  -EIO;
 			goto connect_failure;
 		}
 	}
+	mutex_unlock(&ib_conn->state_mutex);
 
 	mutex_lock(&ig.connlist_mutex);
 	list_add(&ib_conn->conn_list, &ig.connlist);
@@ -865,6 +903,7 @@ id_failure:
 addr_failure:
 	ib_conn->state = ISER_CONN_DOWN;
 connect_failure:
+	mutex_unlock(&ib_conn->state_mutex);
 	iser_conn_release(ib_conn);
 	return err;
 }
@@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
 
 	if (ib_conn->post_recv_buf_count == 0 &&
 	    atomic_read(&ib_conn->post_send_buf_count) == 0) {
-		/* getting here when the state is UP means that the conn is *
-		 * being terminated asynchronously from the iSCSI layer's   *
-		 * perspective.                                             */
-		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
-		    ISER_CONN_TERMINATING))
+		/**
+		 * getting here when the state is UP means that the conn is
+		 * being terminated asynchronously from the iSCSI layer's
+		 * perspective. It is safe to peek at the connection state
+		 * since iscsi_conn_failure is allowed to be called twice.
+		 **/
+		if (ib_conn->state == ISER_CONN_UP)
 			iscsi_conn_failure(ib_conn->iscsi_conn,
 					   ISCSI_ERR_CONN_FAILED);
 
 		/* no more non completed posts to the QP, complete the
 		 * termination process w.o worrying on disconnect event */
-		ib_conn->state = ISER_CONN_DOWN;
-		wake_up_interruptible(&ib_conn->wait);
+		complete(&ib_conn->flush_completion);
 	}
 }
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b..62d2a18 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
 
 static struct scsi_transport_template *ib_srp_transport_template;
+static struct workqueue_struct *srp_remove_wq;
 
 static struct ib_client srp_client = {
 	.name   = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
 	spin_unlock_irq(&target->lock);
 
 	if (changed)
-		queue_work(system_long_wq, &target->remove_work);
+		queue_work(srp_remove_wq, &target->remove_work);
 
 	return changed;
 }
@@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
 				     SCSI_SENSE_BUFFERSIZE));
 		}
 
-		if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
-			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
-		else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
+			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
+		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
+			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
+			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
 
 		srp_free_req(target, req, scmnd,
 			     be32_to_cpu(rsp->req_lim_delta));
@@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device)
 		spin_unlock(&host->target_lock);
 
 		/*
-		 * Wait for target port removal tasks.
+		 * Wait for tl_err and target port removal tasks.
 		 */
 		flush_workqueue(system_long_wq);
+		flush_workqueue(srp_remove_wq);
 
 		kfree(host);
 	}
@@ -3313,16 +3319,22 @@ static int __init srp_init_module(void)
 		indirect_sg_entries = cmd_sg_entries;
 	}
 
+	srp_remove_wq = create_workqueue("srp_remove");
+	if (!srp_remove_wq) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = -ENOMEM;
 	ib_srp_transport_template =
 		srp_attach_transport(&ib_srp_transport_functions);
 	if (!ib_srp_transport_template)
-		return -ENOMEM;
+		goto destroy_wq;
 
 	ret = class_register(&srp_class);
 	if (ret) {
 		pr_err("couldn't register class infiniband_srp\n");
-		srp_release_transport(ib_srp_transport_template);
-		return ret;
+		goto release_tr;
 	}
 
 	ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3342,22 @@ static int __init srp_init_module(void)
 	ret = ib_register_client(&srp_client);
 	if (ret) {
 		pr_err("couldn't register IB client\n");
-		srp_release_transport(ib_srp_transport_template);
-		ib_sa_unregister_client(&srp_sa_client);
-		class_unregister(&srp_class);
-		return ret;
+		goto unreg_sa;
 	}
 
-	return 0;
+out:
+	return ret;
+
+unreg_sa:
+	ib_sa_unregister_client(&srp_sa_client);
+	class_unregister(&srp_class);
+
+release_tr:
+	srp_release_transport(ib_srp_transport_template);
+
+destroy_wq:
+	destroy_workqueue(srp_remove_wq);
+	goto out;
 }
 
 static void __exit srp_cleanup_module(void)
@@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void)
 	ib_sa_unregister_client(&srp_sa_client);
 	class_unregister(&srp_class);
 	srp_release_transport(ib_srp_transport_template);
+	destroy_workqueue(srp_remove_wq);
 }
 
 module_init(srp_init_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f27..d28a8c2 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
 	case IB_EVENT_PKEY_CHANGE:
 	case IB_EVENT_SM_CHANGE:
 	case IB_EVENT_CLIENT_REREGISTER:
+	case IB_EVENT_GID_CHANGE:
 		/* Refresh port data asynchronously. */
 		if (event->element.port_num <= sdev->device->phys_port_cnt) {
 			sport = &sdev->port[event->element.port_num - 1];
@@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
 							 &reg_req, 0,
 							 srpt_mad_send_handler,
 							 srpt_mad_recv_handler,
-							 sport);
+							 sport, 0);
 		if (IS_ERR(sport->mad_agent)) {
 			ret = PTR_ERR(sport->mad_agent);
 			sport->mad_agent = NULL;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 443d03f..8eeab72 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
 				mutex_unlock(&ca->set->bucket_lock);
 				blkdev_issue_discard(ca->bdev,
 					bucket_to_sector(ca->set, bucket),
-					ca->sb.block_size, GFP_KERNEL, 0);
+					ca->sb.bucket_size, GFP_KERNEL, 0);
 				mutex_lock(&ca->set->bucket_lock);
 			}
 
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d2ebcf3..04f7bc2 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,9 +477,13 @@ struct gc_stat {
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
+ *
+ * CACHE_SET_RUNNING means all cache devices have been registered and journal
+ * replay is complete.
  */
 #define CACHE_SET_UNREGISTERING		0
 #define	CACHE_SET_STOPPING		1
+#define	CACHE_SET_RUNNING		2
 
 struct cache_set {
 	struct closure		cl;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 5454164..646fe85 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
 {
 	uint64_t start_time;
 	bool used_mempool = false;
-	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
+	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
 						     order);
 	if (!out) {
 		struct page *outp;
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 5f6728d..ae96462 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
 {
 	return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
 		KEY_PTRS(l) == KEY_PTRS(r) &&
-		KEY_CSUM(l) == KEY_CSUM(l));
+		KEY_CSUM(l) == KEY_CSUM(r));
 }
 
 /* Keylists */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7347b61..00cde40 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,9 +117,9 @@
 ({									\
 	int _r, l = (b)->level - 1;					\
 	bool _w = l <= (op)->lock;					\
-	struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\
+	struct btree *_child = bch_btree_node_get((b)->c, op, key, l,	\
+						  _w, b);		\
 	if (!IS_ERR(_child)) {						\
-		_child->parent = (b);					\
 		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);	\
 		rw_unlock(_w, _child);					\
 	} else								\
@@ -142,7 +142,6 @@
 		rw_lock(_w, _b, _b->level);				\
 		if (_b == (c)->root &&					\
 		    _w == insert_lock(op, _b)) {			\
-			_b->parent = NULL;				\
 			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);	\
 		}							\
 		rw_unlock(_w, _b);					\
@@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
 	struct bset *i = btree_bset_first(b);
 	struct btree_iter *iter;
 
-	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+	iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
 	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
@@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
 	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
 		       bset_sector_offset(&b->keys, i));
 
-	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
+	if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -967,7 +966,8 @@ err:
  * level and op->lock.
  */
 struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
-				 struct bkey *k, int level, bool write)
+				 struct bkey *k, int level, bool write,
+				 struct btree *parent)
 {
 	int i = 0;
 	struct btree *b;
@@ -1002,6 +1002,7 @@ retry:
 		BUG_ON(b->level != level);
 	}
 
+	b->parent = parent;
 	b->accessed = 1;
 
 	for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@@ -1022,15 +1023,16 @@ retry:
 	return b;
 }
 
-static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level)
+static void btree_node_prefetch(struct btree *parent, struct bkey *k)
 {
 	struct btree *b;
 
-	mutex_lock(&c->bucket_lock);
-	b = mca_alloc(c, NULL, k, level);
-	mutex_unlock(&c->bucket_lock);
+	mutex_lock(&parent->c->bucket_lock);
+	b = mca_alloc(parent->c, NULL, k, parent->level - 1);
+	mutex_unlock(&parent->c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
+		b->parent = parent;
 		bch_btree_node_read(b);
 		rw_unlock(true, b);
 	}
@@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
 	mutex_unlock(&b->c->bucket_lock);
 }
 
-struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
-				   int level)
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+				     int level, bool wait,
+				     struct btree *parent)
 {
 	BKEY_PADDED(key) k;
 	struct btree *b = ERR_PTR(-EAGAIN);
 
 	mutex_lock(&c->bucket_lock);
 retry:
-	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL))
+	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
 		goto err;
 
 	bkey_put(c, &k.key);
@@ -1085,6 +1088,7 @@ retry:
 	}
 
 	b->accessed = 1;
+	b->parent = parent;
 	bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
 
 	mutex_unlock(&c->bucket_lock);
@@ -1096,14 +1100,21 @@ err_free:
 err:
 	mutex_unlock(&c->bucket_lock);
 
-	trace_bcache_btree_node_alloc_fail(b);
+	trace_bcache_btree_node_alloc_fail(c);
 	return b;
 }
 
+static struct btree *bch_btree_node_alloc(struct cache_set *c,
+					  struct btree_op *op, int level,
+					  struct btree *parent)
+{
+	return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
+}
+
 static struct btree *btree_node_alloc_replacement(struct btree *b,
 						  struct btree_op *op)
 {
-	struct btree *n = bch_btree_node_alloc(b->c, op, b->level);
+	struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
 	if (!IS_ERR_OR_NULL(n)) {
 		mutex_lock(&n->write_lock);
 		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 	BUG_ON(btree_bset_first(new_nodes[0])->keys);
 	btree_node_free(new_nodes[0]);
 	rw_unlock(true, new_nodes[0]);
+	new_nodes[0] = NULL;
 
 	for (i = 0; i < nodes; i++) {
 		if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
 		if (k) {
 			r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
-						  true);
+						  true, b);
 			if (IS_ERR(r->b)) {
 				ret = PTR_ERR(r->b);
 				break;
@@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
 			k = bch_btree_iter_next_filter(&iter, &b->keys,
 						       bch_ptr_bad);
 			if (k)
-				btree_node_prefetch(b->c, k, b->level - 1);
+				btree_node_prefetch(b, k);
 
 			if (p)
 				ret = btree(check_recurse, p, b, op);
@@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
 
 		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
-		n2 = bch_btree_node_alloc(b->c, op, b->level);
+		n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
 		if (IS_ERR(n2))
 			goto err_free1;
 
 		if (!b->parent) {
-			n3 = bch_btree_node_alloc(b->c, op, b->level + 1);
+			n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
 			if (IS_ERR(n3))
 				goto err_free2;
 		}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 91dfa5e..5c391fa 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
-struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int);
+struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
+				     int, bool, struct btree *);
 struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
-				 struct bkey *, int, bool);
+				 struct bkey *, int, bool, struct btree *);
 
 int bch_btree_insert_check_key(struct btree *, struct btree_op *,
 			       struct bkey *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 3a0de4c..243de0bf 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -474,9 +474,8 @@ out:
 	return false;
 }
 
-static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
 {
-	struct btree *b = container_of(bk, struct btree, keys);
 	char buf[80];
 
 	if (!KEY_SIZE(k))
@@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
 	if (KEY_SIZE(k) > KEY_OFFSET(k))
 		goto bad;
 
-	if (__ptr_invalid(b->c, k))
+	if (__ptr_invalid(c, k))
 		goto bad;
 
 	return false;
 bad:
 	bch_extent_to_text(buf, sizeof(buf), k);
-	cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
 	return true;
 }
 
+static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	return __bch_extent_invalid(b->c, k);
+}
+
 static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
 				     unsigned ptr)
 {
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e4e2340..e2ed540 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -9,5 +9,6 @@ struct cache_set;
 
 void bch_extent_to_text(char *, size_t, const struct bkey *);
 bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
 
 #endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 59e8202..fe080ad 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -7,6 +7,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 
 #include <trace/events/bcache.h>
 
@@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 			if (read_bucket(l))
 				goto bsearch;
 
-		if (list_empty(list))
+		/* no journal entries on this device? */
+		if (l == ca->sb.njournal_buckets)
 			continue;
 bsearch:
+		BUG_ON(list_empty(list));
+
 		/* Binary search */
-		m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
+		m = l;
+		r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
 		pr_debug("starting binary search, l %u r %u", l, r);
 
 		while (l + 1 < r) {
@@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
 
 		for (k = i->j.start;
 		     k < bset_bkey_last(&i->j);
-		     k = bkey_next(k)) {
-			unsigned j;
+		     k = bkey_next(k))
+			if (!__bch_extent_invalid(c, k)) {
+				unsigned j;
 
-			for (j = 0; j < KEY_PTRS(k); j++)
-				if (ptr_available(c, k, j))
-					atomic_inc(&PTR_BUCKET(c, k, j)->pin);
+				for (j = 0; j < KEY_PTRS(k); j++)
+					if (ptr_available(c, k, j))
+						atomic_inc(&PTR_BUCKET(c, k, j)->pin);
 
-			bch_initial_mark_key(c, 0, k);
-		}
+				bch_initial_mark_key(c, 0, k);
+			}
 	}
 }
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15fff4f..62e6e98 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
 {
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 
-	trace_bcache_write(op->bio, op->writeback, op->bypass);
+	trace_bcache_write(op->c, op->inode, op->bio,
+			   op->writeback, op->bypass);
 
 	bch_keylist_init(&op->insert_keys);
 	bio_get(op->bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 926ded8..d4713d0 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
 static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
 				 unsigned id)
 {
-	BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
-
 	d->id = id;
 	d->c = c;
 	c->devices[id] = d;
@@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
 	list_move(&dc->list, &uncached_devices);
 
 	clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
+	clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
 
 	mutex_unlock(&bch_register_lock);
 
@@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
 	 */
 	atomic_set(&dc->count, 1);
 
+	if (bch_cached_dev_writeback_start(dc))
+		return -ENOMEM;
+
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
 		bch_sectors_dirty_init(dc);
 		atomic_set(&dc->has_dirty, 1);
@@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl)
 	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 
 	cancel_delayed_work_sync(&dc->writeback_rate_update);
-	kthread_stop(dc->writeback_thread);
+	if (!IS_ERR_OR_NULL(dc->writeback_thread))
+		kthread_stop(dc->writeback_thread);
 
 	mutex_lock(&bch_register_lock);
 
@@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
 
 	mutex_unlock(&bch_register_lock);
 
-	if (!IS_ERR_OR_NULL(dc->bdev)) {
-		if (dc->bdev->bd_disk)
-			blk_sync_queue(bdev_get_queue(dc->bdev));
-
+	if (!IS_ERR_OR_NULL(dc->bdev))
 		blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-	}
 
 	wake_up(&unregister_wait);
 
@@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
 static void flash_dev_free(struct closure *cl)
 {
 	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+	mutex_lock(&bch_register_lock);
 	bcache_device_free(d);
+	mutex_unlock(&bch_register_lock);
 	kobject_put(&d->kobj);
 }
 
@@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
 {
 	struct bcache_device *d = container_of(cl, struct bcache_device, cl);
 
+	mutex_lock(&bch_register_lock);
 	bcache_device_unlink(d);
+	mutex_unlock(&bch_register_lock);
 	kobject_del(&d->kobj);
 	continue_at(cl, flash_dev_free, system_wq);
 }
@@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
 	if (test_bit(CACHE_SET_STOPPING, &c->flags))
 		return -EINTR;
 
+	if (!test_bit(CACHE_SET_RUNNING, &c->flags))
+		return -EPERM;
+
 	u = uuid_find_empty(c);
 	if (!u) {
 		pr_err("Can't create volume, no room for UUID");
@@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
 	bch_journal_free(c);
 
 	for_each_cache(ca, c, i)
-		if (ca)
+		if (ca) {
+			ca->set = NULL;
+			c->cache[ca->sb.nr_this_dev] = NULL;
 			kobject_put(&ca->kobj);
+		}
 
 	bch_bset_sort_state_free(&c->sort);
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
 		if (ca->alloc_thread)
 			kthread_stop(ca->alloc_thread);
 
-	cancel_delayed_work_sync(&c->journal.work);
-	/* flush last journal entry if needed */
-	c->journal.work.work.func(&c->journal.work.work);
+	if (c->journal.cur) {
+		cancel_delayed_work_sync(&c->journal.work);
+		/* flush last journal entry if needed */
+		c->journal.work.work.func(&c->journal.work.work);
+	}
 
 	closure_return(cl);
 }
@@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		err = "error reading btree root";
-		c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true);
+		c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
 		if (IS_ERR_OR_NULL(c->root))
 			goto err;
 
@@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
 			goto err;
 
 		err = "cannot allocate new btree root";
-		c->root = bch_btree_node_alloc(c, NULL, 0);
+		c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
 		if (IS_ERR_OR_NULL(c->root))
 			goto err;
 
@@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
 
 	flash_devs_run(c);
 
+	set_bit(CACHE_SET_RUNNING, &c->flags);
 	return;
 err:
 	closure_sync(&cl);
@@ -1760,6 +1772,7 @@ found:
 		pr_debug("set version = %llu", c->sb.version);
 	}
 
+	kobject_get(&ca->kobj);
 	ca->set = c;
 	ca->set->cache[ca->sb.nr_this_dev] = ca;
 	c->cache_by_alloc[c->caches_loaded++] = ca;
@@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
 	struct cache *ca = container_of(kobj, struct cache, kobj);
 	unsigned i;
 
-	if (ca->set)
+	if (ca->set) {
+		BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
 		ca->set->cache[ca->sb.nr_this_dev] = NULL;
+	}
 
 	bio_split_pool_free(&ca->bio_split_hook);
 
@@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
 	if (ca->sb_bio.bi_inline_vecs[0].bv_page)
 		put_page(ca->sb_bio.bi_io_vec[0].bv_page);
 
-	if (!IS_ERR_OR_NULL(ca->bdev)) {
-		blk_sync_queue(bdev_get_queue(ca->bdev));
+	if (!IS_ERR_OR_NULL(ca->bdev))
 		blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-	}
 
 	kfree(ca);
 	module_put(THIS_MODULE);
@@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
 }
 
 static void register_cache(struct cache_sb *sb, struct page *sb_page,
-				  struct block_device *bdev, struct cache *ca)
+				struct block_device *bdev, struct cache *ca)
 {
 	char name[BDEVNAME_SIZE];
 	const char *err = "cannot allocate memory";
@@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
 		goto err;
 
 	pr_info("registered cache device %s", bdevname(bdev, name));
+out:
+	kobject_put(&ca->kobj);
 	return;
 err:
 	pr_notice("error opening %s: %s", bdevname(bdev, name), err);
-	kobject_put(&ca->kobj);
+	goto out;
 }
 
 /* Global interfaces/init */
@@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 	if (IS_ERR(bdev)) {
 		if (bdev == ERR_PTR(-EBUSY)) {
 			bdev = lookup_bdev(strim(path));
+			mutex_lock(&bch_register_lock);
 			if (!IS_ERR(bdev) && bch_is_open(bdev))
 				err = "device already registered";
 			else
 				err = "device busy";
+			mutex_unlock(&bch_register_lock);
 		}
 		goto err;
 	}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ac7d0d1..98df757 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -416,8 +416,8 @@ do {									\
 			  average_frequency,	frequency_units);	\
 	__print_time_stat(stats, name,					\
 			  average_duration,	duration_units);	\
-	__print_time_stat(stats, name,					\
-			  max_duration,		duration_units);	\
+	sysfs_print(name ## _ ##max_duration ## _ ## duration_units,	\
+			div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
 									\
 	sysfs_print(name ## _last_ ## frequency_units, (stats)->last	\
 		    ? div_s64(local_clock() - (stats)->last,		\
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f4300e4..f1986bc 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
 		if (KEY_START(&w->key) != dc->last_read ||
 		    jiffies_to_msecs(delay) > 50)
 			while (!kthread_should_stop() && delay)
-				delay = schedule_timeout_uninterruptible(delay);
+				delay = schedule_timeout_interruptible(delay);
 
 		dc->last_read	= KEY_OFFSET(&w->key);
 
@@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
 			while (delay &&
 			       !kthread_should_stop() &&
 			       !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-				delay = schedule_timeout_uninterruptible(delay);
+				delay = schedule_timeout_interruptible(delay);
 		}
 	}
 
@@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
 	dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
 }
 
-int bch_cached_dev_writeback_init(struct cached_dev *dc)
+void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
 	sema_init(&dc->in_flight, 64);
 	init_rwsem(&dc->writeback_lock);
@@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_rate_d_term	= 30;
 	dc->writeback_rate_p_term_inverse = 6000;
 
+	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
+}
+
+int bch_cached_dev_writeback_start(struct cached_dev *dc)
+{
 	dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
 					      "bcache_writeback");
 	if (IS_ERR(dc->writeback_thread))
 		return PTR_ERR(dc->writeback_thread);
 
-	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 	schedule_delayed_work(&dc->writeback_rate_update,
 			      dc->writeback_rate_update_seconds * HZ);
 
+	bch_writeback_queue(dc);
+
 	return 0;
 }
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e2f8598..0a9dab1 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
 
 void bch_sectors_dirty_init(struct cached_dev *dc);
-int bch_cached_dev_writeback_init(struct cached_dev *);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+int bch_cached_dev_writeback_start(struct cached_dev *);
 
 #endif
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d2899e7..0670925 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
 	disk_super->discard_root = cpu_to_le64(cmd->discard_root);
 	disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
 	disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
-	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
+	disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
 	disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
 	disk_super->cache_blocks = cpu_to_le32(0);
 
@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
 					    bool may_format_device)
 {
 	int r;
-	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE,
+	cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
 					  CACHE_METADATA_CACHE_SIZE,
 					  CACHE_MAX_CONCURRENT_LOCKS);
 	if (IS_ERR(cmd->bm)) {
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd70a78..7383c90 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@
 
 #include "dm-cache-block-types.h"
 #include "dm-cache-policy-internal.h"
+#include "persistent-data/dm-space-map-metadata.h"
 
 /*----------------------------------------------------------------*/
 
-#define DM_CACHE_METADATA_BLOCK_SIZE 4096
+#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
 
 /* FIXME: remove this restriction */
 /*
  * The metadata device is currently limited in size.
- *
- * We have one block of index, which can hold 255 index entries.  Each
- * index entry contains allocation info about 16k metadata blocks.
  */
-#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT)))
+#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
 
 /*
  * A metadata device larger than 16GB triggers a warning.
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326..1af40ee 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
 	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
 }
 
+/*
+ * You must increment the deferred set whilst the prison cell is held.  To
+ * encourage this, we ask for 'cell' to be passed in.
+ */
+static void inc_ds(struct cache *cache, struct bio *bio,
+		   struct dm_bio_prison_cell *cell)
+{
+	size_t pb_data_size = get_per_bio_data_size(cache);
+	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
+
+	BUG_ON(!cell);
+	BUG_ON(pb->all_io_entry);
+
+	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+}
+
 static void issue(struct cache *cache, struct bio *bio)
 {
 	unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
 	spin_unlock_irqrestore(&cache->lock, flags);
 }
 
+static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
+{
+	inc_ds(cache, bio, cell);
+	issue(cache, bio);
+}
+
 static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
 {
 	unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
 
 	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
 	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
+
+	/*
+	 * No need to inc_ds() here, since the cell will be held for the
+	 * duration of the io.
+	 */
 	generic_make_request(bio);
 }
 
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
 		return;
 
 	INIT_LIST_HEAD(&work);
-	if (pb->all_io_entry)
-		dm_deferred_entry_dec(pb->all_io_entry, &work);
+	dm_deferred_entry_dec(pb->all_io_entry, &work);
 
 	if (!list_empty(&work))
 		queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
 	else
 		remap_to_cache(cache, bio, 0);
 
+	/*
+	 * REQ_FLUSH is not directed at any particular block so we don't
+	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
+	 * by dm-core.
+	 */
 	issue(cache, bio);
 }
 
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
 		   &cache->stats.read_miss : &cache->stats.write_miss);
 }
 
-static void issue_cache_bio(struct cache *cache, struct bio *bio,
-			    struct per_bio_data *pb,
-			    dm_oblock_t oblock, dm_cblock_t cblock)
-{
-	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-	remap_to_cache_dirty(cache, bio, oblock, cblock);
-	issue(cache, bio);
-}
-
 static void process_bio(struct cache *cache, struct prealloc *structs,
 			struct bio *bio)
 {
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 	dm_oblock_t block = get_bio_block(cache, bio);
 	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
 	struct policy_result lookup_result;
-	size_t pb_data_size = get_per_bio_data_size(cache);
-	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 	bool discarded_block = is_discarded_oblock(cache, block);
 	bool passthrough = passthrough_mode(&cache->features);
 	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 
 			} else {
 				/* FIXME: factor out issue_origin() */
-				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 				remap_to_origin_clear_discard(cache, bio, block);
-				issue(cache, bio);
+				inc_and_issue(cache, bio, new_ocell);
 			}
 		} else {
 			inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
 			if (bio_data_dir(bio) == WRITE &&
 			    writethrough_mode(&cache->features) &&
 			    !is_dirty(cache, lookup_result.cblock)) {
-				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
-				issue(cache, bio);
-			} else
-				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+				inc_and_issue(cache, bio, new_ocell);
+
+			} else  {
+				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+				inc_and_issue(cache, bio, new_ocell);
+			}
 		}
 
 		break;
 
 	case POLICY_MISS:
 		inc_miss_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 		remap_to_origin_clear_discard(cache, bio, block);
-		issue(cache, bio);
+		inc_and_issue(cache, bio, new_ocell);
 		break;
 
 	case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
 	bio_list_init(&cache->deferred_flush_bios);
 	spin_unlock_irqrestore(&cache->lock, flags);
 
+	/*
+	 * These bios have already been through inc_ds()
+	 */
 	while ((bio = bio_list_pop(&bios)))
 		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
 }
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
 	bio_list_init(&cache->deferred_writethrough_bios);
 	spin_unlock_irqrestore(&cache->lock, flags);
 
+	/*
+	 * These bios have already been through inc_ds()
+	 */
 	while ((bio = bio_list_pop(&bios)))
 		generic_make_request(bio);
 }
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)
 
 		if (commit_if_needed(cache)) {
 			process_deferred_flush_bios(cache, false);
+			process_migrations(cache, &cache->need_commit_migrations, migration_failure);
 
 			/*
 			 * FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ out:
 	return r;
 }
 
-static int cache_map(struct dm_target *ti, struct bio *bio)
+static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
 {
-	struct cache *cache = ti->private;
-
 	int r;
 	dm_oblock_t block = get_bio_block(cache, bio);
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	bool can_migrate = false;
 	bool discarded_block;
-	struct dm_bio_prison_cell *cell;
 	struct policy_result lookup_result;
 	struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
 
@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	/*
 	 * Check to see if that block is currently migrating.
 	 */
-	cell = alloc_prison_cell(cache);
-	if (!cell) {
+	*cell = alloc_prison_cell(cache);
+	if (!*cell) {
 		defer_bio(cache, bio);
 		return DM_MAPIO_SUBMITTED;
 	}
 
-	r = bio_detain(cache, block, bio, cell,
+	r = bio_detain(cache, block, bio, *cell,
 		       (cell_free_fn) free_prison_cell,
-		       cache, &cell);
+		       cache, cell);
 	if (r) {
 		if (r < 0)
 			defer_bio(cache, bio);
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
 		       bio, &lookup_result);
 	if (r == -EWOULDBLOCK) {
-		cell_defer(cache, cell, true);
+		cell_defer(cache, *cell, true);
 		return DM_MAPIO_SUBMITTED;
 
 	} else if (r) {
 		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
+		cell_defer(cache, *cell, false);
 		bio_io_error(bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 				 * We need to invalidate this block, so
 				 * defer for the worker thread.
 				 */
-				cell_defer(cache, cell, true);
+				cell_defer(cache, *cell, true);
 				r = DM_MAPIO_SUBMITTED;
 
 			} else {
-				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
 				inc_miss_counter(cache, bio);
 				remap_to_origin_clear_discard(cache, bio, block);
-
-				cell_defer(cache, cell, false);
 			}
 
 		} else {
 			inc_hit_counter(cache, bio);
-			pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
 			if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
 			    !is_dirty(cache, lookup_result.cblock))
 				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
 			else
 				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
-
-			cell_defer(cache, cell, false);
 		}
 		break;
 
 	case POLICY_MISS:
 		inc_miss_counter(cache, bio);
-		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
-
 		if (pb->req_nr != 0) {
 			/*
 			 * This is a duplicate writethrough io that is no
 			 * longer needed because the block has been demoted.
 			 */
 			bio_endio(bio, 0);
-			cell_defer(cache, cell, false);
-			return DM_MAPIO_SUBMITTED;
-		} else {
+			cell_defer(cache, *cell, false);
+			r = DM_MAPIO_SUBMITTED;
+
+		} else
 			remap_to_origin_clear_discard(cache, bio, block);
-			cell_defer(cache, cell, false);
-		}
+
 		break;
 
 	default:
 		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
 			    (unsigned) lookup_result.op);
+		cell_defer(cache, *cell, false);
 		bio_io_error(bio);
 		r = DM_MAPIO_SUBMITTED;
 	}
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
 	return r;
 }
 
+static int cache_map(struct dm_target *ti, struct bio *bio)
+{
+	int r;
+	struct dm_bio_prison_cell *cell;
+	struct cache *cache = ti->private;
+
+	r = __cache_map(cache, bio, &cell);
+	if (r == DM_MAPIO_REMAPPED) {
+		inc_ds(cache, bio, cell);
+		cell_defer(cache, cell, false);
+	}
+
+	return r;
+}
+
 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
 		residency = policy_residency(cache->policy);
 
 		DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
-		       (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
+		       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
 		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
 		       (unsigned long long)nr_blocks_metadata,
 		       cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 */
 	if (io_opt_sectors < cache->sectors_per_block ||
 	    do_div(io_opt_sectors, cache->sectors_per_block)) {
-		blk_limits_io_min(limits, 0);
+		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
 		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
 	}
 	set_discard_limits(cache, limits);
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 static struct target_type cache_target = {
 	.name = "cache",
-	.version = {1, 4, 0},
+	.version = {1, 5, 0},
 	.module = THIS_MODULE,
 	.ctr = cache_ctr,
 	.dtr = cache_dtr,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4cba2d8..2785007 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,7 +59,7 @@ struct dm_crypt_io {
 	int error;
 	sector_t sector;
 	struct dm_crypt_io *base_io;
-};
+} CRYPTO_MINALIGN_ATTR;
 
 struct dm_crypt_request {
 	struct convert_context *ctx;
@@ -162,6 +162,8 @@ struct crypt_config {
 	 */
 	unsigned int dmreq_start;
 
+	unsigned int per_bio_data_size;
+
 	unsigned long flags;
 	unsigned int key_size;
 	unsigned int key_parts;      /* independent parts in key buffer */
@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc,
 	    kcryptd_async_done, dmreq_of_req(cc, ctx->req));
 }
 
+static void crypt_free_req(struct crypt_config *cc,
+			   struct ablkcipher_request *req, struct bio *base_bio)
+{
+	struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+
+	if ((struct ablkcipher_request *)(io + 1) != req)
+		mempool_free(req, cc->req_pool);
+}
+
 /*
  * Encrypt / decrypt data from one bio to another one (can be the same one)
  */
@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 	}
 }
 
-static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
-					  struct bio *bio, sector_t sector)
+static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
+			  struct bio *bio, sector_t sector)
 {
-	struct dm_crypt_io *io;
-
-	io = mempool_alloc(cc->io_pool, GFP_NOIO);
 	io->cc = cc;
 	io->base_bio = bio;
 	io->sector = sector;
@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
 	io->base_io = NULL;
 	io->ctx.req = NULL;
 	atomic_set(&io->io_pending, 0);
-
-	return io;
 }
 
 static void crypt_inc_pending(struct dm_crypt_io *io)
@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
 		return;
 
 	if (io->ctx.req)
-		mempool_free(io->ctx.req, cc->req_pool);
-	mempool_free(io, cc->io_pool);
+		crypt_free_req(cc, io->ctx.req, base_bio);
+	if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
+		mempool_free(io, cc->io_pool);
 
 	if (likely(!base_io))
 		bio_endio(base_bio, error);
@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 		 * between fragments, so switch to a new dm_crypt_io structure.
 		 */
 		if (unlikely(!crypt_finished && remaining)) {
-			new_io = crypt_io_alloc(io->cc, io->base_bio,
-						sector);
+			new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
+			crypt_io_init(new_io, io->cc, io->base_bio, sector);
 			crypt_inc_pending(new_io);
 			crypt_convert_init(cc, &new_io->ctx, NULL,
 					   io->base_bio, sector);
@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
 	if (error < 0)
 		io->error = -EIO;
 
-	mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
+	crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
 
 	if (!atomic_dec_and_test(&ctx->cc_pending))
 		return;
@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad;
 	}
 
+	cc->per_bio_data_size = ti->per_bio_data_size =
+				sizeof(struct dm_crypt_io) + cc->dmreq_start +
+				sizeof(struct dm_crypt_request) + cc->iv_size;
+
 	cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
 	if (!cc->page_pool) {
 		ti->error = "Cannot allocate page mempool";
@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_REMAPPED;
 	}
 
-	io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+	io = dm_per_bio_data(bio, cc->per_bio_data_size);
+	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
+	io->ctx.req = (struct ablkcipher_request *)(io + 1);
 
 	if (bio_data_dir(io->base_bio) == READ) {
 		if (kcryptd_io_read(io, GFP_NOWAIT))
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index db404a0..c09359d 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -33,7 +33,6 @@ struct dm_io_client {
 struct io {
 	unsigned long error_bits;
 	atomic_t count;
-	struct completion *wait;
 	struct dm_io_client *client;
 	io_notify_fn callback;
 	void *context;
@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
  * We need an io object to keep track of the number of bios that
  * have been dispatched for a particular io.
  *---------------------------------------------------------------*/
-static void dec_count(struct io *io, unsigned int region, int error)
+static void complete_io(struct io *io)
 {
-	if (error)
-		set_bit(region, &io->error_bits);
+	unsigned long error_bits = io->error_bits;
+	io_notify_fn fn = io->callback;
+	void *context = io->context;
 
-	if (atomic_dec_and_test(&io->count)) {
-		if (io->vma_invalidate_size)
-			invalidate_kernel_vmap_range(io->vma_invalidate_address,
-						     io->vma_invalidate_size);
+	if (io->vma_invalidate_size)
+		invalidate_kernel_vmap_range(io->vma_invalidate_address,
+					     io->vma_invalidate_size);
 
-		if (io->wait)
-			complete(io->wait);
+	mempool_free(io, io->client->pool);
+	fn(error_bits, context);
+}
 
-		else {
-			unsigned long r = io->error_bits;
-			io_notify_fn fn = io->callback;
-			void *context = io->context;
+static void dec_count(struct io *io, unsigned int region, int error)
+{
+	if (error)
+		set_bit(region, &io->error_bits);
 
-			mempool_free(io, io->client->pool);
-			fn(r, context);
-		}
-	}
+	if (atomic_dec_and_test(&io->count))
+		complete_io(io);
 }
 
 static void endio(struct bio *bio, int error)
@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions,
 	dec_count(io, 0, 0);
 }
 
+struct sync_io {
+	unsigned long error_bits;
+	struct completion wait;
+};
+
+static void sync_io_complete(unsigned long error, void *context)
+{
+	struct sync_io *sio = context;
+
+	sio->error_bits = error;
+	complete(&sio->wait);
+}
+
 static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 		   struct dm_io_region *where, int rw, struct dpages *dp,
 		   unsigned long *error_bits)
 {
-	/*
-	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
-	 * align it on our own.
-	 * volatile prevents the optimizer from removing or reusing
-	 * "io_" field from the stack frame (allowed in ANSI C).
-	 */
-	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
-	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
-	DECLARE_COMPLETION_ONSTACK(wait);
+	struct io *io;
+	struct sync_io sio;
 
 	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
 		WARN_ON(1);
 		return -EIO;
 	}
 
+	init_completion(&sio.wait);
+
+	io = mempool_alloc(client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
-	io->wait = &wait;
 	io->client = client;
+	io->callback = sync_io_complete;
+	io->context = &sio;
 
 	io->vma_invalidate_address = dp->vma_invalidate_address;
 	io->vma_invalidate_size = dp->vma_invalidate_size;
 
 	dispatch_io(rw, num_regions, where, dp, io, 1);
 
-	wait_for_completion_io(&wait);
+	wait_for_completion_io(&sio.wait);
 
 	if (error_bits)
-		*error_bits = io->error_bits;
+		*error_bits = sio.error_bits;
 
-	return io->error_bits ? -EIO : 0;
+	return sio.error_bits ? -EIO : 0;
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
 	io = mempool_alloc(client->pool, GFP_NOIO);
 	io->error_bits = 0;
 	atomic_set(&io->count, 1); /* see dispatch_io() */
-	io->wait = NULL;
 	io->client = client;
 	io->callback = fn;
 	io->context = context;
@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
  * New collapsed (a)synchronous interface.
  *
  * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
- * the queue with blk_unplug() some time later or set REQ_SYNC in
-io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
- * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
+ * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
+ * If you fail to do one of these, the IO will be submitted to the disk after
+ * q->unplug_delay, which defaults to 3ms in blk-settings.c.
  */
 int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 	  struct dm_io_region *where, unsigned long *sync_error_bits)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f4167b0..833d7e7 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m)
 		 dm_noflush_suspending(m->ti)));
 }
 
-#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required)
-
 /*
  * Map cloned requests
  */
@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
 		if (!__must_push_back(m))
 			r = -EIO;	/* Failed */
 		goto out_unlock;
-	}
-	if (!pg_ready(m)) {
+	} else if (m->queue_io || m->pg_init_required) {
 		__pg_init_all_paths(m);
 		goto out_unlock;
 	}
+
 	if (set_mapinfo(m, map_context) < 0)
 		/* ENOMEM, requeue */
 		goto out_unlock;
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 09a688b..50fca46 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr
 	*bit *= sctx->region_table_entry_bits;
 }
 
+static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
+{
+	unsigned long region_index;
+	unsigned bit;
+
+	switch_get_position(sctx, region_nr, &region_index, &bit);
+
+	return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
+		((1 << sctx->region_table_entry_bits) - 1);
+}
+
 /*
  * Find which path to use at given offset.
  */
 static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
 {
-	unsigned long region_index;
-	unsigned bit, path_nr;
+	unsigned path_nr;
 	sector_t p;
 
 	p = offset;
@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
 	else
 		sector_div(p, sctx->region_size);
 
-	switch_get_position(sctx, p, &region_index, &bit);
-	path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
-	       ((1 << sctx->region_table_entry_bits) - 1);
+	path_nr = switch_region_table_read(sctx, p);
 
 	/* This can only happen if the processor uses non-atomic stores. */
 	if (unlikely(path_nr >= sctx->nr_paths))
@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string)
 }
 
 static int process_set_region_mappings(struct switch_ctx *sctx,
-			     unsigned argc, char **argv)
+				       unsigned argc, char **argv)
 {
 	unsigned i;
 	unsigned long region_index = 0;
@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
 		unsigned long path_nr;
 		const char *string = argv[i];
 
+		if ((*string & 0xdf) == 'R') {
+			unsigned long cycle_length, num_write;
+
+			string++;
+			if (unlikely(*string == ',')) {
+				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+				return -EINVAL;
+			}
+			cycle_length = parse_hex(&string);
+			if (unlikely(*string != ',')) {
+				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+				return -EINVAL;
+			}
+			string++;
+			if (unlikely(!*string)) {
+				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+				return -EINVAL;
+			}
+			num_write = parse_hex(&string);
+			if (unlikely(*string)) {
+				DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
+				return -EINVAL;
+			}
+
+			if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
+				DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
+				       cycle_length - 1, region_index);
+				return -EINVAL;
+			}
+			if (unlikely(region_index + num_write < region_index) ||
+			    unlikely(region_index + num_write >= sctx->nr_regions)) {
+				DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
+				       region_index, num_write, sctx->nr_regions);
+				return -EINVAL;
+			}
+
+			while (num_write--) {
+				region_index++;
+				path_nr = switch_region_table_read(sctx, region_index - cycle_length);
+				switch_region_table_write(sctx, region_index, path_nr);
+			}
+
+			continue;
+		}
+
 		if (*string == ':')
 			region_index++;
 		else {
@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti,
 
 static struct target_type switch_target = {
 	.name = "switch",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = switch_ctr,
 	.dtr = switch_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f59f1e..f9c6cb8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
 	return q && !blk_queue_add_random(q);
 }
 
+static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
+				   sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
+}
+
 static bool dm_table_all_devices_attribute(struct dm_table *t,
 					   iterate_devices_callout_fn func)
 {
@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t)
 	return true;
 }
 
+static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
+				  sector_t start, sector_t len, void *data)
+{
+	struct request_queue *q = bdev_get_queue(dev->bdev);
+
+	return q && blk_queue_discard(q);
+}
+
+static bool dm_table_supports_discards(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i = 0;
+
+	/*
+	 * Unless any target used by the table set discards_supported,
+	 * require at least one underlying device to support discards.
+	 * t->devices includes internal dm devices such as mirror logs
+	 * so we need to use iterate_devices here, which targets
+	 * supporting discard selectively must provide.
+	 */
+	while (i < dm_table_get_num_targets(t)) {
+		ti = dm_table_get_target(t, i++);
+
+		if (!ti->num_discard_bios)
+			continue;
+
+		if (ti->discards_supported)
+			return 1;
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti, device_discard_capable, NULL))
+			return 1;
+	}
+
+	return 0;
+}
+
 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			       struct queue_limits *limits)
 {
@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	if (!dm_table_supports_write_same(t))
 		q->limits.max_write_same_sectors = 0;
 
+	if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
+		queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
+
 	dm_table_set_integrity(t);
 
 	/*
@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t)
 }
 EXPORT_SYMBOL(dm_table_run_md_queue_async);
 
-static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
-				  sector_t start, sector_t len, void *data)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return q && blk_queue_discard(q);
-}
-
-bool dm_table_supports_discards(struct dm_table *t)
-{
-	struct dm_target *ti;
-	unsigned i = 0;
-
-	/*
-	 * Unless any target used by the table set discards_supported,
-	 * require at least one underlying device to support discards.
-	 * t->devices includes internal dm devices such as mirror logs
-	 * so we need to use iterate_devices here, which targets
-	 * supporting discard selectively must provide.
-	 */
-	while (i < dm_table_get_num_targets(t)) {
-		ti = dm_table_get_target(t, i++);
-
-		if (!ti->num_discard_bios)
-			continue;
-
-		if (ti->discards_supported)
-			return 1;
-
-		if (ti->type->iterate_devices &&
-		    ti->type->iterate_devices(ti, device_discard_capable, NULL))
-			return 1;
-	}
-
-	return 0;
-}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848..4843801 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c {
 	struct list_head list;
 	struct dm_dev *pool_dev;
 	struct dm_dev *origin_dev;
+	sector_t origin_size;
 	dm_thin_id dev_id;
 
 	struct pool *pool;
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
 struct dm_thin_new_mapping {
 	struct list_head list;
 
-	bool quiesced:1;
-	bool prepared:1;
 	bool pass_discard:1;
 	bool definitely_not_shared:1;
 
+	/*
+	 * Track quiescing, copying and zeroing preparation actions.  When this
+	 * counter hits zero the block is prepared and can be inserted into the
+	 * btree.
+	 */
+	atomic_t prepare_actions;
+
 	int err;
 	struct thin_c *tc;
 	dm_block_t virt_block;
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
 	bio_end_io_t *saved_bi_end_io;
 };
 
-static void __maybe_add_mapping(struct dm_thin_new_mapping *m)
+static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
 	struct pool *pool = m->tc->pool;
 
-	if (m->quiesced && m->prepared) {
+	if (atomic_dec_and_test(&m->prepare_actions)) {
 		list_add_tail(&m->list, &pool->prepared_mappings);
 		wake_worker(pool);
 	}
 }
 
-static void copy_complete(int read_err, unsigned long write_err, void *context)
+static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
 {
 	unsigned long flags;
-	struct dm_thin_new_mapping *m = context;
 	struct pool *pool = m->tc->pool;
 
-	m->err = read_err || write_err ? -EIO : 0;
-
 	spin_lock_irqsave(&pool->lock, flags);
-	m->prepared = true;
-	__maybe_add_mapping(m);
+	__complete_mapping_preparation(m);
 	spin_unlock_irqrestore(&pool->lock, flags);
 }
 
+static void copy_complete(int read_err, unsigned long write_err, void *context)
+{
+	struct dm_thin_new_mapping *m = context;
+
+	m->err = read_err || write_err ? -EIO : 0;
+	complete_mapping_preparation(m);
+}
+
 static void overwrite_endio(struct bio *bio, int err)
 {
-	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
 	struct dm_thin_new_mapping *m = h->overwrite_mapping;
-	struct pool *pool = m->tc->pool;
 
 	m->err = err;
-
-	spin_lock_irqsave(&pool->lock, flags);
-	m->prepared = true;
-	__maybe_add_mapping(m);
-	spin_unlock_irqrestore(&pool->lock, flags);
+	complete_mapping_preparation(m);
 }
 
 /*----------------------------------------------------------------*/
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
 	return m;
 }
 
+static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
+		    sector_t begin, sector_t end)
+{
+	int r;
+	struct dm_io_region to;
+
+	to.bdev = tc->pool_dev->bdev;
+	to.sector = begin;
+	to.count = end - begin;
+
+	r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
+	if (r < 0) {
+		DMERR_LIMIT("dm_kcopyd_zero() failed");
+		copy_complete(1, 1, m);
+	}
+}
+
+/*
+ * A partial copy also needs to zero the uncopied region.
+ */
 static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 			  struct dm_dev *origin, dm_block_t data_origin,
 			  dm_block_t data_dest,
-			  struct dm_bio_prison_cell *cell, struct bio *bio)
+			  struct dm_bio_prison_cell *cell, struct bio *bio,
+			  sector_t len)
 {
 	int r;
 	struct pool *pool = tc->pool;
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 	m->data_block = data_dest;
 	m->cell = cell;
 
+	/*
+	 * quiesce action + copy action + an extra reference held for the
+	 * duration of this function (we may need to inc later for a
+	 * partial zero).
+	 */
+	atomic_set(&m->prepare_actions, 3);
+
 	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
-		m->quiesced = true;
+		complete_mapping_preparation(m); /* already quiesced */
 
 	/*
 	 * IO to pool_dev remaps to the pool target's data_dev.
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
 
 		from.bdev = origin->bdev;
 		from.sector = data_origin * pool->sectors_per_block;
-		from.count = pool->sectors_per_block;
+		from.count = len;
 
 		to.bdev = tc->pool_dev->bdev;
 		to.sector = data_dest * pool->sectors_per_block;
-		to.count = pool->sectors_per_block;
+		to.count = len;
 
 		r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
 				   0, copy_complete, m);
 		if (r < 0) {
-			mempool_free(m, pool->mapping_pool);
 			DMERR_LIMIT("dm_kcopyd_copy() failed");
-			cell_error(pool, cell);
+			copy_complete(1, 1, m);
+
+			/*
+			 * We allow the zero to be issued, to simplify the
+			 * error path.  Otherwise we'd need to start
+			 * worrying about decrementing the prepare_actions
+			 * counter.
+			 */
+		}
+
+		/*
+		 * Do we need to zero a tail region?
+		 */
+		if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
+			atomic_inc(&m->prepare_actions);
+			ll_zero(tc, m,
+				data_dest * pool->sectors_per_block + len,
+				(data_dest + 1) * pool->sectors_per_block);
 		}
 	}
+
+	complete_mapping_preparation(m); /* drop our ref */
 }
 
 static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
 				   struct dm_bio_prison_cell *cell, struct bio *bio)
 {
 	schedule_copy(tc, virt_block, tc->pool_dev,
-		      data_origin, data_dest, cell, bio);
-}
-
-static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
-				   dm_block_t data_dest,
-				   struct dm_bio_prison_cell *cell, struct bio *bio)
-{
-	schedule_copy(tc, virt_block, tc->origin_dev,
-		      virt_block, data_dest, cell, bio);
+		      data_origin, data_dest, cell, bio,
+		      tc->pool->sectors_per_block);
 }
 
 static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	struct pool *pool = tc->pool;
 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
 
-	m->quiesced = true;
-	m->prepared = false;
+	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
 	m->tc = tc;
 	m->virt_block = virt_block;
 	m->data_block = data_block;
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 		save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
 		inc_all_io_entry(pool, bio);
 		remap_and_issue(tc, bio, data_block);
-	} else {
-		int r;
-		struct dm_io_region to;
 
-		to.bdev = tc->pool_dev->bdev;
-		to.sector = data_block * pool->sectors_per_block;
-		to.count = pool->sectors_per_block;
+	} else
+		ll_zero(tc, m,
+			data_block * pool->sectors_per_block,
+			(data_block + 1) * pool->sectors_per_block);
+}
 
-		r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m);
-		if (r < 0) {
-			mempool_free(m, pool->mapping_pool);
-			DMERR_LIMIT("dm_kcopyd_zero() failed");
-			cell_error(pool, cell);
-		}
-	}
+static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
+				   dm_block_t data_dest,
+				   struct dm_bio_prison_cell *cell, struct bio *bio)
+{
+	struct pool *pool = tc->pool;
+	sector_t virt_block_begin = virt_block * pool->sectors_per_block;
+	sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
+
+	if (virt_block_end <= tc->origin_size)
+		schedule_copy(tc, virt_block, tc->origin_dev,
+			      virt_block, data_dest, cell, bio,
+			      pool->sectors_per_block);
+
+	else if (virt_block_begin < tc->origin_size)
+		schedule_copy(tc, virt_block, tc->origin_dev,
+			      virt_block, data_dest, cell, bio,
+			      tc->origin_size - virt_block_begin);
+
+	else
+		schedule_zero(tc, virt_block, data_dest, cell, bio);
 }
 
 /*
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
 			inc_all_io_entry(pool, bio);
 			cell_defer_no_holder(tc, cell);
 
-			remap_to_origin_and_issue(tc, bio);
+			if (bio_end_sector(bio) <= tc->origin_size)
+				remap_to_origin_and_issue(tc, bio);
+
+			else if (bio->bi_iter.bi_sector < tc->origin_size) {
+				zero_fill_bio(bio);
+				bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
+				remap_to_origin_and_issue(tc, bio);
+
+			} else {
+				zero_fill_bio(bio);
+				bio_endio(bio, 0);
+			}
 		} else
 			provision_block(tc, bio, block, cell);
 		break;
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 */
 	if (io_opt_sectors < pool->sectors_per_block ||
 	    do_div(io_opt_sectors, pool->sectors_per_block)) {
-		blk_limits_io_min(limits, 0);
+		blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
 		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
 	}
 
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 12, 0},
+	.version = {1, 13, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 		spin_lock_irqsave(&pool->lock, flags);
 		list_for_each_entry_safe(m, tmp, &work, list) {
 			list_del(&m->list);
-			m->quiesced = true;
-			__maybe_add_mapping(m);
+			__complete_mapping_preparation(m);
 		}
 		spin_unlock_irqrestore(&pool->lock, flags);
 	}
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
 	noflush_work(tc, do_noflush_stop);
 }
 
+static int thin_preresume(struct dm_target *ti)
+{
+	struct thin_c *tc = ti->private;
+
+	if (tc->origin_dev)
+		tc->origin_size = get_dev_size(tc->origin_dev->bdev);
+
+	return 0;
+}
+
 /*
  * <nr mapped sectors> <highest mapped sector>
  */
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 12, 0},
+	.version = {1, 13, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
 	.map = thin_map,
 	.end_io = thin_endio,
+	.preresume = thin_preresume,
 	.presuspend = thin_presuspend,
 	.postsuspend = thin_postsuspend,
 	.status = thin_status,
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index ed76126..e81d215 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
 struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
-bool dm_table_supports_discards(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 32fc19c..1294238 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5961,7 +5961,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 	int err = 0;
 
 	if (mddev->pers) {
-		if (!mddev->pers->quiesce)
+		if (!mddev->pers->quiesce || !mddev->thread)
 			return -EBUSY;
 		if (mddev->recovery || mddev->sync_thread)
 			return -EBUSY;
@@ -6263,7 +6263,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 		rv = update_raid_disks(mddev, info->raid_disks);
 
 	if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
-		if (mddev->pers->quiesce == NULL)
+		if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
 			return -EINVAL;
 		if (mddev->recovery || mddev->sync_thread)
 			return -EBUSY;
@@ -7376,7 +7376,7 @@ void md_do_sync(struct md_thread *thread)
 	struct mddev *mddev2;
 	unsigned int currspeed = 0,
 		 window;
-	sector_t max_sectors,j, io_sectors;
+	sector_t max_sectors,j, io_sectors, recovery_done;
 	unsigned long mark[SYNC_MARKS];
 	unsigned long update_time;
 	sector_t mark_cnt[SYNC_MARKS];
@@ -7652,7 +7652,8 @@ void md_do_sync(struct md_thread *thread)
 		 */
 		cond_resched();
 
-		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
+		recovery_done = io_sectors - atomic_read(&mddev->recovery_active);
+		currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2
 			/((jiffies-mddev->resync_mark)/HZ +1) +1;
 
 		if (currspeed > speed_min(mddev)) {
@@ -8592,7 +8593,7 @@ static int __init md_init(void)
 		goto err_mdp;
 	mdp_major = ret;
 
-	blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE,
+	blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
 			    md_probe, NULL, NULL);
 	blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
 			    md_probe, NULL, NULL);
@@ -8687,7 +8688,7 @@ static __exit void md_exit(void)
 	struct list_head *tmp;
 	int delay = 1;
 
-	blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);
+	blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
 	blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
 
 	unregister_blkdev(MD_MAJOR,"md");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 407a99e..cf91f59 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -685,6 +685,12 @@ static void *raid0_takeover(struct mddev *mddev)
 	 *  raid10 - assuming we have all necessary active disks
 	 *  raid1 - with (N -1) mirror drives faulty
 	 */
+
+	if (mddev->bitmap) {
+		printk(KERN_ERR "md/raid0: %s: cannot takeover array with bitmap\n",
+		       mdname(mddev));
+		return ERR_PTR(-EBUSY);
+	}
 	if (mddev->level == 4)
 		return raid0_takeover_raid45(mddev);
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 56e24c0..d7690f8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1501,12 +1501,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		mddev->degraded++;
 		set_bit(Faulty, &rdev->flags);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-		/*
-		 * if recovery is running, make sure it aborts.
-		 */
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	} else
 		set_bit(Faulty, &rdev->flags);
+	/*
+	 * if recovery is running, make sure it aborts.
+	 */
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	printk(KERN_ALERT
 	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index cb882aa..b08c188 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1684,13 +1684,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		return;
 	}
-	if (test_and_clear_bit(In_sync, &rdev->flags)) {
+	if (test_and_clear_bit(In_sync, &rdev->flags))
 		mddev->degraded++;
-			/*
-		 * if recovery is running, make sure it aborts.
-		 */
-		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	}
+	/*
+	 * If recovery is running, make sure it aborts.
+	 */
+	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(Blocked, &rdev->flags);
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index 6352bec..71f387c 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -744,6 +744,7 @@ static struct usb_device_id rtsx_usb_usb_ids[] = {
 	{ USB_DEVICE(0x0BDA, 0x0140) },
 	{ }
 };
+MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids);
 
 static struct usb_driver rtsx_usb_driver = {
 	.name			= "rtsx_usb",
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 452782b..ede41f0 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2028,8 +2028,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 		/* complete ongoing async transfer before issuing discard */
 		if (card->host->areq)
 			mmc_blk_issue_rw_rq(mq, NULL);
-		if (req->cmd_flags & REQ_SECURE &&
-			!(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
+		if (req->cmd_flags & REQ_SECURE)
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
@@ -2432,6 +2431,8 @@ static int mmc_blk_probe(struct mmc_card *card)
 	if (!(card->csd.cmdclass & CCC_BLOCK_READ))
 		return -ENODEV;
 
+	mmc_fixup_device(card, blk_fixups);
+
 	md = mmc_blk_alloc(card);
 	if (IS_ERR(md))
 		return PTR_ERR(md);
@@ -2446,7 +2447,6 @@ static int mmc_blk_probe(struct mmc_card *card)
 		goto out;
 
 	mmc_set_drvdata(card, md);
-	mmc_fixup_device(card, blk_fixups);
 
 	if (mmc_add_disk(md))
 		goto out;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index d2dbf02..8a1f124 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -180,7 +180,6 @@ static int mmc_bus_resume(struct device *dev)
 #endif
 
 #ifdef CONFIG_PM_RUNTIME
-
 static int mmc_runtime_suspend(struct device *dev)
 {
 	struct mmc_card *card = mmc_dev_to_card(dev);
@@ -196,17 +195,10 @@ static int mmc_runtime_resume(struct device *dev)
 
 	return host->bus_ops->runtime_resume(host);
 }
-
-static int mmc_runtime_idle(struct device *dev)
-{
-	return 0;
-}
-
 #endif /* !CONFIG_PM_RUNTIME */
 
 static const struct dev_pm_ops mmc_bus_pm_ops = {
-	SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume,
-			mmc_runtime_idle)
+	SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume)
 };
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 7dc0c85..d03a080 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2102,7 +2102,8 @@ EXPORT_SYMBOL(mmc_can_sanitize);
 
 int mmc_can_secure_erase_trim(struct mmc_card *card)
 {
-	if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)
+	if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
+	    !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
 		return 1;
 	return 0;
 }
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 793c6f7..1eda8dd 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -324,13 +324,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 		}
 	}
 
+	/*
+	 * The EXT_CSD format is meant to be forward compatible. As long
+	 * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
+	 * are authorized, see JEDEC JESD84-B50 section B.8.
+	 */
 	card->ext_csd.rev = ext_csd[EXT_CSD_REV];
-	if (card->ext_csd.rev > 7) {
-		pr_err("%s: unrecognised EXT_CSD revision %d\n",
-			mmc_hostname(card->host), card->ext_csd.rev);
-		err = -EINVAL;
-		goto out;
-	}
 
 	card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
 	card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index 6c36fcc..dd1d1e0 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -91,7 +91,7 @@ void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table)
 		    (f->cis_device == card->cis.device ||
 		     f->cis_device == (u16) SDIO_ANY_ID) &&
 		    rev >= f->rev_start && rev <= f->rev_end) {
-			dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup);
+			dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup);
 			f->vendor_fixup(card, f->data);
 		}
 	}
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 274ef00..48d0c93 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -184,6 +184,9 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
 		mmc_delay(10);
 	}
 
+	if (!i)
+		pr_err("%s: card never left busy state\n", mmc_hostname(host));
+
 	if (rocr && !mmc_host_is_spi(host))
 		*rocr = cmd.resp[0];
 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a565254..4511358 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -290,6 +290,18 @@ config MMC_MOXART
 	  be found on some embedded hardware such as UC-7112-LX.
 	  If you have a controller with this interface, say Y here.
 
+config MMC_SDHCI_ST
+	tristate "SDHCI support on STMicroelectronics SoC"
+	depends on ARCH_STI
+	depends on MMC_SDHCI_PLTFM
+	select MMC_SDHCI_IO_ACCESSORS
+	help
+	  This selects the Secure Digital Host Controller Interface in
+	  STMicroelectronics SoCs.
+
+	  If you have a controller with this interface, say Y or M here.
+	  If unsure, say N.
+
 config MMC_OMAP
 	tristate "TI OMAP Multimedia Card Interface support"
 	depends on ARCH_OMAP
@@ -303,6 +315,7 @@ config MMC_OMAP
 
 config MMC_OMAP_HS
 	tristate "TI OMAP High Speed Multimedia Card Interface support"
+	depends on HAS_DMA
 	depends on ARCH_OMAP2PLUS || COMPILE_TEST
 	help
 	  This selects the TI OMAP High Speed Multimedia card Interface.
@@ -343,7 +356,7 @@ config MMC_ATMELMCI
 
 config MMC_SDHCI_MSM
 	tristate "Qualcomm SDHCI Controller Support"
-	depends on ARCH_QCOM
+	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
 	depends on MMC_SDHCI_PLTFM
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -440,6 +453,7 @@ config MMC_SPI
 config MMC_S3C
 	tristate "Samsung S3C SD/MMC Card Interface support"
 	depends on ARCH_S3C24XX
+	depends on S3C24XX_DMAC
 	help
 	  This selects a driver for the MCI interface found in
           Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
@@ -477,15 +491,6 @@ config MMC_S3C_DMA
 	  working properly and needs to be debugged before this
 	  option is useful.
 
-config MMC_S3C_PIODMA
-	bool "Support for both PIO and DMA"
-	help
-	  Compile both the PIO and DMA transfer routines into the
-	  driver and let the platform select at run-time which one
-	  is best.
-
-	  See notes for the DMA option.
-
 endchoice
 
 config MMC_SDRICOH_CS
@@ -623,7 +628,7 @@ config MMC_DW_PCI
 
 config MMC_SH_MMCIF
 	tristate "SuperH Internal MMCIF support"
-	depends on MMC_BLOCK
+	depends on MMC_BLOCK && HAS_DMA
 	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
 	help
 	  This selects the MMC Host Interface controller (MMCIF).
@@ -697,6 +702,7 @@ config MMC_WMT
 
 config MMC_USDHI6ROL0
 	tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
+	depends on HAS_DMA
 	help
 	  This selects support for the Renesas USDHI6ROL0 SD/SDIO
 	  Host Controller
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7f81ddf..f211eed 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_HLWD)		+= sdhci-of-hlwd.o
 obj-$(CONFIG_MMC_SDHCI_BCM_KONA)	+= sdhci-bcm-kona.o
 obj-$(CONFIG_MMC_SDHCI_BCM2835)		+= sdhci-bcm2835.o
 obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
+obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 1ac227c..8f216ed 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -111,8 +111,7 @@ static const u8 tuning_blk_pattern_8bit[] = {
 	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
 };
 
-static inline bool dw_mci_fifo_reset(struct dw_mci *host);
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host);
+static bool dw_mci_reset(struct dw_mci *host);
 
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -997,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 	int gpio_ro = mmc_gpio_get_ro(mmc);
 
 	/* Use platform get_ro function, else try on board write protect */
-	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
+	if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) ||
+			(slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT))
 		read_only = 0;
 	else if (!IS_ERR_VALUE(gpio_ro))
 		read_only = gpio_ro;
@@ -1235,7 +1235,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
 		 * After an error, there may be data lingering
 		 * in the FIFO
 		 */
-		dw_mci_fifo_reset(host);
+		dw_mci_reset(host);
 	} else {
 		data->bytes_xfered = data->blocks * data->blksz;
 		data->error = 0;
@@ -1352,7 +1352,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
 
 			/* CMD error in data command */
 			if (mrq->cmd->error && mrq->data)
-				dw_mci_fifo_reset(host);
+				dw_mci_reset(host);
 
 			host->cmd = NULL;
 			host->data = NULL;
@@ -1963,14 +1963,8 @@ static void dw_mci_work_routine_card(struct work_struct *work)
 			}
 
 			/* Power down slot */
-			if (present == 0) {
-				/* Clear down the FIFO */
-				dw_mci_fifo_reset(host);
-#ifdef CONFIG_MMC_DW_IDMAC
-				dw_mci_idmac_reset(host);
-#endif
-
-			}
+			if (present == 0)
+				dw_mci_reset(host);
 
 			spin_unlock_bh(&host->lock);
 
@@ -2021,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
 
 	/* get quirks */
 	for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
-		if (of_get_property(np, of_slot_quirks[idx].quirk, NULL))
+		if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) {
+			dev_warn(dev, "Slot quirk %s is deprecated\n",
+					of_slot_quirks[idx].quirk);
 			quirks |= of_slot_quirks[idx].id;
+		}
 
 	return quirks;
 }
@@ -2208,8 +2205,11 @@ static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
 	return false;
 }
 
-static inline bool dw_mci_fifo_reset(struct dw_mci *host)
+static bool dw_mci_reset(struct dw_mci *host)
 {
+	u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
+	bool ret = false;
+
 	/*
 	 * Reseting generates a block interrupt, hence setting
 	 * the scatter-gather pointer to NULL.
@@ -2219,15 +2219,60 @@ static inline bool dw_mci_fifo_reset(struct dw_mci *host)
 		host->sg = NULL;
 	}
 
-	return dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET);
-}
+	if (host->use_dma)
+		flags |= SDMMC_CTRL_DMA_RESET;
 
-static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host)
-{
-	return dw_mci_ctrl_reset(host,
-				 SDMMC_CTRL_FIFO_RESET |
-				 SDMMC_CTRL_RESET |
-				 SDMMC_CTRL_DMA_RESET);
+	if (dw_mci_ctrl_reset(host, flags)) {
+		/*
+		 * In all cases we clear the RAWINTS register to clear any
+		 * interrupts.
+		 */
+		mci_writel(host, RINTSTS, 0xFFFFFFFF);
+
+		/* if using dma we wait for dma_req to clear */
+		if (host->use_dma) {
+			unsigned long timeout = jiffies + msecs_to_jiffies(500);
+			u32 status;
+			do {
+				status = mci_readl(host, STATUS);
+				if (!(status & SDMMC_STATUS_DMA_REQ))
+					break;
+				cpu_relax();
+			} while (time_before(jiffies, timeout));
+
+			if (status & SDMMC_STATUS_DMA_REQ) {
+				dev_err(host->dev,
+					"%s: Timeout waiting for dma_req to "
+					"clear during reset\n", __func__);
+				goto ciu_out;
+			}
+
+			/* when using DMA next we reset the fifo again */
+			if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
+				goto ciu_out;
+		}
+	} else {
+		/* if the controller reset bit did clear, then set clock regs */
+		if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
+			dev_err(host->dev, "%s: fifo/dma reset bits didn't "
+				"clear but ciu was reset, doing clock update\n",
+				__func__);
+			goto ciu_out;
+		}
+	}
+
+#if IS_ENABLED(CONFIG_MMC_DW_IDMAC)
+	/* It is also recommended that we reset and reprogram idmac */
+	dw_mci_idmac_reset(host);
+#endif
+
+	ret = true;
+
+ciu_out:
+	/* After a CTRL reset we need to have CIU set clock registers  */
+	mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
+
+	return ret;
 }
 
 #ifdef CONFIG_OF
@@ -2238,6 +2283,9 @@ static struct dw_mci_of_quirks {
 	{
 		.quirk	= "broken-cd",
 		.id	= DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
+	}, {
+		.quirk	= "disable-wp",
+		.id	= DW_MCI_QUIRK_NO_WRITE_PROTECT,
 	},
 };
 
@@ -2425,7 +2473,7 @@ int dw_mci_probe(struct dw_mci *host)
 	}
 
 	/* Reset all blocks */
-	if (!dw_mci_ctrl_all_reset(host))
+	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS))
 		return -ENODEV;
 
 	host->dma_ops = host->pdata->dma_ops;
@@ -2612,7 +2660,7 @@ int dw_mci_resume(struct dw_mci *host)
 		}
 	}
 
-	if (!dw_mci_ctrl_all_reset(host)) {
+	if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
 		ret = -ENODEV;
 		return ret;
 	}
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 738fa24..08fd956 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -129,6 +129,7 @@
 #define SDMMC_CMD_INDX(n)		((n) & 0x1F)
 /* Status register defines */
 #define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FFF)
+#define SDMMC_STATUS_DMA_REQ		BIT(31)
 /* FIFOTH register defines */
 #define SDMMC_SET_FIFOTH(m, r, t)	(((m) & 0x7) << 28 | \
 					 ((r) & 0xFFF) << 16 | \
@@ -150,6 +151,10 @@
 /* Card read threshold */
 #define SDMMC_SET_RD_THLD(v, x)		(((v) & 0x1FFF) << 16 | (x))
 
+/* All ctrl reset bits */
+#define SDMMC_CTRL_ALL_RESET_FLAGS \
+	(SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
+
 /* Register access macros */
 #define mci_readl(dev, reg)			\
 	__raw_readl((dev)->regs + SDMMC_##reg)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7ad463e..e4d4707 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -52,34 +52,53 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @clkreg_8bit_bus_enable: enable value for 8 bit bus
+ * @clkreg_neg_edge_enable: enable value for inverted data/cmd output
  * @datalength_bits: number of bits in the MMCIDATALENGTH register
  * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
+ * @data_cmd_enable: enable value for data commands.
  * @sdio: variant supports SDIO
  * @st_clkdiv: true if using a ST-specific clock divider algorithm
+ * @datactrl_mask_ddrmode: ddr mode mask in datactrl register.
  * @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register
+ * @blksz_datactrl4: true if Block size is at b4..b16 position in datactrl
+ *		     register
  * @pwrreg_powerup: power up value for MMCIPOWER register
+ * @f_max: maximum clk frequency supported by the controller.
  * @signal_direction: input/out direction of bus signals can be indicated
  * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock
  * @busy_detect: true if busy detection on dat0 is supported
  * @pwrreg_nopower: bits in MMCIPOWER don't controls ext. power supply
+ * @explicit_mclk_control: enable explicit mclk control in driver.
+ * @qcom_fifo: enables qcom specific fifo pio read logic.
+ * @reversed_irq_handling: handle data irq before cmd irq.
  */
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		clkreg_8bit_bus_enable;
+	unsigned int		clkreg_neg_edge_enable;
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
+	unsigned int		data_cmd_enable;
+	unsigned int		datactrl_mask_ddrmode;
 	bool			sdio;
 	bool			st_clkdiv;
 	bool			blksz_datactrl16;
+	bool			blksz_datactrl4;
 	u32			pwrreg_powerup;
+	u32			f_max;
 	bool			signal_direction;
 	bool			pwrreg_clkgate;
 	bool			busy_detect;
 	bool			pwrreg_nopower;
+	bool			explicit_mclk_control;
+	bool			qcom_fifo;
+	bool			reversed_irq_handling;
 };
 
 static struct variant_data variant_arm = {
@@ -87,6 +106,8 @@ static struct variant_data variant_arm = {
 	.fifohalfsize		= 8 * 4,
 	.datalength_bits	= 16,
 	.pwrreg_powerup		= MCI_PWR_UP,
+	.f_max			= 100000000,
+	.reversed_irq_handling	= true,
 };
 
 static struct variant_data variant_arm_extended_fifo = {
@@ -94,6 +115,7 @@ static struct variant_data variant_arm_extended_fifo = {
 	.fifohalfsize		= 64 * 4,
 	.datalength_bits	= 16,
 	.pwrreg_powerup		= MCI_PWR_UP,
+	.f_max			= 100000000,
 };
 
 static struct variant_data variant_arm_extended_fifo_hwfc = {
@@ -102,15 +124,18 @@ static struct variant_data variant_arm_extended_fifo_hwfc = {
 	.clkreg_enable		= MCI_ARM_HWFCEN,
 	.datalength_bits	= 16,
 	.pwrreg_powerup		= MCI_PWR_UP,
+	.f_max			= 100000000,
 };
 
 static struct variant_data variant_u300 = {
 	.fifosize		= 16 * 4,
 	.fifohalfsize		= 8 * 4,
 	.clkreg_enable		= MCI_ST_U300_HWFCEN,
+	.clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
 	.datalength_bits	= 16,
 	.sdio			= true,
 	.pwrreg_powerup		= MCI_PWR_ON,
+	.f_max			= 100000000,
 	.signal_direction	= true,
 	.pwrreg_clkgate		= true,
 	.pwrreg_nopower		= true,
@@ -124,6 +149,7 @@ static struct variant_data variant_nomadik = {
 	.sdio			= true,
 	.st_clkdiv		= true,
 	.pwrreg_powerup		= MCI_PWR_ON,
+	.f_max			= 100000000,
 	.signal_direction	= true,
 	.pwrreg_clkgate		= true,
 	.pwrreg_nopower		= true,
@@ -134,10 +160,13 @@ static struct variant_data variant_ux500 = {
 	.fifohalfsize		= 8 * 4,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= MCI_ST_UX500_HWFCEN,
+	.clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+	.clkreg_neg_edge_enable	= MCI_ST_UX500_NEG_EDGE,
 	.datalength_bits	= 24,
 	.sdio			= true,
 	.st_clkdiv		= true,
 	.pwrreg_powerup		= MCI_PWR_ON,
+	.f_max			= 100000000,
 	.signal_direction	= true,
 	.pwrreg_clkgate		= true,
 	.busy_detect		= true,
@@ -149,17 +178,38 @@ static struct variant_data variant_ux500v2 = {
 	.fifohalfsize		= 8 * 4,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= MCI_ST_UX500_HWFCEN,
+	.clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
+	.clkreg_neg_edge_enable	= MCI_ST_UX500_NEG_EDGE,
+	.datactrl_mask_ddrmode	= MCI_ST_DPSM_DDRMODE,
 	.datalength_bits	= 24,
 	.sdio			= true,
 	.st_clkdiv		= true,
 	.blksz_datactrl16	= true,
 	.pwrreg_powerup		= MCI_PWR_ON,
+	.f_max			= 100000000,
 	.signal_direction	= true,
 	.pwrreg_clkgate		= true,
 	.busy_detect		= true,
 	.pwrreg_nopower		= true,
 };
 
+static struct variant_data variant_qcom = {
+	.fifosize		= 16 * 4,
+	.fifohalfsize		= 8 * 4,
+	.clkreg			= MCI_CLK_ENABLE,
+	.clkreg_enable		= MCI_QCOM_CLK_FLOWENA |
+				  MCI_QCOM_CLK_SELECT_IN_FBCLK,
+	.clkreg_8bit_bus_enable = MCI_QCOM_CLK_WIDEBUS_8,
+	.datactrl_mask_ddrmode	= MCI_QCOM_CLK_SELECT_IN_DDR_MODE,
+	.data_cmd_enable	= MCI_QCOM_CSPM_DATCMD,
+	.blksz_datactrl4	= true,
+	.datalength_bits	= 24,
+	.pwrreg_powerup		= MCI_PWR_UP,
+	.f_max			= 208000000,
+	.explicit_mclk_control	= true,
+	.qcom_fifo		= true,
+};
+
 static int mmci_card_busy(struct mmc_host *mmc)
 {
 	struct mmci_host *host = mmc_priv(mmc);
@@ -260,7 +310,9 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 	host->cclk = 0;
 
 	if (desired) {
-		if (desired >= host->mclk) {
+		if (variant->explicit_mclk_control) {
+			host->cclk = host->mclk;
+		} else if (desired >= host->mclk) {
 			clk = MCI_CLK_BYPASS;
 			if (variant->st_clkdiv)
 				clk |= MCI_ST_UX500_NEG_EDGE;
@@ -299,11 +351,11 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
 	if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
 		clk |= MCI_4BIT_BUS;
 	if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
-		clk |= MCI_ST_8BIT_BUS;
+		clk |= variant->clkreg_8bit_bus_enable;
 
 	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
 	    host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
-		clk |= MCI_ST_UX500_NEG_EDGE;
+		clk |= variant->clkreg_neg_edge_enable;
 
 	mmci_write_clkreg(host, clk);
 }
@@ -719,7 +771,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	data->bytes_xfered = 0;
 
 	clks = (unsigned long long)data->timeout_ns * host->cclk;
-	do_div(clks, 1000000000UL);
+	do_div(clks, NSEC_PER_SEC);
 
 	timeout = data->timeout_clks + (unsigned int)clks;
 
@@ -732,6 +784,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 
 	if (variant->blksz_datactrl16)
 		datactrl = MCI_DPSM_ENABLE | (data->blksz << 16);
+	else if (variant->blksz_datactrl4)
+		datactrl = MCI_DPSM_ENABLE | (data->blksz << 4);
 	else
 		datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
 
@@ -767,7 +821,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 
 	if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
 	    host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
-		datactrl |= MCI_ST_DPSM_DDRMODE;
+		datactrl |= variant->datactrl_mask_ddrmode;
 
 	/*
 	 * Attempt to use DMA operation mode, if this
@@ -812,7 +866,7 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
 
 	if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) {
 		writel(0, base + MMCICOMMAND);
-		udelay(1);
+		mmci_reg_delay(host);
 	}
 
 	c |= cmd->opcode | MCI_CPSM_ENABLE;
@@ -824,6 +878,9 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
 	if (/*interrupt*/0)
 		c |= MCI_CPSM_INTERRUPT;
 
+	if (mmc_cmd_type(cmd) == MMC_CMD_ADTC)
+		c |= host->variant->data_cmd_enable;
+
 	host->cmd = cmd;
 
 	writel(cmd->arg, base + MMCIARGUMENT);
@@ -834,6 +891,10 @@ static void
 mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	      unsigned int status)
 {
+	/* Make sure we have data to handle */
+	if (!data)
+		return;
+
 	/* First check for errors */
 	if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
 		      MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
@@ -902,9 +963,17 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 	     unsigned int status)
 {
 	void __iomem *base = host->base;
-	bool sbc = (cmd == host->mrq->sbc);
-	bool busy_resp = host->variant->busy_detect &&
-			(cmd->flags & MMC_RSP_BUSY);
+	bool sbc, busy_resp;
+
+	if (!cmd)
+		return;
+
+	sbc = (cmd == host->mrq->sbc);
+	busy_resp = host->variant->busy_detect && (cmd->flags & MMC_RSP_BUSY);
+
+	if (!((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
+		MCI_CMDSENT|MCI_CMDRESPEND)))
+		return;
 
 	/* Check if we need to wait for busy completion. */
 	if (host->busy_status && (status & MCI_ST_CARDBUSY))
@@ -957,15 +1026,34 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
 	}
 }
 
+static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain)
+{
+	return remain - (readl(host->base + MMCIFIFOCNT) << 2);
+}
+
+static int mmci_qcom_get_rx_fifocnt(struct mmci_host *host, u32 status, int r)
+{
+	/*
+	 * on qcom SDCC4 only 8 words are used in each burst so only 8 addresses
+	 * from the fifo range should be used
+	 */
+	if (status & MCI_RXFIFOHALFFULL)
+		return host->variant->fifohalfsize;
+	else if (status & MCI_RXDATAAVLBL)
+		return 4;
+
+	return 0;
+}
+
 static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain)
 {
 	void __iomem *base = host->base;
 	char *ptr = buffer;
-	u32 status;
+	u32 status = readl(host->base + MMCISTATUS);
 	int host_remain = host->size;
 
 	do {
-		int count = host_remain - (readl(base + MMCIFIFOCNT) << 2);
+		int count = host->get_rx_fifocnt(host, status, host_remain);
 
 		if (count > remain)
 			count = remain;
@@ -1132,9 +1220,6 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 	spin_lock(&host->lock);
 
 	do {
-		struct mmc_command *cmd;
-		struct mmc_data *data;
-
 		status = readl(host->base + MMCISTATUS);
 
 		if (host->singleirq) {
@@ -1154,16 +1239,13 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
 
 		dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
 
-		cmd = host->cmd;
-		if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
-			MCI_CMDSENT|MCI_CMDRESPEND) && cmd)
-			mmci_cmd_irq(host, cmd, status);
-
-		data = host->data;
-		if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
-			      MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
-			      MCI_DATABLOCKEND) && data)
-			mmci_data_irq(host, data, status);
+		if (host->variant->reversed_irq_handling) {
+			mmci_data_irq(host, host->data, status);
+			mmci_cmd_irq(host, host->cmd, status);
+		} else {
+			mmci_cmd_irq(host, host->cmd, status);
+			mmci_data_irq(host, host->data, status);
+		}
 
 		/* Don't poll for busy completion in irq context. */
 		if (host->busy_status)
@@ -1296,6 +1378,17 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (!ios->clock && variant->pwrreg_clkgate)
 		pwr &= ~MCI_PWR_ON;
 
+	if (host->variant->explicit_mclk_control &&
+	    ios->clock != host->clock_cache) {
+		ret = clk_set_rate(host->clk, ios->clock);
+		if (ret < 0)
+			dev_err(mmc_dev(host->mmc),
+				"Error setting clock rate (%d)\n", ret);
+		else
+			host->mclk = clk_get_rate(host->clk);
+	}
+	host->clock_cache = ios->clock;
+
 	spin_lock_irqsave(&host->lock, flags);
 
 	mmci_set_clkreg(host, ios->clock);
@@ -1443,6 +1536,11 @@ static int mmci_probe(struct amba_device *dev,
 	if (ret)
 		goto host_free;
 
+	if (variant->qcom_fifo)
+		host->get_rx_fifocnt = mmci_qcom_get_rx_fifocnt;
+	else
+		host->get_rx_fifocnt = mmci_get_rx_fifocnt;
+
 	host->plat = plat;
 	host->variant = variant;
 	host->mclk = clk_get_rate(host->clk);
@@ -1451,8 +1549,8 @@ static int mmci_probe(struct amba_device *dev,
 	 * so we try to adjust the clock down to this,
 	 * (if possible).
 	 */
-	if (host->mclk > 100000000) {
-		ret = clk_set_rate(host->clk, 100000000);
+	if (host->mclk > variant->f_max) {
+		ret = clk_set_rate(host->clk, variant->f_max);
 		if (ret < 0)
 			goto clk_disable;
 		host->mclk = clk_get_rate(host->clk);
@@ -1471,9 +1569,12 @@ static int mmci_probe(struct amba_device *dev,
 	 * The ARM and ST versions of the block have slightly different
 	 * clock divider equations which means that the minimum divider
 	 * differs too.
+	 * on Qualcomm like controllers get the nearest minimum clock to 100Khz
 	 */
 	if (variant->st_clkdiv)
 		mmc->f_min = DIV_ROUND_UP(host->mclk, 257);
+	else if (variant->explicit_mclk_control)
+		mmc->f_min = clk_round_rate(host->clk, 100000);
 	else
 		mmc->f_min = DIV_ROUND_UP(host->mclk, 512);
 	/*
@@ -1483,9 +1584,14 @@ static int mmci_probe(struct amba_device *dev,
 	 * the block, of course.
 	 */
 	if (mmc->f_max)
-		mmc->f_max = min(host->mclk, mmc->f_max);
+		mmc->f_max = variant->explicit_mclk_control ?
+				min(variant->f_max, mmc->f_max) :
+				min(host->mclk, mmc->f_max);
 	else
-		mmc->f_max = min(host->mclk, fmax);
+		mmc->f_max = variant->explicit_mclk_control ?
+				fmax : min(host->mclk, fmax);
+
+
 	dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
 
 	/* Get regulators and the supported OCR mask */
@@ -1752,6 +1858,12 @@ static struct amba_id mmci_ids[] = {
 		.mask   = 0xf0ffffff,
 		.data	= &variant_ux500v2,
 	},
+	/* Qualcomm variants */
+	{
+		.id     = 0x00051180,
+		.mask	= 0x000fffff,
+		.data	= &variant_qcom,
+	},
 	{ 0, 0 },
 };
 
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 347d942..a1f5e4f 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -41,6 +41,15 @@
 /* Modified PL180 on Versatile Express platform */
 #define MCI_ARM_HWFCEN		(1 << 12)
 
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CLK_WIDEBUS_8	(BIT(10) | BIT(11))
+#define MCI_QCOM_CLK_FLOWENA	BIT(12)
+#define MCI_QCOM_CLK_INVERTOUT	BIT(13)
+
+/* select in latch data and command in */
+#define MCI_QCOM_CLK_SELECT_IN_FBCLK	BIT(15)
+#define MCI_QCOM_CLK_SELECT_IN_DDR_MODE	(BIT(14) | BIT(15))
+
 #define MMCIARGUMENT		0x008
 #define MMCICOMMAND		0x00c
 #define MCI_CPSM_RESPONSE	(1 << 6)
@@ -54,6 +63,14 @@
 #define MCI_ST_NIEN		(1 << 13)
 #define MCI_ST_CE_ATACMD	(1 << 14)
 
+/* Modified on Qualcomm Integrations */
+#define MCI_QCOM_CSPM_DATCMD		BIT(12)
+#define MCI_QCOM_CSPM_MCIABORT		BIT(13)
+#define MCI_QCOM_CSPM_CCSENABLE		BIT(14)
+#define MCI_QCOM_CSPM_CCSDISABLE	BIT(15)
+#define MCI_QCOM_CSPM_AUTO_CMD19	BIT(16)
+#define MCI_QCOM_CSPM_AUTO_CMD21	BIT(21)
+
 #define MMCIRESPCMD		0x010
 #define MMCIRESPONSE0		0x014
 #define MMCIRESPONSE1		0x018
@@ -191,6 +208,8 @@ struct mmci_host {
 	spinlock_t		lock;
 
 	unsigned int		mclk;
+	/* cached value of requested clk in set_ios */
+	unsigned int		clock_cache;
 	unsigned int		cclk;
 	u32			pwr_reg;
 	u32			pwr_reg_add;
@@ -210,6 +229,7 @@ struct mmci_host {
 	/* pio stuff */
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
+	int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain);
 
 #ifdef CONFIG_DMA_ENGINE
 	/* DMA stuff */
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 74924a0..b4b1efb 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -13,7 +13,6 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index babfea0..140885a 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -86,7 +86,8 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc)
 	if (ret >= 0)
 		return ret;
 
-	present = !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
+	present = mmc->caps & MMC_CAP_NEEDS_POLL ||
+		!(readl(ssp->base + HW_SSP_STATUS(ssp)) &
 			BM_SSP_STATUS_CARD_DETECT);
 
 	if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b7b755..9656726 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -29,6 +29,7 @@
 #include <linux/timer.h>
 #include <linux/clk.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/omap-dmaengine.h>
@@ -36,6 +37,7 @@
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
 #include <linux/io.h>
+#include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pinctrl/consumer.h>
@@ -54,6 +56,7 @@
 #define OMAP_HSMMC_RSP54	0x0118
 #define OMAP_HSMMC_RSP76	0x011C
 #define OMAP_HSMMC_DATA		0x0120
+#define OMAP_HSMMC_PSTATE	0x0124
 #define OMAP_HSMMC_HCTL		0x0128
 #define OMAP_HSMMC_SYSCTL	0x012C
 #define OMAP_HSMMC_STAT		0x0130
@@ -91,7 +94,10 @@
 #define BCE			(1 << 1)
 #define FOUR_BIT		(1 << 1)
 #define HSPE			(1 << 2)
+#define IWE			(1 << 24)
 #define DDR			(1 << 19)
+#define CLKEXTFREE		(1 << 16)
+#define CTPL			(1 << 11)
 #define DW8			(1 << 5)
 #define OD			0x1
 #define STAT_CLEAR		0xFFFFFFFF
@@ -101,11 +107,15 @@
 #define SRD			(1 << 26)
 #define SOFTRESET		(1 << 1)
 
+/* PSTATE */
+#define DLEV_DAT(x)		(1 << (20 + (x)))
+
 /* Interrupt masks for IE and ISE register */
 #define CC_EN			(1 << 0)
 #define TC_EN			(1 << 1)
 #define BWR_EN			(1 << 4)
 #define BRR_EN			(1 << 5)
+#define CIRQ_EN			(1 << 8)
 #define ERR_EN			(1 << 15)
 #define CTO_EN			(1 << 16)
 #define CCRC_EN			(1 << 17)
@@ -140,7 +150,6 @@
 #define VDD_3V0			3000000		/* 300000 uV */
 #define VDD_165_195		(ffs(MMC_VDD_165_195) - 1)
 
-#define AUTO_CMD23		(1 << 1)	/* Auto CMD23 support */
 /*
  * One controller can have multiple slots, like on some omap boards using
  * omap.c controller driver. Luckily this is not currently done on any known
@@ -194,6 +203,7 @@ struct omap_hsmmc_host {
 	u32			sysctl;
 	u32			capa;
 	int			irq;
+	int			wake_irq;
 	int			use_dma, dma_ch;
 	struct dma_chan		*tx_chan;
 	struct dma_chan		*rx_chan;
@@ -206,6 +216,9 @@ struct omap_hsmmc_host {
 	int			req_in_progress;
 	unsigned long		clk_rate;
 	unsigned int		flags;
+#define AUTO_CMD23		(1 << 0)        /* Auto CMD23 support */
+#define HSMMC_SDIO_IRQ_ENABLED	(1 << 1)        /* SDIO irq enabled */
+#define HSMMC_WAKE_IRQ_ENABLED	(1 << 2)
 	struct omap_hsmmc_next	next_data;
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -510,27 +523,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
 static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host,
 				  struct mmc_command *cmd)
 {
-	unsigned int irq_mask;
+	u32 irq_mask = INT_EN_MASK;
+	unsigned long flags;
 
 	if (host->use_dma)
-		irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN);
-	else
-		irq_mask = INT_EN_MASK;
+		irq_mask &= ~(BRR_EN | BWR_EN);
 
 	/* Disable timeout for erases */
 	if (cmd->opcode == MMC_ERASE)
 		irq_mask &= ~DTO_EN;
 
+	spin_lock_irqsave(&host->irq_lock, flags);
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 	OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+	/* latch pending CIRQ, but don't signal MMC core */
+	if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+		irq_mask |= CIRQ_EN;
 	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 }
 
 static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
 {
-	OMAP_HSMMC_WRITE(host->base, ISE, 0);
-	OMAP_HSMMC_WRITE(host->base, IE, 0);
+	u32 irq_mask = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->irq_lock, flags);
+	/* no transfer running but need to keep cirq if enabled */
+	if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
+		irq_mask |= CIRQ_EN;
+	OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 }
 
 /* Calculate divisor for the given clock frequency */
@@ -667,6 +693,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 		capa = VS18;
 	}
 
+	if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
+		hctl |= IWE;
+
 	OMAP_HSMMC_WRITE(host->base, HCTL,
 			OMAP_HSMMC_READ(host->base, HCTL) | hctl);
 
@@ -681,7 +710,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
 		&& time_before(jiffies, timeout))
 		;
 
-	omap_hsmmc_disable_irq(host);
+	OMAP_HSMMC_WRITE(host->base, ISE, 0);
+	OMAP_HSMMC_WRITE(host->base, IE, 0);
+	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 
 	/* Do not initialize card-specific things if the power is off */
 	if (host->power_mode == MMC_POWER_OFF)
@@ -1118,8 +1149,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
 	int status;
 
 	status = OMAP_HSMMC_READ(host->base, STAT);
-	while (status & INT_EN_MASK && host->req_in_progress) {
-		omap_hsmmc_do_irq(host, status);
+	while (status & (INT_EN_MASK | CIRQ_EN)) {
+		if (host->req_in_progress)
+			omap_hsmmc_do_irq(host, status);
+
+		if (status & CIRQ_EN)
+			mmc_signal_sdio_irq(host->mmc);
 
 		/* Flush posted write */
 		status = OMAP_HSMMC_READ(host->base, STAT);
@@ -1128,6 +1163,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
+{
+	struct omap_hsmmc_host *host = dev_id;
+
+	/* cirq is level triggered, disable to avoid infinite loop */
+	spin_lock(&host->irq_lock);
+	if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+		disable_irq_nosync(host->wake_irq);
+		host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+	}
+	spin_unlock(&host->irq_lock);
+	pm_request_resume(host->dev); /* no use counter */
+
+	return IRQ_HANDLED;
+}
+
 static void set_sd_bus_power(struct omap_hsmmc_host *host)
 {
 	unsigned long i;
@@ -1639,6 +1690,103 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
 		mmc_slot(host).init_card(card);
 }
 
+static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+	u32 irq_mask, con;
+	unsigned long flags;
+
+	spin_lock_irqsave(&host->irq_lock, flags);
+
+	con = OMAP_HSMMC_READ(host->base, CON);
+	irq_mask = OMAP_HSMMC_READ(host->base, ISE);
+	if (enable) {
+		host->flags |= HSMMC_SDIO_IRQ_ENABLED;
+		irq_mask |= CIRQ_EN;
+		con |= CTPL | CLKEXTFREE;
+	} else {
+		host->flags &= ~HSMMC_SDIO_IRQ_ENABLED;
+		irq_mask &= ~CIRQ_EN;
+		con &= ~(CTPL | CLKEXTFREE);
+	}
+	OMAP_HSMMC_WRITE(host->base, CON, con);
+	OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
+
+	/*
+	 * if enable, piggy back detection on current request
+	 * but always disable immediately
+	 */
+	if (!host->req_in_progress || !enable)
+		OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
+
+	/* flush posted write */
+	OMAP_HSMMC_READ(host->base, IE);
+
+	spin_unlock_irqrestore(&host->irq_lock, flags);
+}
+
+static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
+{
+	struct mmc_host *mmc = host->mmc;
+	int ret;
+
+	/*
+	 * For omaps with wake-up path, wakeirq will be irq from pinctrl and
+	 * for other omaps, wakeirq will be from GPIO (dat line remuxed to
+	 * gpio). wakeirq is needed to detect sdio irq in runtime suspend state
+	 * with functional clock disabled.
+	 */
+	if (!host->dev->of_node || !host->wake_irq)
+		return -ENODEV;
+
+	/* Prevent auto-enabling of IRQ */
+	irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
+	ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
+			       IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+			       mmc_hostname(mmc), host);
+	if (ret) {
+		dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
+		goto err;
+	}
+
+	/*
+	 * Some omaps don't have wake-up path from deeper idle states
+	 * and need to remux SDIO DAT1 to GPIO for wake-up from idle.
+	 */
+	if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
+		struct pinctrl *p = devm_pinctrl_get(host->dev);
+		if (!p) {
+			ret = -ENODEV;
+			goto err_free_irq;
+		}
+		if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) {
+			dev_info(host->dev, "missing default pinctrl state\n");
+			devm_pinctrl_put(p);
+			ret = -EINVAL;
+			goto err_free_irq;
+		}
+
+		if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_IDLE))) {
+			dev_info(host->dev, "missing idle pinctrl state\n");
+			devm_pinctrl_put(p);
+			ret = -EINVAL;
+			goto err_free_irq;
+		}
+		devm_pinctrl_put(p);
+	}
+
+	OMAP_HSMMC_WRITE(host->base, HCTL,
+			 OMAP_HSMMC_READ(host->base, HCTL) | IWE);
+	return 0;
+
+err_free_irq:
+	devm_free_irq(host->dev, host->wake_irq, host);
+err:
+	dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
+	host->wake_irq = 0;
+	return ret;
+}
+
 static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
 {
 	u32 hctl, capa, value;
@@ -1691,7 +1839,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = {
 	.get_cd = omap_hsmmc_get_cd,
 	.get_ro = omap_hsmmc_get_ro,
 	.init_card = omap_hsmmc_init_card,
-	/* NYET -- enable_sdio_irq */
+	.enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -1701,13 +1849,23 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
 	struct mmc_host *mmc = s->private;
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	seq_printf(s, "mmc%d:\n ctx_loss:\t%d\n\nregs:\n",
-			mmc->index, host->context_loss);
+	seq_printf(s, "mmc%d:\n", mmc->index);
+	seq_printf(s, "sdio irq mode\t%s\n",
+		   (mmc->caps & MMC_CAP_SDIO_IRQ) ? "interrupt" : "polling");
 
-	pm_runtime_get_sync(host->dev);
+	if (mmc->caps & MMC_CAP_SDIO_IRQ) {
+		seq_printf(s, "sdio irq \t%s\n",
+			   (host->flags & HSMMC_SDIO_IRQ_ENABLED) ?  "enabled"
+			   : "disabled");
+	}
+	seq_printf(s, "ctx_loss:\t%d\n", host->context_loss);
 
+	pm_runtime_get_sync(host->dev);
+	seq_puts(s, "\nregs:\n");
 	seq_printf(s, "CON:\t\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, CON));
+	seq_printf(s, "PSTATE:\t\t0x%08x\n",
+		   OMAP_HSMMC_READ(host->base, PSTATE));
 	seq_printf(s, "HCTL:\t\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, HCTL));
 	seq_printf(s, "SYSCTL:\t\t0x%08x\n",
@@ -1761,6 +1919,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = {
 static const struct omap_mmc_of_data omap4_mmc_of_data = {
 	.reg_offset = 0x100,
 };
+static const struct omap_mmc_of_data am33xx_mmc_of_data = {
+	.reg_offset = 0x100,
+	.controller_flags = OMAP_HSMMC_SWAKEUP_MISSING,
+};
 
 static const struct of_device_id omap_mmc_of_match[] = {
 	{
@@ -1777,6 +1939,10 @@ static const struct of_device_id omap_mmc_of_match[] = {
 		.compatible = "ti,omap4-hsmmc",
 		.data = &omap4_mmc_of_data,
 	},
+	{
+		.compatible = "ti,am33xx-hsmmc",
+		.data = &am33xx_mmc_of_data,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_mmc_of_match);
@@ -1850,7 +2016,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 	const struct of_device_id *match;
 	dma_cap_mask_t mask;
 	unsigned tx_req, rx_req;
-	struct pinctrl *pinctrl;
 	const struct omap_mmc_of_data *data;
 	void __iomem *base;
 
@@ -1913,6 +2078,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, host);
 
+	if (pdev->dev.of_node)
+		host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1);
+
 	mmc->ops	= &omap_hsmmc_ops;
 
 	mmc->f_min = OMAP_MMC_MIN_CLOCK;
@@ -2061,10 +2229,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 
 	omap_hsmmc_disable_irq(host);
 
-	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
-	if (IS_ERR(pinctrl))
-		dev_warn(&pdev->dev,
-			"pins are not configured from the driver\n");
+	/*
+	 * For now, only support SDIO interrupt if we have a separate
+	 * wake-up interrupt configured from device tree. This is because
+	 * the wake-up interrupt is needed for idle state and some
+	 * platforms need special quirks. And we don't want to add new
+	 * legacy mux platform init code callbacks any longer as we
+	 * are moving to DT based booting anyways.
+	 */
+	ret = omap_hsmmc_configure_wake_irq(host);
+	if (!ret)
+		mmc->caps |= MMC_CAP_SDIO_IRQ;
 
 	omap_hsmmc_protect_card(host);
 
@@ -2170,11 +2345,18 @@ static int omap_hsmmc_suspend(struct device *dev)
 	pm_runtime_get_sync(host->dev);
 
 	if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) {
-		omap_hsmmc_disable_irq(host);
+		OMAP_HSMMC_WRITE(host->base, ISE, 0);
+		OMAP_HSMMC_WRITE(host->base, IE, 0);
+		OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 		OMAP_HSMMC_WRITE(host->base, HCTL,
 				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
 	}
 
+	/* do not wake up due to sdio irq */
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+		disable_irq(host->wake_irq);
+
 	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
 
@@ -2200,6 +2382,10 @@ static int omap_hsmmc_resume(struct device *dev)
 
 	omap_hsmmc_protect_card(host);
 
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
+		enable_irq(host->wake_irq);
+
 	pm_runtime_mark_last_busy(host->dev);
 	pm_runtime_put_autosuspend(host->dev);
 	return 0;
@@ -2215,22 +2401,77 @@ static int omap_hsmmc_resume(struct device *dev)
 static int omap_hsmmc_runtime_suspend(struct device *dev)
 {
 	struct omap_hsmmc_host *host;
+	unsigned long flags;
+	int ret = 0;
 
 	host = platform_get_drvdata(to_platform_device(dev));
 	omap_hsmmc_context_save(host);
 	dev_dbg(dev, "disabled\n");
 
-	return 0;
+	spin_lock_irqsave(&host->irq_lock, flags);
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+		/* disable sdio irq handling to prevent race */
+		OMAP_HSMMC_WRITE(host->base, ISE, 0);
+		OMAP_HSMMC_WRITE(host->base, IE, 0);
+
+		if (!(OMAP_HSMMC_READ(host->base, PSTATE) & DLEV_DAT(1))) {
+			/*
+			 * dat1 line low, pending sdio irq
+			 * race condition: possible irq handler running on
+			 * multi-core, abort
+			 */
+			dev_dbg(dev, "pending sdio irq, abort suspend\n");
+			OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+			OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+			OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+			pm_runtime_mark_last_busy(dev);
+			ret = -EBUSY;
+			goto abort;
+		}
+
+		pinctrl_pm_select_idle_state(dev);
+
+		WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
+		enable_irq(host->wake_irq);
+		host->flags |= HSMMC_WAKE_IRQ_ENABLED;
+	} else {
+		pinctrl_pm_select_idle_state(dev);
+	}
+
+abort:
+	spin_unlock_irqrestore(&host->irq_lock, flags);
+	return ret;
 }
 
 static int omap_hsmmc_runtime_resume(struct device *dev)
 {
 	struct omap_hsmmc_host *host;
+	unsigned long flags;
 
 	host = platform_get_drvdata(to_platform_device(dev));
 	omap_hsmmc_context_restore(host);
 	dev_dbg(dev, "enabled\n");
 
+	spin_lock_irqsave(&host->irq_lock, flags);
+	if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
+	    (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
+		/* sdio irq flag can't change while in runtime suspend */
+		if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
+			disable_irq_nosync(host->wake_irq);
+			host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
+		}
+
+		pinctrl_pm_select_default_state(host->dev);
+
+		/* irq lost, if pinmux incorrect */
+		OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
+		OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
+		OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
+	} else {
+		pinctrl_pm_select_default_state(host->dev);
+	}
+	spin_unlock_irqrestore(&host->irq_lock, flags);
 	return 0;
 }
 
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f237826..e5516a2 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
@@ -27,6 +28,7 @@
 #include <mach/dma.h>
 #include <mach/gpio-samsung.h>
 
+#include <linux/platform_data/dma-s3c24xx.h>
 #include <linux/platform_data/mmc-s3cmci.h>
 
 #include "s3cmci.h"
@@ -140,10 +142,6 @@ static const int dbgmap_debug = dbg_err | dbg_debug;
 		dev_dbg(&host->pdev->dev, args);  \
 	} while (0)
 
-static struct s3c2410_dma_client s3cmci_dma_client = {
-	.name		= "s3c-mci",
-};
-
 static void finalize_request(struct s3cmci_host *host);
 static void s3cmci_send_request(struct mmc_host *mmc);
 static void s3cmci_reset(struct s3cmci_host *host);
@@ -256,25 +254,8 @@ static inline bool s3cmci_host_usedma(struct s3cmci_host *host)
 {
 #ifdef CONFIG_MMC_S3C_PIO
 	return false;
-#elif defined(CONFIG_MMC_S3C_DMA)
+#else /* CONFIG_MMC_S3C_DMA */
 	return true;
-#else
-	return host->dodma;
-#endif
-}
-
-/**
- * s3cmci_host_canpio - return true if host has pio code available
- *
- * Return true if the driver has been compiled with the PIO support code
- * available.
- */
-static inline bool s3cmci_host_canpio(void)
-{
-#ifdef CONFIG_MMC_S3C_PIO
-	return true;
-#else
-	return false;
 #endif
 }
 
@@ -841,60 +822,24 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch,
-				     void *buf_id, int size,
-				     enum s3c2410_dma_buffresult result)
+static void s3cmci_dma_done_callback(void *arg)
 {
-	struct s3cmci_host *host = buf_id;
+	struct s3cmci_host *host = arg;
 	unsigned long iflags;
-	u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt;
-
-	mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
-	mci_dsta = readl(host->base + S3C2410_SDIDSTA);
-	mci_fsta = readl(host->base + S3C2410_SDIFSTA);
-	mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
 
 	BUG_ON(!host->mrq);
 	BUG_ON(!host->mrq->data);
-	BUG_ON(!host->dmatogo);
 
 	spin_lock_irqsave(&host->complete_lock, iflags);
 
-	if (result != S3C2410_RES_OK) {
-		dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x "
-			"fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n",
-			mci_csta, mci_dsta, mci_fsta,
-			mci_dcnt, result, host->dmatogo);
-
-		goto fail_request;
-	}
-
-	host->dmatogo--;
-	if (host->dmatogo) {
-		dbg(host, dbg_dma, "DMA DONE  Size:%i DSTA:[%08x] "
-			"DCNT:[%08x] toGo:%u\n",
-			size, mci_dsta, mci_dcnt, host->dmatogo);
-
-		goto out;
-	}
-
-	dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n",
-		size, mci_dsta, mci_dcnt);
+	dbg(host, dbg_dma, "DMA FINISHED\n");
 
 	host->dma_complete = 1;
 	host->complete_what = COMPLETION_FINALIZE;
 
-out:
 	tasklet_schedule(&host->pio_tasklet);
 	spin_unlock_irqrestore(&host->complete_lock, iflags);
-	return;
 
-fail_request:
-	host->mrq->data->error = -EINVAL;
-	host->complete_what = COMPLETION_FINALIZE;
-	clear_imask(host);
-
-	goto out;
 }
 
 static void finalize_request(struct s3cmci_host *host)
@@ -966,7 +911,7 @@ static void finalize_request(struct s3cmci_host *host)
 	 * DMA channel and the fifo to clear out any garbage. */
 	if (mrq->data->error != 0) {
 		if (s3cmci_host_usedma(host))
-			s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
+			dmaengine_terminate_all(host->dma);
 
 		if (host->is2440) {
 			/* Clear failure register and reset fifo. */
@@ -992,29 +937,6 @@ request_done:
 	mmc_request_done(host->mmc, mrq);
 }
 
-static void s3cmci_dma_setup(struct s3cmci_host *host,
-			     enum dma_data_direction source)
-{
-	static enum dma_data_direction last_source = -1;
-	static int setup_ok;
-
-	if (last_source == source)
-		return;
-
-	last_source = source;
-
-	s3c2410_dma_devconfig(host->dma, source,
-			      host->mem->start + host->sdidata);
-
-	if (!setup_ok) {
-		s3c2410_dma_config(host->dma, 4);
-		s3c2410_dma_set_buffdone_fn(host->dma,
-					    s3cmci_dma_done_callback);
-		s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
-		setup_ok = 1;
-	}
-}
-
 static void s3cmci_send_command(struct s3cmci_host *host,
 					struct mmc_command *cmd)
 {
@@ -1162,43 +1084,45 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
 
 static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
 {
-	int dma_len, i;
 	int rw = data->flags & MMC_DATA_WRITE;
+	struct dma_async_tx_descriptor *desc;
+	struct dma_slave_config conf = {
+		.src_addr = host->mem->start + host->sdidata,
+		.dst_addr = host->mem->start + host->sdidata,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+	};
 
 	BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
 
-	s3cmci_dma_setup(host, rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
-
-	dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-			     rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-	if (dma_len == 0)
-		return -ENOMEM;
-
-	host->dma_complete = 0;
-	host->dmatogo = dma_len;
-
-	for (i = 0; i < dma_len; i++) {
-		int res;
-
-		dbg(host, dbg_dma, "enqueue %i: %08x@%u\n", i,
-		    sg_dma_address(&data->sg[i]),
-		    sg_dma_len(&data->sg[i]));
+	/* Restore prescaler value */
+	writel(host->prescaler, host->base + S3C2410_SDIPRE);
 
-		res = s3c2410_dma_enqueue(host->dma, host,
-					  sg_dma_address(&data->sg[i]),
-					  sg_dma_len(&data->sg[i]));
+	if (!rw)
+		conf.direction = DMA_DEV_TO_MEM;
+	else
+		conf.direction = DMA_MEM_TO_DEV;
 
-		if (res) {
-			s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH);
-			return -EBUSY;
-		}
-	}
+	dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-	s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START);
+	dmaengine_slave_config(host->dma, &conf);
+	desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len,
+		conf.direction,
+		DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+	if (!desc)
+		goto unmap_exit;
+	desc->callback = s3cmci_dma_done_callback;
+	desc->callback_param = host;
+	dmaengine_submit(desc);
+	dma_async_issue_pending(host->dma);
 
 	return 0;
+
+unmap_exit:
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	return -ENOMEM;
 }
 
 static void s3cmci_send_request(struct mmc_host *mmc)
@@ -1676,10 +1600,6 @@ static int s3cmci_probe(struct platform_device *pdev)
 	host->complete_what 	= COMPLETION_NONE;
 	host->pio_active 	= XFER_NONE;
 
-#ifdef CONFIG_MMC_S3C_PIODMA
-	host->dodma		= host->pdata->use_dma;
-#endif
-
 	host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!host->mem) {
 		dev_err(&pdev->dev,
@@ -1765,17 +1685,17 @@ static int s3cmci_probe(struct platform_device *pdev)
 	/* depending on the dma state, get a dma channel to use. */
 
 	if (s3cmci_host_usedma(host)) {
-		host->dma = s3c2410_dma_request(DMACH_SDI, &s3cmci_dma_client,
-						host);
-		if (host->dma < 0) {
+		dma_cap_mask_t mask;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		host->dma = dma_request_slave_channel_compat(mask,
+			s3c24xx_dma_filter, (void *)DMACH_SDI, &pdev->dev, "rx-tx");
+		if (!host->dma) {
 			dev_err(&pdev->dev, "cannot get DMA channel.\n");
-			if (!s3cmci_host_canpio()) {
-				ret = -EBUSY;
-				goto probe_free_gpio_wp;
-			} else {
-				dev_warn(&pdev->dev, "falling back to PIO.\n");
-				host->dodma = 0;
-			}
+			ret = -EBUSY;
+			goto probe_free_gpio_wp;
 		}
 	}
 
@@ -1787,7 +1707,7 @@ static int s3cmci_probe(struct platform_device *pdev)
 		goto probe_free_dma;
 	}
 
-	ret = clk_enable(host->clk);
+	ret = clk_prepare_enable(host->clk);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to enable clock source.\n");
 		goto clk_free;
@@ -1816,7 +1736,7 @@ static int s3cmci_probe(struct platform_device *pdev)
 	mmc->max_segs		= 128;
 
 	dbg(host, dbg_debug,
-	    "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n",
+	    "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%p.\n",
 	    (host->is2440?"2440":""),
 	    host->base, host->irq, host->irq_cd, host->dma);
 
@@ -1845,14 +1765,14 @@ static int s3cmci_probe(struct platform_device *pdev)
 	s3cmci_cpufreq_deregister(host);
 
  free_dmabuf:
-	clk_disable(host->clk);
+	clk_disable_unprepare(host->clk);
 
  clk_free:
 	clk_put(host->clk);
 
  probe_free_dma:
 	if (s3cmci_host_usedma(host))
-		s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+		dma_release_channel(host->dma);
 
  probe_free_gpio_wp:
 	if (!host->pdata->no_wprotect)
@@ -1897,7 +1817,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
 	s3cmci_debugfs_remove(host);
 	s3cmci_cpufreq_deregister(host);
 	mmc_remove_host(mmc);
-	clk_disable(host->clk);
+	clk_disable_unprepare(host->clk);
 }
 
 static int s3cmci_remove(struct platform_device *pdev)
@@ -1914,7 +1834,7 @@ static int s3cmci_remove(struct platform_device *pdev)
 	tasklet_disable(&host->pio_tasklet);
 
 	if (s3cmci_host_usedma(host))
-		s3c2410_dma_free(host->dma, &s3cmci_dma_client);
+		dma_release_channel(host->dma);
 
 	free_irq(host->irq, host);
 
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index c76b53d..cc2e46c 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -26,7 +26,7 @@ struct s3cmci_host {
 	void __iomem		*base;
 	int			irq;
 	int			irq_cd;
-	int			dma;
+	struct dma_chan		*dma;
 
 	unsigned long		clk_rate;
 	unsigned long		clk_div;
@@ -36,8 +36,6 @@ struct s3cmci_host {
 	int			is2440;
 	unsigned		sdiimsk;
 	unsigned		sdidata;
-	int			dodma;
-	int			dmatogo;
 
 	bool			irq_disabled;
 	bool			irq_enabled;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 8ce3c28..8c53370 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -124,9 +124,11 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
 
 static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
 	.chip    = &sdhci_acpi_chip_int,
-	.caps    = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET,
+	.caps    = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
+		   MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR,
 	.caps2   = MMC_CAP2_HC_ERASE_SZ,
 	.flags   = SDHCI_ACPI_RUNTIME_PM,
+	.quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
 };
 
 static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 40573a5..1a6661e 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -16,7 +16,6 @@
 
 #include <linux/module.h>
 #include <linux/of_device.h>
-#include <linux/regulator/consumer.h>
 #include <linux/delay.h>
 #include <linux/mmc/mmc.h>
 #include <linux/slab.h>
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 52c42fc..c3a1deb 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -103,6 +103,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
 			  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
+static const struct sdhci_pci_fixes sdhci_intel_qrk = {
+	.quirks		= SDHCI_QUIRK_NO_HISPD_BIT,
+};
+
 static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
 {
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
@@ -264,7 +268,7 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
 static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
-				 MMC_CAP_HW_RESET;
+				 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR;
 	slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
 	slot->hw_reset = sdhci_pci_int_hw_reset;
 	return 0;
@@ -279,6 +283,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
 static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
 	.allow_runtime_pm = true,
 	.probe_slot	= byt_emmc_probe_slot,
+	.quirks2	= SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -753,6 +758,14 @@ static const struct pci_device_id pci_ids[] = {
 
 	{
 		.vendor		= PCI_VENDOR_ID_INTEL,
+		.device		= PCI_DEVICE_ID_INTEL_QRK_SD,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_qrk,
+	},
+
+	{
+		.vendor		= PCI_VENDOR_ID_INTEL,
 		.device		= PCI_DEVICE_ID_INTEL_MRST_SD0,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
@@ -1130,18 +1143,13 @@ static int sdhci_pci_suspend(struct device *dev)
 			goto err_pci_suspend;
 	}
 
-	pci_save_state(pdev);
 	if (pm_flags & MMC_PM_KEEP_POWER) {
-		if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) {
-			pci_pme_active(pdev, true);
-			pci_enable_wake(pdev, PCI_D3hot, 1);
-		}
-		pci_set_power_state(pdev, PCI_D3hot);
-	} else {
-		pci_enable_wake(pdev, PCI_D3hot, 0);
-		pci_disable_device(pdev);
-		pci_set_power_state(pdev, PCI_D3hot);
-	}
+		if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
+			device_init_wakeup(dev, true);
+		else
+			device_init_wakeup(dev, false);
+	} else
+		device_init_wakeup(dev, false);
 
 	return 0;
 
@@ -1162,12 +1170,6 @@ static int sdhci_pci_resume(struct device *dev)
 	if (!chip)
 		return 0;
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	ret = pci_enable_device(pdev);
-	if (ret)
-		return ret;
-
 	if (chip->fixes && chip->fixes->resume) {
 		ret = chip->fixes->resume(chip);
 		if (ret)
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 6d71871..c101477 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -17,6 +17,7 @@
 #define PCI_DEVICE_ID_INTEL_CLV_SDIO2	0x08fb
 #define PCI_DEVICE_ID_INTEL_CLV_EMMC0	0x08e5
 #define PCI_DEVICE_ID_INTEL_CLV_EMMC1	0x08e6
+#define PCI_DEVICE_ID_INTEL_QRK_SD	0x08A7
 
 /*
  * PCI registers
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index f4f1289..6f842fb 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -288,15 +288,13 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
 	int ret;
 	struct clk *clk;
 
-	pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
+	pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL);
 	if (!pxa)
 		return -ENOMEM;
 
 	host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0);
-	if (IS_ERR(host)) {
-		kfree(pxa);
+	if (IS_ERR(host))
 		return PTR_ERR(host);
-	}
 
 	if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
 		ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
@@ -308,7 +306,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
 	pltfm_host = sdhci_priv(host);
 	pltfm_host->priv = pxa;
 
-	clk = clk_get(dev, NULL);
+	clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(clk)) {
 		dev_err(dev, "failed to get io clock\n");
 		ret = PTR_ERR(clk);
@@ -389,11 +387,9 @@ err_add_host:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(clk);
-	clk_put(clk);
 err_clk_get:
 err_mbus_win:
 	sdhci_pltfm_free(pdev);
-	kfree(pxa);
 	return ret;
 }
 
@@ -401,17 +397,14 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
 {
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct sdhci_pxa *pxa = pltfm_host->priv;
 
 	pm_runtime_get_sync(&pdev->dev);
 	sdhci_remove_host(host, 1);
 	pm_runtime_disable(&pdev->dev);
 
 	clk_disable_unprepare(pltfm_host->clk);
-	clk_put(pltfm_host->clk);
 
 	sdhci_pltfm_free(pdev);
-	kfree(pxa);
 
 	return 0;
 }
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
new file mode 100644
index 0000000..328f348
--- /dev/null
+++ b/drivers/mmc/host/sdhci-st.c
@@ -0,0 +1,176 @@
+/*
+ * Support for SDHCI on STMicroelectronics SoCs
+ *
+ * Copyright (C) 2014 STMicroelectronics Ltd
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ * Contributors: Peter Griffin <peter.griffin@linaro.org>
+ *
+ * Based on sdhci-cns3xxx.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/mmc/host.h>
+
+#include "sdhci-pltfm.h"
+
+static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
+{
+	u32 ret;
+
+	switch (reg) {
+	case SDHCI_CAPABILITIES:
+		ret = readl_relaxed(host->ioaddr + reg);
+		/* Support 3.3V and 1.8V */
+		ret &= ~SDHCI_CAN_VDD_300;
+		break;
+	default:
+		ret = readl_relaxed(host->ioaddr + reg);
+	}
+	return ret;
+}
+
+static const struct sdhci_ops sdhci_st_ops = {
+	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+	.set_clock = sdhci_set_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.read_l = sdhci_st_readl,
+	.reset = sdhci_reset,
+};
+
+static const struct sdhci_pltfm_data sdhci_st_pdata = {
+	.ops = &sdhci_st_ops,
+	.quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
+	    SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+};
+
+
+static int sdhci_st_probe(struct platform_device *pdev)
+{
+	struct sdhci_host *host;
+	struct sdhci_pltfm_host *pltfm_host;
+	struct clk *clk;
+	int ret = 0;
+	u16 host_version;
+
+	clk =  devm_clk_get(&pdev->dev, "mmc");
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "Peripheral clk not found\n");
+		return PTR_ERR(clk);
+	}
+
+	host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
+	if (IS_ERR(host)) {
+		dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
+		return PTR_ERR(host);
+	}
+
+	ret = mmc_of_parse(host->mmc);
+
+	if (ret) {
+		dev_err(&pdev->dev, "Failed mmc_of_parse\n");
+		return ret;
+	}
+
+	clk_prepare_enable(clk);
+
+	pltfm_host = sdhci_priv(host);
+	pltfm_host->clk = clk;
+
+	ret = sdhci_add_host(host);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed sdhci_add_host\n");
+		goto err_out;
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
+
+	dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n",
+		((host_version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT),
+		((host_version & SDHCI_VENDOR_VER_MASK) >>
+		SDHCI_VENDOR_VER_SHIFT));
+
+	return 0;
+
+err_out:
+	clk_disable_unprepare(clk);
+	sdhci_pltfm_free(pdev);
+
+	return ret;
+}
+
+static int sdhci_st_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+	clk_disable_unprepare(pltfm_host->clk);
+
+	return sdhci_pltfm_unregister(pdev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sdhci_st_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	int ret = sdhci_suspend_host(host);
+
+	if (ret)
+		goto out;
+
+	clk_disable_unprepare(pltfm_host->clk);
+out:
+	return ret;
+}
+
+static int sdhci_st_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+	clk_prepare_enable(pltfm_host->clk);
+
+	return sdhci_resume_host(host);
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(sdhci_st_pmops, sdhci_st_suspend, sdhci_st_resume);
+
+static const struct of_device_id st_sdhci_match[] = {
+	{ .compatible = "st,sdhci" },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, st_sdhci_match);
+
+static struct platform_driver sdhci_st_driver = {
+	.probe = sdhci_st_probe,
+	.remove = sdhci_st_remove,
+	.driver = {
+		   .name = "sdhci-st",
+		   .pm = &sdhci_st_pmops,
+		   .of_match_table = of_match_ptr(st_sdhci_match),
+		  },
+};
+
+module_platform_driver(sdhci_st_driver);
+
+MODULE_DESCRIPTION("SDHCI driver for STMicroelectronics SoCs");
+MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:st-sdhci");
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index d93a063..33100d1 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -26,8 +26,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
 
-#include <asm/gpio.h>
-
 #include "sdhci-pltfm.h"
 
 /* Tegra SDHOST controller vendor register definitions */
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 47055f3..37b2a9a 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1223,8 +1223,16 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock);
 static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
 			    unsigned short vdd)
 {
+	struct mmc_host *mmc = host->mmc;
 	u8 pwr = 0;
 
+	if (!IS_ERR(mmc->supply.vmmc)) {
+		spin_unlock_irq(&host->lock);
+		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
+		spin_lock_irq(&host->lock);
+		return;
+	}
+
 	if (mode != MMC_POWER_OFF) {
 		switch (1 << vdd) {
 		case MMC_VDD_165_195:
@@ -1283,12 +1291,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
 		if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
 			mdelay(10);
 	}
-
-	if (host->vmmc) {
-		spin_unlock_irq(&host->lock);
-		mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
-		spin_lock_irq(&host->lock);
-	}
 }
 
 /*****************************************************************************\
@@ -1440,13 +1442,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 {
 	unsigned long flags;
 	u8 ctrl;
+	struct mmc_host *mmc = host->mmc;
 
 	spin_lock_irqsave(&host->lock, flags);
 
 	if (host->flags & SDHCI_DEVICE_DEAD) {
 		spin_unlock_irqrestore(&host->lock, flags);
-		if (host->vmmc && ios->power_mode == MMC_POWER_OFF)
-			mmc_regulator_set_ocr(host->mmc, host->vmmc, 0);
+		if (!IS_ERR(mmc->supply.vmmc) &&
+		    ios->power_mode == MMC_POWER_OFF)
+			mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 		return;
 	}
 
@@ -1530,7 +1534,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			host->ops->set_clock(host, host->clock);
 		}
 
-
 		/* Reset SD Clock Enable */
 		clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
 		clk &= ~SDHCI_CLOCK_CARD_EN;
@@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
 static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 						struct mmc_ios *ios)
 {
+	struct mmc_host *mmc = host->mmc;
 	u16 ctrl;
 	int ret;
 
@@ -1725,11 +1729,12 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 		ctrl &= ~SDHCI_CTRL_VDD_180;
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000);
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000,
+						    3600000);
 			if (ret) {
 				pr_warning("%s: Switching to 3.3V signalling voltage "
-						" failed\n", mmc_hostname(host->mmc));
+						" failed\n", mmc_hostname(mmc));
 				return -EIO;
 			}
 		}
@@ -1742,16 +1747,16 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 			return 0;
 
 		pr_warning("%s: 3.3V regulator output did not became stable\n",
-				mmc_hostname(host->mmc));
+				mmc_hostname(mmc));
 
 		return -EAGAIN;
 	case MMC_SIGNAL_VOLTAGE_180:
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc,
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc,
 					1700000, 1950000);
 			if (ret) {
 				pr_warning("%s: Switching to 1.8V signalling voltage "
-						" failed\n", mmc_hostname(host->mmc));
+						" failed\n", mmc_hostname(mmc));
 				return -EIO;
 			}
 		}
@@ -1763,24 +1768,22 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
 		ctrl |= SDHCI_CTRL_VDD_180;
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 
-		/* Wait for 5ms */
-		usleep_range(5000, 5500);
-
 		/* 1.8V regulator output should be stable within 5 ms */
 		ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 		if (ctrl & SDHCI_CTRL_VDD_180)
 			return 0;
 
 		pr_warning("%s: 1.8V regulator output did not became stable\n",
-				mmc_hostname(host->mmc));
+				mmc_hostname(mmc));
 
 		return -EAGAIN;
 	case MMC_SIGNAL_VOLTAGE_120:
-		if (host->vqmmc) {
-			ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000);
+		if (!IS_ERR(mmc->supply.vqmmc)) {
+			ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000,
+						    1300000);
 			if (ret) {
 				pr_warning("%s: Switching to 1.2V signalling voltage "
-						" failed\n", mmc_hostname(host->mmc));
+						" failed\n", mmc_hostname(mmc));
 				return -EIO;
 			}
 		}
@@ -2643,7 +2646,6 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
 int sdhci_runtime_suspend_host(struct sdhci_host *host)
 {
 	unsigned long flags;
-	int ret = 0;
 
 	/* Disable tuning since we are suspending */
 	if (host->flags & SDHCI_USING_RETUNING_TIMER) {
@@ -2663,14 +2665,14 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	host->runtime_suspended = true;
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host);
 
 int sdhci_runtime_resume_host(struct sdhci_host *host)
 {
 	unsigned long flags;
-	int ret = 0, host_flags = host->flags;
+	int host_flags = host->flags;
 
 	if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if (host->ops->enable_dma)
@@ -2709,7 +2711,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
 
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host);
 
@@ -2820,12 +2822,12 @@ int sdhci_add_host(struct sdhci_host *host)
 		 * (128) and potentially one alignment transfer for
 		 * each of those entries.
 		 */
-		host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc),
+		host->adma_desc = dma_alloc_coherent(mmc_dev(mmc),
 						     ADMA_SIZE, &host->adma_addr,
 						     GFP_KERNEL);
 		host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
 		if (!host->adma_desc || !host->align_buffer) {
-			dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+			dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
 					  host->adma_desc, host->adma_addr);
 			kfree(host->align_buffer);
 			pr_warning("%s: Unable to allocate ADMA "
@@ -2838,7 +2840,7 @@ int sdhci_add_host(struct sdhci_host *host)
 			pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
 				   mmc_hostname(mmc));
 			host->flags &= ~SDHCI_USE_ADMA;
-			dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+			dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
 					  host->adma_desc, host->adma_addr);
 			kfree(host->align_buffer);
 			host->adma_desc = NULL;
@@ -2853,7 +2855,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	 */
 	if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) {
 		host->dma_mask = DMA_BIT_MASK(64);
-		mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
+		mmc_dev(mmc)->dma_mask = &host->dma_mask;
 	}
 
 	if (host->version >= SDHCI_SPEC_300)
@@ -2959,28 +2961,25 @@ int sdhci_add_host(struct sdhci_host *host)
 		mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
 
 	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
-	    !(host->mmc->caps & MMC_CAP_NONREMOVABLE))
+	    !(mmc->caps & MMC_CAP_NONREMOVABLE))
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	/* If there are external regulators, get them */
+	if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
 	/* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
-	host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc");
-	if (IS_ERR_OR_NULL(host->vqmmc)) {
-		if (PTR_ERR(host->vqmmc) < 0) {
-			pr_info("%s: no vqmmc regulator found\n",
-				mmc_hostname(mmc));
-			host->vqmmc = NULL;
-		}
-	} else {
-		ret = regulator_enable(host->vqmmc);
-		if (!regulator_is_supported_voltage(host->vqmmc, 1700000,
-			1950000))
+	if (!IS_ERR(mmc->supply.vqmmc)) {
+		ret = regulator_enable(mmc->supply.vqmmc);
+		if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000,
+						    1950000))
 			caps[1] &= ~(SDHCI_SUPPORT_SDR104 |
 					SDHCI_SUPPORT_SDR50 |
 					SDHCI_SUPPORT_DDR50);
 		if (ret) {
 			pr_warn("%s: Failed to enable vqmmc regulator: %d\n",
 				mmc_hostname(mmc), ret);
-			host->vqmmc = NULL;
+			mmc->supply.vqmmc = NULL;
 		}
 	}
 
@@ -3041,34 +3040,6 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	ocr_avail = 0;
 
-	host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc");
-	if (IS_ERR_OR_NULL(host->vmmc)) {
-		if (PTR_ERR(host->vmmc) < 0) {
-			pr_info("%s: no vmmc regulator found\n",
-				mmc_hostname(mmc));
-			host->vmmc = NULL;
-		}
-	}
-
-#ifdef CONFIG_REGULATOR
-	/*
-	 * Voltage range check makes sense only if regulator reports
-	 * any voltage value.
-	 */
-	if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) {
-		ret = regulator_is_supported_voltage(host->vmmc, 2700000,
-			3600000);
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330)))
-			caps[0] &= ~SDHCI_CAN_VDD_330;
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300)))
-			caps[0] &= ~SDHCI_CAN_VDD_300;
-		ret = regulator_is_supported_voltage(host->vmmc, 1700000,
-			1950000);
-		if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180)))
-			caps[0] &= ~SDHCI_CAN_VDD_180;
-	}
-#endif /* CONFIG_REGULATOR */
-
 	/*
 	 * According to SD Host Controller spec v3.00, if the Host System
 	 * can afford more than 150mA, Host Driver should set XPC to 1. Also
@@ -3077,8 +3048,8 @@ int sdhci_add_host(struct sdhci_host *host)
 	 * value.
 	 */
 	max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
-	if (!max_current_caps && host->vmmc) {
-		u32 curr = regulator_get_current_limit(host->vmmc);
+	if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
+		u32 curr = regulator_get_current_limit(mmc->supply.vmmc);
 		if (curr > 0) {
 
 			/* convert to SDHCI_MAX_CURRENT format */
@@ -3118,8 +3089,12 @@ int sdhci_add_host(struct sdhci_host *host)
 				   SDHCI_MAX_CURRENT_MULTIPLIER;
 	}
 
+	/* If OCR set by external regulators, use it instead */
+	if (mmc->ocr_avail)
+		ocr_avail = mmc->ocr_avail;
+
 	if (host->ocr_mask)
-		ocr_avail = host->ocr_mask;
+		ocr_avail &= host->ocr_mask;
 
 	mmc->ocr_avail = ocr_avail;
 	mmc->ocr_avail_sdio = ocr_avail;
@@ -3273,6 +3248,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host);
 
 void sdhci_remove_host(struct sdhci_host *host, int dead)
 {
+	struct mmc_host *mmc = host->mmc;
 	unsigned long flags;
 
 	if (dead) {
@@ -3282,7 +3258,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 		if (host->mrq) {
 			pr_err("%s: Controller removed during "
-				" transfer!\n", mmc_hostname(host->mmc));
+				" transfer!\n", mmc_hostname(mmc));
 
 			host->mrq->cmd->error = -ENOMEDIUM;
 			tasklet_schedule(&host->finish_tasklet);
@@ -3293,7 +3269,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	sdhci_disable_card_detection(host);
 
-	mmc_remove_host(host->mmc);
+	mmc_remove_host(mmc);
 
 #ifdef SDHCI_USE_LEDS_CLASS
 	led_classdev_unregister(&host->led);
@@ -3310,18 +3286,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	tasklet_kill(&host->finish_tasklet);
 
-	if (host->vmmc) {
-		regulator_disable(host->vmmc);
-		regulator_put(host->vmmc);
-	}
+	if (!IS_ERR(mmc->supply.vmmc))
+		regulator_disable(mmc->supply.vmmc);
 
-	if (host->vqmmc) {
-		regulator_disable(host->vqmmc);
-		regulator_put(host->vqmmc);
-	}
+	if (!IS_ERR(mmc->supply.vqmmc))
+		regulator_disable(mmc->supply.vqmmc);
 
 	if (host->adma_desc)
-		dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+		dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
 				  host->adma_desc, host->adma_addr);
 	kfree(host->align_buffer);
 
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 656fbba..d11708c 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -386,7 +386,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
 			 struct sh_mmcif_plat_data *pdata,
 			 enum dma_transfer_direction direction)
 {
-	struct dma_slave_config cfg;
+	struct dma_slave_config cfg = { 0, };
 	struct dma_chan *chan;
 	unsigned int slave_id;
 	struct resource *res;
@@ -417,8 +417,15 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
 	/* In the OF case the driver will get the slave ID from the DT */
 	cfg.slave_id = slave_id;
 	cfg.direction = direction;
-	cfg.dst_addr = res->start + MMCIF_CE_DATA;
-	cfg.src_addr = 0;
+
+	if (direction == DMA_DEV_TO_MEM) {
+		cfg.src_addr = res->start + MMCIF_CE_DATA;
+		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	} else {
+		cfg.dst_addr = res->start + MMCIF_CE_DATA;
+		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	}
+
 	ret = dmaengine_slave_config(chan, &cfg);
 	if (ret < 0) {
 		dma_release_channel(chan);
@@ -1378,26 +1385,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "Get irq error\n");
 		return -ENXIO;
 	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "platform_get_resource error.\n");
-		return -ENXIO;
-	}
-	reg = ioremap(res->start, resource_size(res));
-	if (!reg) {
-		dev_err(&pdev->dev, "ioremap error.\n");
-		return -ENOMEM;
-	}
+	reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
 
 	mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev);
-	if (!mmc) {
-		ret = -ENOMEM;
-		goto ealloch;
-	}
+	if (!mmc)
+		return -ENOMEM;
 
 	ret = mmc_of_parse(mmc);
 	if (ret < 0)
-		goto eofparse;
+		goto err_host;
 
 	host		= mmc_priv(mmc);
 	host->mmc	= mmc;
@@ -1427,19 +1427,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 	host->power = false;
 
-	host->hclk = clk_get(&pdev->dev, NULL);
+	host->hclk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->hclk)) {
 		ret = PTR_ERR(host->hclk);
 		dev_err(&pdev->dev, "cannot get clock: %d\n", ret);
-		goto eclkget;
+		goto err_pm;
 	}
 	ret = sh_mmcif_clk_update(host);
 	if (ret < 0)
-		goto eclkupdate;
+		goto err_pm;
 
 	ret = pm_runtime_resume(&pdev->dev);
 	if (ret < 0)
-		goto eresume;
+		goto err_clk;
 
 	INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work);
 
@@ -1447,65 +1447,55 @@ static int sh_mmcif_probe(struct platform_device *pdev)
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
 
 	name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error";
-	ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, name, host);
+	ret = devm_request_threaded_irq(&pdev->dev, irq[0], sh_mmcif_intr,
+					sh_mmcif_irqt, 0, name, host);
 	if (ret) {
 		dev_err(&pdev->dev, "request_irq error (%s)\n", name);
-		goto ereqirq0;
+		goto err_clk;
 	}
 	if (irq[1] >= 0) {
-		ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt,
-					   0, "sh_mmc:int", host);
+		ret = devm_request_threaded_irq(&pdev->dev, irq[1],
+						sh_mmcif_intr, sh_mmcif_irqt,
+						0, "sh_mmc:int", host);
 		if (ret) {
 			dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
-			goto ereqirq1;
+			goto err_clk;
 		}
 	}
 
 	if (pd && pd->use_cd_gpio) {
 		ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0);
 		if (ret < 0)
-			goto erqcd;
+			goto err_clk;
 	}
 
 	mutex_init(&host->thread_lock);
 
-	clk_disable_unprepare(host->hclk);
 	ret = mmc_add_host(mmc);
 	if (ret < 0)
-		goto emmcaddh;
+		goto err_clk;
 
 	dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
 
-	dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
-	dev_dbg(&pdev->dev, "chip ver H'%04x\n",
-		sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff);
+	dev_info(&pdev->dev, "Chip version 0x%04x, clock rate %luMHz\n",
+		 sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
+		 clk_get_rate(host->hclk) / 1000000UL);
+
+	clk_disable_unprepare(host->hclk);
 	return ret;
 
-emmcaddh:
-erqcd:
-	if (irq[1] >= 0)
-		free_irq(irq[1], host);
-ereqirq1:
-	free_irq(irq[0], host);
-ereqirq0:
-	pm_runtime_suspend(&pdev->dev);
-eresume:
+err_clk:
 	clk_disable_unprepare(host->hclk);
-eclkupdate:
-	clk_put(host->hclk);
-eclkget:
+err_pm:
 	pm_runtime_disable(&pdev->dev);
-eofparse:
+err_host:
 	mmc_free_host(mmc);
-ealloch:
-	iounmap(reg);
 	return ret;
 }
 
 static int sh_mmcif_remove(struct platform_device *pdev)
 {
 	struct sh_mmcif_host *host = platform_get_drvdata(pdev);
-	int irq[2];
 
 	host->dying = true;
 	clk_prepare_enable(host->hclk);
@@ -1523,16 +1513,6 @@ static int sh_mmcif_remove(struct platform_device *pdev)
 	 */
 	cancel_delayed_work_sync(&host->timeout_work);
 
-	if (host->addr)
-		iounmap(host->addr);
-
-	irq[0] = platform_get_irq(pdev, 0);
-	irq[1] = platform_get_irq(pdev, 1);
-
-	free_irq(irq[0], host);
-	if (irq[1] >= 0)
-		free_irq(irq[1], host);
-
 	clk_disable_unprepare(host->hclk);
 	mmc_free_host(host->mmc);
 	pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 03e7b28..eb8f1d5 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -294,6 +294,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
 			cfg.slave_id = pdata->dma->slave_id_tx;
 		cfg.direction = DMA_MEM_TO_DEV;
 		cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift);
+		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
 		cfg.src_addr = 0;
 		ret = dmaengine_slave_config(host->chan_tx, &cfg);
 		if (ret < 0)
@@ -312,6 +313,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
 			cfg.slave_id = pdata->dma->slave_id_rx;
 		cfg.direction = DMA_DEV_TO_MEM;
 		cfg.src_addr = cfg.dst_addr;
+		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
 		cfg.dst_addr = 0;
 		ret = dmaengine_slave_config(host->chan_rx, &cfg);
 		if (ret < 0)
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 282891a..54181b4 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -72,7 +72,6 @@
 #define BM_SPI_CS			0x20
 #define BM_SD_POWER			0x40
 #define BM_SOFT_RESET			0x80
-#define BM_ONEBIT_MASK			0xFD
 
 /* SDMMC_BLKLEN bit fields */
 #define BLKL_CRCERR_ABORT		0x0800
@@ -120,6 +119,8 @@
 #define STS2_DATARSP_BUSY		0x20
 #define STS2_DIS_FORCECLK		0x80
 
+/* SDMMC_EXTCTRL bit fields */
+#define EXT_EIGHTBIT			0x04
 
 /* MMC/SD DMA Controller Registers */
 #define SDDMA_GCR			0x100
@@ -672,7 +673,7 @@ static void wmt_mci_request(struct mmc_host *mmc, struct mmc_request *req)
 static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct wmt_mci_priv *priv;
-	u32 reg_tmp;
+	u32 busmode, extctrl;
 
 	priv = mmc_priv(mmc);
 
@@ -687,28 +688,26 @@ static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	if (ios->clock != 0)
 		clk_set_rate(priv->clk_sdmmc, ios->clock);
 
+	busmode = readb(priv->sdmmc_base + SDMMC_BUSMODE);
+	extctrl = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
+
+	busmode &= ~(BM_EIGHTBIT_MODE | BM_FOURBIT_MODE);
+	extctrl &= ~EXT_EIGHTBIT;
+
 	switch (ios->bus_width) {
 	case MMC_BUS_WIDTH_8:
-		reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
-		writeb(reg_tmp | 0x04, priv->sdmmc_base + SDMMC_EXTCTRL);
+		busmode |= BM_EIGHTBIT_MODE;
+		extctrl |= EXT_EIGHTBIT;
 		break;
 	case MMC_BUS_WIDTH_4:
-		reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
-		writeb(reg_tmp | BM_FOURBIT_MODE, priv->sdmmc_base +
-		       SDMMC_BUSMODE);
-
-		reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
-		writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
+		busmode |= BM_FOURBIT_MODE;
 		break;
 	case MMC_BUS_WIDTH_1:
-		reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
-		writeb(reg_tmp & BM_ONEBIT_MASK, priv->sdmmc_base +
-		       SDMMC_BUSMODE);
-
-		reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
-		writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
 		break;
 	}
+
+	writeb(busmode, priv->sdmmc_base + SDMMC_BUSMODE);
+	writeb(extctrl, priv->sdmmc_base + SDMMC_EXTCTRL);
 }
 
 static int wmt_mci_get_ro(struct mmc_host *mmc)
@@ -830,7 +829,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
 		goto fail3;
 	}
 
-	ret = request_irq(dma_irq, wmt_mci_dma_isr, 32, "sdmmc", priv);
+	ret = request_irq(dma_irq, wmt_mci_dma_isr, 0, "sdmmc", priv);
 	if (ret) {
 		dev_err(&pdev->dev, "Register DMA IRQ fail\n");
 		goto fail4;
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 8457df7..33c6495 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -378,9 +378,11 @@ int ubiblock_create(struct ubi_volume_info *vi)
 {
 	struct ubiblock *dev;
 	struct gendisk *gd;
-	int disk_capacity;
+	u64 disk_capacity = ((u64)vi->size * vi->usable_leb_size) >> 9;
 	int ret;
 
+	if ((sector_t)disk_capacity != disk_capacity)
+		return -EFBIG;
 	/* Check that the volume isn't already handled */
 	mutex_lock(&devices_mutex);
 	if (find_dev_nolock(vi->ubi_num, vi->vol_id)) {
@@ -412,7 +414,6 @@ int ubiblock_create(struct ubi_volume_info *vi)
 	gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id;
 	gd->private_data = dev;
 	sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id);
-	disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
 	set_capacity(gd, disk_capacity);
 	dev->gd = gd;
 
@@ -498,11 +499,16 @@ int ubiblock_remove(struct ubi_volume_info *vi)
 	return 0;
 }
 
-static void ubiblock_resize(struct ubi_volume_info *vi)
+static int ubiblock_resize(struct ubi_volume_info *vi)
 {
 	struct ubiblock *dev;
-	int disk_capacity;
+	u64 disk_capacity = ((u64)vi->size * vi->usable_leb_size) >> 9;
 
+	if ((sector_t)disk_capacity != disk_capacity) {
+		ubi_warn("%s: the volume is too big, cannot resize (%d LEBs)",
+			 dev->gd->disk_name, vi->size);
+		return -EFBIG;
+	}
 	/*
 	 * Need to lock the device list until we stop using the device,
 	 * otherwise the device struct might get released in
@@ -512,15 +518,15 @@ static void ubiblock_resize(struct ubi_volume_info *vi)
 	dev = find_dev_nolock(vi->ubi_num, vi->vol_id);
 	if (!dev) {
 		mutex_unlock(&devices_mutex);
-		return;
+		return -ENODEV;
 	}
 
 	mutex_lock(&dev->dev_mutex);
-	disk_capacity = (vi->size * vi->usable_leb_size) >> 9;
 	set_capacity(dev->gd, disk_capacity);
 	ubi_msg("%s resized to %d LEBs", dev->gd->disk_name, vi->size);
 	mutex_unlock(&dev->dev_mutex);
 	mutex_unlock(&devices_mutex);
+	return 0;
 }
 
 static int ubiblock_notify(struct notifier_block *nb,
diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index d77b1c1..07cac5f 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c
@@ -591,7 +591,7 @@ static int init_volumes(struct ubi_device *ubi,
 
 		/* Static volumes only */
 		av = ubi_find_av(ai, i);
-		if (!av) {
+		if (!av || !av->leb_count) {
 			/*
 			 * No eraseblocks belonging to this volume found. We
 			 * don't actually know whether this static volume is
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 0f3425d..20f4917 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1718,12 +1718,12 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
 	       vol_id, lnum, ubi->works_count);
 
 	while (found) {
-		struct ubi_work *wrk;
+		struct ubi_work *wrk, *tmp;
 		found = 0;
 
 		down_read(&ubi->work_sem);
 		spin_lock(&ubi->wl_lock);
-		list_for_each_entry(wrk, &ubi->works, list) {
+		list_for_each_entry_safe(wrk, tmp, &ubi->works, list) {
 			if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) &&
 			    (lnum == UBI_ALL || wrk->lnum == lnum)) {
 				list_del(&wrk->list);
diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c
index 326a612..1a790a2 100644
--- a/drivers/net/arcnet/com20020_cs.c
+++ b/drivers/net/arcnet/com20020_cs.c
@@ -112,20 +112,20 @@ static void com20020_detach(struct pcmcia_device *p_dev);
 
 /*====================================================================*/
 
-typedef struct com20020_dev_t {
+struct com20020_dev {
     struct net_device       *dev;
-} com20020_dev_t;
+};
 
 static int com20020_probe(struct pcmcia_device *p_dev)
 {
-    com20020_dev_t *info;
+    struct com20020_dev *info;
     struct net_device *dev;
     struct arcnet_local *lp;
 
     dev_dbg(&p_dev->dev, "com20020_attach()\n");
 
     /* Create new network device */
-    info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL);
+    info = kzalloc(sizeof(*info), GFP_KERNEL);
     if (!info)
 	goto fail_alloc_info;
 
@@ -160,7 +160,7 @@ fail_alloc_info:
 
 static void com20020_detach(struct pcmcia_device *link)
 {
-    struct com20020_dev_t *info = link->priv;
+    struct com20020_dev *info = link->priv;
     struct net_device *dev = info->dev;
 
     dev_dbg(&link->dev, "detach...\n");
@@ -199,7 +199,7 @@ static void com20020_detach(struct pcmcia_device *link)
 static int com20020_config(struct pcmcia_device *link)
 {
     struct arcnet_local *lp;
-    com20020_dev_t *info;
+    struct com20020_dev *info;
     struct net_device *dev;
     int i, ret;
     int ioaddr;
@@ -291,7 +291,7 @@ static void com20020_release(struct pcmcia_device *link)
 
 static int com20020_suspend(struct pcmcia_device *link)
 {
-	com20020_dev_t *info = link->priv;
+	struct com20020_dev *info = link->priv;
 	struct net_device *dev = info->dev;
 
 	if (link->open)
@@ -302,7 +302,7 @@ static int com20020_suspend(struct pcmcia_device *link)
 
 static int com20020_resume(struct pcmcia_device *link)
 {
-	com20020_dev_t *info = link->priv;
+	struct com20020_dev *info = link->priv;
 	struct net_device *dev = info->dev;
 
 	if (link->open) {
diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig
index 0988811..2d89bd0 100644
--- a/drivers/net/ethernet/8390/Kconfig
+++ b/drivers/net/ethernet/8390/Kconfig
@@ -91,7 +91,8 @@ config MCF8390
 
 config NE2000
 	tristate "NE2000/NE1000 support"
-	depends on (ISA || (Q40 && m) || M32R || MACH_TX49XX)
+	depends on (ISA || (Q40 && m) || M32R || MACH_TX49XX || \
+		    ATARI_ETHERNEC)
 	select CRC32
 	---help---
 	  If you have a network (Ethernet) card of this type, say Y and read
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 73c57a4..7769c05 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -108,7 +108,7 @@ static u32 axnet_msg_enable;
 
 /*====================================================================*/
 
-typedef struct axnet_dev_t {
+struct axnet_dev {
 	struct pcmcia_device	*p_dev;
 	caddr_t	base;
 	struct timer_list	watchdog;
@@ -118,9 +118,9 @@ typedef struct axnet_dev_t {
 	int	phy_id;
 	int	flags;
 	int	active_low;
-} axnet_dev_t;
+};
 
-static inline axnet_dev_t *PRIV(struct net_device *dev)
+static inline struct axnet_dev *PRIV(struct net_device *dev)
 {
 	void *p = (char *)netdev_priv(dev) + sizeof(struct ei_device);
 	return p;
@@ -141,13 +141,13 @@ static const struct net_device_ops axnet_netdev_ops = {
 
 static int axnet_probe(struct pcmcia_device *link)
 {
-    axnet_dev_t *info;
+    struct axnet_dev *info;
     struct net_device *dev;
     struct ei_device *ei_local;
 
     dev_dbg(&link->dev, "axnet_attach()\n");
 
-    dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(axnet_dev_t));
+    dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(struct axnet_dev));
     if (!dev)
 	return -ENOMEM;
 
@@ -274,7 +274,7 @@ static int axnet_configcheck(struct pcmcia_device *p_dev, void *priv_data)
 static int axnet_config(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    axnet_dev_t *info = PRIV(dev);
+    struct axnet_dev *info = PRIV(dev);
     int i, j, j2, ret;
 
     dev_dbg(&link->dev, "axnet_config(0x%p)\n", link);
@@ -389,7 +389,7 @@ static int axnet_suspend(struct pcmcia_device *link)
 static int axnet_resume(struct pcmcia_device *link)
 {
 	struct net_device *dev = link->priv;
-	axnet_dev_t *info = PRIV(dev);
+	struct axnet_dev *info = PRIV(dev);
 
 	if (link->open) {
 		if (info->active_low == 1)
@@ -467,7 +467,7 @@ static void mdio_write(unsigned int addr, int phy_id, int loc, int value)
 static int axnet_open(struct net_device *dev)
 {
     int ret;
-    axnet_dev_t *info = PRIV(dev);
+    struct axnet_dev *info = PRIV(dev);
     struct pcmcia_device *link = info->p_dev;
     unsigned int nic_base = dev->base_addr;
     
@@ -497,7 +497,7 @@ static int axnet_open(struct net_device *dev)
 
 static int axnet_close(struct net_device *dev)
 {
-    axnet_dev_t *info = PRIV(dev);
+    struct axnet_dev *info = PRIV(dev);
     struct pcmcia_device *link = info->p_dev;
 
     dev_dbg(&link->dev, "axnet_close('%s')\n", dev->name);
@@ -554,7 +554,7 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
 static void ei_watchdog(u_long arg)
 {
     struct net_device *dev = (struct net_device *)(arg);
-    axnet_dev_t *info = PRIV(dev);
+    struct axnet_dev *info = PRIV(dev);
     unsigned int nic_base = dev->base_addr;
     unsigned int mii_addr = nic_base + AXNET_MII_EEP;
     u_short link;
@@ -610,7 +610,7 @@ reschedule:
 
 static int axnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-    axnet_dev_t *info = PRIV(dev);
+    struct axnet_dev *info = PRIV(dev);
     struct mii_ioctl_data *data = if_mii(rq);
     unsigned int mii_addr = dev->base_addr + AXNET_MII_EEP;
     switch (cmd) {
@@ -1452,7 +1452,7 @@ static void ei_receive(struct net_device *dev)
 
 static void ei_rx_overrun(struct net_device *dev)
 {
-	axnet_dev_t *info = PRIV(dev);
+	struct axnet_dev *info = PRIV(dev);
 	long e8390_base = dev->base_addr;
 	unsigned char was_txing, must_resend = 0;
 	struct ei_device *ei_local = netdev_priv(dev);
@@ -1624,7 +1624,7 @@ static void set_multicast_list(struct net_device *dev)
 
 static void AX88190_init(struct net_device *dev, int startp)
 {
-	axnet_dev_t *info = PRIV(dev);
+	struct axnet_dev *info = PRIV(dev);
 	long e8390_base = dev->base_addr;
 	struct ei_device *ei_local = netdev_priv(dev);
 	int i;
diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c
index 58eaa8f..de566fb 100644
--- a/drivers/net/ethernet/8390/ne.c
+++ b/drivers/net/ethernet/8390/ne.c
@@ -169,6 +169,8 @@ bad_clone_list[] __initdata = {
 #elif defined(CONFIG_PLAT_OAKS32R)  || \
    defined(CONFIG_MACH_TX49XX)
 #  define DCR_VAL 0x48		/* 8-bit mode */
+#elif defined(CONFIG_ATARI)	/* 8-bit mode on Atari, normal on Q40 */
+#  define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49)
 #else
 #  define DCR_VAL 0x49
 #endif
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index ca3c2b9..9fb7b9d 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -111,11 +111,11 @@ static void pcnet_detach(struct pcmcia_device *p_dev);
 
 /*====================================================================*/
 
-typedef struct hw_info_t {
+struct hw_info {
     u_int	offset;
     u_char	a0, a1, a2;
     u_int	flags;
-} hw_info_t;
+};
 
 #define DELAY_OUTPUT	0x01
 #define HAS_MISC_REG	0x02
@@ -132,7 +132,7 @@ typedef struct hw_info_t {
 #define MII_PHYID_REG1		0x02
 #define MII_PHYID_REG2		0x03
 
-static hw_info_t hw_info[] = {
+static struct hw_info hw_info[] = {
     { /* Accton EN2212 */ 0x0ff0, 0x00, 0x00, 0xe8, DELAY_OUTPUT },
     { /* Allied Telesis LA-PCM */ 0x0ff0, 0x00, 0x00, 0xf4, 0 },
     { /* APEX MultiCard */ 0x03f4, 0x00, 0x20, 0xe5, 0 },
@@ -196,11 +196,11 @@ static hw_info_t hw_info[] = {
 
 #define NR_INFO		ARRAY_SIZE(hw_info)
 
-static hw_info_t default_info = { 0, 0, 0, 0, 0 };
-static hw_info_t dl10019_info = { 0, 0, 0, 0, IS_DL10019|HAS_MII };
-static hw_info_t dl10022_info = { 0, 0, 0, 0, IS_DL10022|HAS_MII };
+static struct hw_info default_info = { 0, 0, 0, 0, 0 };
+static struct hw_info dl10019_info = { 0, 0, 0, 0, IS_DL10019|HAS_MII };
+static struct hw_info dl10022_info = { 0, 0, 0, 0, IS_DL10022|HAS_MII };
 
-typedef struct pcnet_dev_t {
+struct pcnet_dev {
 	struct pcmcia_device	*p_dev;
     u_int		flags;
     void		__iomem *base;
@@ -210,12 +210,12 @@ typedef struct pcnet_dev_t {
     u_char		eth_phy, pna_phy;
     u_short		link_status;
     u_long		mii_reset;
-} pcnet_dev_t;
+};
 
-static inline pcnet_dev_t *PRIV(struct net_device *dev)
+static inline struct pcnet_dev *PRIV(struct net_device *dev)
 {
 	char *p = netdev_priv(dev);
-	return (pcnet_dev_t *)(p + sizeof(struct ei_device));
+	return (struct pcnet_dev *)(p + sizeof(struct ei_device));
 }
 
 static const struct net_device_ops pcnet_netdev_ops = {
@@ -237,13 +237,13 @@ static const struct net_device_ops pcnet_netdev_ops = {
 
 static int pcnet_probe(struct pcmcia_device *link)
 {
-    pcnet_dev_t *info;
+    struct pcnet_dev *info;
     struct net_device *dev;
 
     dev_dbg(&link->dev, "pcnet_attach()\n");
 
     /* Create new ethernet device */
-    dev = __alloc_ei_netdev(sizeof(pcnet_dev_t));
+    dev = __alloc_ei_netdev(sizeof(struct pcnet_dev));
     if (!dev) return -ENOMEM;
     info = PRIV(dev);
     info->p_dev = link;
@@ -276,7 +276,7 @@ static void pcnet_detach(struct pcmcia_device *link)
 
 ======================================================================*/
 
-static hw_info_t *get_hwinfo(struct pcmcia_device *link)
+static struct hw_info *get_hwinfo(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     u_char __iomem *base, *virt;
@@ -317,7 +317,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link)
 
 ======================================================================*/
 
-static hw_info_t *get_prom(struct pcmcia_device *link)
+static struct hw_info *get_prom(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     unsigned int ioaddr = dev->base_addr;
@@ -371,7 +371,7 @@ static hw_info_t *get_prom(struct pcmcia_device *link)
 
 ======================================================================*/
 
-static hw_info_t *get_dl10019(struct pcmcia_device *link)
+static struct hw_info *get_dl10019(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     int i;
@@ -393,7 +393,7 @@ static hw_info_t *get_dl10019(struct pcmcia_device *link)
 
 ======================================================================*/
 
-static hw_info_t *get_ax88190(struct pcmcia_device *link)
+static struct hw_info *get_ax88190(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     unsigned int ioaddr = dev->base_addr;
@@ -424,7 +424,7 @@ static hw_info_t *get_ax88190(struct pcmcia_device *link)
 
 ======================================================================*/
 
-static hw_info_t *get_hwired(struct pcmcia_device *link)
+static struct hw_info *get_hwired(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
     int i;
@@ -489,12 +489,12 @@ static int pcnet_confcheck(struct pcmcia_device *p_dev, void *priv_data)
 	return try_io_port(p_dev);
 }
 
-static hw_info_t *pcnet_try_config(struct pcmcia_device *link,
-				   int *has_shmem, int try)
+static struct hw_info *pcnet_try_config(struct pcmcia_device *link,
+					int *has_shmem, int try)
 {
 	struct net_device *dev = link->priv;
-	hw_info_t *local_hw_info;
-	pcnet_dev_t *info = PRIV(dev);
+	struct hw_info *local_hw_info;
+	struct pcnet_dev *info = PRIV(dev);
 	int priv = try;
 	int ret;
 
@@ -553,10 +553,10 @@ static hw_info_t *pcnet_try_config(struct pcmcia_device *link,
 static int pcnet_config(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     int start_pg, stop_pg, cm_offset;
     int has_shmem = 0;
-    hw_info_t *local_hw_info;
+    struct hw_info *local_hw_info;
     struct ei_device *ei_local;
 
     dev_dbg(&link->dev, "pcnet_config\n");
@@ -639,7 +639,7 @@ failed:
 
 static void pcnet_release(struct pcmcia_device *link)
 {
-	pcnet_dev_t *info = PRIV(link->priv);
+	struct pcnet_dev *info = PRIV(link->priv);
 
 	dev_dbg(&link->dev, "pcnet_release\n");
 
@@ -836,7 +836,7 @@ static void write_asic(unsigned int ioaddr, int location, short asic_data)
 static void set_misc_reg(struct net_device *dev)
 {
     unsigned int nic_base = dev->base_addr;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     u_char tmp;
 
     if (info->flags & HAS_MISC_REG) {
@@ -873,7 +873,7 @@ static void set_misc_reg(struct net_device *dev)
 
 static void mii_phy_probe(struct net_device *dev)
 {
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     unsigned int mii_addr = dev->base_addr + DLINK_GPIO;
     int i;
     u_int tmp, phyid;
@@ -898,7 +898,7 @@ static void mii_phy_probe(struct net_device *dev)
 static int pcnet_open(struct net_device *dev)
 {
     int ret;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     struct pcmcia_device *link = info->p_dev;
     unsigned int nic_base = dev->base_addr;
 
@@ -931,7 +931,7 @@ static int pcnet_open(struct net_device *dev)
 
 static int pcnet_close(struct net_device *dev)
 {
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     struct pcmcia_device *link = info->p_dev;
 
     dev_dbg(&link->dev, "pcnet_close('%s')\n", dev->name);
@@ -982,7 +982,7 @@ static void pcnet_reset_8390(struct net_device *dev)
 
 static int set_config(struct net_device *dev, struct ifmap *map)
 {
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     if ((map->port != (u_char)(-1)) && (map->port != dev->if_port)) {
 	if (!(info->flags & HAS_MISC_REG))
 	    return -EOPNOTSUPP;
@@ -1000,7 +1000,7 @@ static int set_config(struct net_device *dev, struct ifmap *map)
 static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
 {
     struct net_device *dev = dev_id;
-    pcnet_dev_t *info;
+    struct pcnet_dev *info;
     irqreturn_t ret = ei_interrupt(irq, dev_id);
 
     if (ret == IRQ_HANDLED) {
@@ -1013,7 +1013,7 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id)
 static void ei_watchdog(u_long arg)
 {
     struct net_device *dev = (struct net_device *)arg;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     unsigned int nic_base = dev->base_addr;
     unsigned int mii_addr = nic_base + DLINK_GPIO;
     u_short link;
@@ -1101,7 +1101,7 @@ reschedule:
 
 static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     struct mii_ioctl_data *data = if_mii(rq);
     unsigned int mii_addr = dev->base_addr + DLINK_GPIO;
 
@@ -1214,7 +1214,7 @@ static void dma_block_output(struct net_device *dev, int count,
 			     const u_char *buf, const int start_page)
 {
     unsigned int nic_base = dev->base_addr;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
 #ifdef PCMCIA_DEBUG
     int retries = 0;
     struct ei_device *ei_local = netdev_priv(dev);
@@ -1403,7 +1403,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg,
 			      int stop_pg, int cm_offset)
 {
     struct net_device *dev = link->priv;
-    pcnet_dev_t *info = PRIV(dev);
+    struct pcnet_dev *info = PRIV(dev);
     int i, window_size, offset, ret;
 
     window_size = (stop_pg - start_pg) << 8;
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index edb7186..dc7406c 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -24,6 +24,7 @@ source "drivers/net/ethernet/allwinner/Kconfig"
 source "drivers/net/ethernet/alteon/Kconfig"
 source "drivers/net/ethernet/altera/Kconfig"
 source "drivers/net/ethernet/amd/Kconfig"
+source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
 source "drivers/net/ethernet/arc/Kconfig"
 source "drivers/net/ethernet/atheros/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 58de333..224a018 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
 obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
 obj-$(CONFIG_ALTERA_TSE) += altera/
 obj-$(CONFIG_NET_VENDOR_AMD) += amd/
+obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
 obj-$(CONFIG_NET_VENDOR_ARC) += arc/
 obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 1f5487f..dc84f71 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -117,7 +117,6 @@
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
-#include <linux/phy.h>
 #include <net/busy_poll.h>
 #include <linux/clk.h>
 #include <linux/if_ether.h>
diff --git a/drivers/net/ethernet/apm/Kconfig b/drivers/net/ethernet/apm/Kconfig
new file mode 100644
index 0000000..ec63d70
--- /dev/null
+++ b/drivers/net/ethernet/apm/Kconfig
@@ -0,0 +1 @@
+source "drivers/net/ethernet/apm/xgene/Kconfig"
diff --git a/drivers/net/ethernet/apm/Makefile b/drivers/net/ethernet/apm/Makefile
new file mode 100644
index 0000000..65ce32a
--- /dev/null
+++ b/drivers/net/ethernet/apm/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for APM X-GENE Ethernet driver.
+#
+
+obj-$(CONFIG_NET_XGENE) += xgene/
diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig
new file mode 100644
index 0000000..616dff6
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/Kconfig
@@ -0,0 +1,9 @@
+config NET_XGENE
+	tristate "APM X-Gene SoC Ethernet Driver"
+	select PHYLIB
+	help
+	  This is the Ethernet driver for the on-chip ethernet interface on the
+	  APM X-Gene SoC.
+
+	  To compile this driver as a module, choose M here. This module will
+	  be called xgene_enet.
diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile
new file mode 100644
index 0000000..c643e8a
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for APM X-Gene Ethernet Driver.
+#
+
+xgene-enet-objs := xgene_enet_hw.o xgene_enet_main.o xgene_enet_ethtool.o
+obj-$(CONFIG_NET_XGENE) += xgene-enet.o
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
new file mode 100644
index 0000000..63f2aa5
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -0,0 +1,125 @@
+/* Applied Micro X-Gene SoC Ethernet Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Authors: Iyappan Subramanian <isubramanian@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/ethtool.h>
+#include "xgene_enet_main.h"
+
+struct xgene_gstrings_stats {
+	char name[ETH_GSTRING_LEN];
+	int offset;
+};
+
+#define XGENE_STAT(m) { #m, offsetof(struct xgene_enet_pdata, stats.m) }
+
+static const struct xgene_gstrings_stats gstrings_stats[] = {
+	XGENE_STAT(rx_packets),
+	XGENE_STAT(tx_packets),
+	XGENE_STAT(rx_bytes),
+	XGENE_STAT(tx_bytes),
+	XGENE_STAT(rx_errors),
+	XGENE_STAT(tx_errors),
+	XGENE_STAT(rx_length_errors),
+	XGENE_STAT(rx_crc_errors),
+	XGENE_STAT(rx_frame_errors),
+	XGENE_STAT(rx_fifo_errors)
+};
+
+#define XGENE_STATS_LEN		ARRAY_SIZE(gstrings_stats)
+
+static void xgene_get_drvinfo(struct net_device *ndev,
+			      struct ethtool_drvinfo *info)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct platform_device *pdev = pdata->pdev;
+
+	strcpy(info->driver, "xgene_enet");
+	strcpy(info->version, XGENE_DRV_VERSION);
+	snprintf(info->fw_version, ETHTOOL_FWVERS_LEN, "N/A");
+	sprintf(info->bus_info, "%s", pdev->name);
+}
+
+static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct phy_device *phydev = pdata->phy_dev;
+
+	if (phydev == NULL)
+		return -ENODEV;
+
+	return phy_ethtool_gset(phydev, cmd);
+}
+
+static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct phy_device *phydev = pdata->phy_dev;
+
+	if (phydev == NULL)
+		return -ENODEV;
+
+	return phy_ethtool_sset(phydev, cmd);
+}
+
+static void xgene_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
+{
+	int i;
+	u8 *p = data;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < XGENE_STATS_LEN; i++) {
+		memcpy(p, gstrings_stats[i].name, ETH_GSTRING_LEN);
+		p += ETH_GSTRING_LEN;
+	}
+}
+
+static int xgene_get_sset_count(struct net_device *ndev, int sset)
+{
+	if (sset != ETH_SS_STATS)
+		return -EINVAL;
+
+	return XGENE_STATS_LEN;
+}
+
+static void xgene_get_ethtool_stats(struct net_device *ndev,
+				    struct ethtool_stats *dummy,
+				    u64 *data)
+{
+	void *pdata = netdev_priv(ndev);
+	int i;
+
+	for (i = 0; i < XGENE_STATS_LEN; i++)
+		*data++ = *(u64 *)(pdata + gstrings_stats[i].offset);
+}
+
+static const struct ethtool_ops xgene_ethtool_ops = {
+	.get_drvinfo = xgene_get_drvinfo,
+	.get_settings = xgene_get_settings,
+	.set_settings = xgene_set_settings,
+	.get_link = ethtool_op_get_link,
+	.get_strings = xgene_get_strings,
+	.get_sset_count = xgene_get_sset_count,
+	.get_ethtool_stats = xgene_get_ethtool_stats
+};
+
+void xgene_enet_set_ethtool_ops(struct net_device *ndev)
+{
+	ndev->ethtool_ops = &xgene_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
new file mode 100644
index 0000000..812d8d6
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -0,0 +1,728 @@
+/* Applied Micro X-Gene SoC Ethernet Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Authors: Iyappan Subramanian <isubramanian@apm.com>
+ *	    Ravi Patel <rapatel@apm.com>
+ *	    Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "xgene_enet_main.h"
+#include "xgene_enet_hw.h"
+
+static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring)
+{
+	u32 *ring_cfg = ring->state;
+	u64 addr = ring->dma;
+	enum xgene_enet_ring_cfgsize cfgsize = ring->cfgsize;
+
+	ring_cfg[4] |= (1 << SELTHRSH_POS) &
+			CREATE_MASK(SELTHRSH_POS, SELTHRSH_LEN);
+	ring_cfg[3] |= ACCEPTLERR;
+	ring_cfg[2] |= QCOHERENT;
+
+	addr >>= 8;
+	ring_cfg[2] |= (addr << RINGADDRL_POS) &
+			CREATE_MASK_ULL(RINGADDRL_POS, RINGADDRL_LEN);
+	addr >>= RINGADDRL_LEN;
+	ring_cfg[3] |= addr & CREATE_MASK_ULL(RINGADDRH_POS, RINGADDRH_LEN);
+	ring_cfg[3] |= ((u32)cfgsize << RINGSIZE_POS) &
+			CREATE_MASK(RINGSIZE_POS, RINGSIZE_LEN);
+}
+
+static void xgene_enet_ring_set_type(struct xgene_enet_desc_ring *ring)
+{
+	u32 *ring_cfg = ring->state;
+	bool is_bufpool;
+	u32 val;
+
+	is_bufpool = xgene_enet_is_bufpool(ring->id);
+	val = (is_bufpool) ? RING_BUFPOOL : RING_REGULAR;
+	ring_cfg[4] |= (val << RINGTYPE_POS) &
+			CREATE_MASK(RINGTYPE_POS, RINGTYPE_LEN);
+
+	if (is_bufpool) {
+		ring_cfg[3] |= (BUFPOOL_MODE << RINGMODE_POS) &
+				CREATE_MASK(RINGMODE_POS, RINGMODE_LEN);
+	}
+}
+
+static void xgene_enet_ring_set_recombbuf(struct xgene_enet_desc_ring *ring)
+{
+	u32 *ring_cfg = ring->state;
+
+	ring_cfg[3] |= RECOMBBUF;
+	ring_cfg[3] |= (0xf << RECOMTIMEOUTL_POS) &
+			CREATE_MASK(RECOMTIMEOUTL_POS, RECOMTIMEOUTL_LEN);
+	ring_cfg[4] |= 0x7 & CREATE_MASK(RECOMTIMEOUTH_POS, RECOMTIMEOUTH_LEN);
+}
+
+static void xgene_enet_ring_wr32(struct xgene_enet_desc_ring *ring,
+				 u32 offset, u32 data)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev);
+
+	iowrite32(data, pdata->ring_csr_addr + offset);
+}
+
+static void xgene_enet_ring_rd32(struct xgene_enet_desc_ring *ring,
+				 u32 offset, u32 *data)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev);
+
+	*data = ioread32(pdata->ring_csr_addr + offset);
+}
+
+static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring)
+{
+	int i;
+
+	xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num);
+	for (i = 0; i < NUM_RING_CONFIG; i++) {
+		xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4),
+				     ring->state[i]);
+	}
+}
+
+static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring)
+{
+	memset(ring->state, 0, sizeof(u32) * NUM_RING_CONFIG);
+	xgene_enet_write_ring_state(ring);
+}
+
+static void xgene_enet_set_ring_state(struct xgene_enet_desc_ring *ring)
+{
+	xgene_enet_ring_set_type(ring);
+
+	if (xgene_enet_ring_owner(ring->id) == RING_OWNER_ETH0)
+		xgene_enet_ring_set_recombbuf(ring);
+
+	xgene_enet_ring_init(ring);
+	xgene_enet_write_ring_state(ring);
+}
+
+static void xgene_enet_set_ring_id(struct xgene_enet_desc_ring *ring)
+{
+	u32 ring_id_val, ring_id_buf;
+	bool is_bufpool;
+
+	is_bufpool = xgene_enet_is_bufpool(ring->id);
+
+	ring_id_val = ring->id & GENMASK(9, 0);
+	ring_id_val |= OVERWRITE;
+
+	ring_id_buf = (ring->num << 9) & GENMASK(18, 9);
+	ring_id_buf |= PREFETCH_BUF_EN;
+	if (is_bufpool)
+		ring_id_buf |= IS_BUFFER_POOL;
+
+	xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id_val);
+	xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, ring_id_buf);
+}
+
+static void xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring)
+{
+	u32 ring_id;
+
+	ring_id = ring->id | OVERWRITE;
+	xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id);
+	xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0);
+}
+
+struct xgene_enet_desc_ring *xgene_enet_setup_ring(
+					struct xgene_enet_desc_ring *ring)
+{
+	u32 size = ring->size;
+	u32 i, data;
+	bool is_bufpool;
+
+	xgene_enet_clr_ring_state(ring);
+	xgene_enet_set_ring_state(ring);
+	xgene_enet_set_ring_id(ring);
+
+	ring->slots = xgene_enet_get_numslots(ring->id, size);
+
+	is_bufpool = xgene_enet_is_bufpool(ring->id);
+	if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU)
+		return ring;
+
+	for (i = 0; i < ring->slots; i++)
+		xgene_enet_mark_desc_slot_empty(&ring->raw_desc[i]);
+
+	xgene_enet_ring_rd32(ring, CSR_RING_NE_INT_MODE, &data);
+	data |= BIT(31 - xgene_enet_ring_bufnum(ring->id));
+	xgene_enet_ring_wr32(ring, CSR_RING_NE_INT_MODE, data);
+
+	return ring;
+}
+
+void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring)
+{
+	u32 data;
+	bool is_bufpool;
+
+	is_bufpool = xgene_enet_is_bufpool(ring->id);
+	if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU)
+		goto out;
+
+	xgene_enet_ring_rd32(ring, CSR_RING_NE_INT_MODE, &data);
+	data &= ~BIT(31 - xgene_enet_ring_bufnum(ring->id));
+	xgene_enet_ring_wr32(ring, CSR_RING_NE_INT_MODE, data);
+
+out:
+	xgene_enet_clr_desc_ring_id(ring);
+	xgene_enet_clr_ring_state(ring);
+}
+
+void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring,
+			    struct xgene_enet_pdata *pdata,
+			    enum xgene_enet_err_code status)
+{
+	struct rtnl_link_stats64 *stats = &pdata->stats;
+
+	switch (status) {
+	case INGRESS_CRC:
+		stats->rx_crc_errors++;
+		break;
+	case INGRESS_CHECKSUM:
+	case INGRESS_CHECKSUM_COMPUTE:
+		stats->rx_errors++;
+		break;
+	case INGRESS_TRUNC_FRAME:
+		stats->rx_frame_errors++;
+		break;
+	case INGRESS_PKT_LEN:
+		stats->rx_length_errors++;
+		break;
+	case INGRESS_PKT_UNDER:
+		stats->rx_frame_errors++;
+		break;
+	case INGRESS_FIFO_OVERRUN:
+		stats->rx_fifo_errors++;
+		break;
+	default:
+		break;
+	}
+}
+
+static void xgene_enet_wr_csr(struct xgene_enet_pdata *pdata,
+			      u32 offset, u32 val)
+{
+	void __iomem *addr = pdata->eth_csr_addr + offset;
+
+	iowrite32(val, addr);
+}
+
+static void xgene_enet_wr_ring_if(struct xgene_enet_pdata *pdata,
+				  u32 offset, u32 val)
+{
+	void __iomem *addr = pdata->eth_ring_if_addr + offset;
+
+	iowrite32(val, addr);
+}
+
+static void xgene_enet_wr_diag_csr(struct xgene_enet_pdata *pdata,
+				   u32 offset, u32 val)
+{
+	void __iomem *addr = pdata->eth_diag_csr_addr + offset;
+
+	iowrite32(val, addr);
+}
+
+static void xgene_enet_wr_mcx_csr(struct xgene_enet_pdata *pdata,
+				  u32 offset, u32 val)
+{
+	void __iomem *addr = pdata->mcx_mac_csr_addr + offset;
+
+	iowrite32(val, addr);
+}
+
+static bool xgene_enet_wr_indirect(void __iomem *addr, void __iomem *wr,
+				   void __iomem *cmd, void __iomem *cmd_done,
+				   u32 wr_addr, u32 wr_data)
+{
+	u32 done;
+	u8 wait = 10;
+
+	iowrite32(wr_addr, addr);
+	iowrite32(wr_data, wr);
+	iowrite32(XGENE_ENET_WR_CMD, cmd);
+
+	/* wait for write command to complete */
+	while (!(done = ioread32(cmd_done)) && wait--)
+		udelay(1);
+
+	if (!done)
+		return false;
+
+	iowrite32(0, cmd);
+
+	return true;
+}
+
+static void xgene_enet_wr_mcx_mac(struct xgene_enet_pdata *pdata,
+				  u32 wr_addr, u32 wr_data)
+{
+	void __iomem *addr, *wr, *cmd, *cmd_done;
+
+	addr = pdata->mcx_mac_addr + MAC_ADDR_REG_OFFSET;
+	wr = pdata->mcx_mac_addr + MAC_WRITE_REG_OFFSET;
+	cmd = pdata->mcx_mac_addr + MAC_COMMAND_REG_OFFSET;
+	cmd_done = pdata->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET;
+
+	if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data))
+		netdev_err(pdata->ndev, "MCX mac write failed, addr: %04x\n",
+			   wr_addr);
+}
+
+static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata,
+			      u32 offset, u32 *val)
+{
+	void __iomem *addr = pdata->eth_csr_addr + offset;
+
+	*val = ioread32(addr);
+}
+
+static void xgene_enet_rd_diag_csr(struct xgene_enet_pdata *pdata,
+				   u32 offset, u32 *val)
+{
+	void __iomem *addr = pdata->eth_diag_csr_addr + offset;
+
+	*val = ioread32(addr);
+}
+
+static void xgene_enet_rd_mcx_csr(struct xgene_enet_pdata *pdata,
+				  u32 offset, u32 *val)
+{
+	void __iomem *addr = pdata->mcx_mac_csr_addr + offset;
+
+	*val = ioread32(addr);
+}
+
+static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd,
+				   void __iomem *cmd, void __iomem *cmd_done,
+				   u32 rd_addr, u32 *rd_data)
+{
+	u32 done;
+	u8 wait = 10;
+
+	iowrite32(rd_addr, addr);
+	iowrite32(XGENE_ENET_RD_CMD, cmd);
+
+	/* wait for read command to complete */
+	while (!(done = ioread32(cmd_done)) && wait--)
+		udelay(1);
+
+	if (!done)
+		return false;
+
+	*rd_data = ioread32(rd);
+	iowrite32(0, cmd);
+
+	return true;
+}
+
+static void xgene_enet_rd_mcx_mac(struct xgene_enet_pdata *pdata,
+				  u32 rd_addr, u32 *rd_data)
+{
+	void __iomem *addr, *rd, *cmd, *cmd_done;
+
+	addr = pdata->mcx_mac_addr + MAC_ADDR_REG_OFFSET;
+	rd = pdata->mcx_mac_addr + MAC_READ_REG_OFFSET;
+	cmd = pdata->mcx_mac_addr + MAC_COMMAND_REG_OFFSET;
+	cmd_done = pdata->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET;
+
+	if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data))
+		netdev_err(pdata->ndev, "MCX mac read failed, addr: %04x\n",
+			   rd_addr);
+}
+
+static int xgene_mii_phy_write(struct xgene_enet_pdata *pdata, int phy_id,
+			       u32 reg, u16 data)
+{
+	u32 addr = 0, wr_data = 0;
+	u32 done;
+	u8 wait = 10;
+
+	PHY_ADDR_SET(&addr, phy_id);
+	REG_ADDR_SET(&addr, reg);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_ADDRESS_ADDR, addr);
+
+	PHY_CONTROL_SET(&wr_data, data);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONTROL_ADDR, wr_data);
+	do {
+		usleep_range(5, 10);
+		xgene_enet_rd_mcx_mac(pdata, MII_MGMT_INDICATORS_ADDR, &done);
+	} while ((done & BUSY_MASK) && wait--);
+
+	if (done & BUSY_MASK) {
+		netdev_err(pdata->ndev, "MII_MGMT write failed\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int xgene_mii_phy_read(struct xgene_enet_pdata *pdata,
+			      u8 phy_id, u32 reg)
+{
+	u32 addr = 0;
+	u32 data, done;
+	u8 wait = 10;
+
+	PHY_ADDR_SET(&addr, phy_id);
+	REG_ADDR_SET(&addr, reg);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_ADDRESS_ADDR, addr);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_COMMAND_ADDR, READ_CYCLE_MASK);
+	do {
+		usleep_range(5, 10);
+		xgene_enet_rd_mcx_mac(pdata, MII_MGMT_INDICATORS_ADDR, &done);
+	} while ((done & BUSY_MASK) && wait--);
+
+	if (done & BUSY_MASK) {
+		netdev_err(pdata->ndev, "MII_MGMT read failed\n");
+		return -EBUSY;
+	}
+
+	xgene_enet_rd_mcx_mac(pdata, MII_MGMT_STATUS_ADDR, &data);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_COMMAND_ADDR, 0);
+
+	return data;
+}
+
+void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata)
+{
+	u32 addr0, addr1;
+	u8 *dev_addr = pdata->ndev->dev_addr;
+
+	addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
+		(dev_addr[1] << 8) | dev_addr[0];
+	addr1 = (dev_addr[5] << 24) | (dev_addr[4] << 16);
+	addr1 |= pdata->phy_addr & 0xFFFF;
+
+	xgene_enet_wr_mcx_mac(pdata, STATION_ADDR0_ADDR, addr0);
+	xgene_enet_wr_mcx_mac(pdata, STATION_ADDR1_ADDR, addr1);
+}
+
+static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
+{
+	struct net_device *ndev = pdata->ndev;
+	u32 data;
+	u8 wait = 10;
+
+	xgene_enet_wr_diag_csr(pdata, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0x0);
+	do {
+		usleep_range(100, 110);
+		xgene_enet_rd_diag_csr(pdata, ENET_BLOCK_MEM_RDY_ADDR, &data);
+	} while ((data != 0xffffffff) && wait--);
+
+	if (data != 0xffffffff) {
+		netdev_err(ndev, "Failed to release memory from shutdown\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+void xgene_gmac_reset(struct xgene_enet_pdata *pdata)
+{
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, SOFT_RESET1);
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, 0);
+}
+
+void xgene_gmac_init(struct xgene_enet_pdata *pdata, int speed)
+{
+	u32 value, mc2;
+	u32 intf_ctl, rgmii;
+	u32 icm0, icm2;
+
+	xgene_gmac_reset(pdata);
+
+	xgene_enet_rd_mcx_csr(pdata, ICM_CONFIG0_REG_0_ADDR, &icm0);
+	xgene_enet_rd_mcx_csr(pdata, ICM_CONFIG2_REG_0_ADDR, &icm2);
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_2_ADDR, &mc2);
+	xgene_enet_rd_mcx_mac(pdata, INTERFACE_CONTROL_ADDR, &intf_ctl);
+	xgene_enet_rd_csr(pdata, RGMII_REG_0_ADDR, &rgmii);
+
+	switch (speed) {
+	case SPEED_10:
+		ENET_INTERFACE_MODE2_SET(&mc2, 1);
+		CFG_MACMODE_SET(&icm0, 0);
+		CFG_WAITASYNCRD_SET(&icm2, 500);
+		rgmii &= ~CFG_SPEED_1250;
+		break;
+	case SPEED_100:
+		ENET_INTERFACE_MODE2_SET(&mc2, 1);
+		intf_ctl |= ENET_LHD_MODE;
+		CFG_MACMODE_SET(&icm0, 1);
+		CFG_WAITASYNCRD_SET(&icm2, 80);
+		rgmii &= ~CFG_SPEED_1250;
+		break;
+	default:
+		ENET_INTERFACE_MODE2_SET(&mc2, 2);
+		intf_ctl |= ENET_GHD_MODE;
+		CFG_TXCLK_MUXSEL0_SET(&rgmii, 4);
+		xgene_enet_rd_csr(pdata, DEBUG_REG_ADDR, &value);
+		value |= CFG_BYPASS_UNISEC_TX | CFG_BYPASS_UNISEC_RX;
+		xgene_enet_wr_csr(pdata, DEBUG_REG_ADDR, value);
+		break;
+	}
+
+	mc2 |= FULL_DUPLEX2;
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_2_ADDR, mc2);
+	xgene_enet_wr_mcx_mac(pdata, INTERFACE_CONTROL_ADDR, intf_ctl);
+
+	xgene_gmac_set_mac_addr(pdata);
+
+	/* Adjust MDC clock frequency */
+	xgene_enet_rd_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, &value);
+	MGMT_CLOCK_SEL_SET(&value, 7);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, value);
+
+	/* Enable drop if bufpool not available */
+	xgene_enet_rd_csr(pdata, RSIF_CONFIG_REG_ADDR, &value);
+	value |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
+	xgene_enet_wr_csr(pdata, RSIF_CONFIG_REG_ADDR, value);
+
+	/* Rtype should be copied from FP */
+	xgene_enet_wr_csr(pdata, RSIF_RAM_DBG_REG0_ADDR, 0);
+	xgene_enet_wr_csr(pdata, RGMII_REG_0_ADDR, rgmii);
+
+	/* Rx-Tx traffic resume */
+	xgene_enet_wr_csr(pdata, CFG_LINK_AGGR_RESUME_0_ADDR, TX_PORT0);
+
+	xgene_enet_wr_mcx_csr(pdata, ICM_CONFIG0_REG_0_ADDR, icm0);
+	xgene_enet_wr_mcx_csr(pdata, ICM_CONFIG2_REG_0_ADDR, icm2);
+
+	xgene_enet_rd_mcx_csr(pdata, RX_DV_GATE_REG_0_ADDR, &value);
+	value &= ~TX_DV_GATE_EN0;
+	value &= ~RX_DV_GATE_EN0;
+	value |= RESUME_RX0;
+	xgene_enet_wr_mcx_csr(pdata, RX_DV_GATE_REG_0_ADDR, value);
+
+	xgene_enet_wr_csr(pdata, CFG_BYPASS_ADDR, RESUME_TX);
+}
+
+static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *pdata)
+{
+	u32 val = 0xffffffff;
+
+	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIWQASSOC_ADDR, val);
+	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIFPQASSOC_ADDR, val);
+	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIQMLITEWQASSOC_ADDR, val);
+	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIQMLITEFPQASSOC_ADDR, val);
+}
+
+void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata,
+			   u32 dst_ring_num, u16 bufpool_id)
+{
+	u32 cb;
+	u32 fpsel;
+
+	fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20;
+
+	xgene_enet_rd_csr(pdata, CLE_BYPASS_REG0_0_ADDR, &cb);
+	cb |= CFG_CLE_BYPASS_EN0;
+	CFG_CLE_IP_PROTOCOL0_SET(&cb, 3);
+	xgene_enet_wr_csr(pdata, CLE_BYPASS_REG0_0_ADDR, cb);
+
+	xgene_enet_rd_csr(pdata, CLE_BYPASS_REG1_0_ADDR, &cb);
+	CFG_CLE_DSTQID0_SET(&cb, dst_ring_num);
+	CFG_CLE_FPSEL0_SET(&cb, fpsel);
+	xgene_enet_wr_csr(pdata, CLE_BYPASS_REG1_0_ADDR, cb);
+}
+
+void xgene_gmac_rx_enable(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data);
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data | RX_EN);
+}
+
+void xgene_gmac_tx_enable(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data);
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data | TX_EN);
+}
+
+void xgene_gmac_rx_disable(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data);
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data & ~RX_EN);
+}
+
+void xgene_gmac_tx_disable(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data);
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data & ~TX_EN);
+}
+
+void xgene_enet_reset(struct xgene_enet_pdata *pdata)
+{
+	u32 val;
+
+	clk_prepare_enable(pdata->clk);
+	clk_disable_unprepare(pdata->clk);
+	clk_prepare_enable(pdata->clk);
+	xgene_enet_ecc_init(pdata);
+	xgene_enet_config_ring_if_assoc(pdata);
+
+	/* Enable auto-incr for scanning */
+	xgene_enet_rd_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, &val);
+	val |= SCAN_AUTO_INCR;
+	MGMT_CLOCK_SEL_SET(&val, 1);
+	xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, val);
+}
+
+void xgene_gport_shutdown(struct xgene_enet_pdata *pdata)
+{
+	clk_disable_unprepare(pdata->clk);
+}
+
+static int xgene_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct xgene_enet_pdata *pdata = bus->priv;
+	u32 val;
+
+	val = xgene_mii_phy_read(pdata, mii_id, regnum);
+	netdev_dbg(pdata->ndev, "mdio_rd: bus=%d reg=%d val=%x\n",
+		   mii_id, regnum, val);
+
+	return val;
+}
+
+static int xgene_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
+				 u16 val)
+{
+	struct xgene_enet_pdata *pdata = bus->priv;
+
+	netdev_dbg(pdata->ndev, "mdio_wr: bus=%d reg=%d val=%x\n",
+		   mii_id, regnum, val);
+	return xgene_mii_phy_write(pdata, mii_id, regnum, val);
+}
+
+static void xgene_enet_adjust_link(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct phy_device *phydev = pdata->phy_dev;
+
+	if (phydev->link) {
+		if (pdata->phy_speed != phydev->speed) {
+			xgene_gmac_init(pdata, phydev->speed);
+			xgene_gmac_rx_enable(pdata);
+			xgene_gmac_tx_enable(pdata);
+			pdata->phy_speed = phydev->speed;
+			phy_print_status(phydev);
+		}
+	} else {
+		xgene_gmac_rx_disable(pdata);
+		xgene_gmac_tx_disable(pdata);
+		pdata->phy_speed = SPEED_UNKNOWN;
+		phy_print_status(phydev);
+	}
+}
+
+static int xgene_enet_phy_connect(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct device_node *phy_np;
+	struct phy_device *phy_dev;
+	struct device *dev = &pdata->pdev->dev;
+
+	phy_np = of_parse_phandle(dev->of_node, "phy-handle", 0);
+	if (!phy_np) {
+		netdev_dbg(ndev, "No phy-handle found\n");
+		return -ENODEV;
+	}
+
+	phy_dev = of_phy_connect(ndev, phy_np, &xgene_enet_adjust_link,
+				 0, pdata->phy_mode);
+	if (!phy_dev) {
+		netdev_err(ndev, "Could not connect to PHY\n");
+		return  -ENODEV;
+	}
+
+	pdata->phy_speed = SPEED_UNKNOWN;
+	phy_dev->supported &= ~SUPPORTED_10baseT_Half &
+			      ~SUPPORTED_100baseT_Half &
+			      ~SUPPORTED_1000baseT_Half;
+	phy_dev->advertising = phy_dev->supported;
+	pdata->phy_dev = phy_dev;
+
+	return 0;
+}
+
+int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata)
+{
+	struct net_device *ndev = pdata->ndev;
+	struct device *dev = &pdata->pdev->dev;
+	struct device_node *child_np;
+	struct device_node *mdio_np = NULL;
+	struct mii_bus *mdio_bus;
+	int ret;
+
+	for_each_child_of_node(dev->of_node, child_np) {
+		if (of_device_is_compatible(child_np, "apm,xgene-mdio")) {
+			mdio_np = child_np;
+			break;
+		}
+	}
+
+	if (!mdio_np) {
+		netdev_dbg(ndev, "No mdio node in the dts\n");
+		return -ENXIO;
+	}
+
+	mdio_bus = mdiobus_alloc();
+	if (!mdio_bus)
+		return -ENOMEM;
+
+	mdio_bus->name = "APM X-Gene MDIO bus";
+	mdio_bus->read = xgene_enet_mdio_read;
+	mdio_bus->write = xgene_enet_mdio_write;
+	snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-%s", "xgene-mii",
+		 ndev->name);
+
+	mdio_bus->priv = pdata;
+	mdio_bus->parent = &ndev->dev;
+
+	ret = of_mdiobus_register(mdio_bus, mdio_np);
+	if (ret) {
+		netdev_err(ndev, "Failed to register MDIO bus\n");
+		mdiobus_free(mdio_bus);
+		return ret;
+	}
+	pdata->mdio_bus = mdio_bus;
+
+	ret = xgene_enet_phy_connect(ndev);
+	if (ret)
+		xgene_enet_mdio_remove(pdata);
+
+	return ret;
+}
+
+void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata)
+{
+	mdiobus_unregister(pdata->mdio_bus);
+	mdiobus_free(pdata->mdio_bus);
+	pdata->mdio_bus = NULL;
+}
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
new file mode 100644
index 0000000..371e7a5
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -0,0 +1,337 @@
+/* Applied Micro X-Gene SoC Ethernet Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Authors: Iyappan Subramanian <isubramanian@apm.com>
+ *	    Ravi Patel <rapatel@apm.com>
+ *	    Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_HW_H__
+#define __XGENE_ENET_HW_H__
+
+#include "xgene_enet_main.h"
+
+struct xgene_enet_pdata;
+struct xgene_enet_stats;
+
+/* clears and then set bits */
+static inline void xgene_set_bits(u32 *dst, u32 val, u32 start, u32 len)
+{
+	u32 end = start + len - 1;
+	u32 mask = GENMASK(end, start);
+
+	*dst &= ~mask;
+	*dst |= (val << start) & mask;
+}
+
+static inline u32 xgene_get_bits(u32 val, u32 start, u32 end)
+{
+	return (val & GENMASK(end, start)) >> start;
+}
+
+#define CSR_RING_ID		0x0008
+#define OVERWRITE		BIT(31)
+#define IS_BUFFER_POOL		BIT(20)
+#define PREFETCH_BUF_EN		BIT(21)
+#define CSR_RING_ID_BUF		0x000c
+#define CSR_RING_NE_INT_MODE	0x017c
+#define CSR_RING_CONFIG		0x006c
+#define CSR_RING_WR_BASE	0x0070
+#define NUM_RING_CONFIG		5
+#define BUFPOOL_MODE		3
+#define RM3			3
+#define INC_DEC_CMD_ADDR	0x002c
+#define UDP_HDR_SIZE		2
+#define BUF_LEN_CODE_2K		0x5000
+
+#define CREATE_MASK(pos, len)		GENMASK((pos)+(len)-1, (pos))
+#define CREATE_MASK_ULL(pos, len)	GENMASK_ULL((pos)+(len)-1, (pos))
+
+/* Empty slot soft signature */
+#define EMPTY_SLOT_INDEX	1
+#define EMPTY_SLOT		~0ULL
+
+#define WORK_DESC_SIZE		32
+#define BUFPOOL_DESC_SIZE	16
+
+#define RING_OWNER_MASK		GENMASK(9, 6)
+#define RING_BUFNUM_MASK	GENMASK(5, 0)
+
+#define SELTHRSH_POS		3
+#define SELTHRSH_LEN		3
+#define RINGADDRL_POS		5
+#define RINGADDRL_LEN		27
+#define RINGADDRH_POS		0
+#define RINGADDRH_LEN		6
+#define RINGSIZE_POS		23
+#define RINGSIZE_LEN		3
+#define RINGTYPE_POS		19
+#define RINGTYPE_LEN		2
+#define RINGMODE_POS		20
+#define RINGMODE_LEN		3
+#define RECOMTIMEOUTL_POS	28
+#define RECOMTIMEOUTL_LEN	3
+#define RECOMTIMEOUTH_POS	0
+#define RECOMTIMEOUTH_LEN	2
+#define NUMMSGSINQ_POS		1
+#define NUMMSGSINQ_LEN		16
+#define ACCEPTLERR		BIT(19)
+#define QCOHERENT		BIT(4)
+#define RECOMBBUF		BIT(27)
+
+#define BLOCK_ETH_CSR_OFFSET		0x2000
+#define BLOCK_ETH_RING_IF_OFFSET	0x9000
+#define BLOCK_ETH_CLKRST_CSR_OFFSET	0xC000
+#define BLOCK_ETH_DIAG_CSR_OFFSET	0xD000
+
+#define BLOCK_ETH_MAC_OFFSET		0x0000
+#define BLOCK_ETH_STATS_OFFSET		0x0014
+#define BLOCK_ETH_MAC_CSR_OFFSET	0x2800
+
+#define MAC_ADDR_REG_OFFSET		0x00
+#define MAC_COMMAND_REG_OFFSET		0x04
+#define MAC_WRITE_REG_OFFSET		0x08
+#define MAC_READ_REG_OFFSET		0x0c
+#define MAC_COMMAND_DONE_REG_OFFSET	0x10
+
+#define STAT_ADDR_REG_OFFSET		0x00
+#define STAT_COMMAND_REG_OFFSET		0x04
+#define STAT_WRITE_REG_OFFSET		0x08
+#define STAT_READ_REG_OFFSET		0x0c
+#define STAT_COMMAND_DONE_REG_OFFSET	0x10
+
+#define MII_MGMT_CONFIG_ADDR		0x20
+#define MII_MGMT_COMMAND_ADDR		0x24
+#define MII_MGMT_ADDRESS_ADDR		0x28
+#define MII_MGMT_CONTROL_ADDR		0x2c
+#define MII_MGMT_STATUS_ADDR		0x30
+#define MII_MGMT_INDICATORS_ADDR	0x34
+
+#define BUSY_MASK			BIT(0)
+#define READ_CYCLE_MASK			BIT(0)
+#define PHY_CONTROL_SET(dst, val)	xgene_set_bits(dst, val, 0, 16)
+
+#define ENET_SPARE_CFG_REG_ADDR		0x0750
+#define RSIF_CONFIG_REG_ADDR		0x0010
+#define RSIF_RAM_DBG_REG0_ADDR		0x0048
+#define RGMII_REG_0_ADDR		0x07e0
+#define CFG_LINK_AGGR_RESUME_0_ADDR	0x07c8
+#define DEBUG_REG_ADDR			0x0700
+#define CFG_BYPASS_ADDR			0x0294
+#define CLE_BYPASS_REG0_0_ADDR		0x0490
+#define CLE_BYPASS_REG1_0_ADDR		0x0494
+#define CFG_RSIF_FPBUFF_TIMEOUT_EN	BIT(31)
+#define RESUME_TX			BIT(0)
+#define CFG_SPEED_1250			BIT(24)
+#define TX_PORT0			BIT(0)
+#define CFG_BYPASS_UNISEC_TX		BIT(2)
+#define CFG_BYPASS_UNISEC_RX		BIT(1)
+#define CFG_CLE_BYPASS_EN0		BIT(31)
+#define CFG_TXCLK_MUXSEL0_SET(dst, val)	xgene_set_bits(dst, val, 29, 3)
+
+#define CFG_CLE_IP_PROTOCOL0_SET(dst, val)	xgene_set_bits(dst, val, 16, 2)
+#define CFG_CLE_DSTQID0_SET(dst, val)		xgene_set_bits(dst, val, 0, 12)
+#define CFG_CLE_FPSEL0_SET(dst, val)		xgene_set_bits(dst, val, 16, 4)
+#define CFG_MACMODE_SET(dst, val)		xgene_set_bits(dst, val, 18, 2)
+#define CFG_WAITASYNCRD_SET(dst, val)		xgene_set_bits(dst, val, 0, 16)
+#define ICM_CONFIG0_REG_0_ADDR		0x0400
+#define ICM_CONFIG2_REG_0_ADDR		0x0410
+#define RX_DV_GATE_REG_0_ADDR		0x05fc
+#define TX_DV_GATE_EN0			BIT(2)
+#define RX_DV_GATE_EN0			BIT(1)
+#define RESUME_RX0			BIT(0)
+#define ENET_CFGSSQMIWQASSOC_ADDR		0xe0
+#define ENET_CFGSSQMIFPQASSOC_ADDR		0xdc
+#define ENET_CFGSSQMIQMLITEFPQASSOC_ADDR	0xf0
+#define ENET_CFGSSQMIQMLITEWQASSOC_ADDR		0xf4
+#define ENET_CFG_MEM_RAM_SHUTDOWN_ADDR		0x70
+#define ENET_BLOCK_MEM_RDY_ADDR			0x74
+#define MAC_CONFIG_1_ADDR			0x00
+#define MAC_CONFIG_2_ADDR			0x04
+#define MAX_FRAME_LEN_ADDR			0x10
+#define INTERFACE_CONTROL_ADDR			0x38
+#define STATION_ADDR0_ADDR			0x40
+#define STATION_ADDR1_ADDR			0x44
+#define PHY_ADDR_SET(dst, val)			xgene_set_bits(dst, val, 8, 5)
+#define REG_ADDR_SET(dst, val)			xgene_set_bits(dst, val, 0, 5)
+#define ENET_INTERFACE_MODE2_SET(dst, val)	xgene_set_bits(dst, val, 8, 2)
+#define MGMT_CLOCK_SEL_SET(dst, val)		xgene_set_bits(dst, val, 0, 3)
+#define SOFT_RESET1			BIT(31)
+#define TX_EN				BIT(0)
+#define RX_EN				BIT(2)
+#define ENET_LHD_MODE			BIT(25)
+#define ENET_GHD_MODE			BIT(26)
+#define FULL_DUPLEX2			BIT(0)
+#define SCAN_AUTO_INCR			BIT(5)
+#define TBYT_ADDR			0x38
+#define TPKT_ADDR			0x39
+#define TDRP_ADDR			0x45
+#define TFCS_ADDR			0x47
+#define TUND_ADDR			0x4a
+
+#define TSO_IPPROTO_TCP			1
+#define	FULL_DUPLEX			2
+
+#define USERINFO_POS			0
+#define USERINFO_LEN			32
+#define FPQNUM_POS			32
+#define FPQNUM_LEN			12
+#define LERR_POS			60
+#define LERR_LEN			3
+#define STASH_POS			52
+#define STASH_LEN			2
+#define BUFDATALEN_POS			48
+#define BUFDATALEN_LEN			12
+#define DATAADDR_POS			0
+#define DATAADDR_LEN			42
+#define COHERENT_POS			63
+#define HENQNUM_POS			48
+#define HENQNUM_LEN			12
+#define TYPESEL_POS			44
+#define TYPESEL_LEN			4
+#define ETHHDR_POS			12
+#define ETHHDR_LEN			8
+#define IC_POS				35	/* Insert CRC */
+#define TCPHDR_POS			0
+#define TCPHDR_LEN			6
+#define IPHDR_POS			6
+#define IPHDR_LEN			6
+#define EC_POS				22	/* Enable checksum */
+#define EC_LEN				1
+#define IS_POS				24	/* IP protocol select */
+#define IS_LEN				1
+#define TYPE_ETH_WORK_MESSAGE_POS	44
+
+struct xgene_enet_raw_desc {
+	__le64 m0;
+	__le64 m1;
+	__le64 m2;
+	__le64 m3;
+};
+
+struct xgene_enet_raw_desc16 {
+	__le64 m0;
+	__le64 m1;
+};
+
+static inline void xgene_enet_mark_desc_slot_empty(void *desc_slot_ptr)
+{
+	__le64 *desc_slot = desc_slot_ptr;
+
+	desc_slot[EMPTY_SLOT_INDEX] = cpu_to_le64(EMPTY_SLOT);
+}
+
+static inline bool xgene_enet_is_desc_slot_empty(void *desc_slot_ptr)
+{
+	__le64 *desc_slot = desc_slot_ptr;
+
+	return (desc_slot[EMPTY_SLOT_INDEX] == cpu_to_le64(EMPTY_SLOT));
+}
+
+enum xgene_enet_ring_cfgsize {
+	RING_CFGSIZE_512B,
+	RING_CFGSIZE_2KB,
+	RING_CFGSIZE_16KB,
+	RING_CFGSIZE_64KB,
+	RING_CFGSIZE_512KB,
+	RING_CFGSIZE_INVALID
+};
+
+enum xgene_enet_ring_type {
+	RING_DISABLED,
+	RING_REGULAR,
+	RING_BUFPOOL
+};
+
+enum xgene_ring_owner {
+	RING_OWNER_ETH0,
+	RING_OWNER_CPU = 15,
+	RING_OWNER_INVALID
+};
+
+enum xgene_enet_ring_bufnum {
+	RING_BUFNUM_REGULAR = 0x0,
+	RING_BUFNUM_BUFPOOL = 0x20,
+	RING_BUFNUM_INVALID
+};
+
+enum xgene_enet_cmd {
+	XGENE_ENET_WR_CMD = BIT(31),
+	XGENE_ENET_RD_CMD = BIT(30)
+};
+
+enum xgene_enet_err_code {
+	HBF_READ_DATA = 3,
+	HBF_LL_READ = 4,
+	BAD_WORK_MSG = 6,
+	BUFPOOL_TIMEOUT = 15,
+	INGRESS_CRC = 16,
+	INGRESS_CHECKSUM = 17,
+	INGRESS_TRUNC_FRAME = 18,
+	INGRESS_PKT_LEN = 19,
+	INGRESS_PKT_UNDER = 20,
+	INGRESS_FIFO_OVERRUN = 21,
+	INGRESS_CHECKSUM_COMPUTE = 26,
+	ERR_CODE_INVALID
+};
+
+static inline enum xgene_ring_owner xgene_enet_ring_owner(u16 id)
+{
+	return (id & RING_OWNER_MASK) >> 6;
+}
+
+static inline u8 xgene_enet_ring_bufnum(u16 id)
+{
+	return id & RING_BUFNUM_MASK;
+}
+
+static inline bool xgene_enet_is_bufpool(u16 id)
+{
+	return ((id & RING_BUFNUM_MASK) >= 0x20) ? true : false;
+}
+
+static inline u16 xgene_enet_get_numslots(u16 id, u32 size)
+{
+	bool is_bufpool = xgene_enet_is_bufpool(id);
+
+	return (is_bufpool) ? size / BUFPOOL_DESC_SIZE :
+		      size / WORK_DESC_SIZE;
+}
+
+struct xgene_enet_desc_ring *xgene_enet_setup_ring(
+		struct xgene_enet_desc_ring *ring);
+void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring);
+void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring,
+			    struct xgene_enet_pdata *pdata,
+			    enum xgene_enet_err_code status);
+
+void xgene_enet_reset(struct xgene_enet_pdata *priv);
+void xgene_gmac_reset(struct xgene_enet_pdata *priv);
+void xgene_gmac_init(struct xgene_enet_pdata *priv, int speed);
+void xgene_gmac_tx_enable(struct xgene_enet_pdata *priv);
+void xgene_gmac_rx_enable(struct xgene_enet_pdata *priv);
+void xgene_gmac_tx_disable(struct xgene_enet_pdata *priv);
+void xgene_gmac_rx_disable(struct xgene_enet_pdata *priv);
+void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata);
+void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata,
+			   u32 dst_ring_num, u16 bufpool_id);
+void xgene_gport_shutdown(struct xgene_enet_pdata *priv);
+void xgene_gmac_get_tx_stats(struct xgene_enet_pdata *pdata);
+
+int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata);
+void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata);
+
+#endif /* __XGENE_ENET_HW_H__ */
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
new file mode 100644
index 0000000..e1a8f4e
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -0,0 +1,954 @@
+/* Applied Micro X-Gene SoC Ethernet Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Authors: Iyappan Subramanian <isubramanian@apm.com>
+ *	    Ravi Patel <rapatel@apm.com>
+ *	    Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "xgene_enet_main.h"
+#include "xgene_enet_hw.h"
+
+static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool)
+{
+	struct xgene_enet_raw_desc16 *raw_desc;
+	int i;
+
+	for (i = 0; i < buf_pool->slots; i++) {
+		raw_desc = &buf_pool->raw_desc16[i];
+
+		/* Hardware expects descriptor in little endian format */
+		raw_desc->m0 = cpu_to_le64(i |
+				SET_VAL(FPQNUM, buf_pool->dst_ring_num) |
+				SET_VAL(STASH, 3));
+	}
+}
+
+static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool,
+				     u32 nbuf)
+{
+	struct sk_buff *skb;
+	struct xgene_enet_raw_desc16 *raw_desc;
+	struct net_device *ndev;
+	struct device *dev;
+	dma_addr_t dma_addr;
+	u32 tail = buf_pool->tail;
+	u32 slots = buf_pool->slots - 1;
+	u16 bufdatalen, len;
+	int i;
+
+	ndev = buf_pool->ndev;
+	dev = ndev_to_dev(buf_pool->ndev);
+	bufdatalen = BUF_LEN_CODE_2K | (SKB_BUFFER_SIZE & GENMASK(11, 0));
+	len = XGENE_ENET_MAX_MTU;
+
+	for (i = 0; i < nbuf; i++) {
+		raw_desc = &buf_pool->raw_desc16[tail];
+
+		skb = netdev_alloc_skb_ip_align(ndev, len);
+		if (unlikely(!skb))
+			return -ENOMEM;
+		buf_pool->rx_skb[tail] = skb;
+
+		dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(dev, dma_addr)) {
+			netdev_err(ndev, "DMA mapping error\n");
+			dev_kfree_skb_any(skb);
+			return -EINVAL;
+		}
+
+		raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
+					   SET_VAL(BUFDATALEN, bufdatalen) |
+					   SET_BIT(COHERENT));
+		tail = (tail + 1) & slots;
+	}
+
+	iowrite32(nbuf, buf_pool->cmd);
+	buf_pool->tail = tail;
+
+	return 0;
+}
+
+static u16 xgene_enet_dst_ring_num(struct xgene_enet_desc_ring *ring)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev);
+
+	return ((u16)pdata->rm << 10) | ring->num;
+}
+
+static u8 xgene_enet_hdr_len(const void *data)
+{
+	const struct ethhdr *eth = data;
+
+	return (eth->h_proto == htons(ETH_P_8021Q)) ? VLAN_ETH_HLEN : ETH_HLEN;
+}
+
+static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring)
+{
+	u32 __iomem *cmd_base = ring->cmd_base;
+	u32 ring_state, num_msgs;
+
+	ring_state = ioread32(&cmd_base[1]);
+	num_msgs = ring_state & CREATE_MASK(NUMMSGSINQ_POS, NUMMSGSINQ_LEN);
+
+	return num_msgs >> NUMMSGSINQ_POS;
+}
+
+static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool)
+{
+	struct xgene_enet_raw_desc16 *raw_desc;
+	u32 slots = buf_pool->slots - 1;
+	u32 tail = buf_pool->tail;
+	u32 userinfo;
+	int i, len;
+
+	len = xgene_enet_ring_len(buf_pool);
+	for (i = 0; i < len; i++) {
+		tail = (tail - 1) & slots;
+		raw_desc = &buf_pool->raw_desc16[tail];
+
+		/* Hardware stores descriptor in little endian format */
+		userinfo = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
+		dev_kfree_skb_any(buf_pool->rx_skb[userinfo]);
+	}
+
+	iowrite32(-len, buf_pool->cmd);
+	buf_pool->tail = tail;
+}
+
+static irqreturn_t xgene_enet_rx_irq(const int irq, void *data)
+{
+	struct xgene_enet_desc_ring *rx_ring = data;
+
+	if (napi_schedule_prep(&rx_ring->napi)) {
+		disable_irq_nosync(irq);
+		__napi_schedule(&rx_ring->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
+				    struct xgene_enet_raw_desc *raw_desc)
+{
+	struct sk_buff *skb;
+	struct device *dev;
+	u16 skb_index;
+	u8 status;
+	int ret = 0;
+
+	skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
+	skb = cp_ring->cp_skb[skb_index];
+
+	dev = ndev_to_dev(cp_ring->ndev);
+	dma_unmap_single(dev, GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)),
+			 GET_VAL(BUFDATALEN, le64_to_cpu(raw_desc->m1)),
+			 DMA_TO_DEVICE);
+
+	/* Checking for error */
+	status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
+	if (unlikely(status > 2)) {
+		xgene_enet_parse_error(cp_ring, netdev_priv(cp_ring->ndev),
+				       status);
+		ret = -EIO;
+	}
+
+	if (likely(skb)) {
+		dev_kfree_skb_any(skb);
+	} else {
+		netdev_err(cp_ring->ndev, "completion skb is NULL\n");
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+static u64 xgene_enet_work_msg(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	u8 l3hlen, l4hlen = 0;
+	u8 csum_enable = 0;
+	u8 proto = 0;
+	u8 ethhdr;
+	u64 hopinfo;
+
+	if (unlikely(skb->protocol != htons(ETH_P_IP)) &&
+	    unlikely(skb->protocol != htons(ETH_P_8021Q)))
+		goto out;
+
+	if (unlikely(!(skb->dev->features & NETIF_F_IP_CSUM)))
+		goto out;
+
+	iph = ip_hdr(skb);
+	if (unlikely(ip_is_fragment(iph)))
+		goto out;
+
+	if (likely(iph->protocol == IPPROTO_TCP)) {
+		l4hlen = tcp_hdrlen(skb) >> 2;
+		csum_enable = 1;
+		proto = TSO_IPPROTO_TCP;
+	} else if (iph->protocol == IPPROTO_UDP) {
+		l4hlen = UDP_HDR_SIZE;
+		csum_enable = 1;
+	}
+out:
+	l3hlen = ip_hdrlen(skb) >> 2;
+	ethhdr = xgene_enet_hdr_len(skb->data);
+	hopinfo = SET_VAL(TCPHDR, l4hlen) |
+		  SET_VAL(IPHDR, l3hlen) |
+		  SET_VAL(ETHHDR, ethhdr) |
+		  SET_VAL(EC, csum_enable) |
+		  SET_VAL(IS, proto) |
+		  SET_BIT(IC) |
+		  SET_BIT(TYPE_ETH_WORK_MESSAGE);
+
+	return hopinfo;
+}
+
+static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring,
+				    struct sk_buff *skb)
+{
+	struct device *dev = ndev_to_dev(tx_ring->ndev);
+	struct xgene_enet_raw_desc *raw_desc;
+	dma_addr_t dma_addr;
+	u16 tail = tx_ring->tail;
+	u64 hopinfo;
+
+	raw_desc = &tx_ring->raw_desc[tail];
+	memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc));
+
+	dma_addr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_addr)) {
+		netdev_err(tx_ring->ndev, "DMA mapping error\n");
+		return -EINVAL;
+	}
+
+	/* Hardware expects descriptor in little endian format */
+	raw_desc->m0 = cpu_to_le64(tail);
+	raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
+				   SET_VAL(BUFDATALEN, skb->len) |
+				   SET_BIT(COHERENT));
+	hopinfo = xgene_enet_work_msg(skb);
+	raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) |
+				   hopinfo);
+	tx_ring->cp_ring->cp_skb[tail] = skb;
+
+	return 0;
+}
+
+static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb,
+					 struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct xgene_enet_desc_ring *tx_ring = pdata->tx_ring;
+	struct xgene_enet_desc_ring *cp_ring = tx_ring->cp_ring;
+	u32 tx_level, cq_level;
+
+	tx_level = xgene_enet_ring_len(tx_ring);
+	cq_level = xgene_enet_ring_len(cp_ring);
+	if (unlikely(tx_level > pdata->tx_qcnt_hi ||
+		     cq_level > pdata->cp_qcnt_hi)) {
+		netif_stop_queue(ndev);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (xgene_enet_setup_tx_desc(tx_ring, skb)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	iowrite32(1, tx_ring->cmd);
+	skb_tx_timestamp(skb);
+	tx_ring->tail = (tx_ring->tail + 1) & (tx_ring->slots - 1);
+
+	pdata->stats.tx_packets++;
+	pdata->stats.tx_bytes += skb->len;
+
+	return NETDEV_TX_OK;
+}
+
+static void xgene_enet_skip_csum(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+
+	if (!ip_is_fragment(iph) ||
+	    (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+
+static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
+			       struct xgene_enet_raw_desc *raw_desc)
+{
+	struct net_device *ndev;
+	struct xgene_enet_pdata *pdata;
+	struct device *dev;
+	struct xgene_enet_desc_ring *buf_pool;
+	u32 datalen, skb_index;
+	struct sk_buff *skb;
+	u8 status;
+	int ret = 0;
+
+	ndev = rx_ring->ndev;
+	pdata = netdev_priv(ndev);
+	dev = ndev_to_dev(rx_ring->ndev);
+	buf_pool = rx_ring->buf_pool;
+
+	dma_unmap_single(dev, GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)),
+			 XGENE_ENET_MAX_MTU, DMA_FROM_DEVICE);
+	skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
+	skb = buf_pool->rx_skb[skb_index];
+
+	/* checking for error */
+	status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
+	if (unlikely(status > 2)) {
+		dev_kfree_skb_any(skb);
+		xgene_enet_parse_error(rx_ring, netdev_priv(rx_ring->ndev),
+				       status);
+		pdata->stats.rx_dropped++;
+		ret = -EIO;
+		goto out;
+	}
+
+	/* strip off CRC as HW isn't doing this */
+	datalen = GET_VAL(BUFDATALEN, le64_to_cpu(raw_desc->m1));
+	datalen -= 4;
+	prefetch(skb->data - NET_IP_ALIGN);
+	skb_put(skb, datalen);
+
+	skb_checksum_none_assert(skb);
+	skb->protocol = eth_type_trans(skb, ndev);
+	if (likely((ndev->features & NETIF_F_IP_CSUM) &&
+		   skb->protocol == htons(ETH_P_IP))) {
+		xgene_enet_skip_csum(skb);
+	}
+
+	pdata->stats.rx_packets++;
+	pdata->stats.rx_bytes += datalen;
+	napi_gro_receive(&rx_ring->napi, skb);
+out:
+	if (--rx_ring->nbufpool == 0) {
+		ret = xgene_enet_refill_bufpool(buf_pool, NUM_BUFPOOL);
+		rx_ring->nbufpool = NUM_BUFPOOL;
+	}
+
+	return ret;
+}
+
+static bool is_rx_desc(struct xgene_enet_raw_desc *raw_desc)
+{
+	return GET_VAL(FPQNUM, le64_to_cpu(raw_desc->m0)) ? true : false;
+}
+
+static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring,
+				   int budget)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev);
+	struct xgene_enet_raw_desc *raw_desc;
+	u16 head = ring->head;
+	u16 slots = ring->slots - 1;
+	int ret, count = 0;
+
+	do {
+		raw_desc = &ring->raw_desc[head];
+		if (unlikely(xgene_enet_is_desc_slot_empty(raw_desc)))
+			break;
+
+		if (is_rx_desc(raw_desc))
+			ret = xgene_enet_rx_frame(ring, raw_desc);
+		else
+			ret = xgene_enet_tx_completion(ring, raw_desc);
+		xgene_enet_mark_desc_slot_empty(raw_desc);
+
+		head = (head + 1) & slots;
+		count++;
+
+		if (ret)
+			break;
+	} while (--budget);
+
+	if (likely(count)) {
+		iowrite32(-count, ring->cmd);
+		ring->head = head;
+
+		if (netif_queue_stopped(ring->ndev)) {
+			if (xgene_enet_ring_len(ring) < pdata->cp_qcnt_low)
+				netif_wake_queue(ring->ndev);
+		}
+	}
+
+	return budget;
+}
+
+static int xgene_enet_napi(struct napi_struct *napi, const int budget)
+{
+	struct xgene_enet_desc_ring *ring;
+	int processed;
+
+	ring = container_of(napi, struct xgene_enet_desc_ring, napi);
+	processed = xgene_enet_process_ring(ring, budget);
+
+	if (processed != budget) {
+		napi_complete(napi);
+		enable_irq(ring->irq);
+	}
+
+	return processed;
+}
+
+static void xgene_enet_timeout(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+
+	xgene_gmac_reset(pdata);
+}
+
+static int xgene_enet_register_irq(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct device *dev = ndev_to_dev(ndev);
+	int ret;
+
+	ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq,
+			       IRQF_SHARED, ndev->name, pdata->rx_ring);
+	if (ret) {
+		netdev_err(ndev, "rx%d interrupt request failed\n",
+			   pdata->rx_ring->irq);
+	}
+
+	return ret;
+}
+
+static void xgene_enet_free_irq(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata;
+	struct device *dev;
+
+	pdata = netdev_priv(ndev);
+	dev = ndev_to_dev(ndev);
+	devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring);
+}
+
+static int xgene_enet_open(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	int ret;
+
+	xgene_gmac_tx_enable(pdata);
+	xgene_gmac_rx_enable(pdata);
+
+	ret = xgene_enet_register_irq(ndev);
+	if (ret)
+		return ret;
+	napi_enable(&pdata->rx_ring->napi);
+
+	if (pdata->phy_dev)
+		phy_start(pdata->phy_dev);
+
+	netif_start_queue(ndev);
+
+	return ret;
+}
+
+static int xgene_enet_close(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+
+	if (pdata->phy_dev)
+		phy_stop(pdata->phy_dev);
+
+	napi_disable(&pdata->rx_ring->napi);
+	xgene_enet_free_irq(ndev);
+	xgene_enet_process_ring(pdata->rx_ring, -1);
+
+	xgene_gmac_tx_disable(pdata);
+	xgene_gmac_rx_disable(pdata);
+
+	return 0;
+}
+
+static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring)
+{
+	struct xgene_enet_pdata *pdata;
+	struct device *dev;
+
+	pdata = netdev_priv(ring->ndev);
+	dev = ndev_to_dev(ring->ndev);
+
+	xgene_enet_clear_ring(ring);
+	dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
+}
+
+static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata)
+{
+	struct xgene_enet_desc_ring *buf_pool;
+
+	if (pdata->tx_ring) {
+		xgene_enet_delete_ring(pdata->tx_ring);
+		pdata->tx_ring = NULL;
+	}
+
+	if (pdata->rx_ring) {
+		buf_pool = pdata->rx_ring->buf_pool;
+		xgene_enet_delete_bufpool(buf_pool);
+		xgene_enet_delete_ring(buf_pool);
+		xgene_enet_delete_ring(pdata->rx_ring);
+		pdata->rx_ring = NULL;
+	}
+}
+
+static int xgene_enet_get_ring_size(struct device *dev,
+				    enum xgene_enet_ring_cfgsize cfgsize)
+{
+	int size = -EINVAL;
+
+	switch (cfgsize) {
+	case RING_CFGSIZE_512B:
+		size = 0x200;
+		break;
+	case RING_CFGSIZE_2KB:
+		size = 0x800;
+		break;
+	case RING_CFGSIZE_16KB:
+		size = 0x4000;
+		break;
+	case RING_CFGSIZE_64KB:
+		size = 0x10000;
+		break;
+	case RING_CFGSIZE_512KB:
+		size = 0x80000;
+		break;
+	default:
+		dev_err(dev, "Unsupported cfg ring size %d\n", cfgsize);
+		break;
+	}
+
+	return size;
+}
+
+static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring)
+{
+	struct device *dev;
+
+	if (!ring)
+		return;
+
+	dev = ndev_to_dev(ring->ndev);
+
+	if (ring->desc_addr) {
+		xgene_enet_clear_ring(ring);
+		dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
+	}
+	devm_kfree(dev, ring);
+}
+
+static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata)
+{
+	struct device *dev = &pdata->pdev->dev;
+	struct xgene_enet_desc_ring *ring;
+
+	ring = pdata->tx_ring;
+	if (ring && ring->cp_ring && ring->cp_ring->cp_skb)
+		devm_kfree(dev, ring->cp_ring->cp_skb);
+	xgene_enet_free_desc_ring(ring);
+
+	ring = pdata->rx_ring;
+	if (ring && ring->buf_pool && ring->buf_pool->rx_skb)
+		devm_kfree(dev, ring->buf_pool->rx_skb);
+	xgene_enet_free_desc_ring(ring->buf_pool);
+	xgene_enet_free_desc_ring(ring);
+}
+
+static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
+			struct net_device *ndev, u32 ring_num,
+			enum xgene_enet_ring_cfgsize cfgsize, u32 ring_id)
+{
+	struct xgene_enet_desc_ring *ring;
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct device *dev = ndev_to_dev(ndev);
+	int size;
+
+	size = xgene_enet_get_ring_size(dev, cfgsize);
+	if (size < 0)
+		return NULL;
+
+	ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring),
+			    GFP_KERNEL);
+	if (!ring)
+		return NULL;
+
+	ring->ndev = ndev;
+	ring->num = ring_num;
+	ring->cfgsize = cfgsize;
+	ring->id = ring_id;
+
+	ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma,
+					      GFP_KERNEL);
+	if (!ring->desc_addr) {
+		devm_kfree(dev, ring);
+		return NULL;
+	}
+	ring->size = size;
+
+	ring->cmd_base = pdata->ring_cmd_addr + (ring->num << 6);
+	ring->cmd = ring->cmd_base + INC_DEC_CMD_ADDR;
+	pdata->rm = RM3;
+	ring = xgene_enet_setup_ring(ring);
+	netdev_dbg(ndev, "ring info: num=%d  size=%d  id=%d  slots=%d\n",
+		   ring->num, ring->size, ring->id, ring->slots);
+
+	return ring;
+}
+
+static u16 xgene_enet_get_ring_id(enum xgene_ring_owner owner, u8 bufnum)
+{
+	return (owner << 6) | (bufnum & GENMASK(5, 0));
+}
+
+static int xgene_enet_create_desc_rings(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct device *dev = ndev_to_dev(ndev);
+	struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring;
+	struct xgene_enet_desc_ring *buf_pool = NULL;
+	u8 cpu_bufnum = 0, eth_bufnum = 0;
+	u8 bp_bufnum = 0x20;
+	u16 ring_id, ring_num = 0;
+	int ret;
+
+	/* allocate rx descriptor ring */
+	ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++);
+	rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++,
+					      RING_CFGSIZE_16KB, ring_id);
+	if (!rx_ring) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* allocate buffer pool for receiving packets */
+	ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, bp_bufnum++);
+	buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++,
+					       RING_CFGSIZE_2KB, ring_id);
+	if (!buf_pool) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	rx_ring->nbufpool = NUM_BUFPOOL;
+	rx_ring->buf_pool = buf_pool;
+	rx_ring->irq = pdata->rx_irq;
+	buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots,
+					sizeof(struct sk_buff *), GFP_KERNEL);
+	if (!buf_pool->rx_skb) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	buf_pool->dst_ring_num = xgene_enet_dst_ring_num(buf_pool);
+	rx_ring->buf_pool = buf_pool;
+	pdata->rx_ring = rx_ring;
+
+	/* allocate tx descriptor ring */
+	ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, eth_bufnum++);
+	tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++,
+					      RING_CFGSIZE_16KB, ring_id);
+	if (!tx_ring) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	pdata->tx_ring = tx_ring;
+
+	cp_ring = pdata->rx_ring;
+	cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots,
+				       sizeof(struct sk_buff *), GFP_KERNEL);
+	if (!cp_ring->cp_skb) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	pdata->tx_ring->cp_ring = cp_ring;
+	pdata->tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring);
+
+	pdata->tx_qcnt_hi = pdata->tx_ring->slots / 2;
+	pdata->cp_qcnt_hi = pdata->rx_ring->slots / 2;
+	pdata->cp_qcnt_low = pdata->cp_qcnt_hi / 2;
+
+	return 0;
+
+err:
+	xgene_enet_free_desc_rings(pdata);
+	return ret;
+}
+
+static struct rtnl_link_stats64 *xgene_enet_get_stats64(
+			struct net_device *ndev,
+			struct rtnl_link_stats64 *storage)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct rtnl_link_stats64 *stats = &pdata->stats;
+
+	stats->rx_errors += stats->rx_length_errors +
+			    stats->rx_crc_errors +
+			    stats->rx_frame_errors +
+			    stats->rx_fifo_errors;
+	memcpy(storage, &pdata->stats, sizeof(struct rtnl_link_stats64));
+
+	return storage;
+}
+
+static int xgene_enet_set_mac_address(struct net_device *ndev, void *addr)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	int ret;
+
+	ret = eth_mac_addr(ndev, addr);
+	if (ret)
+		return ret;
+	xgene_gmac_set_mac_addr(pdata);
+
+	return ret;
+}
+
+static const struct net_device_ops xgene_ndev_ops = {
+	.ndo_open = xgene_enet_open,
+	.ndo_stop = xgene_enet_close,
+	.ndo_start_xmit = xgene_enet_start_xmit,
+	.ndo_tx_timeout = xgene_enet_timeout,
+	.ndo_get_stats64 = xgene_enet_get_stats64,
+	.ndo_change_mtu = eth_change_mtu,
+	.ndo_set_mac_address = xgene_enet_set_mac_address,
+};
+
+static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
+{
+	struct platform_device *pdev;
+	struct net_device *ndev;
+	struct device *dev;
+	struct resource *res;
+	void __iomem *base_addr;
+	const char *mac;
+	int ret;
+
+	pdev = pdata->pdev;
+	dev = &pdev->dev;
+	ndev = pdata->ndev;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "enet_csr");
+	if (!res) {
+		dev_err(dev, "Resource enet_csr not defined\n");
+		return -ENODEV;
+	}
+	pdata->base_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->base_addr)) {
+		dev_err(dev, "Unable to retrieve ENET Port CSR region\n");
+		return PTR_ERR(pdata->base_addr);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_csr");
+	if (!res) {
+		dev_err(dev, "Resource ring_csr not defined\n");
+		return -ENODEV;
+	}
+	pdata->ring_csr_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->ring_csr_addr)) {
+		dev_err(dev, "Unable to retrieve ENET Ring CSR region\n");
+		return PTR_ERR(pdata->ring_csr_addr);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_cmd");
+	if (!res) {
+		dev_err(dev, "Resource ring_cmd not defined\n");
+		return -ENODEV;
+	}
+	pdata->ring_cmd_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->ring_cmd_addr)) {
+		dev_err(dev, "Unable to retrieve ENET Ring command region\n");
+		return PTR_ERR(pdata->ring_cmd_addr);
+	}
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret <= 0) {
+		dev_err(dev, "Unable to get ENET Rx IRQ\n");
+		ret = ret ? : -ENXIO;
+		return ret;
+	}
+	pdata->rx_irq = ret;
+
+	mac = of_get_mac_address(dev->of_node);
+	if (mac)
+		memcpy(ndev->dev_addr, mac, ndev->addr_len);
+	else
+		eth_hw_addr_random(ndev);
+	memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
+
+	pdata->phy_mode = of_get_phy_mode(pdev->dev.of_node);
+	if (pdata->phy_mode < 0) {
+		dev_err(dev, "Incorrect phy-connection-type in DTS\n");
+		return -EINVAL;
+	}
+
+	pdata->clk = devm_clk_get(&pdev->dev, NULL);
+	ret = IS_ERR(pdata->clk);
+	if (IS_ERR(pdata->clk)) {
+		dev_err(&pdev->dev, "can't get clock\n");
+		ret = PTR_ERR(pdata->clk);
+		return ret;
+	}
+
+	base_addr = pdata->base_addr;
+	pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET;
+	pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET;
+	pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET;
+	pdata->mcx_mac_addr = base_addr + BLOCK_ETH_MAC_OFFSET;
+	pdata->mcx_stats_addr = base_addr + BLOCK_ETH_STATS_OFFSET;
+	pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET;
+	pdata->rx_buff_cnt = NUM_PKT_BUF;
+
+	return ret;
+}
+
+static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
+{
+	struct net_device *ndev = pdata->ndev;
+	struct xgene_enet_desc_ring *buf_pool;
+	u16 dst_ring_num;
+	int ret;
+
+	xgene_gmac_tx_disable(pdata);
+	xgene_gmac_rx_disable(pdata);
+
+	ret = xgene_enet_create_desc_rings(ndev);
+	if (ret) {
+		netdev_err(ndev, "Error in ring configuration\n");
+		return ret;
+	}
+
+	/* setup buffer pool */
+	buf_pool = pdata->rx_ring->buf_pool;
+	xgene_enet_init_bufpool(buf_pool);
+	ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt);
+	if (ret) {
+		xgene_enet_delete_desc_rings(pdata);
+		return ret;
+	}
+
+	dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring);
+	xgene_enet_cle_bypass(pdata, dst_ring_num, buf_pool->id);
+
+	return ret;
+}
+
+static int xgene_enet_probe(struct platform_device *pdev)
+{
+	struct net_device *ndev;
+	struct xgene_enet_pdata *pdata;
+	struct device *dev = &pdev->dev;
+	struct napi_struct *napi;
+	int ret;
+
+	ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata));
+	if (!ndev)
+		return -ENOMEM;
+
+	pdata = netdev_priv(ndev);
+
+	pdata->pdev = pdev;
+	pdata->ndev = ndev;
+	SET_NETDEV_DEV(ndev, dev);
+	platform_set_drvdata(pdev, pdata);
+	ndev->netdev_ops = &xgene_ndev_ops;
+	xgene_enet_set_ethtool_ops(ndev);
+	ndev->features |= NETIF_F_IP_CSUM |
+			  NETIF_F_GSO |
+			  NETIF_F_GRO;
+
+	ret = xgene_enet_get_resources(pdata);
+	if (ret)
+		goto err;
+
+	xgene_enet_reset(pdata);
+	xgene_gmac_init(pdata, SPEED_1000);
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		netdev_err(ndev, "Failed to register netdev\n");
+		goto err;
+	}
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		netdev_err(ndev, "No usable DMA configuration\n");
+		goto err;
+	}
+
+	ret = xgene_enet_init_hw(pdata);
+	if (ret)
+		goto err;
+
+	napi = &pdata->rx_ring->napi;
+	netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT);
+	ret = xgene_enet_mdio_config(pdata);
+
+	return ret;
+err:
+	free_netdev(ndev);
+	return ret;
+}
+
+static int xgene_enet_remove(struct platform_device *pdev)
+{
+	struct xgene_enet_pdata *pdata;
+	struct net_device *ndev;
+
+	pdata = platform_get_drvdata(pdev);
+	ndev = pdata->ndev;
+
+	xgene_gmac_rx_disable(pdata);
+	xgene_gmac_tx_disable(pdata);
+
+	netif_napi_del(&pdata->rx_ring->napi);
+	xgene_enet_mdio_remove(pdata);
+	xgene_enet_delete_desc_rings(pdata);
+	unregister_netdev(ndev);
+	xgene_gport_shutdown(pdata);
+	free_netdev(ndev);
+
+	return 0;
+}
+
+static struct of_device_id xgene_enet_match[] = {
+	{.compatible = "apm,xgene-enet",},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, xgene_enet_match);
+
+static struct platform_driver xgene_enet_driver = {
+	.driver = {
+		   .name = "xgene-enet",
+		   .of_match_table = xgene_enet_match,
+	},
+	.probe = xgene_enet_probe,
+	.remove = xgene_enet_remove,
+};
+
+module_platform_driver(xgene_enet_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC Ethernet driver");
+MODULE_VERSION(XGENE_DRV_VERSION);
+MODULE_AUTHOR("Keyur Chudgar <kchudgar@apm.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
new file mode 100644
index 0000000..0815866
--- /dev/null
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -0,0 +1,135 @@
+/* Applied Micro X-Gene SoC Ethernet Driver
+ *
+ * Copyright (c) 2014, Applied Micro Circuits Corporation
+ * Authors: Iyappan Subramanian <isubramanian@apm.com>
+ *	    Ravi Patel <rapatel@apm.com>
+ *	    Keyur Chudgar <kchudgar@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __XGENE_ENET_MAIN_H__
+#define __XGENE_ENET_MAIN_H__
+
+#include <linux/clk.h>
+#include <linux/of_platform.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <linux/prefetch.h>
+#include <linux/if_vlan.h>
+#include <linux/phy.h>
+#include "xgene_enet_hw.h"
+
+#define XGENE_DRV_VERSION	"v1.0"
+#define XGENE_ENET_MAX_MTU	1536
+#define SKB_BUFFER_SIZE		(XGENE_ENET_MAX_MTU - NET_IP_ALIGN)
+#define NUM_PKT_BUF	64
+#define NUM_BUFPOOL	32
+
+/* software context of a descriptor ring */
+struct xgene_enet_desc_ring {
+	struct net_device *ndev;
+	u16 id;
+	u16 num;
+	u16 head;
+	u16 tail;
+	u16 slots;
+	u16 irq;
+	u32 size;
+	u32 state[NUM_RING_CONFIG];
+	void __iomem *cmd_base;
+	void __iomem *cmd;
+	dma_addr_t dma;
+	u16 dst_ring_num;
+	u8 nbufpool;
+	struct sk_buff *(*rx_skb);
+	struct sk_buff *(*cp_skb);
+	enum xgene_enet_ring_cfgsize cfgsize;
+	struct xgene_enet_desc_ring *cp_ring;
+	struct xgene_enet_desc_ring *buf_pool;
+	struct napi_struct napi;
+	union {
+		void *desc_addr;
+		struct xgene_enet_raw_desc *raw_desc;
+		struct xgene_enet_raw_desc16 *raw_desc16;
+	};
+};
+
+/* ethernet private data */
+struct xgene_enet_pdata {
+	struct net_device *ndev;
+	struct mii_bus *mdio_bus;
+	struct phy_device *phy_dev;
+	int phy_speed;
+	struct clk *clk;
+	struct platform_device *pdev;
+	struct xgene_enet_desc_ring *tx_ring;
+	struct xgene_enet_desc_ring *rx_ring;
+	char *dev_name;
+	u32 rx_buff_cnt;
+	u32 tx_qcnt_hi;
+	u32 cp_qcnt_hi;
+	u32 cp_qcnt_low;
+	u32 rx_irq;
+	void __iomem *eth_csr_addr;
+	void __iomem *eth_ring_if_addr;
+	void __iomem *eth_diag_csr_addr;
+	void __iomem *mcx_mac_addr;
+	void __iomem *mcx_stats_addr;
+	void __iomem *mcx_mac_csr_addr;
+	void __iomem *base_addr;
+	void __iomem *ring_csr_addr;
+	void __iomem *ring_cmd_addr;
+	u32 phy_addr;
+	int phy_mode;
+	u32 speed;
+	u16 rm;
+	struct rtnl_link_stats64 stats;
+};
+
+/* Set the specified value into a bit-field defined by its starting position
+ * and length within a single u64.
+ */
+static inline u64 xgene_enet_set_field_value(int pos, int len, u64 val)
+{
+	return (val & ((1ULL << len) - 1)) << pos;
+}
+
+#define SET_VAL(field, val) \
+		xgene_enet_set_field_value(field ## _POS, field ## _LEN, val)
+
+#define SET_BIT(field) \
+		xgene_enet_set_field_value(field ## _POS, 1, 1)
+
+/* Get the value from a bit-field defined by its starting position
+ * and length within the specified u64.
+ */
+static inline u64 xgene_enet_get_field_value(int pos, int len, u64 src)
+{
+	return (src >> pos) & ((1ULL << len) - 1);
+}
+
+#define GET_VAL(field, src) \
+		xgene_enet_get_field_value(field ## _POS, field ## _LEN, src)
+
+static inline struct device *ndev_to_dev(struct net_device *ndev)
+{
+	return ndev->dev.parent;
+}
+
+void xgene_enet_set_ethtool_ops(struct net_device *netdev);
+
+#endif /* __XGENE_ENET_MAIN_H__ */
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ce455ae..3f9d4de 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -739,7 +739,6 @@ static void bcmgenet_power_down(struct bcmgenet_priv *priv,
 
 	case GENET_POWER_PASSIVE:
 		/* Power down LED */
-		bcmgenet_mii_reset(priv->dev);
 		if (priv->hw_params->flags & GENET_HAS_EXT) {
 			reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
 			reg |= (EXT_PWR_DOWN_PHY |
@@ -779,7 +778,9 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 	}
 
 	bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-	bcmgenet_mii_reset(priv->dev);
+
+	if (mode == GENET_POWER_PASSIVE)
+		bcmgenet_mii_reset(priv->dev);
 }
 
 /* ioctl handle special commands that are not present in ethtool. */
@@ -1961,7 +1962,8 @@ static void bcmgenet_set_hw_addr(struct bcmgenet_priv *priv,
 static int bcmgenet_wol_resume(struct bcmgenet_priv *priv)
 {
 	/* From WOL-enabled suspend, switch to regular clock */
-	clk_disable_unprepare(priv->clk_wol);
+	if (priv->wolopts)
+		clk_disable_unprepare(priv->clk_wol);
 
 	phy_init_hw(priv->phydev);
 	/* Speed settings must be restored */
@@ -2164,6 +2166,10 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 	 * disabled no new work will be scheduled.
 	 */
 	cancel_work_sync(&priv->bcmgenet_irq_work);
+
+	priv->old_pause = -1;
+	priv->old_link = -1;
+	priv->old_duplex = -1;
 }
 
 static int bcmgenet_close(struct net_device *dev)
@@ -2533,6 +2539,13 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	priv->pdev = pdev;
 	priv->version = (enum bcmgenet_version)of_id->data;
 
+	priv->clk = devm_clk_get(&priv->pdev->dev, "enet");
+	if (IS_ERR(priv->clk))
+		dev_warn(&priv->pdev->dev, "failed to get enet clock\n");
+
+	if (!IS_ERR(priv->clk))
+		clk_prepare_enable(priv->clk);
+
 	bcmgenet_set_hw_params(priv);
 
 	/* Mii wait queue */
@@ -2541,17 +2554,10 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	priv->rx_buf_len = RX_BUF_LENGTH;
 	INIT_WORK(&priv->bcmgenet_irq_work, bcmgenet_irq_task);
 
-	priv->clk = devm_clk_get(&priv->pdev->dev, "enet");
-	if (IS_ERR(priv->clk))
-		dev_warn(&priv->pdev->dev, "failed to get enet clock\n");
-
 	priv->clk_wol = devm_clk_get(&priv->pdev->dev, "enet-wol");
 	if (IS_ERR(priv->clk_wol))
 		dev_warn(&priv->pdev->dev, "failed to get enet-wol clock\n");
 
-	if (!IS_ERR(priv->clk))
-		clk_prepare_enable(priv->clk);
-
 	err = reset_umac(priv);
 	if (err)
 		goto err_clk_disable;
@@ -2611,6 +2617,8 @@ static int bcmgenet_suspend(struct device *d)
 
 	bcmgenet_netif_stop(dev);
 
+	phy_suspend(priv->phydev);
+
 	netif_device_detach(dev);
 
 	/* Disable MAC receive */
@@ -2661,9 +2669,7 @@ static int bcmgenet_resume(struct device *d)
 	if (ret)
 		goto out_clk_disable;
 
-	if (priv->wolopts)
-		ret = bcmgenet_wol_resume(priv);
-
+	ret = bcmgenet_wol_resume(priv);
 	if (ret)
 		goto out_clk_disable;
 
@@ -2678,6 +2684,9 @@ static int bcmgenet_resume(struct device *d)
 		bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
 	}
 
+	if (priv->wolopts)
+		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
+
 	/* Disable RX/TX DMA and flush TX queues */
 	dma_ctrl = bcmgenet_dma_disable(priv);
 
@@ -2693,6 +2702,8 @@ static int bcmgenet_resume(struct device *d)
 
 	netif_device_attach(dev);
 
+	phy_resume(priv->phydev);
+
 	bcmgenet_netif_start(dev);
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 1896161..c88f7ae 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -129,7 +129,10 @@ static void bcmgenet_mii_setup(struct net_device *dev)
 			cmd_bits |= CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE;
 	}
 
-	if (status_changed) {
+	if (!status_changed)
+		return;
+
+	if (phydev->link) {
 		reg = bcmgenet_umac_readl(priv, UMAC_CMD);
 		reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) |
 			       CMD_HD_EN |
@@ -137,8 +140,9 @@ static void bcmgenet_mii_setup(struct net_device *dev)
 		reg |= cmd_bits;
 		bcmgenet_umac_writel(priv, reg, UMAC_CMD);
 
-		phy_print_status(phydev);
 	}
+
+	phy_print_status(phydev);
 }
 
 void bcmgenet_mii_reset(struct net_device *dev)
@@ -303,12 +307,12 @@ static int bcmgenet_mii_probe(struct net_device *dev)
 	/* In the case of a fixed PHY, the DT node associated
 	 * to the PHY is the Ethernet MAC DT node.
 	 */
-	if (of_phy_is_fixed_link(dn)) {
+	if (!priv->phy_dn && of_phy_is_fixed_link(dn)) {
 		ret = of_phy_register_fixed_link(dn);
 		if (ret)
 			return ret;
 
-		priv->phy_dn = dn;
+		priv->phy_dn = of_node_get(dn);
 	}
 
 	phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, 0,
@@ -444,6 +448,7 @@ int bcmgenet_mii_init(struct net_device *dev)
 	return 0;
 
 out:
+	of_node_put(priv->phy_dn);
 	mdiobus_unregister(priv->mii_bus);
 out_free:
 	kfree(priv->mii_bus->irq);
@@ -455,6 +460,7 @@ void bcmgenet_mii_exit(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
+	of_node_put(priv->phy_dn);
 	mdiobus_unregister(priv->mii_bus);
 	kfree(priv->mii_bus->irq);
 	mdiobus_free(priv->mii_bus);
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ba3cead..3ac5d23 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14093,8 +14093,9 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
 
 	spin_lock_bh(&tp->lock);
 	if (!tp->hw_stats) {
+		*stats = tp->net_stats_prev;
 		spin_unlock_bh(&tp->lock);
-		return &tp->net_stats_prev;
+		return stats;
 	}
 
 	tg3_get_nstats(tp, stats);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index c9b922c..d572821 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -50,13 +50,13 @@
 #include "cxgb4_uld.h"
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x09
-#define T4FW_VERSION_MICRO 0x17
+#define T4FW_VERSION_MINOR 0x0B
+#define T4FW_VERSION_MICRO 0x1B
 #define T4FW_VERSION_BUILD 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x09
-#define T5FW_VERSION_MICRO 0x17
+#define T5FW_VERSION_MINOR 0x0B
+#define T5FW_VERSION_MICRO 0x1B
 #define T5FW_VERSION_BUILD 0x00
 
 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
@@ -522,6 +522,9 @@ struct sge_txq {
 struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
 	struct sge_txq q;
 	struct netdev_queue *txq;   /* associated netdev TX queue */
+#ifdef CONFIG_CHELSIO_T4_DCB
+	u8 dcb_prio;		    /* DCB Priority bound to queue */
+#endif
 	unsigned long tso;          /* # of TSO requests */
 	unsigned long tx_cso;       /* # of Tx checksum offloads */
 	unsigned long vlan_ins;     /* # of Tx VLAN insertions */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
index 0d3a9df..8edf0f5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
@@ -20,6 +20,17 @@
 
 #include "cxgb4.h"
 
+/* DCBx version control
+ */
+char *dcb_ver_array[] = {
+	"Unknown",
+	"DCBx-CIN",
+	"DCBx-CEE 1.01",
+	"DCBx-IEEE",
+	"", "", "",
+	"Auto Negotiated"
+};
+
 /* Initialize a port's Data Center Bridging state.  Typically used after a
  * Link Down event.
  */
@@ -27,25 +38,45 @@ void cxgb4_dcb_state_init(struct net_device *dev)
 {
 	struct port_info *pi = netdev2pinfo(dev);
 	struct port_dcb_info *dcb = &pi->dcb;
+	int version_temp = dcb->dcb_version;
 
 	memset(dcb, 0, sizeof(struct port_dcb_info));
 	dcb->state = CXGB4_DCB_STATE_START;
+	if (version_temp)
+		dcb->dcb_version = version_temp;
+
+	netdev_dbg(dev, "%s: Initializing DCB state for port[%d]\n",
+		    __func__, pi->port_id);
+}
+
+void cxgb4_dcb_version_init(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct port_dcb_info *dcb = &pi->dcb;
+
+	/* Any writes here are only done on kernels that exlicitly need
+	 * a specific version, say < 2.6.38 which only support CEE
+	 */
+	dcb->dcb_version = FW_PORT_DCB_VER_AUTO;
 }
 
 /* Finite State machine for Data Center Bridging.
  */
 void cxgb4_dcb_state_fsm(struct net_device *dev,
-			 enum cxgb4_dcb_state_input input)
+			 enum cxgb4_dcb_state_input transition_to)
 {
 	struct port_info *pi = netdev2pinfo(dev);
 	struct port_dcb_info *dcb = &pi->dcb;
 	struct adapter *adap = pi->adapter;
+	enum cxgb4_dcb_state current_state = dcb->state;
 
-	switch (input) {
-	case CXGB4_DCB_INPUT_FW_DISABLED: {
-		/* Firmware tells us it's not doing DCB */
-		switch (dcb->state) {
-		case CXGB4_DCB_STATE_START: {
+	netdev_dbg(dev, "%s: State change from %d to %d for %s\n",
+		    __func__, dcb->state, transition_to, dev->name);
+
+	switch (current_state) {
+	case CXGB4_DCB_STATE_START: {
+		switch (transition_to) {
+		case CXGB4_DCB_INPUT_FW_DISABLED: {
 			/* we're going to use Host DCB */
 			dcb->state = CXGB4_DCB_STATE_HOST;
 			dcb->supported = CXGB4_DCBX_HOST_SUPPORT;
@@ -53,48 +84,62 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
 			break;
 		}
 
-		case CXGB4_DCB_STATE_HOST: {
-			/* we're alreaady in Host DCB mode */
+		case CXGB4_DCB_INPUT_FW_ENABLED: {
+			/* we're going to use Firmware DCB */
+			dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE;
+			dcb->supported = CXGB4_DCBX_FW_SUPPORT;
+			break;
+		}
+
+		case CXGB4_DCB_INPUT_FW_INCOMPLETE: {
+			/* expected transition */
+			break;
+		}
+
+		case CXGB4_DCB_INPUT_FW_ALLSYNCED: {
+			dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED;
 			break;
 		}
 
 		default:
-			goto bad_state_transition;
+			goto bad_state_input;
 		}
 		break;
 	}
 
-	case CXGB4_DCB_INPUT_FW_ENABLED: {
-		/* Firmware tells us that it is doing DCB */
-		switch (dcb->state) {
-		case CXGB4_DCB_STATE_START: {
-			/* we're going to use Firmware DCB */
-			dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE;
-			dcb->supported = CXGB4_DCBX_FW_SUPPORT;
+	case CXGB4_DCB_STATE_FW_INCOMPLETE: {
+		switch (transition_to) {
+		case CXGB4_DCB_INPUT_FW_ENABLED: {
+			/* we're alreaady in firmware DCB mode */
 			break;
 		}
 
-		case CXGB4_DCB_STATE_FW_INCOMPLETE:
-		case CXGB4_DCB_STATE_FW_ALLSYNCED: {
-			/* we're alreaady in firmware DCB mode */
+		case CXGB4_DCB_INPUT_FW_INCOMPLETE: {
+			/* we're already incomplete */
+			break;
+		}
+
+		case CXGB4_DCB_INPUT_FW_ALLSYNCED: {
+			dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED;
+			dcb->enabled = 1;
+			linkwatch_fire_event(dev);
 			break;
 		}
 
 		default:
-			goto bad_state_transition;
+			goto bad_state_input;
 		}
 		break;
 	}
 
-	case CXGB4_DCB_INPUT_FW_INCOMPLETE: {
-		/* Firmware tells us that its DCB state is incomplete */
-		switch (dcb->state) {
-		case CXGB4_DCB_STATE_FW_INCOMPLETE: {
-			/* we're already incomplete */
+	case CXGB4_DCB_STATE_FW_ALLSYNCED: {
+		switch (transition_to) {
+		case CXGB4_DCB_INPUT_FW_ENABLED: {
+			/* we're alreaady in firmware DCB mode */
 			break;
 		}
 
-		case CXGB4_DCB_STATE_FW_ALLSYNCED: {
+		case CXGB4_DCB_INPUT_FW_INCOMPLETE: {
 			/* We were successfully running with firmware DCB but
 			 * now it's telling us that it's in an "incomplete
 			 * state.  We need to reset back to a ground state
@@ -107,46 +152,48 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
 			break;
 		}
 
-		default:
-			goto bad_state_transition;
-		}
-		break;
-	}
-
-	case CXGB4_DCB_INPUT_FW_ALLSYNCED: {
-		/* Firmware tells us that its DCB state is complete */
-		switch (dcb->state) {
-		case CXGB4_DCB_STATE_FW_INCOMPLETE: {
-			dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED;
+		case CXGB4_DCB_INPUT_FW_ALLSYNCED: {
+			/* we're already all sync'ed
+			 * this is only applicable for IEEE or
+			 * when another VI already completed negotiaton
+			 */
 			dcb->enabled = 1;
 			linkwatch_fire_event(dev);
 			break;
 		}
 
-		case CXGB4_DCB_STATE_FW_ALLSYNCED: {
-			/* we're already all sync'ed */
+		default:
+			goto bad_state_input;
+		}
+		break;
+	}
+
+	case CXGB4_DCB_STATE_HOST: {
+		switch (transition_to) {
+		case CXGB4_DCB_INPUT_FW_DISABLED: {
+			/* we're alreaady in Host DCB mode */
 			break;
 		}
 
 		default:
-			goto bad_state_transition;
+			goto bad_state_input;
 		}
 		break;
 	}
 
 	default:
-		goto  bad_state_input;
+		goto bad_state_transition;
 	}
 	return;
 
 bad_state_input:
 	dev_err(adap->pdev_dev, "cxgb4_dcb_state_fsm: illegal input symbol %d\n",
-		input);
+		transition_to);
 	return;
 
 bad_state_transition:
 	dev_err(adap->pdev_dev, "cxgb4_dcb_state_fsm: bad state transition, state = %d, input = %d\n",
-		dcb->state, input);
+		current_state, transition_to);
 }
 
 /* Handle a DCB/DCBX update message from the firmware.
@@ -160,6 +207,7 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap,
 	struct port_info *pi = netdev_priv(dev);
 	struct port_dcb_info *dcb = &pi->dcb;
 	int dcb_type = pcmd->u.dcb.pgid.type;
+	int dcb_running_version;
 
 	/* Handle Firmware DCB Control messages separately since they drive
 	 * our state machine.
@@ -171,6 +219,25 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap,
 			 ? CXGB4_DCB_STATE_FW_ALLSYNCED
 			 : CXGB4_DCB_STATE_FW_INCOMPLETE);
 
+		if (dcb->dcb_version != FW_PORT_DCB_VER_UNKNOWN) {
+			dcb_running_version = FW_PORT_CMD_DCB_VERSION_GET(
+				be16_to_cpu(
+				pcmd->u.dcb.control.dcb_version_to_app_state));
+			if (dcb_running_version == FW_PORT_DCB_VER_CEE1D01 ||
+			    dcb_running_version == FW_PORT_DCB_VER_IEEE) {
+				dcb->dcb_version = dcb_running_version;
+				dev_warn(adap->pdev_dev, "Interface %s is running %s\n",
+					 dev->name,
+					 dcb_ver_array[dcb->dcb_version]);
+			} else {
+				dev_warn(adap->pdev_dev,
+					 "Something screwed up, requested firmware for %s, but firmware returned %s instead\n",
+					 dcb_ver_array[dcb->dcb_version],
+					 dcb_ver_array[dcb_running_version]);
+				dcb->dcb_version = FW_PORT_DCB_VER_UNKNOWN;
+			}
+		}
+
 		cxgb4_dcb_state_fsm(dev, input);
 		return;
 	}
@@ -199,7 +266,11 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap,
 		dcb->pg_num_tcs_supported = fwdcb->pgrate.num_tcs_supported;
 		memcpy(dcb->pgrate, &fwdcb->pgrate.pgrate,
 		       sizeof(dcb->pgrate));
+		memcpy(dcb->tsa, &fwdcb->pgrate.tsa,
+		       sizeof(dcb->tsa));
 		dcb->msgs |= CXGB4_DCB_FW_PGRATE;
+		if (dcb->msgs & CXGB4_DCB_FW_PGID)
+			IEEE_FAUX_SYNC(dev, dcb);
 		break;
 
 	case FW_PORT_DCB_TYPE_PRIORATE:
@@ -212,6 +283,7 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap,
 		dcb->pfcen = fwdcb->pfc.pfcen;
 		dcb->pfc_num_tcs_supported = fwdcb->pfc.max_pfc_tcs;
 		dcb->msgs |= CXGB4_DCB_FW_PFC;
+		IEEE_FAUX_SYNC(dev, dcb);
 		break;
 
 	case FW_PORT_DCB_TYPE_APP_ID: {
@@ -220,13 +292,25 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap,
 		struct app_priority *ap = &dcb->app_priority[idx];
 
 		struct dcb_app app = {
-			.selector = fwap->sel_field,
 			.protocol = be16_to_cpu(fwap->protocolid),
-			.priority = fwap->user_prio_map,
 		};
 		int err;
 
-		err = dcb_setapp(dev, &app);
+		/* Convert from firmware format to relevant format
+		 * when using app selector
+		 */
+		if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) {
+			app.selector = (fwap->sel_field + 1);
+			app.priority = ffs(fwap->user_prio_map) - 1;
+			err = dcb_ieee_setapp(dev, &app);
+			IEEE_FAUX_SYNC(dev, dcb);
+		} else {
+			/* Default is CEE */
+			app.selector = !!(fwap->sel_field);
+			app.priority = fwap->user_prio_map;
+			err = dcb_setapp(dev, &app);
+		}
+
 		if (err)
 			dev_err(adap->pdev_dev,
 				"Failed DCB Set Application Priority: sel=%d, prot=%d, prio=%d, err=%d\n",
@@ -408,9 +492,10 @@ static void cxgb4_getpgbwgcfg(struct net_device *dev, int pgid, u8 *bw_per,
 	if (err != FW_PORT_DCB_CFG_SUCCESS) {
 		dev_err(adap->pdev_dev, "DCB read PGRATE failed with %d\n",
 			-err);
-	} else {
-		*bw_per = pcmd.u.dcb.pgrate.pgrate[pgid];
+		return;
 	}
+
+	*bw_per = pcmd.u.dcb.pgrate.pgrate[pgid];
 }
 
 static void cxgb4_getpgbwgcfg_tx(struct net_device *dev, int pgid, u8 *bw_per)
@@ -637,7 +722,8 @@ static int __cxgb4_getapp(struct net_device *dev, u8 app_idtype, u16 app_id,
 			return err;
 		}
 		if (be16_to_cpu(pcmd.u.dcb.app_priority.protocolid) == app_id)
-			return pcmd.u.dcb.app_priority.user_prio_map;
+			if (pcmd.u.dcb.app_priority.sel_field == app_idtype)
+				return pcmd.u.dcb.app_priority.user_prio_map;
 
 		/* exhausted app list */
 		if (!pcmd.u.dcb.app_priority.protocolid)
@@ -657,8 +743,8 @@ static int cxgb4_getapp(struct net_device *dev, u8 app_idtype, u16 app_id)
 
 /* Write a new Application User Priority Map for the specified Application ID
  */
-static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
-			u8 app_prio)
+static int __cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
+			  u8 app_prio)
 {
 	struct fw_port_cmd pcmd;
 	struct port_info *pi = netdev2pinfo(dev);
@@ -673,10 +759,6 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
 	if (!netif_carrier_ok(dev))
 		return -ENOLINK;
 
-	if (app_idtype != DCB_APP_IDTYPE_ETHTYPE &&
-	    app_idtype != DCB_APP_IDTYPE_PORTNUM)
-		return -EINVAL;
-
 	for (i = 0; i < CXGB4_MAX_DCBX_APP_SUPPORTED; i++) {
 		INIT_PORT_DCB_READ_LOCAL_CMD(pcmd, pi->port_id);
 		pcmd.u.dcb.app_priority.type = FW_PORT_DCB_TYPE_APP_ID;
@@ -725,6 +807,30 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
 	return 0;
 }
 
+/* Priority for CEE inside dcb_app is bitmask, with 0 being an invalid value */
+static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id,
+			u8 app_prio)
+{
+	int ret;
+	struct dcb_app app = {
+		.selector = app_idtype,
+		.protocol = app_id,
+		.priority = app_prio,
+	};
+
+	if (app_idtype != DCB_APP_IDTYPE_ETHTYPE &&
+	    app_idtype != DCB_APP_IDTYPE_PORTNUM)
+		return -EINVAL;
+
+	/* Convert app_idtype to a format that firmware understands */
+	ret = __cxgb4_setapp(dev, app_idtype == DCB_APP_IDTYPE_ETHTYPE ?
+			      app_idtype : 3, app_id, app_prio);
+	if (ret)
+		return ret;
+
+	return dcb_setapp(dev, &app);
+}
+
 /* Return whether IEEE Data Center Bridging has been negotiated.
  */
 static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev)
@@ -738,6 +844,7 @@ static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev)
 
 /* Fill in the Application User Priority Map associated with the
  * specified Application.
+ * Priority for IEEE dcb_app is an integer, with 0 being a valid value
  */
 static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app)
 {
@@ -748,28 +855,39 @@ static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app)
 	if (!(app->selector && app->protocol))
 		return -EINVAL;
 
-	prio = dcb_getapp(dev, app);
-	if (prio == 0) {
-		/* If app doesn't exist in dcb_app table, try firmware
-		 * directly.
-		 */
-		prio = __cxgb4_getapp(dev, app->selector, app->protocol, 0);
-	}
+	/* Try querying firmware first, use firmware format */
+	prio = __cxgb4_getapp(dev, app->selector - 1, app->protocol, 0);
+
+	if (prio < 0)
+		prio = dcb_ieee_getapp_mask(dev, app);
 
-	app->priority = prio;
+	app->priority = ffs(prio) - 1;
 	return 0;
 }
 
-/* Write a new Application User Priority Map for the specified App id. */
+/* Write a new Application User Priority Map for the specified Application ID.
+ * Priority for IEEE dcb_app is an integer, with 0 being a valid value
+ */
 static int cxgb4_ieee_setapp(struct net_device *dev, struct dcb_app *app)
 {
+	int ret;
+
 	if (!cxgb4_ieee_negotiation_complete(dev))
 		return -EINVAL;
-	if (!(app->selector && app->protocol && app->priority))
+	if (!(app->selector && app->protocol))
+		return -EINVAL;
+
+	if (!(app->selector > IEEE_8021QAZ_APP_SEL_ETHERTYPE  &&
+	      app->selector < IEEE_8021QAZ_APP_SEL_ANY))
 		return -EINVAL;
 
-	cxgb4_setapp(dev, app->selector, app->protocol, app->priority);
-	return dcb_setapp(dev, app);
+	/* change selector to a format that firmware understands */
+	ret = __cxgb4_setapp(dev, app->selector - 1, app->protocol,
+			     (1 << app->priority));
+	if (ret)
+		return ret;
+
+	return dcb_ieee_setapp(dev, app);
 }
 
 /* Return our DCBX parameters.
@@ -794,8 +912,9 @@ static u8 cxgb4_setdcbx(struct net_device *dev, u8 dcb_request)
 	    != dcb_request)
 		return 1;
 
-	/* Can't set DCBX capabilities if DCBX isn't enabled. */
-	if (!pi->dcb.state)
+	/* Can't enable DCB if we haven't successfully negotiated it.
+	 */
+	if (pi->dcb.state != CXGB4_DCB_STATE_FW_ALLSYNCED)
 		return 1;
 
 	/* There's currently no mechanism to allow for the firmware DCBX
@@ -874,7 +993,8 @@ static int cxgb4_getpeerapp_tbl(struct net_device *dev, struct dcb_app *table)
 		table[i].selector = pcmd.u.dcb.app_priority.sel_field;
 		table[i].protocol =
 			be16_to_cpu(pcmd.u.dcb.app_priority.protocolid);
-		table[i].priority = pcmd.u.dcb.app_priority.user_prio_map;
+		table[i].priority =
+			ffs(pcmd.u.dcb.app_priority.user_prio_map) - 1;
 	}
 	return err;
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
index 1ec1d83..2a6aa889 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
@@ -63,6 +63,13 @@
 #define INIT_PORT_DCB_WRITE_CMD(__pcmd, __port) \
 	INIT_PORT_DCB_CMD(__pcmd, __port, EXEC, FW_PORT_ACTION_L2_DCB_CFG)
 
+#define IEEE_FAUX_SYNC(__dev, __dcb) \
+	do { \
+		if ((__dcb)->dcb_version == FW_PORT_DCB_VER_IEEE) \
+			cxgb4_dcb_state_fsm((__dev), \
+					    CXGB4_DCB_STATE_FW_ALLSYNCED); \
+	} while (0)
+
 /* States we can be in for a port's Data Center Bridging.
  */
 enum cxgb4_dcb_state {
@@ -108,11 +115,13 @@ struct port_dcb_info {
 	 * Native Endian format).
 	 */
 	u32	pgid;			/* Priority Group[0..7] */
+	u8	dcb_version;		/* Running DCBx version */
 	u8	pfcen;			/* Priority Flow Control[0..7] */
 	u8	pg_num_tcs_supported;	/* max PG Traffic Classes */
 	u8	pfc_num_tcs_supported;	/* max PFC Traffic Classes */
 	u8	pgrate[8];		/* Priority Group Rate[0..7] */
 	u8	priorate[8];		/* Priority Rate[0..7] */
+	u8	tsa[8];			/* TSA Algorithm[0..7] */
 	struct app_priority { /* Application Information */
 		u8	user_prio_map;	/* Priority Map bitfield */
 		u8	sel_field;	/* Protocol ID interpretation */
@@ -121,6 +130,7 @@ struct port_dcb_info {
 };
 
 void cxgb4_dcb_state_init(struct net_device *);
+void cxgb4_dcb_version_init(struct net_device *);
 void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input);
 void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *);
 void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 8903e68..1afee70 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -522,6 +522,8 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
 			dev_err(adap->pdev_dev,
 				"Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
 				enable ? "set" : "unset", pi->port_id, i, -err);
+		else
+			txq->dcb_prio = value;
 	}
 }
 #endif /* CONFIG_CHELSIO_T4_DCB */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ff709e3..0549170 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1629,6 +1629,14 @@ enum fw_port_l2cfg_ctlbf {
 	FW_PORT_L2_CTLBF_TXIPG	= 0x20
 };
 
+enum fw_port_dcb_versions {
+	FW_PORT_DCB_VER_UNKNOWN,
+	FW_PORT_DCB_VER_CEE1D0,
+	FW_PORT_DCB_VER_CEE1D01,
+	FW_PORT_DCB_VER_IEEE,
+	FW_PORT_DCB_VER_AUTO = 7
+};
+
 enum fw_port_dcb_cfg {
 	FW_PORT_DCB_CFG_PG	= 0x01,
 	FW_PORT_DCB_CFG_PFC	= 0x02,
@@ -1709,6 +1717,7 @@ struct fw_port_cmd {
 				__u8   r10_lo[5];
 				__u8   num_tcs_supported;
 				__u8   pgrate[8];
+				__u8   tsa[8];
 			} pgrate;
 			struct fw_port_dcb_priorate {
 				__u8   type;
@@ -1735,7 +1744,7 @@ struct fw_port_cmd {
 			struct fw_port_dcb_control {
 				__u8   type;
 				__u8   all_syncd_pkd;
-				__be16 pfc_state_to_app_state;
+				__be16 dcb_version_to_app_state;
 				__be32 r11;
 				__be64 r12;
 			} control;
@@ -1778,6 +1787,7 @@ struct fw_port_cmd {
 #define FW_PORT_CMD_DCBXDIS (1U << 7)
 #define FW_PORT_CMD_APPLY (1U <<  7)
 #define FW_PORT_CMD_ALL_SYNCD (1U << 7)
+#define FW_PORT_CMD_DCB_VERSION_GET(x) (((x) >> 8) & 0xf)
 
 #define FW_PORT_CMD_PPPEN(x) ((x) << 31)
 #define FW_PORT_CMD_TPSRC(x) ((x) << 28)
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 23084fb..9b33057 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -93,7 +93,7 @@ enum dm9000_type {
 };
 
 /* Structure/enum declaration ------------------------------- */
-typedef struct board_info {
+struct board_info {
 
 	void __iomem	*io_addr;	/* Register I/O base address */
 	void __iomem	*io_data;	/* Data I/O address */
@@ -141,7 +141,7 @@ typedef struct board_info {
 	u32		wake_state;
 
 	int		ip_summed;
-} board_info_t;
+};
 
 /* debug code */
 
@@ -151,7 +151,7 @@ typedef struct board_info {
 	}						\
 } while (0)
 
-static inline board_info_t *to_dm9000_board(struct net_device *dev)
+static inline struct board_info *to_dm9000_board(struct net_device *dev)
 {
 	return netdev_priv(dev);
 }
@@ -162,7 +162,7 @@ static inline board_info_t *to_dm9000_board(struct net_device *dev)
  *   Read a byte from I/O port
  */
 static u8
-ior(board_info_t *db, int reg)
+ior(struct board_info *db, int reg)
 {
 	writeb(reg, db->io_addr);
 	return readb(db->io_data);
@@ -173,14 +173,14 @@ ior(board_info_t *db, int reg)
  */
 
 static void
-iow(board_info_t *db, int reg, int value)
+iow(struct board_info *db, int reg, int value)
 {
 	writeb(reg, db->io_addr);
 	writeb(value, db->io_data);
 }
 
 static void
-dm9000_reset(board_info_t *db)
+dm9000_reset(struct board_info *db)
 {
 	dev_dbg(db->dev, "resetting device\n");
 
@@ -272,7 +272,7 @@ static void dm9000_dumpblk_32bit(void __iomem *reg, int count)
  * Sleep, either by using msleep() or if we are suspending, then
  * use mdelay() to sleep.
  */
-static void dm9000_msleep(board_info_t *db, unsigned int ms)
+static void dm9000_msleep(struct board_info *db, unsigned int ms)
 {
 	if (db->in_suspend || db->in_timeout)
 		mdelay(ms);
@@ -284,7 +284,7 @@ static void dm9000_msleep(board_info_t *db, unsigned int ms)
 static int
 dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned long flags;
 	unsigned int reg_save;
 	int ret;
@@ -330,7 +330,7 @@ static void
 dm9000_phy_write(struct net_device *dev,
 		 int phyaddr_unused, int reg, int value)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned long flags;
 	unsigned long reg_save;
 
@@ -408,7 +408,7 @@ static void dm9000_set_io(struct board_info *db, int byte_width)
 	}
 }
 
-static void dm9000_schedule_poll(board_info_t *db)
+static void dm9000_schedule_poll(struct board_info *db)
 {
 	if (db->type == TYPE_DM9000E)
 		schedule_delayed_work(&db->phy_poll, HZ * 2);
@@ -416,7 +416,7 @@ static void dm9000_schedule_poll(board_info_t *db)
 
 static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	if (!netif_running(dev))
 		return -EINVAL;
@@ -425,7 +425,7 @@ static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 }
 
 static unsigned int
-dm9000_read_locked(board_info_t *db, int reg)
+dm9000_read_locked(struct board_info *db, int reg)
 {
 	unsigned long flags;
 	unsigned int ret;
@@ -437,7 +437,7 @@ dm9000_read_locked(board_info_t *db, int reg)
 	return ret;
 }
 
-static int dm9000_wait_eeprom(board_info_t *db)
+static int dm9000_wait_eeprom(struct board_info *db)
 {
 	unsigned int status;
 	int timeout = 8;	/* wait max 8msec */
@@ -474,7 +474,7 @@ static int dm9000_wait_eeprom(board_info_t *db)
  *  Read a word data from EEPROM
  */
 static void
-dm9000_read_eeprom(board_info_t *db, int offset, u8 *to)
+dm9000_read_eeprom(struct board_info *db, int offset, u8 *to)
 {
 	unsigned long flags;
 
@@ -514,7 +514,7 @@ dm9000_read_eeprom(board_info_t *db, int offset, u8 *to)
  * Write a word data to SROM
  */
 static void
-dm9000_write_eeprom(board_info_t *db, int offset, u8 *data)
+dm9000_write_eeprom(struct board_info *db, int offset, u8 *data)
 {
 	unsigned long flags;
 
@@ -546,7 +546,7 @@ dm9000_write_eeprom(board_info_t *db, int offset, u8 *data)
 static void dm9000_get_drvinfo(struct net_device *dev,
 			       struct ethtool_drvinfo *info)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	strlcpy(info->driver, CARDNAME, sizeof(info->driver));
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
@@ -556,21 +556,21 @@ static void dm9000_get_drvinfo(struct net_device *dev,
 
 static u32 dm9000_get_msglevel(struct net_device *dev)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	return dm->msg_enable;
 }
 
 static void dm9000_set_msglevel(struct net_device *dev, u32 value)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	dm->msg_enable = value;
 }
 
 static int dm9000_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	mii_ethtool_gset(&dm->mii, cmd);
 	return 0;
@@ -578,21 +578,21 @@ static int dm9000_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 static int dm9000_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	return mii_ethtool_sset(&dm->mii, cmd);
 }
 
 static int dm9000_nway_reset(struct net_device *dev)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	return mii_nway_restart(&dm->mii);
 }
 
 static int dm9000_set_features(struct net_device *dev,
 	netdev_features_t features)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	netdev_features_t changed = dev->features ^ features;
 	unsigned long flags;
 
@@ -608,7 +608,7 @@ static int dm9000_set_features(struct net_device *dev,
 
 static u32 dm9000_get_link(struct net_device *dev)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	u32 ret;
 
 	if (dm->flags & DM9000_PLATF_EXT_PHY)
@@ -629,7 +629,7 @@ static int dm9000_get_eeprom_len(struct net_device *dev)
 static int dm9000_get_eeprom(struct net_device *dev,
 			     struct ethtool_eeprom *ee, u8 *data)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	int offset = ee->offset;
 	int len = ee->len;
 	int i;
@@ -653,7 +653,7 @@ static int dm9000_get_eeprom(struct net_device *dev,
 static int dm9000_set_eeprom(struct net_device *dev,
 			     struct ethtool_eeprom *ee, u8 *data)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	int offset = ee->offset;
 	int len = ee->len;
 	int done;
@@ -691,7 +691,7 @@ static int dm9000_set_eeprom(struct net_device *dev,
 
 static void dm9000_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	memset(w, 0, sizeof(struct ethtool_wolinfo));
 
@@ -702,7 +702,7 @@ static void dm9000_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 
 static int dm9000_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 	unsigned long flags;
 	u32 opts = w->wolopts;
 	u32 wcr = 0;
@@ -752,7 +752,7 @@ static const struct ethtool_ops dm9000_ethtool_ops = {
 	.set_eeprom		= dm9000_set_eeprom,
 };
 
-static void dm9000_show_carrier(board_info_t *db,
+static void dm9000_show_carrier(struct board_info *db,
 				unsigned carrier, unsigned nsr)
 {
 	int lpa;
@@ -775,7 +775,7 @@ static void
 dm9000_poll_work(struct work_struct *w)
 {
 	struct delayed_work *dw = to_delayed_work(w);
-	board_info_t *db = container_of(dw, board_info_t, phy_poll);
+	struct board_info *db = container_of(dw, struct board_info, phy_poll);
 	struct net_device *ndev = db->ndev;
 
 	if (db->flags & DM9000_PLATF_SIMPLE_PHY &&
@@ -843,7 +843,7 @@ static unsigned char dm9000_type_to_char(enum dm9000_type type)
 static void
 dm9000_hash_table_unlocked(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	struct netdev_hw_addr *ha;
 	int i, oft;
 	u32 hash_val;
@@ -879,7 +879,7 @@ dm9000_hash_table_unlocked(struct net_device *dev)
 static void
 dm9000_hash_table(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&db->lock, flags);
@@ -888,13 +888,13 @@ dm9000_hash_table(struct net_device *dev)
 }
 
 static void
-dm9000_mask_interrupts(board_info_t *db)
+dm9000_mask_interrupts(struct board_info *db)
 {
 	iow(db, DM9000_IMR, IMR_PAR);
 }
 
 static void
-dm9000_unmask_interrupts(board_info_t *db)
+dm9000_unmask_interrupts(struct board_info *db)
 {
 	iow(db, DM9000_IMR, db->imr_all);
 }
@@ -905,7 +905,7 @@ dm9000_unmask_interrupts(board_info_t *db)
 static void
 dm9000_init_dm9000(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned int imr;
 	unsigned int ncr;
 
@@ -970,7 +970,7 @@ dm9000_init_dm9000(struct net_device *dev)
 /* Our watchdog timed out. Called by the networking layer */
 static void dm9000_timeout(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	u8 reg_save;
 	unsigned long flags;
 
@@ -996,7 +996,7 @@ static void dm9000_send_packet(struct net_device *dev,
 			       int ip_summed,
 			       u16 pkt_len)
 {
-	board_info_t *dm = to_dm9000_board(dev);
+	struct board_info *dm = to_dm9000_board(dev);
 
 	/* The DM9000 is not smart enough to leave fragmented packets alone. */
 	if (dm->ip_summed != ip_summed) {
@@ -1023,7 +1023,7 @@ static int
 dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	unsigned long flags;
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 
 	dm9000_dbg(db, 3, "%s:\n", __func__);
 
@@ -1062,7 +1062,7 @@ dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev)
  * receive the packet to upper layer, free the transmitted packet
  */
 
-static void dm9000_tx_done(struct net_device *dev, board_info_t *db)
+static void dm9000_tx_done(struct net_device *dev, struct board_info *db)
 {
 	int tx_status = ior(db, DM9000_NSR);	/* Got TX status */
 
@@ -1094,7 +1094,7 @@ struct dm9000_rxhdr {
 static void
 dm9000_rx(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	struct dm9000_rxhdr rxhdr;
 	struct sk_buff *skb;
 	u8 rxbyte, *rdptr;
@@ -1196,7 +1196,7 @@ dm9000_rx(struct net_device *dev)
 static irqreturn_t dm9000_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	int int_status;
 	unsigned long flags;
 	u8 reg_save;
@@ -1246,7 +1246,7 @@ static irqreturn_t dm9000_interrupt(int irq, void *dev_id)
 static irqreturn_t dm9000_wol_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned long flags;
 	unsigned nsr, wcr;
 
@@ -1296,7 +1296,7 @@ static void dm9000_poll_controller(struct net_device *dev)
 static int
 dm9000_open(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 	unsigned long irqflags = db->irq_res->flags & IRQF_TRIGGER_MASK;
 
 	if (netif_msg_ifup(db))
@@ -1342,7 +1342,7 @@ dm9000_open(struct net_device *dev)
 static void
 dm9000_shutdown(struct net_device *dev)
 {
-	board_info_t *db = netdev_priv(dev);
+	struct board_info *db = netdev_priv(dev);
 
 	/* RESET device */
 	dm9000_phy_write(dev, 0, MII_BMCR, BMCR_RESET);	/* PHY RESET */
@@ -1358,7 +1358,7 @@ dm9000_shutdown(struct net_device *dev)
 static int
 dm9000_stop(struct net_device *ndev)
 {
-	board_info_t *db = netdev_priv(ndev);
+	struct board_info *db = netdev_priv(ndev);
 
 	if (netif_msg_ifdown(db))
 		dev_dbg(db->dev, "shutting down %s\n", ndev->name);
@@ -1681,7 +1681,7 @@ dm9000_drv_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct net_device *ndev = platform_get_drvdata(pdev);
-	board_info_t *db;
+	struct board_info *db;
 
 	if (ndev) {
 		db = netdev_priv(ndev);
@@ -1704,7 +1704,7 @@ dm9000_drv_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct net_device *ndev = platform_get_drvdata(pdev);
-	board_info_t *db = netdev_priv(ndev);
+	struct board_info *db = netdev_priv(ndev);
 
 	if (ndev) {
 		if (netif_running(ndev)) {
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 811f135..43e08d0 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -897,5 +897,6 @@ void be_roce_dev_remove(struct be_adapter *);
  */
 void be_roce_dev_open(struct be_adapter *);
 void be_roce_dev_close(struct be_adapter *);
+void be_roce_dev_shutdown(struct be_adapter *);
 
 #endif				/* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index b06e540..93ff8ef 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5014,6 +5014,7 @@ static void be_shutdown(struct pci_dev *pdev)
 	if (!adapter)
 		return;
 
+	be_roce_dev_shutdown(adapter);
 	cancel_delayed_work_sync(&adapter->work);
 	cancel_delayed_work_sync(&adapter->func_recovery_work);
 
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 5bf1660..ef4672d 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter)
 {
 	if (ocrdma_drv && adapter->ocrdma_dev &&
 	    ocrdma_drv->state_change_handler)
-		ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0);
+		ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+						 BE_DEV_UP);
 }
 
 void be_roce_dev_open(struct be_adapter *adapter)
@@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter)
 {
 	if (ocrdma_drv && adapter->ocrdma_dev &&
 	    ocrdma_drv->state_change_handler)
-		ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1);
+		ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+						 BE_DEV_DOWN);
 }
 
 void be_roce_dev_close(struct be_adapter *adapter)
@@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter)
 	}
 }
 
+void be_roce_dev_shutdown(struct be_adapter *adapter)
+{
+	if (be_roce_supported(adapter)) {
+		mutex_lock(&be_adapter_list_lock);
+		if (ocrdma_drv && adapter->ocrdma_dev &&
+		    ocrdma_drv->state_change_handler)
+			ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
+							 BE_DEV_SHUTDOWN);
+		mutex_unlock(&be_adapter_list_lock);
+	}
+}
+
 int be_roce_register_driver(struct ocrdma_driver *drv)
 {
 	struct be_adapter *dev;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index a3d9e96..e6f7eb1 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -62,7 +62,8 @@ struct ocrdma_driver {
 
 enum {
 	BE_DEV_UP	= 0,
-	BE_DEV_DOWN	= 1
+	BE_DEV_DOWN	= 1,
+	BE_DEV_SHUTDOWN = 2
 };
 
 /* APIs for RoCE driver to register callback handlers,
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index bd53caf..9f7fa64 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -310,6 +310,7 @@ struct fec_enet_private {
 	int	mii_timeout;
 	uint	phy_speed;
 	phy_interface_t	phy_interface;
+	struct device_node *phy_node;
 	int	link;
 	int	full_duplex;
 	int	speed;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 66fe1f6..4f87dff 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -52,6 +52,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
+#include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include <linux/regulator/consumer.h>
 #include <linux/if_vlan.h>
@@ -1648,29 +1649,37 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 
 	fep->phy_dev = NULL;
 
-	/* check for attached phy */
-	for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
-		if ((fep->mii_bus->phy_mask & (1 << phy_id)))
-			continue;
-		if (fep->mii_bus->phy_map[phy_id] == NULL)
-			continue;
-		if (fep->mii_bus->phy_map[phy_id]->phy_id == 0)
-			continue;
-		if (dev_id--)
-			continue;
-		strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
-		break;
-	}
+	if (fep->phy_node) {
+		phy_dev = of_phy_connect(ndev, fep->phy_node,
+					 &fec_enet_adjust_link, 0,
+					 fep->phy_interface);
+	} else {
+		/* check for attached phy */
+		for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) {
+			if ((fep->mii_bus->phy_mask & (1 << phy_id)))
+				continue;
+			if (fep->mii_bus->phy_map[phy_id] == NULL)
+				continue;
+			if (fep->mii_bus->phy_map[phy_id]->phy_id == 0)
+				continue;
+			if (dev_id--)
+				continue;
+			strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE);
+			break;
+		}
 
-	if (phy_id >= PHY_MAX_ADDR) {
-		netdev_info(ndev, "no PHY, assuming direct connection to switch\n");
-		strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE);
-		phy_id = 0;
+		if (phy_id >= PHY_MAX_ADDR) {
+			netdev_info(ndev, "no PHY, assuming direct connection to switch\n");
+			strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE);
+			phy_id = 0;
+		}
+
+		snprintf(phy_name, sizeof(phy_name),
+			 PHY_ID_FMT, mdio_bus_id, phy_id);
+		phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,
+				      fep->phy_interface);
 	}
 
-	snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id);
-	phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link,
-			      fep->phy_interface);
 	if (IS_ERR(phy_dev)) {
 		netdev_err(ndev, "could not attach to PHY\n");
 		return PTR_ERR(phy_dev);
@@ -1707,6 +1716,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
+	struct device_node *node;
 	int err = -ENXIO, i;
 
 	/*
@@ -1774,7 +1784,15 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	for (i = 0; i < PHY_MAX_ADDR; i++)
 		fep->mii_bus->irq[i] = PHY_POLL;
 
-	if (mdiobus_register(fep->mii_bus))
+	node = of_get_child_by_name(pdev->dev.of_node, "mdio");
+	if (node) {
+		err = of_mdiobus_register(fep->mii_bus, node);
+		of_node_put(node);
+	} else {
+		err = mdiobus_register(fep->mii_bus);
+	}
+
+	if (err)
 		goto err_out_free_mdio_irq;
 
 	mii_cnt++;
@@ -2527,6 +2545,7 @@ fec_probe(struct platform_device *pdev)
 	struct resource *r;
 	const struct of_device_id *of_id;
 	static int dev_id;
+	struct device_node *np = pdev->dev.of_node, *phy_node;
 
 	of_id = of_match_device(fec_dt_ids, &pdev->dev);
 	if (of_id)
@@ -2566,6 +2585,18 @@ fec_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ndev);
 
+	phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!phy_node && of_phy_is_fixed_link(np)) {
+		ret = of_phy_register_fixed_link(np);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"broken fixed-link specification\n");
+			goto failed_phy;
+		}
+		phy_node = of_node_get(np);
+	}
+	fep->phy_node = phy_node;
+
 	ret = of_get_phy_mode(pdev->dev.of_node);
 	if (ret < 0) {
 		pdata = dev_get_platdata(&pdev->dev);
@@ -2670,6 +2701,8 @@ failed_init:
 failed_regulator:
 	fec_enet_clk_enable(ndev, false);
 failed_clk:
+failed_phy:
+	of_node_put(phy_node);
 failed_ioremap:
 	free_netdev(ndev);
 
@@ -2691,6 +2724,7 @@ fec_drv_remove(struct platform_device *pdev)
 	if (fep->ptp_clock)
 		ptp_clock_unregister(fep->ptp_clock);
 	fec_enet_clk_enable(ndev, false);
+	of_node_put(fep->phy_node);
 	free_netdev(ndev);
 
 	return 0;
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 9947765..ff55fbb 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -1015,8 +1015,7 @@ mpc52xx_fec_remove(struct platform_device *op)
 
 	unregister_netdev(ndev);
 
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
+	of_node_put(priv->phy_node);
 	priv->phy_node = NULL;
 
 	irq_dispose_mapping(ndev->irq);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index cfaf17b..748fd24 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -1033,7 +1033,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 		/* In the case of a fixed PHY, the DT node associated
 		 * to the PHY is the Ethernet MAC DT node.
 		 */
-		fpi->phy_node = ofdev->dev.of_node;
+		fpi->phy_node = of_node_get(ofdev->dev.of_node);
 	}
 
 	if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) {
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a6cf40e..fb29d04 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -892,12 +892,12 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	/* In the case of a fixed PHY, the DT node associated
 	 * to the PHY is the Ethernet MAC DT node.
 	 */
-	if (of_phy_is_fixed_link(np)) {
+	if (!priv->phy_node && of_phy_is_fixed_link(np)) {
 		err = of_phy_register_fixed_link(np);
 		if (err)
 			goto err_grp_init;
 
-		priv->phy_node = np;
+		priv->phy_node = of_node_get(np);
 	}
 
 	/* Find the TBI PHY.  If it's not there, we don't support SGMII */
@@ -1435,10 +1435,8 @@ register_fail:
 	unmap_group_regs(priv);
 	gfar_free_rx_queues(priv);
 	gfar_free_tx_queues(priv);
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
-	if (priv->tbi_node)
-		of_node_put(priv->tbi_node);
+	of_node_put(priv->phy_node);
+	of_node_put(priv->tbi_node);
 	free_gfar_dev(priv);
 	return err;
 }
@@ -1447,10 +1445,8 @@ static int gfar_remove(struct platform_device *ofdev)
 {
 	struct gfar_private *priv = platform_get_drvdata(ofdev);
 
-	if (priv->phy_node)
-		of_node_put(priv->phy_node);
-	if (priv->tbi_node)
-		of_node_put(priv->tbi_node);
+	of_node_put(priv->phy_node);
+	of_node_put(priv->tbi_node);
 
 	unregister_netdev(priv->ndev);
 	unmap_group_regs(priv);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 8ceaf7a..3cf0478 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3785,16 +3785,15 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	ug_info->uf_info.irq = irq_of_parse_and_map(np, 0);
 
 	ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0);
-	if (!ug_info->phy_node) {
-		/* In the case of a fixed PHY, the DT node associated
+	if (!ug_info->phy_node && of_phy_is_fixed_link(np)) {
+		/*
+		 * In the case of a fixed PHY, the DT node associated
 		 * to the PHY is the Ethernet MAC DT node.
 		 */
-		if (of_phy_is_fixed_link(np)) {
-			err = of_phy_register_fixed_link(np);
-			if (err)
-				return err;
-		}
-		ug_info->phy_node = np;
+		err = of_phy_register_fixed_link(np);
+		if (err)
+			return err;
+		ug_info->phy_node = of_node_get(np);
 	}
 
 	/* Find the TBI PHY node.  If it's not there, we don't support SGMII */
@@ -3862,8 +3861,11 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	/* Create an ethernet device instance */
 	dev = alloc_etherdev(sizeof(*ugeth));
 
-	if (dev == NULL)
+	if (dev == NULL) {
+		of_node_put(ug_info->tbi_node);
+		of_node_put(ug_info->phy_node);
 		return -ENOMEM;
+	}
 
 	ugeth = netdev_priv(dev);
 	spin_lock_init(&ugeth->lock);
@@ -3897,6 +3899,8 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 			pr_err("%s: Cannot register net device, aborting\n",
 			       dev->name);
 		free_netdev(dev);
+		of_node_put(ug_info->tbi_node);
+		of_node_put(ug_info->phy_node);
 		return err;
 	}
 
@@ -3920,6 +3924,8 @@ static int ucc_geth_remove(struct platform_device* ofdev)
 	unregister_netdev(dev);
 	free_netdev(dev);
 	ucc_geth_memclean(ugeth);
+	of_node_put(ugeth->ug_info->tbi_node);
+	of_node_put(ugeth->ug_info->phy_node);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index cfe7a74..a7139f5 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -99,23 +99,23 @@ static const struct ethtool_ops netdev_ethtool_ops;
 /*
     card type
  */
-typedef enum { MBH10302, MBH10304, TDK, CONTEC, LA501, UNGERMANN, 
+enum cardtype { MBH10302, MBH10304, TDK, CONTEC, LA501, UNGERMANN,
 	       XXX10304, NEC, KME
-} cardtype_t;
+};
 
 /*
     driver specific data structure
 */
-typedef struct local_info_t {
+struct local_info {
 	struct pcmcia_device	*p_dev;
     long open_time;
     uint tx_started:1;
     uint tx_queue;
     u_short tx_queue_len;
-    cardtype_t cardtype;
+    enum cardtype cardtype;
     u_short sent;
     u_char __iomem *base;
-} local_info_t;
+};
 
 #define MC_FILTERBREAK 64
 
@@ -232,13 +232,13 @@ static const struct net_device_ops fjn_netdev_ops = {
 
 static int fmvj18x_probe(struct pcmcia_device *link)
 {
-    local_info_t *lp;
+    struct local_info *lp;
     struct net_device *dev;
 
     dev_dbg(&link->dev, "fmvj18x_attach()\n");
 
     /* Make up a FMVJ18x specific data structure */
-    dev = alloc_etherdev(sizeof(local_info_t));
+    dev = alloc_etherdev(sizeof(struct local_info));
     if (!dev)
 	return -ENOMEM;
     lp = netdev_priv(dev);
@@ -327,10 +327,10 @@ static int fmvj18x_ioprobe(struct pcmcia_device *p_dev, void *priv_data)
 static int fmvj18x_config(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     int i, ret;
     unsigned int ioaddr;
-    cardtype_t cardtype;
+    enum cardtype cardtype;
     char *card_name = "unknown";
     u8 *buf;
     size_t len;
@@ -584,7 +584,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link)
     int i;
     struct net_device *dev = link->priv;
     unsigned int ioaddr;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
 
     /* Allocate a small memory window */
     link->resource[3]->flags = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE;
@@ -626,7 +626,7 @@ static void fmvj18x_release(struct pcmcia_device *link)
 {
 
     struct net_device *dev = link->priv;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     u_char __iomem *tmp;
 
     dev_dbg(&link->dev, "fmvj18x_release\n");
@@ -711,7 +711,7 @@ module_pcmcia_driver(fmvj18x_cs_driver);
 static irqreturn_t fjn_interrupt(int dummy, void *dev_id)
 {
     struct net_device *dev = dev_id;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr;
     unsigned short tx_stat, rx_stat;
 
@@ -772,7 +772,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id)
 
 static void fjn_tx_timeout(struct net_device *dev)
 {
-    struct local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
 
     netdev_notice(dev, "transmit timed out with status %04x, %s?\n",
@@ -802,7 +802,7 @@ static void fjn_tx_timeout(struct net_device *dev)
 static netdev_tx_t fjn_start_xmit(struct sk_buff *skb,
 					struct net_device *dev)
 {
-    struct local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     short length = skb->len;
     
@@ -874,7 +874,7 @@ static netdev_tx_t fjn_start_xmit(struct sk_buff *skb,
 
 static void fjn_reset(struct net_device *dev)
 {
-    struct local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     int i;
 
@@ -1058,7 +1058,7 @@ static int fjn_config(struct net_device *dev, struct ifmap *map){
 
 static int fjn_open(struct net_device *dev)
 {
-    struct local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
 
     pr_debug("fjn_open('%s').\n", dev->name);
@@ -1083,7 +1083,7 @@ static int fjn_open(struct net_device *dev)
 
 static int fjn_close(struct net_device *dev)
 {
-    struct local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
     unsigned int ioaddr = dev->base_addr;
 
diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile
index 775d996..cd473e2 100644
--- a/drivers/net/ethernet/ibm/ehea/Makefile
+++ b/drivers/net/ethernet/ibm/ehea/Makefile
@@ -1,6 +1,6 @@
 #
 # Makefile for the eHEA ethernet device driver for IBM eServer System p
 #
-ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o
+ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o
 obj-$(CONFIG_EHEA) += ehea.o
 
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index 5885603..06edfca 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -47,7 +47,7 @@ static u8 e1000_calculate_checksum(u8 *buffer, u32 length)
  *  e1000_mng_enable_host_if - Checks host interface is enabled
  *  @hw: pointer to the HW structure
  *
- *  Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND
+ *  Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
  *
  *  This function checks whether the HOST IF is enabled for command operation
  *  and also checks whether the previous command is completed.  It busy waits
@@ -78,7 +78,7 @@ static s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
 	}
 
 	if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
-		e_dbg("Previous command timeout failed .\n");
+		e_dbg("Previous command timeout failed.\n");
 		return -E1000_ERR_HOST_INTERFACE_COMMAND;
 	}
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 6938fc1a..5d01db1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -33,6 +33,7 @@
 #include <scsi/fc/fc_fcoe.h>
 #include <scsi/libfc.h>
 #include <scsi/libfcoe.h>
+#include <uapi/linux/dcbnl.h>
 
 #include "i40e.h"
 #include "i40e_fcoe.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d9f4972..eddec6b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4415,13 +4415,13 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 
 	switch (vsi->back->hw.phy.link_info.link_speed) {
 	case I40E_LINK_SPEED_40GB:
-		strncpy(speed, "40 Gbps", SPEED_SIZE);
+		strlcpy(speed, "40 Gbps", SPEED_SIZE);
 		break;
 	case I40E_LINK_SPEED_10GB:
-		strncpy(speed, "10 Gbps", SPEED_SIZE);
+		strlcpy(speed, "10 Gbps", SPEED_SIZE);
 		break;
 	case I40E_LINK_SPEED_1GB:
-		strncpy(speed, "1000 Mbps", SPEED_SIZE);
+		strlcpy(speed, "1000 Mbps", SPEED_SIZE);
 		break;
 	default:
 		break;
@@ -4429,16 +4429,16 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 
 	switch (vsi->back->hw.fc.current_mode) {
 	case I40E_FC_FULL:
-		strncpy(fc, "RX/TX", FC_SIZE);
+		strlcpy(fc, "RX/TX", FC_SIZE);
 		break;
 	case I40E_FC_TX_PAUSE:
-		strncpy(fc, "TX", FC_SIZE);
+		strlcpy(fc, "TX", FC_SIZE);
 		break;
 	case I40E_FC_RX_PAUSE:
-		strncpy(fc, "RX", FC_SIZE);
+		strlcpy(fc, "RX", FC_SIZE);
 		break;
 	default:
-		strncpy(fc, "None", FC_SIZE);
+		strlcpy(fc, "None", FC_SIZE);
 		break;
 	}
 
@@ -5839,7 +5839,7 @@ static void i40e_send_version(struct i40e_pf *pf)
 	dv.minor_version = DRV_VERSION_MINOR;
 	dv.build_version = DRV_VERSION_BUILD;
 	dv.subbuild_version = 0;
-	strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
+	strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
 	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
 }
 
@@ -6293,7 +6293,7 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
 
 	if (alloc_qvectors) {
 		/* allocate memory for q_vector pointers */
-		size = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors;
+		size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
 		vsi->q_vectors = kzalloc(size, GFP_KERNEL);
 		if (!vsi->q_vectors) {
 			ret = -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 97bda3d..25c4f9a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -251,9 +251,9 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
  *
  * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
  **/
-i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
-			      u32 offset, u16 words, void *data,
-			      bool last_command)
+static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+				     u32 offset, u16 words, void *data,
+				     bool last_command)
 {
 	i40e_status ret_code = I40E_ERR_NVM;
 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index dadd9a5..c9f1d1b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2969,14 +2969,14 @@ static int mvneta_probe(struct platform_device *pdev)
 		/* In the case of a fixed PHY, the DT node associated
 		 * to the PHY is the Ethernet MAC DT node.
 		 */
-		phy_node = dn;
+		phy_node = of_node_get(dn);
 	}
 
 	phy_mode = of_get_phy_mode(dn);
 	if (phy_mode < 0) {
 		dev_err(&pdev->dev, "incorrect phy-mode\n");
 		err = -EINVAL;
-		goto err_free_irq;
+		goto err_put_phy_node;
 	}
 
 	dev->tx_queue_len = MVNETA_MAX_TXD;
@@ -2992,7 +2992,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
-		goto err_free_irq;
+		goto err_put_phy_node;
 	}
 
 	clk_prepare_enable(pp->clk);
@@ -3071,6 +3071,8 @@ err_free_stats:
 	free_percpu(pp->stats);
 err_clk:
 	clk_disable_unprepare(pp->clk);
+err_put_phy_node:
+	of_node_put(phy_node);
 err_free_irq:
 	irq_dispose_mapping(dev->irq);
 err_free_netdev:
@@ -3088,6 +3090,7 @@ static int mvneta_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pp->clk);
 	free_percpu(pp->stats);
 	irq_dispose_mapping(dev->irq);
+	of_node_put(pp->phy_node);
 	free_netdev(dev);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5d940a2..65a4a0f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.wrapper = mlx4_MAD_IFC_wrapper
 	},
 	{
+		.opcode = MLX4_CMD_MAD_DEMUX,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_CMD_EPERM_wrapper
+	},
+	{
 		.opcode = MLX4_CMD_QUERY_IF_STAT,
 		.has_inbox = false,
 		.has_outbox = true,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 688e1ea..494753e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
 		[7] = "FSM (MAC anti-spoofing) support",
 		[8] = "Dynamic QP updates support",
 		[9] = "Device managed flow steering IPoIB support",
-		[10] = "TCP/IP offloads/flow-steering for VXLAN support"
+		[10] = "TCP/IP offloads/flow-steering for VXLAN support",
+		[11] = "MAD DEMUX (Secure-Host) support"
 	};
 	int i;
 
@@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET		0xa0
 #define QUERY_DEV_CAP_FW_REASSIGN_MAC		0x9d
 #define QUERY_DEV_CAP_VXLAN			0x9e
+#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET		0xb0
 
 	dev_cap->flags2 = 0;
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		MLX4_GET(dev_cap->max_counters, outbox,
 			 QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
 
+	MLX4_GET(field32, outbox,
+		 QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
+	if (field32 & (1 << 0))
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
+
 	MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
 	if (field32 & (1 << 16))
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work)
 out:
 	mlx4_free_cmd_mailbox(dev, mailbox);
 }
+
+static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
+					  struct mlx4_cmd_mailbox *mailbox)
+{
+#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET		0x10
+#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET		0x20
+#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET		0x40
+#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET	0x70
+
+	u32 set_attr_mask, getresp_attr_mask;
+	u32 trap_attr_mask, traprepress_attr_mask;
+
+	MLX4_GET(set_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
+		 set_attr_mask);
+
+	MLX4_GET(getresp_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
+		 getresp_attr_mask);
+
+	MLX4_GET(trap_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
+		 trap_attr_mask);
+
+	MLX4_GET(traprepress_attr_mask, mailbox->buf,
+		 MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
+	mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
+		 traprepress_attr_mask);
+
+	if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
+	    traprepress_attr_mask)
+		return 1;
+
+	return 0;
+}
+
+int mlx4_config_mad_demux(struct mlx4_dev *dev)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int secure_host_active;
+	int err;
+
+	/* Check if mad_demux is supported */
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
+		return 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
+		return -ENOMEM;
+	}
+
+	/* Query mad_demux to find out which MADs are handled by internal sma */
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
+			   MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
+			   MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err) {
+		mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
+			  err);
+		goto out;
+	}
+
+	secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
+
+	/* Config mad_demux to handle all MADs returned by the query above */
+	err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
+		       MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err) {
+		mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
+		goto out;
+	}
+
+	if (secure_host_active)
+		mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 110e020..7e2d5d5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1853,6 +1853,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 			mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
 			goto err_mr_table_free;
 		}
+		err = mlx4_config_mad_demux(dev);
+		if (err) {
+			mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
+			goto err_mcg_table_free;
+		}
 	}
 
 	err = mlx4_init_eq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 13fbcd0..b508c78 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -274,6 +274,8 @@ struct mlx4_icm_table {
 #define MLX4_MPT_FLAG_PHYSICAL	    (1 <<  9)
 #define MLX4_MPT_FLAG_REGION	    (1 <<  8)
 
+#define MLX4_MPT_PD_MASK	    (0x1FFFFUL)
+#define MLX4_MPT_PD_VF_MASK	    (0xFE0000UL)
 #define MLX4_MPT_PD_FLAG_FAST_REG   (1 << 27)
 #define MLX4_MPT_PD_FLAG_RAE	    (1 << 28)
 #define MLX4_MPT_PD_FLAG_EN_INV	    (3 << 24)
@@ -1306,5 +1308,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev);
 int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
 /* Returns the VF index of slave */
 int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
+int mlx4_config_mad_demux(struct mlx4_dev *dev);
 
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2839abb..7d717ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
+int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+		       struct mlx4_mpt_entry ***mpt_entry)
+{
+	int err;
+	int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+	struct mlx4_cmd_mailbox *mailbox = NULL;
+
+	/* Make sure that at this point we have single-threaded access only */
+
+	if (mmr->enabled != MLX4_MPT_EN_HW)
+		return -EINVAL;
+
+	err = mlx4_HW2SW_MPT(dev, NULL, key);
+
+	if (err) {
+		mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
+		mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
+		return err;
+	}
+
+	mmr->enabled = MLX4_MPT_EN_SW;
+
+	if (!mlx4_is_mfunc(dev)) {
+		**mpt_entry = mlx4_table_find(
+				&mlx4_priv(dev)->mr_table.dmpt_table,
+				key, NULL);
+	} else {
+		mailbox = mlx4_alloc_cmd_mailbox(dev);
+		if (IS_ERR_OR_NULL(mailbox))
+			return PTR_ERR(mailbox);
+
+		err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
+				   0, MLX4_CMD_QUERY_MPT,
+				   MLX4_CMD_TIME_CLASS_B,
+				   MLX4_CMD_WRAPPED);
+
+		if (err)
+			goto free_mailbox;
+
+		*mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
+	}
+
+	if (!(*mpt_entry) || !(**mpt_entry)) {
+		err = -ENOMEM;
+		goto free_mailbox;
+	}
+
+	return 0;
+
+free_mailbox:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
+
+int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
+			 struct mlx4_mpt_entry **mpt_entry)
+{
+	int err;
+
+	if (!mlx4_is_mfunc(dev)) {
+		/* Make sure any changes to this entry are flushed */
+		wmb();
+
+		*(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
+
+		/* Make sure the new status is written */
+		wmb();
+
+		err = mlx4_SYNC_TPT(dev);
+	} else {
+		int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
+
+		struct mlx4_cmd_mailbox *mailbox =
+			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+				     buf);
+
+		err = mlx4_SW2HW_MPT(dev, mailbox, key);
+	}
+
+	mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
+	if (!err)
+		mmr->enabled = MLX4_MPT_EN_HW;
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
+
+void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
+			struct mlx4_mpt_entry **mpt_entry)
+{
+	if (mlx4_is_mfunc(dev)) {
+		struct mlx4_cmd_mailbox *mailbox =
+			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
+				     buf);
+		mlx4_free_cmd_mailbox(dev, mailbox);
+	}
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
+
+int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
+			 u32 pdn)
+{
+	u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
+	/* The wrapper function will put the slave's id here */
+	if (mlx4_is_mfunc(dev))
+		pd_flags &= ~MLX4_MPT_PD_VF_MASK;
+	mpt_entry->pd_flags = cpu_to_be32((pd_flags &  ~MLX4_MPT_PD_MASK) |
+					  (pdn & MLX4_MPT_PD_MASK)
+					  | MLX4_MPT_PD_FLAG_EN_INV);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
+
+int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
+			     struct mlx4_mpt_entry *mpt_entry,
+			     u32 access)
+{
+	u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
+		    (access & MLX4_PERM_MASK);
+
+	mpt_entry->flags = cpu_to_be32(flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
+
 static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
 			   u64 iova, u64 size, u32 access, int npages,
 			   int page_shift, struct mlx4_mr *mr)
@@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
 }
 EXPORT_SYMBOL_GPL(mlx4_mr_free);
 
+void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
+{
+	mlx4_mtt_cleanup(dev, &mr->mtt);
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
+
+int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
+			    u64 iova, u64 size, int npages,
+			    int page_shift, struct mlx4_mpt_entry *mpt_entry)
+{
+	int err;
+
+	mpt_entry->start       = cpu_to_be64(mr->iova);
+	mpt_entry->length      = cpu_to_be64(mr->size);
+	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
+
+	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
+	if (err)
+		return err;
+
+	if (mr->mtt.order < 0) {
+		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
+		mpt_entry->mtt_addr = 0;
+	} else {
+		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+						  &mr->mtt));
+		if (mr->mtt.page_shift == 0)
+			mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
+	}
+	mr->enabled = MLX4_MPT_EN_SW;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
+
 int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 {
 	struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0efc136..1089367 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
 	if (err)
 		return err;
 
-	if (mpt->com.from_state != RES_MPT_HW) {
+	if (mpt->com.from_state == RES_MPT_MAPPED) {
+		/* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
+		 * that, the VF must read the MPT. But since the MPT entry memory is not
+		 * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
+		 * entry contents. To guarantee that the MPT cannot be changed, the driver
+		 * must perform HW2SW_MPT before this query and return the MPT entry to HW
+		 * ownership fofollowing the change. The change here allows the VF to
+		 * perform QUERY_MPT also when the entry is in SW ownership.
+		 */
+		struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
+					&mlx4_priv(dev)->mr_table.dmpt_table,
+					mpt->key, NULL);
+
+		if (NULL == mpt_entry || NULL == outbox->buf) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
+
+		err = 0;
+	} else if (mpt->com.from_state == RES_MPT_HW) {
+		err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
+	} else {
 		err = -EBUSY;
 		goto out;
 	}
 
-	err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
 
 out:
 	put_res(dev, slave, id, RES_MPT);
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 4421004..9e7e3f1 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -574,6 +574,7 @@ myri10ge_validate_firmware(struct myri10ge_priv *mgp,
 
 	/* save firmware version for ethtool */
 	strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version));
+	mgp->fw_version[sizeof(mgp->fw_version) - 1] = '\0';
 
 	sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major,
 	       &mgp->fw_ver_minor, &mgp->fw_ver_tiny);
@@ -872,6 +873,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
 		return -ENOMEM;
 	dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
 				   DMA_BIDIRECTIONAL);
+	if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+		__free_page(dmatest_page);
+		return -ENOMEM;
+	}
 
 	/* Run a small DMA test.
 	 * The magic multipliers to the length tell the firmware
@@ -1293,6 +1298,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 			int bytes, int watchdog)
 {
 	struct page *page;
+	dma_addr_t bus;
 	int idx;
 #if MYRI10GE_ALLOC_SIZE > 4096
 	int end_offset;
@@ -1317,11 +1323,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 					rx->watchdog_needed = 1;
 				return;
 			}
+
+			bus = pci_map_page(mgp->pdev, page, 0,
+					   MYRI10GE_ALLOC_SIZE,
+					   PCI_DMA_FROMDEVICE);
+			if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+				__free_pages(page, MYRI10GE_ALLOC_ORDER);
+				if (rx->fill_cnt - rx->cnt < 16)
+					rx->watchdog_needed = 1;
+				return;
+			}
+
 			rx->page = page;
 			rx->page_offset = 0;
-			rx->bus = pci_map_page(mgp->pdev, page, 0,
-					       MYRI10GE_ALLOC_SIZE,
-					       PCI_DMA_FROMDEVICE);
+			rx->bus = bus;
+
 		}
 		rx->info[idx].page = rx->page;
 		rx->info[idx].page_offset = rx->page_offset;
@@ -2763,6 +2779,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
 	mb();
 }
 
+static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
+				  struct myri10ge_tx_buf *tx, int idx)
+{
+	unsigned int len;
+	int last_idx;
+
+	/* Free any DMA resources we've alloced and clear out the skb slot */
+	last_idx = (idx + 1) & tx->mask;
+	idx = tx->req & tx->mask;
+	do {
+		len = dma_unmap_len(&tx->info[idx], len);
+		if (len) {
+			if (tx->info[idx].skb != NULL)
+				pci_unmap_single(mgp->pdev,
+						 dma_unmap_addr(&tx->info[idx],
+								bus), len,
+						 PCI_DMA_TODEVICE);
+			else
+				pci_unmap_page(mgp->pdev,
+					       dma_unmap_addr(&tx->info[idx],
+							      bus), len,
+					       PCI_DMA_TODEVICE);
+			dma_unmap_len_set(&tx->info[idx], len, 0);
+			tx->info[idx].skb = NULL;
+		}
+		idx = (idx + 1) & tx->mask;
+	} while (idx != last_idx);
+}
+
 /*
  * Transmit a packet.  We need to split the packet so that a single
  * segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2786,7 +2831,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
 	u32 low;
 	__be32 high_swapped;
 	unsigned int len;
-	int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+	int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
 	u16 pseudo_hdr_offset, cksum_offset, queue;
 	int cum_len, seglen, boundary, rdma_count;
 	u8 flags, odd_flag;
@@ -2883,9 +2928,12 @@ again:
 
 	/* map the skb for DMA */
 	len = skb_headlen(skb);
+	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+	if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+		goto drop;
+
 	idx = tx->req & tx->mask;
 	tx->info[idx].skb = skb;
-	bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
 	dma_unmap_addr_set(&tx->info[idx], bus, bus);
 	dma_unmap_len_set(&tx->info[idx], len, len);
 
@@ -2984,12 +3032,16 @@ again:
 			break;
 
 		/* map next fragment for DMA */
-		idx = (count + tx->req) & tx->mask;
 		frag = &skb_shinfo(skb)->frags[frag_idx];
 		frag_idx++;
 		len = skb_frag_size(frag);
 		bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
 				       DMA_TO_DEVICE);
+		if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+			myri10ge_unmap_tx_dma(mgp, tx, idx);
+			goto drop;
+		}
+		idx = (count + tx->req) & tx->mask;
 		dma_unmap_addr_set(&tx->info[idx], bus, bus);
 		dma_unmap_len_set(&tx->info[idx], len, len);
 	}
@@ -3020,31 +3072,8 @@ again:
 	return NETDEV_TX_OK;
 
 abort_linearize:
-	/* Free any DMA resources we've alloced and clear out the skb
-	 * slot so as to not trip up assertions, and to avoid a
-	 * double-free if linearizing fails */
+	myri10ge_unmap_tx_dma(mgp, tx, idx);
 
-	last_idx = (idx + 1) & tx->mask;
-	idx = tx->req & tx->mask;
-	tx->info[idx].skb = NULL;
-	do {
-		len = dma_unmap_len(&tx->info[idx], len);
-		if (len) {
-			if (tx->info[idx].skb != NULL)
-				pci_unmap_single(mgp->pdev,
-						 dma_unmap_addr(&tx->info[idx],
-								bus), len,
-						 PCI_DMA_TODEVICE);
-			else
-				pci_unmap_page(mgp->pdev,
-					       dma_unmap_addr(&tx->info[idx],
-							      bus), len,
-					       PCI_DMA_TODEVICE);
-			dma_unmap_len_set(&tx->info[idx], len, 0);
-			tx->info[idx].skb = NULL;
-		}
-		idx = (idx + 1) & tx->mask;
-	} while (idx != last_idx);
 	if (skb_is_gso(skb)) {
 		netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
 		goto drop;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/Makefile b/drivers/net/ethernet/qlogic/qlcnic/Makefile
index a848d29..3c2c2c7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/Makefile
+++ b/drivers/net/ethernet/qlogic/qlcnic/Makefile
@@ -8,7 +8,7 @@ qlcnic-y := qlcnic_hw.o qlcnic_main.o qlcnic_init.o \
 	qlcnic_ethtool.o qlcnic_ctx.o qlcnic_io.o \
 	qlcnic_sysfs.o qlcnic_minidump.o qlcnic_83xx_hw.o \
 	qlcnic_83xx_init.o qlcnic_83xx_vnic.o \
-	qlcnic_minidump.o qlcnic_sriov_common.o
+	qlcnic_sriov_common.o
 
 qlcnic-$(CONFIG_QLCNIC_SRIOV) += qlcnic_sriov_pf.o
 
diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h
index 2395395..54d6489 100644
--- a/drivers/net/ethernet/smsc/smsc911x.h
+++ b/drivers/net/ethernet/smsc/smsc911x.h
@@ -51,7 +51,7 @@
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 #define SMSC_ASSERT_MAC_LOCK(pdata) \
-		WARN_ON(!spin_is_locked(&pdata->mac_lock))
+		WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock))
 #else
 #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0)
 #endif				/* CONFIG_DEBUG_SPINLOCK */
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index d813bfb..23c89ab 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -32,6 +32,11 @@ MODULE_DESCRIPTION("Sun LDOM virtual network driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
+/* Heuristic for the number of times to exponentially backoff and
+ * retry sending an LDC trigger when EAGAIN is encountered
+ */
+#define	VNET_MAX_RETRIES	10
+
 /* Ordered from largest major to lowest */
 static struct vio_version vnet_versions[] = {
 	{ .major = 1, .minor = 0 },
@@ -260,6 +265,7 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
 		.state			= vio_dring_state,
 	};
 	int err, delay;
+	int retries = 0;
 
 	hdr.seq = dr->snd_nxt;
 	delay = 1;
@@ -272,6 +278,13 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
 		udelay(delay);
 		if ((delay <<= 1) > 128)
 			delay = 128;
+		if (retries++ > VNET_MAX_RETRIES) {
+			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
+				port->raddr[0], port->raddr[1],
+				port->raddr[2], port->raddr[3],
+				port->raddr[4], port->raddr[5]);
+			err = -ECONNRESET;
+		}
 	} while (err == -EAGAIN);
 
 	return err;
@@ -475,8 +488,9 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
 	return 0;
 }
 
-static void maybe_tx_wakeup(struct vnet *vp)
+static void maybe_tx_wakeup(unsigned long param)
 {
+	struct vnet *vp = (struct vnet *)param;
 	struct net_device *dev = vp->dev;
 
 	netif_tx_lock(dev);
@@ -573,8 +587,13 @@ static void vnet_event(void *arg, int event)
 			break;
 	}
 	spin_unlock(&vio->lock);
+	/* Kick off a tasklet to wake the queue.  We cannot call
+	 * maybe_tx_wakeup directly here because we could deadlock on
+	 * netif_tx_lock() with dev_watchdog()
+	 */
 	if (unlikely(tx_wakeup && err != -ECONNRESET))
-		maybe_tx_wakeup(port->vp);
+		tasklet_schedule(&port->vp->vnet_tx_wakeup);
+
 	local_irq_restore(flags);
 }
 
@@ -593,6 +612,7 @@ static int __vnet_tx_trigger(struct vnet_port *port)
 		.end_idx		= (u32) -1,
 	};
 	int err, delay;
+	int retries = 0;
 
 	hdr.seq = dr->snd_nxt;
 	delay = 1;
@@ -605,6 +625,8 @@ static int __vnet_tx_trigger(struct vnet_port *port)
 		udelay(delay);
 		if ((delay <<= 1) > 128)
 			delay = 128;
+		if (retries++ > VNET_MAX_RETRIES)
+			break;
 	} while (err == -EAGAIN);
 
 	return err;
@@ -691,7 +713,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
 	}
 
-	d->hdr.ack = VIO_ACK_ENABLE;
+	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
+	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
+	 * the protocol itself does not require it as long as the peer
+	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
+	 *
+	 * An ACK for every packet in the ring is expensive as the
+	 * sending of LDC messages is slow and affects performance.
+	 */
+	d->hdr.ack = VIO_ACK_DISABLE;
 	d->size = len;
 	d->ncookies = port->tx_bufs[dr->prod].ncookies;
 	for (i = 0; i < d->ncookies; i++)
@@ -1046,6 +1076,7 @@ static struct vnet *vnet_new(const u64 *local_mac)
 	vp = netdev_priv(dev);
 
 	spin_lock_init(&vp->lock);
+	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
 	vp->dev = dev;
 
 	INIT_LIST_HEAD(&vp->port_list);
@@ -1105,6 +1136,7 @@ static void vnet_cleanup(void)
 		vp = list_first_entry(&vnet_list, struct vnet, list);
 		list_del(&vp->list);
 		dev = vp->dev;
+		tasklet_kill(&vp->vnet_tx_wakeup);
 		/* vio_unregister_driver() should have cleaned up port_list */
 		BUG_ON(!list_empty(&vp->port_list));
 		unregister_netdev(dev);
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index d347a5b..de5c2c6 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -1,6 +1,8 @@
 #ifndef _SUNVNET_H
 #define _SUNVNET_H
 
+#include <linux/interrupt.h>
+
 #define DESC_NCOOKIES(entry_size)	\
 	((entry_size) - sizeof(struct vio_net_desc))
 
@@ -78,6 +80,8 @@ struct vnet {
 
 	struct list_head	list;
 	u64			local_mac;
+
+	struct tasklet_struct	vnet_tx_wakeup;
 };
 
 #endif /* _SUNVNET_H */
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 3809f4e..f9bcf7a 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1130,6 +1130,7 @@ static int cpmac_probe(struct platform_device *pdev)
 		strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); /* fixed phys bus */
 		phy_id = pdev->id;
 	}
+	mdio_bus_id[sizeof(mdio_bus_id) - 1] = '\0';
 
 	dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES);
 	if (!dev)
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 8a6e5c2..fda5891 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1148,8 +1148,7 @@ static int temac_of_remove(struct platform_device *op)
 	temac_mdio_teardown(lp);
 	unregister_netdev(ndev);
 	sysfs_remove_group(&lp->dev->kobj, &temac_attr_group);
-	if (lp->phy_node)
-		of_node_put(lp->phy_node);
+	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
 	iounmap(lp->regs);
 	if (lp->sdma_regs)
@@ -1171,7 +1170,6 @@ static struct platform_driver temac_of_driver = {
 	.probe = temac_of_probe,
 	.remove = temac_of_remove,
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "xilinx_temac",
 		.of_match_table = temac_of_match,
 	},
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 7b0a735..c8fd941 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1630,8 +1630,7 @@ static int axienet_of_remove(struct platform_device *op)
 	axienet_mdio_teardown(lp);
 	unregister_netdev(ndev);
 
-	if (lp->phy_node)
-		of_node_put(lp->phy_node);
+	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
 
 	iounmap(lp->regs);
@@ -1646,7 +1645,6 @@ static struct platform_driver axienet_of_driver = {
 	.probe = axienet_of_probe,
 	.remove = axienet_of_remove,
 	.driver = {
-		 .owner = THIS_MODULE,
 		 .name = "xilinx_axienet",
 		 .of_match_table = axienet_of_match,
 	},
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 782bb93..28dbbdc 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1245,7 +1245,6 @@ MODULE_DEVICE_TABLE(of, xemaclite_of_match);
 static struct platform_driver xemaclite_of_driver = {
 	.driver = {
 		.name = DRIVER_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table = xemaclite_of_match,
 	},
 	.probe		= xemaclite_of_probe,
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index 7c81ffb..d56f869 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -266,7 +266,7 @@ static void xirc2ps_detach(struct pcmcia_device *p_dev);
 
 static irqreturn_t xirc2ps_interrupt(int irq, void *dev_id);
 
-typedef struct local_info_t {
+struct local_info {
 	struct net_device	*dev;
 	struct pcmcia_device	*p_dev;
 
@@ -281,7 +281,7 @@ typedef struct local_info_t {
     unsigned last_ptr_value; /* last packets transmitted value */
     const char *manf_str;
     struct work_struct tx_timeout_task;
-} local_info_t;
+};
 
 /****************
  * Some more prototypes
@@ -475,12 +475,12 @@ static int
 xirc2ps_probe(struct pcmcia_device *link)
 {
     struct net_device *dev;
-    local_info_t *local;
+    struct local_info *local;
 
     dev_dbg(&link->dev, "attach()\n");
 
     /* Allocate the device structure */
-    dev = alloc_etherdev(sizeof(local_info_t));
+    dev = alloc_etherdev(sizeof(struct local_info));
     if (!dev)
 	    return -ENOMEM;
     local = netdev_priv(dev);
@@ -536,7 +536,7 @@ static int
 set_card_type(struct pcmcia_device *link)
 {
     struct net_device *dev = link->priv;
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     u8 *buf;
     unsigned int cisrev, mediaid, prodid;
     size_t len;
@@ -690,7 +690,7 @@ static int
 xirc2ps_config(struct pcmcia_device * link)
 {
     struct net_device *dev = link->priv;
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     unsigned int ioaddr;
     int err;
     u8 *buf;
@@ -931,7 +931,7 @@ xirc2ps_release(struct pcmcia_device *link)
 
 	if (link->resource[2]->end) {
 		struct net_device *dev = link->priv;
-		local_info_t *local = netdev_priv(dev);
+		struct local_info *local = netdev_priv(dev);
 		if (local->dingo)
 			iounmap(local->dingo_ccr - 0x0800);
 	}
@@ -975,7 +975,7 @@ static irqreturn_t
 xirc2ps_interrupt(int irq, void *dev_id)
 {
     struct net_device *dev = (struct net_device *)dev_id;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr;
     u_char saved_page;
     unsigned bytes_rcvd;
@@ -1194,8 +1194,8 @@ xirc2ps_interrupt(int irq, void *dev_id)
 static void
 xirc2ps_tx_timeout_task(struct work_struct *work)
 {
-	local_info_t *local =
-		container_of(work, local_info_t, tx_timeout_task);
+	struct local_info *local =
+		container_of(work, struct local_info, tx_timeout_task);
 	struct net_device *dev = local->dev;
     /* reset the card */
     do_reset(dev,1);
@@ -1206,7 +1206,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work)
 static void
 xirc_tx_timeout(struct net_device *dev)
 {
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     dev->stats.tx_errors++;
     netdev_notice(dev, "transmit timed out\n");
     schedule_work(&lp->tx_timeout_task);
@@ -1215,7 +1215,7 @@ xirc_tx_timeout(struct net_device *dev)
 static netdev_tx_t
 do_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     int okay;
     unsigned freespace;
@@ -1300,7 +1300,7 @@ static void set_address(struct set_address_info *sa_info, char *addr)
 static void set_addresses(struct net_device *dev)
 {
 	unsigned int ioaddr = dev->base_addr;
-	local_info_t *lp = netdev_priv(dev);
+	struct local_info *lp = netdev_priv(dev);
 	struct netdev_hw_addr *ha;
 	struct set_address_info sa_info;
 	int i;
@@ -1362,7 +1362,7 @@ set_multicast_list(struct net_device *dev)
 static int
 do_config(struct net_device *dev, struct ifmap *map)
 {
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
 
     pr_debug("do_config(%p)\n", dev);
     if (map->port != 255 && map->port != dev->if_port) {
@@ -1387,7 +1387,7 @@ do_config(struct net_device *dev, struct ifmap *map)
 static int
 do_open(struct net_device *dev)
 {
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
 
     dev_dbg(&link->dev, "do_open(%p)\n", dev);
@@ -1421,7 +1421,7 @@ static const struct ethtool_ops netdev_ethtool_ops = {
 static int
 do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     struct mii_ioctl_data *data = if_mii(rq);
 
@@ -1453,7 +1453,7 @@ do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 static void
 hardreset(struct net_device *dev)
 {
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
 
     SelectPage(4);
@@ -1470,7 +1470,7 @@ hardreset(struct net_device *dev)
 static void
 do_reset(struct net_device *dev, int full)
 {
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     unsigned value;
 
@@ -1631,7 +1631,7 @@ do_reset(struct net_device *dev, int full)
 static int
 init_mii(struct net_device *dev)
 {
-    local_info_t *local = netdev_priv(dev);
+    struct local_info *local = netdev_priv(dev);
     unsigned int ioaddr = dev->base_addr;
     unsigned control, status, linkpartner;
     int i;
@@ -1715,7 +1715,7 @@ static int
 do_stop(struct net_device *dev)
 {
     unsigned int ioaddr = dev->base_addr;
-    local_info_t *lp = netdev_priv(dev);
+    struct local_info *lp = netdev_priv(dev);
     struct pcmcia_device *link = lp->p_dev;
 
     dev_dbg(&link->dev, "do_stop(%p)\n", dev);
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 9fd0c20..a87a82c 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
 	.resume		= toshoboe_wakeup 
 };
 
-static int __init
-donauboe_init (void)
-{
-  return pci_register_driver(&donauboe_pci_driver);
-}
-
-static void __exit
-donauboe_cleanup (void)
-{
-  pci_unregister_driver(&donauboe_pci_driver);
-}
-
-module_init(donauboe_init);
-module_exit(donauboe_cleanup);
+module_pci_driver(donauboe_pci_driver);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ef8a5c2..60e4ca0 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -45,10 +45,9 @@ struct macvlan_port {
 	struct sk_buff_head	bc_queue;
 	struct work_struct	bc_work;
 	bool 			passthru;
+	int			count;
 };
 
-#define MACVLAN_PORT_IS_EMPTY(port)    list_empty(&port->vlans)
-
 struct macvlan_skb_cb {
 	const struct macvlan_dev *src;
 };
@@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_device *dev)
 
 	free_percpu(vlan->pcpu_stats);
 
-	if (MACVLAN_PORT_IS_EMPTY(port))
+	port->count -= 1;
+	if (!port->count)
 		macvlan_port_destroy(port->dev);
 }
 
@@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 		vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 
 	if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-		if (!MACVLAN_PORT_IS_EMPTY(port))
+		if (port->count)
 			return -EINVAL;
 		port->passthru = true;
 		eth_hw_addr_inherit(dev, lowerdev);
 	}
 
+	port->count += 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto destroy_port;
@@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 unregister_netdev:
 	unregister_netdevice(dev);
 destroy_port:
-	if (MACVLAN_PORT_IS_EMPTY(port))
+	port->count -= 1;
+	if (!port->count)
 		macvlan_port_destroy(lowerdev);
 
 	return err;
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 7cc64ea..e5c7e61 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -90,7 +90,7 @@
 #define LMI_ANSI_LENGTH		  14
 
 
-typedef struct {
+struct fr_hdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	unsigned ea1:	1;
 	unsigned cr:	1;
@@ -112,14 +112,14 @@ typedef struct {
 	unsigned de:	1;
 	unsigned ea2:	1;
 #endif
-}__packed fr_hdr;
+} __packed;
 
 
-typedef struct pvc_device_struct {
+struct pvc_device {
 	struct net_device *frad;
 	struct net_device *main;
 	struct net_device *ether;	/* bridged Ethernet interface	*/
-	struct pvc_device_struct *next;	/* Sorted in ascending DLCI order */
+	struct pvc_device *next;	/* Sorted in ascending DLCI order */
 	int dlci;
 	int open_count;
 
@@ -132,11 +132,11 @@ typedef struct pvc_device_struct {
 		unsigned int becn: 1;
 		unsigned int bandwidth;	/* Cisco LMI reporting only */
 	}state;
-}pvc_device;
+};
 
 struct frad_state {
 	fr_proto settings;
-	pvc_device *first_pvc;
+	struct pvc_device *first_pvc;
 	int dce_pvc_count;
 
 	struct timer_list timer;
@@ -174,9 +174,9 @@ static inline struct frad_state* state(hdlc_device *hdlc)
 }
 
 
-static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci)
+static inline struct pvc_device *find_pvc(hdlc_device *hdlc, u16 dlci)
 {
-	pvc_device *pvc = state(hdlc)->first_pvc;
+	struct pvc_device *pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {
 		if (pvc->dlci == dlci)
@@ -190,10 +190,10 @@ static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci)
 }
 
 
-static pvc_device* add_pvc(struct net_device *dev, u16 dlci)
+static struct pvc_device *add_pvc(struct net_device *dev, u16 dlci)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc;
+	struct pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc;
 
 	while (*pvc_p) {
 		if ((*pvc_p)->dlci == dlci)
@@ -203,7 +203,7 @@ static pvc_device* add_pvc(struct net_device *dev, u16 dlci)
 		pvc_p = &(*pvc_p)->next;
 	}
 
-	pvc = kzalloc(sizeof(pvc_device), GFP_ATOMIC);
+	pvc = kzalloc(sizeof(*pvc), GFP_ATOMIC);
 #ifdef DEBUG_PVC
 	printk(KERN_DEBUG "add_pvc: allocated pvc %p, frad %p\n", pvc, dev);
 #endif
@@ -218,13 +218,13 @@ static pvc_device* add_pvc(struct net_device *dev, u16 dlci)
 }
 
 
-static inline int pvc_is_used(pvc_device *pvc)
+static inline int pvc_is_used(struct pvc_device *pvc)
 {
 	return pvc->main || pvc->ether;
 }
 
 
-static inline void pvc_carrier(int on, pvc_device *pvc)
+static inline void pvc_carrier(int on, struct pvc_device *pvc)
 {
 	if (on) {
 		if (pvc->main)
@@ -246,11 +246,11 @@ static inline void pvc_carrier(int on, pvc_device *pvc)
 
 static inline void delete_unused_pvcs(hdlc_device *hdlc)
 {
-	pvc_device **pvc_p = &state(hdlc)->first_pvc;
+	struct pvc_device **pvc_p = &state(hdlc)->first_pvc;
 
 	while (*pvc_p) {
 		if (!pvc_is_used(*pvc_p)) {
-			pvc_device *pvc = *pvc_p;
+			struct pvc_device *pvc = *pvc_p;
 #ifdef DEBUG_PVC
 			printk(KERN_DEBUG "freeing unused pvc: %p\n", pvc);
 #endif
@@ -263,7 +263,8 @@ static inline void delete_unused_pvcs(hdlc_device *hdlc)
 }
 
 
-static inline struct net_device** get_dev_p(pvc_device *pvc, int type)
+static inline struct net_device **get_dev_p(struct pvc_device *pvc,
+					    int type)
 {
 	if (type == ARPHRD_ETHER)
 		return &pvc->ether;
@@ -342,7 +343,7 @@ static int fr_hard_header(struct sk_buff **skb_p, u16 dlci)
 
 static int pvc_open(struct net_device *dev)
 {
-	pvc_device *pvc = dev->ml_priv;
+	struct pvc_device *pvc = dev->ml_priv;
 
 	if ((pvc->frad->flags & IFF_UP) == 0)
 		return -EIO;  /* Frad must be UP in order to activate PVC */
@@ -362,7 +363,7 @@ static int pvc_open(struct net_device *dev)
 
 static int pvc_close(struct net_device *dev)
 {
-	pvc_device *pvc = dev->ml_priv;
+	struct pvc_device *pvc = dev->ml_priv;
 
 	if (--pvc->open_count == 0) {
 		hdlc_device *hdlc = dev_to_hdlc(pvc->frad);
@@ -381,7 +382,7 @@ static int pvc_close(struct net_device *dev)
 
 static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	pvc_device *pvc = dev->ml_priv;
+	struct pvc_device *pvc = dev->ml_priv;
 	fr_proto_pvc_info info;
 
 	if (ifr->ifr_settings.type == IF_GET_PROTO) {
@@ -409,7 +410,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	pvc_device *pvc = dev->ml_priv;
+	struct pvc_device *pvc = dev->ml_priv;
 
 	if (pvc->state.active) {
 		if (dev->type == ARPHRD_ETHER) {
@@ -444,7 +445,7 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static inline void fr_log_dlci_active(pvc_device *pvc)
+static inline void fr_log_dlci_active(struct pvc_device *pvc)
 {
 	netdev_info(pvc->frad, "DLCI %d [%s%s%s]%s %s\n",
 		    pvc->dlci,
@@ -469,7 +470,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
 	struct sk_buff *skb;
-	pvc_device *pvc = state(hdlc)->first_pvc;
+	struct pvc_device *pvc = state(hdlc)->first_pvc;
 	int lmi = state(hdlc)->settings.lmi;
 	int dce = state(hdlc)->settings.dce;
 	int len = lmi == LMI_ANSI ? LMI_ANSI_LENGTH : LMI_CCITT_CISCO_LENGTH;
@@ -566,7 +567,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep)
 static void fr_set_link_state(int reliable, struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc = state(hdlc)->first_pvc;
+	struct pvc_device *pvc = state(hdlc)->first_pvc;
 
 	state(hdlc)->reliable = reliable;
 	if (reliable) {
@@ -652,7 +653,7 @@ static void fr_timer(unsigned long arg)
 static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc;
+	struct pvc_device *pvc;
 	u8 rxseq, txseq;
 	int lmi = state(hdlc)->settings.lmi;
 	int dce = state(hdlc)->settings.dce;
@@ -869,10 +870,10 @@ static int fr_rx(struct sk_buff *skb)
 {
 	struct net_device *frad = skb->dev;
 	hdlc_device *hdlc = dev_to_hdlc(frad);
-	fr_hdr *fh = (fr_hdr*)skb->data;
+	struct fr_hdr *fh = (struct fr_hdr *)skb->data;
 	u8 *data = skb->data;
 	u16 dlci;
-	pvc_device *pvc;
+	struct pvc_device *pvc;
 	struct net_device *dev = NULL;
 
 	if (skb->len <= 4 || fh->ea1 || data[2] != FR_UI)
@@ -1028,7 +1029,7 @@ static void fr_stop(struct net_device *dev)
 static void fr_close(struct net_device *dev)
 {
 	hdlc_device *hdlc = dev_to_hdlc(dev);
-	pvc_device *pvc = state(hdlc)->first_pvc;
+	struct pvc_device *pvc = state(hdlc)->first_pvc;
 
 	while (pvc) {		/* Shutdown all PVCs for this FRAD */
 		if (pvc->main)
@@ -1060,7 +1061,7 @@ static const struct net_device_ops pvc_ops = {
 static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 {
 	hdlc_device *hdlc = dev_to_hdlc(frad);
-	pvc_device *pvc;
+	struct pvc_device *pvc;
 	struct net_device *dev;
 	int used;
 
@@ -1117,7 +1118,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type)
 
 static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 {
-	pvc_device *pvc;
+	struct pvc_device *pvc;
 	struct net_device *dev;
 
 	if ((pvc = find_pvc(hdlc, dlci)) == NULL)
@@ -1145,13 +1146,13 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type)
 static void fr_destroy(struct net_device *frad)
 {
 	hdlc_device *hdlc = dev_to_hdlc(frad);
-	pvc_device *pvc = state(hdlc)->first_pvc;
+	struct pvc_device *pvc = state(hdlc)->first_pvc;
 	state(hdlc)->first_pvc = NULL; /* All PVCs destroyed */
 	state(hdlc)->dce_pvc_count = 0;
 	state(hdlc)->dce_changed = 1;
 
 	while (pvc) {
-		pvc_device *next = pvc->next;
+		struct pvc_device *next = pvc->next;
 		/* destructors will free_netdev() main and ether */
 		if (pvc->main)
 			unregister_netdevice(pvc->main);
diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c
index dc2fc23..e73f138 100644
--- a/drivers/net/wan/wanxl.c
+++ b/drivers/net/wan/wanxl.c
@@ -54,24 +54,24 @@ static const char* version = "wanXL serial card driver version: 0.48";
 #define MBX2_MEMSZ_MASK 0xFFFF0000 /* PUTS Memory Size Register mask */
 
 
-typedef struct {
+struct port {
 	struct net_device *dev;
-	struct card_t *card;
+	struct card *card;
 	spinlock_t lock;	/* for wanxl_xmit */
         int node;		/* physical port #0 - 3 */
 	unsigned int clock_type;
 	int tx_in, tx_out;
 	struct sk_buff *tx_skbs[TX_BUFFERS];
-}port_t;
+};
 
 
-typedef struct {
+struct card_status {
 	desc_t rx_descs[RX_QUEUE_LENGTH];
 	port_status_t port_status[4];
-}card_status_t;
+};
 
 
-typedef struct card_t {
+struct card {
 	int n_ports;		/* 1, 2 or 4 ports */
 	u8 irq;
 
@@ -79,20 +79,20 @@ typedef struct card_t {
 	struct pci_dev *pdev;	/* for pci_name(pdev) */
 	int rx_in;
 	struct sk_buff *rx_skbs[RX_QUEUE_LENGTH];
-	card_status_t *status;	/* shared between host and card */
+	struct card_status *status;	/* shared between host and card */
 	dma_addr_t status_address;
-	port_t ports[0];	/* 1 - 4 port_t structures follow */
-}card_t;
+	struct port ports[0];	/* 1 - 4 port structures follow */
+};
 
 
 
-static inline port_t* dev_to_port(struct net_device *dev)
+static inline struct port *dev_to_port(struct net_device *dev)
 {
-        return (port_t *)dev_to_hdlc(dev)->priv;
+	return (struct port *)dev_to_hdlc(dev)->priv;
 }
 
 
-static inline port_status_t* get_status(port_t *port)
+static inline port_status_t *get_status(struct port *port)
 {
 	return &port->card->status->port_status[port->node];
 }
@@ -115,7 +115,7 @@ static inline dma_addr_t pci_map_single_debug(struct pci_dev *pdev, void *ptr,
 
 
 /* Cable and/or personality module change interrupt service */
-static inline void wanxl_cable_intr(port_t *port)
+static inline void wanxl_cable_intr(struct port *port)
 {
 	u32 value = get_status(port)->cable;
 	int valid = 1;
@@ -160,7 +160,7 @@ static inline void wanxl_cable_intr(port_t *port)
 
 
 /* Transmit complete interrupt service */
-static inline void wanxl_tx_intr(port_t *port)
+static inline void wanxl_tx_intr(struct port *port)
 {
 	struct net_device *dev = port->dev;
 	while (1) {
@@ -193,7 +193,7 @@ static inline void wanxl_tx_intr(port_t *port)
 
 
 /* Receive complete interrupt service */
-static inline void wanxl_rx_intr(card_t *card)
+static inline void wanxl_rx_intr(struct card *card)
 {
 	desc_t *desc;
 	while (desc = &card->status->rx_descs[card->rx_in],
@@ -203,7 +203,7 @@ static inline void wanxl_rx_intr(card_t *card)
 				pci_name(card->pdev));
 		else {
 			struct sk_buff *skb = card->rx_skbs[card->rx_in];
-			port_t *port = &card->ports[desc->stat &
+			struct port *port = &card->ports[desc->stat &
 						    PACKET_PORT_MASK];
 			struct net_device *dev = port->dev;
 
@@ -245,7 +245,7 @@ static inline void wanxl_rx_intr(card_t *card)
 
 static irqreturn_t wanxl_intr(int irq, void* dev_id)
 {
-        card_t *card = dev_id;
+	struct card *card = dev_id;
         int i;
         u32 stat;
         int handled = 0;
@@ -272,7 +272,7 @@ static irqreturn_t wanxl_intr(int irq, void* dev_id)
 
 static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-        port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 	desc_t *desc;
 
         spin_lock(&port->lock);
@@ -319,7 +319,7 @@ static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev)
 static int wanxl_attach(struct net_device *dev, unsigned short encoding,
 			unsigned short parity)
 {
-	port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 
 	if (encoding != ENCODING_NRZ &&
 	    encoding != ENCODING_NRZI)
@@ -343,7 +343,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	const size_t size = sizeof(sync_serial_settings);
 	sync_serial_settings line;
-	port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 
 	if (cmd != SIOCWANDEV)
 		return hdlc_ioctl(dev, ifr, cmd);
@@ -393,7 +393,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static int wanxl_open(struct net_device *dev)
 {
-	port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 	u8 __iomem *dbr = port->card->plx + PLX_DOORBELL_TO_CARD;
 	unsigned long timeout;
 	int i;
@@ -429,7 +429,7 @@ static int wanxl_open(struct net_device *dev)
 
 static int wanxl_close(struct net_device *dev)
 {
-	port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 	unsigned long timeout;
 	int i;
 
@@ -467,7 +467,7 @@ static int wanxl_close(struct net_device *dev)
 
 static struct net_device_stats *wanxl_get_stats(struct net_device *dev)
 {
-	port_t *port = dev_to_port(dev);
+	struct port *port = dev_to_port(dev);
 
 	dev->stats.rx_over_errors = get_status(port)->rx_overruns;
 	dev->stats.rx_frame_errors = get_status(port)->rx_frame_errors;
@@ -478,7 +478,7 @@ static struct net_device_stats *wanxl_get_stats(struct net_device *dev)
 
 
 
-static int wanxl_puts_command(card_t *card, u32 cmd)
+static int wanxl_puts_command(struct card *card, u32 cmd)
 {
 	unsigned long timeout = jiffies + 5 * HZ;
 
@@ -495,7 +495,7 @@ static int wanxl_puts_command(card_t *card, u32 cmd)
 
 
 
-static void wanxl_reset(card_t *card)
+static void wanxl_reset(struct card *card)
 {
 	u32 old_value = readl(card->plx + PLX_CONTROL) & ~PLX_CTL_RESET;
 
@@ -511,7 +511,7 @@ static void wanxl_reset(card_t *card)
 
 static void wanxl_pci_remove_one(struct pci_dev *pdev)
 {
-	card_t *card = pci_get_drvdata(pdev);
+	struct card *card = pci_get_drvdata(pdev);
 	int i;
 
 	for (i = 0; i < card->n_ports; i++) {
@@ -537,7 +537,7 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev)
 		iounmap(card->plx);
 
 	if (card->status)
-		pci_free_consistent(pdev, sizeof(card_status_t),
+		pci_free_consistent(pdev, sizeof(struct card_status),
 				    card->status, card->status_address);
 
 	pci_release_regions(pdev);
@@ -560,7 +560,7 @@ static const struct net_device_ops wanxl_ops = {
 static int wanxl_pci_init_one(struct pci_dev *pdev,
 			      const struct pci_device_id *ent)
 {
-	card_t *card;
+	struct card *card;
 	u32 ramsize, stat;
 	unsigned long timeout;
 	u32 plx_phy;		/* PLX PCI base address */
@@ -601,7 +601,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	default: ports = 4;
 	}
 
-	alloc_size = sizeof(card_t) + ports * sizeof(port_t);
+	alloc_size = sizeof(struct card) + ports * sizeof(struct port);
 	card = kzalloc(alloc_size, GFP_KERNEL);
 	if (card == NULL) {
 		pci_release_regions(pdev);
@@ -612,7 +612,8 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, card);
 	card->pdev = pdev;
 
-	card->status = pci_alloc_consistent(pdev, sizeof(card_status_t),
+	card->status = pci_alloc_consistent(pdev,
+					    sizeof(struct card_status),
 					    &card->status_address);
 	if (card->status == NULL) {
 		wanxl_pci_remove_one(pdev);
@@ -766,7 +767,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev,
 
 	for (i = 0; i < ports; i++) {
 		hdlc_device *hdlc;
-		port_t *port = &card->ports[i];
+		struct port *port = &card->ports[i];
 		struct net_device *dev = alloc_hdlcdev(port);
 		if (!dev) {
 			pr_err("%s: unable to allocate memory\n",
diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c
index 7e9ede6..d9ed22b 100644
--- a/drivers/net/wireless/airo_cs.c
+++ b/drivers/net/wireless/airo_cs.c
@@ -56,18 +56,18 @@ static void airo_release(struct pcmcia_device *link);
 
 static void airo_detach(struct pcmcia_device *p_dev);
 
-typedef struct local_info_t {
+struct local_info {
 	struct net_device *eth_dev;
-} local_info_t;
+};
 
 static int airo_probe(struct pcmcia_device *p_dev)
 {
-	local_info_t *local;
+	struct local_info *local;
 
 	dev_dbg(&p_dev->dev, "airo_attach()\n");
 
 	/* Allocate space for private device-specific data */
-	local = kzalloc(sizeof(local_info_t), GFP_KERNEL);
+	local = kzalloc(sizeof(*local), GFP_KERNEL);
 	if (!local)
 		return -ENOMEM;
 
@@ -82,10 +82,11 @@ static void airo_detach(struct pcmcia_device *link)
 
 	airo_release(link);
 
-	if (((local_info_t *)link->priv)->eth_dev) {
-		stop_airo_card(((local_info_t *)link->priv)->eth_dev, 0);
+	if (((struct local_info *)link->priv)->eth_dev) {
+		stop_airo_card(((struct local_info *)link->priv)->eth_dev,
+			       0);
 	}
-	((local_info_t *)link->priv)->eth_dev = NULL;
+	((struct local_info *)link->priv)->eth_dev = NULL;
 
 	kfree(link->priv);
 } /* airo_detach */
@@ -101,7 +102,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, void *priv_data)
 
 static int airo_config(struct pcmcia_device *link)
 {
-	local_info_t *dev;
+	struct local_info *dev;
 	int ret;
 
 	dev = link->priv;
@@ -121,10 +122,10 @@ static int airo_config(struct pcmcia_device *link)
 	ret = pcmcia_enable_device(link);
 	if (ret)
 		goto failed;
-	((local_info_t *)link->priv)->eth_dev =
+	((struct local_info *)link->priv)->eth_dev =
 		init_airo_card(link->irq,
 			       link->resource[0]->start, 1, &link->dev);
-	if (!((local_info_t *)link->priv)->eth_dev)
+	if (!((struct local_info *)link->priv)->eth_dev)
 		goto failed;
 
 	return 0;
@@ -142,7 +143,7 @@ static void airo_release(struct pcmcia_device *link)
 
 static int airo_suspend(struct pcmcia_device *link)
 {
-	local_info_t *local = link->priv;
+	struct local_info *local = link->priv;
 
 	netif_device_detach(local->eth_dev);
 
@@ -151,7 +152,7 @@ static int airo_suspend(struct pcmcia_device *link)
 
 static int airo_resume(struct pcmcia_device *link)
 {
-	local_info_t *local = link->priv;
+	struct local_info *local = link->priv;
 
 	if (link->open) {
 		reset_airo_card(local->eth_dev);
diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 8596aba..237d0cd 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -256,6 +256,7 @@ struct ar9170 {
 	atomic_t rx_work_urbs;
 	atomic_t rx_pool_urbs;
 	kernel_ulong_t features;
+	bool usb_ep_cmd_is_bulk;
 
 	/* firmware settings */
 	struct completion fw_load_wait;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index f35c7f3..c9f9331 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd,
 		goto err_free;
 	}
 
-	usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev,
-		AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4,
-		carl9170_usb_cmd_complete, ar, 1);
+	if (ar->usb_ep_cmd_is_bulk)
+		usb_fill_bulk_urb(urb, ar->udev,
+				  usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD),
+				  cmd, cmd->hdr.len + 4,
+				  carl9170_usb_cmd_complete, ar);
+	else
+		usb_fill_int_urb(urb, ar->udev,
+				 usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD),
+				 cmd, cmd->hdr.len + 4,
+				 carl9170_usb_cmd_complete, ar, 1);
 
 	if (free_buf)
 		urb->transfer_flags |= URB_FREE_BUFFER;
@@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw,
 static int carl9170_usb_probe(struct usb_interface *intf,
 			      const struct usb_device_id *id)
 {
+	struct usb_endpoint_descriptor *ep;
 	struct ar9170 *ar;
 	struct usb_device *udev;
-	int err;
+	int i, err;
 
 	err = usb_reset_device(interface_to_usbdev(intf));
 	if (err)
@@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf,
 	ar->intf = intf;
 	ar->features = id->driver_info;
 
+	/* We need to remember the type of endpoint 4 because it differs
+	 * between high- and full-speed configuration. The high-speed
+	 * configuration specifies it as interrupt and the full-speed
+	 * configuration as bulk endpoint. This information is required
+	 * later when sending urbs to that endpoint.
+	 */
+	for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) {
+		ep = &intf->cur_altsetting->endpoint[i].desc;
+
+		if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD &&
+		    usb_endpoint_dir_out(ep) &&
+		    usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK)
+			ar->usb_ep_cmd_is_bulk = true;
+	}
+
 	usb_set_intfdata(intf, ar);
 	SET_IEEE80211_DEV(ar->hw, &intf->dev);
 
diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c
index 1fe41af..9183f1c 100644
--- a/drivers/net/wireless/atmel.c
+++ b/drivers/net/wireless/atmel.c
@@ -2598,11 +2598,11 @@ static const iw_handler atmel_private_handler[] =
 	NULL,				/* SIOCIWFIRSTPRIV */
 };
 
-typedef struct atmel_priv_ioctl {
+struct atmel_priv_ioctl {
 	char id[32];
 	unsigned char __user *data;
 	unsigned short len;
-} atmel_priv_ioctl;
+};
 
 #define ATMELFWL	SIOCIWFIRSTPRIV
 #define ATMELIDIFC	ATMELFWL + 1
@@ -2615,7 +2615,7 @@ static const struct iw_priv_args atmel_private_args[] = {
 		.cmd = ATMELFWL,
 		.set_args = IW_PRIV_TYPE_BYTE
 				| IW_PRIV_SIZE_FIXED
-				| sizeof (atmel_priv_ioctl),
+				| sizeof(struct atmel_priv_ioctl),
 		.get_args = IW_PRIV_TYPE_NONE,
 		.name = "atmelfwl"
 	}, {
@@ -2645,7 +2645,7 @@ static int atmel_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	int i, rc = 0;
 	struct atmel_private *priv = netdev_priv(dev);
-	atmel_priv_ioctl com;
+	struct atmel_priv_ioctl com;
 	struct iwreq *wrq = (struct iwreq *) rq;
 	unsigned char *new_firmware;
 	char domain[REGDOMAINSZ + 1];
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
index 535c7eb..8f8b937 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
@@ -1318,6 +1318,8 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
 	msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings;
 	msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings *
 		sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC);
+	if (!msgbuf->flowring_dma_handle)
+		goto fail;
 
 	msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset;
 	msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost;
@@ -1362,6 +1364,7 @@ fail:
 		kfree(msgbuf->flow_map);
 		kfree(msgbuf->txstatus_done_map);
 		brcmf_msgbuf_release_pktids(msgbuf);
+		kfree(msgbuf->flowring_dma_handle);
 		if (msgbuf->ioctbuf)
 			dma_free_coherent(drvr->bus_if->dev,
 					  BRCMF_TX_IOCTL_MAX_MSG_SIZE,
@@ -1391,6 +1394,7 @@ void brcmf_proto_msgbuf_detach(struct brcmf_pub *drvr)
 				  BRCMF_TX_IOCTL_MAX_MSG_SIZE,
 				  msgbuf->ioctbuf, msgbuf->ioctbuf_handle);
 		brcmf_msgbuf_release_pktids(msgbuf);
+		kfree(msgbuf->flowring_dma_handle);
 		kfree(msgbuf);
 		drvr->proto->pd = NULL;
 	}
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
index bc972c0..e5101b2 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
@@ -591,12 +591,13 @@ static void brcmf_pcie_handle_mb_data(struct brcmf_pciedev_info *devinfo)
 	}
 	if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE)
 		brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n");
-	if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK)
+	if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) {
 		brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n");
 		if (waitqueue_active(&devinfo->mbdata_resp_wait)) {
 			devinfo->mbdata_completed = true;
 			wake_up(&devinfo->mbdata_resp_wait);
 		}
+	}
 }
 
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index 800b62b..a42f9c3 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -9853,6 +9853,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev,
 		strncpy(extra, "unknown", MAX_WX_STRING);
 		break;
 	}
+	extra[MAX_WX_STRING - 1] = '\0';
 
 	IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra);
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 0d6a8b7..7c87965 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -396,7 +396,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 	else
 		hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
 
-	hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN;
+	/* TODO: enable that only for firmwares that don't crash */
+	/* hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; */
 	hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
 	hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
 	/* we create the 802.11 header and zero length SSID IE. */
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f..d4eb8d2 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	u16 dealloc_ring[MAX_PENDING_REQS];
 	struct task_struct *dealloc_task;
 	wait_queue_head_t dealloc_wq;
+	atomic_t inflight_packets;
 
 	/* Use kthread for guest RX */
 	struct task_struct *task;
@@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues;
 extern struct dentry *xen_netback_dbg_root;
 #endif
 
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+				 struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 48a55cd..e29e15d 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
+ * increasing the inflight counter. We need to increase the inflight
+ * counter because core driver calls into xenvif_zerocopy_callback
+ * which calls xenvif_skb_zerocopy_complete.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+				 struct sk_buff *skb)
+{
+	skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	atomic_inc(&queue->inflight_packets);
+}
+
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+	atomic_dec(&queue->inflight_packets);
+}
+
 static inline void xenvif_stop_queue(struct xenvif_queue *queue)
 {
 	struct net_device *dev = queue->vif->dev;
@@ -78,12 +95,8 @@ int xenvif_poll(struct napi_struct *napi, int budget)
 	/* This vif is rogue, we pretend we've there is nothing to do
 	 * for this vif to deschedule it from NAPI. But this interface
 	 * will be turned off in thread context later.
-	 * Also, if a guest doesn't post enough slots to receive data on one of
-	 * its queues, the carrier goes down and NAPI is descheduled here so
-	 * the guest can't send more packets until it's ready to receive.
 	 */
-	if (unlikely(queue->vif->disabled ||
-		     !netif_carrier_ok(queue->vif->dev))) {
+	if (unlikely(queue->vif->disabled)) {
 		napi_complete(napi);
 		return 0;
 	}
@@ -528,9 +541,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
 
 	init_timer(&queue->rx_stalled);
 
-	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
-			XENVIF_NAPI_WEIGHT);
-
 	return 0;
 }
 
@@ -564,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
 
 	init_waitqueue_head(&queue->wq);
 	init_waitqueue_head(&queue->dealloc_wq);
+	atomic_set(&queue->inflight_packets, 0);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -618,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
 	wake_up_process(queue->task);
 	wake_up_process(queue->dealloc_task);
 
+	netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
+			XENVIF_NAPI_WEIGHT);
+
 	return 0;
 
 err_rx_unbind:
@@ -646,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif)
 	rtnl_unlock();
 }
 
-static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
-				      unsigned int worst_case_skb_lifetime)
-{
-	int i, unmap_timeout = 0;
-
-	for (i = 0; i < MAX_PENDING_REQS; ++i) {
-		if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
-			unmap_timeout++;
-			schedule_timeout(msecs_to_jiffies(1000));
-			if (unmap_timeout > worst_case_skb_lifetime &&
-			    net_ratelimit())
-				netdev_err(queue->vif->dev,
-					   "Page still granted! Index: %x\n",
-					   i);
-			i = -1;
-		}
-	}
-}
-
 void xenvif_disconnect(struct xenvif *vif)
 {
 	struct xenvif_queue *queue = NULL;
@@ -676,6 +671,8 @@ void xenvif_disconnect(struct xenvif *vif)
 	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
 		queue = &vif->queues[queue_index];
 
+		netif_napi_del(&queue->napi);
+
 		if (queue->task) {
 			del_timer_sync(&queue->rx_stalled);
 			kthread_stop(queue->task);
@@ -708,7 +705,6 @@ void xenvif_disconnect(struct xenvif *vif)
 void xenvif_deinit_queue(struct xenvif_queue *queue)
 {
 	free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
-	netif_napi_del(&queue->napi);
 }
 
 void xenvif_free(struct xenvif *vif)
@@ -716,21 +712,11 @@ void xenvif_free(struct xenvif *vif)
 	struct xenvif_queue *queue = NULL;
 	unsigned int num_queues = vif->num_queues;
 	unsigned int queue_index;
-	/* Here we want to avoid timeout messages if an skb can be legitimately
-	 * stuck somewhere else. Realistically this could be an another vif's
-	 * internal or QDisc queue. That another vif also has this
-	 * rx_drain_timeout_msecs timeout, so give it time to drain out.
-	 * Although if that other guest wakes up just before its timeout happens
-	 * and takes only one skb from QDisc, it can hold onto other skbs for a
-	 * longer period.
-	 */
-	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
 
 	unregister_netdev(vif->dev);
 
 	for (queue_index = 0; queue_index < num_queues; ++queue_index) {
 		queue = &vif->queues[queue_index];
-		xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
 		xenvif_deinit_queue(queue);
 	}
 
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index aa20933..08f6599 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
 	/* remove traces of mapped pages and frag_list */
 	skb_frag_list_init(skb);
 	uarg = skb_shinfo(skb)->destructor_arg;
+	/* increase inflight counter to offset decrement in callback */
+	atomic_inc(&queue->inflight_packets);
 	uarg->callback(uarg, true);
 	skb_shinfo(skb)->destructor_arg = NULL;
 
-	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	xenvif_skb_zerocopy_prepare(queue, nskb);
 	kfree_skb(nskb);
 
 	return 0;
@@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 				if (net_ratelimit())
 					netdev_err(queue->vif->dev,
 						   "Not enough memory to consolidate frag_list!\n");
-				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				xenvif_skb_zerocopy_prepare(queue, skb);
 				kfree_skb(skb);
 				continue;
 			}
@@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 				   "Can't setup checksum in net_tx_action\n");
 			/* We have to set this flag to trigger the callback */
 			if (skb_shinfo(skb)->destructor_arg)
-				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				xenvif_skb_zerocopy_prepare(queue, skb);
 			kfree_skb(skb);
 			continue;
 		}
@@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
 		 */
 		if (skb_shinfo(skb)->destructor_arg) {
-			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+			xenvif_skb_zerocopy_prepare(queue, skb);
 			queue->stats.tx_zerocopy_sent++;
 		}
 
@@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
 		queue->stats.tx_zerocopy_success++;
 	else
 		queue->stats.tx_zerocopy_fail++;
+	xenvif_skb_zerocopy_complete(queue);
 }
 
 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -2025,9 +2028,15 @@ int xenvif_kthread_guest_rx(void *data)
 		 * context so we defer it here, if this thread is
 		 * associated with queue 0.
 		 */
-		if (unlikely(queue->vif->disabled && queue->id == 0))
+		if (unlikely(queue->vif->disabled && queue->id == 0)) {
 			xenvif_carrier_off(queue->vif);
-		else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
+		} else if (unlikely(queue->vif->disabled)) {
+			/* kthread_stop() would be called upon this thread soon,
+			 * be a bit proactive
+			 */
+			skb_queue_purge(&queue->rx_queue);
+			queue->rx_last_skb_slots = 0;
+		} else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
 						     &queue->status))) {
 			xenvif_rx_purge_event(queue);
 		} else if (!netif_carrier_ok(queue->vif->dev)) {
@@ -2052,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
 	return 0;
 }
 
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+	/* Dealloc thread must remain running until all inflight
+	 * packets complete.
+	 */
+	return kthread_should_stop() &&
+		!atomic_read(&queue->inflight_packets);
+}
+
 int xenvif_dealloc_kthread(void *data)
 {
 	struct xenvif_queue *queue = data;
 
-	while (!kthread_should_stop()) {
+	for (;;) {
 		wait_event_interruptible(queue->dealloc_wq,
 					 tx_dealloc_work_todo(queue) ||
-					 kthread_should_stop());
-		if (kthread_should_stop())
+					 xenvif_dealloc_kthread_should_stop(queue));
+		if (xenvif_dealloc_kthread_should_stop(queue))
 			break;
 
 		xenvif_tx_dealloc_action(queue);
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d..9c47b89 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
 }
 
 #define XENVIF_KICK_STR "kick"
+#define BUFFER_SIZE     32
 
 static ssize_t
 xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
 	struct xenvif_queue *queue =
 		((struct seq_file *)filp->private_data)->private;
 	int len;
-	char write[sizeof(XENVIF_KICK_STR)];
+	char write[BUFFER_SIZE];
 
 	/* don't allow partial writes and check the length */
 	if (*ppos != 0)
 		return 0;
-	if (count < sizeof(XENVIF_KICK_STR) - 1)
+	if (count >= sizeof(write))
 		return -ENOSPC;
 
 	len = simple_write_to_buffer(write,
-				     sizeof(write),
+				     sizeof(write) - 1,
 				     ppos,
 				     buf,
 				     count);
 	if (len < 0)
 		return len;
 
+	write[len] = '\0';
+
 	if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
 		xenvif_interrupt(0, (void *)queue);
 	else {
@@ -171,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
 	.write = xenvif_write_io_ring,
 };
 
-static void xenvif_debugfs_addif(struct xenvif_queue *queue)
+static void xenvif_debugfs_addif(struct xenvif *vif)
 {
 	struct dentry *pfile;
-	struct xenvif *vif = queue->vif;
 	int i;
 
 	if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -733,10 +735,11 @@ static void connect(struct backend_info *be)
 			be->vif->num_queues = queue_index;
 			goto err;
 		}
+	}
+
 #ifdef CONFIG_DEBUG_FS
-		xenvif_debugfs_addif(queue);
+	xenvif_debugfs_addif(be->vif);
 #endif /* CONFIG_DEBUG_FS */
-	}
 
 	/* Initialisation completed, tell core driver the number of
 	 * active queues.
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 28204bc..ca82f54 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -628,9 +628,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) +
 		xennet_count_skb_frag_slots(skb);
 	if (unlikely(slots > MAX_SKB_FRAGS + 1)) {
-		net_alert_ratelimited(
-			"xennet: skb rides the rocket: %d slots\n", slots);
-		goto drop;
+		net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n",
+				    slots, skb->len);
+		if (skb_linearize(skb))
+			goto drop;
 	}
 
 	spin_lock_irqsave(&queue->tx_lock, flags);
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 2dcb054..5160c4e 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -9,7 +9,8 @@ menu "Device Tree and Open Firmware support"
 
 config OF_SELFTEST
 	bool "Device Tree Runtime self tests"
-	depends on OF_IRQ
+	depends on OF_IRQ && OF_EARLY_FLATTREE
+	select OF_DYNAMIC
 	help
 	  This option builds in test cases for the device tree infrastructure
 	  that are executed once at boot time, and the results dumped to the
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 099b1fb..2b6a7b1 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,11 +1,13 @@
 obj-y = base.o device.o platform.o
+obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
 obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
 obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
 obj-$(CONFIG_OF_IRQ)    += irq.o
 obj-$(CONFIG_OF_NET)	+= of_net.o
-obj-$(CONFIG_OF_SELFTEST) += selftest.o
+obj-$(CONFIG_OF_SELFTEST) += of_selftest.o
+of_selftest-objs := selftest.o testcase-data/testcases.dtb.o
 obj-$(CONFIG_OF_MDIO)	+= of_mdio.o
 obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index b986480..d8574ad 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,6 +17,7 @@
  *      as published by the Free Software Foundation; either version
  *      2 of the License, or (at your option) any later version.
  */
+#include <linux/console.h>
 #include <linux/ctype.h>
 #include <linux/cpu.h>
 #include <linux/module.h>
@@ -35,15 +36,17 @@ struct device_node *of_allnodes;
 EXPORT_SYMBOL(of_allnodes);
 struct device_node *of_chosen;
 struct device_node *of_aliases;
-static struct device_node *of_stdout;
+struct device_node *of_stdout;
 
-static struct kset *of_kset;
+struct kset *of_kset;
 
 /*
- * Used to protect the of_aliases; but also overloaded to hold off addition of
- * nodes to sysfs
+ * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
+ * This mutex must be held whenever modifications are being made to the
+ * device tree. The of_{attach,detach}_node() and
+ * of_{add,remove,update}_property() helpers make sure this happens.
  */
-DEFINE_MUTEX(of_aliases_mutex);
+DEFINE_MUTEX(of_mutex);
 
 /* use when traversing tree through the allnext, child, sibling,
  * or parent members of struct device_node.
@@ -89,79 +92,7 @@ int __weak of_node_to_nid(struct device_node *np)
 }
 #endif
 
-#if defined(CONFIG_OF_DYNAMIC)
-/**
- *	of_node_get - Increment refcount of a node
- *	@node:	Node to inc refcount, NULL is supported to
- *		simplify writing of callers
- *
- *	Returns node.
- */
-struct device_node *of_node_get(struct device_node *node)
-{
-	if (node)
-		kobject_get(&node->kobj);
-	return node;
-}
-EXPORT_SYMBOL(of_node_get);
-
-static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
-{
-	return container_of(kobj, struct device_node, kobj);
-}
-
-/**
- *	of_node_release - release a dynamically allocated node
- *	@kref:  kref element of the node to be released
- *
- *	In of_node_put() this function is passed to kref_put()
- *	as the destructor.
- */
-static void of_node_release(struct kobject *kobj)
-{
-	struct device_node *node = kobj_to_device_node(kobj);
-	struct property *prop = node->properties;
-
-	/* We should never be releasing nodes that haven't been detached. */
-	if (!of_node_check_flag(node, OF_DETACHED)) {
-		pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
-		dump_stack();
-		return;
-	}
-
-	if (!of_node_check_flag(node, OF_DYNAMIC))
-		return;
-
-	while (prop) {
-		struct property *next = prop->next;
-		kfree(prop->name);
-		kfree(prop->value);
-		kfree(prop);
-		prop = next;
-
-		if (!prop) {
-			prop = node->deadprops;
-			node->deadprops = NULL;
-		}
-	}
-	kfree(node->full_name);
-	kfree(node->data);
-	kfree(node);
-}
-
-/**
- *	of_node_put - Decrement refcount of a node
- *	@node:	Node to dec refcount, NULL is supported to
- *		simplify writing of callers
- *
- */
-void of_node_put(struct device_node *node)
-{
-	if (node)
-		kobject_put(&node->kobj);
-}
-EXPORT_SYMBOL(of_node_put);
-#else
+#ifndef CONFIG_OF_DYNAMIC
 static void of_node_release(struct kobject *kobj)
 {
 	/* Without CONFIG_OF_DYNAMIC, no nodes gets freed */
@@ -200,13 +131,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
 	return name;
 }
 
-static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
+int __of_add_property_sysfs(struct device_node *np, struct property *pp)
 {
 	int rc;
 
 	/* Important: Don't leak passwords */
 	bool secure = strncmp(pp->name, "security-", 9) == 0;
 
+	if (!of_kset || !of_node_is_attached(np))
+		return 0;
+
 	sysfs_bin_attr_init(&pp->attr);
 	pp->attr.attr.name = safe_name(&np->kobj, pp->name);
 	pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO;
@@ -218,12 +152,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
 	return rc;
 }
 
-static int __of_node_add(struct device_node *np)
+int __of_attach_node_sysfs(struct device_node *np)
 {
 	const char *name;
 	struct property *pp;
 	int rc;
 
+	if (!of_kset)
+		return 0;
+
 	np->kobj.kset = of_kset;
 	if (!np->parent) {
 		/* Nodes without parents are new top level trees */
@@ -245,59 +182,20 @@ static int __of_node_add(struct device_node *np)
 	return 0;
 }
 
-int of_node_add(struct device_node *np)
-{
-	int rc = 0;
-
-	BUG_ON(!of_node_is_initialized(np));
-
-	/*
-	 * Grab the mutex here so that in a race condition between of_init() and
-	 * of_node_add(), node addition will still be consistent.
-	 */
-	mutex_lock(&of_aliases_mutex);
-	if (of_kset)
-		rc = __of_node_add(np);
-	else
-		/* This scenario may be perfectly valid, but report it anyway */
-		pr_info("of_node_add(%s) before of_init()\n", np->full_name);
-	mutex_unlock(&of_aliases_mutex);
-	return rc;
-}
-
-#if defined(CONFIG_OF_DYNAMIC)
-static void of_node_remove(struct device_node *np)
-{
-	struct property *pp;
-
-	BUG_ON(!of_node_is_initialized(np));
-
-	/* only remove properties if on sysfs */
-	if (of_node_is_attached(np)) {
-		for_each_property_of_node(np, pp)
-			sysfs_remove_bin_file(&np->kobj, &pp->attr);
-		kobject_del(&np->kobj);
-	}
-
-	/* finally remove the kobj_init ref */
-	of_node_put(np);
-}
-#endif
-
 static int __init of_init(void)
 {
 	struct device_node *np;
 
 	/* Create the kset, and register existing nodes */
-	mutex_lock(&of_aliases_mutex);
+	mutex_lock(&of_mutex);
 	of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
 	if (!of_kset) {
-		mutex_unlock(&of_aliases_mutex);
+		mutex_unlock(&of_mutex);
 		return -ENOMEM;
 	}
 	for_each_of_allnodes(np)
-		__of_node_add(np);
-	mutex_unlock(&of_aliases_mutex);
+		__of_attach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
 
 	/* Symlink in /proc as required by userspace ABI */
 	if (of_allnodes)
@@ -369,8 +267,8 @@ EXPORT_SYMBOL(of_find_all_nodes);
  * Find a property with a given name for a given node
  * and return the value.
  */
-static const void *__of_get_property(const struct device_node *np,
-				     const char *name, int *lenp)
+const void *__of_get_property(const struct device_node *np,
+			      const char *name, int *lenp)
 {
 	struct property *pp = __of_find_property(np, name, lenp);
 
@@ -1748,32 +1646,10 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na
 }
 EXPORT_SYMBOL(of_count_phandle_with_args);
 
-#if defined(CONFIG_OF_DYNAMIC)
-static int of_property_notify(int action, struct device_node *np,
-			      struct property *prop)
-{
-	struct of_prop_reconfig pr;
-
-	/* only call notifiers if the node is attached */
-	if (!of_node_is_attached(np))
-		return 0;
-
-	pr.dn = np;
-	pr.prop = prop;
-	return of_reconfig_notify(action, &pr);
-}
-#else
-static int of_property_notify(int action, struct device_node *np,
-			      struct property *prop)
-{
-	return 0;
-}
-#endif
-
 /**
  * __of_add_property - Add a property to a node without lock operations
  */
-static int __of_add_property(struct device_node *np, struct property *prop)
+int __of_add_property(struct device_node *np, struct property *prop)
 {
 	struct property **next;
 
@@ -1799,22 +1675,49 @@ int of_add_property(struct device_node *np, struct property *prop)
 	unsigned long flags;
 	int rc;
 
-	rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop);
-	if (rc)
-		return rc;
+	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
 	rc = __of_add_property(np, prop);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-	if (rc)
-		return rc;
 
-	if (of_node_is_attached(np))
+	if (!rc)
 		__of_add_property_sysfs(np, prop);
 
+	mutex_unlock(&of_mutex);
+
+	if (!rc)
+		of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL);
+
 	return rc;
 }
 
+int __of_remove_property(struct device_node *np, struct property *prop)
+{
+	struct property **next;
+
+	for (next = &np->properties; *next; next = &(*next)->next) {
+		if (*next == prop)
+			break;
+	}
+	if (*next == NULL)
+		return -ENODEV;
+
+	/* found the node */
+	*next = prop->next;
+	prop->next = np->deadprops;
+	np->deadprops = prop;
+
+	return 0;
+}
+
+void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
+{
+	/* at early boot, bail here and defer setup to of_init() */
+	if (of_kset && of_node_is_attached(np))
+		sysfs_remove_bin_file(&np->kobj, &prop->attr);
+}
+
 /**
  * of_remove_property - Remove a property from a node.
  *
@@ -1825,211 +1728,98 @@ int of_add_property(struct device_node *np, struct property *prop)
  */
 int of_remove_property(struct device_node *np, struct property *prop)
 {
-	struct property **next;
 	unsigned long flags;
-	int found = 0;
 	int rc;
 
-	rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop);
-	if (rc)
-		return rc;
+	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
-	next = &np->properties;
-	while (*next) {
-		if (*next == prop) {
-			/* found the node */
-			*next = prop->next;
-			prop->next = np->deadprops;
-			np->deadprops = prop;
-			found = 1;
-			break;
-		}
-		next = &(*next)->next;
-	}
+	rc = __of_remove_property(np, prop);
 	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	if (!found)
-		return -ENODEV;
+	if (!rc)
+		__of_remove_property_sysfs(np, prop);
 
-	/* at early boot, bail hear and defer setup to of_init() */
-	if (!of_kset)
-		return 0;
+	mutex_unlock(&of_mutex);
 
-	sysfs_remove_bin_file(&np->kobj, &prop->attr);
+	if (!rc)
+		of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL);
 
-	return 0;
+	return rc;
 }
 
-/*
- * of_update_property - Update a property in a node, if the property does
- * not exist, add it.
- *
- * Note that we don't actually remove it, since we have given out
- * who-knows-how-many pointers to the data using get-property.
- * Instead we just move the property to the "dead properties" list,
- * and add the new property to the property list
- */
-int of_update_property(struct device_node *np, struct property *newprop)
+int __of_update_property(struct device_node *np, struct property *newprop,
+		struct property **oldpropp)
 {
 	struct property **next, *oldprop;
-	unsigned long flags;
-	int rc;
-
-	rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
-	if (rc)
-		return rc;
 
-	if (!newprop->name)
-		return -EINVAL;
+	for (next = &np->properties; *next; next = &(*next)->next) {
+		if (of_prop_cmp((*next)->name, newprop->name) == 0)
+			break;
+	}
+	*oldpropp = oldprop = *next;
 
-	raw_spin_lock_irqsave(&devtree_lock, flags);
-	next = &np->properties;
-	oldprop = __of_find_property(np, newprop->name, NULL);
-	if (!oldprop) {
-		/* add the new node */
-		rc = __of_add_property(np, newprop);
-	} else while (*next) {
+	if (oldprop) {
 		/* replace the node */
-		if (*next == oldprop) {
-			newprop->next = oldprop->next;
-			*next = newprop;
-			oldprop->next = np->deadprops;
-			np->deadprops = oldprop;
-			break;
-		}
-		next = &(*next)->next;
+		newprop->next = oldprop->next;
+		*next = newprop;
+		oldprop->next = np->deadprops;
+		np->deadprops = oldprop;
+	} else {
+		/* new node */
+		newprop->next = NULL;
+		*next = newprop;
 	}
-	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-	if (rc)
-		return rc;
 
+	return 0;
+}
+
+void __of_update_property_sysfs(struct device_node *np, struct property *newprop,
+		struct property *oldprop)
+{
 	/* At early boot, bail out and defer setup to of_init() */
 	if (!of_kset)
-		return 0;
+		return;
 
-	/* Update the sysfs attribute */
 	if (oldprop)
 		sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
 	__of_add_property_sysfs(np, newprop);
-
-	return 0;
 }
 
-#if defined(CONFIG_OF_DYNAMIC)
 /*
- * Support for dynamic device trees.
+ * of_update_property - Update a property in a node, if the property does
+ * not exist, add it.
  *
- * On some platforms, the device tree can be manipulated at runtime.
- * The routines in this section support adding, removing and changing
- * device tree nodes.
- */
-
-static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
-
-int of_reconfig_notifier_register(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_register(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
-
-int of_reconfig_notifier_unregister(struct notifier_block *nb)
-{
-	return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
-}
-EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
-
-int of_reconfig_notify(unsigned long action, void *p)
-{
-	int rc;
-
-	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
-	return notifier_to_errno(rc);
-}
-
-/**
- * of_attach_node - Plug a device node into the tree and global list.
+ * Note that we don't actually remove it, since we have given out
+ * who-knows-how-many pointers to the data using get-property.
+ * Instead we just move the property to the "dead properties" list,
+ * and add the new property to the property list
  */
-int of_attach_node(struct device_node *np)
+int of_update_property(struct device_node *np, struct property *newprop)
 {
+	struct property *oldprop;
 	unsigned long flags;
 	int rc;
 
-	rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
-	if (rc)
-		return rc;
-
-	raw_spin_lock_irqsave(&devtree_lock, flags);
-	np->sibling = np->parent->child;
-	np->allnext = np->parent->allnext;
-	np->parent->allnext = np;
-	np->parent->child = np;
-	of_node_clear_flag(np, OF_DETACHED);
-	raw_spin_unlock_irqrestore(&devtree_lock, flags);
-
-	of_node_add(np);
-	return 0;
-}
-
-/**
- * of_detach_node - "Unplug" a node from the device tree.
- *
- * The caller must hold a reference to the node.  The memory associated with
- * the node is not freed until its refcount goes to zero.
- */
-int of_detach_node(struct device_node *np)
-{
-	struct device_node *parent;
-	unsigned long flags;
-	int rc = 0;
+	if (!newprop->name)
+		return -EINVAL;
 
-	rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
-	if (rc)
-		return rc;
+	mutex_lock(&of_mutex);
 
 	raw_spin_lock_irqsave(&devtree_lock, flags);
+	rc = __of_update_property(np, newprop, &oldprop);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-	if (of_node_check_flag(np, OF_DETACHED)) {
-		/* someone already detached it */
-		raw_spin_unlock_irqrestore(&devtree_lock, flags);
-		return rc;
-	}
-
-	parent = np->parent;
-	if (!parent) {
-		raw_spin_unlock_irqrestore(&devtree_lock, flags);
-		return rc;
-	}
+	if (!rc)
+		__of_update_property_sysfs(np, newprop, oldprop);
 
-	if (of_allnodes == np)
-		of_allnodes = np->allnext;
-	else {
-		struct device_node *prev;
-		for (prev = of_allnodes;
-		     prev->allnext != np;
-		     prev = prev->allnext)
-			;
-		prev->allnext = np->allnext;
-	}
+	mutex_unlock(&of_mutex);
 
-	if (parent->child == np)
-		parent->child = np->sibling;
-	else {
-		struct device_node *prevsib;
-		for (prevsib = np->parent->child;
-		     prevsib->sibling != np;
-		     prevsib = prevsib->sibling)
-			;
-		prevsib->sibling = np->sibling;
-	}
-
-	of_node_set_flag(np, OF_DETACHED);
-	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	if (!rc)
+		of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop);
 
-	of_node_remove(np);
 	return rc;
 }
-#endif /* defined(CONFIG_OF_DYNAMIC) */
 
 static void of_alias_add(struct alias_prop *ap, struct device_node *np,
 			 int id, const char *stem, int stem_len)
@@ -2062,9 +1852,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
 		of_chosen = of_find_node_by_path("/chosen@0");
 
 	if (of_chosen) {
+		/* linux,stdout-path and /aliases/stdout are for legacy compatibility */
 		const char *name = of_get_property(of_chosen, "stdout-path", NULL);
 		if (!name)
 			name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+		if (IS_ENABLED(CONFIG_PPC) && !name)
+			name = of_get_property(of_aliases, "stdout", NULL);
 		if (name)
 			of_stdout = of_find_node_by_path(name);
 	}
@@ -2122,7 +1915,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
 	struct alias_prop *app;
 	int id = -ENODEV;
 
-	mutex_lock(&of_aliases_mutex);
+	mutex_lock(&of_mutex);
 	list_for_each_entry(app, &aliases_lookup, link) {
 		if (strcmp(app->stem, stem) != 0)
 			continue;
@@ -2132,7 +1925,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
 			break;
 		}
 	}
-	mutex_unlock(&of_aliases_mutex);
+	mutex_unlock(&of_mutex);
 
 	return id;
 }
@@ -2180,20 +1973,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
 EXPORT_SYMBOL_GPL(of_prop_next_string);
 
 /**
- * of_device_is_stdout_path - check if a device node matches the
- *                            linux,stdout-path property
- *
- * Check if this device node matches the linux,stdout-path property
- * in the chosen node. return true if yes, false otherwise.
+ * of_console_check() - Test and setup console for DT setup
+ * @dn - Pointer to device node
+ * @name - Name to use for preferred console without index. ex. "ttyS"
+ * @index - Index to use for preferred console.
+ *
+ * Check if the given device node matches the stdout-path property in the
+ * /chosen node. If it does then register it as the preferred console and return
+ * TRUE. Otherwise return FALSE.
  */
-int of_device_is_stdout_path(struct device_node *dn)
+bool of_console_check(struct device_node *dn, char *name, int index)
 {
-	if (!of_stdout)
+	if (!dn || dn != of_stdout || console_set_on_cmdline)
 		return false;
-
-	return of_stdout == dn;
+	return add_preferred_console(name, index, NULL);
 }
-EXPORT_SYMBOL_GPL(of_device_is_stdout_path);
+EXPORT_SYMBOL_GPL(of_console_check);
 
 /**
  *	of_find_next_cache_node - Find a node's subsidiary cache
diff --git a/drivers/of/device.c b/drivers/of/device.c
index dafb973..46d6c75c 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 	add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen);
 
 	seen = 0;
-	mutex_lock(&of_aliases_mutex);
+	mutex_lock(&of_mutex);
 	list_for_each_entry(app, &aliases_lookup, link) {
 		if (dev->of_node == app->np) {
 			add_uevent_var(env, "OF_ALIAS_%d=%s", seen,
@@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 			seen++;
 		}
 	}
-	mutex_unlock(&of_aliases_mutex);
+	mutex_unlock(&of_mutex);
 }
 
 int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
new file mode 100644
index 0000000..54fecc4
--- /dev/null
+++ b/drivers/of/dynamic.c
@@ -0,0 +1,660 @@
+/*
+ * Support for dynamic device trees.
+ *
+ * On some platforms, the device tree can be manipulated at runtime.
+ * The routines in this section support adding, removing and changing
+ * device tree nodes.
+ */
+
+#include <linux/of.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+
+#include "of_private.h"
+
+/**
+ * of_node_get() - Increment refcount of a node
+ * @node:	Node to inc refcount, NULL is supported to simplify writing of
+ *		callers
+ *
+ * Returns node.
+ */
+struct device_node *of_node_get(struct device_node *node)
+{
+	if (node)
+		kobject_get(&node->kobj);
+	return node;
+}
+EXPORT_SYMBOL(of_node_get);
+
+/**
+ * of_node_put() - Decrement refcount of a node
+ * @node:	Node to dec refcount, NULL is supported to simplify writing of
+ *		callers
+ */
+void of_node_put(struct device_node *node)
+{
+	if (node)
+		kobject_put(&node->kobj);
+}
+EXPORT_SYMBOL(of_node_put);
+
+void __of_detach_node_sysfs(struct device_node *np)
+{
+	struct property *pp;
+
+	BUG_ON(!of_node_is_initialized(np));
+	if (!of_kset)
+		return;
+
+	/* only remove properties if on sysfs */
+	if (of_node_is_attached(np)) {
+		for_each_property_of_node(np, pp)
+			sysfs_remove_bin_file(&np->kobj, &pp->attr);
+		kobject_del(&np->kobj);
+	}
+
+	/* finally remove the kobj_init ref */
+	of_node_put(np);
+}
+
+static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
+
+int of_reconfig_notifier_register(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
+
+int of_reconfig_notifier_unregister(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
+}
+EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
+
+int of_reconfig_notify(unsigned long action, void *p)
+{
+	int rc;
+
+	rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
+	return notifier_to_errno(rc);
+}
+
+int of_property_notify(int action, struct device_node *np,
+		       struct property *prop, struct property *oldprop)
+{
+	struct of_prop_reconfig pr;
+
+	/* only call notifiers if the node is attached */
+	if (!of_node_is_attached(np))
+		return 0;
+
+	pr.dn = np;
+	pr.prop = prop;
+	pr.old_prop = oldprop;
+	return of_reconfig_notify(action, &pr);
+}
+
+void __of_attach_node(struct device_node *np)
+{
+	const __be32 *phandle;
+	int sz;
+
+	np->name = __of_get_property(np, "name", NULL) ? : "<NULL>";
+	np->type = __of_get_property(np, "device_type", NULL) ? : "<NULL>";
+
+	phandle = __of_get_property(np, "phandle", &sz);
+	if (!phandle)
+		phandle = __of_get_property(np, "linux,phandle", &sz);
+	if (IS_ENABLED(PPC_PSERIES) && !phandle)
+		phandle = __of_get_property(np, "ibm,phandle", &sz);
+	np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
+
+	np->child = NULL;
+	np->sibling = np->parent->child;
+	np->allnext = np->parent->allnext;
+	np->parent->allnext = np;
+	np->parent->child = np;
+	of_node_clear_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_attach_node() - Plug a device node into the tree and global list.
+ */
+int of_attach_node(struct device_node *np)
+{
+	unsigned long flags;
+
+	mutex_lock(&of_mutex);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	__of_attach_node(np);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	__of_attach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
+
+	of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
+
+	return 0;
+}
+
+void __of_detach_node(struct device_node *np)
+{
+	struct device_node *parent;
+
+	if (WARN_ON(of_node_check_flag(np, OF_DETACHED)))
+		return;
+
+	parent = np->parent;
+	if (WARN_ON(!parent))
+		return;
+
+	if (of_allnodes == np)
+		of_allnodes = np->allnext;
+	else {
+		struct device_node *prev;
+		for (prev = of_allnodes;
+		     prev->allnext != np;
+		     prev = prev->allnext)
+			;
+		prev->allnext = np->allnext;
+	}
+
+	if (parent->child == np)
+		parent->child = np->sibling;
+	else {
+		struct device_node *prevsib;
+		for (prevsib = np->parent->child;
+		     prevsib->sibling != np;
+		     prevsib = prevsib->sibling)
+			;
+		prevsib->sibling = np->sibling;
+	}
+
+	of_node_set_flag(np, OF_DETACHED);
+}
+
+/**
+ * of_detach_node() - "Unplug" a node from the device tree.
+ *
+ * The caller must hold a reference to the node.  The memory associated with
+ * the node is not freed until its refcount goes to zero.
+ */
+int of_detach_node(struct device_node *np)
+{
+	unsigned long flags;
+	int rc = 0;
+
+	mutex_lock(&of_mutex);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	__of_detach_node(np);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	__of_detach_node_sysfs(np);
+	mutex_unlock(&of_mutex);
+
+	of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
+
+	return rc;
+}
+
+/**
+ * of_node_release() - release a dynamically allocated node
+ * @kref: kref element of the node to be released
+ *
+ * In of_node_put() this function is passed to kref_put() as the destructor.
+ */
+void of_node_release(struct kobject *kobj)
+{
+	struct device_node *node = kobj_to_device_node(kobj);
+	struct property *prop = node->properties;
+
+	/* We should never be releasing nodes that haven't been detached. */
+	if (!of_node_check_flag(node, OF_DETACHED)) {
+		pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
+		dump_stack();
+		return;
+	}
+
+	if (!of_node_check_flag(node, OF_DYNAMIC))
+		return;
+
+	while (prop) {
+		struct property *next = prop->next;
+		kfree(prop->name);
+		kfree(prop->value);
+		kfree(prop);
+		prop = next;
+
+		if (!prop) {
+			prop = node->deadprops;
+			node->deadprops = NULL;
+		}
+	}
+	kfree(node->full_name);
+	kfree(node->data);
+	kfree(node);
+}
+
+/**
+ * __of_prop_dup - Copy a property dynamically.
+ * @prop:	Property to copy
+ * @allocflags:	Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Copy a property by dynamically allocating the memory of both the
+ * property stucture and the property name & contents. The property's
+ * flags have the OF_DYNAMIC bit set so that we can differentiate between
+ * dynamically allocated properties and not.
+ * Returns the newly allocated property or NULL on out of memory error.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
+{
+	struct property *new;
+
+	new = kzalloc(sizeof(*new), allocflags);
+	if (!new)
+		return NULL;
+
+	/*
+	 * NOTE: There is no check for zero length value.
+	 * In case of a boolean property, this will allocate a value
+	 * of zero bytes. We do this to work around the use
+	 * of of_get_property() calls on boolean values.
+	 */
+	new->name = kstrdup(prop->name, allocflags);
+	new->value = kmemdup(prop->value, prop->length, allocflags);
+	new->length = prop->length;
+	if (!new->name || !new->value)
+		goto err_free;
+
+	/* mark the property as dynamic */
+	of_property_set_flag(new, OF_DYNAMIC);
+
+	return new;
+
+ err_free:
+	kfree(new->name);
+	kfree(new->value);
+	kfree(new);
+	return NULL;
+}
+
+/**
+ * __of_node_alloc() - Create an empty device node dynamically.
+ * @full_name:	Full name of the new device node
+ * @allocflags:	Allocation flags (typically pass GFP_KERNEL)
+ *
+ * Create an empty device tree node, suitable for further modification.
+ * The node data are dynamically allocated and all the node flags
+ * have the OF_DYNAMIC & OF_DETACHED bits set.
+ * Returns the newly allocated node or NULL on out of memory error.
+ */
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
+{
+	struct device_node *node;
+
+	node = kzalloc(sizeof(*node), allocflags);
+	if (!node)
+		return NULL;
+
+	node->full_name = kstrdup(full_name, allocflags);
+	of_node_set_flag(node, OF_DYNAMIC);
+	of_node_set_flag(node, OF_DETACHED);
+	if (!node->full_name)
+		goto err_free;
+
+	of_node_init(node);
+
+	return node;
+
+ err_free:
+	kfree(node->full_name);
+	kfree(node);
+	return NULL;
+}
+
+static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
+{
+	of_node_put(ce->np);
+	list_del(&ce->node);
+	kfree(ce);
+}
+
+#ifdef DEBUG
+static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+	switch (ce->action) {
+	case OF_RECONFIG_ADD_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "ADD_PROPERTY   ", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "REMOVE_PROPERTY", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		pr_debug("%p: %s %s/%s\n",
+			ce, "UPDATE_PROPERTY", ce->np->full_name,
+			ce->prop->name);
+		break;
+	case OF_RECONFIG_ATTACH_NODE:
+		pr_debug("%p: %s %s\n",
+			ce, "ATTACH_NODE    ", ce->np->full_name);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		pr_debug("%p: %s %s\n",
+			ce, "DETACH_NODE    ", ce->np->full_name);
+		break;
+	}
+}
+#else
+static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce)
+{
+	/* empty */
+}
+#endif
+
+static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
+					  struct of_changeset_entry *rce)
+{
+	memcpy(rce, ce, sizeof(*rce));
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		rce->action = OF_RECONFIG_DETACH_NODE;
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		rce->action = OF_RECONFIG_ATTACH_NODE;
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		rce->action = OF_RECONFIG_REMOVE_PROPERTY;
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		rce->action = OF_RECONFIG_ADD_PROPERTY;
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		rce->old_prop = ce->prop;
+		rce->prop = ce->old_prop;
+		break;
+	}
+}
+
+static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
+{
+	struct of_changeset_entry ce_inverted;
+	int ret;
+
+	if (revert) {
+		__of_changeset_entry_invert(ce, &ce_inverted);
+		ce = &ce_inverted;
+	}
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+	case OF_RECONFIG_DETACH_NODE:
+		ret = of_reconfig_notify(ce->action, ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+	case OF_RECONFIG_REMOVE_PROPERTY:
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop);
+		break;
+	default:
+		pr_err("%s: invalid devicetree changeset action: %i\n", __func__,
+			(int)ce->action);
+		return;
+	}
+
+	if (ret)
+		pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name);
+}
+
+static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
+{
+	struct property *old_prop, **propp;
+	unsigned long flags;
+	int ret = 0;
+
+	__of_changeset_entry_dump(ce);
+
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		__of_attach_node(ce->np);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		__of_detach_node(ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		/* If the property is in deadprops then it must be removed */
+		for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+			if (*propp == ce->prop) {
+				*propp = ce->prop->next;
+				ce->prop->next = NULL;
+				break;
+			}
+		}
+
+		ret = __of_add_property(ce->np, ce->prop);
+		if (ret) {
+			pr_err("%s: add_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		ret = __of_remove_property(ce->np, ce->prop);
+		if (ret) {
+			pr_err("%s: remove_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		/* If the property is in deadprops then it must be removed */
+		for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
+			if (*propp == ce->prop) {
+				*propp = ce->prop->next;
+				ce->prop->next = NULL;
+				break;
+			}
+		}
+
+		ret = __of_update_property(ce->np, ce->prop, &old_prop);
+		if (ret) {
+			pr_err("%s: update_property failed @%s/%s\n",
+				__func__, ce->np->full_name,
+				ce->prop->name);
+			break;
+		}
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+
+	if (ret)
+		return ret;
+
+	switch (ce->action) {
+	case OF_RECONFIG_ATTACH_NODE:
+		__of_attach_node_sysfs(ce->np);
+		break;
+	case OF_RECONFIG_DETACH_NODE:
+		__of_detach_node_sysfs(ce->np);
+		break;
+	case OF_RECONFIG_ADD_PROPERTY:
+		/* ignore duplicate names */
+		__of_add_property_sysfs(ce->np, ce->prop);
+		break;
+	case OF_RECONFIG_REMOVE_PROPERTY:
+		__of_remove_property_sysfs(ce->np, ce->prop);
+		break;
+	case OF_RECONFIG_UPDATE_PROPERTY:
+		__of_update_property_sysfs(ce->np, ce->prop, ce->old_prop);
+		break;
+	}
+
+	return 0;
+}
+
+static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce)
+{
+	struct of_changeset_entry ce_inverted;
+
+	__of_changeset_entry_invert(ce, &ce_inverted);
+	return __of_changeset_entry_apply(&ce_inverted);
+}
+
+/**
+ * of_changeset_init - Initialize a changeset for use
+ *
+ * @ocs:	changeset pointer
+ *
+ * Initialize a changeset structure
+ */
+void of_changeset_init(struct of_changeset *ocs)
+{
+	memset(ocs, 0, sizeof(*ocs));
+	INIT_LIST_HEAD(&ocs->entries);
+}
+
+/**
+ * of_changeset_destroy - Destroy a changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Destroys a changeset. Note that if a changeset is applied,
+ * its changes to the tree cannot be reverted.
+ */
+void of_changeset_destroy(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce, *cen;
+
+	list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
+		__of_changeset_entry_destroy(ce);
+}
+
+/**
+ * of_changeset_apply - Applies a changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Applies a changeset to the live tree.
+ * Any side-effects of live tree state changes are applied here on
+ * sucess, like creation/destruction of devices and side-effects
+ * like creation of sysfs properties and directories.
+ * Returns 0 on success, a negative error value in case of an error.
+ * On error the partially applied effects are reverted.
+ */
+int of_changeset_apply(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce;
+	int ret;
+
+	/* perform the rest of the work */
+	pr_debug("of_changeset: applying...\n");
+	list_for_each_entry(ce, &ocs->entries, node) {
+		ret = __of_changeset_entry_apply(ce);
+		if (ret) {
+			pr_err("%s: Error applying changeset (%d)\n", __func__, ret);
+			list_for_each_entry_continue_reverse(ce, &ocs->entries, node)
+				__of_changeset_entry_revert(ce);
+			return ret;
+		}
+	}
+	pr_debug("of_changeset: applied, emitting notifiers.\n");
+
+	/* drop the global lock while emitting notifiers */
+	mutex_unlock(&of_mutex);
+	list_for_each_entry(ce, &ocs->entries, node)
+		__of_changeset_entry_notify(ce, 0);
+	mutex_lock(&of_mutex);
+	pr_debug("of_changeset: notifiers sent.\n");
+
+	return 0;
+}
+
+/**
+ * of_changeset_revert - Reverts an applied changeset
+ *
+ * @ocs:	changeset pointer
+ *
+ * Reverts a changeset returning the state of the tree to what it
+ * was before the application.
+ * Any side-effects like creation/destruction of devices and
+ * removal of sysfs properties and directories are applied.
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_revert(struct of_changeset *ocs)
+{
+	struct of_changeset_entry *ce;
+	int ret;
+
+	pr_debug("of_changeset: reverting...\n");
+	list_for_each_entry_reverse(ce, &ocs->entries, node) {
+		ret = __of_changeset_entry_revert(ce);
+		if (ret) {
+			pr_err("%s: Error reverting changeset (%d)\n", __func__, ret);
+			list_for_each_entry_continue(ce, &ocs->entries, node)
+				__of_changeset_entry_apply(ce);
+			return ret;
+		}
+	}
+	pr_debug("of_changeset: reverted, emitting notifiers.\n");
+
+	/* drop the global lock while emitting notifiers */
+	mutex_unlock(&of_mutex);
+	list_for_each_entry_reverse(ce, &ocs->entries, node)
+		__of_changeset_entry_notify(ce, 1);
+	mutex_lock(&of_mutex);
+	pr_debug("of_changeset: notifiers sent.\n");
+
+	return 0;
+}
+
+/**
+ * of_changeset_action - Perform a changeset action
+ *
+ * @ocs:	changeset pointer
+ * @action:	action to perform
+ * @np:		Pointer to device node
+ * @prop:	Pointer to property
+ *
+ * On action being one of:
+ * + OF_RECONFIG_ATTACH_NODE
+ * + OF_RECONFIG_DETACH_NODE,
+ * + OF_RECONFIG_ADD_PROPERTY
+ * + OF_RECONFIG_REMOVE_PROPERTY,
+ * + OF_RECONFIG_UPDATE_PROPERTY
+ * Returns 0 on success, a negative error value in case of an error.
+ */
+int of_changeset_action(struct of_changeset *ocs, unsigned long action,
+		struct device_node *np, struct property *prop)
+{
+	struct of_changeset_entry *ce;
+
+	ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+	if (!ce) {
+		pr_err("%s: Failed to allocate\n", __func__);
+		return -ENOMEM;
+	}
+	/* get a reference to the node */
+	ce->action = action;
+	ce->np = of_node_get(np);
+	ce->prop = prop;
+
+	if (action == OF_RECONFIG_UPDATE_PROPERTY && prop)
+		ce->old_prop = of_find_property(np, prop->name, NULL);
+
+	/* add it to the list */
+	list_add_tail(&ce->node, &ocs->entries);
+	return 0;
+}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9aa012e..f46a24f 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -923,24 +923,24 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+#define MAX_PHYS_ADDR	((phys_addr_t)~0)
+
 void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 	const u64 phys_offset = __pa(PAGE_OFFSET);
 	base &= PAGE_MASK;
 	size &= PAGE_MASK;
 
-	if (sizeof(phys_addr_t) < sizeof(u64)) {
-		if (base > ULONG_MAX) {
-			pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
-					base, base + size);
-			return;
-		}
+	if (base > MAX_PHYS_ADDR) {
+		pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
+				base, base + size);
+		return;
+	}
 
-		if (base + size > ULONG_MAX) {
-			pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
-					ULONG_MAX, base + size);
-			size = ULONG_MAX - base;
-		}
+	if (base + size > MAX_PHYS_ADDR) {
+		pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
+				ULONG_MAX, base + size);
+		size = MAX_PHYS_ADDR - base;
 	}
 
 	if (base + size < phys_offset) {
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index ff350c8..858e0a5 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -31,6 +31,63 @@ struct alias_prop {
 	char stem[0];
 };
 
-extern struct mutex of_aliases_mutex;
+extern struct mutex of_mutex;
 extern struct list_head aliases_lookup;
+extern struct kset *of_kset;
+
+
+static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
+{
+	return container_of(kobj, struct device_node, kobj);
+}
+
+#if defined(CONFIG_OF_DYNAMIC)
+extern int of_property_notify(int action, struct device_node *np,
+			      struct property *prop, struct property *old_prop);
+extern void of_node_release(struct kobject *kobj);
+#else /* CONFIG_OF_DYNAMIC */
+static inline int of_property_notify(int action, struct device_node *np,
+				     struct property *prop, struct property *old_prop)
+{
+	return 0;
+}
+#endif /* CONFIG_OF_DYNAMIC */
+
+/**
+ * General utilities for working with live trees.
+ *
+ * All functions with two leading underscores operate
+ * without taking node references, so you either have to
+ * own the devtree lock or work on detached trees only.
+ */
+struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
+struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
+
+extern const void *__of_get_property(const struct device_node *np,
+				     const char *name, int *lenp);
+extern int __of_add_property(struct device_node *np, struct property *prop);
+extern int __of_add_property_sysfs(struct device_node *np,
+		struct property *prop);
+extern int __of_remove_property(struct device_node *np, struct property *prop);
+extern void __of_remove_property_sysfs(struct device_node *np,
+		struct property *prop);
+extern int __of_update_property(struct device_node *np,
+		struct property *newprop, struct property **oldprop);
+extern void __of_update_property_sysfs(struct device_node *np,
+		struct property *newprop, struct property *oldprop);
+
+extern void __of_attach_node(struct device_node *np);
+extern int __of_attach_node_sysfs(struct device_node *np);
+extern void __of_detach_node(struct device_node *np);
+extern void __of_detach_node_sysfs(struct device_node *np);
+
+/* iterators for transactions, used for overlays */
+/* forward iterator */
+#define for_each_transaction_entry(_oft, _te) \
+	list_for_each_entry(_te, &(_oft)->te_list, node)
+
+/* reverse iterator */
+#define for_each_transaction_entry_reverse(_oft, _te) \
+	list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
+
 #endif /* _LINUX_OF_PRIVATE_H */
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 632aae8..59fb12e 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void)
 	for (i = 0; i < reserved_mem_count; i++) {
 		struct reserved_mem *rmem = &reserved_mem[i];
 		unsigned long node = rmem->fdt_node;
+		int len;
+		const __be32 *prop;
 		int err = 0;
 
+		prop = of_get_flat_dt_prop(node, "phandle", &len);
+		if (!prop)
+			prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
+		if (prop)
+			rmem->phandle = of_read_number(prop, len/4);
+
 		if (rmem->size == 0)
 			err = __reserved_mem_alloc_size(node, rmem->name,
 						 &rmem->base, &rmem->size);
@@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void)
 			__reserved_mem_init_node(rmem);
 	}
 }
+
+static inline struct reserved_mem *__find_rmem(struct device_node *node)
+{
+	unsigned int i;
+
+	if (!node->phandle)
+		return NULL;
+
+	for (i = 0; i < reserved_mem_count; i++)
+		if (reserved_mem[i].phandle == node->phandle)
+			return &reserved_mem[i];
+	return NULL;
+}
+
+/**
+ * of_reserved_mem_device_init() - assign reserved memory region to given device
+ *
+ * This function assign memory region pointed by "memory-region" device tree
+ * property to the given device.
+ */
+void of_reserved_mem_device_init(struct device *dev)
+{
+	struct reserved_mem *rmem;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!np)
+		return;
+
+	rmem = __find_rmem(np);
+	of_node_put(np);
+
+	if (!rmem || !rmem->ops || !rmem->ops->device_init)
+		return;
+
+	rmem->ops->device_init(rmem, dev);
+	dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
+}
+
+/**
+ * of_reserved_mem_device_release() - release reserved memory device structures
+ *
+ * This function releases structures allocated for memory region handling for
+ * the given device.
+ */
+void of_reserved_mem_device_release(struct device *dev)
+{
+	struct reserved_mem *rmem;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!np)
+		return;
+
+	rmem = __find_rmem(np);
+	of_node_put(np);
+
+	if (!rmem || !rmem->ops || !rmem->ops->device_release)
+		return;
+
+	rmem->ops->device_release(rmem, dev);
+}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 500436f..0197725 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus,
 			break;
 		}
 	}
+	of_node_set_flag(bus, OF_POPULATED_BUS);
 	return rc;
 }
 
@@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate);
 
 static int of_platform_device_destroy(struct device *dev, void *data)
 {
-	bool *children_left = data;
-
 	/* Do not touch devices not populated from the device tree */
-	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) {
-		*children_left = true;
+	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
 		return 0;
-	}
 
-	/* Recurse, but don't touch this device if it has any children left */
-	if (of_platform_depopulate(dev) != 0) {
-		*children_left = true;
-		return 0;
-	}
+	/* Recurse for any nodes that were treated as busses */
+	if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS))
+		device_for_each_child(dev, NULL, of_platform_device_destroy);
 
 	if (dev->bus == &platform_bus_type)
 		platform_device_unregister(to_platform_device(dev));
@@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data)
 	else if (dev->bus == &amba_bustype)
 		amba_device_unregister(to_amba_device(dev));
 #endif
-	else {
-		*children_left = true;
-		return 0;
-	}
 
 	of_node_clear_flag(dev->of_node, OF_POPULATED);
-
+	of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
 	return 0;
 }
 
 /**
  * of_platform_depopulate() - Remove devices populated from device tree
- * @parent: device which childred will be removed
+ * @parent: device which children will be removed
  *
  * Complementary to of_platform_populate(), this function removes children
  * of the given device (and, recurrently, their children) that have been
@@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data)
  * Returns 0 when all children devices have been removed or
  * -EBUSY when some children remained.
  */
-int of_platform_depopulate(struct device *parent)
+void of_platform_depopulate(struct device *parent)
 {
-	bool children_left = false;
-
-	device_for_each_child(parent, &children_left,
-			      of_platform_device_destroy);
-
-	return children_left ? -EBUSY : 0;
+	device_for_each_child(parent, NULL, of_platform_device_destroy);
 }
 EXPORT_SYMBOL_GPL(of_platform_depopulate);
 
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 077314e..d410026 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_fdt.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/list.h>
@@ -16,11 +17,17 @@
 #include <linux/slab.h>
 #include <linux/device.h>
 
+#include "of_private.h"
+
 static struct selftest_results {
 	int passed;
 	int failed;
 } selftest_results;
 
+#define NO_OF_NODES 2
+static struct device_node *nodes[NO_OF_NODES];
+static int last_node_index;
+
 #define selftest(result, fmt, ...) { \
 	if (!(result)) { \
 		selftest_results.failed++; \
@@ -266,6 +273,81 @@ static void __init of_selftest_property_match_string(void)
 	selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
 }
 
+#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \
+			(p1)->value && (p2)->value && \
+			!memcmp((p1)->value, (p2)->value, (p1)->length) && \
+			!strcmp((p1)->name, (p2)->name))
+static void __init of_selftest_property_copy(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+	struct property p1 = { .name = "p1", .length = 0, .value = "" };
+	struct property p2 = { .name = "p2", .length = 5, .value = "abcd" };
+	struct property *new;
+
+	new = __of_prop_dup(&p1, GFP_KERNEL);
+	selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n");
+	kfree(new->value);
+	kfree(new->name);
+	kfree(new);
+
+	new = __of_prop_dup(&p2, GFP_KERNEL);
+	selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n");
+	kfree(new->value);
+	kfree(new->name);
+	kfree(new);
+#endif
+}
+
+static void __init of_selftest_changeset(void)
+{
+#ifdef CONFIG_OF_DYNAMIC
+	struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
+	struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
+	struct property *ppremove;
+	struct device_node *n1, *n2, *n21, *nremove, *parent;
+	struct of_changeset chgset;
+
+	of_changeset_init(&chgset);
+	n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
+	selftest(n1, "testcase setup failure\n");
+	n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
+	selftest(n2, "testcase setup failure\n");
+	n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
+	selftest(n21, "testcase setup failure %p\n", n21);
+	nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
+	selftest(nremove, "testcase setup failure\n");
+	ppadd = __of_prop_dup(&padd, GFP_KERNEL);
+	selftest(ppadd, "testcase setup failure\n");
+	ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL);
+	selftest(ppupdate, "testcase setup failure\n");
+	parent = nremove->parent;
+	n1->parent = parent;
+	n2->parent = parent;
+	n21->parent = n2;
+	n2->child = n21;
+	ppremove = of_find_property(parent, "prop-remove", NULL);
+	selftest(ppremove, "failed to find removal prop");
+
+	of_changeset_init(&chgset);
+	selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n");
+	selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n");
+	selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n");
+	selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n");
+	selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n");
+	selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
+	selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
+	mutex_lock(&of_mutex);
+	selftest(!of_changeset_apply(&chgset), "apply failed\n");
+	mutex_unlock(&of_mutex);
+
+	mutex_lock(&of_mutex);
+	selftest(!of_changeset_revert(&chgset), "revert failed\n");
+	mutex_unlock(&of_mutex);
+
+	of_changeset_destroy(&chgset);
+#endif
+}
+
 static void __init of_selftest_parse_interrupts(void)
 {
 	struct device_node *np;
@@ -517,9 +599,156 @@ static void __init of_selftest_platform_populate(void)
 	}
 }
 
+/**
+ *	update_node_properties - adds the properties
+ *	of np into dup node (present in live tree) and
+ *	updates parent of children of np to dup.
+ *
+ *	@np:	node already present in live tree
+ *	@dup:	node present in live tree to be updated
+ */
+static void update_node_properties(struct device_node *np,
+					struct device_node *dup)
+{
+	struct property *prop;
+	struct device_node *child;
+
+	for_each_property_of_node(np, prop)
+		of_add_property(dup, prop);
+
+	for_each_child_of_node(np, child)
+		child->parent = dup;
+}
+
+/**
+ *	attach_node_and_children - attaches nodes
+ *	and its children to live tree
+ *
+ *	@np:	Node to attach to live tree
+ */
+static int attach_node_and_children(struct device_node *np)
+{
+	struct device_node *next, *root = np, *dup;
+
+	if (!np) {
+		pr_warn("%s: No tree to attach; not running tests\n",
+			__func__);
+		return -ENODATA;
+	}
+
+
+	/* skip root node */
+	np = np->child;
+	/* storing a copy in temporary node */
+	dup = np;
+
+	while (dup) {
+		nodes[last_node_index++] = dup;
+		dup = dup->sibling;
+	}
+	dup = NULL;
+
+	while (np) {
+		next = np->allnext;
+		dup = of_find_node_by_path(np->full_name);
+		if (dup)
+			update_node_properties(np, dup);
+		else {
+			np->child = NULL;
+			if (np->parent == root)
+				np->parent = of_allnodes;
+			of_attach_node(np);
+		}
+		np = next;
+	}
+
+	return 0;
+}
+
+/**
+ *	selftest_data_add - Reads, copies data from
+ *	linked tree and attaches it to the live tree
+ */
+static int __init selftest_data_add(void)
+{
+	void *selftest_data;
+	struct device_node *selftest_data_node;
+	extern uint8_t __dtb_testcases_begin[];
+	extern uint8_t __dtb_testcases_end[];
+	const int size = __dtb_testcases_end - __dtb_testcases_begin;
+
+	if (!size || !of_allnodes) {
+		pr_warn("%s: No testcase data to attach; not running tests\n",
+			__func__);
+		return -ENODATA;
+	}
+
+	/* creating copy */
+	selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL);
+
+	if (!selftest_data) {
+		pr_warn("%s: Failed to allocate memory for selftest_data; "
+			"not running tests\n", __func__);
+		return -ENOMEM;
+	}
+	of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
+
+	/* attach the sub-tree to live tree */
+	return attach_node_and_children(selftest_data_node);
+}
+
+/**
+ *	detach_node_and_children - detaches node
+ *	and its children from live tree
+ *
+ *	@np:	Node to detach from live tree
+ */
+static void detach_node_and_children(struct device_node *np)
+{
+	while (np->child)
+		detach_node_and_children(np->child);
+
+	while (np->sibling)
+		detach_node_and_children(np->sibling);
+
+	of_detach_node(np);
+}
+
+/**
+ *	selftest_data_remove - removes the selftest data
+ *	nodes from the live tree
+ */
+static void selftest_data_remove(void)
+{
+	struct device_node *np;
+	struct property *prop;
+
+	while (last_node_index >= 0) {
+		if (nodes[last_node_index]) {
+			np = of_find_node_by_path(nodes[last_node_index]->full_name);
+			if (strcmp(np->full_name, "/aliases") != 0) {
+				detach_node_and_children(np->child);
+				of_detach_node(np);
+			} else {
+				for_each_property_of_node(np, prop) {
+					if (strcmp(prop->name, "testcase-alias") == 0)
+						of_remove_property(np, prop);
+				}
+			}
+		}
+		last_node_index--;
+	}
+}
+
 static int __init of_selftest(void)
 {
 	struct device_node *np;
+	int res;
+
+	/* adding data for selftest */
+	res = selftest_data_add();
+	if (res)
+		return res;
 
 	np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
 	if (!np) {
@@ -533,12 +762,18 @@ static int __init of_selftest(void)
 	of_selftest_dynamic();
 	of_selftest_parse_phandle_with_args();
 	of_selftest_property_match_string();
+	of_selftest_property_copy();
+	of_selftest_changeset();
 	of_selftest_parse_interrupts();
 	of_selftest_parse_interrupts_extended();
 	of_selftest_match_node();
 	of_selftest_platform_populate();
 	pr_info("end of selftest - %i passed, %i failed\n",
 		selftest_results.passed, selftest_results.failed);
+
+	/* removing selftest data from live tree */
+	selftest_data_remove();
+
 	return 0;
 }
 late_initcall(of_selftest);
diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts
new file mode 100644
index 0000000..219ef93
--- /dev/null
+++ b/drivers/of/testcase-data/testcases.dts
@@ -0,0 +1,15 @@
+/dts-v1/;
+/ {
+	testcase-data {
+		changeset {
+			prop-update = "hello";
+			prop-remove = "world";
+			node-remove {
+			};
+		};
+	};
+};
+#include "tests-phandle.dtsi"
+#include "tests-interrupts.dtsi"
+#include "tests-match.dtsi"
+#include "tests-platform.dtsi"
diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi
deleted file mode 100644
index 6d8d980a..0000000
--- a/drivers/of/testcase-data/testcases.dtsi
+++ /dev/null
@@ -1,4 +0,0 @@
-#include "tests-phandle.dtsi"
-#include "tests-interrupts.dtsi"
-#include "tests-match.dtsi"
-#include "tests-platform.dtsi"
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 93aa29f..f2945fa 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -375,11 +375,11 @@ static void __exit cleanup_slots(void)
 
 static int __init rpaphp_init(void)
 {
-	struct device_node *dn = NULL;
+	struct device_node *dn;
 
 	info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 
-	while ((dn = of_find_node_by_name(dn, "pci")))
+	for_each_node_by_name(dn, "pci")
 		rpaphp_add_slot(dn);
 
 	return 0;
diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig
index 6bbc36f..e460398 100644
--- a/drivers/scsi/cxgbi/cxgb3i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig
@@ -1,6 +1,6 @@
 config SCSI_CXGB3_ISCSI
 	tristate "Chelsio T3 iSCSI support"
-	depends on PCI && INET
+	depends on PCI && INET && (IPV6 || IPV6=n)
 	select NETDEVICES
 	select ETHERNET
 	select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 16b2c7d..8c4e423 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,6 +1,6 @@
 config SCSI_CXGB4_ISCSI
 	tristate "Chelsio T4 iSCSI support"
-	depends on PCI && INET
+	depends on PCI && INET && (IPV6 || IPV6=n)
 	select NETDEVICES
 	select ETHERNET
 	select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 43fea22..ae45bd9 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -472,7 +472,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
 	if (delay > 0)
 		queue_delayed_work(system_long_wq, &rport->reconnect_work,
 				   1UL * delay * HZ);
-	if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
+	if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) &&
+	    srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
 		pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
 			 rport->state);
 		scsi_target_block(&shost->shost_gendev);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index f9a1386..693208e 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -151,7 +151,7 @@ config KIRKWOOD_THERMAL
 
 config DOVE_THERMAL
 	tristate "Temperature sensor on Marvell Dove SoCs"
-	depends on ARCH_DOVE
+	depends on ARCH_DOVE || MACH_DOVE
 	depends on OF
 	help
 	  Support for the Dove thermal sensor driver in the Linux thermal
@@ -243,4 +243,9 @@ depends on ARCH_EXYNOS
 source "drivers/thermal/samsung/Kconfig"
 endmenu
 
+menu "STMicroelectronics thermal drivers"
+depends on ARCH_STI && OF
+source "drivers/thermal/st/Kconfig"
+endmenu
+
 endif
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile
index de0636a..31e232f 100644
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_X86_PKG_TEMP_THERMAL)	+= x86_pkg_temp_thermal.o
 obj-$(CONFIG_INTEL_SOC_DTS_THERMAL)	+= intel_soc_dts_thermal.o
 obj-$(CONFIG_TI_SOC_THERMAL)	+= ti-soc-thermal/
 obj-$(CONFIG_ACPI_INT3403_THERMAL)	+= int3403_thermal.o
+obj-$(CONFIG_ST_THERMAL)	+= st/
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 84a75f8..1ab0018 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -305,7 +305,7 @@ static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device,
  * @event: value showing cpufreq event for which this function invoked.
  * @data: callback-specific data
  *
- * Callback to highjack the notification on cpufreq policy transition.
+ * Callback to hijack the notification on cpufreq policy transition.
  * Every time there is a change in policy, we will intercept and
  * update the cpufreq policy with thermal constraints.
  *
diff --git a/drivers/thermal/int3403_thermal.c b/drivers/thermal/int3403_thermal.c
index e93f025..17554ee 100644
--- a/drivers/thermal/int3403_thermal.c
+++ b/drivers/thermal/int3403_thermal.c
@@ -33,6 +33,10 @@
 struct int3403_sensor {
 	struct thermal_zone_device *tzone;
 	unsigned long *thresholds;
+	unsigned long	crit_temp;
+	int		crit_trip_id;
+	unsigned long	psv_temp;
+	int		psv_trip_id;
 };
 
 static int sys_get_curr_temp(struct thermal_zone_device *tzone,
@@ -79,12 +83,18 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzone,
 	struct acpi_device *device = tzone->devdata;
 	struct int3403_sensor *obj = acpi_driver_data(device);
 
-	/*
-	 * get_trip_temp is a mandatory callback but
-	 * PATx method doesn't return any value, so return
-	 * cached value, which was last set from user space.
-	 */
-	*temp = obj->thresholds[trip];
+	if (trip == obj->crit_trip_id)
+		*temp = obj->crit_temp;
+	else if (trip == obj->psv_trip_id)
+		*temp = obj->psv_temp;
+	else {
+		/*
+		 * get_trip_temp is a mandatory callback but
+		 * PATx method doesn't return any value, so return
+		 * cached value, which was last set from user space.
+		 */
+		*temp = obj->thresholds[trip];
+	}
 
 	return 0;
 }
@@ -92,8 +102,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzone,
 static int sys_get_trip_type(struct thermal_zone_device *thermal,
 		int trip, enum thermal_trip_type *type)
 {
+	struct acpi_device *device = thermal->devdata;
+	struct int3403_sensor *obj = acpi_driver_data(device);
+
 	/* Mandatory callback, may not mean much here */
-	*type = THERMAL_TRIP_PASSIVE;
+	if (trip == obj->crit_trip_id)
+		*type = THERMAL_TRIP_CRITICAL;
+	else
+		*type = THERMAL_TRIP_PASSIVE;
 
 	return 0;
 }
@@ -155,6 +171,34 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event)
 	}
 }
 
+static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp)
+{
+	unsigned long long crt;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(device->handle, "_CRT", NULL, &crt);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(crt, KELVIN_OFFSET);
+
+	return 0;
+}
+
+static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp)
+{
+	unsigned long long psv;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(device->handle, "_PSV", NULL, &psv);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	*temp = DECI_KELVIN_TO_MILLI_CELSIUS(psv, KELVIN_OFFSET);
+
+	return 0;
+}
+
 static int acpi_int3403_add(struct acpi_device *device)
 {
 	int result = 0;
@@ -194,6 +238,15 @@ static int acpi_int3403_add(struct acpi_device *device)
 			return -ENOMEM;
 		trip_mask = BIT(trip_cnt) - 1;
 	}
+
+	obj->psv_trip_id = -1;
+	if (!sys_get_trip_psv(device, &obj->psv_temp))
+		obj->psv_trip_id = trip_cnt++;
+
+	obj->crit_trip_id = -1;
+	if (!sys_get_trip_crt(device, &obj->crit_temp))
+		obj->crit_trip_id = trip_cnt++;
+
 	obj->tzone = thermal_zone_device_register(acpi_device_bid(device),
 				trip_cnt, trip_mask, device, &tzone_ops,
 				NULL, 0, 0);
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index d7ca9f4..acbff14 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -505,6 +505,10 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id)
 
 static const struct of_device_id exynos_tmu_match[] = {
 	{
+		.compatible = "samsung,exynos3250-tmu",
+		.data = (void *)EXYNOS3250_TMU_DRV_DATA,
+	},
+	{
 		.compatible = "samsung,exynos4210-tmu",
 		.data = (void *)EXYNOS4210_TMU_DRV_DATA,
 	},
@@ -677,7 +681,8 @@ static int exynos_tmu_probe(struct platform_device *pdev)
 		goto err_clk_sec;
 	}
 
-	if (pdata->type == SOC_ARCH_EXYNOS4210 ||
+	if (pdata->type == SOC_ARCH_EXYNOS3250 ||
+	    pdata->type == SOC_ARCH_EXYNOS4210 ||
 	    pdata->type == SOC_ARCH_EXYNOS4412 ||
 	    pdata->type == SOC_ARCH_EXYNOS5250 ||
 	    pdata->type == SOC_ARCH_EXYNOS5260 ||
@@ -759,10 +764,10 @@ static int exynos_tmu_remove(struct platform_device *pdev)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 
-	exynos_tmu_control(pdev, false);
-
 	exynos_unregister_thermal(data->reg_conf);
 
+	exynos_tmu_control(pdev, false);
+
 	clk_unprepare(data->clk);
 	if (!IS_ERR(data->clk_sec))
 		clk_unprepare(data->clk_sec);
diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h
index edd08cf..1b4a644 100644
--- a/drivers/thermal/samsung/exynos_tmu.h
+++ b/drivers/thermal/samsung/exynos_tmu.h
@@ -40,7 +40,8 @@ enum calibration_mode {
 };
 
 enum soc_type {
-	SOC_ARCH_EXYNOS4210 = 1,
+	SOC_ARCH_EXYNOS3250 = 1,
+	SOC_ARCH_EXYNOS4210,
 	SOC_ARCH_EXYNOS4412,
 	SOC_ARCH_EXYNOS5250,
 	SOC_ARCH_EXYNOS5260,
diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c
index c1d81dc..aa8e0de 100644
--- a/drivers/thermal/samsung/exynos_tmu_data.c
+++ b/drivers/thermal/samsung/exynos_tmu_data.c
@@ -90,6 +90,95 @@ struct exynos_tmu_init_data const exynos4210_default_tmu_data = {
 };
 #endif
 
+#if defined(CONFIG_SOC_EXYNOS3250)
+static const struct exynos_tmu_registers exynos3250_tmu_registers = {
+	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
+	.triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+	.triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+	.tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
+	.test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT,
+	.buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+	.buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+	.therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
+	.therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
+	.therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
+	.buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+	.buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+	.core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+	.tmu_status = EXYNOS_TMU_REG_STATUS,
+	.tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
+	.threshold_th0 = EXYNOS_THD_TEMP_RISE,
+	.threshold_th1 = EXYNOS_THD_TEMP_FALL,
+	.tmu_inten = EXYNOS_TMU_REG_INTEN,
+	.inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
+	.inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
+	.inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
+	.inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
+	.tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
+	.tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+	.intclr_fall_shift = EXYNOS_TMU_CLEAR_FALL_INT_SHIFT,
+	.intclr_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
+	.intclr_rise_mask = EXYNOS_TMU_RISE_INT_MASK,
+	.intclr_fall_mask = EXYNOS_TMU_FALL_INT_MASK,
+	.emul_con = EXYNOS_EMUL_CON,
+	.emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
+	.emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
+	.emul_time_mask = EXYNOS_EMUL_TIME_MASK,
+};
+
+#define EXYNOS3250_TMU_DATA \
+	.threshold_falling = 10, \
+	.trigger_levels[0] = 70, \
+	.trigger_levels[1] = 95, \
+	.trigger_levels[2] = 110, \
+	.trigger_levels[3] = 120, \
+	.trigger_enable[0] = true, \
+	.trigger_enable[1] = true, \
+	.trigger_enable[2] = true, \
+	.trigger_enable[3] = false, \
+	.trigger_type[0] = THROTTLE_ACTIVE, \
+	.trigger_type[1] = THROTTLE_ACTIVE, \
+	.trigger_type[2] = SW_TRIP, \
+	.trigger_type[3] = HW_TRIP, \
+	.max_trigger_level = 4, \
+	.gain = 8, \
+	.reference_voltage = 16, \
+	.noise_cancel_mode = 4, \
+	.cal_type = TYPE_TWO_POINT_TRIMMING, \
+	.efuse_value = 55, \
+	.min_efuse_value = 40, \
+	.max_efuse_value = 100, \
+	.first_point_trim = 25, \
+	.second_point_trim = 85, \
+	.default_temp_offset = 50, \
+	.freq_tab[0] = { \
+		.freq_clip_max = 800 * 1000, \
+		.temp_level = 70, \
+	}, \
+	.freq_tab[1] = { \
+		.freq_clip_max = 400 * 1000, \
+		.temp_level = 95, \
+	}, \
+	.freq_tab_count = 2, \
+	.registers = &exynos3250_tmu_registers, \
+	.features = (TMU_SUPPORT_EMULATION | \
+			TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
+			TMU_SUPPORT_EMUL_TIME)
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS3250)
+struct exynos_tmu_init_data const exynos3250_default_tmu_data = {
+	.tmu_data = {
+		{
+			EXYNOS3250_TMU_DATA,
+			.type = SOC_ARCH_EXYNOS3250,
+			.test_mux = EXYNOS4412_MUX_ADDR_VALUE,
+		},
+	},
+	.tmu_count = 1,
+};
+#endif
+
 #if defined(CONFIG_SOC_EXYNOS4412) || defined(CONFIG_SOC_EXYNOS5250)
 static const struct exynos_tmu_registers exynos4412_tmu_registers = {
 	.triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h
index d268981..f0979e5 100644
--- a/drivers/thermal/samsung/exynos_tmu_data.h
+++ b/drivers/thermal/samsung/exynos_tmu_data.h
@@ -148,6 +148,13 @@
 #define EXYNOS5440_TMU_TH_RISE4_SHIFT		24
 #define EXYNOS5440_EFUSE_SWAP_OFFSET		8
 
+#if defined(CONFIG_SOC_EXYNOS3250)
+extern struct exynos_tmu_init_data const exynos3250_default_tmu_data;
+#define EXYNOS3250_TMU_DRV_DATA (&exynos3250_default_tmu_data)
+#else
+#define EXYNOS3250_TMU_DRV_DATA (NULL)
+#endif
+
 #if defined(CONFIG_CPU_EXYNOS4210)
 extern struct exynos_tmu_init_data const exynos4210_default_tmu_data;
 #define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data)
diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig
new file mode 100644
index 0000000..490fdbe
--- /dev/null
+++ b/drivers/thermal/st/Kconfig
@@ -0,0 +1,12 @@
+config ST_THERMAL
+       tristate "Thermal sensors on STMicroelectronics STi series of SoCs"
+       help
+         Support for thermal sensors on STMicroelectronics STi series of SoCs.
+
+config ST_THERMAL_SYSCFG
+	select ST_THERMAL
+	tristate "STi series syscfg register access based thermal sensors"
+
+config ST_THERMAL_MEMMAP
+	select ST_THERMAL
+	tristate "STi series memory mapped access based thermal sensors"
diff --git a/drivers/thermal/st/Makefile b/drivers/thermal/st/Makefile
new file mode 100644
index 0000000..b388789
--- /dev/null
+++ b/drivers/thermal/st/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_ST_THERMAL)		:= st_thermal.o
+obj-$(CONFIG_ST_THERMAL_SYSCFG)		+= st_thermal_syscfg.o
+obj-$(CONFIG_ST_THERMAL_MEMMAP)		+= st_thermal_memmap.o
diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c
new file mode 100644
index 0000000..90163b3
--- /dev/null
+++ b/drivers/thermal/st/st_thermal.c
@@ -0,0 +1,313 @@
+/*
+ * ST Thermal Sensor Driver core routines
+ * Author: Ajit Pal Singh <ajitpal.singh@st.com>
+ *
+ * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "st_thermal.h"
+
+/* The Thermal Framework expects millidegrees */
+#define mcelsius(temp)			((temp) * 1000)
+
+/*
+ * Function to allocate regfields which are common
+ * between syscfg and memory mapped based sensors
+ */
+int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+	struct regmap *regmap = sensor->regmap;
+	const struct reg_field *reg_fields = sensor->cdata->reg_fields;
+
+	sensor->dcorrect = devm_regmap_field_alloc(dev, regmap,
+						   reg_fields[DCORRECT]);
+
+	sensor->overflow = devm_regmap_field_alloc(dev, regmap,
+						   reg_fields[OVERFLOW]);
+
+	sensor->temp_data = devm_regmap_field_alloc(dev, regmap,
+						    reg_fields[DATA]);
+
+	if (IS_ERR(sensor->dcorrect) ||
+	    IS_ERR(sensor->overflow) ||
+	    IS_ERR(sensor->temp_data)) {
+		dev_err(dev, "failed to allocate common regfields\n");
+		return -EINVAL;
+	}
+
+	return sensor->ops->alloc_regfields(sensor);
+}
+
+static int st_thermal_sensor_on(struct st_thermal_sensor *sensor)
+{
+	int ret;
+	struct device *dev = sensor->dev;
+
+	ret = clk_prepare_enable(sensor->clk);
+	if (ret) {
+		dev_err(dev, "failed to enable clk\n");
+		return ret;
+	}
+
+	ret = sensor->ops->power_ctrl(sensor, POWER_ON);
+	if (ret) {
+		dev_err(dev, "failed to power on sensor\n");
+		clk_disable_unprepare(sensor->clk);
+	}
+
+	return ret;
+}
+
+static int st_thermal_sensor_off(struct st_thermal_sensor *sensor)
+{
+	int ret;
+
+	ret = sensor->ops->power_ctrl(sensor, POWER_OFF);
+	if (ret)
+		return ret;
+
+	clk_disable_unprepare(sensor->clk);
+
+	return 0;
+}
+
+static int st_thermal_calibration(struct st_thermal_sensor *sensor)
+{
+	int ret;
+	unsigned int val;
+	struct device *dev = sensor->dev;
+
+	/* Check if sensor calibration data is already written */
+	ret = regmap_field_read(sensor->dcorrect, &val);
+	if (ret) {
+		dev_err(dev, "failed to read calibration data\n");
+		return ret;
+	}
+
+	if (!val) {
+		/*
+		 * Sensor calibration value not set by bootloader,
+		 * default calibration data to be used
+		 */
+		ret = regmap_field_write(sensor->dcorrect,
+					 sensor->cdata->calibration_val);
+		if (ret)
+			dev_err(dev, "failed to set calibration data\n");
+	}
+
+	return ret;
+}
+
+/* Callback to get temperature from HW*/
+static int st_thermal_get_temp(struct thermal_zone_device *th,
+		unsigned long *temperature)
+{
+	struct st_thermal_sensor *sensor = th->devdata;
+	struct device *dev = sensor->dev;
+	unsigned int temp;
+	unsigned int overflow;
+	int ret;
+
+	ret = regmap_field_read(sensor->overflow, &overflow);
+	if (ret)
+		return ret;
+	if (overflow)
+		return -EIO;
+
+	ret = regmap_field_read(sensor->temp_data, &temp);
+	if (ret)
+		return ret;
+
+	temp += sensor->cdata->temp_adjust_val;
+	temp = mcelsius(temp);
+
+	dev_dbg(dev, "temperature: %d\n", temp);
+
+	*temperature = temp;
+
+	return 0;
+}
+
+static int st_thermal_get_trip_type(struct thermal_zone_device *th,
+				int trip, enum thermal_trip_type *type)
+{
+	struct st_thermal_sensor *sensor = th->devdata;
+	struct device *dev = sensor->dev;
+
+	switch (trip) {
+	case 0:
+		*type = THERMAL_TRIP_CRITICAL;
+		break;
+	default:
+		dev_err(dev, "invalid trip point\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int st_thermal_get_trip_temp(struct thermal_zone_device *th,
+				    int trip, unsigned long *temp)
+{
+	struct st_thermal_sensor *sensor = th->devdata;
+	struct device *dev = sensor->dev;
+
+	switch (trip) {
+	case 0:
+		*temp = mcelsius(sensor->cdata->crit_temp);
+		break;
+	default:
+		dev_err(dev, "Invalid trip point\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct thermal_zone_device_ops st_tz_ops = {
+	.get_temp	= st_thermal_get_temp,
+	.get_trip_type	= st_thermal_get_trip_type,
+	.get_trip_temp	= st_thermal_get_trip_temp,
+};
+
+int st_thermal_register(struct platform_device *pdev,
+			const struct of_device_id *st_thermal_of_match)
+{
+	struct st_thermal_sensor *sensor;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	const struct of_device_id *match;
+
+	int polling_delay;
+	int ret;
+
+	if (!np) {
+		dev_err(dev, "device tree node not found\n");
+		return -EINVAL;
+	}
+
+	sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
+	if (!sensor)
+		return -ENOMEM;
+
+	sensor->dev = dev;
+
+	match = of_match_device(st_thermal_of_match, dev);
+	if (!(match && match->data))
+		return -EINVAL;
+
+	sensor->cdata = match->data;
+	if (!sensor->cdata->ops)
+		return -EINVAL;
+
+	sensor->ops = sensor->cdata->ops;
+
+	ret = sensor->ops->regmap_init(sensor);
+	if (ret)
+		return ret;
+
+	ret = st_thermal_alloc_regfields(sensor);
+	if (ret)
+		return ret;
+
+	sensor->clk = devm_clk_get(dev, "thermal");
+	if (IS_ERR(sensor->clk)) {
+		dev_err(dev, "failed to fetch clock\n");
+		return PTR_ERR(sensor->clk);
+	}
+
+	if (sensor->ops->register_enable_irq) {
+		ret = sensor->ops->register_enable_irq(sensor);
+		if (ret)
+			return ret;
+	}
+
+	ret = st_thermal_sensor_on(sensor);
+	if (ret)
+		return ret;
+
+	ret = st_thermal_calibration(sensor);
+	if (ret)
+		goto sensor_off;
+
+	polling_delay = sensor->ops->register_enable_irq ? 0 : 1000;
+
+	sensor->thermal_dev =
+		thermal_zone_device_register(dev_name(dev), 1, 0, sensor,
+					     &st_tz_ops, NULL, 0, polling_delay);
+	if (IS_ERR(sensor->thermal_dev)) {
+		dev_err(dev, "failed to register thermal zone device\n");
+		ret = PTR_ERR(sensor->thermal_dev);
+		goto sensor_off;
+	}
+
+	platform_set_drvdata(pdev, sensor);
+
+	return 0;
+
+sensor_off:
+	st_thermal_sensor_off(sensor);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(st_thermal_register);
+
+int st_thermal_unregister(struct platform_device *pdev)
+{
+	struct st_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	st_thermal_sensor_off(sensor);
+	thermal_zone_device_unregister(sensor->thermal_dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(st_thermal_unregister);
+
+static int st_thermal_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct st_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	return st_thermal_sensor_off(sensor);
+}
+
+static int st_thermal_resume(struct device *dev)
+{
+	int ret;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct st_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+	ret = st_thermal_sensor_on(sensor);
+	if (ret)
+		return ret;
+
+	ret = st_thermal_calibration(sensor);
+	if (ret)
+		return ret;
+
+	if (sensor->ops->enable_irq) {
+		ret = sensor->ops->enable_irq(sensor);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+SIMPLE_DEV_PM_OPS(st_thermal_pm_ops, st_thermal_suspend, st_thermal_resume);
+EXPORT_SYMBOL_GPL(st_thermal_pm_ops);
+
+MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/st/st_thermal.h b/drivers/thermal/st/st_thermal.h
new file mode 100644
index 0000000..fecafbe
--- /dev/null
+++ b/drivers/thermal/st/st_thermal.h
@@ -0,0 +1,104 @@
+/*
+ * ST Thermal Sensor Driver for STi series of SoCs
+ * Author: Ajit Pal Singh <ajitpal.singh@st.com>
+ *
+ * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __STI_THERMAL_SYSCFG_H
+#define __STI_THERMAL_SYSCFG_H
+
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/thermal.h>
+
+enum st_thermal_regfield_ids {
+	INT_THRESH_HI = 0, /* Top two regfield IDs are mutually exclusive */
+	TEMP_PWR = 0,
+	DCORRECT,
+	OVERFLOW,
+	DATA,
+	INT_ENABLE,
+
+	MAX_REGFIELDS
+};
+
+/* Thermal sensor power states */
+enum st_thermal_power_state {
+	POWER_OFF = 0,
+	POWER_ON
+};
+
+struct st_thermal_sensor;
+
+/**
+ * Description of private thermal sensor ops.
+ *
+ * @power_ctrl:		Function for powering on/off a sensor. Clock to the
+ *			sensor is also controlled from this function.
+ * @alloc_regfields: 	Allocate regmap register fields, specific to a sensor.
+ * @do_memmap_regmap: 	Memory map the thermal register space and init regmap
+ *			instance or find regmap instance.
+ * @register_irq: 	Register an interrupt handler for a sensor.
+ */
+struct st_thermal_sensor_ops {
+	int (*power_ctrl)(struct st_thermal_sensor *, enum st_thermal_power_state);
+	int (*alloc_regfields)(struct st_thermal_sensor *);
+	int (*regmap_init)(struct st_thermal_sensor *);
+	int (*register_enable_irq)(struct st_thermal_sensor *);
+	int (*enable_irq)(struct st_thermal_sensor *);
+};
+
+/**
+ * Description of thermal driver compatible data.
+ *
+ * @reg_fields:		Pointer to the regfields array for a sensor.
+ * @sys_compat:		Pointer to the syscon node compatible string.
+ * @ops: 		Pointer to private thermal ops for a sensor.
+ * @calibration_val: 	Default calibration value to be written to the DCORRECT
+ *			register field for a sensor.
+ * @temp_adjust_val: 	Value to be added/subtracted from the data read from
+ *			the sensor. If value needs to be added please provide a
+ *			positive value and if it is to be subtracted please
+ * 			provide a negative value.
+ * @crit_temp: 		The temperature beyond which the SoC should be shutdown
+ * 			to prevent damage.
+ */
+struct st_thermal_compat_data {
+	char *sys_compat;
+	const struct reg_field *reg_fields;
+	const struct st_thermal_sensor_ops *ops;
+	unsigned int calibration_val;
+	int temp_adjust_val;
+	int crit_temp;
+};
+
+struct st_thermal_sensor {
+	struct device *dev;
+	struct thermal_zone_device *thermal_dev;
+	const struct st_thermal_sensor_ops *ops;
+	const struct st_thermal_compat_data *cdata;
+	struct clk *clk;
+	struct regmap *regmap;
+	struct regmap_field *pwr;
+	struct regmap_field *dcorrect;
+	struct regmap_field *overflow;
+	struct regmap_field *temp_data;
+	struct regmap_field *int_thresh_hi;
+	struct regmap_field *int_enable;
+	int irq;
+	void __iomem *mmio_base;
+};
+
+extern int st_thermal_register(struct platform_device *pdev,
+			       const struct of_device_id *st_thermal_of_match);
+extern int st_thermal_unregister(struct platform_device *pdev);
+extern const struct dev_pm_ops st_thermal_pm_ops;
+
+#endif /* __STI_RESET_SYSCFG_H */
diff --git a/drivers/thermal/st/st_thermal_memmap.c b/drivers/thermal/st/st_thermal_memmap.c
new file mode 100644
index 0000000..39896ce
--- /dev/null
+++ b/drivers/thermal/st/st_thermal_memmap.c
@@ -0,0 +1,209 @@
+/*
+ * ST Thermal Sensor Driver for memory mapped sensors.
+ * Author: Ajit Pal Singh <ajitpal.singh@st.com>
+ *
+ * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/of.h>
+#include <linux/module.h>
+
+#include "st_thermal.h"
+
+#define STIH416_MPE_CONF			0x0
+#define STIH416_MPE_STATUS			0x4
+#define STIH416_MPE_INT_THRESH			0x8
+#define STIH416_MPE_INT_EN			0xC
+
+/* Power control bits for the memory mapped thermal sensor */
+#define THERMAL_PDN				BIT(4)
+#define THERMAL_SRSTN				BIT(10)
+
+static const struct reg_field st_mmap_thermal_regfields[MAX_REGFIELDS] = {
+	/*
+	 * According to the STIH416 MPE temp sensor data sheet -
+	 * the PDN (Power Down Bit) and SRSTN (Soft Reset Bit) need to be
+	 * written simultaneously for powering on and off the temperature
+	 * sensor. regmap_update_bits() will be used to update the register.
+	 */
+	[INT_THRESH_HI]	= REG_FIELD(STIH416_MPE_INT_THRESH, 	0,  7),
+	[DCORRECT]	= REG_FIELD(STIH416_MPE_CONF,		5,  9),
+	[OVERFLOW]	= REG_FIELD(STIH416_MPE_STATUS,		9,  9),
+	[DATA]		= REG_FIELD(STIH416_MPE_STATUS,		11, 18),
+	[INT_ENABLE]	= REG_FIELD(STIH416_MPE_INT_EN,		0,  0),
+};
+
+static irqreturn_t st_mmap_thermal_trip_handler(int irq, void *sdata)
+{
+	struct st_thermal_sensor *sensor = sdata;
+
+	thermal_zone_device_update(sensor->thermal_dev);
+
+	return IRQ_HANDLED;
+}
+
+/* Private ops for the Memory Mapped based thermal sensors */
+static int st_mmap_power_ctrl(struct st_thermal_sensor *sensor,
+			      enum st_thermal_power_state power_state)
+{
+	const unsigned int mask = (THERMAL_PDN | THERMAL_SRSTN);
+	const unsigned int val = power_state ? mask : 0;
+
+	return regmap_update_bits(sensor->regmap, STIH416_MPE_CONF, mask, val);
+}
+
+static int st_mmap_alloc_regfields(struct st_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+	struct regmap *regmap = sensor->regmap;
+	const struct reg_field *reg_fields = sensor->cdata->reg_fields;
+
+	sensor->int_thresh_hi = devm_regmap_field_alloc(dev, regmap,
+						reg_fields[INT_THRESH_HI]);
+	sensor->int_enable = devm_regmap_field_alloc(dev, regmap,
+						reg_fields[INT_ENABLE]);
+
+	if (IS_ERR(sensor->int_thresh_hi) || IS_ERR(sensor->int_enable)) {
+		dev_err(dev, "failed to alloc mmap regfields\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int st_mmap_enable_irq(struct st_thermal_sensor *sensor)
+{
+	int ret;
+
+	/* Set upper critical threshold */
+	ret = regmap_field_write(sensor->int_thresh_hi,
+				 sensor->cdata->crit_temp -
+				 sensor->cdata->temp_adjust_val);
+	if (ret)
+		return ret;
+
+	return regmap_field_write(sensor->int_enable, 1);
+}
+
+static int st_mmap_register_enable_irq(struct st_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	int ret;
+
+	sensor->irq = platform_get_irq(pdev, 0);
+	if (sensor->irq < 0) {
+		dev_err(dev, "failed to register IRQ\n");
+		return sensor->irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, sensor->irq,
+					NULL, st_mmap_thermal_trip_handler,
+					IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+					dev->driver->name, sensor);
+	if (ret) {
+		dev_err(dev, "failed to register IRQ %d\n", sensor->irq);
+		return ret;
+	}
+
+	return st_mmap_enable_irq(sensor);
+}
+
+static const struct regmap_config st_416mpe_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+};
+
+static int st_mmap_regmap_init(struct st_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "no memory resources defined\n");
+		return -ENODEV;
+	}
+
+	sensor->mmio_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(sensor->mmio_base)) {
+		dev_err(dev, "failed to remap IO\n");
+		return PTR_ERR(sensor->mmio_base);
+	}
+
+	sensor->regmap = devm_regmap_init_mmio(dev, sensor->mmio_base,
+				&st_416mpe_regmap_config);
+	if (IS_ERR(sensor->regmap)) {
+		dev_err(dev, "failed to initialise regmap\n");
+		return PTR_ERR(sensor->regmap);
+	}
+
+	return 0;
+}
+
+static const struct st_thermal_sensor_ops st_mmap_sensor_ops = {
+	.power_ctrl		= st_mmap_power_ctrl,
+	.alloc_regfields	= st_mmap_alloc_regfields,
+	.regmap_init		= st_mmap_regmap_init,
+	.register_enable_irq	= st_mmap_register_enable_irq,
+	.enable_irq		= st_mmap_enable_irq,
+};
+
+/* Compatible device data stih416 mpe thermal sensor */
+const struct st_thermal_compat_data st_416mpe_cdata = {
+	.reg_fields		= st_mmap_thermal_regfields,
+	.ops			= &st_mmap_sensor_ops,
+	.calibration_val	= 14,
+	.temp_adjust_val	= -95,
+	.crit_temp		= 120,
+};
+
+/* Compatible device data stih407 thermal sensor */
+const struct st_thermal_compat_data st_407_cdata = {
+	.reg_fields		= st_mmap_thermal_regfields,
+	.ops			= &st_mmap_sensor_ops,
+	.calibration_val	= 16,
+	.temp_adjust_val	= -95,
+	.crit_temp		= 120,
+};
+
+static struct of_device_id st_mmap_thermal_of_match[] = {
+	{ .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata },
+	{ .compatible = "st,stih407-thermal",     .data = &st_407_cdata },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match);
+
+int st_mmap_probe(struct platform_device *pdev)
+{
+	return st_thermal_register(pdev,  st_mmap_thermal_of_match);
+}
+
+int st_mmap_remove(struct platform_device *pdev)
+{
+	return st_thermal_unregister(pdev);
+}
+
+static struct platform_driver st_mmap_thermal_driver = {
+	.driver = {
+		.name	= "st_thermal_mmap",
+		.owner  = THIS_MODULE,
+		.pm     = &st_thermal_pm_ops,
+		.of_match_table = st_mmap_thermal_of_match,
+	},
+	.probe		= st_mmap_probe,
+	.remove		= st_mmap_remove,
+};
+
+module_platform_driver(st_mmap_thermal_driver);
+
+MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/st/st_thermal_syscfg.c b/drivers/thermal/st/st_thermal_syscfg.c
new file mode 100644
index 0000000..888b58e
--- /dev/null
+++ b/drivers/thermal/st/st_thermal_syscfg.c
@@ -0,0 +1,179 @@
+/*
+ * ST Thermal Sensor Driver for syscfg based sensors.
+ * Author: Ajit Pal Singh <ajitpal.singh@st.com>
+ *
+ * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/of.h>
+#include <linux/module.h>
+#include <linux/mfd/syscon.h>
+
+#include "st_thermal.h"
+
+/* STiH415 */
+#define STIH415_SYSCFG_FRONT(num)		((num - 100) * 4)
+#define STIH415_SAS_THSENS_CONF			STIH415_SYSCFG_FRONT(178)
+#define STIH415_SAS_THSENS_STATUS		STIH415_SYSCFG_FRONT(198)
+#define STIH415_SYSCFG_MPE(num)			((num - 600) * 4)
+#define STIH415_MPE_THSENS_CONF			STIH415_SYSCFG_MPE(607)
+#define STIH415_MPE_THSENS_STATUS		STIH415_SYSCFG_MPE(667)
+
+/* STiH416 */
+#define STIH416_SYSCFG_FRONT(num)		((num - 1000) * 4)
+#define STIH416_SAS_THSENS_CONF			STIH416_SYSCFG_FRONT(1552)
+#define STIH416_SAS_THSENS_STATUS1		STIH416_SYSCFG_FRONT(1554)
+#define STIH416_SAS_THSENS_STATUS2		STIH416_SYSCFG_FRONT(1594)
+
+/* STiD127 */
+#define STID127_SYSCFG_CPU(num)			((num - 700) * 4)
+#define STID127_THSENS_CONF			STID127_SYSCFG_CPU(743)
+#define STID127_THSENS_STATUS			STID127_SYSCFG_CPU(767)
+
+static const struct reg_field st_415sas_regfields[MAX_REGFIELDS] = {
+	[TEMP_PWR] = REG_FIELD(STIH415_SAS_THSENS_CONF,   9,  9),
+	[DCORRECT] = REG_FIELD(STIH415_SAS_THSENS_CONF,   4,  8),
+	[OVERFLOW] = REG_FIELD(STIH415_SAS_THSENS_STATUS, 8,  8),
+	[DATA] 	   = REG_FIELD(STIH415_SAS_THSENS_STATUS, 10, 16),
+};
+
+static const struct reg_field st_415mpe_regfields[MAX_REGFIELDS] = {
+	[TEMP_PWR] = REG_FIELD(STIH415_MPE_THSENS_CONF,   8,  8),
+	[DCORRECT] = REG_FIELD(STIH415_MPE_THSENS_CONF,   3,  7),
+	[OVERFLOW] = REG_FIELD(STIH415_MPE_THSENS_STATUS, 9,  9),
+	[DATA]     = REG_FIELD(STIH415_MPE_THSENS_STATUS, 11, 18),
+};
+
+static const struct reg_field st_416sas_regfields[MAX_REGFIELDS] = {
+	[TEMP_PWR] = REG_FIELD(STIH416_SAS_THSENS_CONF,    9,  9),
+	[DCORRECT] = REG_FIELD(STIH416_SAS_THSENS_CONF,    4,  8),
+	[OVERFLOW] = REG_FIELD(STIH416_SAS_THSENS_STATUS1, 8,  8),
+	[DATA]     = REG_FIELD(STIH416_SAS_THSENS_STATUS2, 10, 16),
+};
+
+static const struct reg_field st_127_regfields[MAX_REGFIELDS] = {
+	[TEMP_PWR] = REG_FIELD(STID127_THSENS_CONF,   7,  7),
+	[DCORRECT] = REG_FIELD(STID127_THSENS_CONF,   2,  6),
+	[OVERFLOW] = REG_FIELD(STID127_THSENS_STATUS, 9,  9),
+	[DATA]     = REG_FIELD(STID127_THSENS_STATUS, 11, 18),
+};
+
+/* Private OPs for System Configuration Register based thermal sensors */
+static int st_syscfg_power_ctrl(struct st_thermal_sensor *sensor,
+				enum st_thermal_power_state power_state)
+{
+	return regmap_field_write(sensor->pwr, power_state);
+}
+
+static int st_syscfg_alloc_regfields(struct st_thermal_sensor *sensor)
+{
+	struct device *dev = sensor->dev;
+
+	sensor->pwr = devm_regmap_field_alloc(dev, sensor->regmap,
+					sensor->cdata->reg_fields[TEMP_PWR]);
+
+	if (IS_ERR(sensor->pwr)) {
+		dev_err(dev, "failed to alloc syscfg regfields\n");
+		return PTR_ERR(sensor->pwr);
+	}
+
+	return 0;
+}
+
+static int st_syscfg_regmap_init(struct st_thermal_sensor *sensor)
+{
+	sensor->regmap =
+		syscon_regmap_lookup_by_compatible(sensor->cdata->sys_compat);
+	if (IS_ERR(sensor->regmap)) {
+		dev_err(sensor->dev, "failed to find syscfg regmap\n");
+		return PTR_ERR(sensor->regmap);
+	}
+
+	return 0;
+}
+
+static const struct st_thermal_sensor_ops st_syscfg_sensor_ops = {
+	.power_ctrl		= st_syscfg_power_ctrl,
+	.alloc_regfields	= st_syscfg_alloc_regfields,
+	.regmap_init		= st_syscfg_regmap_init,
+};
+
+/* Compatible device data for stih415 sas thermal sensor */
+const struct st_thermal_compat_data st_415sas_cdata = {
+	.sys_compat		= "st,stih415-front-syscfg",
+	.reg_fields		= st_415sas_regfields,
+	.ops			= &st_syscfg_sensor_ops,
+	.calibration_val	= 16,
+	.temp_adjust_val	= 20,
+	.crit_temp		= 120,
+};
+
+/* Compatible device data for stih415 mpe thermal sensor */
+const struct st_thermal_compat_data st_415mpe_cdata = {
+	.sys_compat		= "st,stih415-system-syscfg",
+	.reg_fields		= st_415mpe_regfields,
+	.ops			= &st_syscfg_sensor_ops,
+	.calibration_val	= 16,
+	.temp_adjust_val	= -103,
+	.crit_temp		= 120,
+};
+
+/* Compatible device data for stih416 sas thermal sensor */
+const struct st_thermal_compat_data st_416sas_cdata = {
+	.sys_compat		= "st,stih416-front-syscfg",
+	.reg_fields		= st_416sas_regfields,
+	.ops			= &st_syscfg_sensor_ops,
+	.calibration_val	= 16,
+	.temp_adjust_val	= 20,
+	.crit_temp		= 120,
+};
+
+/* Compatible device data for stid127 thermal sensor */
+const struct st_thermal_compat_data st_127_cdata = {
+	.sys_compat		= "st,stid127-cpu-syscfg",
+	.reg_fields		= st_127_regfields,
+	.ops			= &st_syscfg_sensor_ops,
+	.calibration_val	= 8,
+	.temp_adjust_val	= -103,
+	.crit_temp		= 120,
+};
+
+static struct of_device_id st_syscfg_thermal_of_match[] = {
+	{ .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata },
+	{ .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata },
+	{ .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata },
+	{ .compatible = "st,stid127-thermal",     .data = &st_127_cdata },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match);
+
+int st_syscfg_probe(struct platform_device *pdev)
+{
+	return st_thermal_register(pdev, st_syscfg_thermal_of_match);
+}
+
+int st_syscfg_remove(struct platform_device *pdev)
+{
+	return st_thermal_unregister(pdev);
+}
+
+static struct platform_driver st_syscfg_thermal_driver = {
+	.driver = {
+		.name	= "st_syscfg_thermal",
+		.owner  = THIS_MODULE,
+		.pm     = &st_thermal_pm_ops,
+		.of_match_table =  st_syscfg_thermal_of_match,
+	},
+	.probe		= st_syscfg_probe,
+	.remove		= st_syscfg_remove,
+};
+module_platform_driver(st_syscfg_thermal_driver);
+
+MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 0419b69..4f485e8 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc)
  *
  * The byte channel to be used for the console is specified via a "stdout"
  * property in the /chosen node.
- *
- * For compatible with legacy device trees, we also look for a "stdout" alias.
  */
 static int find_console_handle(void)
 {
-	struct device_node *np, *np2;
+	struct device_node *np = of_stdout;
 	const char *sprop = NULL;
 	const uint32_t *iprop;
 
-	np = of_find_node_by_path("/chosen");
-	if (np)
-		sprop = of_get_property(np, "stdout-path", NULL);
-
-	if (!np || !sprop) {
-		of_node_put(np);
-		np = of_find_node_by_name(NULL, "aliases");
-		if (np)
-			sprop = of_get_property(np, "stdout", NULL);
-	}
-
-	if (!sprop) {
-		of_node_put(np);
-		return 0;
-	}
-
 	/* We don't care what the aliased node is actually called.  We only
 	 * care if it's compatible with "epapr,hv-byte-channel", because that
-	 * indicates that it's a byte channel node.  We use a temporary
-	 * variable, 'np2', because we can't release 'np' until we're done with
-	 * 'sprop'.
+	 * indicates that it's a byte channel node.
 	 */
-	np2 = of_find_node_by_path(sprop);
-	of_node_put(np);
-	np = np2;
-	if (!np) {
-		pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
-		return 0;
-	}
-
-	/* Is it a byte channel? */
-	if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
-		of_node_put(np);
+	if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel"))
 		return 0;
-	}
 
 	stdout_irq = irq_of_parse_and_map(np, 0);
 	if (stdout_irq == NO_IRQ) {
-		pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop);
-		of_node_put(np);
+		pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name);
 		return 0;
 	}
 
@@ -167,12 +135,9 @@ static int find_console_handle(void)
 	if (!iprop) {
 		pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
 		       np->name);
-		of_node_put(np);
 		return 0;
 	}
 	stdout_bc = be32_to_cpu(*iprop);
-
-	of_node_put(np);
 	return 1;
 }
 
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index a585079..a2cc5f8 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -342,22 +342,13 @@ static void udbg_init_opal_common(void)
 
 void __init hvc_opal_init_early(void)
 {
-	struct device_node *stdout_node = NULL;
+	struct device_node *stdout_node = of_node_get(of_stdout);
 	const __be32 *termno;
-	const char *name = NULL;
 	const struct hv_ops *ops;
 	u32 index;
 
-	/* find the boot console from /chosen/stdout */
-	if (of_chosen)
-		name = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (name) {
-		stdout_node = of_find_node_by_path(name);
-		if (!stdout_node) {
-			pr_err("hvc_opal: Failed to locate default console!\n");
-			return;
-		}
-	} else {
+	/* If the console wasn't in /chosen, try /ibm,opal */
+	if (!stdout_node) {
 		struct device_node *opal, *np;
 
 		/* Current OPAL takeover doesn't provide the stdout
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index b594abf..5618b5f 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -404,42 +404,35 @@ module_exit(hvc_vio_exit);
 
 void __init hvc_vio_init_early(void)
 {
-	struct device_node *stdout_node;
 	const __be32 *termno;
 	const char *name;
 	const struct hv_ops *ops;
 
 	/* find the boot console from /chosen/stdout */
-	if (!of_chosen)
+	if (!of_stdout)
 		return;
-	name = of_get_property(of_chosen, "linux,stdout-path", NULL);
-	if (name == NULL)
-		return;
-	stdout_node = of_find_node_by_path(name);
-	if (!stdout_node)
-		return;
-	name = of_get_property(stdout_node, "name", NULL);
+	name = of_get_property(of_stdout, "name", NULL);
 	if (!name) {
 		printk(KERN_WARNING "stdout node missing 'name' property!\n");
-		goto out;
+		return;
 	}
 
 	/* Check if it's a virtual terminal */
 	if (strncmp(name, "vty", 3) != 0)
-		goto out;
-	termno = of_get_property(stdout_node, "reg", NULL);
+		return;
+	termno = of_get_property(of_stdout, "reg", NULL);
 	if (termno == NULL)
-		goto out;
+		return;
 	hvterm_priv0.termno = of_read_number(termno, 1);
 	spin_lock_init(&hvterm_priv0.buf_lock);
 	hvterm_privs[0] = &hvterm_priv0;
 
 	/* Check the protocol */
-	if (of_device_is_compatible(stdout_node, "hvterm1")) {
+	if (of_device_is_compatible(of_stdout, "hvterm1")) {
 		hvterm_priv0.proto = HV_PROTOCOL_RAW;
 		ops = &hvterm_raw_ops;
 	}
-	else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) {
+	else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) {
 		hvterm_priv0.proto = HV_PROTOCOL_HVSI;
 		ops = &hvterm_hvsi_ops;
 		hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
@@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void)
 		/* HVSI, perform the handshake now */
 		hvsilib_establish(&hvterm_priv0.hvsi);
 	} else
-		goto out;
+		return;
 	udbg_putc = udbg_hvc_putc;
 	udbg_getc = udbg_hvc_getc;
 	udbg_getc_poll = udbg_hvc_getc_poll;
@@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void)
 	 * backend for HVSI, only do udbg
 	 */
 	if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
-		goto out;
+		return;
 #endif
 	/* Check whether the user has requested a different console. */
 	if (!strstr(cmd_line, "console="))
 		add_preferred_console("hvc", 0, NULL);
 	hvc_instantiate(0, 0, ops);
-out:
-	of_node_put(stdout_node);
 }
 
 /* call this from early_init() for a working debug console on
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index f7ad5b9..abbfedb 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -1653,8 +1653,7 @@ static int __init pmz_probe(void)
 	/*
 	 * Find all escc chips in the system
 	 */
-	node_p = of_find_node_by_name(NULL, "escc");
-	while (node_p) {
+	for_each_node_by_name(node_p, "escc") {
 		/*
 		 * First get channel A/B node pointers
 		 * 
@@ -1672,7 +1671,7 @@ static int __init pmz_probe(void)
 			of_node_put(node_b);
 			printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n",
 				(!node_a) ? 'a' : 'b', node_p->full_name);
-			goto next;
+			continue;
 		}
 
 		/*
@@ -1699,11 +1698,9 @@ static int __init pmz_probe(void)
 			of_node_put(node_b);
 			memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port));
 			memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port));
-			goto next;
+			continue;
 		}
 		count += 2;
-next:
-		node_p = of_find_node_by_name(node_p, "escc");
 	}
 	pmz_ports_count = count;
 
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 8bb19da..29a7be4 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/console.h>
+#include <linux/of.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/device.h>
@@ -2611,6 +2612,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 		spin_lock_init(&uport->lock);
 		lockdep_set_class(&uport->lock, &port_lock_key);
 	}
+	if (uport->cons && uport->dev)
+		of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
 
 	uart_configure_port(drv, state, uport);
 
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204..d8c5763 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
 	depends on VFIO && SPAPR_TCE_IOMMU
 	default n
 
+config VFIO_SPAPR_EEH
+	tristate
+	depends on EEH && VFIO_IOMMU_SPAPR_TCE
+	default n
+
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	depends on IOMMU_API
 	select VFIO_IOMMU_TYPE1 if X86
 	select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
+	select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
 	select ANON_INODES
 	help
 	  VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc..0b035b1 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f..f782533 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(nointxmask,
 		  "Disable support for PCI 2.3 style INTx masking.  If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
 
+static DEFINE_MUTEX(driver_lock);
+
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
+
 static int vfio_pci_enable(struct vfio_pci_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
@@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
 	u16 cmd;
 	u8 msix_pos;
 
+	/* Don't allow our initial saved state to include busmaster */
+	pci_clear_master(pdev);
+
 	ret = pci_enable_device(pdev);
 	if (ret)
 		return ret;
@@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 	struct pci_dev *pdev = vdev->pdev;
 	int bar;
 
-	pci_disable_device(pdev);
+	/* Stop the device from further DMA */
+	pci_clear_master(pdev);
 
 	vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
 				VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 		vdev->barmap[bar] = NULL;
 	}
 
+	vdev->needs_reset = true;
+
 	/*
 	 * If we have saved state, restore it.  If we can reset the device,
 	 * even better.  Resetting with current state seems better than
@@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 			__func__, dev_name(&pdev->dev));
 
 		if (!vdev->reset_works)
-			return;
+			goto out;
 
 		pci_save_state(pdev);
 	}
@@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
 		if (ret)
 			pr_warn("%s: Failed to reset device %s (%d)\n",
 				__func__, dev_name(&pdev->dev), ret);
+		else
+			vdev->needs_reset = false;
 	}
 
 	pci_restore_state(pdev);
+out:
+	pci_disable_device(pdev);
+
+	vfio_pci_try_bus_reset(vdev);
 }
 
 static void vfio_pci_release(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
 
-	if (atomic_dec_and_test(&vdev->refcnt)) {
+	mutex_lock(&driver_lock);
+
+	if (!(--vdev->refcnt)) {
 		vfio_spapr_pci_eeh_release(vdev->pdev);
 		vfio_pci_disable(vdev);
 	}
 
+	mutex_unlock(&driver_lock);
+
 	module_put(THIS_MODULE);
 }
 
 static int vfio_pci_open(void *device_data)
 {
 	struct vfio_pci_device *vdev = device_data;
-	int ret;
+	int ret = 0;
 
 	if (!try_module_get(THIS_MODULE))
 		return -ENODEV;
 
-	if (atomic_inc_return(&vdev->refcnt) == 1) {
+	mutex_lock(&driver_lock);
+
+	if (!vdev->refcnt) {
 		ret = vfio_pci_enable(vdev);
 		if (ret)
 			goto error;
 
-		ret = vfio_spapr_pci_eeh_open(vdev->pdev);
-		if (ret) {
-			vfio_pci_disable(vdev);
-			goto error;
-		}
+		vfio_spapr_pci_eeh_open(vdev->pdev);
 	}
-
-	return 0;
+	vdev->refcnt++;
 error:
-	module_put(THIS_MODULE);
+	mutex_unlock(&driver_lock);
+	if (ret)
+		module_put(THIS_MODULE);
 	return ret;
 }
 
@@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	vdev->irq_type = VFIO_PCI_NUM_IRQS;
 	mutex_init(&vdev->igate);
 	spin_lock_init(&vdev->irqlock);
-	atomic_set(&vdev->refcnt, 0);
 
 	ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
 	if (ret) {
@@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 {
 	struct vfio_pci_device *vdev;
 
+	mutex_lock(&driver_lock);
+
 	vdev = vfio_del_group_dev(&pdev->dev);
-	if (!vdev)
-		return;
+	if (vdev) {
+		iommu_group_put(pdev->dev.iommu_group);
+		kfree(vdev);
+	}
 
-	iommu_group_put(pdev->dev.iommu_group);
-	kfree(vdev);
+	mutex_unlock(&driver_lock);
 }
 
 static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = {
 	.err_handler	= &vfio_err_handlers,
 };
 
+/*
+ * Test whether a reset is necessary and possible.  We mark devices as
+ * needs_reset when they are released, but don't have a function-local reset
+ * available.  If any of these exist in the affected devices, we want to do
+ * a bus/slot reset.  We also need all of the affected devices to be unused,
+ * so we abort if any device has a non-zero refcnt.  driver_lock prevents a
+ * device from being opened during the scan or unbound from vfio-pci.
+ */
+static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
+{
+	bool *needs_reset = data;
+	struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+	int ret = -EBUSY;
+
+	if (pci_drv == &vfio_pci_driver) {
+		struct vfio_device *device;
+		struct vfio_pci_device *vdev;
+
+		device = vfio_device_get_from_dev(&pdev->dev);
+		if (!device)
+			return ret;
+
+		vdev = vfio_device_data(device);
+		if (vdev) {
+			if (vdev->needs_reset)
+				*needs_reset = true;
+
+			if (!vdev->refcnt)
+				ret = 0;
+		}
+
+		vfio_device_put(device);
+	}
+
+	/*
+	 * TODO: vfio-core considers groups to be viable even if some devices
+	 * are attached to known drivers, like pci-stub or pcieport.  We can't
+	 * freeze devices from being unbound to those drivers like we can
+	 * here though, so it would be racy to test for them.  We also can't
+	 * use device_lock() to prevent changes as that would interfere with
+	 * PCI-core taking device_lock during bus reset.  For now, we require
+	 * devices to be bound to vfio-pci to get a bus/slot reset on release.
+	 */
+
+	return ret;
+}
+
+/* Clear needs_reset on all affected devices after successful bus/slot reset */
+static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
+{
+	struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
+
+	if (pci_drv == &vfio_pci_driver) {
+		struct vfio_device *device;
+		struct vfio_pci_device *vdev;
+
+		device = vfio_device_get_from_dev(&pdev->dev);
+		if (!device)
+			return 0;
+
+		vdev = vfio_device_data(device);
+		if (vdev)
+			vdev->needs_reset = false;
+
+		vfio_device_put(device);
+	}
+
+	return 0;
+}
+
+/*
+ * Attempt to do a bus/slot reset if there are devices affected by a reset for
+ * this device that are needs_reset and all of the affected devices are unused
+ * (!refcnt).  Callers of this function are required to hold driver_lock such
+ * that devices can not be unbound from vfio-pci or opened by a user while we
+ * test for and perform a bus/slot reset.
+ */
+static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
+{
+	bool needs_reset = false, slot = false;
+	int ret;
+
+	if (!pci_probe_reset_slot(vdev->pdev->slot))
+		slot = true;
+	else if (pci_probe_reset_bus(vdev->pdev->bus))
+		return;
+
+	if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
+					  vfio_pci_test_bus_reset,
+					  &needs_reset, slot) || !needs_reset)
+		return;
+
+	if (slot)
+		ret = pci_try_reset_slot(vdev->pdev->slot);
+	else
+		ret = pci_try_reset_bus(vdev->pdev->bus);
+
+	if (ret)
+		return;
+
+	vfio_pci_for_each_slot_or_bus(vdev->pdev,
+				      vfio_pci_clear_needs_reset, NULL, slot);
+}
+
 static void __exit vfio_pci_cleanup(void)
 {
 	pci_unregister_driver(&vfio_pci_driver);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9c6d5d0..671c17a 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -54,8 +54,9 @@ struct vfio_pci_device {
 	bool			extended_caps;
 	bool			bardirty;
 	bool			has_vga;
+	bool			needs_reset;
 	struct pci_saved_state	*pci_saved_state;
-	atomic_t		refcnt;
+	int			refcnt;
 	struct eventfd_ctx	*err_trigger;
 };
 
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4c..86dfceb 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,20 +9,27 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <asm/eeh.h>
 
+#define DRIVER_VERSION	"0.1"
+#define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
+#define DRIVER_DESC	"VFIO IOMMU SPAPR EEH"
+
 /* We might build address mapping here for "fast" path later */
-int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
+void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
-	return eeh_dev_open(pdev);
+	eeh_dev_open(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
 void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 {
 	eeh_dev_release(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 				unsigned int cmd, unsigned long arg)
@@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
 	return ret;
 }
+EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);