diff options
Diffstat (limited to 'drivers')
292 files changed, 15414 insertions, 4643 deletions
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 9c62340..c96887d 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -481,6 +481,10 @@ void acpi_pci_irq_disable(struct pci_dev *dev) if (!pin) return; + /* Keep IOAPIC pin configuration when suspending */ + if (dev->dev.power.is_prepared) + return; + entry = acpi_pci_irq_lookup(dev, pin); if (!entry) return; @@ -498,5 +502,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev) */ dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); - acpi_unregister_gsi(gsi); + if (gsi >= 0 && dev->irq > 0) + acpi_unregister_gsi(gsi); } diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 0e3f8f9..480fa6f 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -299,6 +299,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) out_vcc = find_vcc(dev, ntohs(hdr->vpi), ntohs(hdr->vci)); read_unlock(&vcc_sklist_lock); if (!out_vcc) { + result = -EUNATCH; atomic_inc(&vcc->stats->tx_err); goto done; } diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index 943cf0d..7652e8d 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1278,6 +1278,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) card->dma_bounce = kmalloc(card->nr_ports * BUF_SIZE, GFP_KERNEL); if (!card->dma_bounce) { dev_warn(&card->dev->dev, "Failed to allocate DMA bounce buffers\n"); + err = -ENOMEM; /* Fallback to MMIO doesn't work */ goto out_unmap_both; } diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile index 8b45033..4464e35 100644 --- a/drivers/block/drbd/Makefile +++ b/drivers/block/drbd/Makefile @@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o drbd-y += drbd_main.o drbd_strings.o drbd_nl.o drbd-y += drbd_interval.o drbd_state.o drbd-y += drbd_nla.o +drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 05a1780..d26a3fa 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk { __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; -struct update_odbm_work { - struct drbd_work w; - struct drbd_device *device; - unsigned int enr; -}; - -struct update_al_work { - struct drbd_work w; - struct drbd_device *device; - struct completion event; - int err; -}; - - -void *drbd_md_get_buffer(struct drbd_device *device) +void *drbd_md_get_buffer(struct drbd_device *device, const char *intent) { int r; wait_event(device->misc_wait, - (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 || + (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 || device->state.disk <= D_FAILED); - return r ? NULL : page_address(device->md_io_page); + if (r) + return NULL; + + device->md_io.current_use = intent; + device->md_io.start_jif = jiffies; + device->md_io.submit_jif = device->md_io.start_jif - 1; + return page_address(device->md_io.page); } void drbd_md_put_buffer(struct drbd_device *device) { - if (atomic_dec_and_test(&device->md_io_in_use)) + if (atomic_dec_and_test(&device->md_io.in_use)) wake_up(&device->misc_wait); } @@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b static int _drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, - struct page *page, sector_t sector, - int rw, int size) + sector_t sector, int rw) { struct bio *bio; + /* we do all our meta data IO in aligned 4k blocks. */ + const int size = 4096; int err; device->md_io.done = 0; @@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) rw |= REQ_FUA | REQ_FLUSH; - rw |= REQ_SYNC; + rw |= REQ_SYNC | REQ_NOIDLE; bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; bio->bi_iter.bi_sector = sector; err = -EIO; - if (bio_add_page(bio, page, size, 0) != size) + if (bio_add_page(bio, device->md_io.page, size, 0) != size) goto out; - bio->bi_private = &device->md_io; + bio->bi_private = device; bio->bi_end_io = drbd_md_io_complete; bio->bi_rw = rw; @@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, } bio_get(bio); /* one bio_put() is in the completion handler */ - atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ + atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ + device->md_io.submit_jif = jiffies; if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) bio_endio(bio, -EIO); else @@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd sector_t sector, int rw) { int err; - struct page *iop = device->md_io_page; - - D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1); + D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1); BUG_ON(!bdev->md_bdev); @@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - /* we do all our meta data IO in aligned 4k blocks. */ - err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096); + err = _drbd_md_sync_page_io(device, bdev, sector, rw); if (err) { drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); @@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval * return need_transaction; } -static int al_write_transaction(struct drbd_device *device, bool delegate); - -/* When called through generic_make_request(), we must delegate - * activity log I/O to the worker thread: a further request - * submitted via generic_make_request() within the same task - * would be queued on current->bio_list, and would only start - * after this function returns (see generic_make_request()). - * - * However, if we *are* the worker, we must not delegate to ourselves. - */ +static int al_write_transaction(struct drbd_device *device); -/* - * @delegate: delegate activity log I/O to the worker thread - */ -void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) +void drbd_al_begin_io_commit(struct drbd_device *device) { bool locked = false; - BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); - /* Serialize multiple transactions. * This uses test_and_set_bit, memory barrier is implicit. */ @@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) rcu_read_unlock(); if (write_al_updates) - al_write_transaction(device, delegate); + al_write_transaction(device); spin_lock_irq(&device->al_lock); /* FIXME if (err) @@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate) /* * @delegate: delegate activity log I/O to the worker thread */ -void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate) +void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i) { - BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task); - if (drbd_al_begin_io_prepare(device, i)) - drbd_al_begin_io_commit(device, delegate); + drbd_al_begin_io_commit(device); } int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) @@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval * /* We want all necessary updates for a given request within the same transaction * We could first check how many updates are *actually* needed, * and use that instead of the worst-case nr_al_extents */ - if (available_update_slots < nr_al_extents) - return -EWOULDBLOCK; + if (available_update_slots < nr_al_extents) { + /* Too many activity log extents are currently "hot". + * + * If we have accumulated pending changes already, + * we made progress. + * + * If we cannot get even a single pending change through, + * stop the fast path until we made some progress, + * or requests to "cold" extents could be starved. */ + if (!al->pending_changes) + __set_bit(__LC_STARVING, &device->act_log->flags); + return -ENOBUFS; + } /* Is resync active in this area? */ for (enr = first; enr <= last; enr++) { @@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr) (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); } -static unsigned int rs_extent_to_bm_page(unsigned int rs_enr) -{ - return rs_enr >> - /* bit to page */ - ((PAGE_SHIFT + 3) - - /* resync extent number to bit */ - (BM_EXT_SHIFT - BM_BLOCK_SHIFT)); -} - static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) { const unsigned int stripes = device->ldev->md.al_stripes; @@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) return device->ldev->md.md_offset + device->ldev->md.al_offset + t; } -static int -_al_write_transaction(struct drbd_device *device) +int al_write_transaction(struct drbd_device *device) { struct al_transaction_on_disk *buffer; struct lc_element *e; @@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device) return -EIO; } - buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */ + /* protects md_io_buffer, al_tr_cycle, ... */ + buffer = drbd_md_get_buffer(device, __func__); if (!buffer) { drbd_err(device, "disk failed while waiting for md_io buffer\n"); put_ldev(device); @@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device) return err; } - -static int w_al_write_transaction(struct drbd_work *w, int unused) -{ - struct update_al_work *aw = container_of(w, struct update_al_work, w); - struct drbd_device *device = aw->device; - int err; - - err = _al_write_transaction(device); - aw->err = err; - complete(&aw->event); - - return err != -EIO ? err : 0; -} - -/* Calls from worker context (see w_restart_disk_io()) need to write the - transaction directly. Others came through generic_make_request(), - those need to delegate it to the worker. */ -static int al_write_transaction(struct drbd_device *device, bool delegate) -{ - if (delegate) { - struct update_al_work al_work; - init_completion(&al_work.event); - al_work.w.cb = w_al_write_transaction; - al_work.device = device; - drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, - &al_work.w); - wait_for_completion(&al_work.event); - return al_work.err; - } else - return _al_write_transaction(device); -} - static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) { int rv; @@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer) return 0; } -static int w_update_odbm(struct drbd_work *w, int unused) -{ - struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); - struct drbd_device *device = udw->device; - struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; - - if (!get_ldev(device)) { - if (__ratelimit(&drbd_ratelimit_state)) - drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n"); - kfree(udw); - return 0; - } - - drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr)); - put_ldev(device); - - kfree(udw); - - if (drbd_bm_total_weight(device) <= device->rs_failed) { - switch (device->state.conn) { - case C_SYNC_SOURCE: case C_SYNC_TARGET: - case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T: - drbd_resync_finished(device); - default: - /* nothing to do */ - break; - } - } - drbd_bcast_event(device, &sib); - - return 0; -} - +static const char *drbd_change_sync_fname[] = { + [RECORD_RS_FAILED] = "drbd_rs_failed_io", + [SET_IN_SYNC] = "drbd_set_in_sync", + [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync" +}; /* ATTENTION. The AL's extents are 4MB each, while the extents in the * resync LRU-cache are 16MB each. * The caller of this function has to hold an get_ldev() reference. * + * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success), + * potentially pulling in (and recounting the corresponding bits) + * this resync extent into the resync extent lru cache. + * + * Returns whether all bits have been cleared for this resync extent, + * precisely: (rs_left <= rs_failed) + * * TODO will be obsoleted once we have a caching lru of the on disk bitmap */ -static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, - int count, int success) +static bool update_rs_extent(struct drbd_device *device, + unsigned int enr, int count, + enum update_sync_bits_mode mode) { struct lc_element *e; - struct update_odbm_work *udw; - - unsigned int enr; D_ASSERT(device, atomic_read(&device->local_cnt)); - /* I simply assume that a sector/size pair never crosses - * a 16 MB extent border. (Currently this is true...) */ - enr = BM_SECT_TO_EXT(sector); - - e = lc_get(device->resync, enr); + /* When setting out-of-sync bits, + * we don't need it cached (lc_find). + * But if it is present in the cache, + * we should update the cached bit count. + * Otherwise, that extent should be in the resync extent lru cache + * already -- or we want to pull it in if necessary -- (lc_get), + * then update and check rs_left and rs_failed. */ + if (mode == SET_OUT_OF_SYNC) + e = lc_find(device->resync, enr); + else + e = lc_get(device->resync, enr); if (e) { struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); if (ext->lce.lc_number == enr) { - if (success) + if (mode == SET_IN_SYNC) ext->rs_left -= count; + else if (mode == SET_OUT_OF_SYNC) + ext->rs_left += count; else ext->rs_failed += count; if (ext->rs_left < ext->rs_failed) { - drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " + drbd_warn(device, "BAD! enr=%u rs_left=%d " "rs_failed=%d count=%d cstate=%s\n", - (unsigned long long)sector, ext->lce.lc_number, ext->rs_left, ext->rs_failed, count, drbd_conn_str(device->state.conn)); @@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto ext->lce.lc_number, ext->rs_failed); } ext->rs_left = rs_left; - ext->rs_failed = success ? 0 : count; + ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0; /* we don't keep a persistent log of the resync lru, * we can commit any change right away. */ lc_committed(device->resync); } - lc_put(device->resync, &ext->lce); + if (mode != SET_OUT_OF_SYNC) + lc_put(device->resync, &ext->lce); /* no race, we are within the al_lock! */ - if (ext->rs_left == ext->rs_failed) { + if (ext->rs_left <= ext->rs_failed) { ext->rs_failed = 0; - - udw = kmalloc(sizeof(*udw), GFP_ATOMIC); - if (udw) { - udw->enr = ext->lce.lc_number; - udw->w.cb = w_update_odbm; - udw->device = device; - drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, - &udw->w); - } else { - drbd_warn(device, "Could not kmalloc an udw\n"); - } + return true; } - } else { + } else if (mode != SET_OUT_OF_SYNC) { + /* be quiet if lc_find() did not find it. */ drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", device->resync_locked, device->resync->nr_elements, device->resync->flags); } + return false; } void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) @@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go } } -/* clear the bit corresponding to the piece of storage in question: - * size byte of data starting from sector. Only clear a bits of the affected - * one ore more _aligned_ BM_BLOCK_SIZE blocks. - * - * called by worker on C_SYNC_TARGET and receiver on SyncSource. - * - */ -void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, - const char *file, const unsigned int line) +/* It is called lazy update, so don't do write-out too often. */ +static bool lazy_bitmap_update_due(struct drbd_device *device) { - /* Is called from worker and receiver context _only_ */ - unsigned long sbnr, ebnr, lbnr; - unsigned long count = 0; - sector_t esector, nr_sectors; - int wake_up = 0; - unsigned long flags; + return time_after(jiffies, device->rs_last_bcast + 2*HZ); +} - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", - (unsigned long long)sector, size); +static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done) +{ + if (rs_done) + set_bit(RS_DONE, &device->flags); + /* and also set RS_PROGRESS below */ + else if (!lazy_bitmap_update_due(device)) return; - } - - if (!get_ldev(device)) - return; /* no disk, no metadata, no bitmap to clear bits in */ - - nr_sectors = drbd_get_capacity(device->this_bdev); - esector = sector + (size >> 9) - 1; - - if (!expect(sector < nr_sectors)) - goto out; - if (!expect(esector < nr_sectors)) - esector = nr_sectors - 1; - - lbnr = BM_SECT_TO_BIT(nr_sectors-1); - - /* we clear it (in sync). - * round up start sector, round down end sector. we make sure we only - * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */ - if (unlikely(esector < BM_SECT_PER_BIT-1)) - goto out; - if (unlikely(esector == (nr_sectors-1))) - ebnr = lbnr; - else - ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); - sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - if (sbnr > ebnr) - goto out; + drbd_device_post_work(device, RS_PROGRESS); +} +static int update_sync_bits(struct drbd_device *device, + unsigned long sbnr, unsigned long ebnr, + enum update_sync_bits_mode mode) +{ /* - * ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. + * We keep a count of set bits per resync-extent in the ->rs_left + * caching member, so we need to loop and work within the resync extent + * alignment. Typically this loop will execute exactly once. */ - count = drbd_bm_clear_bits(device, sbnr, ebnr); - if (count) { - drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); - spin_lock_irqsave(&device->al_lock, flags); - drbd_try_clear_on_disk_bm(device, sector, count, true); - spin_unlock_irqrestore(&device->al_lock, flags); - - /* just wake_up unconditional now, various lc_chaged(), - * lc_put() in drbd_try_clear_on_disk_bm(). */ - wake_up = 1; + unsigned long flags; + unsigned long count = 0; + unsigned int cleared = 0; + while (sbnr <= ebnr) { + /* set temporary boundary bit number to last bit number within + * the resync extent of the current start bit number, + * but cap at provided end bit number */ + unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK); + unsigned long c; + + if (mode == RECORD_RS_FAILED) + /* Only called from drbd_rs_failed_io(), bits + * supposedly still set. Recount, maybe some + * of the bits have been successfully cleared + * by application IO meanwhile. + */ + c = drbd_bm_count_bits(device, sbnr, tbnr); + else if (mode == SET_IN_SYNC) + c = drbd_bm_clear_bits(device, sbnr, tbnr); + else /* if (mode == SET_OUT_OF_SYNC) */ + c = drbd_bm_set_bits(device, sbnr, tbnr); + + if (c) { + spin_lock_irqsave(&device->al_lock, flags); + cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode); + spin_unlock_irqrestore(&device->al_lock, flags); + count += c; + } + sbnr = tbnr + 1; } -out: - put_ldev(device); - if (wake_up) + if (count) { + if (mode == SET_IN_SYNC) { + unsigned long still_to_go = drbd_bm_total_weight(device); + bool rs_is_done = (still_to_go <= device->rs_failed); + drbd_advance_rs_marks(device, still_to_go); + if (cleared || rs_is_done) + maybe_schedule_on_disk_bitmap_update(device, rs_is_done); + } else if (mode == RECORD_RS_FAILED) + device->rs_failed += count; wake_up(&device->al_wait); + } + return count; } -/* - * this is intended to set one request worth of data out of sync. - * affects at least 1 bit, - * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. +/* clear the bit corresponding to the piece of storage in question: + * size byte of data starting from sector. Only clear a bits of the affected + * one ore more _aligned_ BM_BLOCK_SIZE blocks. + * + * called by worker on C_SYNC_TARGET and receiver on SyncSource. * - * called by tl_clear and drbd_send_dblock (==drbd_make_request). - * so this can be _any_ process. */ -int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, - const char *file, const unsigned int line) +int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, + enum update_sync_bits_mode mode, + const char *file, const unsigned int line) { - unsigned long sbnr, ebnr, flags; + /* Is called from worker and receiver context _only_ */ + unsigned long sbnr, ebnr, lbnr; + unsigned long count = 0; sector_t esector, nr_sectors; - unsigned int enr, count = 0; - struct lc_element *e; - /* this should be an empty REQ_FLUSH */ - if (size == 0) + /* This would be an empty REQ_FLUSH, be silent. */ + if ((mode == SET_OUT_OF_SYNC) && size == 0) return 0; - if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "sector: %llus, size: %d\n", - (unsigned long long)sector, size); + if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { + drbd_err(device, "%s: sector=%llus size=%d nonsense!\n", + drbd_change_sync_fname[mode], + (unsigned long long)sector, size); return 0; } if (!get_ldev(device)) - return 0; /* no disk, no metadata, no bitmap to set bits in */ + return 0; /* no disk, no metadata, no bitmap to manipulate bits in */ nr_sectors = drbd_get_capacity(device->this_bdev); esector = sector + (size >> 9) - 1; @@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size if (!expect(esector < nr_sectors)) esector = nr_sectors - 1; - /* we set it out of sync, - * we do not need to round anything here */ - sbnr = BM_SECT_TO_BIT(sector); - ebnr = BM_SECT_TO_BIT(esector); - - /* ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. */ - spin_lock_irqsave(&device->al_lock, flags); - count = drbd_bm_set_bits(device, sbnr, ebnr); + lbnr = BM_SECT_TO_BIT(nr_sectors-1); - enr = BM_SECT_TO_EXT(sector); - e = lc_find(device->resync, enr); - if (e) - lc_entry(e, struct bm_extent, lce)->rs_left += count; - spin_unlock_irqrestore(&device->al_lock, flags); + if (mode == SET_IN_SYNC) { + /* Round up start sector, round down end sector. We make sure + * we only clear full, aligned, BM_BLOCK_SIZE blocks. */ + if (unlikely(esector < BM_SECT_PER_BIT-1)) + goto out; + if (unlikely(esector == (nr_sectors-1))) + ebnr = lbnr; + else + ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); + sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); + } else { + /* We set it out of sync, or record resync failure. + * Should not round anything here. */ + sbnr = BM_SECT_TO_BIT(sector); + ebnr = BM_SECT_TO_BIT(esector); + } + count = update_sync_bits(device, sbnr, ebnr, mode); out: put_ldev(device); - return count; } @@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) struct lc_element *e; struct bm_extent *bm_ext; int i; + bool throttle = drbd_rs_should_slow_down(device, sector, true); + + /* If we need to throttle, a half-locked (only marked BME_NO_WRITES, + * not yet BME_LOCKED) extent needs to be kicked out explicitly if we + * need to throttle. There is at most one such half-locked extent, + * which is remembered in resync_wenr. */ + + if (throttle && device->resync_wenr != enr) + return -EAGAIN; spin_lock_irq(&device->al_lock); if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { @@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector) D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); clear_bit(BME_NO_WRITES, &bm_ext->flags); device->resync_wenr = LC_FREE; - if (lc_put(device->resync, &bm_ext->lce) == 0) + if (lc_put(device->resync, &bm_ext->lce) == 0) { + bm_ext->flags = 0; device->resync_locked--; + } wake_up(&device->al_wait); } else { drbd_alert(device, "LOGIC BUG\n"); @@ -1161,8 +1097,20 @@ proceed: return 0; try_again: - if (bm_ext) - device->resync_wenr = enr; + if (bm_ext) { + if (throttle) { + D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags)); + D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); + clear_bit(BME_NO_WRITES, &bm_ext->flags); + device->resync_wenr = LC_FREE; + if (lc_put(device->resync, &bm_ext->lce) == 0) { + bm_ext->flags = 0; + device->resync_locked--; + } + wake_up(&device->al_wait); + } else + device->resync_wenr = enr; + } spin_unlock_irq(&device->al_lock); return -EAGAIN; } @@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device) return 0; } - -/** - * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks - * @device: DRBD device. - * @sector: The sector number. - * @size: Size of failed IO operation, in byte. - */ -void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) -{ - /* Is called from worker and receiver context _only_ */ - unsigned long sbnr, ebnr, lbnr; - unsigned long count; - sector_t esector, nr_sectors; - int wake_up = 0; - - if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { - drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", - (unsigned long long)sector, size); - return; - } - nr_sectors = drbd_get_capacity(device->this_bdev); - esector = sector + (size >> 9) - 1; - - if (!expect(sector < nr_sectors)) - return; - if (!expect(esector < nr_sectors)) - esector = nr_sectors - 1; - - lbnr = BM_SECT_TO_BIT(nr_sectors-1); - - /* - * round up start sector, round down end sector. we make sure we only - * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */ - if (unlikely(esector < BM_SECT_PER_BIT-1)) - return; - if (unlikely(esector == (nr_sectors-1))) - ebnr = lbnr; - else - ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1)); - sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1); - - if (sbnr > ebnr) - return; - - /* - * ok, (capacity & 7) != 0 sometimes, but who cares... - * we count rs_{total,left} in bits, not sectors. - */ - spin_lock_irq(&device->al_lock); - count = drbd_bm_count_bits(device, sbnr, ebnr); - if (count) { - device->rs_failed += count; - - if (get_ldev(device)) { - drbd_try_clear_on_disk_bm(device, sector, count, false); - put_ldev(device); - } - - /* just wake_up unconditional now, various lc_chaged(), - * lc_put() in drbd_try_clear_on_disk_bm(). */ - wake_up = 1; - } - spin_unlock_irq(&device->al_lock); - if (wake_up) - wake_up(&device->al_wait); -} diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 1aa29f8..426c97a 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -22,6 +22,8 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/bitops.h> #include <linux/vmalloc.h> #include <linux/string.h> @@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number) for (i = 0; i < number; i++) { if (!pages[i]) { - printk(KERN_ALERT "drbd: bm_free_pages tried to free " - "a NULL pointer; i=%lu n=%lu\n", - i, number); + pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n", + i, number); continue; } __free_page(pages[i]); @@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) end = offset + len; if (end > b->bm_words) { - printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); + pr_alert("bm_memset end > bm_words\n"); return; } @@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) p_addr = bm_map_pidx(b, idx); bm = p_addr + MLPP(offset); if (bm+do_now > p_addr + LWPP) { - printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", + pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", p_addr, bm, (int)do_now); } else memset(bm, c, do_now * sizeof(long)); @@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device) spin_unlock_irq(&b->bm_lock); } -struct bm_aio_ctx { - struct drbd_device *device; - atomic_t in_flight; - unsigned int done; - unsigned flags; -#define BM_AIO_COPY_PAGES 1 -#define BM_AIO_WRITE_HINTED 2 -#define BM_WRITE_ALL_PAGES 4 - int error; - struct kref kref; -}; - -static void bm_aio_ctx_destroy(struct kref *kref) +static void drbd_bm_aio_ctx_destroy(struct kref *kref) { - struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); + struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref); + unsigned long flags; + spin_lock_irqsave(&ctx->device->resource->req_lock, flags); + list_del(&ctx->list); + spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags); put_ldev(ctx->device); kfree(ctx); } @@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref) /* bv_page may be a copy, or may be the original */ static void bm_async_io_complete(struct bio *bio, int error) { - struct bm_aio_ctx *ctx = bio->bi_private; + struct drbd_bm_aio_ctx *ctx = bio->bi_private; struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); @@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error) if (atomic_dec_and_test(&ctx->in_flight)) { ctx->done = 1; wake_up(&device->misc_wait); - kref_put(&ctx->kref, &bm_aio_ctx_destroy); + kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); } } -static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) +static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local) { struct bio *bio = bio_alloc_drbd(GFP_NOIO); struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; struct page *page; unsigned int len; + unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE; sector_t on_disk_sector = device->ldev->md.md_offset + device->ldev->md.bm_offset; @@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must /* * bm_rw: read/write the whole bitmap from/to its on disk location. */ -static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) +static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local) { - struct bm_aio_ctx *ctx; + struct drbd_bm_aio_ctx *ctx; struct drbd_bitmap *b = device->bitmap; int num_pages, i, count = 0; unsigned long now; @@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la * as we submit copies of pages anyways. */ - ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); + ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO); if (!ctx) return -ENOMEM; - *ctx = (struct bm_aio_ctx) { + *ctx = (struct drbd_bm_aio_ctx) { .device = device, + .start_jif = jiffies, .in_flight = ATOMIC_INIT(1), .done = 0, .flags = flags, @@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la .kref = { ATOMIC_INIT(2) }, }; - if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ + if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */ drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); kfree(ctx); return -ENODEV; } + /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from + drbd_adm_attach(), after device->ldev was assigned. */ - if (!ctx->flags) + if (0 == (ctx->flags & ~BM_AIO_READ)) WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&ctx->list, &device->pending_bitmap_io); + spin_unlock_irq(&device->resource->req_lock); + num_pages = b->bm_number_of_pages; now = jiffies; @@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la /* ignore completely unchanged pages */ if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) break; - if (rw & WRITE) { + if (!(flags & BM_AIO_READ)) { if ((flags & BM_AIO_WRITE_HINTED) && !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, &page_private(b->bm_pages[i]))) continue; - if (!(flags & BM_WRITE_ALL_PAGES) && + if (!(flags & BM_AIO_WRITE_ALL_PAGES) && bm_test_page_unchanged(b->bm_pages[i])) { dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); continue; @@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la } } atomic_inc(&ctx->in_flight); - bm_page_io_async(ctx, i, rw); + bm_page_io_async(ctx, i); ++count; cond_resched(); } @@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la if (!atomic_dec_and_test(&ctx->in_flight)) wait_until_done_or_force_detached(device, device->ldev, &ctx->done); else - kref_put(&ctx->kref, &bm_aio_ctx_destroy); + kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); /* summary for global bitmap IO */ if (flags == 0) drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", - rw == WRITE ? "WRITE" : "READ", + (flags & BM_AIO_READ) ? "READ" : "WRITE", count, jiffies - now); if (ctx->error) { @@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la err = -EIO; /* Disk timeout/force-detach during IO... */ now = jiffies; - if (rw == WRITE) { - drbd_md_flush(device); - } else /* rw == READ */ { + if (flags & BM_AIO_READ) { b->bm_set = bm_count_bits(b); drbd_info(device, "recounting of set bits took additional %lu jiffies\n", jiffies - now); } now = b->bm_set; - if (flags == 0) + if ((flags & ~BM_AIO_READ) == 0) drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); - kref_put(&ctx->kref, &bm_aio_ctx_destroy); + kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy); return err; } @@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la */ int drbd_bm_read(struct drbd_device *device) __must_hold(local) { - return bm_rw(device, READ, 0, 0); + return bm_rw(device, BM_AIO_READ, 0); } /** @@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local) */ int drbd_bm_write(struct drbd_device *device) __must_hold(local) { - return bm_rw(device, WRITE, 0, 0); + return bm_rw(device, 0, 0); } /** @@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local) */ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) { - return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0); + return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0); +} + +/** + * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed. + * @device: DRBD device. + * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages + */ +int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local) +{ + return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx); } /** @@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) */ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) { - return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0); + return bm_rw(device, BM_AIO_COPY_PAGES, 0); } /** @@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) */ int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) { - return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); -} - -/** - * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap - * @device: DRBD device. - * @idx: bitmap page index - * - * We don't want to special case on logical_block_size of the backend device, - * so we submit PAGE_SIZE aligned pieces. - * Note that on "most" systems, PAGE_SIZE is 4k. - * - * In case this becomes an issue on systems with larger PAGE_SIZE, - * we may want to change this again to write 4k aligned 4k pieces. - */ -int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local) -{ - struct bm_aio_ctx *ctx; - int err; - - if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) { - dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx); - return 0; - } - - ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); - if (!ctx) - return -ENOMEM; - - *ctx = (struct bm_aio_ctx) { - .device = device, - .in_flight = ATOMIC_INIT(1), - .done = 0, - .flags = BM_AIO_COPY_PAGES, - .error = 0, - .kref = { ATOMIC_INIT(2) }, - }; - - if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ - drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n"); - kfree(ctx); - return -ENODEV; - } - - bm_page_io_async(ctx, idx, WRITE_SYNC); - wait_until_done_or_force_detached(device, device->ldev, &ctx->done); - - if (ctx->error) - drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); - /* that causes us to detach, so the in memory bitmap will be - * gone in a moment as well. */ - - device->bm_writ_cnt++; - err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error; - kref_put(&ctx->kref, &bm_aio_ctx_destroy); - return err; + return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); } /* NOTE diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c new file mode 100644 index 0000000..5c20b18 --- /dev/null +++ b/drivers/block/drbd/drbd_debugfs.c @@ -0,0 +1,958 @@ +#define pr_fmt(fmt) "drbd debugfs: " fmt +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/stat.h> +#include <linux/jiffies.h> +#include <linux/list.h> + +#include "drbd_int.h" +#include "drbd_req.h" +#include "drbd_debugfs.h" + + +/********************************************************************** + * Whenever you change the file format, remember to bump the version. * + **********************************************************************/ + +static struct dentry *drbd_debugfs_root; +static struct dentry *drbd_debugfs_version; +static struct dentry *drbd_debugfs_resources; +static struct dentry *drbd_debugfs_minors; + +static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt) +{ + if (valid) + seq_printf(m, "\t%d", jiffies_to_msecs(dt)); + else + seq_printf(m, "\t-"); +} + +static void __seq_print_rq_state_bit(struct seq_file *m, + bool is_set, char *sep, const char *set_name, const char *unset_name) +{ + if (is_set && set_name) { + seq_putc(m, *sep); + seq_puts(m, set_name); + *sep = '|'; + } else if (!is_set && unset_name) { + seq_putc(m, *sep); + seq_puts(m, unset_name); + *sep = '|'; + } +} + +static void seq_print_rq_state_bit(struct seq_file *m, + bool is_set, char *sep, const char *set_name) +{ + __seq_print_rq_state_bit(m, is_set, sep, set_name, NULL); +} + +/* pretty print enum drbd_req_state_bits req->rq_state */ +static void seq_print_request_state(struct seq_file *m, struct drbd_request *req) +{ + unsigned int s = req->rq_state; + char sep = ' '; + seq_printf(m, "\t0x%08x", s); + seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed"); + + /* RQ_WRITE ignored, already reported */ + seq_puts(m, "\tlocal:"); + seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL"); + seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed"); + seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended"); + sep = ' '; + seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending"); + seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed"); + seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted"); + seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok"); + if (sep == ' ') + seq_puts(m, " -"); + + /* for_each_connection ... */ + seq_printf(m, "\tnet:"); + sep = ' '; + seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending"); + seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued"); + seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent"); + seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done"); + seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis"); + seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok"); + if (sep == ' ') + seq_puts(m, " -"); + + seq_printf(m, " :"); + sep = ' '; + seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B"); + seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C"); + seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr"); + if (sep == ' ') + seq_puts(m, " -"); + seq_printf(m, "\n"); +} + +static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now) +{ + /* change anything here, fixup header below! */ + unsigned int s = req->rq_state; + +#define RQ_HDR_1 "epoch\tsector\tsize\trw" + seq_printf(m, "0x%x\t%llu\t%u\t%s", + req->epoch, + (unsigned long long)req->i.sector, req->i.size >> 9, + (s & RQ_WRITE) ? "W" : "R"); + +#define RQ_HDR_2 "\tstart\tin AL\tsubmit" + seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif)); + seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif); + seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif); + +#define RQ_HDR_3 "\tsent\tacked\tdone" + seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif); + seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif); + seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif); + +#define RQ_HDR_4 "\tstate\n" + seq_print_request_state(m, req); +} +#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4 + +static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now) +{ + seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr); + seq_print_one_request(m, req, now); +} + +static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now) +{ + struct drbd_device *device; + unsigned int i; + + seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n"); + rcu_read_lock(); + idr_for_each_entry(&resource->devices, device, i) { + struct drbd_md_io tmp; + /* In theory this is racy, + * in the sense that there could have been a + * drbd_md_put_buffer(); drbd_md_get_buffer(); + * between accessing these members here. */ + tmp = device->md_io; + if (atomic_read(&tmp.in_use)) { + seq_printf(m, "%u\t%u\t%d\t", + device->minor, device->vnr, + jiffies_to_msecs(now - tmp.start_jif)); + if (time_before(tmp.submit_jif, tmp.start_jif)) + seq_puts(m, "-\t"); + else + seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif)); + seq_printf(m, "%s\n", tmp.current_use); + } + } + rcu_read_unlock(); +} + +static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now) +{ + struct drbd_device *device; + unsigned int i; + + seq_puts(m, "minor\tvnr\tage\t#waiting\n"); + rcu_read_lock(); + idr_for_each_entry(&resource->devices, device, i) { + unsigned long jif; + struct drbd_request *req; + int n = atomic_read(&device->ap_actlog_cnt); + if (n) { + spin_lock_irq(&device->resource->req_lock); + req = list_first_entry_or_null(&device->pending_master_completion[1], + struct drbd_request, req_pending_master_completion); + /* if the oldest request does not wait for the activity log + * it is not interesting for us here */ + if (req && !(req->rq_state & RQ_IN_ACT_LOG)) + jif = req->start_jif; + else + req = NULL; + spin_unlock_irq(&device->resource->req_lock); + } + if (n) { + seq_printf(m, "%u\t%u\t", device->minor, device->vnr); + if (req) + seq_printf(m, "%u\t", jiffies_to_msecs(now - jif)); + else + seq_puts(m, "-\t"); + seq_printf(m, "%u\n", n); + } + } + rcu_read_unlock(); +} + +static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now) +{ + struct drbd_bm_aio_ctx *ctx; + unsigned long start_jif; + unsigned int in_flight; + unsigned int flags; + spin_lock_irq(&device->resource->req_lock); + ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list); + if (ctx && ctx->done) + ctx = NULL; + if (ctx) { + start_jif = ctx->start_jif; + in_flight = atomic_read(&ctx->in_flight); + flags = ctx->flags; + } + spin_unlock_irq(&device->resource->req_lock); + if (ctx) { + seq_printf(m, "%u\t%u\t%c\t%u\t%u\n", + device->minor, device->vnr, + (flags & BM_AIO_READ) ? 'R' : 'W', + jiffies_to_msecs(now - start_jif), + in_flight); + } +} + +static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now) +{ + struct drbd_device *device; + unsigned int i; + + seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n"); + rcu_read_lock(); + idr_for_each_entry(&resource->devices, device, i) { + seq_print_device_bitmap_io(m, device, now); + } + rcu_read_unlock(); +} + +/* pretty print enum peer_req->flags */ +static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req) +{ + unsigned long f = peer_req->flags; + char sep = ' '; + + __seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing"); + __seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal"); + seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL"); + seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C"); + seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync"); + + if (f & EE_IS_TRIM) { + seq_putc(m, sep); + sep = '|'; + if (f & EE_IS_TRIM_USE_ZEROOUT) + seq_puts(m, "zero-out"); + else + seq_puts(m, "trim"); + } + seq_putc(m, '\n'); +} + +static void seq_print_peer_request(struct seq_file *m, + struct drbd_device *device, struct list_head *lh, + unsigned long now) +{ + bool reported_preparing = false; + struct drbd_peer_request *peer_req; + list_for_each_entry(peer_req, lh, w.list) { + if (reported_preparing && !(peer_req->flags & EE_SUBMITTED)) + continue; + + if (device) + seq_printf(m, "%u\t%u\t", device->minor, device->vnr); + + seq_printf(m, "%llu\t%u\t%c\t%u\t", + (unsigned long long)peer_req->i.sector, peer_req->i.size >> 9, + (peer_req->flags & EE_WRITE) ? 'W' : 'R', + jiffies_to_msecs(now - peer_req->submit_jif)); + seq_print_peer_request_flags(m, peer_req); + if (peer_req->flags & EE_SUBMITTED) + break; + else + reported_preparing = true; + } +} + +static void seq_print_device_peer_requests(struct seq_file *m, + struct drbd_device *device, unsigned long now) +{ + seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n"); + spin_lock_irq(&device->resource->req_lock); + seq_print_peer_request(m, device, &device->active_ee, now); + seq_print_peer_request(m, device, &device->read_ee, now); + seq_print_peer_request(m, device, &device->sync_ee, now); + spin_unlock_irq(&device->resource->req_lock); + if (test_bit(FLUSH_PENDING, &device->flags)) { + seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n", + device->minor, device->vnr, + jiffies_to_msecs(now - device->flush_jif)); + } +} + +static void seq_print_resource_pending_peer_requests(struct seq_file *m, + struct drbd_resource *resource, unsigned long now) +{ + struct drbd_device *device; + unsigned int i; + + rcu_read_lock(); + idr_for_each_entry(&resource->devices, device, i) { + seq_print_device_peer_requests(m, device, now); + } + rcu_read_unlock(); +} + +static void seq_print_resource_transfer_log_summary(struct seq_file *m, + struct drbd_resource *resource, + struct drbd_connection *connection, + unsigned long now) +{ + struct drbd_request *req; + unsigned int count = 0; + unsigned int show_state = 0; + + seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR); + spin_lock_irq(&resource->req_lock); + list_for_each_entry(req, &connection->transfer_log, tl_requests) { + unsigned int tmp = 0; + unsigned int s; + ++count; + + /* don't disable irq "forever" */ + if (!(count & 0x1ff)) { + struct drbd_request *req_next; + kref_get(&req->kref); + spin_unlock_irq(&resource->req_lock); + cond_resched(); + spin_lock_irq(&resource->req_lock); + req_next = list_next_entry(req, tl_requests); + if (kref_put(&req->kref, drbd_req_destroy)) + req = req_next; + if (&req->tl_requests == &connection->transfer_log) + break; + } + + s = req->rq_state; + + /* This is meant to summarize timing issues, to be able to tell + * local disk problems from network problems. + * Skip requests, if we have shown an even older request with + * similar aspects already. */ + if (req->master_bio == NULL) + tmp |= 1; + if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING)) + tmp |= 2; + if (s & RQ_NET_MASK) { + if (!(s & RQ_NET_SENT)) + tmp |= 4; + if (s & RQ_NET_PENDING) + tmp |= 8; + if (!(s & RQ_NET_DONE)) + tmp |= 16; + } + if ((tmp & show_state) == tmp) + continue; + show_state |= tmp; + seq_printf(m, "%u\t", count); + seq_print_minor_vnr_req(m, req, now); + if (show_state == 0x1f) + break; + } + spin_unlock_irq(&resource->req_lock); +} + +/* TODO: transfer_log and friends should be moved to resource */ +static int in_flight_summary_show(struct seq_file *m, void *pos) +{ + struct drbd_resource *resource = m->private; + struct drbd_connection *connection; + unsigned long jif = jiffies; + + connection = first_connection(resource); + /* This does not happen, actually. + * But be robust and prepare for future code changes. */ + if (!connection || !kref_get_unless_zero(&connection->kref)) + return -ESTALE; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + seq_puts(m, "oldest bitmap IO\n"); + seq_print_resource_pending_bitmap_io(m, resource, jif); + seq_putc(m, '\n'); + + seq_puts(m, "meta data IO\n"); + seq_print_resource_pending_meta_io(m, resource, jif); + seq_putc(m, '\n'); + + seq_puts(m, "socket buffer stats\n"); + /* for each connection ... once we have more than one */ + rcu_read_lock(); + if (connection->data.socket) { + /* open coded SIOCINQ, the "relevant" part */ + struct tcp_sock *tp = tcp_sk(connection->data.socket->sk); + int answ = tp->rcv_nxt - tp->copied_seq; + seq_printf(m, "unread receive buffer: %u Byte\n", answ); + /* open coded SIOCOUTQ, the "relevant" part */ + answ = tp->write_seq - tp->snd_una; + seq_printf(m, "unacked send buffer: %u Byte\n", answ); + } + rcu_read_unlock(); + seq_putc(m, '\n'); + + seq_puts(m, "oldest peer requests\n"); + seq_print_resource_pending_peer_requests(m, resource, jif); + seq_putc(m, '\n'); + + seq_puts(m, "application requests waiting for activity log\n"); + seq_print_waiting_for_AL(m, resource, jif); + seq_putc(m, '\n'); + + seq_puts(m, "oldest application requests\n"); + seq_print_resource_transfer_log_summary(m, resource, connection, jif); + seq_putc(m, '\n'); + + jif = jiffies - jif; + if (jif) + seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif)); + kref_put(&connection->kref, drbd_destroy_connection); + return 0; +} + +/* simple_positive(file->f_dentry) respectively debugfs_positive(), + * but neither is "reachable" from here. + * So we have our own inline version of it above. :-( */ +static inline int debugfs_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +/* make sure at *open* time that the respective object won't go away. */ +static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *), + void *data, struct kref *kref, + void (*release)(struct kref *)) +{ + struct dentry *parent; + int ret = -ESTALE; + + /* Are we still linked, + * or has debugfs_remove() already been called? */ + parent = file->f_dentry->d_parent; + /* not sure if this can happen: */ + if (!parent || !parent->d_inode) + goto out; + /* serialize with d_delete() */ + mutex_lock(&parent->d_inode->i_mutex); + /* Make sure the object is still alive */ + if (debugfs_positive(file->f_dentry) + && kref_get_unless_zero(kref)) + ret = 0; + mutex_unlock(&parent->d_inode->i_mutex); + if (!ret) { + ret = single_open(file, show, data); + if (ret) + kref_put(kref, release); + } +out: + return ret; +} + +static int in_flight_summary_open(struct inode *inode, struct file *file) +{ + struct drbd_resource *resource = inode->i_private; + return drbd_single_open(file, in_flight_summary_show, resource, + &resource->kref, drbd_destroy_resource); +} + +static int in_flight_summary_release(struct inode *inode, struct file *file) +{ + struct drbd_resource *resource = inode->i_private; + kref_put(&resource->kref, drbd_destroy_resource); + return single_release(inode, file); +} + +static const struct file_operations in_flight_summary_fops = { + .owner = THIS_MODULE, + .open = in_flight_summary_open, + .read = seq_read, + .llseek = seq_lseek, + .release = in_flight_summary_release, +}; + +void drbd_debugfs_resource_add(struct drbd_resource *resource) +{ + struct dentry *dentry; + if (!drbd_debugfs_resources) + return; + + dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + resource->debugfs_res = dentry; + + dentry = debugfs_create_dir("volumes", resource->debugfs_res); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + resource->debugfs_res_volumes = dentry; + + dentry = debugfs_create_dir("connections", resource->debugfs_res); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + resource->debugfs_res_connections = dentry; + + dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP, + resource->debugfs_res, resource, + &in_flight_summary_fops); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + resource->debugfs_res_in_flight_summary = dentry; + return; + +fail: + drbd_debugfs_resource_cleanup(resource); + drbd_err(resource, "failed to create debugfs dentry\n"); +} + +static void drbd_debugfs_remove(struct dentry **dp) +{ + debugfs_remove(*dp); + *dp = NULL; +} + +void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) +{ + /* it is ok to call debugfs_remove(NULL) */ + drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary); + drbd_debugfs_remove(&resource->debugfs_res_connections); + drbd_debugfs_remove(&resource->debugfs_res_volumes); + drbd_debugfs_remove(&resource->debugfs_res); +} + +static void seq_print_one_timing_detail(struct seq_file *m, + const struct drbd_thread_timing_details *tdp, + unsigned long now) +{ + struct drbd_thread_timing_details td; + /* No locking... + * use temporary assignment to get at consistent data. */ + do { + td = *tdp; + } while (td.cb_nr != tdp->cb_nr); + if (!td.cb_addr) + return; + seq_printf(m, "%u\t%d\t%s:%u\t%ps\n", + td.cb_nr, + jiffies_to_msecs(now - td.start_jif), + td.caller_fn, td.line, + td.cb_addr); +} + +static void seq_print_timing_details(struct seq_file *m, + const char *title, + unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now) +{ + unsigned int start_idx; + unsigned int i; + + seq_printf(m, "%s\n", title); + /* If not much is going on, this will result in natural ordering. + * If it is very busy, we will possibly skip events, or even see wrap + * arounds, which could only be avoided with locking. + */ + start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST; + for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++) + seq_print_one_timing_detail(m, tdp+i, now); + for (i = 0; i < start_idx; i++) + seq_print_one_timing_detail(m, tdp+i, now); +} + +static int callback_history_show(struct seq_file *m, void *ignored) +{ + struct drbd_connection *connection = m->private; + unsigned long jif = jiffies; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + seq_puts(m, "n\tage\tcallsite\tfn\n"); + seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif); + seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif); + return 0; +} + +static int callback_history_open(struct inode *inode, struct file *file) +{ + struct drbd_connection *connection = inode->i_private; + return drbd_single_open(file, callback_history_show, connection, + &connection->kref, drbd_destroy_connection); +} + +static int callback_history_release(struct inode *inode, struct file *file) +{ + struct drbd_connection *connection = inode->i_private; + kref_put(&connection->kref, drbd_destroy_connection); + return single_release(inode, file); +} + +static const struct file_operations connection_callback_history_fops = { + .owner = THIS_MODULE, + .open = callback_history_open, + .read = seq_read, + .llseek = seq_lseek, + .release = callback_history_release, +}; + +static int connection_oldest_requests_show(struct seq_file *m, void *ignored) +{ + struct drbd_connection *connection = m->private; + unsigned long now = jiffies; + struct drbd_request *r1, *r2; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + spin_lock_irq(&connection->resource->req_lock); + r1 = connection->req_next; + if (r1) + seq_print_minor_vnr_req(m, r1, now); + r2 = connection->req_ack_pending; + if (r2 && r2 != r1) { + r1 = r2; + seq_print_minor_vnr_req(m, r1, now); + } + r2 = connection->req_not_net_done; + if (r2 && r2 != r1) + seq_print_minor_vnr_req(m, r2, now); + spin_unlock_irq(&connection->resource->req_lock); + return 0; +} + +static int connection_oldest_requests_open(struct inode *inode, struct file *file) +{ + struct drbd_connection *connection = inode->i_private; + return drbd_single_open(file, connection_oldest_requests_show, connection, + &connection->kref, drbd_destroy_connection); +} + +static int connection_oldest_requests_release(struct inode *inode, struct file *file) +{ + struct drbd_connection *connection = inode->i_private; + kref_put(&connection->kref, drbd_destroy_connection); + return single_release(inode, file); +} + +static const struct file_operations connection_oldest_requests_fops = { + .owner = THIS_MODULE, + .open = connection_oldest_requests_open, + .read = seq_read, + .llseek = seq_lseek, + .release = connection_oldest_requests_release, +}; + +void drbd_debugfs_connection_add(struct drbd_connection *connection) +{ + struct dentry *conns_dir = connection->resource->debugfs_res_connections; + struct dentry *dentry; + if (!conns_dir) + return; + + /* Once we enable mutliple peers, + * these connections will have descriptive names. + * For now, it is just the one connection to the (only) "peer". */ + dentry = debugfs_create_dir("peer", conns_dir); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + connection->debugfs_conn = dentry; + + dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP, + connection->debugfs_conn, connection, + &connection_callback_history_fops); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + connection->debugfs_conn_callback_history = dentry; + + dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP, + connection->debugfs_conn, connection, + &connection_oldest_requests_fops); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + connection->debugfs_conn_oldest_requests = dentry; + return; + +fail: + drbd_debugfs_connection_cleanup(connection); + drbd_err(connection, "failed to create debugfs dentry\n"); +} + +void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) +{ + drbd_debugfs_remove(&connection->debugfs_conn_callback_history); + drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests); + drbd_debugfs_remove(&connection->debugfs_conn); +} + +static void resync_dump_detail(struct seq_file *m, struct lc_element *e) +{ + struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); + + seq_printf(m, "%5d %s %s %s\n", bme->rs_left, + test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------", + test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------", + test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------" + ); +} + +static int device_resync_extents_show(struct seq_file *m, void *ignored) +{ + struct drbd_device *device = m->private; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + if (get_ldev_if_state(device, D_FAILED)) { + lc_seq_printf_stats(m, device->resync); + lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail); + put_ldev(device); + } + return 0; +} + +static int device_act_log_extents_show(struct seq_file *m, void *ignored) +{ + struct drbd_device *device = m->private; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + if (get_ldev_if_state(device, D_FAILED)) { + lc_seq_printf_stats(m, device->act_log); + lc_seq_dump_details(m, device->act_log, "", NULL); + put_ldev(device); + } + return 0; +} + +static int device_oldest_requests_show(struct seq_file *m, void *ignored) +{ + struct drbd_device *device = m->private; + struct drbd_resource *resource = device->resource; + unsigned long now = jiffies; + struct drbd_request *r1, *r2; + int i; + + /* BUMP me if you change the file format/content/presentation */ + seq_printf(m, "v: %u\n\n", 0); + + seq_puts(m, RQ_HDR); + spin_lock_irq(&resource->req_lock); + /* WRITE, then READ */ + for (i = 1; i >= 0; --i) { + r1 = list_first_entry_or_null(&device->pending_master_completion[i], + struct drbd_request, req_pending_master_completion); + r2 = list_first_entry_or_null(&device->pending_completion[i], + struct drbd_request, req_pending_local); + if (r1) + seq_print_one_request(m, r1, now); + if (r2 && r2 != r1) + seq_print_one_request(m, r2, now); + } + spin_unlock_irq(&resource->req_lock); + return 0; +} + +static int device_data_gen_id_show(struct seq_file *m, void *ignored) +{ + struct drbd_device *device = m->private; + struct drbd_md *md; + enum drbd_uuid_index idx; + + if (!get_ldev_if_state(device, D_FAILED)) + return -ENODEV; + + md = &device->ldev->md; + spin_lock_irq(&md->uuid_lock); + for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) { + seq_printf(m, "0x%016llX\n", md->uuid[idx]); + } + spin_unlock_irq(&md->uuid_lock); + put_ldev(device); + return 0; +} + +#define drbd_debugfs_device_attr(name) \ +static int device_ ## name ## _open(struct inode *inode, struct file *file) \ +{ \ + struct drbd_device *device = inode->i_private; \ + return drbd_single_open(file, device_ ## name ## _show, device, \ + &device->kref, drbd_destroy_device); \ +} \ +static int device_ ## name ## _release(struct inode *inode, struct file *file) \ +{ \ + struct drbd_device *device = inode->i_private; \ + kref_put(&device->kref, drbd_destroy_device); \ + return single_release(inode, file); \ +} \ +static const struct file_operations device_ ## name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = device_ ## name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = device_ ## name ## _release, \ +}; + +drbd_debugfs_device_attr(oldest_requests) +drbd_debugfs_device_attr(act_log_extents) +drbd_debugfs_device_attr(resync_extents) +drbd_debugfs_device_attr(data_gen_id) + +void drbd_debugfs_device_add(struct drbd_device *device) +{ + struct dentry *vols_dir = device->resource->debugfs_res_volumes; + char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */ + char vnr_buf[8]; /* volume number vnr is even 16 bit only; */ + char *slink_name = NULL; + + struct dentry *dentry; + if (!vols_dir || !drbd_debugfs_minors) + return; + + snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr); + dentry = debugfs_create_dir(vnr_buf, vols_dir); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + device->debugfs_vol = dentry; + + snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor); + slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u", + device->resource->name, device->vnr); + if (!slink_name) + goto fail; + dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name); + kfree(slink_name); + slink_name = NULL; + if (IS_ERR_OR_NULL(dentry)) + goto fail; + device->debugfs_minor = dentry; + +#define DCF(name) do { \ + dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \ + device->debugfs_vol, device, \ + &device_ ## name ## _fops); \ + if (IS_ERR_OR_NULL(dentry)) \ + goto fail; \ + device->debugfs_vol_ ## name = dentry; \ + } while (0) + + DCF(oldest_requests); + DCF(act_log_extents); + DCF(resync_extents); + DCF(data_gen_id); +#undef DCF + return; + +fail: + drbd_debugfs_device_cleanup(device); + drbd_err(device, "failed to create debugfs entries\n"); +} + +void drbd_debugfs_device_cleanup(struct drbd_device *device) +{ + drbd_debugfs_remove(&device->debugfs_minor); + drbd_debugfs_remove(&device->debugfs_vol_oldest_requests); + drbd_debugfs_remove(&device->debugfs_vol_act_log_extents); + drbd_debugfs_remove(&device->debugfs_vol_resync_extents); + drbd_debugfs_remove(&device->debugfs_vol_data_gen_id); + drbd_debugfs_remove(&device->debugfs_vol); +} + +void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) +{ + struct dentry *conn_dir = peer_device->connection->debugfs_conn; + struct dentry *dentry; + char vnr_buf[8]; + + if (!conn_dir) + return; + + snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr); + dentry = debugfs_create_dir(vnr_buf, conn_dir); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + peer_device->debugfs_peer_dev = dentry; + return; + +fail: + drbd_debugfs_peer_device_cleanup(peer_device); + drbd_err(peer_device, "failed to create debugfs entries\n"); +} + +void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) +{ + drbd_debugfs_remove(&peer_device->debugfs_peer_dev); +} + +static int drbd_version_show(struct seq_file *m, void *ignored) +{ + seq_printf(m, "# %s\n", drbd_buildtag()); + seq_printf(m, "VERSION=%s\n", REL_VERSION); + seq_printf(m, "API_VERSION=%u\n", API_VERSION); + seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN); + seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX); + return 0; +} + +static int drbd_version_open(struct inode *inode, struct file *file) +{ + return single_open(file, drbd_version_show, NULL); +} + +static struct file_operations drbd_version_fops = { + .owner = THIS_MODULE, + .open = drbd_version_open, + .llseek = seq_lseek, + .read = seq_read, + .release = single_release, +}; + +/* not __exit, may be indirectly called + * from the module-load-failure path as well. */ +void drbd_debugfs_cleanup(void) +{ + drbd_debugfs_remove(&drbd_debugfs_resources); + drbd_debugfs_remove(&drbd_debugfs_minors); + drbd_debugfs_remove(&drbd_debugfs_version); + drbd_debugfs_remove(&drbd_debugfs_root); +} + +int __init drbd_debugfs_init(void) +{ + struct dentry *dentry; + + dentry = debugfs_create_dir("drbd", NULL); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + drbd_debugfs_root = dentry; + + dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + drbd_debugfs_version = dentry; + + dentry = debugfs_create_dir("resources", drbd_debugfs_root); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + drbd_debugfs_resources = dentry; + + dentry = debugfs_create_dir("minors", drbd_debugfs_root); + if (IS_ERR_OR_NULL(dentry)) + goto fail; + drbd_debugfs_minors = dentry; + return 0; + +fail: + drbd_debugfs_cleanup(); + if (dentry) + return PTR_ERR(dentry); + else + return -EINVAL; +} diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h new file mode 100644 index 0000000..8bee213 --- /dev/null +++ b/drivers/block/drbd/drbd_debugfs.h @@ -0,0 +1,39 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/debugfs.h> + +#include "drbd_int.h" + +#ifdef CONFIG_DEBUG_FS +int __init drbd_debugfs_init(void); +void drbd_debugfs_cleanup(void); + +void drbd_debugfs_resource_add(struct drbd_resource *resource); +void drbd_debugfs_resource_cleanup(struct drbd_resource *resource); + +void drbd_debugfs_connection_add(struct drbd_connection *connection); +void drbd_debugfs_connection_cleanup(struct drbd_connection *connection); + +void drbd_debugfs_device_add(struct drbd_device *device); +void drbd_debugfs_device_cleanup(struct drbd_device *device); + +void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device); +void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device); +#else + +static inline int __init drbd_debugfs_init(void) { return -ENODEV; } +static inline void drbd_debugfs_cleanup(void) { } + +static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { } +static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { } + +static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { } +static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { } + +static inline void drbd_debugfs_device_add(struct drbd_device *device) { } +static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { } + +static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { } +static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { } + +#endif diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a76ceb3..1a00001 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -317,7 +317,63 @@ struct drbd_request { struct list_head tl_requests; /* ring list in the transfer log */ struct bio *master_bio; /* master bio pointer */ - unsigned long start_time; + + /* see struct drbd_device */ + struct list_head req_pending_master_completion; + struct list_head req_pending_local; + + /* for generic IO accounting */ + unsigned long start_jif; + + /* for DRBD internal statistics */ + + /* Minimal set of time stamps to determine if we wait for activity log + * transactions, local disk or peer. 32 bit "jiffies" are good enough, + * we don't expect a DRBD request to be stalled for several month. + */ + + /* before actual request processing */ + unsigned long in_actlog_jif; + + /* local disk */ + unsigned long pre_submit_jif; + + /* per connection */ + unsigned long pre_send_jif; + unsigned long acked_jif; + unsigned long net_done_jif; + + /* Possibly even more detail to track each phase: + * master_completion_jif + * how long did it take to complete the master bio + * (application visible latency) + * allocated_jif + * how long the master bio was blocked until we finally allocated + * a tracking struct + * in_actlog_jif + * how long did we wait for activity log transactions + * + * net_queued_jif + * when did we finally queue it for sending + * pre_send_jif + * when did we start sending it + * post_send_jif + * how long did we block in the network stack trying to send it + * acked_jif + * when did we receive (or fake, in protocol A) a remote ACK + * net_done_jif + * when did we receive final acknowledgement (P_BARRIER_ACK), + * or decide, e.g. on connection loss, that we do no longer expect + * anything from this peer for this request. + * + * pre_submit_jif + * post_sub_jif + * when did we start submiting to the lower level device, + * and how long did we block in that submit function + * local_completion_jif + * how long did it take the lower level device to complete this request + */ + /* once it hits 0, we may complete the master_bio */ atomic_t completion_ref; @@ -366,6 +422,7 @@ struct drbd_peer_request { struct drbd_interval i; /* see comments on ee flag bits below */ unsigned long flags; + unsigned long submit_jif; union { u64 block_id; struct digest_info *digest; @@ -408,6 +465,17 @@ enum { /* Is set when net_conf had two_primaries set while creating this peer_req */ __EE_IN_INTERVAL_TREE, + + /* for debugfs: */ + /* has this been submitted, or does it still wait for something else? */ + __EE_SUBMITTED, + + /* this is/was a write request */ + __EE_WRITE, + + /* this originates from application on peer + * (not some resync or verify or other DRBD internal request) */ + __EE_APPLICATION, }; #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) @@ -419,6 +487,9 @@ enum { #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) #define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) #define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) +#define EE_SUBMITTED (1<<__EE_SUBMITTED) +#define EE_WRITE (1<<__EE_WRITE) +#define EE_APPLICATION (1<<__EE_APPLICATION) /* flag bits per device */ enum { @@ -433,11 +504,11 @@ enum { CONSIDER_RESYNC, MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ + SUSPEND_IO, /* suspend application io */ BITMAP_IO, /* suspend application io; once no more io in flight, start bitmap io */ BITMAP_IO_QUEUED, /* Started bitmap IO */ - GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */ WAS_IO_ERROR, /* Local disk failed, returned IO error */ WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ @@ -450,6 +521,20 @@ enum { B_RS_H_DONE, /* Before resync handler done (already executed) */ DISCARD_MY_DATA, /* discard_my_data flag per volume */ READ_BALANCE_RR, + + FLUSH_PENDING, /* if set, device->flush_jif is when we submitted that flush + * from drbd_flush_after_epoch() */ + + /* cleared only after backing device related structures have been destroyed. */ + GOING_DISKLESS, /* Disk is being detached, because of io-error, or admin request. */ + + /* to be used in drbd_device_post_work() */ + GO_DISKLESS, /* tell worker to schedule cleanup before detach */ + DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */ + MD_SYNC, /* tell worker to call drbd_md_sync() */ + RS_START, /* tell worker to start resync/OV */ + RS_PROGRESS, /* tell worker that resync made significant progress */ + RS_DONE, /* tell worker that resync is done */ }; struct drbd_bitmap; /* opaque for drbd_device */ @@ -531,6 +616,11 @@ struct drbd_backing_dev { }; struct drbd_md_io { + struct page *page; + unsigned long start_jif; /* last call to drbd_md_get_buffer */ + unsigned long submit_jif; /* last _drbd_md_sync_page_io() submit */ + const char *current_use; + atomic_t in_use; unsigned int done; int error; }; @@ -577,10 +667,18 @@ enum { * and potentially deadlock on, this drbd worker. */ DISCONNECT_SENT, + + DEVICE_WORK_PENDING, /* tell worker that some device has pending work */ }; struct drbd_resource { char *name; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_res; + struct dentry *debugfs_res_volumes; + struct dentry *debugfs_res_connections; + struct dentry *debugfs_res_in_flight_summary; +#endif struct kref kref; struct idr devices; /* volume number to device mapping */ struct list_head connections; @@ -594,12 +692,28 @@ struct drbd_resource { unsigned susp_nod:1; /* IO suspended because no data */ unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ + enum write_ordering_e write_ordering; + cpumask_var_t cpu_mask; }; +struct drbd_thread_timing_details +{ + unsigned long start_jif; + void *cb_addr; + const char *caller_fn; + unsigned int line; + unsigned int cb_nr; +}; + struct drbd_connection { struct list_head connections; struct drbd_resource *resource; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_conn; + struct dentry *debugfs_conn_callback_history; + struct dentry *debugfs_conn_oldest_requests; +#endif struct kref kref; struct idr peer_devices; /* volume number to peer device mapping */ enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ @@ -636,7 +750,6 @@ struct drbd_connection { struct drbd_epoch *current_epoch; spinlock_t epoch_lock; unsigned int epochs; - enum write_ordering_e write_ordering; atomic_t current_tle_nr; /* transfer log epoch number */ unsigned current_tle_writes; /* writes seen within this tl epoch */ @@ -645,9 +758,22 @@ struct drbd_connection { struct drbd_thread worker; struct drbd_thread asender; + /* cached pointers, + * so we can look up the oldest pending requests more quickly. + * protected by resource->req_lock */ + struct drbd_request *req_next; /* DRBD 9: todo.req_next */ + struct drbd_request *req_ack_pending; + struct drbd_request *req_not_net_done; + /* sender side */ struct drbd_work_queue sender_work; +#define DRBD_THREAD_DETAILS_HIST 16 + unsigned int w_cb_nr; /* keeps counting up */ + unsigned int r_cb_nr; /* keeps counting up */ + struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST]; + struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST]; + struct { /* whether this sender thread * has processed a single write yet. */ @@ -663,11 +789,22 @@ struct drbd_connection { } send; }; +void __update_timing_details( + struct drbd_thread_timing_details *tdp, + unsigned int *cb_nr, + void *cb, + const char *fn, const unsigned int line); + +#define update_worker_timing_details(c, cb) \ + __update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ ) +#define update_receiver_timing_details(c, cb) \ + __update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ ) + struct submit_worker { struct workqueue_struct *wq; struct work_struct worker; - spinlock_t lock; + /* protected by ..->resource->req_lock */ struct list_head writes; }; @@ -675,12 +812,29 @@ struct drbd_peer_device { struct list_head peer_devices; struct drbd_device *device; struct drbd_connection *connection; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_peer_dev; +#endif }; struct drbd_device { struct drbd_resource *resource; struct list_head peer_devices; - int vnr; /* volume number within the connection */ + struct list_head pending_bitmap_io; + + unsigned long flush_jif; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_minor; + struct dentry *debugfs_vol; + struct dentry *debugfs_vol_oldest_requests; + struct dentry *debugfs_vol_act_log_extents; + struct dentry *debugfs_vol_resync_extents; + struct dentry *debugfs_vol_data_gen_id; +#endif + + unsigned int vnr; /* volume number within the connection */ + unsigned int minor; /* device minor number */ + struct kref kref; /* things that are stored as / read from meta data on disk */ @@ -697,19 +851,10 @@ struct drbd_device { unsigned long last_reattach_jif; struct drbd_work resync_work; struct drbd_work unplug_work; - struct drbd_work go_diskless; - struct drbd_work md_sync_work; - struct drbd_work start_resync_work; struct timer_list resync_timer; struct timer_list md_sync_timer; struct timer_list start_resync_timer; struct timer_list request_timer; -#ifdef DRBD_DEBUG_MD_SYNC - struct { - unsigned int line; - const char* func; - } last_md_mark_dirty; -#endif /* Used after attach while negotiating new disk state. */ union drbd_state new_state_tmp; @@ -724,6 +869,7 @@ struct drbd_device { unsigned int al_writ_cnt; unsigned int bm_writ_cnt; atomic_t ap_bio_cnt; /* Requests we need to complete */ + atomic_t ap_actlog_cnt; /* Requests waiting for activity log */ atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ atomic_t unacked_cnt; /* Need to send replies for */ @@ -733,6 +879,13 @@ struct drbd_device { struct rb_root read_requests; struct rb_root write_requests; + /* for statistics and timeouts */ + /* [0] read, [1] write */ + struct list_head pending_master_completion[2]; + struct list_head pending_completion[2]; + + /* use checksums for *this* resync */ + bool use_csums; /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ unsigned long rs_total; /* number of resync blocks that failed in this run */ @@ -788,9 +941,7 @@ struct drbd_device { atomic_t pp_in_use; /* allocated from page pool */ atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; - struct page *md_io_page; /* one page buffer for md_io */ struct drbd_md_io md_io; - atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */ spinlock_t al_lock; wait_queue_head_t al_wait; struct lru_cache *act_log; /* activity log */ @@ -800,7 +951,6 @@ struct drbd_device { atomic_t packet_seq; unsigned int peer_seq; spinlock_t peer_seq_lock; - unsigned int minor; unsigned long comm_bm_set; /* communicated number of set bits. */ struct bm_io_work bm_io_work; u64 ed_uuid; /* UUID of the exposed data */ @@ -824,6 +974,21 @@ struct drbd_device { struct submit_worker submit; }; +struct drbd_bm_aio_ctx { + struct drbd_device *device; + struct list_head list; /* on device->pending_bitmap_io */; + unsigned long start_jif; + atomic_t in_flight; + unsigned int done; + unsigned flags; +#define BM_AIO_COPY_PAGES 1 +#define BM_AIO_WRITE_HINTED 2 +#define BM_AIO_WRITE_ALL_PAGES 4 +#define BM_AIO_READ 8 + int error; + struct kref kref; +}; + struct drbd_config_context { /* assigned from drbd_genlmsghdr */ unsigned int minor; @@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int extern int drbd_send_bitmap(struct drbd_device *device); extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); -extern void drbd_free_bc(struct drbd_backing_dev *ldev); +extern void drbd_free_ldev(struct drbd_backing_dev *ldev); extern void drbd_device_cleanup(struct drbd_device *device); void drbd_print_uuids(struct drbd_device *device, const char *text); @@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local); extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local); extern int drbd_md_test_flag(struct drbd_backing_dev *, int); -#ifndef DRBD_DEBUG_MD_SYNC extern void drbd_md_mark_dirty(struct drbd_device *device); -#else -#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ ) -extern void drbd_md_mark_dirty_(struct drbd_device *device, - unsigned int line, const char *func); -#endif extern void drbd_queue_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), void (*done)(struct drbd_device *, int), @@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device, extern int drbd_bitmap_io_from_worker(struct drbd_device *device, int (*io_fn)(struct drbd_device *), char *why, enum bm_flag flags); -extern int drbd_bmio_set_n_write(struct drbd_device *device); -extern int drbd_bmio_clear_n_write(struct drbd_device *device); -extern void drbd_ldev_destroy(struct drbd_device *device); +extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local); +extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local); /* Meta data layout * @@ -1105,17 +1263,21 @@ struct bm_extent { /* in which _bitmap_ extent (resp. sector) the bit for a certain * _storage_ sector is located in */ #define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) +#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) -/* how much _storage_ sectors we have per bitmap sector */ +/* first storage sector a bitmap extent corresponds to */ #define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) +/* how much _storage_ sectors we have per bitmap extent */ #define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) +/* how many bits are covered by one bitmap extent (resync extent) */ +#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT)) + +#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1) + /* in one sector of the bitmap, we have this many activity_log extents. */ #define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) -#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT) -#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1) - /* the extent in "PER_EXTENT" below is an activity log extent * we need that many (long words/bytes) to store the bitmap * of one AL_EXTENT_SIZE chunk of storage. @@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device, const unsigned long s, const unsigned long e); extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); -extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local); extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); +extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local); extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); extern size_t drbd_bm_words(struct drbd_device *device); @@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); extern unsigned long drbd_bm_total_weight(struct drbd_device *device); -extern int drbd_bm_rs_done(struct drbd_device *device); /* for receive_bitmap */ extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, size_t number, unsigned long *buffer); @@ -1312,7 +1473,7 @@ enum determine_dev_size { extern enum determine_dev_size drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); extern void resync_after_online_grow(struct drbd_device *); -extern void drbd_reconsider_max_bio_size(struct drbd_device *device); +extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev); extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force); @@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device); extern void suspend_other_sg(struct drbd_device *device); extern int drbd_resync_finished(struct drbd_device *device); /* maybe rather drbd_main.c ? */ -extern void *drbd_md_get_buffer(struct drbd_device *device); +extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, sector_t sector, int rw); @@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); extern int drbd_receiver(struct drbd_thread *thi); extern int drbd_asender(struct drbd_thread *thi); extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); -extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); +extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, + bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_device *, struct drbd_peer_request *, const unsigned, const int); @@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device, { __release(local); if (!bio->bi_bdev) { - printk(KERN_ERR "drbd%d: drbd_generic_make_request: " - "bio->bi_bdev == NULL\n", - device_to_minor(device)); - dump_stack(); + drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); bio_endio(bio, -ENODEV); return; } @@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, generic_make_request(bio); } -void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo); /* drbd_proc.c */ extern struct proc_dir_entry *drbd_proc; @@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s); /* drbd_actlog.c */ extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); -extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); +extern void drbd_al_begin_io_commit(struct drbd_device *device); extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); -extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate); +extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i); extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); @@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device); extern void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size); extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); -extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, - int size, const char *file, const unsigned int line); + +enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC }; +extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size, + enum update_sync_bits_mode mode, + const char *file, const unsigned int line); #define drbd_set_in_sync(device, sector, size) \ - __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) -extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, - int size, const char *file, const unsigned int line); + __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__) #define drbd_set_out_of_sync(device, sector, size) \ - __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) + __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__) +#define drbd_rs_failed_io(device, sector, size) \ + __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__) extern void drbd_al_shrink(struct drbd_device *device); extern int drbd_initialize_al(struct drbd_device *, void *); @@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev) } static inline void -drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) +drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) { unsigned long flags; spin_lock_irqsave(&q->q_lock, flags); - list_add(&w->list, &q->q); + list_add_tail(&w->list, &q->q); spin_unlock_irqrestore(&q->q_lock, flags); wake_up(&q->q_wait); } static inline void -drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) +drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w) { unsigned long flags; spin_lock_irqsave(&q->q_lock, flags); - list_add_tail(&w->list, &q->q); + if (list_empty_careful(&w->list)) + list_add_tail(&w->list, &q->q); spin_unlock_irqrestore(&q->q_lock, flags); wake_up(&q->q_wait); } +static inline void +drbd_device_post_work(struct drbd_device *device, int work_bit) +{ + if (!test_and_set_bit(work_bit, &device->flags)) { + struct drbd_connection *connection = + first_peer_device(device)->connection; + struct drbd_work_queue *q = &connection->sender_work; + if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags)) + wake_up(&q->q_wait); + } +} + extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); static inline void wake_asender(struct drbd_connection *connection) @@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device) func, line, \ atomic_read(&device->which)) -#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__) +#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__) static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) { if (atomic_dec_and_test(&device->ap_pending_cnt)) @@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device) atomic_inc(&device->rs_pending_cnt); } -#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__) +#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__) static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) { atomic_dec(&device->rs_pending_cnt); @@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device) atomic_inc(&device->unacked_cnt); } -#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__) +#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__) static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) { atomic_dec(&device->unacked_cnt); ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } -#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__) +#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__) static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) { atomic_sub(n, &device->unacked_cnt); ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); } +static inline bool is_sync_state(enum drbd_conns connection_state) +{ + return + (connection_state == C_SYNC_SOURCE + || connection_state == C_SYNC_TARGET + || connection_state == C_PAUSED_SYNC_S + || connection_state == C_PAUSED_SYNC_T); +} + /** * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev * @M: DRBD device. @@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f static inline void put_ldev(struct drbd_device *device) { + enum drbd_disk_state ds = device->state.disk; + /* We must check the state *before* the atomic_dec becomes visible, + * or we have a theoretical race where someone hitting zero, + * while state still D_FAILED, will then see D_DISKLESS in the + * condition below and calling into destroy, where he must not, yet. */ int i = atomic_dec_return(&device->local_cnt); /* This may be called from some endio handler, @@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device) __release(local); D_ASSERT(device, i >= 0); if (i == 0) { - if (device->state.disk == D_DISKLESS) + if (ds == D_DISKLESS) /* even internal references gone, safe to destroy */ - drbd_ldev_destroy(device); - if (device->state.disk == D_FAILED) { + drbd_device_post_work(device, DESTROY_DISK); + if (ds == D_FAILED) /* all application IO references gone. */ - if (!test_and_set_bit(GO_DISKLESS, &device->flags)) - drbd_queue_work(&first_peer_device(device)->connection->sender_work, - &device->go_diskless); - } + if (!test_and_set_bit(GOING_DISKLESS, &device->flags)) + drbd_device_post_work(device, GO_DISKLESS); wake_up(&device->misc_wait); } } @@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_ extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); #endif -/* you must have an "get_ldev" reference */ -static inline void drbd_get_syncer_progress(struct drbd_device *device, - unsigned long *bits_left, unsigned int *per_mil_done) -{ - /* this is to break it at compile time when we change that, in case we - * want to support more than (1<<32) bits on a 32bit arch. */ - typecheck(unsigned long, device->rs_total); - - /* note: both rs_total and rs_left are in bits, i.e. in - * units of BM_BLOCK_SIZE. - * for the percentage, we don't care. */ - - if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) - *bits_left = device->ov_left; - else - *bits_left = drbd_bm_total_weight(device) - device->rs_failed; - /* >> 10 to prevent overflow, - * +1 to prevent division by zero */ - if (*bits_left > device->rs_total) { - /* doh. maybe a logic bug somewhere. - * may also be just a race condition - * between this and a disconnect during sync. - * for now, just prevent in-kernel buffer overflow. - */ - smp_rmb(); - drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n", - drbd_conn_str(device->state.conn), - *bits_left, device->rs_total, device->rs_failed); - *per_mil_done = 0; - } else { - /* Make sure the division happens in long context. - * We allow up to one petabyte storage right now, - * at a granularity of 4k per bit that is 2**38 bits. - * After shift right and multiplication by 1000, - * this should still fit easily into a 32bit long, - * so we don't need a 64bit division on 32bit arch. - * Note: currently we don't support such large bitmaps on 32bit - * arch anyways, but no harm done to be prepared for it here. - */ - unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10; - unsigned long left = *bits_left >> shift; - unsigned long total = 1UL + (device->rs_total >> shift); - unsigned long tmp = 1000UL - left * 1000UL/total; - *per_mil_done = tmp; - } -} - - /* this throttles on-the-fly application requests * according to max_buffers settings; * maybe re-implement using semaphores? */ @@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device) return QUEUE_ORDERED_NONE; } -static inline void drbd_md_flush(struct drbd_device *device) -{ - int r; - - if (device->ldev == NULL) { - drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n"); - return; - } - - if (test_bit(MD_NO_FUA, &device->flags)) - return; - - r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL); - if (r) { - set_bit(MD_NO_FUA, &device->flags); - drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r); - } -} - static inline struct drbd_connection *first_connection(struct drbd_resource *resource) { return list_first_entry_or_null(&resource->connections, diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h index f38fcb0..f210543 100644 --- a/drivers/block/drbd/drbd_interval.h +++ b/drivers/block/drbd/drbd_interval.h @@ -10,7 +10,9 @@ struct drbd_interval { unsigned int size; /* size in bytes */ sector_t end; /* highest interval end in subtree */ int local:1 /* local or remote request? */; - int waiting:1; + int waiting:1; /* someone is waiting for this to complete */ + int completed:1; /* this has been completed already; + * ignore for conflict detection */ }; static inline void drbd_clear_interval(struct drbd_interval *i) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 960645c..9b465bb 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -26,7 +26,10 @@ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> +#include <linux/jiffies.h> #include <linux/drbd.h> #include <asm/uaccess.h> #include <asm/types.h> @@ -54,16 +57,14 @@ #include "drbd_int.h" #include "drbd_protocol.h" #include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ - #include "drbd_vli.h" +#include "drbd_debugfs.h" static DEFINE_MUTEX(drbd_main_mutex); static int drbd_open(struct block_device *bdev, fmode_t mode); static void drbd_release(struct gendisk *gd, fmode_t mode); -static int w_md_sync(struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_work *w, int unused); -static int w_go_diskless(struct drbd_work *w, int unused); MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " "Lars Ellenberg <lars@linbit.com>"); @@ -264,7 +265,7 @@ bail: /** * _tl_restart() - Walks the transfer log, and applies an action to all requests - * @device: DRBD device. + * @connection: DRBD connection to operate on. * @what: The action/event to perform with all request objects * * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, @@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr, msg_flags); if (data && !err) err = drbd_send_all(connection, sock->socket, data, size, 0); + /* DRBD protocol "pings" are latency critical. + * This is supposed to trigger tcp_push_pending_frames() */ + if (!err && (cmd == P_PING || cmd == P_PING_ACK)) + drbd_tcp_nodelay(sock->socket); + return err; } @@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * if (peer_device->connection->agreed_pro_version >= 100) { if (req->rq_state & RQ_EXP_RECEIVE_ACK) dp_flags |= DP_SEND_RECEIVE_ACK; - if (req->rq_state & RQ_EXP_WRITE_ACK) + /* During resync, request an explicit write ack, + * even in protocol != C */ + if (req->rq_state & RQ_EXP_WRITE_ACK + || (dp_flags & DP_MAY_SET_IN_SYNC)) dp_flags |= DP_SEND_WRITE_ACK; } p->dp_flags = cpu_to_be32(dp_flags); @@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device) drbd_set_defaults(device); atomic_set(&device->ap_bio_cnt, 0); + atomic_set(&device->ap_actlog_cnt, 0); atomic_set(&device->ap_pending_cnt, 0); atomic_set(&device->rs_pending_cnt, 0); atomic_set(&device->unacked_cnt, 0); @@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device) atomic_set(&device->rs_sect_in, 0); atomic_set(&device->rs_sect_ev, 0); atomic_set(&device->ap_in_flight, 0); - atomic_set(&device->md_io_in_use, 0); + atomic_set(&device->md_io.in_use, 0); mutex_init(&device->own_state_mutex); device->state_mutex = &device->own_state_mutex; @@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device) INIT_LIST_HEAD(&device->resync_reads); INIT_LIST_HEAD(&device->resync_work.list); INIT_LIST_HEAD(&device->unplug_work.list); - INIT_LIST_HEAD(&device->go_diskless.list); - INIT_LIST_HEAD(&device->md_sync_work.list); - INIT_LIST_HEAD(&device->start_resync_work.list); INIT_LIST_HEAD(&device->bm_io_work.w.list); + INIT_LIST_HEAD(&device->pending_master_completion[0]); + INIT_LIST_HEAD(&device->pending_master_completion[1]); + INIT_LIST_HEAD(&device->pending_completion[0]); + INIT_LIST_HEAD(&device->pending_completion[1]); device->resync_work.cb = w_resync_timer; device->unplug_work.cb = w_send_write_hint; - device->go_diskless.cb = w_go_diskless; - device->md_sync_work.cb = w_md_sync; device->bm_io_work.w.cb = w_bitmap_io; - device->start_resync_work.cb = w_start_resync; init_timer(&device->resync_timer); init_timer(&device->md_sync_timer); @@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device) drbd_bm_cleanup(device); } - drbd_free_bc(device->ldev); + drbd_free_ldev(device->ldev); device->ldev = NULL; clear_bit(AL_SUSPENDED, &device->flags); @@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device) D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); D_ASSERT(device, list_empty(&device->resync_work.list)); D_ASSERT(device, list_empty(&device->unplug_work.list)); - D_ASSERT(device, list_empty(&device->go_diskless.list)); drbd_set_defaults(device); } @@ -2129,20 +2136,6 @@ Enomem: return -ENOMEM; } -static int drbd_notify_sys(struct notifier_block *this, unsigned long code, - void *unused) -{ - /* just so we have it. you never know what interesting things we - * might want to do here some day... - */ - - return NOTIFY_DONE; -} - -static struct notifier_block drbd_notifier = { - .notifier_call = drbd_notify_sys, -}; - static void drbd_release_all_peer_reqs(struct drbd_device *device) { int rr; @@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref) { struct drbd_device *device = container_of(kref, struct drbd_device, kref); struct drbd_resource *resource = device->resource; - struct drbd_connection *connection; + struct drbd_peer_device *peer_device, *tmp_peer_device; del_timer_sync(&device->request_timer); @@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref) if (device->this_bdev) bdput(device->this_bdev); - drbd_free_bc(device->ldev); + drbd_free_ldev(device->ldev); device->ldev = NULL; drbd_release_all_peer_reqs(device); @@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref) if (device->bitmap) /* should no longer be there. */ drbd_bm_cleanup(device); - __free_page(device->md_io_page); + __free_page(device->md_io.page); put_disk(device->vdisk); blk_cleanup_queue(device->rq_queue); kfree(device->rs_plan_s); - kfree(first_peer_device(device)); - kfree(device); - for_each_connection(connection, resource) - kref_put(&connection->kref, drbd_destroy_connection); + /* not for_each_connection(connection, resource): + * those may have been cleaned up and disassociated already. + */ + for_each_peer_device_safe(peer_device, tmp_peer_device, device) { + kref_put(&peer_device->connection->kref, drbd_destroy_connection); + kfree(peer_device); + } + memset(device, 0xfd, sizeof(*device)); + kfree(device); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws) list_for_each_entry_safe(req, tmp, &writes, tl_requests) { struct drbd_device *device = req->device; struct bio *bio = req->master_bio; - unsigned long start_time = req->start_time; + unsigned long start_jif = req->start_jif; bool expected; expected = @@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws) /* We are not just doing generic_make_request(), * as we want to keep the start_time information. */ inc_ap_bio(device); - __drbd_make_request(device, bio, start_time); + __drbd_make_request(device, bio, start_jif); } } +/* called via drbd_req_put_completion_ref(), + * holds resource->req_lock */ void drbd_restart_request(struct drbd_request *req) { unsigned long flags; @@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref) idr_destroy(&resource->devices); free_cpumask_var(resource->cpu_mask); kfree(resource->name); + memset(resource, 0xf2, sizeof(*resource)); kfree(resource); } @@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource) for_each_connection_safe(connection, tmp, resource) { list_del(&connection->connections); + drbd_debugfs_connection_cleanup(connection); kref_put(&connection->kref, drbd_destroy_connection); } + drbd_debugfs_resource_cleanup(resource); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2318,8 +2321,6 @@ static void drbd_cleanup(void) struct drbd_device *device; struct drbd_resource *resource, *tmp; - unregister_reboot_notifier(&drbd_notifier); - /* first remove proc, * drbdsetup uses it's presence to detect * whether DRBD is loaded. @@ -2335,6 +2336,7 @@ static void drbd_cleanup(void) destroy_workqueue(retry.wq); drbd_genl_unregister(); + drbd_debugfs_cleanup(); idr_for_each_entry(&drbd_devices, device, i) drbd_delete_device(device); @@ -2350,7 +2352,7 @@ static void drbd_cleanup(void) idr_destroy(&drbd_devices); - printk(KERN_INFO "drbd: module cleanup done.\n"); + pr_info("module cleanup done.\n"); } /** @@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, cpumask_bits(new_cpu_mask), nr_cpu_ids); + if (err == -EOVERFLOW) { + /* So what. mask it out. */ + cpumask_var_t tmp_cpu_mask; + if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) { + cpumask_setall(tmp_cpu_mask); + cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask); + drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n", + res_opts->cpu_mask, + strlen(res_opts->cpu_mask) > 12 ? "..." : "", + nr_cpu_ids); + free_cpumask_var(tmp_cpu_mask); + err = 0; + } + } if (err) { drbd_warn(resource, "bitmap_parse() failed with %d\n", err); /* retcode = ERR_CPU_MASK_PARSE; */ @@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name) kref_init(&resource->kref); idr_init(&resource->devices); INIT_LIST_HEAD(&resource->connections); + resource->write_ordering = WO_bdev_flush; list_add_tail_rcu(&resource->resources, &drbd_resources); mutex_init(&resource->conf_update); mutex_init(&resource->adm_mutex); spin_lock_init(&resource->req_lock); + drbd_debugfs_resource_add(resource); return resource; fail_free_name: @@ -2593,7 +2611,7 @@ fail: return NULL; } -/* caller must be under genl_lock() */ +/* caller must be under adm_mutex */ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) { struct drbd_resource *resource; @@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) INIT_LIST_HEAD(&connection->current_epoch->list); connection->epochs = 1; spin_lock_init(&connection->epoch_lock); - connection->write_ordering = WO_bdev_flush; connection->send.seen_any_write_yet = false; connection->send.current_epoch_nr = 0; @@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) kref_get(&resource->kref); list_add_tail_rcu(&connection->connections, &resource->connections); + drbd_debugfs_connection_add(connection); return connection; fail_resource: @@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref) drbd_free_socket(&connection->data); kfree(connection->int_dig_in); kfree(connection->int_dig_vv); + memset(connection, 0xfc, sizeof(*connection)); kfree(connection); kref_put(&resource->kref, drbd_destroy_resource); } @@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device) return -ENOMEM; INIT_WORK(&device->submit.worker, do_submit); - spin_lock_init(&device->submit.lock); INIT_LIST_HEAD(&device->submit.writes); return 0; } @@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig blk_queue_merge_bvec(q, drbd_merge_bvec); q->queue_lock = &resource->req_lock; - device->md_io_page = alloc_page(GFP_KERNEL); - if (!device->md_io_page) + device->md_io.page = alloc_page(GFP_KERNEL); + if (!device->md_io.page) goto out_no_io_page; if (drbd_bm_init(device)) @@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig kref_get(&device->kref); INIT_LIST_HEAD(&device->peer_devices); + INIT_LIST_HEAD(&device->pending_bitmap_io); for_each_connection(connection, resource) { peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); if (!peer_device) @@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig for_each_peer_device(peer_device, device) drbd_connected(peer_device); } - + /* move to create_peer_device() */ + for_each_peer_device(peer_device, device) + drbd_debugfs_peer_device_add(peer_device); + drbd_debugfs_device_add(device); return NO_ERROR; out_idr_remove_vol: @@ -2853,7 +2875,7 @@ out_idr_remove_minor: out_no_minor_idr: drbd_bm_cleanup(device); out_no_bitmap: - __free_page(device->md_io_page); + __free_page(device->md_io.page); out_no_io_page: put_disk(disk); out_no_disk: @@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device) { struct drbd_resource *resource = device->resource; struct drbd_connection *connection; + struct drbd_peer_device *peer_device; int refs = 3; + /* move to free_peer_device() */ + for_each_peer_device(peer_device, device) + drbd_debugfs_peer_device_cleanup(peer_device); + drbd_debugfs_device_cleanup(device); for_each_connection(connection, resource) { idr_remove(&connection->peer_devices, device->vnr); refs++; @@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device) kref_sub(&device->kref, refs, drbd_destroy_device); } -int __init drbd_init(void) +static int __init drbd_init(void) { int err; if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { - printk(KERN_ERR - "drbd: invalid minor_count (%d)\n", minor_count); + pr_err("invalid minor_count (%d)\n", minor_count); #ifdef MODULE return -EINVAL; #else @@ -2897,14 +2923,11 @@ int __init drbd_init(void) err = register_blkdev(DRBD_MAJOR, "drbd"); if (err) { - printk(KERN_ERR - "drbd: unable to register block device major %d\n", + pr_err("unable to register block device major %d\n", DRBD_MAJOR); return err; } - register_reboot_notifier(&drbd_notifier); - /* * allocate all necessary structs */ @@ -2918,7 +2941,7 @@ int __init drbd_init(void) err = drbd_genl_register(); if (err) { - printk(KERN_ERR "drbd: unable to register generic netlink family\n"); + pr_err("unable to register generic netlink family\n"); goto fail; } @@ -2929,38 +2952,39 @@ int __init drbd_init(void) err = -ENOMEM; drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); if (!drbd_proc) { - printk(KERN_ERR "drbd: unable to register proc file\n"); + pr_err("unable to register proc file\n"); goto fail; } retry.wq = create_singlethread_workqueue("drbd-reissue"); if (!retry.wq) { - printk(KERN_ERR "drbd: unable to create retry workqueue\n"); + pr_err("unable to create retry workqueue\n"); goto fail; } INIT_WORK(&retry.worker, do_retry); spin_lock_init(&retry.lock); INIT_LIST_HEAD(&retry.writes); - printk(KERN_INFO "drbd: initialized. " + if (drbd_debugfs_init()) + pr_notice("failed to initialize debugfs -- will not be available\n"); + + pr_info("initialized. " "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); - printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); - printk(KERN_INFO "drbd: registered as block device major %d\n", - DRBD_MAJOR); - + pr_info("%s\n", drbd_buildtag()); + pr_info("registered as block device major %d\n", DRBD_MAJOR); return 0; /* Success! */ fail: drbd_cleanup(); if (err == -ENOMEM) - printk(KERN_ERR "drbd: ran out of memory\n"); + pr_err("ran out of memory\n"); else - printk(KERN_ERR "drbd: initialization failure\n"); + pr_err("initialization failure\n"); return err; } -void drbd_free_bc(struct drbd_backing_dev *ldev) +void drbd_free_ldev(struct drbd_backing_dev *ldev) { if (ldev == NULL) return; @@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev) kfree(ldev); } -void drbd_free_sock(struct drbd_connection *connection) +static void drbd_free_one_sock(struct drbd_socket *ds) { - if (connection->data.socket) { - mutex_lock(&connection->data.mutex); - kernel_sock_shutdown(connection->data.socket, SHUT_RDWR); - sock_release(connection->data.socket); - connection->data.socket = NULL; - mutex_unlock(&connection->data.mutex); - } - if (connection->meta.socket) { - mutex_lock(&connection->meta.mutex); - kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR); - sock_release(connection->meta.socket); - connection->meta.socket = NULL; - mutex_unlock(&connection->meta.mutex); + struct socket *s; + mutex_lock(&ds->mutex); + s = ds->socket; + ds->socket = NULL; + mutex_unlock(&ds->mutex); + if (s) { + /* so debugfs does not need to mutex_lock() */ + synchronize_rcu(); + kernel_sock_shutdown(s, SHUT_RDWR); + sock_release(s); } } +void drbd_free_sock(struct drbd_connection *connection) +{ + if (connection->data.socket) + drbd_free_one_sock(&connection->data); + if (connection->meta.socket) + drbd_free_one_sock(&connection->meta); +} + /* meta data management */ void conn_md_sync(struct drbd_connection *connection) @@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device) if (!get_ldev_if_state(device, D_FAILED)) return; - buffer = drbd_md_get_buffer(device); + buffer = drbd_md_get_buffer(device, __func__); if (!buffer) goto out; @@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev) if (device->state.disk != D_DISKLESS) return ERR_DISK_CONFIGURED; - buffer = drbd_md_get_buffer(device); + buffer = drbd_md_get_buffer(device, __func__); if (!buffer) return ERR_NOMEM; @@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local) * * Sets all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_set_n_write(struct drbd_device *device) +int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local) { int rv = -EIO; - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_md_set_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - drbd_bm_set_all(device); - - rv = drbd_bm_write(device); + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); + drbd_bm_set_all(device); - if (!rv) { - drbd_md_clear_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - } + rv = drbd_bm_write(device); - put_ldev(device); + if (!rv) { + drbd_md_clear_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); } return rv; @@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device) * * Clears all bits in the bitmap and writes the whole bitmap to stable storage. */ -int drbd_bmio_clear_n_write(struct drbd_device *device) +int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local) { - int rv = -EIO; - drbd_resume_al(device); - if (get_ldev_if_state(device, D_ATTACHING)) { - drbd_bm_clear_all(device); - rv = drbd_bm_write(device); - put_ldev(device); - } - - return rv; + drbd_bm_clear_all(device); + return drbd_bm_write(device); } static int w_bitmap_io(struct drbd_work *w, int unused) @@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused) return 0; } -void drbd_ldev_destroy(struct drbd_device *device) -{ - lc_destroy(device->resync); - device->resync = NULL; - lc_destroy(device->act_log); - device->act_log = NULL; - __no_warn(local, - drbd_free_bc(device->ldev); - device->ldev = NULL;); - - clear_bit(GO_DISKLESS, &device->flags); -} - -static int w_go_diskless(struct drbd_work *w, int unused) -{ - struct drbd_device *device = - container_of(w, struct drbd_device, go_diskless); - - D_ASSERT(device, device->state.disk == D_FAILED); - /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will - * inc/dec it frequently. Once we are D_DISKLESS, no one will touch - * the protected members anymore, though, so once put_ldev reaches zero - * again, it will be safe to free them. */ - - /* Try to write changed bitmap pages, read errors may have just - * set some bits outside the area covered by the activity log. - * - * If we have an IO error during the bitmap writeout, - * we will want a full sync next time, just in case. - * (Do we want a specific meta data flag for this?) - * - * If that does not make it to stable storage either, - * we cannot do anything about that anymore. - * - * We still need to check if both bitmap and ldev are present, we may - * end up here after a failed attach, before ldev was even assigned. - */ - if (device->bitmap && device->ldev) { - /* An interrupted resync or similar is allowed to recounts bits - * while we detach. - * Any modifications would not be expected anymore, though. - */ - if (drbd_bitmap_io_from_worker(device, drbd_bm_write, - "detach", BM_LOCKED_TEST_ALLOWED)) { - if (test_bit(WAS_READ_ERROR, &device->flags)) { - drbd_md_set_flag(device, MDF_FULL_SYNC); - drbd_md_sync(device); - } - } - } - - drbd_force_state(device, NS(disk, D_DISKLESS)); - return 0; -} - /** * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap * @device: DRBD device. @@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused) * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be * called from worker context. It MUST NOT be used while a previous such * work is still pending! + * + * Its worker function encloses the call of io_fn() by get_ldev() and + * put_ldev(). */ void drbd_queue_bitmap_io(struct drbd_device *device, int (*io_fn)(struct drbd_device *), @@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag) static void md_sync_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; - - /* must not double-queue! */ - if (list_empty(&device->md_sync_work.list)) - drbd_queue_work_front(&first_peer_device(device)->connection->sender_work, - &device->md_sync_work); -} - -static int w_md_sync(struct drbd_work *w, int unused) -{ - struct drbd_device *device = - container_of(w, struct drbd_device, md_sync_work); - - drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); -#ifdef DEBUG - drbd_warn(device, "last md_mark_dirty: %s:%u\n", - device->last_md_mark_dirty.func, device->last_md_mark_dirty.line); -#endif - drbd_md_sync(device); - return 0; + drbd_device_post_work(device, MD_SYNC); } const char *cmdname(enum drbd_packet cmd) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3f2e167..1cd47df 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -23,6 +23,8 @@ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/drbd.h> #include <linux/in.h> @@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) { genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); if (genlmsg_reply(skb, info)) - printk(KERN_ERR "drbd: error sending genl reply\n"); + pr_err("error sending genl reply\n"); } /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only @@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection) } enum drbd_state_rv -drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) +drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force) { + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; const int max_tries = 4; enum drbd_state_rv rv = SS_UNKNOWN_ERROR; struct net_conf *nc; @@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) device->state.disk == D_CONSISTENT && mask.pdsk == 0) { D_ASSERT(device, device->state.pdsk == D_UNKNOWN); - if (conn_try_outdate_peer(first_peer_device(device)->connection)) { + if (conn_try_outdate_peer(connection)) { val.disk = D_UP_TO_DATE; mask.disk = D_MASK; } @@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) if (rv == SS_NOTHING_TO_DO) goto out; if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { - if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) { + if (!conn_try_outdate_peer(connection) && force) { drbd_warn(device, "Forced into split brain situation!\n"); mask.pdsk = D_MASK; val.pdsk = D_OUTDATED; @@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) retry at most once more in this case. */ int timeo; rcu_read_lock(); - nc = rcu_dereference(first_peer_device(device)->connection->net_conf); + nc = rcu_dereference(connection->net_conf); timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; rcu_read_unlock(); schedule_timeout_interruptible(timeo); @@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) /* FIXME also wait for all pending P_BARRIER_ACK? */ if (new_role == R_SECONDARY) { - set_disk_ro(device->vdisk, true); if (get_ldev(device)) { device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; put_ldev(device); } } else { - /* Called from drbd_adm_set_role only. - * We are still holding the conf_update mutex. */ - nc = first_peer_device(device)->connection->net_conf; + mutex_lock(&device->resource->conf_update); + nc = connection->net_conf; if (nc) nc->discard_my_data = 0; /* without copy; single bit op is atomic */ + mutex_unlock(&device->resource->conf_update); - set_disk_ro(device->vdisk, false); if (get_ldev(device)) { if (((device->state.conn < C_CONNECTED || device->state.pdsk <= D_FAILED) @@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) if (device->state.conn >= C_WF_REPORT_PARAMS) { /* if this was forced, we should consider sync */ if (forced) - drbd_send_uuids(first_peer_device(device)); - drbd_send_current_state(first_peer_device(device)); + drbd_send_uuids(peer_device); + drbd_send_current_state(peer_device); } drbd_md_sync(device); - + set_disk_ro(device->vdisk, new_role == R_SECONDARY); kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); out: mutex_unlock(device->state_mutex); @@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct * still lock the act_log to not trigger ASSERTs there. */ drbd_suspend_io(device); - buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */ + buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */ if (!buffer) { drbd_resume_io(device); return DS_ERROR; @@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct if (la_size_changed || md_moved || rs) { u32 prev_flags; + /* We do some synchronous IO below, which may take some time. + * Clear the timer, to avoid scary "timer expired!" messages, + * "Superblock" is written out at least twice below, anyways. */ + del_timer(&device->md_sync_timer); drbd_al_shrink(device); /* All extents inactive. */ prev_flags = md->flags; @@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc) return 0; } -static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) +static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev, + unsigned int max_bio_size) { struct request_queue * const q = device->rq_queue; unsigned int max_hw_sectors = max_bio_size >> 9; unsigned int max_segments = 0; struct request_queue *b = NULL; - if (get_ldev_if_state(device, D_ATTACHING)) { - b = device->ldev->backing_bdev->bd_disk->queue; + if (bdev) { + b = bdev->backing_bdev->bd_disk->queue; max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); rcu_read_lock(); @@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ b->backing_dev_info.ra_pages); q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; } - put_ldev(device); } } -void drbd_reconsider_max_bio_size(struct drbd_device *device) +void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev) { unsigned int now, new, local, peer; @@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ - if (get_ldev_if_state(device, D_ATTACHING)) { - local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; + if (bdev) { + local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9; device->local_max_bio_size = local; - put_ldev(device); } local = min(local, DRBD_MAX_BIO_SIZE); @@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) if (new != now) drbd_info(device, "max BIO size = %u\n", new); - drbd_setup_queue_param(device, new); + drbd_setup_queue_param(device, bdev, new); } /* Starts the worker thread */ @@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; } +static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b) +{ + return a->disk_barrier != b->disk_barrier || + a->disk_flushes != b->disk_flushes || + a->disk_drain != b->disk_drain; +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) else set_bit(MD_NO_FUA, &device->flags); - drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); + if (write_ordering_changed(old_disk_conf, new_disk_conf)) + drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush); drbd_md_sync(device); @@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; struct drbd_device *device; + struct drbd_peer_device *peer_device; + struct drbd_connection *connection; int err; enum drbd_ret_code retcode; enum determine_dev_size dd; @@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device = adm_ctx.device; mutex_lock(&adm_ctx.resource->adm_mutex); - conn_reconfig_start(first_peer_device(device)->connection); + peer_device = first_peer_device(device); + connection = peer_device ? peer_device->connection : NULL; + conn_reconfig_start(connection); /* if you want to reconfigure, please tear down first */ if (device->state.disk > D_DISKLESS) { @@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) * drbd_ldev_destroy is done already, we may end up here very fast, * e.g. if someone calls attach from the on-io-error handler, * to realize a "hot spare" feature (not that I'd recommend that) */ - wait_event(device->misc_wait, !atomic_read(&device->local_cnt)); + wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags)); /* make sure there is no leftover from previous force-detach attempts */ clear_bit(FORCE_DETACH, &device->flags); @@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) goto fail; rcu_read_lock(); - nc = rcu_dereference(first_peer_device(device)->connection->net_conf); + nc = rcu_dereference(connection->net_conf); if (nc) { if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { rcu_read_unlock(); @@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) */ wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); /* and for any other previously queued work */ - drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); + drbd_flush_workqueue(&connection->sender_work); rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); retcode = rv; /* FIXME: Type mismatch. */ @@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) new_disk_conf = NULL; new_plan = NULL; - drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); + drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush); if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) set_bit(CRASHED_PRIMARY, &device->flags); @@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) device->read_cnt = 0; device->writ_cnt = 0; - drbd_reconsider_max_bio_size(device); + drbd_reconsider_max_bio_size(device, device->ldev); /* If I am currently not R_PRIMARY, * but meta data primary indicator is set, @@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); put_ldev(device); - conn_reconfig_done(first_peer_device(device)->connection); + conn_reconfig_done(connection); mutex_unlock(&adm_ctx.resource->adm_mutex); drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) drbd_force_state(device, NS(disk, D_DISKLESS)); drbd_md_sync(device); fail: - conn_reconfig_done(first_peer_device(device)->connection); + conn_reconfig_done(connection); if (nbc) { if (nbc->backing_bdev) blkdev_put(nbc->backing_bdev, @@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force) } drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ - drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */ + drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */ retcode = drbd_request_state(device, NS(disk, D_FAILED)); drbd_md_put_buffer(device); /* D_FAILED will transition to DISKLESS. */ @@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -2704,7 +2727,7 @@ out: return 0; } -static int drbd_bmio_set_susp_al(struct drbd_device *device) +static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local) { int rv; @@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) if (retcode != NO_ERROR) goto out; - mutex_lock(&adm_ctx.resource->adm_mutex); device = adm_ctx.device; + if (!get_ldev(device)) { + retcode = ERR_NO_DISK; + goto out; + } + + mutex_lock(&adm_ctx.resource->adm_mutex); /* If there is still bitmap IO pending, probably because of a previous * resync just being finished, wait for it before requesting a new resync. @@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); drbd_resume_io(device); mutex_unlock(&adm_ctx.resource->adm_mutex); + put_ldev(device); out: drbd_adm_finish(&adm_ctx, info, retcode); return 0; @@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc return list_first_entry(&resource->connections, struct drbd_connection, connections); } -int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, +static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, const struct sib_info *sib) { struct drbd_resource *resource = device->resource; @@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib) unsigned seq; int err = -ENOMEM; - if (sib->sib_reason == SIB_SYNC_PROGRESS) { - if (time_after(jiffies, device->rs_last_bcast + HZ)) - device->rs_last_bcast = jiffies; - else - return; - } - seq = atomic_inc_return(&drbd_genl_seq); msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); if (!msg) diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 89736bd..06e6147 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c @@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) seq_printf(seq, "%ld", v); } +static void drbd_get_syncer_progress(struct drbd_device *device, + union drbd_dev_state state, unsigned long *rs_total, + unsigned long *bits_left, unsigned int *per_mil_done) +{ + /* this is to break it at compile time when we change that, in case we + * want to support more than (1<<32) bits on a 32bit arch. */ + typecheck(unsigned long, device->rs_total); + *rs_total = device->rs_total; + + /* note: both rs_total and rs_left are in bits, i.e. in + * units of BM_BLOCK_SIZE. + * for the percentage, we don't care. */ + + if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) + *bits_left = device->ov_left; + else + *bits_left = drbd_bm_total_weight(device) - device->rs_failed; + /* >> 10 to prevent overflow, + * +1 to prevent division by zero */ + if (*bits_left > *rs_total) { + /* D'oh. Maybe a logic bug somewhere. More likely just a race + * between state change and reset of rs_total. + */ + *bits_left = *rs_total; + *per_mil_done = *rs_total ? 0 : 1000; + } else { + /* Make sure the division happens in long context. + * We allow up to one petabyte storage right now, + * at a granularity of 4k per bit that is 2**38 bits. + * After shift right and multiplication by 1000, + * this should still fit easily into a 32bit long, + * so we don't need a 64bit division on 32bit arch. + * Note: currently we don't support such large bitmaps on 32bit + * arch anyways, but no harm done to be prepared for it here. + */ + unsigned int shift = *rs_total > UINT_MAX ? 16 : 10; + unsigned long left = *bits_left >> shift; + unsigned long total = 1UL + (*rs_total >> shift); + unsigned long tmp = 1000UL - left * 1000UL/total; + *per_mil_done = tmp; + } +} + + /*lge * progress bars shamelessly adapted from driver/md/md.c * output looks like * [=====>..............] 33.5% (23456/123456) * finish: 2:20:20 speed: 6,345 (6,456) K/sec */ -static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq) +static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq, + union drbd_dev_state state) { - unsigned long db, dt, dbdt, rt, rs_left; + unsigned long db, dt, dbdt, rt, rs_total, rs_left; unsigned int res; int i, x, y; int stalled = 0; - drbd_get_syncer_progress(device, &rs_left, &res); + drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res); x = res/50; y = 20-x; @@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se seq_printf(seq, "."); seq_printf(seq, "] "); - if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) + if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T) seq_printf(seq, "verified:"); else seq_printf(seq, "sync'ed:"); seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); /* if more than a few GB, display in MB */ - if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) + if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) seq_printf(seq, "(%lu/%lu)M", (unsigned long) Bit2KB(rs_left >> 10), - (unsigned long) Bit2KB(device->rs_total >> 10)); + (unsigned long) Bit2KB(rs_total >> 10)); else seq_printf(seq, "(%lu/%lu)K\n\t", (unsigned long) Bit2KB(rs_left), - (unsigned long) Bit2KB(device->rs_total)); + (unsigned long) Bit2KB(rs_total)); /* see drivers/md/md.c * We do not want to overflow, so the order of operands and @@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se dt = (jiffies - device->rs_start - device->rs_paused) / HZ; if (dt == 0) dt = 1; - db = device->rs_total - rs_left; + db = rs_total - rs_left; dbdt = Bit2KB(db/dt); seq_printf_with_thousands_grouping(seq, dbdt); seq_printf(seq, ")"); - if (device->state.conn == C_SYNC_TARGET || - device->state.conn == C_VERIFY_S) { + if (state.conn == C_SYNC_TARGET || + state.conn == C_VERIFY_S) { seq_printf(seq, " want: "); seq_printf_with_thousands_grouping(seq, device->c_sync_rate); } @@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se unsigned long bm_bits = drbd_bm_bits(device); unsigned long bit_pos; unsigned long long stop_sector = 0; - if (device->state.conn == C_VERIFY_S || - device->state.conn == C_VERIFY_T) { + if (state.conn == C_VERIFY_S || + state.conn == C_VERIFY_T) { bit_pos = bm_bits - device->ov_left; if (verify_can_do_stop_sector(device)) stop_sector = device->ov_stop_sector; @@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se } } -static void resync_dump_detail(struct seq_file *seq, struct lc_element *e) -{ - struct bm_extent *bme = lc_entry(e, struct bm_extent, lce); - - seq_printf(seq, "%5d %s %s\n", bme->rs_left, - bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------", - bme->flags & BME_LOCKED ? "LOCKED" : "------" - ); -} - static int drbd_seq_show(struct seq_file *seq, void *v) { int i, prev_i = -1; const char *sn; struct drbd_device *device; struct net_conf *nc; + union drbd_dev_state state; char wp; static char write_ordering_chars[] = { @@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n"); prev_i = i; - sn = drbd_conn_str(device->state.conn); + state = device->state; + sn = drbd_conn_str(state.conn); - if (device->state.conn == C_STANDALONE && - device->state.disk == D_DISKLESS && - device->state.role == R_SECONDARY) { + if (state.conn == C_STANDALONE && + state.disk == D_DISKLESS && + state.role == R_SECONDARY) { seq_printf(seq, "%2d: cs:Unconfigured\n", i); } else { /* reset device->congestion_reason */ @@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v) " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", i, sn, - drbd_role_str(device->state.role), - drbd_role_str(device->state.peer), - drbd_disk_str(device->state.disk), - drbd_disk_str(device->state.pdsk), + drbd_role_str(state.role), + drbd_role_str(state.peer), + drbd_disk_str(state.disk), + drbd_disk_str(state.pdsk), wp, drbd_suspended(device) ? 's' : 'r', - device->state.aftr_isp ? 'a' : '-', - device->state.peer_isp ? 'p' : '-', - device->state.user_isp ? 'u' : '-', + state.aftr_isp ? 'a' : '-', + state.peer_isp ? 'p' : '-', + state.user_isp ? 'u' : '-', device->congestion_reason ?: '-', test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', device->send_cnt/2, @@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v) atomic_read(&device->unacked_cnt), atomic_read(&device->ap_bio_cnt), first_peer_device(device)->connection->epochs, - write_ordering_chars[first_peer_device(device)->connection->write_ordering] + write_ordering_chars[device->resource->write_ordering] ); seq_printf(seq, " oos:%llu\n", Bit2KB((unsigned long long) drbd_bm_total_weight(device))); } - if (device->state.conn == C_SYNC_SOURCE || - device->state.conn == C_SYNC_TARGET || - device->state.conn == C_VERIFY_S || - device->state.conn == C_VERIFY_T) - drbd_syncer_progress(device, seq); + if (state.conn == C_SYNC_SOURCE || + state.conn == C_SYNC_TARGET || + state.conn == C_VERIFY_S || + state.conn == C_VERIFY_T) + drbd_syncer_progress(device, seq, state); if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { lc_seq_printf_stats(seq, device->resync); @@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v) put_ldev(device); } - if (proc_details >= 2) { - if (device->resync) { - lc_seq_dump_details(seq, device->resync, "rs_left", - resync_dump_detail); - } - } + if (proc_details >= 2) + seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt)); } rcu_read_unlock(); @@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file) int err; if (try_module_get(THIS_MODULE)) { - err = single_open(file, drbd_seq_show, PDE_DATA(inode)); + err = single_open(file, drbd_seq_show, NULL); if (err) module_put(THIS_MODULE); return err; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5b17ec8..9342b8d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto goto fail; } + memset(peer_req, 0, sizeof(*peer_req)); + INIT_LIST_HEAD(&peer_req->w.list); drbd_clear_interval(&peer_req->i); peer_req->i.size = data_size; peer_req->i.sector = sector; - peer_req->i.local = false; - peer_req->i.waiting = false; - - peer_req->epoch = NULL; + peer_req->submit_jif = jiffies; peer_req->peer_device = peer_device; peer_req->pages = page; - atomic_set(&peer_req->pending_bios, 0); - peer_req->flags = 0; /* * The block_id is opaque to the receiver. It is not endianness * converted, and sent back to the sender unchanged. @@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, int is_net) { + might_sleep(); if (peer_req->flags & EE_HAS_DIGEST) kfree(peer_req->digest); drbd_free_pages(device, peer_req->pages, is_net); D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); D_ASSERT(device, drbd_interval_empty(&peer_req->i)); + if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) { + peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; + drbd_al_complete_io(device, &peer_req->i); + } mempool_free(peer_req, drbd_ee_mempool); } @@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke { unsigned int header_size = drbd_header_size(connection); struct packet_info pi; + struct net_conf *nc; int err; + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + if (!nc) { + rcu_read_unlock(); + return -EIO; + } + sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10; + rcu_read_unlock(); + err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); if (err != header_size) { if (err >= 0) @@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke * drbd_socket_okay() - Free the socket if its connection is not okay * @sock: pointer to the pointer to the socket. */ -static int drbd_socket_okay(struct socket **sock) +static bool drbd_socket_okay(struct socket **sock) { int rr; char tb[4]; @@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock) return false; } } + +static bool connection_established(struct drbd_connection *connection, + struct socket **sock1, + struct socket **sock2) +{ + struct net_conf *nc; + int timeout; + bool ok; + + if (!*sock1 || !*sock2) + return false; + + rcu_read_lock(); + nc = rcu_dereference(connection->net_conf); + timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10; + rcu_read_unlock(); + schedule_timeout_interruptible(timeout); + + ok = drbd_socket_okay(sock1); + ok = drbd_socket_okay(sock2) && ok; + + return ok; +} + /* Gets called if a connection is established, or if a new minor gets created in a connection */ int drbd_connected(struct drbd_peer_device *peer_device) @@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection) struct drbd_socket sock, msock; struct drbd_peer_device *peer_device; struct net_conf *nc; - int vnr, timeout, h, ok; - bool discard_my_data; + int vnr, timeout, h; + bool discard_my_data, ok; enum drbd_state_rv rv; struct accept_wait_data ad = { .connection = connection, @@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection) } } - if (sock.socket && msock.socket) { - rcu_read_lock(); - nc = rcu_dereference(connection->net_conf); - timeout = nc->ping_timeo * HZ / 10; - rcu_read_unlock(); - schedule_timeout_interruptible(timeout); - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; - if (ok) - break; - } + if (connection_established(connection, &sock.socket, &msock.socket)) + break; retry: s = drbd_wait_for_connect(connection, &ad); @@ -969,8 +996,7 @@ randomize: goto out_release_sockets; } - ok = drbd_socket_okay(&sock.socket); - ok = drbd_socket_okay(&msock.socket) && ok; + ok = connection_established(connection, &sock.socket, &msock.socket); } while (!ok); if (ad.s_listen) @@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection) struct drbd_peer_device *peer_device; int vnr; - if (connection->write_ordering >= WO_bdev_flush) { + if (connection->resource->write_ordering >= WO_bdev_flush) { rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { struct drbd_device *device = peer_device->device; @@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection) kref_get(&device->kref); rcu_read_unlock(); + /* Right now, we have only this one synchronous code path + * for flushes between request epochs. + * We may want to make those asynchronous, + * or at least parallelize the flushes to the volume devices. + */ + device->flush_jif = jiffies; + set_bit(FLUSH_PENDING, &device->flags); rv = blkdev_issue_flush(device->ldev->backing_bdev, GFP_NOIO, NULL); + clear_bit(FLUSH_PENDING, &device->flags); if (rv) { drbd_info(device, "local disk flush failed with status %d\n", rv); /* would rather check on EOPNOTSUPP, but that is not reliable. * don't try again for ANY return value != 0 * if (rv == -EOPNOTSUPP) */ - drbd_bump_write_ordering(connection, WO_drain_io); + drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io); } put_ldev(device); kref_put(&device->kref, drbd_destroy_device); @@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio return rv; } +static enum write_ordering_e +max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo) +{ + struct disk_conf *dc; + + dc = rcu_dereference(bdev->disk_conf); + + if (wo == WO_bdev_flush && !dc->disk_flushes) + wo = WO_drain_io; + if (wo == WO_drain_io && !dc->disk_drain) + wo = WO_none; + + return wo; +} + /** * drbd_bump_write_ordering() - Fall back to an other write ordering method * @connection: DRBD connection. * @wo: Write ordering method to try. */ -void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo) +void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev, + enum write_ordering_e wo) { - struct disk_conf *dc; - struct drbd_peer_device *peer_device; + struct drbd_device *device; enum write_ordering_e pwo; int vnr; static char *write_ordering_str[] = { @@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord [WO_bdev_flush] = "flush", }; - pwo = connection->write_ordering; - wo = min(pwo, wo); + pwo = resource->write_ordering; + if (wo != WO_bdev_flush) + wo = min(pwo, wo); rcu_read_lock(); - idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { - struct drbd_device *device = peer_device->device; + idr_for_each_entry(&resource->devices, device, vnr) { + if (get_ldev(device)) { + wo = max_allowed_wo(device->ldev, wo); + if (device->ldev == bdev) + bdev = NULL; + put_ldev(device); + } + } - if (!get_ldev_if_state(device, D_ATTACHING)) - continue; - dc = rcu_dereference(device->ldev->disk_conf); + if (bdev) + wo = max_allowed_wo(bdev, wo); - if (wo == WO_bdev_flush && !dc->disk_flushes) - wo = WO_drain_io; - if (wo == WO_drain_io && !dc->disk_drain) - wo = WO_none; - put_ldev(device); - } rcu_read_unlock(); - connection->write_ordering = wo; - if (pwo != connection->write_ordering || wo == WO_bdev_flush) - drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]); + + resource->write_ordering = wo; + if (pwo != resource->write_ordering || wo == WO_bdev_flush) + drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]); } /** @@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device, /* wait for all pending IO completions, before we start * zeroing things out. */ conn_wait_active_ee_empty(first_peer_device(device)->connection); + /* add it to the active list now, + * so we can find it to present it in debugfs */ + peer_req->submit_jif = jiffies; + peer_req->flags |= EE_SUBMITTED; + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->active_ee); + spin_unlock_irq(&device->resource->req_lock); if (blkdev_issue_zeroout(device->ldev->backing_bdev, sector, ds >> 9, GFP_NOIO)) peer_req->flags |= EE_WAS_ERROR; @@ -1398,6 +1455,9 @@ submit: D_ASSERT(device, page == NULL); atomic_set(&peer_req->pending_bios, n_bios); + /* for debugfs: update timestamp, mark as submitted */ + peer_req->submit_jif = jiffies; + peer_req->flags |= EE_SUBMITTED; do { bio = bios; bios = bios->bi_next; @@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf * R_PRIMARY crashes now. * Therefore we must send the barrier_ack after the barrier request was * completed. */ - switch (connection->write_ordering) { + switch (connection->resource->write_ordering) { case WO_none: if (rv == FE_RECYCLED) return 0; @@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf return 0; default: - drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering); + drbd_err(connection, "Strangeness in connection->write_ordering %d\n", + connection->resource->write_ordering); return -EIO; } @@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, struct drbd_peer_request *peer_req; struct page *page; int dgs, ds, err; - int data_size = pi->size; + unsigned int data_size = pi->size; void *dig_in = peer_device->connection->int_dig_in; void *dig_vv = peer_device->connection->int_dig_vv; unsigned long *data; @@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, if (!peer_req) return NULL; + peer_req->flags |= EE_WRITE; if (trim) return peer_req; @@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto * respective _drbd_clear_done_ee */ peer_req->w.cb = e_end_resync_block; + peer_req->submit_jif = jiffies; spin_lock_irq(&device->resource->req_lock); - list_add(&peer_req->w.list, &device->sync_ee); + list_add_tail(&peer_req->w.list, &device->sync_ee); spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); @@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel) } dec_unacked(device); } + /* we delete from the conflict detection hash _after_ we sent out the * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ if (peer_req->flags & EE_IN_INTERVAL_TREE) { @@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device, drbd_for_each_overlap(i, &device->write_requests, sector, size) { if (i == &peer_req->i) continue; + if (i->completed) + continue; if (!i->local) { /* @@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device, (unsigned long long)sector, size, superseded ? "local" : "remote"); - inc_unacked(device); peer_req->w.cb = superseded ? e_send_superseded : e_send_retry_write; list_add_tail(&peer_req->w.list, &device->done_ee); @@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * { struct drbd_peer_device *peer_device; struct drbd_device *device; + struct net_conf *nc; sector_t sector; struct drbd_peer_request *peer_req; struct p_data *p = pi->data; @@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * } peer_req->w.cb = e_end_block; + peer_req->submit_jif = jiffies; + peer_req->flags |= EE_APPLICATION; dp_flags = be32_to_cpu(p->dp_flags); rw |= wire_flags_to_bio(dp_flags); @@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * spin_unlock(&connection->epoch_lock); rcu_read_lock(); - tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; + nc = rcu_dereference(peer_device->connection->net_conf); + tp = nc->two_primaries; + if (peer_device->connection->agreed_pro_version < 100) { + switch (nc->wire_protocol) { + case DRBD_PROT_C: + dp_flags |= DP_SEND_WRITE_ACK; + break; + case DRBD_PROT_B: + dp_flags |= DP_SEND_RECEIVE_ACK; + break; + } + } rcu_read_unlock(); + + if (dp_flags & DP_SEND_WRITE_ACK) { + peer_req->flags |= EE_SEND_WRITE_ACK; + inc_unacked(device); + /* corresponding dec_unacked() in e_end_block() + * respective _drbd_clear_done_ee */ + } + + if (dp_flags & DP_SEND_RECEIVE_ACK) { + /* I really don't like it that the receiver thread + * sends on the msock, but anyways */ + drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); + } + if (tp) { + /* two primaries implies protocol C */ + D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK); peer_req->flags |= EE_IN_INTERVAL_TREE; err = wait_for_and_update_peer_seq(peer_device, peer_seq); if (err) @@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * * active_ee to become empty in drbd_submit_peer_request(); * better not add ourselves here. */ if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) - list_add(&peer_req->w.list, &device->active_ee); + list_add_tail(&peer_req->w.list, &device->active_ee); spin_unlock_irq(&device->resource->req_lock); if (device->state.conn == C_SYNC_TARGET) wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); - if (peer_device->connection->agreed_pro_version < 100) { - rcu_read_lock(); - switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) { - case DRBD_PROT_C: - dp_flags |= DP_SEND_WRITE_ACK; - break; - case DRBD_PROT_B: - dp_flags |= DP_SEND_RECEIVE_ACK; - break; - } - rcu_read_unlock(); - } - - if (dp_flags & DP_SEND_WRITE_ACK) { - peer_req->flags |= EE_SEND_WRITE_ACK; - inc_unacked(device); - /* corresponding dec_unacked() in e_end_block() - * respective _drbd_clear_done_ee */ - } - - if (dp_flags & DP_SEND_RECEIVE_ACK) { - /* I really don't like it that the receiver thread - * sends on the msock, but anyways */ - drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req); - } - if (device->state.pdsk < D_INCONSISTENT) { /* In case we have the only disk of the cluster, */ drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); - peer_req->flags |= EE_CALL_AL_COMPLETE_IO; peer_req->flags &= ~EE_MAY_SET_IN_SYNC; - drbd_al_begin_io(device, &peer_req->i, true); + drbd_al_begin_io(device, &peer_req->i); + peer_req->flags |= EE_CALL_AL_COMPLETE_IO; } err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); @@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * list_del(&peer_req->w.list); drbd_remove_epoch_entry_interval(device, peer_req); spin_unlock_irq(&device->resource->req_lock); - if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) + if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) { + peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; drbd_al_complete_io(device, &peer_req->i); + } out_interrupted: drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); @@ -2368,13 +2439,14 @@ out_interrupted: * The current sync rate used here uses only the most recent two step marks, * to have a short time average so we can react faster. */ -bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) +bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, + bool throttle_if_app_is_waiting) { struct lc_element *tmp; - bool throttle = true; + bool throttle = drbd_rs_c_min_rate_throttle(device); - if (!drbd_rs_c_min_rate_throttle(device)) - return false; + if (!throttle || throttle_if_app_is_waiting) + return throttle; spin_lock_irq(&device->al_lock); tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); @@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); if (test_bit(BME_PRIORITY, &bm_ext->flags)) throttle = false; - /* Do not slow down if app IO is already waiting for this extent */ + /* Do not slow down if app IO is already waiting for this extent, + * and our progress is necessary for application IO to complete. */ } spin_unlock_irq(&device->al_lock); @@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + (int)part_stat_read(&disk->part0, sectors[1]) - atomic_read(&device->rs_sect_ev); - if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { + + if (atomic_read(&device->ap_actlog_cnt) + || !device->rs_last_events || curr_events - device->rs_last_events > 64) { unsigned long rs_left; int i; @@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; /* application IO, don't drbd_rs_begin_io */ + peer_req->flags |= EE_APPLICATION; goto submit; case P_RS_DATA_REQUEST: @@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet peer_req->w.cb = w_e_end_csum_rs_req; /* used in the sector offset progress display */ device->bm_resync_fo = BM_SECT_TO_BIT(sector); + /* remember to report stats in drbd_resync_finished */ + device->use_csums = true; } else if (pi->cmd == P_OV_REPLY) { /* track progress, we may need to throttle */ atomic_add(size >> 9, &device->rs_sect_in); @@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet * we would also throttle its application reads. * In that case, throttling is done on the SyncTarget only. */ - if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector)) + + /* Even though this may be a resync request, we do add to "read_ee"; + * "sync_ee" is only used for resync WRITEs. + * Add to list early, so debugfs can find this request + * even if we have to sleep below. */ + spin_lock_irq(&device->resource->req_lock); + list_add_tail(&peer_req->w.list, &device->read_ee); + spin_unlock_irq(&device->resource->req_lock); + + update_receiver_timing_details(connection, drbd_rs_should_slow_down); + if (device->state.peer != R_PRIMARY + && drbd_rs_should_slow_down(device, sector, false)) schedule_timeout_uninterruptible(HZ/10); + update_receiver_timing_details(connection, drbd_rs_begin_io); if (drbd_rs_begin_io(device, sector)) goto out_free_e; @@ -2604,22 +2694,20 @@ submit_for_resync: atomic_add(size >> 9, &device->rs_sect_ev); submit: + update_receiver_timing_details(connection, drbd_submit_peer_request); inc_unacked(device); - spin_lock_irq(&device->resource->req_lock); - list_add_tail(&peer_req->w.list, &device->read_ee); - spin_unlock_irq(&device->resource->req_lock); - if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) return 0; /* don't care for the reason here */ drbd_err(device, "submit failed, triggering re-connect\n"); + +out_free_e: spin_lock_irq(&device->resource->req_lock); list_del(&peer_req->w.list); spin_unlock_irq(&device->resource->req_lock); /* no drbd_rs_complete_io(), we are dropping the connection anyways */ -out_free_e: put_ldev(device); drbd_free_peer_req(device, peer_req); return -EIO; @@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid, -1091 requires proto 91 -1096 requires proto 96 */ -static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local) +static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local) { + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; u64 self, peer; int i, j; @@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { - if (first_peer_device(device)->connection->agreed_pro_version < 91) + if (connection->agreed_pro_version < 91) return -1091; if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && @@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { - if (first_peer_device(device)->connection->agreed_pro_version < 91) + if (connection->agreed_pro_version < 91) return -1091; if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && @@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho case 1: /* self_pri && !peer_pri */ return 1; case 2: /* !self_pri && peer_pri */ return -1; case 3: /* self_pri && peer_pri */ - dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); + dc = test_bit(RESOLVE_CONFLICTS, &connection->flags); return dc ? -1 : 1; } } @@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho *rule_nr = 51; peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (first_peer_device(device)->connection->agreed_pro_version < 96 ? + if (connection->agreed_pro_version < 96 ? (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of the peer's UUIDs. */ - if (first_peer_device(device)->connection->agreed_pro_version < 91) + if (connection->agreed_pro_version < 91) return -1091; device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; @@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho *rule_nr = 71; self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); if (self == peer) { - if (first_peer_device(device)->connection->agreed_pro_version < 96 ? + if (connection->agreed_pro_version < 96 ? (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { /* The last P_SYNC_UUID did not get though. Undo the last start of resync as sync source modifications of our UUIDs. */ - if (first_peer_device(device)->connection->agreed_pro_version < 91) + if (connection->agreed_pro_version < 91) return -1091; __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); @@ -3352,8 +3442,7 @@ disconnect: * return: NULL (alg name was "") * ERR_PTR(error) if something goes wrong * or the crypto hash ptr, if it worked out ok. */ -static -struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, +static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device, const char *alg, const char *name) { struct crypto_hash *tfm; @@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info struct drbd_device *device; struct p_sizes *p = pi->data; enum determine_dev_size dd = DS_UNCHANGED; - sector_t p_size, p_usize, my_usize; + sector_t p_size, p_usize, p_csize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); + p_csize = be64_to_cpu(p->c_size); /* just store the peer's disk size for now. * we still need to figure out whether we accept that. */ @@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info } device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); - drbd_reconsider_max_bio_size(device); /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). In case we cleared the QUEUE_FLAG_DISCARD from our queue in drbd_reconsider_max_bio_size(), we can be sure that after @@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info ddsf = be16_to_cpu(p->dds_flags); if (get_ldev(device)) { + drbd_reconsider_max_bio_size(device, device->ldev); dd = drbd_determine_dev_size(device, ddsf, NULL); put_ldev(device); if (dd == DS_ERROR) return -EIO; drbd_md_sync(device); } else { - /* I am diskless, need to accept the peer's size. */ - drbd_set_my_capacity(device, p_size); + /* + * I am diskless, need to accept the peer's *current* size. + * I must NOT accept the peers backing disk size, + * it may have been larger than mine all along... + * + * At this point, the peer knows more about my disk, or at + * least about what we last agreed upon, than myself. + * So if his c_size is less than his d_size, the most likely + * reason is that *my* d_size was smaller last time we checked. + * + * However, if he sends a zero current size, + * take his (user-capped or) backing disk size anyways. + */ + drbd_reconsider_max_bio_size(device, NULL); + drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); } if (get_ldev(device)) { @@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection) struct data_cmd *cmd; drbd_thread_current_set_cpu(&connection->receiver); + update_receiver_timing_details(connection, drbd_recv_header); if (drbd_recv_header(connection, &pi)) goto err_out; @@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection) } if (shs) { + update_receiver_timing_details(connection, drbd_recv_all_warn); err = drbd_recv_all_warn(connection, pi.data, shs); if (err) goto err_out; pi.size -= shs; } + update_receiver_timing_details(connection, cmd->fn); err = cmd->fn(connection, &pi); if (err) { drbd_err(connection, "error receiving %s, e: %d l: %d!\n", diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 09803d0..c67717d 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) { int rw = bio_data_dir(req->master_bio); - unsigned long duration = jiffies - req->start_time; + unsigned long duration = jiffies - req->start_jif; int cpu; cpu = part_stat_lock(); part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); @@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, { struct drbd_request *req; - req = mempool_alloc(drbd_request_mempool, GFP_NOIO); + req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO); if (!req) return NULL; @@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, INIT_LIST_HEAD(&req->tl_requests); INIT_LIST_HEAD(&req->w.list); + INIT_LIST_HEAD(&req->req_pending_master_completion); + INIT_LIST_HEAD(&req->req_pending_local); /* one reference to be put by __drbd_make_request */ atomic_set(&req->completion_ref, 1); @@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, return req; } +static void drbd_remove_request_interval(struct rb_root *root, + struct drbd_request *req) +{ + struct drbd_device *device = req->device; + struct drbd_interval *i = &req->i; + + drbd_remove_interval(root, i); + + /* Wake up any processes waiting for this request to complete. */ + if (i->waiting) + wake_up(&device->misc_wait); +} + void drbd_req_destroy(struct kref *kref) { struct drbd_request *req = container_of(kref, struct drbd_request, kref); @@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref) return; } - /* remove it from the transfer log. - * well, only if it had been there in the first - * place... if it had not (local only or conflicting - * and never sent), it should still be "empty" as - * initialized in drbd_req_new(), so we can list_del() it - * here unconditionally */ + /* If called from mod_rq_state (expected normal case) or + * drbd_send_and_submit (the less likely normal path), this holds the + * req_lock, and req->tl_requests will typicaly be on ->transfer_log, + * though it may be still empty (never added to the transfer log). + * + * If called from do_retry(), we do NOT hold the req_lock, but we are + * still allowed to unconditionally list_del(&req->tl_requests), + * because it will be on a local on-stack list only. */ list_del_init(&req->tl_requests); + /* finally remove the request from the conflict detection + * respective block_id verification interval tree. */ + if (!drbd_interval_empty(&req->i)) { + struct rb_root *root; + + if (s & RQ_WRITE) + root = &device->write_requests; + else + root = &device->read_requests; + drbd_remove_request_interval(root, req); + } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0) + drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n", + s, (unsigned long long)req->i.sector, req->i.size); + /* if it was a write, we may have to set the corresponding * bit(s) out-of-sync first. If it had a local part, we need to * release the reference to the activity log. */ @@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device, } -static void drbd_remove_request_interval(struct rb_root *root, - struct drbd_request *req) -{ - struct drbd_device *device = req->device; - struct drbd_interval *i = &req->i; - - drbd_remove_interval(root, i); - - /* Wake up any processes waiting for this request to complete. */ - if (i->waiting) - wake_up(&device->misc_wait); -} - /* Helper for __req_mod(). * Set m->bio to the master bio, if it is fit to be completed, * or leave it alone (it is initialized to NULL in __req_mod), @@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); error = PTR_ERR(req->private_bio); - /* remove the request from the conflict detection - * respective block_id verification hash */ - if (!drbd_interval_empty(&req->i)) { - struct rb_root *root; - - if (rw == WRITE) - root = &device->write_requests; - else - root = &device->read_requests; - drbd_remove_request_interval(root, req); - } - /* Before we can signal completion to the upper layers, * we may need to close the current transfer log epoch. * We are within the request lock, so we can simply compare @@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) m->error = ok ? 0 : (error ?: -EIO); m->bio = req->master_bio; req->master_bio = NULL; + /* We leave it in the tree, to be able to verify later + * write-acks in protocol != C during resync. + * But we mark it as "complete", so it won't be counted as + * conflict in a multi-primary setup. */ + req->i.completed = true; } + + if (req->i.waiting) + wake_up(&device->misc_wait); + + /* Either we are about to complete to upper layers, + * or we will restart this request. + * In either case, the request object will be destroyed soon, + * so better remove it from all lists. */ + list_del_init(&req->req_pending_master_completion); } +/* still holds resource->req_lock */ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_device *device = req->device; @@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_ return 1; } +static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_next == NULL) + connection->req_next = req; +} + +static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_next != req) + return; + list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { + const unsigned s = req->rq_state; + if (s & RQ_NET_QUEUED) + break; + } + if (&req->tl_requests == &connection->transfer_log) + req = NULL; + connection->req_next = req; +} + +static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_ack_pending == NULL) + connection->req_ack_pending = req; +} + +static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_ack_pending != req) + return; + list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { + const unsigned s = req->rq_state; + if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) + break; + } + if (&req->tl_requests == &connection->transfer_log) + req = NULL; + connection->req_ack_pending = req; +} + +static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_not_net_done == NULL) + connection->req_not_net_done = req; +} + +static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) +{ + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + if (!connection) + return; + if (connection->req_not_net_done != req) + return; + list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { + const unsigned s = req->rq_state; + if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) + break; + } + if (&req->tl_requests == &connection->transfer_log) + req = NULL; + connection->req_not_net_done = req; +} + /* I'd like this to be the only place that manipulates * req->completion_ref and req->kref. */ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, int clear, int set) { struct drbd_device *device = req->device; + struct drbd_peer_device *peer_device = first_peer_device(device); unsigned s = req->rq_state; int c_put = 0; int k_put = 0; @@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, atomic_inc(&req->completion_ref); } - if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) + if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) { atomic_inc(&req->completion_ref); + set_if_null_req_next(peer_device, req); + } if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) kref_get(&req->kref); /* wait for the DONE */ - if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) - atomic_add(req->i.size >> 9, &device->ap_in_flight); + if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) { + /* potentially already completed in the asender thread */ + if (!(s & RQ_NET_DONE)) { + atomic_add(req->i.size >> 9, &device->ap_in_flight); + set_if_null_req_not_net_done(peer_device, req); + } + if (s & RQ_NET_PENDING) + set_if_null_req_ack_pending(peer_device, req); + } if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) atomic_inc(&req->completion_ref); @@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, ++k_put; else ++c_put; + list_del_init(&req->req_pending_local); } if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { dec_ap_pending(device); ++c_put; + req->acked_jif = jiffies; + advance_conn_req_ack_pending(peer_device, req); } - if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) + if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) { ++c_put; + advance_conn_req_next(peer_device, req); + } - if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { - if (req->rq_state & RQ_NET_SENT) + if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { + if (s & RQ_NET_SENT) atomic_sub(req->i.size >> 9, &device->ap_in_flight); - ++k_put; + if (s & RQ_EXP_BARR_ACK) + ++k_put; + req->net_done_jif = jiffies; + + /* in ahead/behind mode, or just in case, + * before we finally destroy this request, + * the caching pointers must not reference it anymore */ + advance_conn_req_next(peer_device, req); + advance_conn_req_ack_pending(peer_device, req); + advance_conn_req_not_net_done(peer_device, req); } /* potentially complete and destroy */ @@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request bdevname(device->ldev->backing_bdev, b)); } +/* Helper for HANDED_OVER_TO_NETWORK. + * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)? + * Is it also still "PENDING"? + * --> If so, clear PENDING and set NET_OK below. + * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster + * (and we must not set RQ_NET_OK) */ +static inline bool is_pending_write_protocol_A(struct drbd_request *req) +{ + return (req->rq_state & + (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK)) + == (RQ_WRITE|RQ_NET_PENDING); +} + /* obviously this could be coded as many single functions * instead of one huge switch, * or by putting the code directly in the respective locations @@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request int __req_mod(struct drbd_request *req, enum drbd_req_event what, struct bio_and_error *m) { - struct drbd_device *device = req->device; + struct drbd_device *const device = req->device; + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; struct net_conf *nc; int p, rv = 0; @@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * and from w_read_retry_remote */ D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); rcu_read_lock(); - nc = rcu_dereference(first_peer_device(device)->connection->net_conf); + nc = rcu_dereference(connection->net_conf); p = nc->wire_protocol; rcu_read_unlock(); req->rq_state |= @@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_read_req; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &req->w); break; @@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, D_ASSERT(device, req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); req->w.cb = w_send_dblock; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &req->w); /* close the epoch, in case it outgrew the limit */ rcu_read_lock(); - nc = rcu_dereference(first_peer_device(device)->connection->net_conf); + nc = rcu_dereference(connection->net_conf); p = nc->max_epoch_size; rcu_read_unlock(); - if (first_peer_device(device)->connection->current_tle_writes >= p) - start_new_tl_epoch(first_peer_device(device)->connection); + if (connection->current_tle_writes >= p) + start_new_tl_epoch(connection); break; case QUEUE_FOR_SEND_OOS: mod_rq_state(req, m, 0, RQ_NET_QUEUED); req->w.cb = w_send_out_of_sync; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &req->w); break; @@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case HANDED_OVER_TO_NETWORK: /* assert something? */ - if (bio_data_dir(req->master_bio) == WRITE && - !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) { + if (is_pending_write_protocol_A(req)) /* this is what is dangerous about protocol A: * pretend it was successfully written on the peer. */ - if (req->rq_state & RQ_NET_PENDING) - mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); - /* else: neg-ack was faster... */ - /* it is still not yet RQ_NET_DONE until the - * corresponding epoch barrier got acked as well, - * so we know what to dirty on connection loss */ - } - mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); + mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING, + RQ_NET_SENT|RQ_NET_OK); + else + mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT); + /* It is still not yet RQ_NET_DONE until the + * corresponding epoch barrier got acked as well, + * so we know what to dirty on connection loss. */ break; case OOS_HANDED_TO_NETWORK: @@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, case WRITE_ACKED_BY_PEER_AND_SIS: req->rq_state |= RQ_NET_SIS; case WRITE_ACKED_BY_PEER: - D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); - /* protocol C; successfully written on peer. + /* Normal operation protocol C: successfully written on peer. + * During resync, even in protocol != C, + * we requested an explicit write ack anyways. + * Which means we cannot even assert anything here. * Nothing more to do here. * We want to keep the tl in place for all protocols, to cater * for volatile write-back caches on lower level devices. */ - goto ack_common; case RECV_ACKED_BY_PEER: D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); @@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, * see also notes above in HANDED_OVER_TO_NETWORK about * protocol != C */ ack_common: - D_ASSERT(device, req->rq_state & RQ_NET_PENDING); mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); break; @@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, get_ldev(device); /* always succeeds in this call path */ req->w.cb = w_restart_disk_io; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &req->w); break; @@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); if (req->w.cb) { - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + /* w.cb expected to be w_send_dblock, or w_send_read_req */ + drbd_queue_work(&connection->sender_work, &req->w); rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; } /* else: FIXME can this happen? */ @@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case QUEUE_AS_DRBD_BARRIER: - start_new_tl_epoch(first_peer_device(device)->connection); + start_new_tl_epoch(connection); mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); break; }; @@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device) connection->agreed_pro_version < 96) return; + if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD) + return; /* nothing to do ... */ + /* If I don't even have good local storage, we can not reasonably try * to pull ahead of the peer. We also need the local reference to make * sure device->act_log is there. @@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) * stable storage, and this is a WRITE, we may not even submit * this bio. */ if (get_ldev(device)) { + req->pre_submit_jif = jiffies; if (drbd_insert_fault(device, rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? DRBD_FAULT_DT_RD @@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req) static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) { - spin_lock(&device->submit.lock); + spin_lock_irq(&device->resource->req_lock); list_add_tail(&req->tl_requests, &device->submit.writes); - spin_unlock(&device->submit.lock); + list_add_tail(&req->req_pending_master_completion, + &device->pending_master_completion[1 /* WRITE */]); + spin_unlock_irq(&device->resource->req_lock); queue_work(device->submit.wq, &device->submit.worker); + /* do_submit() may sleep internally on al_wait, too */ + wake_up(&device->al_wait); } /* returns the new drbd_request pointer, if the caller is expected to @@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. */ static struct drbd_request * -drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time) +drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif) { const int rw = bio_data_dir(bio); struct drbd_request *req; @@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long bio_endio(bio, -ENOMEM); return ERR_PTR(-ENOMEM); } - req->start_time = start_time; + req->start_jif = start_jif; if (!get_ldev(device)) { bio_put(req->private_bio); @@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long if (rw == WRITE && req->private_bio && req->i.size && !test_bit(AL_SUSPENDED, &device->flags)) { if (!drbd_al_begin_io_fastpath(device, &req->i)) { + atomic_inc(&device->ap_actlog_cnt); drbd_queue_write(device, req); return NULL; } req->rq_state |= RQ_IN_ACT_LOG; + req->in_actlog_jif = jiffies; } return req; @@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) { + struct drbd_resource *resource = device->resource; const int rw = bio_rw(req->master_bio); struct bio_and_error m = { NULL, }; bool no_remote = false; + bool submit_private_bio = false; - spin_lock_irq(&device->resource->req_lock); + spin_lock_irq(&resource->req_lock); if (rw == WRITE) { /* This may temporarily give up the req_lock, * but will re-aquire it before it returns here. @@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request no_remote = true; } + /* If it took the fast path in drbd_request_prepare, add it here. + * The slow path has added it already. */ + if (list_empty(&req->req_pending_master_completion)) + list_add_tail(&req->req_pending_master_completion, + &device->pending_master_completion[rw == WRITE]); if (req->private_bio) { /* needs to be marked within the same spinlock */ + list_add_tail(&req->req_pending_local, + &device->pending_completion[rw == WRITE]); _req_mod(req, TO_BE_SUBMITTED); /* but we need to give up the spinlock to submit */ - spin_unlock_irq(&device->resource->req_lock); - drbd_submit_req_private_bio(req); - spin_lock_irq(&device->resource->req_lock); + submit_private_bio = true; } else if (no_remote) { nodata: if (__ratelimit(&drbd_ratelimit_state)) @@ -1167,15 +1321,23 @@ nodata: out: if (drbd_req_put_completion_ref(req, &m, 1)) kref_put(&req->kref, drbd_req_destroy); - spin_unlock_irq(&device->resource->req_lock); - + spin_unlock_irq(&resource->req_lock); + + /* Even though above is a kref_put(), this is safe. + * As long as we still need to submit our private bio, + * we hold a completion ref, and the request cannot disappear. + * If however this request did not even have a private bio to submit + * (e.g. remote read), req may already be invalid now. + * That's why we cannot check on req->private_bio. */ + if (submit_private_bio) + drbd_submit_req_private_bio(req); if (m.bio) complete_master_bio(device, &m); } -void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time) +void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif) { - struct drbd_request *req = drbd_request_prepare(device, bio, start_time); + struct drbd_request *req = drbd_request_prepare(device, bio, start_jif); if (IS_ERR_OR_NULL(req)) return; drbd_send_and_submit(device, req); @@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom continue; req->rq_state |= RQ_IN_ACT_LOG; + req->in_actlog_jif = jiffies; + atomic_dec(&device->ap_actlog_cnt); } list_del_init(&req->tl_requests); @@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom static bool prepare_al_transaction_nonblock(struct drbd_device *device, struct list_head *incoming, - struct list_head *pending) + struct list_head *pending, + struct list_head *later) { struct drbd_request *req, *tmp; int wake = 0; @@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device, spin_lock_irq(&device->al_lock); list_for_each_entry_safe(req, tmp, incoming, tl_requests) { err = drbd_al_begin_io_nonblock(device, &req->i); + if (err == -ENOBUFS) + break; if (err == -EBUSY) wake = 1; if (err) - continue; - req->rq_state |= RQ_IN_ACT_LOG; - list_move_tail(&req->tl_requests, pending); + list_move_tail(&req->tl_requests, later); + else + list_move_tail(&req->tl_requests, pending); } spin_unlock_irq(&device->al_lock); if (wake) wake_up(&device->al_wait); - return !list_empty(pending); } +void send_and_submit_pending(struct drbd_device *device, struct list_head *pending) +{ + struct drbd_request *req, *tmp; + + list_for_each_entry_safe(req, tmp, pending, tl_requests) { + req->rq_state |= RQ_IN_ACT_LOG; + req->in_actlog_jif = jiffies; + atomic_dec(&device->ap_actlog_cnt); + list_del_init(&req->tl_requests); + drbd_send_and_submit(device, req); + } +} + void do_submit(struct work_struct *ws) { struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); - LIST_HEAD(incoming); - LIST_HEAD(pending); - struct drbd_request *req, *tmp; + LIST_HEAD(incoming); /* from drbd_make_request() */ + LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */ + LIST_HEAD(busy); /* blocked by resync requests */ + + /* grab new incoming requests */ + spin_lock_irq(&device->resource->req_lock); + list_splice_tail_init(&device->submit.writes, &incoming); + spin_unlock_irq(&device->resource->req_lock); for (;;) { - spin_lock(&device->submit.lock); - list_splice_tail_init(&device->submit.writes, &incoming); - spin_unlock(&device->submit.lock); + DEFINE_WAIT(wait); + /* move used-to-be-busy back to front of incoming */ + list_splice_init(&busy, &incoming); submit_fast_path(device, &incoming); if (list_empty(&incoming)) break; -skip_fast_path: - wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); - /* Maybe more was queued, while we prepared the transaction? - * Try to stuff them into this transaction as well. - * Be strictly non-blocking here, no wait_event, we already - * have something to commit. - * Stop if we don't make any more progres. - */ for (;;) { + prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE); + + list_splice_init(&busy, &incoming); + prepare_al_transaction_nonblock(device, &incoming, &pending, &busy); + if (!list_empty(&pending)) + break; + + schedule(); + + /* If all currently "hot" activity log extents are kept busy by + * incoming requests, we still must not totally starve new + * requests to "cold" extents. + * Something left on &incoming means there had not been + * enough update slots available, and the activity log + * has been marked as "starving". + * + * Try again now, without looking for new requests, + * effectively blocking all new requests until we made + * at least _some_ progress with what we currently have. + */ + if (!list_empty(&incoming)) + continue; + + /* Nothing moved to pending, but nothing left + * on incoming: all moved to busy! + * Grab new and iterate. */ + spin_lock_irq(&device->resource->req_lock); + list_splice_tail_init(&device->submit.writes, &incoming); + spin_unlock_irq(&device->resource->req_lock); + } + finish_wait(&device->al_wait, &wait); + + /* If the transaction was full, before all incoming requests + * had been processed, skip ahead to commit, and iterate + * without splicing in more incoming requests from upper layers. + * + * Else, if all incoming have been processed, + * they have become either "pending" (to be submitted after + * next transaction commit) or "busy" (blocked by resync). + * + * Maybe more was queued, while we prepared the transaction? + * Try to stuff those into this transaction as well. + * Be strictly non-blocking here, + * we already have something to commit. + * + * Commit if we don't make any more progres. + */ + + while (list_empty(&incoming)) { LIST_HEAD(more_pending); LIST_HEAD(more_incoming); bool made_progress; @@ -1260,55 +1485,32 @@ skip_fast_path: if (list_empty(&device->submit.writes)) break; - spin_lock(&device->submit.lock); + spin_lock_irq(&device->resource->req_lock); list_splice_tail_init(&device->submit.writes, &more_incoming); - spin_unlock(&device->submit.lock); + spin_unlock_irq(&device->resource->req_lock); if (list_empty(&more_incoming)) break; - made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending); + made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy); list_splice_tail_init(&more_pending, &pending); list_splice_tail_init(&more_incoming, &incoming); - if (!made_progress) break; } - drbd_al_begin_io_commit(device, false); - - list_for_each_entry_safe(req, tmp, &pending, tl_requests) { - list_del_init(&req->tl_requests); - drbd_send_and_submit(device, req); - } - /* If all currently hot activity log extents are kept busy by - * incoming requests, we still must not totally starve new - * requests to cold extents. In that case, prepare one request - * in blocking mode. */ - list_for_each_entry_safe(req, tmp, &incoming, tl_requests) { - list_del_init(&req->tl_requests); - req->rq_state |= RQ_IN_ACT_LOG; - if (!drbd_al_begin_io_prepare(device, &req->i)) { - /* Corresponding extent was hot after all? */ - drbd_send_and_submit(device, req); - } else { - /* Found a request to a cold extent. - * Put on "pending" list, - * and try to cumulate with more. */ - list_add(&req->tl_requests, &pending); - goto skip_fast_path; - } - } + drbd_al_begin_io_commit(device); + send_and_submit_pending(device, &pending); } } void drbd_make_request(struct request_queue *q, struct bio *bio) { struct drbd_device *device = (struct drbd_device *) q->queuedata; - unsigned long start_time; + unsigned long start_jif; - start_time = jiffies; + start_jif = jiffies; /* * what we "blindly" assume: @@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio) D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); inc_ap_bio(device); - __drbd_make_request(device, bio, start_time); + __drbd_make_request(device, bio, start_jif); } /* This is called by bio_add_page(). @@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct return limit; } -static void find_oldest_requests( - struct drbd_connection *connection, - struct drbd_device *device, - struct drbd_request **oldest_req_waiting_for_peer, - struct drbd_request **oldest_req_waiting_for_disk) -{ - struct drbd_request *r; - *oldest_req_waiting_for_peer = NULL; - *oldest_req_waiting_for_disk = NULL; - list_for_each_entry(r, &connection->transfer_log, tl_requests) { - const unsigned s = r->rq_state; - if (!*oldest_req_waiting_for_peer - && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) - *oldest_req_waiting_for_peer = r; - - if (!*oldest_req_waiting_for_disk - && (s & RQ_LOCAL_PENDING) && r->device == device) - *oldest_req_waiting_for_disk = r; - - if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk) - break; - } -} - void request_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; struct drbd_connection *connection = first_peer_device(device)->connection; - struct drbd_request *req_disk, *req_peer; /* oldest request */ + struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */ struct net_conf *nc; + unsigned long oldest_submit_jif; unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ unsigned long now; @@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data) return; /* Recurring timer stopped */ now = jiffies; + nt = now + et; spin_lock_irq(&device->resource->req_lock); - find_oldest_requests(connection, device, &req_peer, &req_disk); - if (req_peer == NULL && req_disk == NULL) { - spin_unlock_irq(&device->resource->req_lock); - mod_timer(&device->request_timer, now + et); - return; - } + req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local); + req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local); + req_peer = connection->req_not_net_done; + /* maybe the oldest request waiting for the peer is in fact still + * blocking in tcp sendmsg */ + if (!req_peer && connection->req_next && connection->req_next->pre_send_jif) + req_peer = connection->req_next; + + /* evaluate the oldest peer request only in one timer! */ + if (req_peer && req_peer->device != device) + req_peer = NULL; + + /* do we have something to evaluate? */ + if (req_peer == NULL && req_write == NULL && req_read == NULL) + goto out; + + oldest_submit_jif = + (req_write && req_read) + ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif) + ? req_write->pre_submit_jif : req_read->pre_submit_jif ) + : req_write ? req_write->pre_submit_jif + : req_read ? req_read->pre_submit_jif : now; /* The request is considered timed out, if * - we have some effective timeout from the configuration, @@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data) * to expire twice (worst case) to become effective. Good enough. */ if (ent && req_peer && - time_after(now, req_peer->start_time + ent) && + time_after(now, req_peer->pre_send_jif + ent) && !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); } - if (dt && req_disk && - time_after(now, req_disk->start_time + dt) && + if (dt && oldest_submit_jif != now && + time_after(now, oldest_submit_jif + dt) && !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); __drbd_chk_io_error(device, DRBD_FORCE_DETACH); @@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data) /* Reschedule timer for the nearest not already expired timeout. * Fallback to now + min(effective network timeout, disk timeout). */ - ent = (ent && req_peer && time_before(now, req_peer->start_time + ent)) - ? req_peer->start_time + ent : now + et; - dt = (dt && req_disk && time_before(now, req_disk->start_time + dt)) - ? req_disk->start_time + dt : now + et; + ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent)) + ? req_peer->pre_send_jif + ent : now + et; + dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt)) + ? oldest_submit_jif + dt : now + et; nt = time_before(ent, dt) ? ent : dt; +out: spin_unlock_irq(&connection->resource->req_lock); mod_timer(&device->request_timer, nt); } diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 8566cd5..9f6a040 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device, extern void request_timer_fn(unsigned long data); extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); +extern void tl_abort_disk_io(struct drbd_device *device); /* this is in drbd_main.c */ extern void drbd_restart_request(struct drbd_request *req); diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index a5d8aae..c35c0f0 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask, return rv; } -static void print_st(struct drbd_device *device, char *name, union drbd_state ns) +static void print_st(struct drbd_device *device, const char *name, union drbd_state ns) { drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", name, @@ -952,11 +952,12 @@ enum drbd_state_rv __drbd_set_state(struct drbd_device *device, union drbd_state ns, enum chg_state_flags flags, struct completion *done) { + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; union drbd_state os; enum drbd_state_rv rv = SS_SUCCESS; enum sanitize_state_warnings ssw; struct after_state_chg_work *ascw; - bool did_remote, should_do_remote; os = drbd_read_state(device); @@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, this happen...*/ if (is_valid_state(device, os) == rv) - rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); + rv = is_valid_soft_transition(os, ns, connection); } else - rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); + rv = is_valid_soft_transition(os, ns, connection); } if (rv < SS_SUCCESS) { @@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, sanitize_state(). Only display it here if we where not called from _conn_request_state() */ if (!(flags & CS_DC_SUSP)) - conn_pr_state_change(first_peer_device(device)->connection, os, ns, + conn_pr_state_change(connection, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP); /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference @@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) atomic_inc(&device->local_cnt); - did_remote = drbd_should_do_remote(device->state); + if (!is_sync_state(os.conn) && is_sync_state(ns.conn)) + clear_bit(RS_DONE, &device->flags); + + /* changes to local_cnt and device flags should be visible before + * changes to state, which again should be visible before anything else + * depending on that change happens. */ + smp_wmb(); device->state.i = ns.i; - should_do_remote = drbd_should_do_remote(device->state); device->resource->susp = ns.susp; device->resource->susp_nod = ns.susp_nod; device->resource->susp_fen = ns.susp_fen; + smp_wmb(); /* put replicated vs not-replicated requests in seperate epochs */ - if (did_remote != should_do_remote) - start_new_tl_epoch(first_peer_device(device)->connection); + if (drbd_should_do_remote((union drbd_dev_state)os.i) != + drbd_should_do_remote((union drbd_dev_state)ns.i)) + start_new_tl_epoch(connection); if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) drbd_print_uuids(device, "attached to UUIDs"); /* Wake up role changes, that were delayed because of connection establishing */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && - no_peer_wf_report_params(first_peer_device(device)->connection)) - clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags); + no_peer_wf_report_params(connection)) + clear_bit(STATE_SENT, &connection->flags); wake_up(&device->misc_wait); wake_up(&device->state_wait); - wake_up(&first_peer_device(device)->connection->ping_wait); + wake_up(&connection->ping_wait); /* Aborted verify run, or we reached the stop sector. * Log the last position, unless end-of-device. */ @@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, /* Receiver should clean up itself */ if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) - drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_stop_nowait(&connection->receiver); /* Now the receiver finished cleaning up itself, it should die */ if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) - drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_stop_nowait(&connection->receiver); /* Upon network failure, we need to restart the receiver. */ if (os.conn > C_WF_CONNECTION && ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) - drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver); + drbd_thread_restart_nowait(&connection->receiver); /* Resume AL writing if we get a connection */ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { drbd_resume_al(device); - first_peer_device(device)->connection->connect_cnt++; + connection->connect_cnt++; } /* remember last attach time so request_timer_fn() won't @@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, ascw->w.cb = w_after_state_ch; ascw->device = device; ascw->done = done; - drbd_queue_work(&first_peer_device(device)->connection->sender_work, + drbd_queue_work(&connection->sender_work, &ascw->w); } else { drbd_err(device, "Could not kmalloc an ascw\n"); @@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, union drbd_state ns, enum chg_state_flags flags) { struct drbd_resource *resource = device->resource; + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; struct sib_info sib; sib.sib_reason = SIB_STATE_CHANGE; sib.os = os; sib.ns = ns; - if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { + if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE) + && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) { clear_bit(CRASHED_PRIMARY, &device->flags); if (device->p_uuid) device->p_uuid[UI_FLAGS] &= ~((u64)2); @@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, state change. This function might sleep */ if (ns.susp_nod) { - struct drbd_connection *connection = first_peer_device(device)->connection; enum drbd_req_event what = NOTHING; spin_lock_irq(&device->resource->req_lock); @@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, } if (ns.susp_fen) { - struct drbd_connection *connection = first_peer_device(device)->connection; - spin_lock_irq(&device->resource->req_lock); if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { /* case2: The connection was established again: */ @@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, * which is unexpected. */ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && - first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) { - drbd_gen_and_send_sync_uuid(first_peer_device(device)); + connection->agreed_pro_version >= 96 && get_ldev(device)) { + drbd_gen_and_send_sync_uuid(peer_device); put_ldev(device); } @@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, atomic_set(&device->rs_pending_cnt, 0); drbd_rs_cancel_all(device); - drbd_send_uuids(first_peer_device(device)); - drbd_send_state(first_peer_device(device), ns); + drbd_send_uuids(peer_device); + drbd_send_state(peer_device, ns); } /* No point in queuing send_bitmap if we don't have a connection * anymore, so check also the _current_ state, not only the new state @@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, set_bit(NEW_CUR_UUID, &device->flags); } else { drbd_uuid_new_current(device); - drbd_send_uuids(first_peer_device(device)); + drbd_send_uuids(peer_device); } } put_ldev(device); @@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { drbd_uuid_new_current(device); - drbd_send_uuids(first_peer_device(device)); + drbd_send_uuids(peer_device); } /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) @@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Last part of the attaching process ... */ if (ns.conn >= C_CONNECTED && os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { - drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */ - drbd_send_uuids(first_peer_device(device)); - drbd_send_state(first_peer_device(device), ns); + drbd_send_sizes(peer_device, 0, 0); /* to start sync... */ + drbd_send_uuids(peer_device); + drbd_send_state(peer_device, ns); } /* We want to pause/continue resync, tell peer. */ if (ns.conn >= C_CONNECTED && ((os.aftr_isp != ns.aftr_isp) || (os.user_isp != ns.user_isp))) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* In case one of the isp bits got set, suspend other devices. */ if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && @@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Make sure the peer gets informed about eventual state changes (ISP bits) while we were in WFReportParams. */ if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); if (os.conn != C_AHEAD && ns.conn == C_AHEAD) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* We are in the progress to start a full sync... */ if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || @@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); drbd_rs_cancel_all(device); @@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, drbd_disk_str(device->state.disk)); if (ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* corresponding get_ldev in __drbd_set_state * this may finally trigger drbd_ldev_destroy. */ put_ldev(device); @@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* Notify peer that I had a local IO error, and did not detached.. */ if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* Disks got bigger while they were detached */ if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && @@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, /* sync target done with resync. Explicitly notify peer, even though * it should (at least for non-empty resyncs) already know itself. */ if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* Verify finished, or reached stop sector. Peer did not know about * the stop sector, and we may even have changed the stop sector during * verify to interrupt/stop early. Send the new state. */ if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED && verify_can_do_stop_sector(device)) - drbd_send_state(first_peer_device(device), ns); + drbd_send_state(peer_device, ns); /* This triggers bitmap writeout of potentially still unwritten pages * if the resync finished cleanly, or aborted because of peer disk @@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) old_conf = connection->net_conf; connection->my_addr_len = 0; connection->peer_addr_len = 0; - rcu_assign_pointer(connection->net_conf, NULL); + RCU_INIT_POINTER(connection->net_conf, NULL); conn_free_crypto(connection); mutex_unlock(&connection->resource->conf_update); @@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused) return 0; } -void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) +static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) { enum chg_state_flags flags = ~0; struct drbd_peer_device *peer_device; @@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma return rv; } -void +static void conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) { diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d8f57b6..50776b3 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -67,13 +67,10 @@ rwlock_t global_state_lock; */ void drbd_md_io_complete(struct bio *bio, int error) { - struct drbd_md_io *md_io; struct drbd_device *device; - md_io = (struct drbd_md_io *)bio->bi_private; - device = container_of(md_io, struct drbd_device, md_io); - - md_io->error = error; + device = bio->bi_private; + device->md_io.error = error; /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able * to timeout on the lower level device, and eventually detach from it. @@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error) * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. */ drbd_md_put_buffer(device); - md_io->done = 1; + device->md_io.done = 1; wake_up(&device->misc_wait); bio_put(bio); if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ @@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l i = peer_req->i; do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; block_id = peer_req->block_id; + peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO; spin_lock_irqsave(&device->resource->req_lock, flags); device->writ_cnt += peer_req->i.size >> 9; @@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, if (!get_ldev(device)) return -EIO; - if (drbd_rs_should_slow_down(device, sector)) - goto defer; - /* GFP_TRY, because if there is no memory available right now, this may * be rescheduled for later. It is "only" background resync, after all. */ peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, @@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, peer_req->w.cb = w_e_send_csum; spin_lock_irq(&device->resource->req_lock); - list_add(&peer_req->w.list, &device->read_ee); + list_add_tail(&peer_req->w.list, &device->read_ee); spin_unlock_irq(&device->resource->req_lock); atomic_add(size >> 9, &device->rs_sect_ev); @@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; - if (list_empty(&device->resync_work.list)) - drbd_queue_work(&first_peer_device(device)->connection->sender_work, - &device->resync_work); + drbd_queue_work_if_unqueued( + &first_peer_device(device)->connection->sender_work, + &device->resync_work); } static void fifo_set(struct fifo_buffer *fb, int value) @@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size) static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) { struct disk_conf *dc; - unsigned int want; /* The number of sectors we want in the proxy */ + unsigned int want; /* The number of sectors we want in-flight */ int req_sect; /* Number of sectors to request in this turn */ - int correction; /* Number of sectors more we need in the proxy*/ + int correction; /* Number of sectors more we need in-flight */ int cps; /* correction per invocation of drbd_rs_controller() */ int steps; /* Number of time steps to plan ahead */ int curr_corr; @@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device) * potentially causing a distributed deadlock on congestion during * online-verify or (checksum-based) resync, if max-buffers, * socket buffer sizes and resync rate settings are mis-configured. */ - if (mxb - device->rs_in_flight < number) - number = mxb - device->rs_in_flight; + + /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k), + * mxb (as used here, and in drbd_alloc_pages on the peer) is + * "number of pages" (typically also 4k), + * but "rs_in_flight" is in "sectors" (512 Byte). */ + if (mxb - device->rs_in_flight/8 < number) + number = mxb - device->rs_in_flight/8; return number; } -static int make_resync_request(struct drbd_device *device, int cancel) +static int make_resync_request(struct drbd_device *const device, int cancel) { + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL; unsigned long bit; sector_t sector; const sector_t capacity = drbd_get_capacity(device->this_bdev); int max_bio_size; int number, rollback_i, size; - int align, queued, sndbuf; + int align, requeue = 0; int i = 0; if (unlikely(cancel)) @@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel) goto requeue; for (i = 0; i < number; i++) { - /* Stop generating RS requests, when half of the send buffer is filled */ - mutex_lock(&first_peer_device(device)->connection->data.mutex); - if (first_peer_device(device)->connection->data.socket) { - queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued; - sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf; - } else { - queued = 1; - sndbuf = 0; - } - mutex_unlock(&first_peer_device(device)->connection->data.mutex); - if (queued > sndbuf / 2) + /* Stop generating RS requests when half of the send buffer is filled, + * but notify TCP that we'd like to have more space. */ + mutex_lock(&connection->data.mutex); + if (connection->data.socket) { + struct sock *sk = connection->data.socket->sk; + int queued = sk->sk_wmem_queued; + int sndbuf = sk->sk_sndbuf; + if (queued > sndbuf / 2) { + requeue = 1; + if (sk->sk_socket) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + } + } else + requeue = 1; + mutex_unlock(&connection->data.mutex); + if (requeue) goto requeue; next_sector: @@ -642,8 +649,7 @@ next_sector: sector = BM_BIT_TO_SECT(bit); - if (drbd_rs_should_slow_down(device, sector) || - drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(device, sector)) { device->bm_resync_fo = bit; goto requeue; } @@ -696,9 +702,9 @@ next_sector: /* adjust very last sectors, in case we are oddly sized */ if (sector + (size>>9) > capacity) size = (capacity-sector)<<9; - if (first_peer_device(device)->connection->agreed_pro_version >= 89 && - first_peer_device(device)->connection->csums_tfm) { - switch (read_for_csum(first_peer_device(device), sector, size)) { + + if (device->use_csums) { + switch (read_for_csum(peer_device, sector, size)) { case -EIO: /* Disk failure */ put_ldev(device); return -EIO; @@ -717,7 +723,7 @@ next_sector: int err; inc_rs_pending(device); - err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST, + err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST, sector, size, ID_SYNCER); if (err) { drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); @@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel) size = BM_BLOCK_SIZE; - if (drbd_rs_should_slow_down(device, sector) || - drbd_try_rs_begin_io(device, sector)) { + if (drbd_try_rs_begin_io(device, sector)) { device->ov_position = sector; goto requeue; } @@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device) if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) khelper_cmd = "after-resync-target"; - if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { + if (device->use_csums && device->rs_total) { const unsigned long s = device->rs_same_csum; const unsigned long t = device->rs_total; const int ratio = @@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_device *device = req->device; - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *const connection = peer_device->connection; int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); return 0; } + req->pre_send_jif = jiffies; /* this time, no connection->send.current_epoch_writes++; * If it was sent, it was the closing barrier for the last @@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel) * No more barriers will be sent, until we leave AHEAD mode again. */ maybe_send_barrier(connection, req->epoch); - err = drbd_send_out_of_sync(first_peer_device(device), req); + err = drbd_send_out_of_sync(peer_device, req); req_mod(req, OOS_HANDED_TO_NETWORK); return err; @@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_device *device = req->device; - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device->connection; int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); return 0; } + req->pre_send_jif = jiffies; re_init_if_first_write(connection, req->epoch); maybe_send_barrier(connection, req->epoch); connection->send.current_epoch_writes++; - err = drbd_send_dblock(first_peer_device(device), req); + err = drbd_send_dblock(peer_device, req); req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); return err; @@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); struct drbd_device *device = req->device; - struct drbd_connection *connection = first_peer_device(device)->connection; + struct drbd_peer_device *const peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device->connection; int err; if (unlikely(cancel)) { req_mod(req, SEND_CANCELED); return 0; } + req->pre_send_jif = jiffies; /* Even read requests may close a write epoch, * if there was any yet. */ maybe_send_barrier(connection, req->epoch); - err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size, + err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size, (unsigned long)req); req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); @@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) struct drbd_device *device = req->device; if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) - drbd_al_begin_io(device, &req->i, false); + drbd_al_begin_io(device, &req->i); drbd_req_make_private_bio(req, req->master_bio); req->private_bio->bi_bdev = device->ldev->backing_bdev; @@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device) void start_resync_timer_fn(unsigned long data) { struct drbd_device *device = (struct drbd_device *) data; - - drbd_queue_work(&first_peer_device(device)->connection->sender_work, - &device->start_resync_work); + drbd_device_post_work(device, RS_START); } -int w_start_resync(struct drbd_work *w, int cancel) +static void do_start_resync(struct drbd_device *device) { - struct drbd_device *device = - container_of(w, struct drbd_device, start_resync_work); - if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { - drbd_warn(device, "w_start_resync later...\n"); + drbd_warn(device, "postponing start_resync ...\n"); device->start_resync_timer.expires = jiffies + HZ/10; add_timer(&device->start_resync_timer); - return 0; + return; } drbd_start_resync(device, C_SYNC_SOURCE); clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); - return 0; +} + +static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device) +{ + bool csums_after_crash_only; + rcu_read_lock(); + csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only; + rcu_read_unlock(); + return connection->agreed_pro_version >= 89 && /* supported? */ + connection->csums_tfm && /* configured? */ + (csums_after_crash_only == 0 /* use for each resync? */ + || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */ } /** @@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel) */ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) { + struct drbd_peer_device *peer_device = first_peer_device(device); + struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; union drbd_state ns; int r; @@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) if (r > 0) { drbd_info(device, "before-resync-target handler returned %d, " "dropping connection.\n", r); - conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD); + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); return; } } else /* C_SYNC_SOURCE */ { @@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) } else { drbd_info(device, "before-resync-source handler returned %d, " "dropping connection.\n", r); - conn_request_state(first_peer_device(device)->connection, + conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD); return; } @@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) } } - if (current == first_peer_device(device)->connection->worker.task) { + if (current == connection->worker.task) { /* The worker should not sleep waiting for state_mutex, that can take long */ if (!mutex_trylock(device->state_mutex)) { @@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) device->rs_mark_time[i] = now; } _drbd_pause_after(device); + /* Forget potentially stale cached per resync extent bit-counts. + * Open coded drbd_rs_cancel_all(device), we already have IRQs + * disabled, and know the disk state is ok. */ + spin_lock(&device->al_lock); + lc_reset(device->resync); + device->resync_locked = 0; + device->resync_wenr = LC_FREE; + spin_unlock(&device->al_lock); } write_unlock(&global_state_lock); spin_unlock_irq(&device->resource->req_lock); if (r == SS_SUCCESS) { + wake_up(&device->al_wait); /* for lc_reset() above */ /* reset rs_last_bcast when a resync or verify is started, * to deal with potential jiffies wrap. */ device->rs_last_bcast = jiffies - HZ; @@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) drbd_conn_str(ns.conn), (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), (unsigned long) device->rs_total); - if (side == C_SYNC_TARGET) + if (side == C_SYNC_TARGET) { device->bm_resync_fo = 0; + device->use_csums = use_checksum_based_resync(connection, device); + } else { + device->use_csums = 0; + } /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid * with w_send_oos, or the sync target will get confused as to @@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) * drbd_resync_finished from here in that case. * We drbd_gen_and_send_sync_uuid here for protocol < 96, * and from after_state_ch otherwise. */ - if (side == C_SYNC_SOURCE && - first_peer_device(device)->connection->agreed_pro_version < 96) - drbd_gen_and_send_sync_uuid(first_peer_device(device)); + if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96) + drbd_gen_and_send_sync_uuid(peer_device); - if (first_peer_device(device)->connection->agreed_pro_version < 95 && - device->rs_total == 0) { + if (connection->agreed_pro_version < 95 && device->rs_total == 0) { /* This still has a race (about when exactly the peers * detect connection loss) that can lead to a full sync * on next handshake. In 8.3.9 we fixed this with explicit @@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) int timeo; rcu_read_lock(); - nc = rcu_dereference(first_peer_device(device)->connection->net_conf); + nc = rcu_dereference(connection->net_conf); timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; rcu_read_unlock(); schedule_timeout_interruptible(timeo); @@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) mutex_unlock(device->state_mutex); } +static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done) +{ + struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; + device->rs_last_bcast = jiffies; + + if (!get_ldev(device)) + return; + + drbd_bm_write_lazy(device, 0); + if (resync_done && is_sync_state(device->state.conn)) + drbd_resync_finished(device); + + drbd_bcast_event(device, &sib); + /* update timestamp, in case it took a while to write out stuff */ + device->rs_last_bcast = jiffies; + put_ldev(device); +} + +static void drbd_ldev_destroy(struct drbd_device *device) +{ + lc_destroy(device->resync); + device->resync = NULL; + lc_destroy(device->act_log); + device->act_log = NULL; + __no_warn(local, + drbd_free_ldev(device->ldev); + device->ldev = NULL;); + clear_bit(GOING_DISKLESS, &device->flags); + wake_up(&device->misc_wait); +} + +static void go_diskless(struct drbd_device *device) +{ + D_ASSERT(device, device->state.disk == D_FAILED); + /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will + * inc/dec it frequently. Once we are D_DISKLESS, no one will touch + * the protected members anymore, though, so once put_ldev reaches zero + * again, it will be safe to free them. */ + + /* Try to write changed bitmap pages, read errors may have just + * set some bits outside the area covered by the activity log. + * + * If we have an IO error during the bitmap writeout, + * we will want a full sync next time, just in case. + * (Do we want a specific meta data flag for this?) + * + * If that does not make it to stable storage either, + * we cannot do anything about that anymore. + * + * We still need to check if both bitmap and ldev are present, we may + * end up here after a failed attach, before ldev was even assigned. + */ + if (device->bitmap && device->ldev) { + /* An interrupted resync or similar is allowed to recounts bits + * while we detach. + * Any modifications would not be expected anymore, though. + */ + if (drbd_bitmap_io_from_worker(device, drbd_bm_write, + "detach", BM_LOCKED_TEST_ALLOWED)) { + if (test_bit(WAS_READ_ERROR, &device->flags)) { + drbd_md_set_flag(device, MDF_FULL_SYNC); + drbd_md_sync(device); + } + } + } + + drbd_force_state(device, NS(disk, D_DISKLESS)); +} + +static int do_md_sync(struct drbd_device *device) +{ + drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n"); + drbd_md_sync(device); + return 0; +} + +/* only called from drbd_worker thread, no locking */ +void __update_timing_details( + struct drbd_thread_timing_details *tdp, + unsigned int *cb_nr, + void *cb, + const char *fn, const unsigned int line) +{ + unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST; + struct drbd_thread_timing_details *td = tdp + i; + + td->start_jif = jiffies; + td->cb_addr = cb; + td->caller_fn = fn; + td->line = line; + td->cb_nr = *cb_nr; + + i = (i+1) % DRBD_THREAD_DETAILS_HIST; + td = tdp + i; + memset(td, 0, sizeof(*td)); + + ++(*cb_nr); +} + +#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit)) +static void do_device_work(struct drbd_device *device, const unsigned long todo) +{ + if (WORK_PENDING(MD_SYNC, todo)) + do_md_sync(device); + if (WORK_PENDING(RS_DONE, todo) || + WORK_PENDING(RS_PROGRESS, todo)) + update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo)); + if (WORK_PENDING(GO_DISKLESS, todo)) + go_diskless(device); + if (WORK_PENDING(DESTROY_DISK, todo)) + drbd_ldev_destroy(device); + if (WORK_PENDING(RS_START, todo)) + do_start_resync(device); +} + +#define DRBD_DEVICE_WORK_MASK \ + ((1UL << GO_DISKLESS) \ + |(1UL << DESTROY_DISK) \ + |(1UL << MD_SYNC) \ + |(1UL << RS_START) \ + |(1UL << RS_PROGRESS) \ + |(1UL << RS_DONE) \ + ) + +static unsigned long get_work_bits(unsigned long *flags) +{ + unsigned long old, new; + do { + old = *flags; + new = old & ~DRBD_DEVICE_WORK_MASK; + } while (cmpxchg(flags, old, new) != old); + return old & DRBD_DEVICE_WORK_MASK; +} + +static void do_unqueued_work(struct drbd_connection *connection) +{ + struct drbd_peer_device *peer_device; + int vnr; + + rcu_read_lock(); + idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { + struct drbd_device *device = peer_device->device; + unsigned long todo = get_work_bits(&device->flags); + if (!todo) + continue; + + kref_get(&device->kref); + rcu_read_unlock(); + do_device_work(device, todo); + kref_put(&device->kref, drbd_destroy_device); + rcu_read_lock(); + } + rcu_read_unlock(); +} + static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) { spin_lock_irq(&queue->q_lock); - list_splice_init(&queue->q, work_list); + list_splice_tail_init(&queue->q, work_list); spin_unlock_irq(&queue->q_lock); return !list_empty(work_list); } @@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * /* dequeue single item only, * we still use drbd_queue_work_front() in some places */ if (!list_empty(&connection->sender_work.q)) - list_move(connection->sender_work.q.next, work_list); + list_splice_tail_init(&connection->sender_work.q, work_list); spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ if (!list_empty(work_list) || signal_pending(current)) { spin_unlock_irq(&connection->resource->req_lock); @@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * if (send_barrier) maybe_send_barrier(connection, connection->send.current_epoch_nr + 1); + + if (test_bit(DEVICE_WORK_PENDING, &connection->flags)) + break; + + /* drbd_send() may have called flush_signals() */ + if (get_t_state(&connection->worker) != RUNNING) + break; + schedule(); /* may be woken up for other things but new work, too, * e.g. if the current epoch got closed. @@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi) while (get_t_state(thi) == RUNNING) { drbd_thread_current_set_cpu(thi); - /* as long as we use drbd_queue_work_front(), - * we may only dequeue single work items here, not batches. */ - if (list_empty(&work_list)) + if (list_empty(&work_list)) { + update_worker_timing_details(connection, wait_for_work); wait_for_work(connection, &work_list); + } + + if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { + update_worker_timing_details(connection, do_unqueued_work); + do_unqueued_work(connection); + } if (signal_pending(current)) { flush_signals(current); @@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi) while (!list_empty(&work_list)) { w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); + update_worker_timing_details(connection, w->cb); if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) continue; if (connection->cstate >= C_WF_REPORT_PARAMS) @@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi) } do { + if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) { + update_worker_timing_details(connection, do_unqueued_work); + do_unqueued_work(connection); + } while (!list_empty(&work_list)) { w = list_first_entry(&work_list, struct drbd_work, list); list_del_init(&w->list); + update_worker_timing_details(connection, w->cb); w->cb(w, 1); } dequeue_work_batch(&connection->sender_work, &work_list); - } while (!list_empty(&work_list)); + } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags)); rcu_read_lock(); idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b2c98c1..623c841 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -42,6 +42,7 @@ #include <linux/blkdev.h> #include <linux/slab.h> #include <linux/idr.h> +#include <linux/workqueue.h> #include "rbd_types.h" @@ -332,7 +333,10 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ + struct list_head rq_queue; /* incoming rq queue */ spinlock_t lock; /* queue, flags, open_count */ + struct workqueue_struct *rq_wq; + struct work_struct rq_work; struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ @@ -514,7 +518,8 @@ static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); static int rbd_dev_refresh(struct rbd_device *rbd_dev); static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); -static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev); +static int rbd_dev_header_info(struct rbd_device *rbd_dev); +static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev); static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u64 snap_id); static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, @@ -971,12 +976,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev, header->snap_names = snap_names; header->snap_sizes = snap_sizes; - /* Make sure mapping size is consistent with header info */ - - if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time) - if (rbd_dev->mapping.size != header->image_size) - rbd_dev->mapping.size = header->image_size; - return 0; out_2big: ret = -EIO; @@ -1139,6 +1138,13 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) rbd_dev->mapping.features = 0; } +static void rbd_segment_name_free(const char *name) +{ + /* The explicit cast here is needed to drop the const qualifier */ + + kmem_cache_free(rbd_segment_name_cache, (void *)name); +} + static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; @@ -1158,20 +1164,13 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) if (ret < 0 || ret > CEPH_MAX_OID_NAME_LEN) { pr_err("error formatting segment name for #%llu (%d)\n", segment, ret); - kfree(name); + rbd_segment_name_free(name); name = NULL; } return name; } -static void rbd_segment_name_free(const char *name) -{ - /* The explicit cast here is needed to drop the const qualifier */ - - kmem_cache_free(rbd_segment_name_cache, (void *)name); -} - static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset) { u64 segment_size = (u64) 1 << rbd_dev->header.obj_order; @@ -1371,7 +1370,7 @@ static void obj_request_img_data_set(struct rbd_obj_request *obj_request) struct rbd_device *rbd_dev; rbd_dev = obj_request->img_request->rbd_dev; - rbd_warn(rbd_dev, "obj_request %p already marked img_data\n", + rbd_warn(rbd_dev, "obj_request %p already marked img_data", obj_request); } } @@ -1389,7 +1388,7 @@ static void obj_request_done_set(struct rbd_obj_request *obj_request) if (obj_request_img_data_test(obj_request)) rbd_dev = obj_request->img_request->rbd_dev; - rbd_warn(rbd_dev, "obj_request %p already marked done\n", + rbd_warn(rbd_dev, "obj_request %p already marked done", obj_request); } } @@ -1527,11 +1526,37 @@ static bool obj_request_type_valid(enum obj_request_type type) static int rbd_obj_request_submit(struct ceph_osd_client *osdc, struct rbd_obj_request *obj_request) { - dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - + dout("%s %p\n", __func__, obj_request); return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } +static void rbd_obj_request_end(struct rbd_obj_request *obj_request) +{ + dout("%s %p\n", __func__, obj_request); + ceph_osdc_cancel_request(obj_request->osd_req); +} + +/* + * Wait for an object request to complete. If interrupted, cancel the + * underlying osd request. + */ +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) +{ + int ret; + + dout("%s %p\n", __func__, obj_request); + + ret = wait_for_completion_interruptible(&obj_request->completion); + if (ret < 0) { + dout("%s %p interrupted\n", __func__, obj_request); + rbd_obj_request_end(obj_request); + return ret; + } + + dout("%s %p done\n", __func__, obj_request); + return 0; +} + static void rbd_img_request_complete(struct rbd_img_request *img_request) { @@ -1558,15 +1583,6 @@ static void rbd_img_request_complete(struct rbd_img_request *img_request) rbd_img_request_put(img_request); } -/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ - -static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) -{ - dout("%s: obj %p\n", __func__, obj_request); - - return wait_for_completion_interruptible(&obj_request->completion); -} - /* * The default/initial value for all image request flags is 0. Each * is conditionally set to 1 at image request initialization time @@ -1763,7 +1779,7 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_trivial_callback(obj_request); break; default: - rbd_warn(NULL, "%s: unsupported op %hu\n", + rbd_warn(NULL, "%s: unsupported op %hu", obj_request->object_name, (unsigned short) opcode); break; } @@ -1998,7 +2014,7 @@ static void rbd_dev_parent_put(struct rbd_device *rbd_dev) if (!counter) rbd_dev_unparent(rbd_dev); else - rbd_warn(rbd_dev, "parent reference underflow\n"); + rbd_warn(rbd_dev, "parent reference underflow"); } /* @@ -2028,7 +2044,7 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) /* Image was flattened, but parent is not yet torn down */ if (counter < 0) - rbd_warn(rbd_dev, "parent reference overflow\n"); + rbd_warn(rbd_dev, "parent reference overflow"); return false; } @@ -2045,7 +2061,7 @@ static struct rbd_img_request *rbd_img_request_create( { struct rbd_img_request *img_request; - img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_ATOMIC); + img_request = kmem_cache_alloc(rbd_img_request_cache, GFP_NOIO); if (!img_request) return NULL; @@ -2161,11 +2177,11 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) if (result) { struct rbd_device *rbd_dev = img_request->rbd_dev; - rbd_warn(rbd_dev, "%s %llx at %llx (%llx)\n", + rbd_warn(rbd_dev, "%s %llx at %llx (%llx)", img_request_write_test(img_request) ? "write" : "read", obj_request->length, obj_request->img_offset, obj_request->offset); - rbd_warn(rbd_dev, " result %d xferred %x\n", + rbd_warn(rbd_dev, " result %d xferred %x", result, xferred); if (!img_request->result) img_request->result = result; @@ -2946,154 +2962,135 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long)notify_id, (unsigned int)opcode); + + /* + * Until adequate refresh error handling is in place, there is + * not much we can do here, except warn. + * + * See http://tracker.ceph.com/issues/5040 + */ ret = rbd_dev_refresh(rbd_dev); if (ret) - rbd_warn(rbd_dev, "header refresh error (%d)\n", ret); + rbd_warn(rbd_dev, "refresh failed: %d", ret); - rbd_obj_notify_ack_sync(rbd_dev, notify_id); + ret = rbd_obj_notify_ack_sync(rbd_dev, notify_id); + if (ret) + rbd_warn(rbd_dev, "notify_ack ret %d", ret); } /* - * Initiate a watch request, synchronously. + * Send a (un)watch request and wait for the ack. Return a request + * with a ref held on success or error. */ -static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) +static struct rbd_obj_request *rbd_obj_watch_request_helper( + struct rbd_device *rbd_dev, + bool watch) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; int ret; - rbd_assert(!rbd_dev->watch_event); - rbd_assert(!rbd_dev->watch_request); - - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, - &rbd_dev->watch_event); - if (ret < 0) - return ret; - - rbd_assert(rbd_dev->watch_event); - obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, OBJ_REQUEST_NODATA); - if (!obj_request) { - ret = -ENOMEM; - goto out_cancel; - } + if (!obj_request) + return ERR_PTR(-ENOMEM); obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, obj_request); if (!obj_request->osd_req) { ret = -ENOMEM; - goto out_put; + goto out; } - ceph_osdc_set_request_linger(osdc, obj_request->osd_req); - osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, - rbd_dev->watch_event->cookie, 0, 1); + rbd_dev->watch_event->cookie, 0, watch); rbd_osd_req_format_write(obj_request); + if (watch) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); if (ret) - goto out_linger; + goto out; ret = rbd_obj_request_wait(obj_request); if (ret) - goto out_linger; + goto out; ret = obj_request->result; - if (ret) - goto out_linger; - - /* - * A watch request is set to linger, so the underlying osd - * request won't go away until we unregister it. We retain - * a pointer to the object request during that time (in - * rbd_dev->watch_request), so we'll keep a reference to - * it. We'll drop that reference (below) after we've - * unregistered it. - */ - rbd_dev->watch_request = obj_request; + if (ret) { + if (watch) + rbd_obj_request_end(obj_request); + goto out; + } - return 0; + return obj_request; -out_linger: - ceph_osdc_unregister_linger_request(osdc, obj_request->osd_req); -out_put: +out: rbd_obj_request_put(obj_request); -out_cancel: - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; - - return ret; + return ERR_PTR(ret); } /* - * Tear down a watch request, synchronously. + * Initiate a watch request, synchronously. */ -static int __rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_obj_request *obj_request; int ret; - rbd_assert(rbd_dev->watch_event); - rbd_assert(rbd_dev->watch_request); + rbd_assert(!rbd_dev->watch_event); + rbd_assert(!rbd_dev->watch_request); - obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, - OBJ_REQUEST_NODATA); - if (!obj_request) { - ret = -ENOMEM; - goto out_cancel; - } + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; - obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1, - obj_request); - if (!obj_request->osd_req) { - ret = -ENOMEM; - goto out_put; + obj_request = rbd_obj_watch_request_helper(rbd_dev, true); + if (IS_ERR(obj_request)) { + ceph_osdc_cancel_event(rbd_dev->watch_event); + rbd_dev->watch_event = NULL; + return PTR_ERR(obj_request); } - osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, - rbd_dev->watch_event->cookie, 0, 0); - rbd_osd_req_format_write(obj_request); - - ret = rbd_obj_request_submit(osdc, obj_request); - if (ret) - goto out_put; + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to it. + * We'll drop that reference after we've unregistered it in + * rbd_dev_header_unwatch_sync(). + */ + rbd_dev->watch_request = obj_request; - ret = rbd_obj_request_wait(obj_request); - if (ret) - goto out_put; + return 0; +} - ret = obj_request->result; - if (ret) - goto out_put; +/* + * Tear down a watch request, synchronously. + */ +static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) +{ + struct rbd_obj_request *obj_request; - /* We have successfully torn down the watch request */ + rbd_assert(rbd_dev->watch_event); + rbd_assert(rbd_dev->watch_request); - ceph_osdc_unregister_linger_request(osdc, - rbd_dev->watch_request->osd_req); + rbd_obj_request_end(rbd_dev->watch_request); rbd_obj_request_put(rbd_dev->watch_request); rbd_dev->watch_request = NULL; -out_put: - rbd_obj_request_put(obj_request); -out_cancel: + obj_request = rbd_obj_watch_request_helper(rbd_dev, false); + if (!IS_ERR(obj_request)) + rbd_obj_request_put(obj_request); + else + rbd_warn(rbd_dev, "unable to tear down watch request (%ld)", + PTR_ERR(obj_request)); + ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; - - return ret; -} - -static void rbd_dev_header_unwatch_sync(struct rbd_device *rbd_dev) -{ - int ret; - - ret = __rbd_dev_header_unwatch_sync(rbd_dev); - if (ret) { - rbd_warn(rbd_dev, "unable to tear down watch request: %d\n", - ret); - } } /* @@ -3183,102 +3180,129 @@ out: return ret; } -static void rbd_request_fn(struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) +static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) { - struct rbd_device *rbd_dev = q->queuedata; - struct request *rq; + struct rbd_img_request *img_request; + u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; + u64 length = blk_rq_bytes(rq); + bool wr = rq_data_dir(rq) == WRITE; int result; - while ((rq = blk_fetch_request(q))) { - bool write_request = rq_data_dir(rq) == WRITE; - struct rbd_img_request *img_request; - u64 offset; - u64 length; + /* Ignore/skip any zero-length requests */ - /* Ignore any non-FS requests that filter through. */ + if (!length) { + dout("%s: zero-length request\n", __func__); + result = 0; + goto err_rq; + } - if (rq->cmd_type != REQ_TYPE_FS) { - dout("%s: non-fs request type %d\n", __func__, - (int) rq->cmd_type); - __blk_end_request_all(rq, 0); - continue; + /* Disallow writes to a read-only device */ + + if (wr) { + if (rbd_dev->mapping.read_only) { + result = -EROFS; + goto err_rq; } + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); + } - /* Ignore/skip any zero-length requests */ + /* + * Quit early if the mapped snapshot no longer exists. It's + * still possible the snapshot will have disappeared by the + * time our request arrives at the osd, but there's no sense in + * sending it if we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto err_rq; + } - offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; - length = (u64) blk_rq_bytes(rq); + if (offset && length > U64_MAX - offset + 1) { + rbd_warn(rbd_dev, "bad request range (%llu~%llu)", offset, + length); + result = -EINVAL; + goto err_rq; /* Shouldn't happen */ + } - if (!length) { - dout("%s: zero-length request\n", __func__); - __blk_end_request_all(rq, 0); - continue; - } + if (offset + length > rbd_dev->mapping.size) { + rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset, + length, rbd_dev->mapping.size); + result = -EIO; + goto err_rq; + } - spin_unlock_irq(q->queue_lock); + img_request = rbd_img_request_create(rbd_dev, offset, length, wr); + if (!img_request) { + result = -ENOMEM; + goto err_rq; + } + img_request->rq = rq; - /* Disallow writes to a read-only device */ + result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, rq->bio); + if (result) + goto err_img_request; - if (write_request) { - result = -EROFS; - if (rbd_dev->mapping.read_only) - goto end_request; - rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); - } + result = rbd_img_request_submit(img_request); + if (result) + goto err_img_request; - /* - * Quit early if the mapped snapshot no longer - * exists. It's still possible the snapshot will - * have disappeared by the time our request arrives - * at the osd, but there's no sense in sending it if - * we already know. - */ - if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { - dout("request for non-existent snapshot"); - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - result = -ENXIO; - goto end_request; - } + return; - result = -EINVAL; - if (offset && length > U64_MAX - offset + 1) { - rbd_warn(rbd_dev, "bad request range (%llu~%llu)\n", - offset, length); - goto end_request; /* Shouldn't happen */ - } +err_img_request: + rbd_img_request_put(img_request); +err_rq: + if (result) + rbd_warn(rbd_dev, "%s %llx at %llx result %d", + wr ? "write" : "read", length, offset, result); + blk_end_request_all(rq, result); +} - result = -EIO; - if (offset + length > rbd_dev->mapping.size) { - rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n", - offset, length, rbd_dev->mapping.size); - goto end_request; - } +static void rbd_request_workfn(struct work_struct *work) +{ + struct rbd_device *rbd_dev = + container_of(work, struct rbd_device, rq_work); + struct request *rq, *next; + LIST_HEAD(requests); - result = -ENOMEM; - img_request = rbd_img_request_create(rbd_dev, offset, length, - write_request); - if (!img_request) - goto end_request; + spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */ + list_splice_init(&rbd_dev->rq_queue, &requests); + spin_unlock_irq(&rbd_dev->lock); - img_request->rq = rq; + list_for_each_entry_safe(rq, next, &requests, queuelist) { + list_del_init(&rq->queuelist); + rbd_handle_request(rbd_dev, rq); + } +} - result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, - rq->bio); - if (!result) - result = rbd_img_request_submit(img_request); - if (result) - rbd_img_request_put(img_request); -end_request: - spin_lock_irq(q->queue_lock); - if (result < 0) { - rbd_warn(rbd_dev, "%s %llx at %llx result %d\n", - write_request ? "write" : "read", - length, offset, result); - - __blk_end_request_all(rq, result); +/* + * Called with q->queue_lock held and interrupts disabled, possibly on + * the way to schedule(). Do not sleep here! + */ +static void rbd_request_fn(struct request_queue *q) +{ + struct rbd_device *rbd_dev = q->queuedata; + struct request *rq; + int queued = 0; + + rbd_assert(rbd_dev); + + while ((rq = blk_fetch_request(q))) { + /* Ignore any non-FS requests that filter through. */ + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); + continue; } + + list_add_tail(&rq->queuelist, &rbd_dev->rq_queue); + queued++; } + + if (queued) + queue_work(rbd_dev->rq_wq, &rbd_dev->rq_work); } /* @@ -3517,24 +3541,37 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) u64 mapping_size; int ret; - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); down_write(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; - if (rbd_dev->image_format == 1) - ret = rbd_dev_v1_header_info(rbd_dev); - else - ret = rbd_dev_v2_header_info(rbd_dev); - /* If it's a mapped snapshot, validate its EXISTS flag */ + ret = rbd_dev_header_info(rbd_dev); + if (ret) + return ret; + + /* + * If there is a parent, see if it has disappeared due to the + * mapped image getting flattened. + */ + if (rbd_dev->parent) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + return ret; + } + + if (rbd_dev->spec->snap_id == CEPH_NOSNAP) { + if (rbd_dev->mapping.size != rbd_dev->header.image_size) + rbd_dev->mapping.size = rbd_dev->header.image_size; + } else { + /* validate mapped snapshot's EXISTS flag */ + rbd_exists_validate(rbd_dev); + } - rbd_exists_validate(rbd_dev); up_write(&rbd_dev->header_rwsem); - if (mapping_size != rbd_dev->mapping.size) { + if (mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); - } - return ret; + return 0; } static int rbd_init_disk(struct rbd_device *rbd_dev) @@ -3696,46 +3733,36 @@ static ssize_t rbd_snap_show(struct device *dev, } /* - * For an rbd v2 image, shows the pool id, image id, and snapshot id - * for the parent image. If there is no parent, simply shows - * "(no parent image)". + * For a v2 image, shows the chain of parent images, separated by empty + * lines. For v1 images or if there is no parent, shows "(no parent + * image)". */ static ssize_t rbd_parent_show(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - struct rbd_spec *spec = rbd_dev->parent_spec; - int count; - char *bufp = buf; + ssize_t count = 0; - if (!spec) + if (!rbd_dev->parent) return sprintf(buf, "(no parent image)\n"); - count = sprintf(bufp, "pool_id %llu\npool_name %s\n", - (unsigned long long) spec->pool_id, spec->pool_name); - if (count < 0) - return count; - bufp += count; - - count = sprintf(bufp, "image_id %s\nimage_name %s\n", spec->image_id, - spec->image_name ? spec->image_name : "(unknown)"); - if (count < 0) - return count; - bufp += count; - - count = sprintf(bufp, "snap_id %llu\nsnap_name %s\n", - (unsigned long long) spec->snap_id, spec->snap_name); - if (count < 0) - return count; - bufp += count; - - count = sprintf(bufp, "overlap %llu\n", rbd_dev->parent_overlap); - if (count < 0) - return count; - bufp += count; + for ( ; rbd_dev->parent; rbd_dev = rbd_dev->parent) { + struct rbd_spec *spec = rbd_dev->parent_spec; + + count += sprintf(&buf[count], "%s" + "pool_id %llu\npool_name %s\n" + "image_id %s\nimage_name %s\n" + "snap_id %llu\nsnap_name %s\n" + "overlap %llu\n", + !count ? "" : "\n", /* first? */ + spec->pool_id, spec->pool_name, + spec->image_id, spec->image_name ?: "(unknown)", + spec->snap_id, spec->snap_name, + rbd_dev->parent_overlap); + } - return (ssize_t) (bufp - buf); + return count; } static ssize_t rbd_image_refresh(struct device *dev, @@ -3748,9 +3775,9 @@ static ssize_t rbd_image_refresh(struct device *dev, ret = rbd_dev_refresh(rbd_dev); if (ret) - rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret); + return ret; - return ret < 0 ? ret : size; + return size; } static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL); @@ -3822,6 +3849,9 @@ static struct rbd_spec *rbd_spec_alloc(void) spec = kzalloc(sizeof (*spec), GFP_KERNEL); if (!spec) return NULL; + + spec->pool_id = CEPH_NOPOOL; + spec->snap_id = CEPH_NOSNAP; kref_init(&spec->kref); return spec; @@ -3848,6 +3878,8 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + INIT_LIST_HEAD(&rbd_dev->rq_queue); + INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn); rbd_dev->flags = 0; atomic_set(&rbd_dev->parent_ref, 0); INIT_LIST_HEAD(&rbd_dev->node); @@ -4021,7 +4053,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) goto out_err; } - snapid = cpu_to_le64(CEPH_NOSNAP); + snapid = cpu_to_le64(rbd_dev->spec->snap_id); ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", &snapid, sizeof (snapid), @@ -4059,7 +4091,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) ret = -EIO; if (pool_id > (u64)U32_MAX) { - rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", + rbd_warn(NULL, "parent pool id too large (%llu > %u)", (unsigned long long)pool_id, U32_MAX); goto out_err; } @@ -4083,6 +4115,8 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) parent_spec->snap_id = snap_id; rbd_dev->parent_spec = parent_spec; parent_spec = NULL; /* rbd_dev now owns this */ + } else { + kfree(image_id); } /* @@ -4110,8 +4144,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) * overlap is zero we just pretend there was * no parent image. */ - rbd_warn(rbd_dev, "ignoring parent of " - "clone with overlap 0\n"); + rbd_warn(rbd_dev, "ignoring parent with overlap 0"); } } out: @@ -4279,18 +4312,38 @@ static u64 rbd_snap_id_by_name(struct rbd_device *rbd_dev, const char *name) } /* - * When an rbd image has a parent image, it is identified by the - * pool, image, and snapshot ids (not names). This function fills - * in the names for those ids. (It's OK if we can't figure out the - * name for an image id, but the pool and snapshot ids should always - * exist and have names.) All names in an rbd spec are dynamically - * allocated. + * An image being mapped will have everything but the snap id. + */ +static int rbd_spec_fill_snap_id(struct rbd_device *rbd_dev) +{ + struct rbd_spec *spec = rbd_dev->spec; + + rbd_assert(spec->pool_id != CEPH_NOPOOL && spec->pool_name); + rbd_assert(spec->image_id && spec->image_name); + rbd_assert(spec->snap_name); + + if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { + u64 snap_id; + + snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); + if (snap_id == CEPH_NOSNAP) + return -ENOENT; + + spec->snap_id = snap_id; + } else { + spec->snap_id = CEPH_NOSNAP; + } + + return 0; +} + +/* + * A parent image will have all ids but none of the names. * - * When an image being mapped (not a parent) is probed, we have the - * pool name and pool id, image name and image id, and the snapshot - * name. The only thing we're missing is the snapshot id. + * All names in an rbd spec are dynamically allocated. It's OK if we + * can't figure out the name for an image id. */ -static int rbd_dev_spec_update(struct rbd_device *rbd_dev) +static int rbd_spec_fill_names(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; struct rbd_spec *spec = rbd_dev->spec; @@ -4299,24 +4352,9 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) const char *snap_name; int ret; - /* - * An image being mapped will have the pool name (etc.), but - * we need to look up the snapshot id. - */ - if (spec->pool_name) { - if (strcmp(spec->snap_name, RBD_SNAP_HEAD_NAME)) { - u64 snap_id; - - snap_id = rbd_snap_id_by_name(rbd_dev, spec->snap_name); - if (snap_id == CEPH_NOSNAP) - return -ENOENT; - spec->snap_id = snap_id; - } else { - spec->snap_id = CEPH_NOSNAP; - } - - return 0; - } + rbd_assert(spec->pool_id != CEPH_NOPOOL); + rbd_assert(spec->image_id); + rbd_assert(spec->snap_id != CEPH_NOSNAP); /* Get the pool name; we have to make our own copy of this */ @@ -4335,7 +4373,7 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) if (!image_name) rbd_warn(rbd_dev, "unable to get image name"); - /* Look up the snapshot name, and make a copy */ + /* Fetch the snapshot name */ snap_name = rbd_snap_name(rbd_dev, spec->snap_id); if (IS_ERR(snap_name)) { @@ -4348,10 +4386,10 @@ static int rbd_dev_spec_update(struct rbd_device *rbd_dev) spec->snap_name = snap_name; return 0; + out_err: kfree(image_name); kfree(pool_name); - return ret; } @@ -4483,43 +4521,22 @@ static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) return ret; } - /* - * If the image supports layering, get the parent info. We - * need to probe the first time regardless. Thereafter we - * only need to if there's a parent, to see if it has - * disappeared due to the mapped image getting flattened. - */ - if (rbd_dev->header.features & RBD_FEATURE_LAYERING && - (first_time || rbd_dev->parent_spec)) { - bool warn; - - ret = rbd_dev_v2_parent_info(rbd_dev); - if (ret) - return ret; - - /* - * Print a warning if this is the initial probe and - * the image has a parent. Don't print it if the - * image now being probed is itself a parent. We - * can tell at this point because we won't know its - * pool name yet (just its pool id). - */ - warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name; - if (first_time && warn) - rbd_warn(rbd_dev, "WARNING: kernel layering " - "is EXPERIMENTAL!"); - } - - if (rbd_dev->spec->snap_id == CEPH_NOSNAP) - if (rbd_dev->mapping.size != rbd_dev->header.image_size) - rbd_dev->mapping.size = rbd_dev->header.image_size; - ret = rbd_dev_v2_snap_context(rbd_dev); dout("rbd_dev_v2_snap_context returned %d\n", ret); return ret; } +static int rbd_dev_header_info(struct rbd_device *rbd_dev) +{ + rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); + + if (rbd_dev->image_format == 1) + return rbd_dev_v1_header_info(rbd_dev); + + return rbd_dev_v2_header_info(rbd_dev); +} + static int rbd_bus_add_dev(struct rbd_device *rbd_dev) { struct device *dev; @@ -5066,12 +5083,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) ret = rbd_dev_mapping_set(rbd_dev); if (ret) goto err_out_disk; + set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only); + rbd_dev->rq_wq = alloc_workqueue(rbd_dev->disk->disk_name, 0, 0); + if (!rbd_dev->rq_wq) + goto err_out_mapping; + ret = rbd_bus_add_dev(rbd_dev); if (ret) - goto err_out_mapping; + goto err_out_workqueue; /* Everything's ready. Announce the disk to the world. */ @@ -5083,6 +5105,9 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) return ret; +err_out_workqueue: + destroy_workqueue(rbd_dev->rq_wq); + rbd_dev->rq_wq = NULL; err_out_mapping: rbd_dev_mapping_clear(rbd_dev); err_out_disk: @@ -5155,8 +5180,6 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) ret = rbd_dev_image_id(rbd_dev); if (ret) return ret; - rbd_assert(rbd_dev->spec->image_id); - rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); ret = rbd_dev_header_name(rbd_dev); if (ret) @@ -5168,25 +5191,45 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) goto out_header_name; } - if (rbd_dev->image_format == 1) - ret = rbd_dev_v1_header_info(rbd_dev); - else - ret = rbd_dev_v2_header_info(rbd_dev); + ret = rbd_dev_header_info(rbd_dev); if (ret) goto err_out_watch; - ret = rbd_dev_spec_update(rbd_dev); + /* + * If this image is the one being mapped, we have pool name and + * id, image name and id, and snap name - need to fill snap id. + * Otherwise this is a parent image, identified by pool, image + * and snap ids - need to fill in names for those ids. + */ + if (mapping) + ret = rbd_spec_fill_snap_id(rbd_dev); + else + ret = rbd_spec_fill_names(rbd_dev); if (ret) goto err_out_probe; + if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { + ret = rbd_dev_v2_parent_info(rbd_dev); + if (ret) + goto err_out_probe; + + /* + * Need to warn users if this image is the one being + * mapped and has a parent. + */ + if (mapping && rbd_dev->parent_spec) + rbd_warn(rbd_dev, + "WARNING: kernel layering is EXPERIMENTAL!"); + } + ret = rbd_dev_probe_parent(rbd_dev); if (ret) goto err_out_probe; dout("discovered format %u image, header name is %s\n", rbd_dev->image_format, rbd_dev->header_name); - return 0; + err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: @@ -5199,9 +5242,6 @@ err_out_format: rbd_dev->image_format = 0; kfree(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; - - dout("probe failed, returning %d\n", ret); - return ret; } @@ -5243,7 +5283,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, /* The ceph file layout needs to fit pool id in 32 bits */ if (spec->pool_id > (u64)U32_MAX) { - rbd_warn(NULL, "pool id too large (%llu > %u)\n", + rbd_warn(NULL, "pool id too large (%llu > %u)", (unsigned long long)spec->pool_id, U32_MAX); rc = -EIO; goto err_out_client; @@ -5314,6 +5354,7 @@ static void rbd_dev_device_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); + destroy_workqueue(rbd_dev->rq_wq); rbd_free_disk(rbd_dev); clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_dev_mapping_clear(rbd_dev); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f63d358..0a58140 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -15,17 +15,22 @@ #include <linux/numa.h> #define PART_BITS 4 +#define VQ_NAME_LEN 16 static int major; static DEFINE_IDA(vd_index_ida); static struct workqueue_struct *virtblk_wq; +struct virtio_blk_vq { + struct virtqueue *vq; + spinlock_t lock; + char name[VQ_NAME_LEN]; +} ____cacheline_aligned_in_smp; + struct virtio_blk { struct virtio_device *vdev; - struct virtqueue *vq; - spinlock_t vq_lock; /* The disk structure for the kernel. */ struct gendisk *disk; @@ -47,6 +52,10 @@ struct virtio_blk /* Ida index - used to track minor number allocations. */ int index; + + /* num of vqs */ + int num_vqs; + struct virtio_blk_vq *vqs; }; struct virtblk_req @@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq) { struct virtio_blk *vblk = vq->vdev->priv; bool req_done = false; + int qid = vq->index; struct virtblk_req *vbr; unsigned long flags; unsigned int len; - spin_lock_irqsave(&vblk->vq_lock, flags); + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); do { virtqueue_disable_cb(vq); - while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { + while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { blk_mq_complete_request(vbr->req); req_done = true; } @@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq) /* In case queue is stopped waiting for more buffers. */ if (req_done) blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); - spin_unlock_irqrestore(&vblk->vq_lock, flags); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); } static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) @@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); unsigned long flags; unsigned int num; + int qid = hctx->queue_num; const bool last = (req->cmd_flags & REQ_END) != 0; int err; bool notify = false; @@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) vbr->out_hdr.type |= VIRTIO_BLK_T_IN; } - spin_lock_irqsave(&vblk->vq_lock, flags); - err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); + spin_lock_irqsave(&vblk->vqs[qid].lock, flags); + err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { - virtqueue_kick(vblk->vq); + virtqueue_kick(vblk->vqs[qid].vq); blk_mq_stop_hw_queue(hctx); - spin_unlock_irqrestore(&vblk->vq_lock, flags); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); /* Out of mem doesn't actually happen, since we fall back * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) @@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) return BLK_MQ_RQ_QUEUE_ERROR; } - if (last && virtqueue_kick_prepare(vblk->vq)) + if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) notify = true; - spin_unlock_irqrestore(&vblk->vq_lock, flags); + spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); if (notify) - virtqueue_notify(vblk->vq); + virtqueue_notify(vblk->vqs[qid].vq); return BLK_MQ_RQ_QUEUE_OK; } @@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev) static int init_vq(struct virtio_blk *vblk) { int err = 0; + int i; + vq_callback_t **callbacks; + const char **names; + struct virtqueue **vqs; + unsigned short num_vqs; + struct virtio_device *vdev = vblk->vdev; + + err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, + struct virtio_blk_config, num_queues, + &num_vqs); + if (err) + num_vqs = 1; + + vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); + if (!vblk->vqs) { + err = -ENOMEM; + goto out; + } + + names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); + if (!names) + goto err_names; + + callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); + if (!callbacks) + goto err_callbacks; + + vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); + if (!vqs) + goto err_vqs; - /* We expect one virtqueue, for output. */ - vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); - if (IS_ERR(vblk->vq)) - err = PTR_ERR(vblk->vq); + for (i = 0; i < num_vqs; i++) { + callbacks[i] = virtblk_done; + snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); + names[i] = vblk->vqs[i].name; + } + + /* Discover virtqueues and write information to configuration. */ + err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); + if (err) + goto err_find_vqs; + for (i = 0; i < num_vqs; i++) { + spin_lock_init(&vblk->vqs[i].lock); + vblk->vqs[i].vq = vqs[i]; + } + vblk->num_vqs = num_vqs; + + err_find_vqs: + kfree(vqs); + err_vqs: + kfree(callbacks); + err_callbacks: + kfree(names); + err_names: + if (err) + kfree(vblk->vqs); + out: return err; } @@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev) err = init_vq(vblk); if (err) goto out_free_vblk; - spin_lock_init(&vblk->vq_lock); /* FIXME: How many partitions? How long is a piece of string? */ vblk->disk = alloc_disk(1 << PART_BITS); @@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev) /* Default queue sizing is to fill the ring. */ if (!virtblk_queue_depth) { - virtblk_queue_depth = vblk->vq->num_free; + virtblk_queue_depth = vblk->vqs[0].vq->num_free; /* ... but without indirect descs, we use 2 descs per req */ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) virtblk_queue_depth /= 2; @@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev) memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); vblk->tag_set.ops = &virtio_mq_ops; - vblk->tag_set.nr_hw_queues = 1; vblk->tag_set.queue_depth = virtblk_queue_depth; vblk->tag_set.numa_node = NUMA_NO_NODE; vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev) sizeof(struct virtblk_req) + sizeof(struct scatterlist) * sg_elems; vblk->tag_set.driver_data = vblk; + vblk->tag_set.nr_hw_queues = vblk->num_vqs; err = blk_mq_alloc_tag_set(&vblk->tag_set); if (err) @@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev) refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); put_disk(vblk->disk); vdev->config->del_vqs(vdev); + kfree(vblk->vqs); kfree(vblk); /* Only free device id if we don't have any users */ @@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, - VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE + VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_BLK_F_MQ, }; static struct virtio_driver virtio_blk = { diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c index 8bc4229..4ff8687 100644 --- a/drivers/cpufreq/pmac64-cpufreq.c +++ b/drivers/cpufreq/pmac64-cpufreq.c @@ -499,8 +499,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode) } /* Lookup the i2c hwclock */ - for (hwclock = NULL; - (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){ + for_each_node_by_name(hwclock, "i2c-hwclock") { const char *loc = of_get_property(hwclock, "hwctrl-location", NULL); if (loc == NULL) diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 544f6d3..061407d 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop) goto error_out; } - /* Set ptr to new property if provided */ - if (new_prop) { - /* Single property */ - if (!strncmp(new_prop->name, "status", new_prop->length)) { - status = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sg-len", - new_prop->length)) { - maxsglen = new_prop; - - } else if (!strncmp(new_prop->name, "ibm,max-sync-cop", - new_prop->length)) { - maxsyncop = new_prop; - - } else { - /* - * Skip the update, the property being updated - * has no impact. - */ - goto out; - } - } + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; /* Perform property updates */ ret = nx842_OF_upd_status(new_devdata, status); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 8f6afbf..9b1ea0e 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -393,6 +393,22 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on MACH_SUN6I || COMPILE_TEST + depends on RESET_CONTROLLER + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine for Allwinner A31 SoCs. + +config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" + select DMA_ENGINE + depends on ARM || COMPILE_TEST + help + Support for "Type-AXI" NBPF DMA IPs from Renesas + config DMA_ENGINE bool @@ -406,6 +422,7 @@ config DMA_ACPI config DMA_OF def_bool y depends on OF + select DMA_ENGINE comment "DMA Clients" depends on DMA_ENGINE diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index bd9e7fa..c6adb92 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,5 +1,5 @@ -ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG -ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG +subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG +subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o @@ -48,3 +48,5 @@ obj-$(CONFIG_FSL_EDMA) += fsl-edma.o obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-y += xilinx/ obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o diff --git a/drivers/dma/TODO b/drivers/dma/TODO index 734ed02..b8045cd 100644 --- a/drivers/dma/TODO +++ b/drivers/dma/TODO @@ -7,7 +7,6 @@ TODO for slave dma - imx-dma - imx-sdma - mxs-dma.c - - dw_dmac - intel_mid_dma 4. Check other subsystems for dma drivers and merge/move to dmaengine 5. Remove dma_slave_config's dma direction. diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 8114731..e34024b 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1040,7 +1040,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x, if (early_bytes) { dev_vdbg(&pl08x->adev->dev, - "%s byte width LLIs (remain 0x%08x)\n", + "%s byte width LLIs (remain 0x%08zx)\n", __func__, bd.remainder); prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes, num_llis++, &total_bytes); @@ -1653,7 +1653,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg( static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct pl08x_dma_chan *plchan = to_pl08x_chan(chan); struct pl08x_driver_data *pl08x = plchan->host; @@ -1662,7 +1662,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic( dma_addr_t slave_addr; dev_dbg(&pl08x->adev->dev, - "%s prepare cyclic transaction of %d/%d bytes %s %s\n", + "%s prepare cyclic transaction of %zd/%zd bytes %s %s\n", __func__, period_len, buf_len, direction == DMA_MEM_TO_DEV ? "to" : "from", plchan->name); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index c13a3bb..ca9dd26 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -294,14 +294,16 @@ static int atc_get_bytes_left(struct dma_chan *chan) ret = -EINVAL; goto out; } - atchan->remain_desc -= (desc_cur->lli.ctrla & ATC_BTSIZE_MAX) - << (desc_first->tx_width); - if (atchan->remain_desc < 0) { + + count = (desc_cur->lli.ctrla & ATC_BTSIZE_MAX) + << desc_first->tx_width; + if (atchan->remain_desc < count) { ret = -EINVAL; goto out; - } else { - ret = atchan->remain_desc; } + + atchan->remain_desc -= count; + ret = atchan->remain_desc; } else { /* * Get residual bytes when current @@ -893,12 +895,11 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc, * @period_len: number of bytes for each period * @direction: transfer direction, to or from device * @flags: tx descriptor status flags - * @context: transfer context (ignored) */ static struct dma_async_tx_descriptor * atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma_slave *atslave = chan->private; diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c index a036021..6800797 100644 --- a/drivers/dma/bcm2835-dma.c +++ b/drivers/dma/bcm2835-dma.c @@ -335,7 +335,7 @@ static void bcm2835_dma_issue_pending(struct dma_chan *chan) static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); enum dma_slave_buswidth dev_width; diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c index 94c380f..6a9d89c 100644 --- a/drivers/dma/dma-jz4740.c +++ b/drivers/dma/dma-jz4740.c @@ -433,7 +433,7 @@ static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg( static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic( struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c); struct jz4740_dma_desc *desc; @@ -614,4 +614,4 @@ module_platform_driver(jz4740_dma_driver); MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); MODULE_DESCRIPTION("JZ4740 DMA driver"); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index a27ded5..1af731b 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -279,6 +279,19 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) channel_set_bit(dw, CH_EN, dwc->mask); } +static void dwc_dostart_first_queued(struct dw_dma_chan *dwc) +{ + struct dw_desc *desc; + + if (list_empty(&dwc->queue)) + return; + + list_move(dwc->queue.next, &dwc->active_list); + desc = dwc_first_active(dwc); + dev_vdbg(chan2dev(&dwc->chan), "%s: started %u\n", __func__, desc->txd.cookie); + dwc_dostart(dwc, desc); +} + /*----------------------------------------------------------------------*/ static void @@ -335,10 +348,7 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) * the completed ones. */ list_splice_init(&dwc->active_list, &list); - if (!list_empty(&dwc->queue)) { - list_move(dwc->queue.next, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } + dwc_dostart_first_queued(dwc); spin_unlock_irqrestore(&dwc->lock, flags); @@ -467,10 +477,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) /* Try to continue after resetting the channel... */ dwc_chan_disable(dw, dwc); - if (!list_empty(&dwc->queue)) { - list_move(dwc->queue.next, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } + dwc_dostart_first_queued(dwc); spin_unlock_irqrestore(&dwc->lock, flags); } @@ -677,17 +684,9 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) * possible, perhaps even appending to those already submitted * for DMA. But this is hard to do in a race-free manner. */ - if (list_empty(&dwc->active_list)) { - dev_vdbg(chan2dev(tx->chan), "%s: started %u\n", __func__, - desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->active_list); - dwc_dostart(dwc, dwc_first_active(dwc)); - } else { - dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, - desc->txd.cookie); - list_add_tail(&desc->desc_node, &dwc->queue); - } + dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n", __func__, desc->txd.cookie); + list_add_tail(&desc->desc_node, &dwc->queue); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1092,9 +1091,12 @@ dwc_tx_status(struct dma_chan *chan, static void dwc_issue_pending(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + unsigned long flags; - if (!list_empty(&dwc->queue)) - dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + spin_lock_irqsave(&dwc->lock, flags); + if (list_empty(&dwc->active_list)) + dwc_dostart_first_queued(dwc); + spin_unlock_irqrestore(&dwc->lock, flags); } static int dwc_alloc_chan_resources(struct dma_chan *chan) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index b512caf..7b65633 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/of.h> #include <linux/platform_data/edma.h> @@ -256,8 +257,13 @@ static int edma_terminate_all(struct edma_chan *echan) * echan->edesc is NULL and exit.) */ if (echan->edesc) { + int cyclic = echan->edesc->cyclic; echan->edesc = NULL; edma_stop(echan->ch_num); + /* Move the cyclic channel back to default queue */ + if (cyclic) + edma_assign_channel_eventq(echan->ch_num, + EVENTQ_DEFAULT); } vchan_get_all_descriptors(&echan->vchan, &head); @@ -592,7 +598,7 @@ struct dma_async_tx_descriptor *edma_prep_dma_memcpy( static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long tx_flags, void *context) + unsigned long tx_flags) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; @@ -718,12 +724,15 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( edesc->absync = ret; /* - * Enable interrupts for every period because callback - * has to be called for every period. + * Enable period interrupt only if it is requested */ - edesc->pset[i].param.opt |= TCINTEN; + if (tx_flags & DMA_PREP_INTERRUPT) + edesc->pset[i].param.opt |= TCINTEN; } + /* Place the cyclic channel to highest priority queue */ + edma_assign_channel_eventq(echan->ch_num, EVENTQ_0); + return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -993,7 +1002,7 @@ static int edma_dma_device_slave_caps(struct dma_chan *dchan, caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); caps->cmd_pause = true; caps->cmd_terminate = true; - caps->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; return 0; } @@ -1040,7 +1049,7 @@ static int edma_probe(struct platform_device *pdev) ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n"); - return -EIO; + return ecc->dummy_slot; } dma_cap_zero(ecc->dma_slave.cap_mask); @@ -1125,7 +1134,7 @@ static int edma_init(void) } } - if (EDMA_CTLRS == 2) { + if (!of_have_populated_dt() && EDMA_CTLRS == 2) { pdev1 = platform_device_register_full(&edma_dev_info1); if (IS_ERR(pdev1)) { platform_driver_unregister(&edma_driver); diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index cb4bf68..7650470 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -1092,7 +1092,6 @@ fail: * @period_len: length of a single period * @dir: direction of the operation * @flags: tx descriptor status flags - * @context: operation context (ignored) * * Prepares a descriptor for cyclic DMA operation. This means that once the * descriptor is submitted, we will be submitting in a @period_len sized @@ -1105,8 +1104,7 @@ fail: static struct dma_async_tx_descriptor * ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction dir, unsigned long flags, - void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan); struct ep93xx_dma_desc *desc, *first; diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c index b396a7f..3c5711d 100644 --- a/drivers/dma/fsl-edma.c +++ b/drivers/dma/fsl-edma.c @@ -248,11 +248,12 @@ static void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan, unsigned int slot, bool enable) { u32 ch = fsl_chan->vchan.chan.chan_id; - void __iomem *muxaddr = fsl_chan->edma->muxbase[ch / DMAMUX_NR]; + void __iomem *muxaddr; unsigned chans_per_mux, ch_off; chans_per_mux = fsl_chan->edma->n_chans / DMAMUX_NR; ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux; + muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux]; if (enable) edma_writeb(fsl_chan->edma, @@ -516,7 +517,7 @@ err: static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan); struct fsl_edma_desc *fsl_desc; @@ -724,6 +725,7 @@ static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec, { struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data; struct dma_chan *chan, *_chan; + unsigned long chans_per_mux = fsl_edma->n_chans / DMAMUX_NR; if (dma_spec->args_count != 2) return NULL; @@ -732,7 +734,7 @@ static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec, list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, device_node) { if (chan->client_count) continue; - if ((chan->chan_id / DMAMUX_NR) == dma_spec->args[0]) { + if ((chan->chan_id / chans_per_mux) == dma_spec->args[0]) { chan = dma_get_slave_channel(chan); if (chan) { chan->device->privatecnt++; diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index e0fec68..d5d6885 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -396,10 +396,17 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) struct fsldma_chan *chan = to_fsl_chan(tx->chan); struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); struct fsl_desc_sw *child; - unsigned long flags; dma_cookie_t cookie = -EINVAL; - spin_lock_irqsave(&chan->desc_lock, flags); + spin_lock_bh(&chan->desc_lock); + +#ifdef CONFIG_PM + if (unlikely(chan->pm_state != RUNNING)) { + chan_dbg(chan, "cannot submit due to suspend\n"); + spin_unlock_bh(&chan->desc_lock); + return -1; + } +#endif /* * assign cookies to all of the software descriptors @@ -412,7 +419,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) /* put this transaction onto the tail of the pending queue */ append_ld_queue(chan, desc); - spin_unlock_irqrestore(&chan->desc_lock, flags); + spin_unlock_bh(&chan->desc_lock); return cookie; } @@ -459,6 +466,88 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan) } /** + * fsldma_clean_completed_descriptor - free all descriptors which + * has been completed and acked + * @chan: Freescale DMA channel + * + * This function is used on all completed and acked descriptors. + * All descriptors should only be freed in this function. + */ +static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan) +{ + struct fsl_desc_sw *desc, *_desc; + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) + if (async_tx_test_ack(&desc->async_tx)) + fsl_dma_free_descriptor(chan, desc); +} + +/** + * fsldma_run_tx_complete_actions - cleanup a single link descriptor + * @chan: Freescale DMA channel + * @desc: descriptor to cleanup and free + * @cookie: Freescale DMA transaction identifier + * + * This function is used on a descriptor which has been executed by the DMA + * controller. It will run any callbacks, submit any dependencies. + */ +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan, + struct fsl_desc_sw *desc, dma_cookie_t cookie) +{ + struct dma_async_tx_descriptor *txd = &desc->async_tx; + dma_cookie_t ret = cookie; + + BUG_ON(txd->cookie < 0); + + if (txd->cookie > 0) { + ret = txd->cookie; + + /* Run the link descriptor callback function */ + if (txd->callback) { + chan_dbg(chan, "LD %p callback\n", desc); + txd->callback(txd->callback_param); + } + } + + /* Run any dependencies */ + dma_run_dependencies(txd); + + return ret; +} + +/** + * fsldma_clean_running_descriptor - move the completed descriptor from + * ld_running to ld_completed + * @chan: Freescale DMA channel + * @desc: the descriptor which is completed + * + * Free the descriptor directly if acked by async_tx api, or move it to + * queue ld_completed. + */ +static void fsldma_clean_running_descriptor(struct fsldma_chan *chan, + struct fsl_desc_sw *desc) +{ + /* Remove from the list of transactions */ + list_del(&desc->node); + + /* + * the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* + * Move this descriptor to the list of descriptors which is + * completed, but still awaiting the 'ack' bit to be set. + */ + list_add_tail(&desc->node, &chan->ld_completed); + return; + } + + dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); +} + +/** * fsl_chan_xfer_ld_queue - transfer any pending transactions * @chan : Freescale DMA channel * @@ -526,31 +615,58 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan) } /** - * fsldma_cleanup_descriptor - cleanup and free a single link descriptor + * fsldma_cleanup_descriptors - cleanup link descriptors which are completed + * and move them to ld_completed to free until flag 'ack' is set * @chan: Freescale DMA channel - * @desc: descriptor to cleanup and free * - * This function is used on a descriptor which has been executed by the DMA - * controller. It will run any callbacks, submit any dependencies, and then - * free the descriptor. + * This function is used on descriptors which have been executed by the DMA + * controller. It will run any callbacks, submit any dependencies, then + * free these descriptors if flag 'ack' is set. */ -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, - struct fsl_desc_sw *desc) +static void fsldma_cleanup_descriptors(struct fsldma_chan *chan) { - struct dma_async_tx_descriptor *txd = &desc->async_tx; + struct fsl_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + dma_addr_t curr_phys = get_cdar(chan); + int seen_current = 0; + + fsldma_clean_completed_descriptor(chan); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { + /* + * do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* + * stop the search if we reach the current descriptor and the + * channel is busy + */ + if (desc->async_tx.phys == curr_phys) { + seen_current = 1; + if (!dma_is_idle(chan)) + break; + } + + cookie = fsldma_run_tx_complete_actions(chan, desc, cookie); - /* Run the link descriptor callback function */ - if (txd->callback) { - chan_dbg(chan, "LD %p callback\n", desc); - txd->callback(txd->callback_param); + fsldma_clean_running_descriptor(chan, desc); } - /* Run any dependencies */ - dma_run_dependencies(txd); + /* + * Start any pending transactions automatically + * + * In the ideal case, we keep the DMA controller busy while we go + * ahead and free the descriptors below. + */ + fsl_chan_xfer_ld_queue(chan); - dma_descriptor_unmap(txd); - chan_dbg(chan, "LD %p free\n", desc); - dma_pool_free(chan->desc_pool, desc, txd->phys); + if (cookie > 0) + chan->common.completed_cookie = cookie; } /** @@ -617,13 +733,14 @@ static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan, static void fsl_dma_free_chan_resources(struct dma_chan *dchan) { struct fsldma_chan *chan = to_fsl_chan(dchan); - unsigned long flags; chan_dbg(chan, "free all channel resources\n"); - spin_lock_irqsave(&chan->desc_lock, flags); + spin_lock_bh(&chan->desc_lock); + fsldma_cleanup_descriptors(chan); fsldma_free_desc_list(chan, &chan->ld_pending); fsldma_free_desc_list(chan, &chan->ld_running); - spin_unlock_irqrestore(&chan->desc_lock, flags); + fsldma_free_desc_list(chan, &chan->ld_completed); + spin_unlock_bh(&chan->desc_lock); dma_pool_destroy(chan->desc_pool); chan->desc_pool = NULL; @@ -842,7 +959,6 @@ static int fsl_dma_device_control(struct dma_chan *dchan, { struct dma_slave_config *config; struct fsldma_chan *chan; - unsigned long flags; int size; if (!dchan) @@ -852,7 +968,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan, switch (cmd) { case DMA_TERMINATE_ALL: - spin_lock_irqsave(&chan->desc_lock, flags); + spin_lock_bh(&chan->desc_lock); /* Halt the DMA engine */ dma_halt(chan); @@ -860,9 +976,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan, /* Remove and free all of the descriptors in the LD queue */ fsldma_free_desc_list(chan, &chan->ld_pending); fsldma_free_desc_list(chan, &chan->ld_running); + fsldma_free_desc_list(chan, &chan->ld_completed); chan->idle = true; - spin_unlock_irqrestore(&chan->desc_lock, flags); + spin_unlock_bh(&chan->desc_lock); return 0; case DMA_SLAVE_CONFIG: @@ -904,11 +1021,10 @@ static int fsl_dma_device_control(struct dma_chan *dchan, static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan) { struct fsldma_chan *chan = to_fsl_chan(dchan); - unsigned long flags; - spin_lock_irqsave(&chan->desc_lock, flags); + spin_lock_bh(&chan->desc_lock); fsl_chan_xfer_ld_queue(chan); - spin_unlock_irqrestore(&chan->desc_lock, flags); + spin_unlock_bh(&chan->desc_lock); } /** @@ -919,6 +1035,17 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan, dma_cookie_t cookie, struct dma_tx_state *txstate) { + struct fsldma_chan *chan = to_fsl_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_bh(&chan->desc_lock); + fsldma_cleanup_descriptors(chan); + spin_unlock_bh(&chan->desc_lock); + return dma_cookie_status(dchan, cookie, txstate); } @@ -996,52 +1123,18 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data) static void dma_do_tasklet(unsigned long data) { struct fsldma_chan *chan = (struct fsldma_chan *)data; - struct fsl_desc_sw *desc, *_desc; - LIST_HEAD(ld_cleanup); - unsigned long flags; chan_dbg(chan, "tasklet entry\n"); - spin_lock_irqsave(&chan->desc_lock, flags); - - /* update the cookie if we have some descriptors to cleanup */ - if (!list_empty(&chan->ld_running)) { - dma_cookie_t cookie; - - desc = to_fsl_desc(chan->ld_running.prev); - cookie = desc->async_tx.cookie; - dma_cookie_complete(&desc->async_tx); - - chan_dbg(chan, "completed_cookie=%d\n", cookie); - } - - /* - * move the descriptors to a temporary list so we can drop the lock - * during the entire cleanup operation - */ - list_splice_tail_init(&chan->ld_running, &ld_cleanup); + spin_lock_bh(&chan->desc_lock); /* the hardware is now idle and ready for more */ chan->idle = true; - /* - * Start any pending transactions automatically - * - * In the ideal case, we keep the DMA controller busy while we go - * ahead and free the descriptors below. - */ - fsl_chan_xfer_ld_queue(chan); - spin_unlock_irqrestore(&chan->desc_lock, flags); - - /* Run the callback for each descriptor, in order */ - list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) { - - /* Remove from the list of transactions */ - list_del(&desc->node); + /* Run all cleanup for descriptors which have been completed */ + fsldma_cleanup_descriptors(chan); - /* Run all cleanup for this descriptor */ - fsldma_cleanup_descriptor(chan, desc); - } + spin_unlock_bh(&chan->desc_lock); chan_dbg(chan, "tasklet exit\n"); } @@ -1225,7 +1318,11 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev, spin_lock_init(&chan->desc_lock); INIT_LIST_HEAD(&chan->ld_pending); INIT_LIST_HEAD(&chan->ld_running); + INIT_LIST_HEAD(&chan->ld_completed); chan->idle = true; +#ifdef CONFIG_PM + chan->pm_state = RUNNING; +#endif chan->common.device = &fdev->common; dma_cookie_init(&chan->common); @@ -1365,6 +1462,69 @@ static int fsldma_of_remove(struct platform_device *op) return 0; } +#ifdef CONFIG_PM +static int fsldma_suspend_late(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fsldma_device *fdev = platform_get_drvdata(pdev); + struct fsldma_chan *chan; + int i; + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + spin_lock_bh(&chan->desc_lock); + if (unlikely(!chan->idle)) + goto out; + chan->regs_save.mr = get_mr(chan); + chan->pm_state = SUSPENDED; + spin_unlock_bh(&chan->desc_lock); + } + return 0; + +out: + for (; i >= 0; i--) { + chan = fdev->chan[i]; + if (!chan) + continue; + chan->pm_state = RUNNING; + spin_unlock_bh(&chan->desc_lock); + } + return -EBUSY; +} + +static int fsldma_resume_early(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct fsldma_device *fdev = platform_get_drvdata(pdev); + struct fsldma_chan *chan; + u32 mode; + int i; + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + chan = fdev->chan[i]; + if (!chan) + continue; + + spin_lock_bh(&chan->desc_lock); + mode = chan->regs_save.mr + & ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA; + set_mr(chan, mode); + chan->pm_state = RUNNING; + spin_unlock_bh(&chan->desc_lock); + } + + return 0; +} + +static const struct dev_pm_ops fsldma_pm_ops = { + .suspend_late = fsldma_suspend_late, + .resume_early = fsldma_resume_early, +}; +#endif + static const struct of_device_id fsldma_of_ids[] = { { .compatible = "fsl,elo3-dma", }, { .compatible = "fsl,eloplus-dma", }, @@ -1377,6 +1537,9 @@ static struct platform_driver fsldma_of_driver = { .name = "fsl-elo-dma", .owner = THIS_MODULE, .of_match_table = fsldma_of_ids, +#ifdef CONFIG_PM + .pm = &fsldma_pm_ops, +#endif }, .probe = fsldma_of_probe, .remove = fsldma_of_remove, diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h index d56e835..239c20c 100644 --- a/drivers/dma/fsldma.h +++ b/drivers/dma/fsldma.h @@ -134,12 +134,36 @@ struct fsldma_device { #define FSL_DMA_CHAN_PAUSE_EXT 0x00001000 #define FSL_DMA_CHAN_START_EXT 0x00002000 +#ifdef CONFIG_PM +struct fsldma_chan_regs_save { + u32 mr; +}; + +enum fsldma_pm_state { + RUNNING = 0, + SUSPENDED, +}; +#endif + struct fsldma_chan { char name[8]; /* Channel name */ struct fsldma_chan_regs __iomem *regs; spinlock_t desc_lock; /* Descriptor operation lock */ - struct list_head ld_pending; /* Link descriptors queue */ - struct list_head ld_running; /* Link descriptors queue */ + /* + * Descriptors which are queued to run, but have not yet been + * submitted to the hardware for execution + */ + struct list_head ld_pending; + /* + * Descriptors which are currently being executed by the hardware + */ + struct list_head ld_running; + /* + * Descriptors which have finished execution by the hardware. These + * descriptors have already had their cleanup actions run. They are + * waiting for the ACK bit to be set by the async_tx API. + */ + struct list_head ld_completed; /* Link descriptors queue */ struct dma_chan common; /* DMA common channel */ struct dma_pool *desc_pool; /* Descriptors pool */ struct device *dev; /* Channel device */ @@ -148,6 +172,10 @@ struct fsldma_chan { struct tasklet_struct tasklet; u32 feature; bool idle; /* DMA controller is idle */ +#ifdef CONFIG_PM + struct fsldma_chan_regs_save regs_save; + enum fsldma_pm_state pm_state; +#endif void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable); void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 286660a..9d2c9e7 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -866,7 +866,7 @@ static struct dma_async_tx_descriptor *imxdma_prep_slave_sg( static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct imxdma_channel *imxdmac = to_imxdma_chan(chan); struct imxdma_engine *imxdma = imxdmac->imxdma; diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 14867e3..f7626e3 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -271,6 +271,7 @@ struct sdma_channel { unsigned int chn_count; unsigned int chn_real_count; struct tasklet_struct tasklet; + struct imx_dma_data data; }; #define IMX_DMA_SG_LOOP BIT(0) @@ -749,6 +750,11 @@ static void sdma_get_pc(struct sdma_channel *sdmac, emi_2_per = sdma->script_addrs->asrc_2_mcu_addr; per_2_per = sdma->script_addrs->per_2_per_addr; break; + case IMX_DMATYPE_ASRC_SP: + per_2_emi = sdma->script_addrs->shp_2_mcu_addr; + emi_2_per = sdma->script_addrs->mcu_2_shp_addr; + per_2_per = sdma->script_addrs->per_2_per_addr; + break; case IMX_DMATYPE_MSHC: per_2_emi = sdma->script_addrs->mshc_2_mcu_addr; emi_2_per = sdma->script_addrs->mcu_2_mshc_addr; @@ -911,14 +917,13 @@ static int sdma_request_channel(struct sdma_channel *sdmac) int channel = sdmac->channel; int ret = -EBUSY; - sdmac->bd = dma_alloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, GFP_KERNEL); + sdmac->bd = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdmac->bd_phys, + GFP_KERNEL); if (!sdmac->bd) { ret = -ENOMEM; goto out; } - memset(sdmac->bd, 0, PAGE_SIZE); - sdma->channel_control[channel].base_bd_ptr = sdmac->bd_phys; sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys; @@ -1120,7 +1125,7 @@ err_out: static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct sdma_channel *sdmac = to_sdma_chan(chan); struct sdma_engine *sdma = sdmac->sdma; @@ -1414,12 +1419,14 @@ err_dma_alloc: static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param) { + struct sdma_channel *sdmac = to_sdma_chan(chan); struct imx_dma_data *data = fn_param; if (!imx_dma_is_general_purpose(chan)) return false; - chan->private = data; + sdmac->data = *data; + chan->private = &sdmac->data; return true; } diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index 128ca14..bbf6292 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -1532,11 +1532,17 @@ static int idmac_alloc_chan_resources(struct dma_chan *chan) #ifdef DEBUG if (chan->chan_id == IDMAC_IC_7) { ic_sof = ipu_irq_map(69); - if (ic_sof > 0) - request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan); + if (ic_sof > 0) { + ret = request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan); + if (ret) + dev_err(&chan->dev->device, "request irq failed for IC SOF"); + } ic_eof = ipu_irq_map(70); - if (ic_eof > 0) - request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan); + if (ic_eof > 0) { + ret = request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan); + if (ret) + dev_err(&chan->dev->device, "request irq failed for IC EOF"); + } } #endif diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index a7b186d..a1a4db5 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -601,7 +601,7 @@ static struct dma_async_tx_descriptor * mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan, dma_addr_t buf_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_pdma_chan *chan; struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new; diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index 724f7f4..6ad30e2 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -389,7 +389,7 @@ struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac) static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan); struct mmp_tdma_desc *desc; diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c index 2ad4373..881db2b 100644 --- a/drivers/dma/mpc512x_dma.c +++ b/drivers/dma/mpc512x_dma.c @@ -53,6 +53,7 @@ #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_irq.h> +#include <linux/of_dma.h> #include <linux/of_platform.h> #include <linux/random.h> @@ -1036,7 +1037,15 @@ static int mpc_dma_probe(struct platform_device *op) if (retval) goto err_free2; - return retval; + /* Register with OF helpers for DMA lookups (nonfatal) */ + if (dev->of_node) { + retval = of_dma_controller_register(dev->of_node, + of_dma_xlate_by_chan_id, mdma); + if (retval) + dev_warn(dev, "Could not register for OF lookup\n"); + } + + return 0; err_free2: if (mdma->is_mpc8308) @@ -1057,6 +1066,8 @@ static int mpc_dma_remove(struct platform_device *op) struct device *dev = &op->dev; struct mpc_dma *mdma = dev_get_drvdata(dev); + if (dev->of_node) + of_dma_controller_free(dev->of_node); dma_async_device_unregister(&mdma->dma); if (mdma->is_mpc8308) { free_irq(mdma->irq2, mdma); diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c index ead4913..5ea6120 100644 --- a/drivers/dma/mxs-dma.c +++ b/drivers/dma/mxs-dma.c @@ -413,16 +413,14 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan) struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; int ret; - mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev, - CCW_BLOCK_SIZE, &mxs_chan->ccw_phys, - GFP_KERNEL); + mxs_chan->ccw = dma_zalloc_coherent(mxs_dma->dma_device.dev, + CCW_BLOCK_SIZE, + &mxs_chan->ccw_phys, GFP_KERNEL); if (!mxs_chan->ccw) { ret = -ENOMEM; goto err_alloc; } - memset(mxs_chan->ccw, 0, CCW_BLOCK_SIZE); - if (mxs_chan->chan_irq != NO_IRQ) { ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler, 0, "mxs-dma", mxs_dma); @@ -591,7 +589,7 @@ err_out: static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan); struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma; diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c new file mode 100644 index 0000000..5aeada5 --- /dev/null +++ b/drivers/dma/nbpfaxi.c @@ -0,0 +1,1517 @@ +/* + * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd. + * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/log2.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <dt-bindings/dma/nbpfaxi.h> + +#include "dmaengine.h" + +#define NBPF_REG_CHAN_OFFSET 0 +#define NBPF_REG_CHAN_SIZE 0x40 + +/* Channel Current Transaction Byte register */ +#define NBPF_CHAN_CUR_TR_BYTE 0x20 + +/* Channel Status register */ +#define NBPF_CHAN_STAT 0x24 +#define NBPF_CHAN_STAT_EN 1 +#define NBPF_CHAN_STAT_TACT 4 +#define NBPF_CHAN_STAT_ERR 0x10 +#define NBPF_CHAN_STAT_END 0x20 +#define NBPF_CHAN_STAT_TC 0x40 +#define NBPF_CHAN_STAT_DER 0x400 + +/* Channel Control register */ +#define NBPF_CHAN_CTRL 0x28 +#define NBPF_CHAN_CTRL_SETEN 1 +#define NBPF_CHAN_CTRL_CLREN 2 +#define NBPF_CHAN_CTRL_STG 4 +#define NBPF_CHAN_CTRL_SWRST 8 +#define NBPF_CHAN_CTRL_CLRRQ 0x10 +#define NBPF_CHAN_CTRL_CLREND 0x20 +#define NBPF_CHAN_CTRL_CLRTC 0x40 +#define NBPF_CHAN_CTRL_SETSUS 0x100 +#define NBPF_CHAN_CTRL_CLRSUS 0x200 + +/* Channel Configuration register */ +#define NBPF_CHAN_CFG 0x2c +#define NBPF_CHAN_CFG_SEL 7 /* terminal SELect: 0..7 */ +#define NBPF_CHAN_CFG_REQD 8 /* REQuest Direction: DMAREQ is 0: input, 1: output */ +#define NBPF_CHAN_CFG_LOEN 0x10 /* LOw ENable: low DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_HIEN 0x20 /* HIgh ENable: high DMA request line is: 0: inactive, 1: active */ +#define NBPF_CHAN_CFG_LVL 0x40 /* LeVeL: DMA request line is sensed as 0: edge, 1: level */ +#define NBPF_CHAN_CFG_AM 0x700 /* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */ +#define NBPF_CHAN_CFG_SDS 0xf000 /* Source Data Size: 0: 8 bits,... , 7: 1024 bits */ +#define NBPF_CHAN_CFG_DDS 0xf0000 /* Destination Data Size: as above */ +#define NBPF_CHAN_CFG_SAD 0x100000 /* Source ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_DAD 0x200000 /* Destination ADdress counting: 0: increment, 1: fixed */ +#define NBPF_CHAN_CFG_TM 0x400000 /* Transfer Mode: 0: single, 1: block TM */ +#define NBPF_CHAN_CFG_DEM 0x1000000 /* DMAEND interrupt Mask */ +#define NBPF_CHAN_CFG_TCM 0x2000000 /* DMATCO interrupt Mask */ +#define NBPF_CHAN_CFG_SBE 0x8000000 /* Sweep Buffer Enable */ +#define NBPF_CHAN_CFG_RSEL 0x10000000 /* RM: Register Set sELect */ +#define NBPF_CHAN_CFG_RSW 0x20000000 /* RM: Register Select sWitch */ +#define NBPF_CHAN_CFG_REN 0x40000000 /* RM: Register Set Enable */ +#define NBPF_CHAN_CFG_DMS 0x80000000 /* 0: register mode (RM), 1: link mode (LM) */ + +#define NBPF_CHAN_NXLA 0x38 +#define NBPF_CHAN_CRLA 0x3c + +/* Link Header field */ +#define NBPF_HEADER_LV 1 +#define NBPF_HEADER_LE 2 +#define NBPF_HEADER_WBD 4 +#define NBPF_HEADER_DIM 8 + +#define NBPF_CTRL 0x300 +#define NBPF_CTRL_PR 1 /* 0: fixed priority, 1: round robin */ +#define NBPF_CTRL_LVINT 2 /* DMAEND and DMAERR signalling: 0: pulse, 1: level */ + +#define NBPF_DSTAT_ER 0x314 +#define NBPF_DSTAT_END 0x318 + +#define NBPF_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +struct nbpf_config { + int num_channels; + int buffer_size; +}; + +/* + * We've got 3 types of objects, used to describe DMA transfers: + * 1. high-level descriptor, containing a struct dma_async_tx_descriptor object + * in it, used to communicate with the user + * 2. hardware DMA link descriptors, that we pass to DMAC for DMA transfer + * queuing, these must be DMAable, using either the streaming DMA API or + * allocated from coherent memory - one per SG segment + * 3. one per SG segment descriptors, used to manage HW link descriptors from + * (2). They do not have to be DMAable. They can either be (a) allocated + * together with link descriptors as mixed (DMA / CPU) objects, or (b) + * separately. Even if allocated separately it would be best to link them + * to link descriptors once during channel resource allocation and always + * use them as a single object. + * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall be + * treated as a single SG segment descriptor. + */ + +struct nbpf_link_reg { + u32 header; + u32 src_addr; + u32 dst_addr; + u32 transaction_size; + u32 config; + u32 interval; + u32 extension; + u32 next; +} __packed; + +struct nbpf_device; +struct nbpf_channel; +struct nbpf_desc; + +struct nbpf_link_desc { + struct nbpf_link_reg *hwdesc; + dma_addr_t hwdesc_dma_addr; + struct nbpf_desc *desc; + struct list_head node; +}; + +/** + * struct nbpf_desc - DMA transfer descriptor + * @async_tx: dmaengine object + * @user_wait: waiting for a user ack + * @length: total transfer length + * @sg: list of hardware descriptors, represented by struct nbpf_link_desc + * @node: member in channel descriptor lists + */ +struct nbpf_desc { + struct dma_async_tx_descriptor async_tx; + bool user_wait; + size_t length; + struct nbpf_channel *chan; + struct list_head sg; + struct list_head node; +}; + +/* Take a wild guess: allocate 4 segments per descriptor */ +#define NBPF_SEGMENTS_PER_DESC 4 +#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) / \ + (sizeof(struct nbpf_desc) + \ + NBPF_SEGMENTS_PER_DESC * \ + (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg)))) +#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE) + +struct nbpf_desc_page { + struct list_head node; + struct nbpf_desc desc[NBPF_DESCS_PER_PAGE]; + struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE]; + struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE]; +}; + +/** + * struct nbpf_channel - one DMAC channel + * @dma_chan: standard dmaengine channel object + * @base: register address base + * @nbpf: DMAC + * @name: IRQ name + * @irq: IRQ number + * @slave_addr: address for slave DMA + * @slave_width:slave data size in bytes + * @slave_burst:maximum slave burst size in bytes + * @terminal: DMA terminal, assigned to this channel + * @dmarq_cfg: DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG + * @flags: configuration flags from DT + * @lock: protect descriptor lists + * @free_links: list of free link descriptors + * @free: list of free descriptors + * @queued: list of queued descriptors + * @active: list of descriptors, scheduled for processing + * @done: list of completed descriptors, waiting post-processing + * @desc_page: list of additionally allocated descriptor pages - if any + */ +struct nbpf_channel { + struct dma_chan dma_chan; + struct tasklet_struct tasklet; + void __iomem *base; + struct nbpf_device *nbpf; + char name[16]; + int irq; + dma_addr_t slave_src_addr; + size_t slave_src_width; + size_t slave_src_burst; + dma_addr_t slave_dst_addr; + size_t slave_dst_width; + size_t slave_dst_burst; + unsigned int terminal; + u32 dmarq_cfg; + unsigned long flags; + spinlock_t lock; + struct list_head free_links; + struct list_head free; + struct list_head queued; + struct list_head active; + struct list_head done; + struct list_head desc_page; + struct nbpf_desc *running; + bool paused; +}; + +struct nbpf_device { + struct dma_device dma_dev; + void __iomem *base; + struct clk *clk; + const struct nbpf_config *config; + struct nbpf_channel chan[]; +}; + +enum nbpf_model { + NBPF1B4, + NBPF1B8, + NBPF1B16, + NBPF4B4, + NBPF4B8, + NBPF4B16, + NBPF8B4, + NBPF8B8, + NBPF8B16, +}; + +static struct nbpf_config nbpf_cfg[] = { + [NBPF1B4] = { + .num_channels = 1, + .buffer_size = 4, + }, + [NBPF1B8] = { + .num_channels = 1, + .buffer_size = 8, + }, + [NBPF1B16] = { + .num_channels = 1, + .buffer_size = 16, + }, + [NBPF4B4] = { + .num_channels = 4, + .buffer_size = 4, + }, + [NBPF4B8] = { + .num_channels = 4, + .buffer_size = 8, + }, + [NBPF4B16] = { + .num_channels = 4, + .buffer_size = 16, + }, + [NBPF8B4] = { + .num_channels = 8, + .buffer_size = 4, + }, + [NBPF8B8] = { + .num_channels = 8, + .buffer_size = 8, + }, + [NBPF8B16] = { + .num_channels = 8, + .buffer_size = 16, + }, +}; + +#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan) + +/* + * dmaengine drivers seem to have a lot in common and instead of sharing more + * code, they reimplement those common algorithms independently. In this driver + * we try to separate the hardware-specific part from the (largely) generic + * part. This improves code readability and makes it possible in the future to + * reuse the generic code in form of a helper library. That generic code should + * be suitable for various DMA controllers, using transfer descriptors in RAM + * and pushing one SG list at a time to the DMA controller. + */ + +/* Hardware-specific part */ + +static inline u32 nbpf_chan_read(struct nbpf_channel *chan, + unsigned int offset) +{ + u32 data = ioread32(chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); + return data; +} + +static inline void nbpf_chan_write(struct nbpf_channel *chan, + unsigned int offset, u32 data) +{ + iowrite32(data, chan->base + offset); + dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, chan->base, offset, data); +} + +static inline u32 nbpf_read(struct nbpf_device *nbpf, + unsigned int offset) +{ + u32 data = ioread32(nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); + return data; +} + +static inline void nbpf_write(struct nbpf_device *nbpf, + unsigned int offset, u32 data) +{ + iowrite32(data, nbpf->base + offset); + dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n", + __func__, nbpf->base, offset, data); +} + +static void nbpf_chan_halt(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN); +} + +static bool nbpf_status_get(struct nbpf_channel *chan) +{ + u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END); + + return status & BIT(chan - chan->nbpf->chan); +} + +static void nbpf_status_ack(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND); +} + +static u32 nbpf_error_get(struct nbpf_device *nbpf) +{ + return nbpf_read(nbpf, NBPF_DSTAT_ER); +} + +static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error) +{ + return nbpf->chan + __ffs(error); +} + +static void nbpf_error_clear(struct nbpf_channel *chan) +{ + u32 status; + int i; + + /* Stop the channel, make sure DMA has been aborted */ + nbpf_chan_halt(chan); + + for (i = 1000; i; i--) { + status = nbpf_chan_read(chan, NBPF_CHAN_STAT); + if (!(status & NBPF_CHAN_STAT_TACT)) + break; + cpu_relax(); + } + + if (!i) + dev_err(chan->dma_chan.device->dev, + "%s(): abort timeout, channel status 0x%x\n", __func__, status); + + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST); +} + +static int nbpf_start(struct nbpf_desc *desc) +{ + struct nbpf_channel *chan = desc->chan; + struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node); + + nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr); + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS); + chan->paused = false; + + /* Software trigger MEMCPY - only MEMCPY uses the block mode */ + if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM) + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG); + + dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__, + nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA)); + + return 0; +} + +static void nbpf_chan_prepare(struct nbpf_channel *chan) +{ + chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ? NBPF_CHAN_CFG_HIEN : 0) | + (chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) | + (chan->flags & NBPF_SLAVE_RQ_LEVEL ? + NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) | + chan->terminal; +} + +static void nbpf_chan_prepare_default(struct nbpf_channel *chan) +{ + /* Don't output DMAACK */ + chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400; + chan->terminal = 0; + chan->flags = 0; +} + +static void nbpf_chan_configure(struct nbpf_channel *chan) +{ + /* + * We assume, that only the link mode and DMA request line configuration + * have to be set in the configuration register manually. Dynamic + * per-transfer configuration will be loaded from transfer descriptors. + */ + nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg); +} + +static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size) +{ + /* Maximum supported bursts depend on the buffer size */ + return min_t(int, __ffs(size), ilog2(nbpf->config->buffer_size * 8)); +} + +static size_t nbpf_xfer_size(struct nbpf_device *nbpf, + enum dma_slave_buswidth width, u32 burst) +{ + size_t size; + + if (!burst) + burst = 1; + + switch (width) { + case DMA_SLAVE_BUSWIDTH_8_BYTES: + size = 8 * burst; + break; + + case DMA_SLAVE_BUSWIDTH_4_BYTES: + size = 4 * burst; + break; + + case DMA_SLAVE_BUSWIDTH_2_BYTES: + size = 2 * burst; + break; + + default: + pr_warn("%s(): invalid bus width %u\n", __func__, width); + case DMA_SLAVE_BUSWIDTH_1_BYTE: + size = burst; + } + + return nbpf_xfer_ds(nbpf, size); +} + +/* + * We need a way to recognise slaves, whose data is sent "raw" over the bus, + * i.e. it isn't known in advance how many bytes will be received. Therefore + * the slave driver has to provide a "large enough" buffer and either read the + * buffer, when it is full, or detect, that some data has arrived, then wait for + * a timeout, if no more data arrives - receive what's already there. We want to + * handle such slaves in a special way to allow an optimised mode for other + * users, for whom the amount of data is known in advance. So far there's no way + * to recognise such slaves. We use a data-width check to distinguish between + * the SD host and the PL011 UART. + */ + +static int nbpf_prep_one(struct nbpf_link_desc *ldesc, + enum dma_transfer_direction direction, + dma_addr_t src, dma_addr_t dst, size_t size, bool last) +{ + struct nbpf_link_reg *hwdesc = ldesc->hwdesc; + struct nbpf_desc *desc = ldesc->desc; + struct nbpf_channel *chan = desc->chan; + struct device *dev = chan->dma_chan.device->dev; + size_t mem_xfer, slave_xfer; + bool can_burst; + + hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV | + (last ? NBPF_HEADER_LE : 0); + + hwdesc->src_addr = src; + hwdesc->dst_addr = dst; + hwdesc->transaction_size = size; + + /* + * set config: SAD, DAD, DDS, SDS, etc. + * Note on transfer sizes: the DMAC can perform unaligned DMA transfers, + * but it is important to have transaction size a multiple of both + * receiver and transmitter transfer sizes. It is also possible to use + * different RAM and device transfer sizes, and it does work well with + * some devices, e.g. with V08R07S01E SD host controllers, which can use + * 128 byte transfers. But this doesn't work with other devices, + * especially when the transaction size is unknown. This is the case, + * e.g. with serial drivers like amba-pl011.c. For reception it sets up + * the transaction size of 4K and if fewer bytes are received, it + * pauses DMA and reads out data received via DMA as well as those left + * in the Rx FIFO. For this to work with the RAM side using burst + * transfers we enable the SBE bit and terminate the transfer in our + * DMA_PAUSE handler. + */ + mem_xfer = nbpf_xfer_ds(chan->nbpf, size); + + switch (direction) { + case DMA_DEV_TO_MEM: + can_burst = chan->slave_src_width >= 3; + slave_xfer = min(mem_xfer, can_burst ? + chan->slave_src_burst : chan->slave_src_width); + /* + * Is the slave narrower than 64 bits, i.e. isn't using the full + * bus width and cannot use bursts? + */ + if (mem_xfer > chan->slave_src_burst && !can_burst) + mem_xfer = chan->slave_src_burst; + /* Device-to-RAM DMA is unreliable without REQD set */ + hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) | + (NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD | + NBPF_CHAN_CFG_SBE; + break; + + case DMA_MEM_TO_DEV: + slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ? + chan->slave_dst_burst : chan->slave_dst_width); + hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD; + break; + + case DMA_MEM_TO_MEM: + hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM | + (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) | + (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)); + break; + + default: + return -EINVAL; + } + + hwdesc->config |= chan->dmarq_cfg | (last ? 0 : NBPF_CHAN_CFG_DEM) | + NBPF_CHAN_CFG_DMS; + + dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n", + __func__, &ldesc->hwdesc_dma_addr, hwdesc->header, + hwdesc->config, size, &src, &dst); + + dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc), + DMA_TO_DEVICE); + + return 0; +} + +static size_t nbpf_bytes_left(struct nbpf_channel *chan) +{ + return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE); +} + +static void nbpf_configure(struct nbpf_device *nbpf) +{ + nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT); +} + +static void nbpf_pause(struct nbpf_channel *chan) +{ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS); + /* See comment in nbpf_prep_one() */ + nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN); +} + +/* Generic part */ + +/* DMA ENGINE functions */ +static void nbpf_issue_pending(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + unsigned long flags; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + if (list_empty(&chan->queued)) + goto unlock; + + list_splice_tail_init(&chan->queued, &chan->active); + + if (!chan->running) { + struct nbpf_desc *desc = list_first_entry(&chan->active, + struct nbpf_desc, node); + if (!nbpf_start(desc)) + chan->running = desc; + } + +unlock: + spin_unlock_irqrestore(&chan->lock, flags); +} + +static enum dma_status nbpf_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + enum dma_status status = dma_cookie_status(dchan, cookie, state); + + if (state) { + dma_cookie_t running; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + running = chan->running ? chan->running->async_tx.cookie : -EINVAL; + + if (cookie == running) { + state->residue = nbpf_bytes_left(chan); + dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__, + state->residue); + } else if (status == DMA_IN_PROGRESS) { + struct nbpf_desc *desc; + bool found = false; + + list_for_each_entry(desc, &chan->active, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + } + + if (!found) + list_for_each_entry(desc, &chan->queued, node) + if (desc->async_tx.cookie == cookie) { + found = true; + break; + + } + + state->residue = found ? desc->length : 0; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + + if (chan->paused) + status = DMA_PAUSED; + + return status; +} + +static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx); + struct nbpf_channel *chan = desc->chan; + unsigned long flags; + dma_cookie_t cookie; + + spin_lock_irqsave(&chan->lock, flags); + cookie = dma_cookie_assign(tx); + list_add_tail(&desc->node, &chan->queued); + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie); + + return cookie; +} + +static int nbpf_desc_page_alloc(struct nbpf_channel *chan) +{ + struct dma_chan *dchan = &chan->dma_chan; + struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA); + struct nbpf_link_desc *ldesc; + struct nbpf_link_reg *hwdesc; + struct nbpf_desc *desc; + LIST_HEAD(head); + LIST_HEAD(lhead); + int i; + struct device *dev = dchan->device->dev; + + if (!dpage) + return -ENOMEM; + + dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n", + __func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage)); + + for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++, hwdesc++) { + ldesc->hwdesc = hwdesc; + list_add_tail(&ldesc->node, &lhead); + ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev, + hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE); + + dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__, + hwdesc, &ldesc->hwdesc_dma_addr); + } + + for (i = 0, desc = dpage->desc; + i < ARRAY_SIZE(dpage->desc); + i++, desc++) { + dma_async_tx_descriptor_init(&desc->async_tx, dchan); + desc->async_tx.tx_submit = nbpf_tx_submit; + desc->chan = chan; + INIT_LIST_HEAD(&desc->sg); + list_add_tail(&desc->node, &head); + } + + /* + * This function cannot be called from interrupt context, so, no need to + * save flags + */ + spin_lock_irq(&chan->lock); + list_splice_tail(&lhead, &chan->free_links); + list_splice_tail(&head, &chan->free); + list_add(&dpage->node, &chan->desc_page); + spin_unlock_irq(&chan->lock); + + return ARRAY_SIZE(dpage->desc); +} + +static void nbpf_desc_put(struct nbpf_desc *desc) +{ + struct nbpf_channel *chan = desc->chan; + struct nbpf_link_desc *ldesc, *tmp; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(ldesc, tmp, &desc->sg, node) + list_move(&ldesc->node, &chan->free_links); + + list_add(&desc->node, &chan->free); + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void nbpf_scan_acked(struct nbpf_channel *chan) +{ + struct nbpf_desc *desc, *tmp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->lock, flags); + list_for_each_entry_safe(desc, tmp, &chan->done, node) + if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) { + list_move(&desc->node, &head); + desc->user_wait = false; + } + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, tmp, &head, node) { + list_del(&desc->node); + nbpf_desc_put(desc); + } +} + +/* + * We have to allocate descriptors with the channel lock dropped. This means, + * before we re-acquire the lock buffers can be taken already, so we have to + * re-check after re-acquiring the lock and possibly retry, if buffers are gone + * again. + */ +static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len) +{ + struct nbpf_desc *desc = NULL; + struct nbpf_link_desc *ldesc, *prev = NULL; + + nbpf_scan_acked(chan); + + spin_lock_irq(&chan->lock); + + do { + int i = 0, ret; + + if (list_empty(&chan->free)) { + /* No more free descriptors */ + spin_unlock_irq(&chan->lock); + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) + return NULL; + spin_lock_irq(&chan->lock); + continue; + } + desc = list_first_entry(&chan->free, struct nbpf_desc, node); + list_del(&desc->node); + + do { + if (list_empty(&chan->free_links)) { + /* No more free link descriptors */ + spin_unlock_irq(&chan->lock); + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) { + nbpf_desc_put(desc); + return NULL; + } + spin_lock_irq(&chan->lock); + continue; + } + + ldesc = list_first_entry(&chan->free_links, + struct nbpf_link_desc, node); + ldesc->desc = desc; + if (prev) + prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr; + + prev = ldesc; + list_move_tail(&ldesc->node, &desc->sg); + + i++; + } while (i < len); + } while (!desc); + + prev->hwdesc->next = 0; + + spin_unlock_irq(&chan->lock); + + return desc; +} + +static void nbpf_chan_idle(struct nbpf_channel *chan) +{ + struct nbpf_desc *desc, *tmp; + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->lock, flags); + + list_splice_init(&chan->done, &head); + list_splice_init(&chan->active, &head); + list_splice_init(&chan->queued, &head); + + chan->running = NULL; + + spin_unlock_irqrestore(&chan->lock, flags); + + list_for_each_entry_safe(desc, tmp, &head, node) { + dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n", + __func__, desc, desc->async_tx.cookie); + list_del(&desc->node); + nbpf_desc_put(desc); + } +} + +static int nbpf_control(struct dma_chan *dchan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct dma_slave_config *config; + + dev_dbg(dchan->device->dev, "Entry %s(%d)\n", __func__, cmd); + + switch (cmd) { + case DMA_TERMINATE_ALL: + dev_dbg(dchan->device->dev, "Terminating\n"); + nbpf_chan_halt(chan); + nbpf_chan_idle(chan); + break; + + case DMA_SLAVE_CONFIG: + if (!arg) + return -EINVAL; + config = (struct dma_slave_config *)arg; + + /* + * We could check config->slave_id to match chan->terminal here, + * but with DT they would be coming from the same source, so + * such a check would be superflous + */ + + chan->slave_dst_addr = config->dst_addr; + chan->slave_dst_width = nbpf_xfer_size(chan->nbpf, + config->dst_addr_width, 1); + chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf, + config->dst_addr_width, + config->dst_maxburst); + chan->slave_src_addr = config->src_addr; + chan->slave_src_width = nbpf_xfer_size(chan->nbpf, + config->src_addr_width, 1); + chan->slave_src_burst = nbpf_xfer_size(chan->nbpf, + config->src_addr_width, + config->src_maxburst); + break; + + case DMA_PAUSE: + chan->paused = true; + nbpf_pause(chan); + break; + + default: + return -ENXIO; + } + + return 0; +} + +static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan, + struct scatterlist *src_sg, struct scatterlist *dst_sg, + size_t len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct nbpf_link_desc *ldesc; + struct scatterlist *mem_sg; + struct nbpf_desc *desc; + bool inc_src, inc_dst; + size_t data_len = 0; + int i = 0; + + switch (direction) { + case DMA_DEV_TO_MEM: + mem_sg = dst_sg; + inc_src = false; + inc_dst = true; + break; + + case DMA_MEM_TO_DEV: + mem_sg = src_sg; + inc_src = true; + inc_dst = false; + break; + + default: + case DMA_MEM_TO_MEM: + mem_sg = src_sg; + inc_src = true; + inc_dst = true; + } + + desc = nbpf_desc_get(chan, len); + if (!desc) + return NULL; + + desc->async_tx.flags = flags; + desc->async_tx.cookie = -EBUSY; + desc->user_wait = false; + + /* + * This is a private descriptor list, and we own the descriptor. No need + * to lock. + */ + list_for_each_entry(ldesc, &desc->sg, node) { + int ret = nbpf_prep_one(ldesc, direction, + sg_dma_address(src_sg), + sg_dma_address(dst_sg), + sg_dma_len(mem_sg), + i == len - 1); + if (ret < 0) { + nbpf_desc_put(desc); + return NULL; + } + data_len += sg_dma_len(mem_sg); + if (inc_src) + src_sg = sg_next(src_sg); + if (inc_dst) + dst_sg = sg_next(dst_sg); + mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg; + i++; + } + + desc->length = data_len; + + /* The user has to return the descriptor to us ASAP via .tx_submit() */ + return &desc->async_tx; +} + +static struct dma_async_tx_descriptor *nbpf_prep_memcpy( + struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist dst_sg; + struct scatterlist src_sg; + + sg_init_table(&dst_sg, 1); + sg_init_table(&src_sg, 1); + + sg_dma_address(&dst_sg) = dst; + sg_dma_address(&src_sg) = src; + + sg_dma_len(&dst_sg) = len; + sg_dma_len(&src_sg) = len; + + dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n", + __func__, len, &src, &dst); + + return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1, + DMA_MEM_TO_MEM, flags); +} + +static struct dma_async_tx_descriptor *nbpf_prep_memcpy_sg( + struct dma_chan *dchan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + + if (dst_nents != src_nents) + return NULL; + + return nbpf_prep_sg(chan, src_sg, dst_sg, src_nents, + DMA_MEM_TO_MEM, flags); +} + +static struct dma_async_tx_descriptor *nbpf_prep_slave_sg( + struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len, + enum dma_transfer_direction direction, unsigned long flags, void *context) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct scatterlist slave_sg; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + sg_init_table(&slave_sg, 1); + + switch (direction) { + case DMA_MEM_TO_DEV: + sg_dma_address(&slave_sg) = chan->slave_dst_addr; + return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len, + direction, flags); + + case DMA_DEV_TO_MEM: + sg_dma_address(&slave_sg) = chan->slave_src_addr; + return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len, + direction, flags); + + default: + return NULL; + } +} + +static int nbpf_alloc_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + int ret; + + INIT_LIST_HEAD(&chan->free); + INIT_LIST_HEAD(&chan->free_links); + INIT_LIST_HEAD(&chan->queued); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->done); + + ret = nbpf_desc_page_alloc(chan); + if (ret < 0) + return ret; + + dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__, + chan->terminal); + + nbpf_chan_configure(chan); + + return ret; +} + +static void nbpf_free_chan_resources(struct dma_chan *dchan) +{ + struct nbpf_channel *chan = nbpf_to_chan(dchan); + struct nbpf_desc_page *dpage, *tmp; + + dev_dbg(dchan->device->dev, "Entry %s()\n", __func__); + + nbpf_chan_halt(chan); + nbpf_chan_idle(chan); + /* Clean up for if a channel is re-used for MEMCPY after slave DMA */ + nbpf_chan_prepare_default(chan); + + list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) { + struct nbpf_link_desc *ldesc; + int i; + list_del(&dpage->node); + for (i = 0, ldesc = dpage->ldesc; + i < ARRAY_SIZE(dpage->ldesc); + i++, ldesc++) + dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr, + sizeof(*ldesc->hwdesc), DMA_TO_DEVICE); + free_page((unsigned long)dpage); + } +} + +static int nbpf_slave_caps(struct dma_chan *dchan, + struct dma_slave_caps *caps) +{ + caps->src_addr_widths = NBPF_DMA_BUSWIDTHS; + caps->dstn_addr_widths = NBPF_DMA_BUSWIDTHS; + caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + caps->cmd_pause = false; + caps->cmd_terminate = true; + + return 0; +} + +static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct nbpf_device *nbpf = ofdma->of_dma_data; + struct dma_chan *dchan; + struct nbpf_channel *chan; + + if (dma_spec->args_count != 2) + return NULL; + + dchan = dma_get_any_slave_channel(&nbpf->dma_dev); + if (!dchan) + return NULL; + + dev_dbg(dchan->device->dev, "Entry %s(%s)\n", __func__, + dma_spec->np->name); + + chan = nbpf_to_chan(dchan); + + chan->terminal = dma_spec->args[0]; + chan->flags = dma_spec->args[1]; + + nbpf_chan_prepare(chan); + nbpf_chan_configure(chan); + + return dchan; +} + +static void nbpf_chan_tasklet(unsigned long data) +{ + struct nbpf_channel *chan = (struct nbpf_channel *)data; + struct nbpf_desc *desc, *tmp; + dma_async_tx_callback callback; + void *param; + + while (!list_empty(&chan->done)) { + bool found = false, must_put, recycling = false; + + spin_lock_irq(&chan->lock); + + list_for_each_entry_safe(desc, tmp, &chan->done, node) { + if (!desc->user_wait) { + /* Newly completed descriptor, have to process */ + found = true; + break; + } else if (async_tx_test_ack(&desc->async_tx)) { + /* + * This descriptor was waiting for a user ACK, + * it can be recycled now. + */ + list_del(&desc->node); + spin_unlock_irq(&chan->lock); + nbpf_desc_put(desc); + recycling = true; + break; + } + } + + if (recycling) + continue; + + if (!found) { + /* This can happen if TERMINATE_ALL has been called */ + spin_unlock_irq(&chan->lock); + break; + } + + dma_cookie_complete(&desc->async_tx); + + /* + * With released lock we cannot dereference desc, maybe it's + * still on the "done" list + */ + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + must_put = true; + } else { + desc->user_wait = true; + must_put = false; + } + + callback = desc->async_tx.callback; + param = desc->async_tx.callback_param; + + /* ack and callback completed descriptor */ + spin_unlock_irq(&chan->lock); + + if (callback) + callback(param); + + if (must_put) + nbpf_desc_put(desc); + } +} + +static irqreturn_t nbpf_chan_irq(int irq, void *dev) +{ + struct nbpf_channel *chan = dev; + bool done = nbpf_status_get(chan); + struct nbpf_desc *desc; + irqreturn_t ret; + bool bh = false; + + if (!done) + return IRQ_NONE; + + nbpf_status_ack(chan); + + dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__); + + spin_lock(&chan->lock); + desc = chan->running; + if (WARN_ON(!desc)) { + ret = IRQ_NONE; + goto unlock; + } else { + ret = IRQ_HANDLED; + bh = true; + } + + list_move_tail(&desc->node, &chan->done); + chan->running = NULL; + + if (!list_empty(&chan->active)) { + desc = list_first_entry(&chan->active, + struct nbpf_desc, node); + if (!nbpf_start(desc)) + chan->running = desc; + } + +unlock: + spin_unlock(&chan->lock); + + if (bh) + tasklet_schedule(&chan->tasklet); + + return ret; +} + +static irqreturn_t nbpf_err_irq(int irq, void *dev) +{ + struct nbpf_device *nbpf = dev; + u32 error = nbpf_error_get(nbpf); + + dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq); + + if (!error) + return IRQ_NONE; + + do { + struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error); + /* On error: abort all queued transfers, no callback */ + nbpf_error_clear(chan); + nbpf_chan_idle(chan); + error = nbpf_error_get(nbpf); + } while (error); + + return IRQ_HANDLED; +} + +static int nbpf_chan_probe(struct nbpf_device *nbpf, int n) +{ + struct dma_device *dma_dev = &nbpf->dma_dev; + struct nbpf_channel *chan = nbpf->chan + n; + int ret; + + chan->nbpf = nbpf; + chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n; + INIT_LIST_HEAD(&chan->desc_page); + spin_lock_init(&chan->lock); + chan->dma_chan.device = dma_dev; + dma_cookie_init(&chan->dma_chan); + nbpf_chan_prepare_default(chan); + + dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base); + + snprintf(chan->name, sizeof(chan->name), "nbpf %d", n); + + tasklet_init(&chan->tasklet, nbpf_chan_tasklet, (unsigned long)chan); + ret = devm_request_irq(dma_dev->dev, chan->irq, + nbpf_chan_irq, IRQF_SHARED, + chan->name, chan); + if (ret < 0) + return ret; + + /* Add the channel to DMA device channel list */ + list_add_tail(&chan->dma_chan.device_node, + &dma_dev->channels); + + return 0; +} + +static const struct of_device_id nbpf_match[] = { + {.compatible = "renesas,nbpfaxi64dmac1b4", .data = &nbpf_cfg[NBPF1B4]}, + {.compatible = "renesas,nbpfaxi64dmac1b8", .data = &nbpf_cfg[NBPF1B8]}, + {.compatible = "renesas,nbpfaxi64dmac1b16", .data = &nbpf_cfg[NBPF1B16]}, + {.compatible = "renesas,nbpfaxi64dmac4b4", .data = &nbpf_cfg[NBPF4B4]}, + {.compatible = "renesas,nbpfaxi64dmac4b8", .data = &nbpf_cfg[NBPF4B8]}, + {.compatible = "renesas,nbpfaxi64dmac4b16", .data = &nbpf_cfg[NBPF4B16]}, + {.compatible = "renesas,nbpfaxi64dmac8b4", .data = &nbpf_cfg[NBPF8B4]}, + {.compatible = "renesas,nbpfaxi64dmac8b8", .data = &nbpf_cfg[NBPF8B8]}, + {.compatible = "renesas,nbpfaxi64dmac8b16", .data = &nbpf_cfg[NBPF8B16]}, + {} +}; +MODULE_DEVICE_TABLE(of, nbpf_match); + +static int nbpf_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct of_device_id *of_id = of_match_device(nbpf_match, dev); + struct device_node *np = dev->of_node; + struct nbpf_device *nbpf; + struct dma_device *dma_dev; + struct resource *iomem, *irq_res; + const struct nbpf_config *cfg; + int num_channels; + int ret, irq, eirq, i; + int irqbuf[9] /* maximum 8 channels + error IRQ */; + unsigned int irqs = 0; + + BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE); + + /* DT only */ + if (!np || !of_id || !of_id->data) + return -ENODEV; + + cfg = of_id->data; + num_channels = cfg->num_channels; + + nbpf = devm_kzalloc(dev, sizeof(*nbpf) + num_channels * + sizeof(nbpf->chan[0]), GFP_KERNEL); + if (!nbpf) { + dev_err(dev, "Memory allocation failed\n"); + return -ENOMEM; + } + dma_dev = &nbpf->dma_dev; + dma_dev->dev = dev; + + iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + nbpf->base = devm_ioremap_resource(dev, iomem); + if (IS_ERR(nbpf->base)) + return PTR_ERR(nbpf->base); + + nbpf->clk = devm_clk_get(dev, NULL); + if (IS_ERR(nbpf->clk)) + return PTR_ERR(nbpf->clk); + + nbpf->config = cfg; + + for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) { + irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i); + if (!irq_res) + break; + + for (irq = irq_res->start; irq <= irq_res->end; + irq++, irqs++) + irqbuf[irqs] = irq; + } + + /* + * 3 IRQ resource schemes are supported: + * 1. 1 shared IRQ for error and all channels + * 2. 2 IRQs: one for error and one shared for all channels + * 3. 1 IRQ for error and an own IRQ for each channel + */ + if (irqs != 1 && irqs != 2 && irqs != num_channels + 1) + return -ENXIO; + + if (irqs == 1) { + eirq = irqbuf[0]; + + for (i = 0; i <= num_channels; i++) + nbpf->chan[i].irq = irqbuf[0]; + } else { + eirq = platform_get_irq_byname(pdev, "error"); + if (eirq < 0) + return eirq; + + if (irqs == num_channels + 1) { + struct nbpf_channel *chan; + + for (i = 0, chan = nbpf->chan; i <= num_channels; + i++, chan++) { + /* Skip the error IRQ */ + if (irqbuf[i] == eirq) + i++; + chan->irq = irqbuf[i]; + } + + if (chan != nbpf->chan + num_channels) + return -EINVAL; + } else { + /* 2 IRQs and more than one channel */ + if (irqbuf[0] == eirq) + irq = irqbuf[1]; + else + irq = irqbuf[0]; + + for (i = 0; i <= num_channels; i++) + nbpf->chan[i].irq = irq; + } + } + + ret = devm_request_irq(dev, eirq, nbpf_err_irq, + IRQF_SHARED, "dma error", nbpf); + if (ret < 0) + return ret; + + INIT_LIST_HEAD(&dma_dev->channels); + + /* Create DMA Channel */ + for (i = 0; i < num_channels; i++) { + ret = nbpf_chan_probe(nbpf, i); + if (ret < 0) + return ret; + } + + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + dma_cap_set(DMA_SG, dma_dev->cap_mask); + + /* Common and MEMCPY operations */ + dma_dev->device_alloc_chan_resources + = nbpf_alloc_chan_resources; + dma_dev->device_free_chan_resources = nbpf_free_chan_resources; + dma_dev->device_prep_dma_sg = nbpf_prep_memcpy_sg; + dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy; + dma_dev->device_tx_status = nbpf_tx_status; + dma_dev->device_issue_pending = nbpf_issue_pending; + dma_dev->device_slave_caps = nbpf_slave_caps; + + /* + * If we drop support for unaligned MEMCPY buffer addresses and / or + * lengths by setting + * dma_dev->copy_align = 4; + * then we can set transfer length to 4 bytes in nbpf_prep_one() for + * DMA_MEM_TO_MEM + */ + + /* Compulsory for DMA_SLAVE fields */ + dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg; + dma_dev->device_control = nbpf_control; + + platform_set_drvdata(pdev, nbpf); + + ret = clk_prepare_enable(nbpf->clk); + if (ret < 0) + return ret; + + nbpf_configure(nbpf); + + ret = dma_async_device_register(dma_dev); + if (ret < 0) + goto e_clk_off; + + ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf); + if (ret < 0) + goto e_dma_dev_unreg; + + return 0; + +e_dma_dev_unreg: + dma_async_device_unregister(dma_dev); +e_clk_off: + clk_disable_unprepare(nbpf->clk); + + return ret; +} + +static int nbpf_remove(struct platform_device *pdev) +{ + struct nbpf_device *nbpf = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&nbpf->dma_dev); + clk_disable_unprepare(nbpf->clk); + + return 0; +} + +static struct platform_device_id nbpf_ids[] = { + {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, + {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, + {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, + {"nbpfaxi64dmac4b4", (kernel_ulong_t)&nbpf_cfg[NBPF4B4]}, + {"nbpfaxi64dmac4b8", (kernel_ulong_t)&nbpf_cfg[NBPF4B8]}, + {"nbpfaxi64dmac4b16", (kernel_ulong_t)&nbpf_cfg[NBPF4B16]}, + {"nbpfaxi64dmac8b4", (kernel_ulong_t)&nbpf_cfg[NBPF8B4]}, + {"nbpfaxi64dmac8b8", (kernel_ulong_t)&nbpf_cfg[NBPF8B8]}, + {"nbpfaxi64dmac8b16", (kernel_ulong_t)&nbpf_cfg[NBPF8B16]}, + {}, +}; +MODULE_DEVICE_TABLE(platform, nbpf_ids); + +#ifdef CONFIG_PM_RUNTIME +static int nbpf_runtime_suspend(struct device *dev) +{ + struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev)); + clk_disable_unprepare(nbpf->clk); + return 0; +} + +static int nbpf_runtime_resume(struct device *dev) +{ + struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev)); + return clk_prepare_enable(nbpf->clk); +} +#endif + +static const struct dev_pm_ops nbpf_pm_ops = { + SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL) +}; + +static struct platform_driver nbpf_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "dma-nbpf", + .of_match_table = nbpf_match, + .pm = &nbpf_pm_ops, + }, + .id_table = nbpf_ids, + .probe = nbpf_probe, + .remove = nbpf_remove, +}; + +module_platform_driver(nbpf_driver); + +MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>"); +MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c index e8fe9dc..d5fbeaa 100644 --- a/drivers/dma/of-dma.c +++ b/drivers/dma/of-dma.c @@ -218,3 +218,38 @@ struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, &dma_spec->args[0]); } EXPORT_SYMBOL_GPL(of_dma_simple_xlate); + +/** + * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data + * + * This function can be used as the of xlate callback for DMA driver which wants + * to match the channel based on the channel id. When using this xlate function + * the #dma-cells propety of the DMA controller dt node needs to be set to 1. + * The data parameter of of_dma_controller_register must be a pointer to the + * dma_device struct the function should match upon. + * + * Returns pointer to appropriate dma channel on success or NULL on error. + */ +struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_device *dev = ofdma->of_dma_data; + struct dma_chan *chan, *candidate = NULL; + + if (!dev || dma_spec->args_count != 1) + return NULL; + + list_for_each_entry(chan, &dev->channels, device_node) + if (chan->chan_id == dma_spec->args[0]) { + candidate = chan; + break; + } + + if (!candidate) + return NULL; + + return dma_get_slave_channel(candidate); +} +EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id); diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index b19f04f..4cf7d9a 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -853,8 +853,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg( static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, - size_t period_len, enum dma_transfer_direction dir, unsigned long flags, - void *context) + size_t period_len, enum dma_transfer_direction dir, unsigned long flags) { struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 73fa9b7..d5149aa 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -33,26 +33,15 @@ #define PL330_MAX_IRQS 32 #define PL330_MAX_PERI 32 -enum pl330_srccachectrl { - SCCTRL0, /* Noncacheable and nonbufferable */ - SCCTRL1, /* Bufferable only */ - SCCTRL2, /* Cacheable, but do not allocate */ - SCCTRL3, /* Cacheable and bufferable, but do not allocate */ - SINVALID1, - SINVALID2, - SCCTRL6, /* Cacheable write-through, allocate on reads only */ - SCCTRL7, /* Cacheable write-back, allocate on reads only */ -}; - -enum pl330_dstcachectrl { - DCCTRL0, /* Noncacheable and nonbufferable */ - DCCTRL1, /* Bufferable only */ - DCCTRL2, /* Cacheable, but do not allocate */ - DCCTRL3, /* Cacheable and bufferable, but do not allocate */ - DINVALID1, /* AWCACHE = 0x1000 */ - DINVALID2, - DCCTRL6, /* Cacheable write-through, allocate on writes only */ - DCCTRL7, /* Cacheable write-back, allocate on writes only */ +enum pl330_cachectrl { + CCTRL0, /* Noncacheable and nonbufferable */ + CCTRL1, /* Bufferable only */ + CCTRL2, /* Cacheable, but do not allocate */ + CCTRL3, /* Cacheable and bufferable, but do not allocate */ + INVALID1, /* AWCACHE = 0x1000 */ + INVALID2, + CCTRL6, /* Cacheable write-through, allocate on writes only */ + CCTRL7, /* Cacheable write-back, allocate on writes only */ }; enum pl330_byteswap { @@ -63,13 +52,6 @@ enum pl330_byteswap { SWAP_16, }; -enum pl330_reqtype { - MEMTOMEM, - MEMTODEV, - DEVTOMEM, - DEVTODEV, -}; - /* Register and Bit field Definitions */ #define DS 0x0 #define DS_ST_STOP 0x0 @@ -263,9 +245,6 @@ enum pl330_reqtype { */ #define MCODE_BUFF_PER_REQ 256 -/* If the _pl330_req is available to the client */ -#define IS_FREE(req) (*((u8 *)((req)->mc_cpu)) == CMD_DMAEND) - /* Use this _only_ to wait on transient states */ #define UNTIL(t, s) while (!(_state(t) & (s))) cpu_relax(); @@ -300,27 +279,6 @@ struct pl330_config { u32 irq_ns; }; -/* Handle to the DMAC provided to the PL330 core */ -struct pl330_info { - /* Owning device */ - struct device *dev; - /* Size of MicroCode buffers for each channel. */ - unsigned mcbufsz; - /* ioremap'ed address of PL330 registers. */ - void __iomem *base; - /* Client can freely use it. */ - void *client_data; - /* PL330 core data, Client must not touch it. */ - void *pl330_data; - /* Populated by the PL330 core driver during pl330_add */ - struct pl330_config pcfg; - /* - * If the DMAC has some reset mechanism, then the - * client may want to provide pointer to the method. - */ - void (*dmac_reset)(struct pl330_info *pi); -}; - /** * Request Configuration. * The PL330 core does not modify this and uses the last @@ -344,8 +302,8 @@ struct pl330_reqcfg { unsigned brst_len:5; unsigned brst_size:3; /* in power of 2 */ - enum pl330_dstcachectrl dcctl; - enum pl330_srccachectrl scctl; + enum pl330_cachectrl dcctl; + enum pl330_cachectrl scctl; enum pl330_byteswap swap; struct pl330_config *pcfg; }; @@ -359,11 +317,6 @@ struct pl330_xfer { u32 dst_addr; /* Size to xfer */ u32 bytes; - /* - * Pointer to next xfer in the list. - * The last xfer in the req must point to NULL. - */ - struct pl330_xfer *next; }; /* The xfer callbacks are made with one of these arguments. */ @@ -376,67 +329,6 @@ enum pl330_op_err { PL330_ERR_FAIL, }; -/* A request defining Scatter-Gather List ending with NULL xfer. */ -struct pl330_req { - enum pl330_reqtype rqtype; - /* Index of peripheral for the xfer. */ - unsigned peri:5; - /* Unique token for this xfer, set by the client. */ - void *token; - /* Callback to be called after xfer. */ - void (*xfer_cb)(void *token, enum pl330_op_err err); - /* If NULL, req will be done at last set parameters. */ - struct pl330_reqcfg *cfg; - /* Pointer to first xfer in the request. */ - struct pl330_xfer *x; - /* Hook to attach to DMAC's list of reqs with due callback */ - struct list_head rqd; -}; - -/* - * To know the status of the channel and DMAC, the client - * provides a pointer to this structure. The PL330 core - * fills it with current information. - */ -struct pl330_chanstatus { - /* - * If the DMAC engine halted due to some error, - * the client should remove-add DMAC. - */ - bool dmac_halted; - /* - * If channel is halted due to some error, - * the client should ABORT/FLUSH and START the channel. - */ - bool faulting; - /* Location of last load */ - u32 src_addr; - /* Location of last store */ - u32 dst_addr; - /* - * Pointer to the currently active req, NULL if channel is - * inactive, even though the requests may be present. - */ - struct pl330_req *top_req; - /* Pointer to req waiting second in the queue if any. */ - struct pl330_req *wait_req; -}; - -enum pl330_chan_op { - /* Start the channel */ - PL330_OP_START, - /* Abort the active xfer */ - PL330_OP_ABORT, - /* Stop xfer and flush queue */ - PL330_OP_FLUSH, -}; - -struct _xfer_spec { - u32 ccr; - struct pl330_req *r; - struct pl330_xfer *x; -}; - enum dmamov_dst { SAR = 0, CCR, @@ -454,12 +346,12 @@ enum pl330_cond { ALWAYS, }; +struct dma_pl330_desc; + struct _pl330_req { u32 mc_bus; void *mc_cpu; - /* Number of bytes taken to setup MC for the req */ - u32 mc_len; - struct pl330_req *r; + struct dma_pl330_desc *desc; }; /* ToBeDone for tasklet */ @@ -491,30 +383,6 @@ enum pl330_dmac_state { DYING, }; -/* A DMAC */ -struct pl330_dmac { - spinlock_t lock; - /* Holds list of reqs with due callbacks */ - struct list_head req_done; - /* Pointer to platform specific stuff */ - struct pl330_info *pinfo; - /* Maximum possible events/irqs */ - int events[32]; - /* BUS address of MicroCode buffer */ - dma_addr_t mcode_bus; - /* CPU address of MicroCode buffer */ - void *mcode_cpu; - /* List of all Channel threads */ - struct pl330_thread *channels; - /* Pointer to the MANAGER thread */ - struct pl330_thread *manager; - /* To handle bad news in interrupt */ - struct tasklet_struct tasks; - struct _pl330_tbd dmac_tbd; - /* State of DMAC operation */ - enum pl330_dmac_state state; -}; - enum desc_status { /* In the DMAC pool */ FREE, @@ -555,15 +423,16 @@ struct dma_pl330_chan { * As the parent, this DMAC also provides descriptors * to the channel. */ - struct dma_pl330_dmac *dmac; + struct pl330_dmac *dmac; /* To protect channel manipulation */ spinlock_t lock; - /* Token of a hardware channel thread of PL330 DMAC - * NULL if the channel is available to be acquired. + /* + * Hardware channel thread of PL330 DMAC. NULL if the channel is + * available. */ - void *pl330_chid; + struct pl330_thread *thread; /* For D-to-M and M-to-D channels */ int burst_sz; /* the peripheral fifo width */ @@ -574,9 +443,7 @@ struct dma_pl330_chan { bool cyclic; }; -struct dma_pl330_dmac { - struct pl330_info pif; - +struct pl330_dmac { /* DMA-Engine Device */ struct dma_device ddma; @@ -588,6 +455,32 @@ struct dma_pl330_dmac { /* To protect desc_pool manipulation */ spinlock_t pool_lock; + /* Size of MicroCode buffers for each channel. */ + unsigned mcbufsz; + /* ioremap'ed address of PL330 registers. */ + void __iomem *base; + /* Populated by the PL330 core driver during pl330_add */ + struct pl330_config pcfg; + + spinlock_t lock; + /* Maximum possible events/irqs */ + int events[32]; + /* BUS address of MicroCode buffer */ + dma_addr_t mcode_bus; + /* CPU address of MicroCode buffer */ + void *mcode_cpu; + /* List of all Channel threads */ + struct pl330_thread *channels; + /* Pointer to the MANAGER thread */ + struct pl330_thread *manager; + /* To handle bad news in interrupt */ + struct tasklet_struct tasks; + struct _pl330_tbd dmac_tbd; + /* State of DMAC operation */ + enum pl330_dmac_state state; + /* Holds list of reqs with due callbacks */ + struct list_head req_done; + /* Peripheral channels connected to this DMAC */ unsigned int num_peripherals; struct dma_pl330_chan *peripherals; /* keep at end */ @@ -604,49 +497,43 @@ struct dma_pl330_desc { struct pl330_xfer px; struct pl330_reqcfg rqcfg; - struct pl330_req req; enum desc_status status; /* The channel which currently holds this desc */ struct dma_pl330_chan *pchan; + + enum dma_transfer_direction rqtype; + /* Index of peripheral for the xfer. */ + unsigned peri:5; + /* Hook to attach to DMAC's list of reqs with due callback */ + struct list_head rqd; }; -static inline void _callback(struct pl330_req *r, enum pl330_op_err err) -{ - if (r && r->xfer_cb) - r->xfer_cb(r->token, err); -} +struct _xfer_spec { + u32 ccr; + struct dma_pl330_desc *desc; +}; static inline bool _queue_empty(struct pl330_thread *thrd) { - return (IS_FREE(&thrd->req[0]) && IS_FREE(&thrd->req[1])) - ? true : false; + return thrd->req[0].desc == NULL && thrd->req[1].desc == NULL; } static inline bool _queue_full(struct pl330_thread *thrd) { - return (IS_FREE(&thrd->req[0]) || IS_FREE(&thrd->req[1])) - ? false : true; + return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL; } static inline bool is_manager(struct pl330_thread *thrd) { - struct pl330_dmac *pl330 = thrd->dmac; - - /* MANAGER is indexed at the end */ - if (thrd->id == pl330->pinfo->pcfg.num_chan) - return true; - else - return false; + return thrd->dmac->manager == thrd; } /* If manager of the thread is in Non-Secure mode */ static inline bool _manager_ns(struct pl330_thread *thrd) { - struct pl330_dmac *pl330 = thrd->dmac; - - return (pl330->pinfo->pcfg.mode & DMAC_MODE_NS) ? true : false; + return (thrd->dmac->pcfg.mode & DMAC_MODE_NS) ? true : false; } static inline u32 get_revision(u32 periph_id) @@ -1004,7 +891,7 @@ static inline u32 _emit_GO(unsigned dry_run, u8 buf[], /* Returns Time-Out */ static bool _until_dmac_idle(struct pl330_thread *thrd) { - void __iomem *regs = thrd->dmac->pinfo->base; + void __iomem *regs = thrd->dmac->base; unsigned long loops = msecs_to_loops(5); do { @@ -1024,7 +911,7 @@ static bool _until_dmac_idle(struct pl330_thread *thrd) static inline void _execute_DBGINSN(struct pl330_thread *thrd, u8 insn[], bool as_manager) { - void __iomem *regs = thrd->dmac->pinfo->base; + void __iomem *regs = thrd->dmac->base; u32 val; val = (insn[0] << 16) | (insn[1] << 24); @@ -1039,7 +926,7 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd, /* If timed out due to halted state-machine */ if (_until_dmac_idle(thrd)) { - dev_err(thrd->dmac->pinfo->dev, "DMAC halted!\n"); + dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n"); return; } @@ -1047,25 +934,9 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd, writel(0, regs + DBGCMD); } -/* - * Mark a _pl330_req as free. - * We do it by writing DMAEND as the first instruction - * because no valid request is going to have DMAEND as - * its first instruction to execute. - */ -static void mark_free(struct pl330_thread *thrd, int idx) -{ - struct _pl330_req *req = &thrd->req[idx]; - - _emit_END(0, req->mc_cpu); - req->mc_len = 0; - - thrd->req_running = -1; -} - static inline u32 _state(struct pl330_thread *thrd) { - void __iomem *regs = thrd->dmac->pinfo->base; + void __iomem *regs = thrd->dmac->base; u32 val; if (is_manager(thrd)) @@ -1123,7 +994,7 @@ static inline u32 _state(struct pl330_thread *thrd) static void _stop(struct pl330_thread *thrd) { - void __iomem *regs = thrd->dmac->pinfo->base; + void __iomem *regs = thrd->dmac->base; u8 insn[6] = {0, 0, 0, 0, 0, 0}; if (_state(thrd) == PL330_STATE_FAULT_COMPLETING) @@ -1146,9 +1017,9 @@ static void _stop(struct pl330_thread *thrd) /* Start doing req 'idx' of thread 'thrd' */ static bool _trigger(struct pl330_thread *thrd) { - void __iomem *regs = thrd->dmac->pinfo->base; + void __iomem *regs = thrd->dmac->base; struct _pl330_req *req; - struct pl330_req *r; + struct dma_pl330_desc *desc; struct _arg_GO go; unsigned ns; u8 insn[6] = {0, 0, 0, 0, 0, 0}; @@ -1159,32 +1030,27 @@ static bool _trigger(struct pl330_thread *thrd) return true; idx = 1 - thrd->lstenq; - if (!IS_FREE(&thrd->req[idx])) + if (thrd->req[idx].desc != NULL) { req = &thrd->req[idx]; - else { + } else { idx = thrd->lstenq; - if (!IS_FREE(&thrd->req[idx])) + if (thrd->req[idx].desc != NULL) req = &thrd->req[idx]; else req = NULL; } /* Return if no request */ - if (!req || !req->r) + if (!req) return true; - r = req->r; + desc = req->desc; - if (r->cfg) - ns = r->cfg->nonsecure ? 1 : 0; - else if (readl(regs + CS(thrd->id)) & CS_CNS) - ns = 1; - else - ns = 0; + ns = desc->rqcfg.nonsecure ? 1 : 0; /* See 'Abort Sources' point-4 at Page 2-25 */ if (_manager_ns(thrd) && !ns) - dev_info(thrd->dmac->pinfo->dev, "%s:%d Recipe for ABORT!\n", + dev_info(thrd->dmac->ddma.dev, "%s:%d Recipe for ABORT!\n", __func__, __LINE__); go.chan = thrd->id; @@ -1240,7 +1106,7 @@ static inline int _ldst_memtomem(unsigned dry_run, u8 buf[], const struct _xfer_spec *pxs, int cyc) { int off = 0; - struct pl330_config *pcfg = pxs->r->cfg->pcfg; + struct pl330_config *pcfg = pxs->desc->rqcfg.pcfg; /* check lock-up free version */ if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) { @@ -1266,10 +1132,10 @@ static inline int _ldst_devtomem(unsigned dry_run, u8 buf[], int off = 0; while (cyc--) { - off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri); - off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->desc->peri); + off += _emit_LDP(dry_run, &buf[off], SINGLE, pxs->desc->peri); off += _emit_ST(dry_run, &buf[off], ALWAYS); - off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri); + off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri); } return off; @@ -1281,10 +1147,10 @@ static inline int _ldst_memtodev(unsigned dry_run, u8 buf[], int off = 0; while (cyc--) { - off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->r->peri); + off += _emit_WFP(dry_run, &buf[off], SINGLE, pxs->desc->peri); off += _emit_LD(dry_run, &buf[off], ALWAYS); - off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->r->peri); - off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri); + off += _emit_STP(dry_run, &buf[off], SINGLE, pxs->desc->peri); + off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri); } return off; @@ -1295,14 +1161,14 @@ static int _bursts(unsigned dry_run, u8 buf[], { int off = 0; - switch (pxs->r->rqtype) { - case MEMTODEV: + switch (pxs->desc->rqtype) { + case DMA_MEM_TO_DEV: off += _ldst_memtodev(dry_run, &buf[off], pxs, cyc); break; - case DEVTOMEM: + case DMA_DEV_TO_MEM: off += _ldst_devtomem(dry_run, &buf[off], pxs, cyc); break; - case MEMTOMEM: + case DMA_MEM_TO_MEM: off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc); break; default: @@ -1395,7 +1261,7 @@ static inline int _loop(unsigned dry_run, u8 buf[], static inline int _setup_loops(unsigned dry_run, u8 buf[], const struct _xfer_spec *pxs) { - struct pl330_xfer *x = pxs->x; + struct pl330_xfer *x = &pxs->desc->px; u32 ccr = pxs->ccr; unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr); int off = 0; @@ -1412,7 +1278,7 @@ static inline int _setup_loops(unsigned dry_run, u8 buf[], static inline int _setup_xfer(unsigned dry_run, u8 buf[], const struct _xfer_spec *pxs) { - struct pl330_xfer *x = pxs->x; + struct pl330_xfer *x = &pxs->desc->px; int off = 0; /* DMAMOV SAR, x->src_addr */ @@ -1443,17 +1309,12 @@ static int _setup_req(unsigned dry_run, struct pl330_thread *thrd, /* DMAMOV CCR, ccr */ off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr); - x = pxs->r->x; - do { - /* Error if xfer length is not aligned at burst size */ - if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr))) - return -EINVAL; - - pxs->x = x; - off += _setup_xfer(dry_run, &buf[off], pxs); + x = &pxs->desc->px; + /* Error if xfer length is not aligned at burst size */ + if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr))) + return -EINVAL; - x = x->next; - } while (x); + off += _setup_xfer(dry_run, &buf[off], pxs); /* DMASEV peripheral/event */ off += _emit_SEV(dry_run, &buf[off], thrd->ev); @@ -1495,31 +1356,15 @@ static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc) return ccr; } -static inline bool _is_valid(u32 ccr) -{ - enum pl330_dstcachectrl dcctl; - enum pl330_srccachectrl scctl; - - dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK; - scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK; - - if (dcctl == DINVALID1 || dcctl == DINVALID2 - || scctl == SINVALID1 || scctl == SINVALID2) - return false; - else - return true; -} - /* * Submit a list of xfers after which the client wants notification. * Client is not notified after each xfer unit, just once after all * xfer units are done or some error occurs. */ -static int pl330_submit_req(void *ch_id, struct pl330_req *r) +static int pl330_submit_req(struct pl330_thread *thrd, + struct dma_pl330_desc *desc) { - struct pl330_thread *thrd = ch_id; - struct pl330_dmac *pl330; - struct pl330_info *pi; + struct pl330_dmac *pl330 = thrd->dmac; struct _xfer_spec xs; unsigned long flags; void __iomem *regs; @@ -1528,25 +1373,24 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r) int ret = 0; /* No Req or Unacquired Channel or DMAC */ - if (!r || !thrd || thrd->free) + if (!desc || !thrd || thrd->free) return -EINVAL; - pl330 = thrd->dmac; - pi = pl330->pinfo; - regs = pi->base; + regs = thrd->dmac->base; if (pl330->state == DYING || pl330->dmac_tbd.reset_chan & (1 << thrd->id)) { - dev_info(thrd->dmac->pinfo->dev, "%s:%d\n", + dev_info(thrd->dmac->ddma.dev, "%s:%d\n", __func__, __LINE__); return -EAGAIN; } /* If request for non-existing peripheral */ - if (r->rqtype != MEMTOMEM && r->peri >= pi->pcfg.num_peri) { - dev_info(thrd->dmac->pinfo->dev, + if (desc->rqtype != DMA_MEM_TO_MEM && + desc->peri >= pl330->pcfg.num_peri) { + dev_info(thrd->dmac->ddma.dev, "%s:%d Invalid peripheral(%u)!\n", - __func__, __LINE__, r->peri); + __func__, __LINE__, desc->peri); return -EINVAL; } @@ -1557,41 +1401,26 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r) goto xfer_exit; } + /* Prefer Secure Channel */ + if (!_manager_ns(thrd)) + desc->rqcfg.nonsecure = 0; + else + desc->rqcfg.nonsecure = 1; - /* Use last settings, if not provided */ - if (r->cfg) { - /* Prefer Secure Channel */ - if (!_manager_ns(thrd)) - r->cfg->nonsecure = 0; - else - r->cfg->nonsecure = 1; - - ccr = _prepare_ccr(r->cfg); - } else { - ccr = readl(regs + CC(thrd->id)); - } - - /* If this req doesn't have valid xfer settings */ - if (!_is_valid(ccr)) { - ret = -EINVAL; - dev_info(thrd->dmac->pinfo->dev, "%s:%d Invalid CCR(%x)!\n", - __func__, __LINE__, ccr); - goto xfer_exit; - } + ccr = _prepare_ccr(&desc->rqcfg); - idx = IS_FREE(&thrd->req[0]) ? 0 : 1; + idx = thrd->req[0].desc == NULL ? 0 : 1; xs.ccr = ccr; - xs.r = r; + xs.desc = desc; /* First dry run to check if req is acceptable */ ret = _setup_req(1, thrd, idx, &xs); if (ret < 0) goto xfer_exit; - if (ret > pi->mcbufsz / 2) { - dev_info(thrd->dmac->pinfo->dev, - "%s:%d Trying increasing mcbufsz\n", + if (ret > pl330->mcbufsz / 2) { + dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", __func__, __LINE__); ret = -ENOMEM; goto xfer_exit; @@ -1599,8 +1428,8 @@ static int pl330_submit_req(void *ch_id, struct pl330_req *r) /* Hook the request */ thrd->lstenq = idx; - thrd->req[idx].mc_len = _setup_req(0, thrd, idx, &xs); - thrd->req[idx].r = r; + thrd->req[idx].desc = desc; + _setup_req(0, thrd, idx, &xs); ret = 0; @@ -1610,10 +1439,32 @@ xfer_exit: return ret; } +static void dma_pl330_rqcb(struct dma_pl330_desc *desc, enum pl330_op_err err) +{ + struct dma_pl330_chan *pch; + unsigned long flags; + + if (!desc) + return; + + pch = desc->pchan; + + /* If desc aborted */ + if (!pch) + return; + + spin_lock_irqsave(&pch->lock, flags); + + desc->status = DONE; + + spin_unlock_irqrestore(&pch->lock, flags); + + tasklet_schedule(&pch->task); +} + static void pl330_dotask(unsigned long data) { struct pl330_dmac *pl330 = (struct pl330_dmac *) data; - struct pl330_info *pi = pl330->pinfo; unsigned long flags; int i; @@ -1631,16 +1482,16 @@ static void pl330_dotask(unsigned long data) if (pl330->dmac_tbd.reset_mngr) { _stop(pl330->manager); /* Reset all channels */ - pl330->dmac_tbd.reset_chan = (1 << pi->pcfg.num_chan) - 1; + pl330->dmac_tbd.reset_chan = (1 << pl330->pcfg.num_chan) - 1; /* Clear the reset flag */ pl330->dmac_tbd.reset_mngr = false; } - for (i = 0; i < pi->pcfg.num_chan; i++) { + for (i = 0; i < pl330->pcfg.num_chan; i++) { if (pl330->dmac_tbd.reset_chan & (1 << i)) { struct pl330_thread *thrd = &pl330->channels[i]; - void __iomem *regs = pi->base; + void __iomem *regs = pl330->base; enum pl330_op_err err; _stop(thrd); @@ -1651,16 +1502,13 @@ static void pl330_dotask(unsigned long data) err = PL330_ERR_ABORT; spin_unlock_irqrestore(&pl330->lock, flags); - - _callback(thrd->req[1 - thrd->lstenq].r, err); - _callback(thrd->req[thrd->lstenq].r, err); - + dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, err); + dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, err); spin_lock_irqsave(&pl330->lock, flags); - thrd->req[0].r = NULL; - thrd->req[1].r = NULL; - mark_free(thrd, 0); - mark_free(thrd, 1); + thrd->req[0].desc = NULL; + thrd->req[1].desc = NULL; + thrd->req_running = -1; /* Clear the reset flag */ pl330->dmac_tbd.reset_chan &= ~(1 << i); @@ -1673,20 +1521,15 @@ static void pl330_dotask(unsigned long data) } /* Returns 1 if state was updated, 0 otherwise */ -static int pl330_update(const struct pl330_info *pi) +static int pl330_update(struct pl330_dmac *pl330) { - struct pl330_req *rqdone, *tmp; - struct pl330_dmac *pl330; + struct dma_pl330_desc *descdone, *tmp; unsigned long flags; void __iomem *regs; u32 val; int id, ev, ret = 0; - if (!pi || !pi->pl330_data) - return 0; - - regs = pi->base; - pl330 = pi->pl330_data; + regs = pl330->base; spin_lock_irqsave(&pl330->lock, flags); @@ -1696,13 +1539,13 @@ static int pl330_update(const struct pl330_info *pi) else pl330->dmac_tbd.reset_mngr = false; - val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1); + val = readl(regs + FSC) & ((1 << pl330->pcfg.num_chan) - 1); pl330->dmac_tbd.reset_chan |= val; if (val) { int i = 0; - while (i < pi->pcfg.num_chan) { + while (i < pl330->pcfg.num_chan) { if (val & (1 << i)) { - dev_info(pi->dev, + dev_info(pl330->ddma.dev, "Reset Channel-%d\t CS-%x FTC-%x\n", i, readl(regs + CS(i)), readl(regs + FTC(i))); @@ -1714,15 +1557,16 @@ static int pl330_update(const struct pl330_info *pi) /* Check which event happened i.e, thread notified */ val = readl(regs + ES); - if (pi->pcfg.num_events < 32 - && val & ~((1 << pi->pcfg.num_events) - 1)) { + if (pl330->pcfg.num_events < 32 + && val & ~((1 << pl330->pcfg.num_events) - 1)) { pl330->dmac_tbd.reset_dmac = true; - dev_err(pi->dev, "%s:%d Unexpected!\n", __func__, __LINE__); + dev_err(pl330->ddma.dev, "%s:%d Unexpected!\n", __func__, + __LINE__); ret = 1; goto updt_exit; } - for (ev = 0; ev < pi->pcfg.num_events; ev++) { + for (ev = 0; ev < pl330->pcfg.num_events; ev++) { if (val & (1 << ev)) { /* Event occurred */ struct pl330_thread *thrd; u32 inten = readl(regs + INTEN); @@ -1743,25 +1587,22 @@ static int pl330_update(const struct pl330_info *pi) continue; /* Detach the req */ - rqdone = thrd->req[active].r; - thrd->req[active].r = NULL; - - mark_free(thrd, active); + descdone = thrd->req[active].desc; + thrd->req[active].desc = NULL; /* Get going again ASAP */ _start(thrd); /* For now, just make a list of callbacks to be done */ - list_add_tail(&rqdone->rqd, &pl330->req_done); + list_add_tail(&descdone->rqd, &pl330->req_done); } } /* Now that we are in no hurry, do the callbacks */ - list_for_each_entry_safe(rqdone, tmp, &pl330->req_done, rqd) { - list_del(&rqdone->rqd); - + list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) { + list_del(&descdone->rqd); spin_unlock_irqrestore(&pl330->lock, flags); - _callback(rqdone, PL330_ERR_NONE); + dma_pl330_rqcb(descdone, PL330_ERR_NONE); spin_lock_irqsave(&pl330->lock, flags); } @@ -1778,65 +1619,13 @@ updt_exit: return ret; } -static int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op) -{ - struct pl330_thread *thrd = ch_id; - struct pl330_dmac *pl330; - unsigned long flags; - int ret = 0, active; - - if (!thrd || thrd->free || thrd->dmac->state == DYING) - return -EINVAL; - - pl330 = thrd->dmac; - active = thrd->req_running; - - spin_lock_irqsave(&pl330->lock, flags); - - switch (op) { - case PL330_OP_FLUSH: - /* Make sure the channel is stopped */ - _stop(thrd); - - thrd->req[0].r = NULL; - thrd->req[1].r = NULL; - mark_free(thrd, 0); - mark_free(thrd, 1); - break; - - case PL330_OP_ABORT: - /* Make sure the channel is stopped */ - _stop(thrd); - - /* ABORT is only for the active req */ - if (active == -1) - break; - - thrd->req[active].r = NULL; - mark_free(thrd, active); - - /* Start the next */ - case PL330_OP_START: - if ((active == -1) && !_start(thrd)) - ret = -EIO; - break; - - default: - ret = -EINVAL; - } - - spin_unlock_irqrestore(&pl330->lock, flags); - return ret; -} - /* Reserve an event */ static inline int _alloc_event(struct pl330_thread *thrd) { struct pl330_dmac *pl330 = thrd->dmac; - struct pl330_info *pi = pl330->pinfo; int ev; - for (ev = 0; ev < pi->pcfg.num_events; ev++) + for (ev = 0; ev < pl330->pcfg.num_events; ev++) if (pl330->events[ev] == -1) { pl330->events[ev] = thrd->id; return ev; @@ -1845,45 +1634,38 @@ static inline int _alloc_event(struct pl330_thread *thrd) return -1; } -static bool _chan_ns(const struct pl330_info *pi, int i) +static bool _chan_ns(const struct pl330_dmac *pl330, int i) { - return pi->pcfg.irq_ns & (1 << i); + return pl330->pcfg.irq_ns & (1 << i); } /* Upon success, returns IdentityToken for the * allocated channel, NULL otherwise. */ -static void *pl330_request_channel(const struct pl330_info *pi) +static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330) { struct pl330_thread *thrd = NULL; - struct pl330_dmac *pl330; unsigned long flags; int chans, i; - if (!pi || !pi->pl330_data) - return NULL; - - pl330 = pi->pl330_data; - if (pl330->state == DYING) return NULL; - chans = pi->pcfg.num_chan; + chans = pl330->pcfg.num_chan; spin_lock_irqsave(&pl330->lock, flags); for (i = 0; i < chans; i++) { thrd = &pl330->channels[i]; if ((thrd->free) && (!_manager_ns(thrd) || - _chan_ns(pi, i))) { + _chan_ns(pl330, i))) { thrd->ev = _alloc_event(thrd); if (thrd->ev >= 0) { thrd->free = false; thrd->lstenq = 1; - thrd->req[0].r = NULL; - mark_free(thrd, 0); - thrd->req[1].r = NULL; - mark_free(thrd, 1); + thrd->req[0].desc = NULL; + thrd->req[1].desc = NULL; + thrd->req_running = -1; break; } } @@ -1899,17 +1681,15 @@ static void *pl330_request_channel(const struct pl330_info *pi) static inline void _free_event(struct pl330_thread *thrd, int ev) { struct pl330_dmac *pl330 = thrd->dmac; - struct pl330_info *pi = pl330->pinfo; /* If the event is valid and was held by the thread */ - if (ev >= 0 && ev < pi->pcfg.num_events + if (ev >= 0 && ev < pl330->pcfg.num_events && pl330->events[ev] == thrd->id) pl330->events[ev] = -1; } -static void pl330_release_channel(void *ch_id) +static void pl330_release_channel(struct pl330_thread *thrd) { - struct pl330_thread *thrd = ch_id; struct pl330_dmac *pl330; unsigned long flags; @@ -1918,8 +1698,8 @@ static void pl330_release_channel(void *ch_id) _stop(thrd); - _callback(thrd->req[1 - thrd->lstenq].r, PL330_ERR_ABORT); - _callback(thrd->req[thrd->lstenq].r, PL330_ERR_ABORT); + dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, PL330_ERR_ABORT); + dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, PL330_ERR_ABORT); pl330 = thrd->dmac; @@ -1932,72 +1712,70 @@ static void pl330_release_channel(void *ch_id) /* Initialize the structure for PL330 configuration, that can be used * by the client driver the make best use of the DMAC */ -static void read_dmac_config(struct pl330_info *pi) +static void read_dmac_config(struct pl330_dmac *pl330) { - void __iomem *regs = pi->base; + void __iomem *regs = pl330->base; u32 val; val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT; val &= CRD_DATA_WIDTH_MASK; - pi->pcfg.data_bus_width = 8 * (1 << val); + pl330->pcfg.data_bus_width = 8 * (1 << val); val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT; val &= CRD_DATA_BUFF_MASK; - pi->pcfg.data_buf_dep = val + 1; + pl330->pcfg.data_buf_dep = val + 1; val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT; val &= CR0_NUM_CHANS_MASK; val += 1; - pi->pcfg.num_chan = val; + pl330->pcfg.num_chan = val; val = readl(regs + CR0); if (val & CR0_PERIPH_REQ_SET) { val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK; val += 1; - pi->pcfg.num_peri = val; - pi->pcfg.peri_ns = readl(regs + CR4); + pl330->pcfg.num_peri = val; + pl330->pcfg.peri_ns = readl(regs + CR4); } else { - pi->pcfg.num_peri = 0; + pl330->pcfg.num_peri = 0; } val = readl(regs + CR0); if (val & CR0_BOOT_MAN_NS) - pi->pcfg.mode |= DMAC_MODE_NS; + pl330->pcfg.mode |= DMAC_MODE_NS; else - pi->pcfg.mode &= ~DMAC_MODE_NS; + pl330->pcfg.mode &= ~DMAC_MODE_NS; val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT; val &= CR0_NUM_EVENTS_MASK; val += 1; - pi->pcfg.num_events = val; + pl330->pcfg.num_events = val; - pi->pcfg.irq_ns = readl(regs + CR3); + pl330->pcfg.irq_ns = readl(regs + CR3); } static inline void _reset_thread(struct pl330_thread *thrd) { struct pl330_dmac *pl330 = thrd->dmac; - struct pl330_info *pi = pl330->pinfo; thrd->req[0].mc_cpu = pl330->mcode_cpu - + (thrd->id * pi->mcbufsz); + + (thrd->id * pl330->mcbufsz); thrd->req[0].mc_bus = pl330->mcode_bus - + (thrd->id * pi->mcbufsz); - thrd->req[0].r = NULL; - mark_free(thrd, 0); + + (thrd->id * pl330->mcbufsz); + thrd->req[0].desc = NULL; thrd->req[1].mc_cpu = thrd->req[0].mc_cpu - + pi->mcbufsz / 2; + + pl330->mcbufsz / 2; thrd->req[1].mc_bus = thrd->req[0].mc_bus - + pi->mcbufsz / 2; - thrd->req[1].r = NULL; - mark_free(thrd, 1); + + pl330->mcbufsz / 2; + thrd->req[1].desc = NULL; + + thrd->req_running = -1; } static int dmac_alloc_threads(struct pl330_dmac *pl330) { - struct pl330_info *pi = pl330->pinfo; - int chans = pi->pcfg.num_chan; + int chans = pl330->pcfg.num_chan; struct pl330_thread *thrd; int i; @@ -2028,29 +1806,28 @@ static int dmac_alloc_threads(struct pl330_dmac *pl330) static int dmac_alloc_resources(struct pl330_dmac *pl330) { - struct pl330_info *pi = pl330->pinfo; - int chans = pi->pcfg.num_chan; + int chans = pl330->pcfg.num_chan; int ret; /* * Alloc MicroCode buffer for 'chans' Channel threads. * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN) */ - pl330->mcode_cpu = dma_alloc_coherent(pi->dev, - chans * pi->mcbufsz, + pl330->mcode_cpu = dma_alloc_coherent(pl330->ddma.dev, + chans * pl330->mcbufsz, &pl330->mcode_bus, GFP_KERNEL); if (!pl330->mcode_cpu) { - dev_err(pi->dev, "%s:%d Can't allocate memory!\n", + dev_err(pl330->ddma.dev, "%s:%d Can't allocate memory!\n", __func__, __LINE__); return -ENOMEM; } ret = dmac_alloc_threads(pl330); if (ret) { - dev_err(pi->dev, "%s:%d Can't to create channels for DMAC!\n", + dev_err(pl330->ddma.dev, "%s:%d Can't to create channels for DMAC!\n", __func__, __LINE__); - dma_free_coherent(pi->dev, - chans * pi->mcbufsz, + dma_free_coherent(pl330->ddma.dev, + chans * pl330->mcbufsz, pl330->mcode_cpu, pl330->mcode_bus); return ret; } @@ -2058,71 +1835,45 @@ static int dmac_alloc_resources(struct pl330_dmac *pl330) return 0; } -static int pl330_add(struct pl330_info *pi) +static int pl330_add(struct pl330_dmac *pl330) { - struct pl330_dmac *pl330; void __iomem *regs; int i, ret; - if (!pi || !pi->dev) - return -EINVAL; - - /* If already added */ - if (pi->pl330_data) - return -EINVAL; - - /* - * If the SoC can perform reset on the DMAC, then do it - * before reading its configuration. - */ - if (pi->dmac_reset) - pi->dmac_reset(pi); - - regs = pi->base; + regs = pl330->base; /* Check if we can handle this DMAC */ - if ((pi->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) { - dev_err(pi->dev, "PERIPH_ID 0x%x !\n", pi->pcfg.periph_id); + if ((pl330->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) { + dev_err(pl330->ddma.dev, "PERIPH_ID 0x%x !\n", + pl330->pcfg.periph_id); return -EINVAL; } /* Read the configuration of the DMAC */ - read_dmac_config(pi); + read_dmac_config(pl330); - if (pi->pcfg.num_events == 0) { - dev_err(pi->dev, "%s:%d Can't work without events!\n", + if (pl330->pcfg.num_events == 0) { + dev_err(pl330->ddma.dev, "%s:%d Can't work without events!\n", __func__, __LINE__); return -EINVAL; } - pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL); - if (!pl330) { - dev_err(pi->dev, "%s:%d Can't allocate memory!\n", - __func__, __LINE__); - return -ENOMEM; - } - - /* Assign the info structure and private data */ - pl330->pinfo = pi; - pi->pl330_data = pl330; - spin_lock_init(&pl330->lock); INIT_LIST_HEAD(&pl330->req_done); /* Use default MC buffer size if not provided */ - if (!pi->mcbufsz) - pi->mcbufsz = MCODE_BUFF_PER_REQ * 2; + if (!pl330->mcbufsz) + pl330->mcbufsz = MCODE_BUFF_PER_REQ * 2; /* Mark all events as free */ - for (i = 0; i < pi->pcfg.num_events; i++) + for (i = 0; i < pl330->pcfg.num_events; i++) pl330->events[i] = -1; /* Allocate resources needed by the DMAC */ ret = dmac_alloc_resources(pl330); if (ret) { - dev_err(pi->dev, "Unable to create channels for DMAC\n"); - kfree(pl330); + dev_err(pl330->ddma.dev, "Unable to create channels for DMAC\n"); return ret; } @@ -2135,15 +1886,13 @@ static int pl330_add(struct pl330_info *pi) static int dmac_free_threads(struct pl330_dmac *pl330) { - struct pl330_info *pi = pl330->pinfo; - int chans = pi->pcfg.num_chan; struct pl330_thread *thrd; int i; /* Release Channel threads */ - for (i = 0; i < chans; i++) { + for (i = 0; i < pl330->pcfg.num_chan; i++) { thrd = &pl330->channels[i]; - pl330_release_channel((void *)thrd); + pl330_release_channel(thrd); } /* Free memory */ @@ -2152,35 +1901,18 @@ static int dmac_free_threads(struct pl330_dmac *pl330) return 0; } -static void dmac_free_resources(struct pl330_dmac *pl330) +static void pl330_del(struct pl330_dmac *pl330) { - struct pl330_info *pi = pl330->pinfo; - int chans = pi->pcfg.num_chan; - - dmac_free_threads(pl330); - - dma_free_coherent(pi->dev, chans * pi->mcbufsz, - pl330->mcode_cpu, pl330->mcode_bus); -} - -static void pl330_del(struct pl330_info *pi) -{ - struct pl330_dmac *pl330; - - if (!pi || !pi->pl330_data) - return; - - pl330 = pi->pl330_data; - pl330->state = UNINIT; tasklet_kill(&pl330->tasks); /* Free DMAC resources */ - dmac_free_resources(pl330); + dmac_free_threads(pl330); - kfree(pl330); - pi->pl330_data = NULL; + dma_free_coherent(pl330->ddma.dev, + pl330->pcfg.num_chan * pl330->mcbufsz, pl330->mcode_cpu, + pl330->mcode_bus); } /* forward declaration */ @@ -2212,8 +1944,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch) if (desc->status == BUSY) continue; - ret = pl330_submit_req(pch->pl330_chid, - &desc->req); + ret = pl330_submit_req(pch->thread, desc); if (!ret) { desc->status = BUSY; } else if (ret == -EAGAIN) { @@ -2222,7 +1953,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch) } else { /* Unacceptable request */ desc->status = DONE; - dev_err(pch->dmac->pif.dev, "%s:%d Bad Desc(%d)\n", + dev_err(pch->dmac->ddma.dev, "%s:%d Bad Desc(%d)\n", __func__, __LINE__, desc->txd.cookie); tasklet_schedule(&pch->task); } @@ -2249,7 +1980,9 @@ static void pl330_tasklet(unsigned long data) fill_queue(pch); /* Make sure the PL330 Channel thread is active */ - pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START); + spin_lock(&pch->thread->dmac->lock); + _start(pch->thread); + spin_unlock(&pch->thread->dmac->lock); while (!list_empty(&pch->completed_list)) { dma_async_tx_callback callback; @@ -2280,25 +2013,6 @@ static void pl330_tasklet(unsigned long data) spin_unlock_irqrestore(&pch->lock, flags); } -static void dma_pl330_rqcb(void *token, enum pl330_op_err err) -{ - struct dma_pl330_desc *desc = token; - struct dma_pl330_chan *pch = desc->pchan; - unsigned long flags; - - /* If desc aborted */ - if (!pch) - return; - - spin_lock_irqsave(&pch->lock, flags); - - desc->status = DONE; - - spin_unlock_irqrestore(&pch->lock, flags); - - tasklet_schedule(&pch->task); -} - bool pl330_filter(struct dma_chan *chan, void *param) { u8 *peri_id; @@ -2315,23 +2029,26 @@ static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { int count = dma_spec->args_count; - struct dma_pl330_dmac *pdmac = ofdma->of_dma_data; + struct pl330_dmac *pl330 = ofdma->of_dma_data; unsigned int chan_id; + if (!pl330) + return NULL; + if (count != 1) return NULL; chan_id = dma_spec->args[0]; - if (chan_id >= pdmac->num_peripherals) + if (chan_id >= pl330->num_peripherals) return NULL; - return dma_get_slave_channel(&pdmac->peripherals[chan_id].chan); + return dma_get_slave_channel(&pl330->peripherals[chan_id].chan); } static int pl330_alloc_chan_resources(struct dma_chan *chan) { struct dma_pl330_chan *pch = to_pchan(chan); - struct dma_pl330_dmac *pdmac = pch->dmac; + struct pl330_dmac *pl330 = pch->dmac; unsigned long flags; spin_lock_irqsave(&pch->lock, flags); @@ -2339,8 +2056,8 @@ static int pl330_alloc_chan_resources(struct dma_chan *chan) dma_cookie_init(chan); pch->cyclic = false; - pch->pl330_chid = pl330_request_channel(&pdmac->pif); - if (!pch->pl330_chid) { + pch->thread = pl330_request_channel(pl330); + if (!pch->thread) { spin_unlock_irqrestore(&pch->lock, flags); return -ENOMEM; } @@ -2357,7 +2074,7 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned struct dma_pl330_chan *pch = to_pchan(chan); struct dma_pl330_desc *desc; unsigned long flags; - struct dma_pl330_dmac *pdmac = pch->dmac; + struct pl330_dmac *pl330 = pch->dmac; struct dma_slave_config *slave_config; LIST_HEAD(list); @@ -2365,8 +2082,13 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned case DMA_TERMINATE_ALL: spin_lock_irqsave(&pch->lock, flags); - /* FLUSH the PL330 Channel thread */ - pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH); + spin_lock(&pl330->lock); + _stop(pch->thread); + spin_unlock(&pl330->lock); + + pch->thread->req[0].desc = NULL; + pch->thread->req[1].desc = NULL; + pch->thread->req_running = -1; /* Mark all desc done */ list_for_each_entry(desc, &pch->submitted_list, node) { @@ -2384,9 +2106,9 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned dma_cookie_complete(&desc->txd); } - list_splice_tail_init(&pch->submitted_list, &pdmac->desc_pool); - list_splice_tail_init(&pch->work_list, &pdmac->desc_pool); - list_splice_tail_init(&pch->completed_list, &pdmac->desc_pool); + list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool); + list_splice_tail_init(&pch->work_list, &pl330->desc_pool); + list_splice_tail_init(&pch->completed_list, &pl330->desc_pool); spin_unlock_irqrestore(&pch->lock, flags); break; case DMA_SLAVE_CONFIG: @@ -2409,7 +2131,7 @@ static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned } break; default: - dev_err(pch->dmac->pif.dev, "Not supported command.\n"); + dev_err(pch->dmac->ddma.dev, "Not supported command.\n"); return -ENXIO; } @@ -2425,8 +2147,8 @@ static void pl330_free_chan_resources(struct dma_chan *chan) spin_lock_irqsave(&pch->lock, flags); - pl330_release_channel(pch->pl330_chid); - pch->pl330_chid = NULL; + pl330_release_channel(pch->thread); + pch->thread = NULL; if (pch->cyclic) list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool); @@ -2489,57 +2211,46 @@ static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx) static inline void _init_desc(struct dma_pl330_desc *desc) { - desc->req.x = &desc->px; - desc->req.token = desc; desc->rqcfg.swap = SWAP_NO; - desc->rqcfg.scctl = SCCTRL0; - desc->rqcfg.dcctl = DCCTRL0; - desc->req.cfg = &desc->rqcfg; - desc->req.xfer_cb = dma_pl330_rqcb; + desc->rqcfg.scctl = CCTRL0; + desc->rqcfg.dcctl = CCTRL0; desc->txd.tx_submit = pl330_tx_submit; INIT_LIST_HEAD(&desc->node); } /* Returns the number of descriptors added to the DMAC pool */ -static int add_desc(struct dma_pl330_dmac *pdmac, gfp_t flg, int count) +static int add_desc(struct pl330_dmac *pl330, gfp_t flg, int count) { struct dma_pl330_desc *desc; unsigned long flags; int i; - if (!pdmac) - return 0; - desc = kcalloc(count, sizeof(*desc), flg); if (!desc) return 0; - spin_lock_irqsave(&pdmac->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, flags); for (i = 0; i < count; i++) { _init_desc(&desc[i]); - list_add_tail(&desc[i].node, &pdmac->desc_pool); + list_add_tail(&desc[i].node, &pl330->desc_pool); } - spin_unlock_irqrestore(&pdmac->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, flags); return count; } -static struct dma_pl330_desc * -pluck_desc(struct dma_pl330_dmac *pdmac) +static struct dma_pl330_desc *pluck_desc(struct pl330_dmac *pl330) { struct dma_pl330_desc *desc = NULL; unsigned long flags; - if (!pdmac) - return NULL; - - spin_lock_irqsave(&pdmac->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, flags); - if (!list_empty(&pdmac->desc_pool)) { - desc = list_entry(pdmac->desc_pool.next, + if (!list_empty(&pl330->desc_pool)) { + desc = list_entry(pl330->desc_pool.next, struct dma_pl330_desc, node); list_del_init(&desc->node); @@ -2548,29 +2259,29 @@ pluck_desc(struct dma_pl330_dmac *pdmac) desc->txd.callback = NULL; } - spin_unlock_irqrestore(&pdmac->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, flags); return desc; } static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) { - struct dma_pl330_dmac *pdmac = pch->dmac; + struct pl330_dmac *pl330 = pch->dmac; u8 *peri_id = pch->chan.private; struct dma_pl330_desc *desc; /* Pluck one desc from the pool of DMAC */ - desc = pluck_desc(pdmac); + desc = pluck_desc(pl330); /* If the DMAC pool is empty, alloc new */ if (!desc) { - if (!add_desc(pdmac, GFP_ATOMIC, 1)) + if (!add_desc(pl330, GFP_ATOMIC, 1)) return NULL; /* Try again */ - desc = pluck_desc(pdmac); + desc = pluck_desc(pl330); if (!desc) { - dev_err(pch->dmac->pif.dev, + dev_err(pch->dmac->ddma.dev, "%s:%d ALERT!\n", __func__, __LINE__); return NULL; } @@ -2581,8 +2292,8 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) desc->txd.cookie = 0; async_tx_ack(&desc->txd); - desc->req.peri = peri_id ? pch->chan.chan_id : 0; - desc->rqcfg.pcfg = &pch->dmac->pif.pcfg; + desc->peri = peri_id ? pch->chan.chan_id : 0; + desc->rqcfg.pcfg = &pch->dmac->pcfg; dma_async_tx_descriptor_init(&desc->txd, &pch->chan); @@ -2592,7 +2303,6 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch) static inline void fill_px(struct pl330_xfer *px, dma_addr_t dst, dma_addr_t src, size_t len) { - px->next = NULL; px->bytes = len; px->dst_addr = dst; px->src_addr = src; @@ -2605,7 +2315,7 @@ __pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst, struct dma_pl330_desc *desc = pl330_get_desc(pch); if (!desc) { - dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", __func__, __LINE__); return NULL; } @@ -2629,11 +2339,11 @@ __pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst, static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) { struct dma_pl330_chan *pch = desc->pchan; - struct pl330_info *pi = &pch->dmac->pif; + struct pl330_dmac *pl330 = pch->dmac; int burst_len; - burst_len = pi->pcfg.data_bus_width / 8; - burst_len *= pi->pcfg.data_buf_dep; + burst_len = pl330->pcfg.data_bus_width / 8; + burst_len *= pl330->pcfg.data_buf_dep; burst_len >>= desc->rqcfg.brst_size; /* src/dst_burst_len can't be more than 16 */ @@ -2652,11 +2362,11 @@ static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len) static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t dma_addr, size_t len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct dma_pl330_desc *desc = NULL, *first = NULL; struct dma_pl330_chan *pch = to_pchan(chan); - struct dma_pl330_dmac *pdmac = pch->dmac; + struct pl330_dmac *pl330 = pch->dmac; unsigned int i; dma_addr_t dst; dma_addr_t src; @@ -2665,7 +2375,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( return NULL; if (!is_slave_direction(direction)) { - dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n", + dev_err(pch->dmac->ddma.dev, "%s:%d Invalid dma direction\n", __func__, __LINE__); return NULL; } @@ -2673,23 +2383,23 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( for (i = 0; i < len / period_len; i++) { desc = pl330_get_desc(pch); if (!desc) { - dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n", + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", __func__, __LINE__); if (!first) return NULL; - spin_lock_irqsave(&pdmac->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, flags); while (!list_empty(&first->node)) { desc = list_entry(first->node.next, struct dma_pl330_desc, node); - list_move_tail(&desc->node, &pdmac->desc_pool); + list_move_tail(&desc->node, &pl330->desc_pool); } - list_move_tail(&first->node, &pdmac->desc_pool); + list_move_tail(&first->node, &pl330->desc_pool); - spin_unlock_irqrestore(&pdmac->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, flags); return NULL; } @@ -2698,14 +2408,12 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( case DMA_MEM_TO_DEV: desc->rqcfg.src_inc = 1; desc->rqcfg.dst_inc = 0; - desc->req.rqtype = MEMTODEV; src = dma_addr; dst = pch->fifo_addr; break; case DMA_DEV_TO_MEM: desc->rqcfg.src_inc = 0; desc->rqcfg.dst_inc = 1; - desc->req.rqtype = DEVTOMEM; src = pch->fifo_addr; dst = dma_addr; break; @@ -2713,6 +2421,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( break; } + desc->rqtype = direction; desc->rqcfg.brst_size = pch->burst_sz; desc->rqcfg.brst_len = 1; fill_px(&desc->px, dst, src, period_len); @@ -2740,24 +2449,22 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_info *pi; + struct pl330_dmac *pl330 = pch->dmac; int burst; if (unlikely(!pch || !len)) return NULL; - pi = &pch->dmac->pif; - desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; desc->rqcfg.src_inc = 1; desc->rqcfg.dst_inc = 1; - desc->req.rqtype = MEMTOMEM; + desc->rqtype = DMA_MEM_TO_MEM; /* Select max possible burst size */ - burst = pi->pcfg.data_bus_width / 8; + burst = pl330->pcfg.data_bus_width / 8; while (burst > 1) { if (!(len % burst)) @@ -2776,7 +2483,7 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, return &desc->txd; } -static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac, +static void __pl330_giveback_desc(struct pl330_dmac *pl330, struct dma_pl330_desc *first) { unsigned long flags; @@ -2785,17 +2492,17 @@ static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac, if (!first) return; - spin_lock_irqsave(&pdmac->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, flags); while (!list_empty(&first->node)) { desc = list_entry(first->node.next, struct dma_pl330_desc, node); - list_move_tail(&desc->node, &pdmac->desc_pool); + list_move_tail(&desc->node, &pl330->desc_pool); } - list_move_tail(&first->node, &pdmac->desc_pool); + list_move_tail(&first->node, &pl330->desc_pool); - spin_unlock_irqrestore(&pdmac->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, flags); } static struct dma_async_tx_descriptor * @@ -2820,12 +2527,12 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc = pl330_get_desc(pch); if (!desc) { - struct dma_pl330_dmac *pdmac = pch->dmac; + struct pl330_dmac *pl330 = pch->dmac; - dev_err(pch->dmac->pif.dev, + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", __func__, __LINE__); - __pl330_giveback_desc(pdmac, first); + __pl330_giveback_desc(pl330, first); return NULL; } @@ -2838,19 +2545,18 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (direction == DMA_MEM_TO_DEV) { desc->rqcfg.src_inc = 1; desc->rqcfg.dst_inc = 0; - desc->req.rqtype = MEMTODEV; fill_px(&desc->px, addr, sg_dma_address(sg), sg_dma_len(sg)); } else { desc->rqcfg.src_inc = 0; desc->rqcfg.dst_inc = 1; - desc->req.rqtype = DEVTOMEM; fill_px(&desc->px, sg_dma_address(sg), addr, sg_dma_len(sg)); } desc->rqcfg.brst_size = pch->burst_sz; desc->rqcfg.brst_len = 1; + desc->rqtype = direction; } /* Return the last desc in the chain */ @@ -2890,9 +2596,9 @@ static int pl330_probe(struct amba_device *adev, const struct amba_id *id) { struct dma_pl330_platdata *pdat; - struct dma_pl330_dmac *pdmac; + struct pl330_config *pcfg; + struct pl330_dmac *pl330; struct dma_pl330_chan *pch, *_p; - struct pl330_info *pi; struct dma_device *pd; struct resource *res; int i, ret, irq; @@ -2905,30 +2611,27 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) return ret; /* Allocate a new DMAC and its Channels */ - pdmac = devm_kzalloc(&adev->dev, sizeof(*pdmac), GFP_KERNEL); - if (!pdmac) { + pl330 = devm_kzalloc(&adev->dev, sizeof(*pl330), GFP_KERNEL); + if (!pl330) { dev_err(&adev->dev, "unable to allocate mem\n"); return -ENOMEM; } - pi = &pdmac->pif; - pi->dev = &adev->dev; - pi->pl330_data = NULL; - pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0; + pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0; res = &adev->res; - pi->base = devm_ioremap_resource(&adev->dev, res); - if (IS_ERR(pi->base)) - return PTR_ERR(pi->base); + pl330->base = devm_ioremap_resource(&adev->dev, res); + if (IS_ERR(pl330->base)) + return PTR_ERR(pl330->base); - amba_set_drvdata(adev, pdmac); + amba_set_drvdata(adev, pl330); for (i = 0; i < AMBA_NR_IRQS; i++) { irq = adev->irq[i]; if (irq) { ret = devm_request_irq(&adev->dev, irq, pl330_irq_handler, 0, - dev_name(&adev->dev), pi); + dev_name(&adev->dev), pl330); if (ret) return ret; } else { @@ -2936,38 +2639,40 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) } } - pi->pcfg.periph_id = adev->periphid; - ret = pl330_add(pi); + pcfg = &pl330->pcfg; + + pcfg->periph_id = adev->periphid; + ret = pl330_add(pl330); if (ret) return ret; - INIT_LIST_HEAD(&pdmac->desc_pool); - spin_lock_init(&pdmac->pool_lock); + INIT_LIST_HEAD(&pl330->desc_pool); + spin_lock_init(&pl330->pool_lock); /* Create a descriptor pool of default size */ - if (!add_desc(pdmac, GFP_KERNEL, NR_DEFAULT_DESC)) + if (!add_desc(pl330, GFP_KERNEL, NR_DEFAULT_DESC)) dev_warn(&adev->dev, "unable to allocate desc\n"); - pd = &pdmac->ddma; + pd = &pl330->ddma; INIT_LIST_HEAD(&pd->channels); /* Initialize channel parameters */ if (pdat) - num_chan = max_t(int, pdat->nr_valid_peri, pi->pcfg.num_chan); + num_chan = max_t(int, pdat->nr_valid_peri, pcfg->num_chan); else - num_chan = max_t(int, pi->pcfg.num_peri, pi->pcfg.num_chan); + num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan); - pdmac->num_peripherals = num_chan; + pl330->num_peripherals = num_chan; - pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); - if (!pdmac->peripherals) { + pl330->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL); + if (!pl330->peripherals) { ret = -ENOMEM; - dev_err(&adev->dev, "unable to allocate pdmac->peripherals\n"); + dev_err(&adev->dev, "unable to allocate pl330->peripherals\n"); goto probe_err2; } for (i = 0; i < num_chan; i++) { - pch = &pdmac->peripherals[i]; + pch = &pl330->peripherals[i]; if (!adev->dev.of_node) pch->chan.private = pdat ? &pdat->peri_id[i] : NULL; else @@ -2977,9 +2682,9 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) INIT_LIST_HEAD(&pch->work_list); INIT_LIST_HEAD(&pch->completed_list); spin_lock_init(&pch->lock); - pch->pl330_chid = NULL; + pch->thread = NULL; pch->chan.device = pd; - pch->dmac = pdmac; + pch->dmac = pl330; /* Add the channel to the DMAC list */ list_add_tail(&pch->chan.device_node, &pd->channels); @@ -2990,7 +2695,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pd->cap_mask = pdat->cap_mask; } else { dma_cap_set(DMA_MEMCPY, pd->cap_mask); - if (pi->pcfg.num_peri) { + if (pcfg->num_peri) { dma_cap_set(DMA_SLAVE, pd->cap_mask); dma_cap_set(DMA_CYCLIC, pd->cap_mask); dma_cap_set(DMA_PRIVATE, pd->cap_mask); @@ -3015,14 +2720,14 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) if (adev->dev.of_node) { ret = of_dma_controller_register(adev->dev.of_node, - of_dma_pl330_xlate, pdmac); + of_dma_pl330_xlate, pl330); if (ret) { dev_err(&adev->dev, "unable to register DMA to the generic DT DMA helpers\n"); } } - adev->dev.dma_parms = &pdmac->dma_parms; + adev->dev.dma_parms = &pl330->dma_parms; /* * This is the limit for transfers with a buswidth of 1, larger @@ -3037,14 +2742,13 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) "Loaded driver for PL330 DMAC-%d\n", adev->periphid); dev_info(&adev->dev, "\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n", - pi->pcfg.data_buf_dep, - pi->pcfg.data_bus_width / 8, pi->pcfg.num_chan, - pi->pcfg.num_peri, pi->pcfg.num_events); + pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan, + pcfg->num_peri, pcfg->num_events); return 0; probe_err3: /* Idle the DMAC */ - list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels, + list_for_each_entry_safe(pch, _p, &pl330->ddma.channels, chan.device_node) { /* Remove the channel */ @@ -3055,27 +2759,23 @@ probe_err3: pl330_free_chan_resources(&pch->chan); } probe_err2: - pl330_del(pi); + pl330_del(pl330); return ret; } static int pl330_remove(struct amba_device *adev) { - struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev); + struct pl330_dmac *pl330 = amba_get_drvdata(adev); struct dma_pl330_chan *pch, *_p; - struct pl330_info *pi; - - if (!pdmac) - return 0; if (adev->dev.of_node) of_dma_controller_free(adev->dev.of_node); - dma_async_device_unregister(&pdmac->ddma); + dma_async_device_unregister(&pl330->ddma); /* Idle the DMAC */ - list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels, + list_for_each_entry_safe(pch, _p, &pl330->ddma.channels, chan.device_node) { /* Remove the channel */ @@ -3086,9 +2786,7 @@ static int pl330_remove(struct amba_device *adev) pl330_free_chan_resources(&pch->chan); } - pi = &pdmac->pif; - - pl330_del(pi); + pl330_del(pl330); return 0; } diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c index 82c9231..7a4bbb0 100644 --- a/drivers/dma/qcom_bam_dma.c +++ b/drivers/dma/qcom_bam_dma.c @@ -61,12 +61,17 @@ struct bam_desc_hw { #define DESC_FLAG_INT BIT(15) #define DESC_FLAG_EOT BIT(14) #define DESC_FLAG_EOB BIT(13) +#define DESC_FLAG_NWD BIT(12) struct bam_async_desc { struct virt_dma_desc vd; u32 num_desc; u32 xfer_len; + + /* transaction flags, EOT|EOB|NWD */ + u16 flags; + struct bam_desc_hw *curr_desc; enum dma_transfer_direction dir; @@ -490,6 +495,14 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan, if (!async_desc) goto err_out; + if (flags & DMA_PREP_FENCE) + async_desc->flags |= DESC_FLAG_NWD; + + if (flags & DMA_PREP_INTERRUPT) + async_desc->flags |= DESC_FLAG_EOT; + else + async_desc->flags |= DESC_FLAG_INT; + async_desc->num_desc = num_alloc; async_desc->curr_desc = async_desc->desc; async_desc->dir = direction; @@ -793,8 +806,11 @@ static void bam_start_dma(struct bam_chan *bchan) else async_desc->xfer_len = async_desc->num_desc; - /* set INT on last descriptor */ - desc[async_desc->xfer_len - 1].flags |= DESC_FLAG_INT; + /* set any special flags on the last descriptor */ + if (async_desc->num_desc == async_desc->xfer_len) + desc[async_desc->xfer_len - 1].flags = async_desc->flags; + else + desc[async_desc->xfer_len - 1].flags |= DESC_FLAG_INT; if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) { u32 partial = MAX_DESCRIPTORS - bchan->tail; diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c index 012520c..7416572 100644 --- a/drivers/dma/s3c24xx-dma.c +++ b/drivers/dma/s3c24xx-dma.c @@ -889,8 +889,7 @@ static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy( static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan); struct s3c24xx_dma_engine *s3cdma = s3cchan->host; diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c index 5ebdfbc..4b0ef04 100644 --- a/drivers/dma/sa11x0-dma.c +++ b/drivers/dma/sa11x0-dma.c @@ -612,7 +612,7 @@ static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg( static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period, - enum dma_transfer_direction dir, unsigned long flags, void *context) + enum dma_transfer_direction dir, unsigned long flags) { struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan); struct sa11x0_dma_desc *txd; diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig index 0f719816..0349125 100644 --- a/drivers/dma/sh/Kconfig +++ b/drivers/dma/sh/Kconfig @@ -2,21 +2,39 @@ # DMA engine configuration for sh # +# +# DMA Engine Helpers +# + config SH_DMAE_BASE bool "Renesas SuperH DMA Engine support" - depends on (SUPERH && SH_DMA) || ARCH_SHMOBILE || COMPILE_TEST + depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST + depends on !SUPERH || SH_DMA depends on !SH_DMA_API default y select DMA_ENGINE help Enable support for the Renesas SuperH DMA controllers. +# +# DMA Controllers +# + config SH_DMAE tristate "Renesas SuperH DMAC support" depends on SH_DMAE_BASE help Enable support for the Renesas SuperH DMA controllers. +if SH_DMAE + +config SH_DMAE_R8A73A4 + def_bool y + depends on ARCH_R8A73A4 + depends on OF + +endif + config SUDMAC tristate "Renesas SUDMAC support" depends on SH_DMAE_BASE @@ -34,7 +52,3 @@ config RCAR_AUDMAC_PP depends on SH_DMAE_BASE help Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers. - -config SHDMA_R8A73A4 - def_bool y - depends on ARCH_R8A73A4 && SH_DMAE != n diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile index 1ce88b2..0a5cfdb 100644 --- a/drivers/dma/sh/Makefile +++ b/drivers/dma/sh/Makefile @@ -1,10 +1,18 @@ +# +# DMA Engine Helpers +# + obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o -obj-$(CONFIG_SH_DMAE) += shdma.o + +# +# DMA Controllers +# + shdma-y := shdmac.o -ifeq ($(CONFIG_OF),y) -shdma-$(CONFIG_SHDMA_R8A73A4) += shdma-r8a73a4.o -endif +shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o shdma-objs := $(shdma-y) +obj-$(CONFIG_SH_DMAE) += shdma.o + obj-$(CONFIG_SUDMAC) += sudmac.o obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c index 2de7728..dabbf0a 100644 --- a/drivers/dma/sh/rcar-audmapp.c +++ b/drivers/dma/sh/rcar-audmapp.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/dmaengine.h> +#include <linux/of_dma.h> #include <linux/platform_data/dma-rcar-audmapp.h> #include <linux/platform_device.h> #include <linux/shdma-base.h> @@ -45,8 +46,9 @@ struct audmapp_chan { struct shdma_chan shdma_chan; - struct audmapp_slave_config *config; void __iomem *base; + dma_addr_t slave_addr; + u32 chcr; }; struct audmapp_device { @@ -56,7 +58,16 @@ struct audmapp_device { void __iomem *chan_reg; }; +struct audmapp_desc { + struct shdma_desc shdma_desc; + dma_addr_t src; + dma_addr_t dst; +}; + +#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan) + #define to_chan(chan) container_of(chan, struct audmapp_chan, shdma_chan) +#define to_desc(sdesc) container_of(sdesc, struct audmapp_desc, shdma_desc) #define to_dev(chan) container_of(chan->shdma_chan.dma_chan.device, \ struct audmapp_device, shdma_dev.dma_dev) @@ -90,70 +101,82 @@ static void audmapp_halt(struct shdma_chan *schan) } static void audmapp_start_xfer(struct shdma_chan *schan, - struct shdma_desc *sdecs) + struct shdma_desc *sdesc) { struct audmapp_chan *auchan = to_chan(schan); struct audmapp_device *audev = to_dev(auchan); - struct audmapp_slave_config *cfg = auchan->config; + struct audmapp_desc *desc = to_desc(sdesc); struct device *dev = audev->dev; - u32 chcr = cfg->chcr | PDMACHCR_DE; + u32 chcr = auchan->chcr | PDMACHCR_DE; - dev_dbg(dev, "src/dst/chcr = %pad/%pad/%x\n", - &cfg->src, &cfg->dst, cfg->chcr); + dev_dbg(dev, "src/dst/chcr = %pad/%pad/%08x\n", + &desc->src, &desc->dst, chcr); - audmapp_write(auchan, cfg->src, PDMASAR); - audmapp_write(auchan, cfg->dst, PDMADAR); + audmapp_write(auchan, desc->src, PDMASAR); + audmapp_write(auchan, desc->dst, PDMADAR); audmapp_write(auchan, chcr, PDMACHCR); } -static struct audmapp_slave_config * -audmapp_find_slave(struct audmapp_chan *auchan, int slave_id) +static void audmapp_get_config(struct audmapp_chan *auchan, int slave_id, + u32 *chcr, dma_addr_t *dst) { struct audmapp_device *audev = to_dev(auchan); struct audmapp_pdata *pdata = audev->pdata; struct audmapp_slave_config *cfg; int i; + *chcr = 0; + *dst = 0; + + if (!pdata) { /* DT */ + *chcr = ((u32)slave_id) << 16; + auchan->shdma_chan.slave_id = (slave_id) >> 8; + return; + } + + /* non-DT */ + if (slave_id >= AUDMAPP_SLAVE_NUMBER) - return NULL; + return; for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++) - if (cfg->slave_id == slave_id) - return cfg; - - return NULL; + if (cfg->slave_id == slave_id) { + *chcr = cfg->chcr; + *dst = cfg->dst; + break; + } } static int audmapp_set_slave(struct shdma_chan *schan, int slave_id, dma_addr_t slave_addr, bool try) { struct audmapp_chan *auchan = to_chan(schan); - struct audmapp_slave_config *cfg = - audmapp_find_slave(auchan, slave_id); + u32 chcr; + dma_addr_t dst; + + audmapp_get_config(auchan, slave_id, &chcr, &dst); - if (!cfg) - return -ENODEV; if (try) return 0; - auchan->config = cfg; + auchan->chcr = chcr; + auchan->slave_addr = slave_addr ? : dst; return 0; } static int audmapp_desc_setup(struct shdma_chan *schan, - struct shdma_desc *sdecs, + struct shdma_desc *sdesc, dma_addr_t src, dma_addr_t dst, size_t *len) { - struct audmapp_chan *auchan = to_chan(schan); - struct audmapp_slave_config *cfg = auchan->config; - - if (!cfg) - return -ENODEV; + struct audmapp_desc *desc = to_desc(sdesc); if (*len > (size_t)AUDMAPP_LEN_MAX) *len = (size_t)AUDMAPP_LEN_MAX; + desc->src = src; + desc->dst = dst; + return 0; } @@ -164,7 +187,9 @@ static void audmapp_setup_xfer(struct shdma_chan *schan, static dma_addr_t audmapp_slave_addr(struct shdma_chan *schan) { - return 0; /* always fixed address */ + struct audmapp_chan *auchan = to_chan(schan); + + return auchan->slave_addr; } static bool audmapp_channel_busy(struct shdma_chan *schan) @@ -183,7 +208,7 @@ static bool audmapp_desc_completed(struct shdma_chan *schan, static struct shdma_desc *audmapp_embedded_desc(void *buf, int i) { - return &((struct shdma_desc *)buf)[i]; + return &((struct audmapp_desc *)buf)[i].shdma_desc; } static const struct shdma_ops audmapp_shdma_ops = { @@ -234,16 +259,39 @@ static void audmapp_chan_remove(struct audmapp_device *audev) dma_dev->chancnt = 0; } +static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + dma_cap_mask_t mask; + struct dma_chan *chan; + u32 chcr = dma_spec->args[0]; + + if (dma_spec->args_count != 1) + return NULL; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + chan = dma_request_channel(mask, shdma_chan_filter, NULL); + if (chan) + to_shdma_chan(chan)->hw_req = chcr; + + return chan; +} + static int audmapp_probe(struct platform_device *pdev) { struct audmapp_pdata *pdata = pdev->dev.platform_data; + struct device_node *np = pdev->dev.of_node; struct audmapp_device *audev; struct shdma_dev *sdev; struct dma_device *dma_dev; struct resource *res; int err, i; - if (!pdata) + if (np) + of_dma_controller_register(np, audmapp_of_xlate, pdev); + else if (!pdata) return -ENODEV; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -260,7 +308,7 @@ static int audmapp_probe(struct platform_device *pdev) sdev = &audev->shdma_dev; sdev->ops = &audmapp_shdma_ops; - sdev->desc_size = sizeof(struct shdma_desc); + sdev->desc_size = sizeof(struct audmapp_desc); dma_dev = &sdev->dma_dev; dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE; @@ -305,12 +353,18 @@ static int audmapp_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id audmapp_of_match[] = { + { .compatible = "renesas,rcar-audmapp", }, + {}, +}; + static struct platform_driver audmapp_driver = { .probe = audmapp_probe, .remove = audmapp_remove, .driver = { .owner = THIS_MODULE, .name = "rcar-audmapp-engine", + .of_match_table = audmapp_of_match, }, }; module_platform_driver(audmapp_driver); diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h index a2b8258..a1b0ef4 100644 --- a/drivers/dma/sh/shdma-arm.h +++ b/drivers/dma/sh/shdma-arm.h @@ -45,7 +45,7 @@ enum { ((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\ (((i) & TS_HI_BIT) << TS_HI_SHIFT)) -#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL((xmit_sz))) -#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL((xmit_sz))) +#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz))) +#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz))) #endif diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index b35007e..42d4974 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -206,45 +206,6 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id, return 0; } -/* - * This is the standard shdma filter function to be used as a replacement to the - * "old" method, using the .private pointer. If for some reason you allocate a - * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter - * parameter. If this filter is used, the slave driver, after calling - * dma_request_channel(), will also have to call dmaengine_slave_config() with - * .slave_id, .direction, and either .src_addr or .dst_addr set. - * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE - * capability! If this becomes a requirement, hardware glue drivers, using this - * services would have to provide their own filters, which first would check - * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do - * this, and only then, in case of a match, call this common filter. - * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate(). - * In that case the MID-RID value is used for slave channel filtering and is - * passed to this function in the "arg" parameter. - */ -bool shdma_chan_filter(struct dma_chan *chan, void *arg) -{ - struct shdma_chan *schan = to_shdma_chan(chan); - struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); - const struct shdma_ops *ops = sdev->ops; - int match = (long)arg; - int ret; - - if (match < 0) - /* No slave requested - arbitrary channel */ - return true; - - if (!schan->dev->of_node && match >= slave_num) - return false; - - ret = ops->set_slave(schan, match, 0, true); - if (ret < 0) - return false; - - return true; -} -EXPORT_SYMBOL(shdma_chan_filter); - static int shdma_alloc_chan_resources(struct dma_chan *chan) { struct shdma_chan *schan = to_shdma_chan(chan); @@ -295,6 +256,51 @@ esetslave: return ret; } +/* + * This is the standard shdma filter function to be used as a replacement to the + * "old" method, using the .private pointer. If for some reason you allocate a + * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter + * parameter. If this filter is used, the slave driver, after calling + * dma_request_channel(), will also have to call dmaengine_slave_config() with + * .slave_id, .direction, and either .src_addr or .dst_addr set. + * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE + * capability! If this becomes a requirement, hardware glue drivers, using this + * services would have to provide their own filters, which first would check + * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do + * this, and only then, in case of a match, call this common filter. + * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate(). + * In that case the MID-RID value is used for slave channel filtering and is + * passed to this function in the "arg" parameter. + */ +bool shdma_chan_filter(struct dma_chan *chan, void *arg) +{ + struct shdma_chan *schan; + struct shdma_dev *sdev; + int match = (long)arg; + int ret; + + /* Only support channels handled by this driver. */ + if (chan->device->device_alloc_chan_resources != + shdma_alloc_chan_resources) + return false; + + if (match < 0) + /* No slave requested - arbitrary channel */ + return true; + + schan = to_shdma_chan(chan); + if (!schan->dev->of_node && match >= slave_num) + return false; + + sdev = to_shdma_dev(schan->dma_chan.device); + ret = sdev->ops->set_slave(schan, match, 0, true); + if (ret < 0) + return false; + + return true; +} +EXPORT_SYMBOL(shdma_chan_filter); + static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all) { struct shdma_desc *desc, *_desc; @@ -662,15 +668,16 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg( static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct shdma_chan *schan = to_shdma_chan(chan); struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device); + struct dma_async_tx_descriptor *desc; const struct shdma_ops *ops = sdev->ops; unsigned int sg_len = buf_len / period_len; int slave_id = schan->slave_id; dma_addr_t slave_addr; - struct scatterlist sgl[SHDMA_MAX_SG_LEN]; + struct scatterlist *sgl; int i; if (!chan) @@ -694,7 +701,16 @@ static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( slave_addr = ops->slave_addr(schan); + /* + * Allocate the sg list dynamically as it would consumer too much stack + * space. + */ + sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL); + if (!sgl) + return NULL; + sg_init_table(sgl, sg_len); + for (i = 0; i < sg_len; i++) { dma_addr_t src = buf_addr + (period_len * i); @@ -704,8 +720,11 @@ static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic( sg_dma_len(&sgl[i]) = period_len; } - return shdma_prep_sg(schan, sgl, sg_len, &slave_addr, + desc = shdma_prep_sg(schan, sgl, sg_len, &slave_addr, direction, flags, true); + + kfree(sgl); + return desc; } static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h index 758a57b..2c0a969 100644 --- a/drivers/dma/sh/shdma.h +++ b/drivers/dma/sh/shdma.h @@ -62,7 +62,7 @@ struct sh_dmae_desc { #define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\ struct sh_dmae_device, shdma_dev.dma_dev) -#ifdef CONFIG_SHDMA_R8A73A4 +#ifdef CONFIG_SH_DMAE_R8A73A4 extern const struct sh_dmae_pdata r8a73a4_dma_pdata; #define r8a73a4_shdma_devid (&r8a73a4_dma_pdata) #else diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c index 146d5df..58eb857 100644 --- a/drivers/dma/sh/shdmac.c +++ b/drivers/dma/sh/shdmac.c @@ -38,12 +38,12 @@ #include "../dmaengine.h" #include "shdma.h" -/* DMA register */ -#define SAR 0x00 -#define DAR 0x04 -#define TCR 0x08 -#define CHCR 0x0C -#define DMAOR 0x40 +/* DMA registers */ +#define SAR 0x00 /* Source Address Register */ +#define DAR 0x04 /* Destination Address Register */ +#define TCR 0x08 /* Transfer Count Register */ +#define CHCR 0x0C /* Channel Control Register */ +#define DMAOR 0x40 /* DMA Operation Register */ #define TEND 0x18 /* USB-DMAC */ @@ -239,9 +239,8 @@ static void dmae_init(struct sh_dmae_chan *sh_chan) { /* * Default configuration for dual address memory-memory transfer. - * 0x400 represents auto-request. */ - u32 chcr = DM_INC | SM_INC | 0x400 | log2size_to_chcr(sh_chan, + u32 chcr = DM_INC | SM_INC | RS_AUTO | log2size_to_chcr(sh_chan, LOG2_DEFAULT_XFER_SIZE); sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr); chcr_write(sh_chan, chcr); diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c index 03f7820..aac03ab 100644 --- a/drivers/dma/sirf-dma.c +++ b/drivers/dma/sirf-dma.c @@ -580,7 +580,7 @@ err_dir: static struct dma_async_tx_descriptor * sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, void *context) + enum dma_transfer_direction direction, unsigned long flags) { struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan); struct sirfsoc_dma_desc *sdesc = NULL; diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index c798445..5fe5933 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2531,8 +2531,7 @@ d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, static struct dma_async_tx_descriptor * dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction, unsigned long flags, - void *context) + enum dma_transfer_direction direction, unsigned long flags) { unsigned int periods = buf_len / period_len; struct dma_async_tx_descriptor *txd; diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c new file mode 100644 index 0000000..1f92a56 --- /dev/null +++ b/drivers/dma/sun6i-dma.c @@ -0,0 +1,1053 @@ +/* + * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd + * Author: Sugar <shuge@allwinnertech.com> + * + * Copyright (C) 2014 Maxime Ripard + * Maxime Ripard <maxime.ripard@free-electrons.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/reset.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include "virt-dma.h" + +/* + * There's 16 physical channels that can work in parallel. + * + * However we have 30 different endpoints for our requests. + * + * Since the channels are able to handle only an unidirectional + * transfer, we need to allocate more virtual channels so that + * everyone can grab one channel. + * + * Some devices can't work in both direction (mostly because it + * wouldn't make sense), so we have a bit fewer virtual channels than + * 2 channels per endpoints. + */ + +#define NR_MAX_CHANNELS 16 +#define NR_MAX_REQUESTS 30 +#define NR_MAX_VCHANS 53 + +/* + * Common registers + */ +#define DMA_IRQ_EN(x) ((x) * 0x04) +#define DMA_IRQ_HALF BIT(0) +#define DMA_IRQ_PKG BIT(1) +#define DMA_IRQ_QUEUE BIT(2) + +#define DMA_IRQ_CHAN_NR 8 +#define DMA_IRQ_CHAN_WIDTH 4 + + +#define DMA_IRQ_STAT(x) ((x) * 0x04 + 0x10) + +#define DMA_STAT 0x30 + +/* + * Channels specific registers + */ +#define DMA_CHAN_ENABLE 0x00 +#define DMA_CHAN_ENABLE_START BIT(0) +#define DMA_CHAN_ENABLE_STOP 0 + +#define DMA_CHAN_PAUSE 0x04 +#define DMA_CHAN_PAUSE_PAUSE BIT(1) +#define DMA_CHAN_PAUSE_RESUME 0 + +#define DMA_CHAN_LLI_ADDR 0x08 + +#define DMA_CHAN_CUR_CFG 0x0c +#define DMA_CHAN_CFG_SRC_DRQ(x) ((x) & 0x1f) +#define DMA_CHAN_CFG_SRC_IO_MODE BIT(5) +#define DMA_CHAN_CFG_SRC_LINEAR_MODE (0 << 5) +#define DMA_CHAN_CFG_SRC_BURST(x) (((x) & 0x3) << 7) +#define DMA_CHAN_CFG_SRC_WIDTH(x) (((x) & 0x3) << 9) + +#define DMA_CHAN_CFG_DST_DRQ(x) (DMA_CHAN_CFG_SRC_DRQ(x) << 16) +#define DMA_CHAN_CFG_DST_IO_MODE (DMA_CHAN_CFG_SRC_IO_MODE << 16) +#define DMA_CHAN_CFG_DST_LINEAR_MODE (DMA_CHAN_CFG_SRC_LINEAR_MODE << 16) +#define DMA_CHAN_CFG_DST_BURST(x) (DMA_CHAN_CFG_SRC_BURST(x) << 16) +#define DMA_CHAN_CFG_DST_WIDTH(x) (DMA_CHAN_CFG_SRC_WIDTH(x) << 16) + +#define DMA_CHAN_CUR_SRC 0x10 + +#define DMA_CHAN_CUR_DST 0x14 + +#define DMA_CHAN_CUR_CNT 0x18 + +#define DMA_CHAN_CUR_PARA 0x1c + + +/* + * Various hardware related defines + */ +#define LLI_LAST_ITEM 0xfffff800 +#define NORMAL_WAIT 8 +#define DRQ_SDRAM 1 + +/* + * Hardware representation of the LLI + * + * The hardware will be fed the physical address of this structure, + * and read its content in order to start the transfer. + */ +struct sun6i_dma_lli { + u32 cfg; + u32 src; + u32 dst; + u32 len; + u32 para; + u32 p_lli_next; + + /* + * This field is not used by the DMA controller, but will be + * used by the CPU to go through the list (mostly for dumping + * or freeing it). + */ + struct sun6i_dma_lli *v_lli_next; +}; + + +struct sun6i_desc { + struct virt_dma_desc vd; + dma_addr_t p_lli; + struct sun6i_dma_lli *v_lli; +}; + +struct sun6i_pchan { + u32 idx; + void __iomem *base; + struct sun6i_vchan *vchan; + struct sun6i_desc *desc; + struct sun6i_desc *done; +}; + +struct sun6i_vchan { + struct virt_dma_chan vc; + struct list_head node; + struct dma_slave_config cfg; + struct sun6i_pchan *phy; + u8 port; +}; + +struct sun6i_dma_dev { + struct dma_device slave; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; + struct reset_control *rstc; + struct tasklet_struct task; + atomic_t tasklet_shutdown; + struct list_head pending; + struct dma_pool *pool; + struct sun6i_pchan *pchans; + struct sun6i_vchan *vchans; +}; + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d) +{ + return container_of(d, struct sun6i_dma_dev, slave); +} + +static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun6i_vchan, vc.chan); +} + +static inline struct sun6i_desc * +to_sun6i_desc(struct dma_async_tx_descriptor *tx) +{ + return container_of(tx, struct sun6i_desc, vd.tx); +} + +static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev) +{ + dev_dbg(sdev->slave.dev, "Common register:\n" + "\tmask0(%04x): 0x%08x\n" + "\tmask1(%04x): 0x%08x\n" + "\tpend0(%04x): 0x%08x\n" + "\tpend1(%04x): 0x%08x\n" + "\tstats(%04x): 0x%08x\n", + DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)), + DMA_IRQ_EN(1), readl(sdev->base + DMA_IRQ_EN(1)), + DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)), + DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)), + DMA_STAT, readl(sdev->base + DMA_STAT)); +} + +static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev, + struct sun6i_pchan *pchan) +{ + phys_addr_t reg = virt_to_phys(pchan->base); + + dev_dbg(sdev->slave.dev, "Chan %d reg: %pa\n" + "\t___en(%04x): \t0x%08x\n" + "\tpause(%04x): \t0x%08x\n" + "\tstart(%04x): \t0x%08x\n" + "\t__cfg(%04x): \t0x%08x\n" + "\t__src(%04x): \t0x%08x\n" + "\t__dst(%04x): \t0x%08x\n" + "\tcount(%04x): \t0x%08x\n" + "\t_para(%04x): \t0x%08x\n\n", + pchan->idx, ®, + DMA_CHAN_ENABLE, + readl(pchan->base + DMA_CHAN_ENABLE), + DMA_CHAN_PAUSE, + readl(pchan->base + DMA_CHAN_PAUSE), + DMA_CHAN_LLI_ADDR, + readl(pchan->base + DMA_CHAN_LLI_ADDR), + DMA_CHAN_CUR_CFG, + readl(pchan->base + DMA_CHAN_CUR_CFG), + DMA_CHAN_CUR_SRC, + readl(pchan->base + DMA_CHAN_CUR_SRC), + DMA_CHAN_CUR_DST, + readl(pchan->base + DMA_CHAN_CUR_DST), + DMA_CHAN_CUR_CNT, + readl(pchan->base + DMA_CHAN_CUR_CNT), + DMA_CHAN_CUR_PARA, + readl(pchan->base + DMA_CHAN_CUR_PARA)); +} + +static inline int convert_burst(u32 maxburst, u8 *burst) +{ + switch (maxburst) { + case 1: + *burst = 0; + break; + case 8: + *burst = 2; + break; + default: + return -EINVAL; + } + + return 0; +} + +static inline int convert_buswidth(enum dma_slave_buswidth addr_width, u8 *width) +{ + if ((addr_width < DMA_SLAVE_BUSWIDTH_1_BYTE) || + (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES)) + return -EINVAL; + + *width = addr_width >> 1; + return 0; +} + +static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev, + struct sun6i_dma_lli *next, + dma_addr_t next_phy, + struct sun6i_desc *txd) +{ + if ((!prev && !txd) || !next) + return NULL; + + if (!prev) { + txd->p_lli = next_phy; + txd->v_lli = next; + } else { + prev->p_lli_next = next_phy; + prev->v_lli_next = next; + } + + next->p_lli_next = LLI_LAST_ITEM; + next->v_lli_next = NULL; + + return next; +} + +static inline int sun6i_dma_cfg_lli(struct sun6i_dma_lli *lli, + dma_addr_t src, + dma_addr_t dst, u32 len, + struct dma_slave_config *config) +{ + u8 src_width, dst_width, src_burst, dst_burst; + int ret; + + if (!config) + return -EINVAL; + + ret = convert_burst(config->src_maxburst, &src_burst); + if (ret) + return ret; + + ret = convert_burst(config->dst_maxburst, &dst_burst); + if (ret) + return ret; + + ret = convert_buswidth(config->src_addr_width, &src_width); + if (ret) + return ret; + + ret = convert_buswidth(config->dst_addr_width, &dst_width); + if (ret) + return ret; + + lli->cfg = DMA_CHAN_CFG_SRC_BURST(src_burst) | + DMA_CHAN_CFG_SRC_WIDTH(src_width) | + DMA_CHAN_CFG_DST_BURST(dst_burst) | + DMA_CHAN_CFG_DST_WIDTH(dst_width); + + lli->src = src; + lli->dst = dst; + lli->len = len; + lli->para = NORMAL_WAIT; + + return 0; +} + +static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan, + struct sun6i_dma_lli *lli) +{ + phys_addr_t p_lli = virt_to_phys(lli); + + dev_dbg(chan2dev(&vchan->vc.chan), + "\n\tdesc: p - %pa v - 0x%p\n" + "\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n" + "\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n", + &p_lli, lli, + lli->cfg, lli->src, lli->dst, + lli->len, lli->para, lli->p_lli_next); +} + +static void sun6i_dma_free_desc(struct virt_dma_desc *vd) +{ + struct sun6i_desc *txd = to_sun6i_desc(&vd->tx); + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device); + struct sun6i_dma_lli *v_lli, *v_next; + dma_addr_t p_lli, p_next; + + if (unlikely(!txd)) + return; + + p_lli = txd->p_lli; + v_lli = txd->v_lli; + + while (v_lli) { + v_next = v_lli->v_lli_next; + p_next = v_lli->p_lli_next; + + dma_pool_free(sdev->pool, v_lli, p_lli); + + v_lli = v_next; + p_lli = p_next; + } + + kfree(txd); +} + +static int sun6i_dma_terminate_all(struct sun6i_vchan *vchan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + LIST_HEAD(head); + + spin_lock(&sdev->lock); + list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vchan_get_all_descriptors(&vchan->vc, &head); + + if (pchan) { + writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE); + writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE); + + vchan->phy = NULL; + pchan->vchan = NULL; + pchan->desc = NULL; + pchan->done = NULL; + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + vchan_dma_desc_free_list(&vchan->vc, &head); + + return 0; +} + +static int sun6i_dma_start_desc(struct sun6i_vchan *vchan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device); + struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc); + struct sun6i_pchan *pchan = vchan->phy; + u32 irq_val, irq_reg, irq_offset; + + if (!pchan) + return -EAGAIN; + + if (!desc) { + pchan->desc = NULL; + pchan->done = NULL; + return -EAGAIN; + } + + list_del(&desc->node); + + pchan->desc = to_sun6i_desc(&desc->tx); + pchan->done = NULL; + + sun6i_dma_dump_lli(vchan, pchan->desc->v_lli); + + irq_reg = pchan->idx / DMA_IRQ_CHAN_NR; + irq_offset = pchan->idx % DMA_IRQ_CHAN_NR; + + irq_val = readl(sdev->base + DMA_IRQ_EN(irq_offset)); + irq_val |= DMA_IRQ_QUEUE << (irq_offset * DMA_IRQ_CHAN_WIDTH); + writel(irq_val, sdev->base + DMA_IRQ_EN(irq_offset)); + + writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR); + writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE); + + sun6i_dma_dump_com_regs(sdev); + sun6i_dma_dump_chan_regs(sdev, pchan); + + return 0; +} + +static void sun6i_dma_tasklet(unsigned long data) +{ + struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data; + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + unsigned int pchan_alloc = 0; + unsigned int pchan_idx; + + list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) { + spin_lock_irq(&vchan->vc.lock); + + pchan = vchan->phy; + + if (pchan && pchan->done) { + if (sun6i_dma_start_desc(vchan)) { + /* + * No current txd associated with this channel + */ + dev_dbg(sdev->slave.dev, "pchan %u: free\n", + pchan->idx); + + /* Mark this channel free */ + vchan->phy = NULL; + pchan->vchan = NULL; + } + } + spin_unlock_irq(&vchan->vc.lock); + } + + spin_lock_irq(&sdev->lock); + for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) { + pchan = &sdev->pchans[pchan_idx]; + + if (pchan->vchan || list_empty(&sdev->pending)) + continue; + + vchan = list_first_entry(&sdev->pending, + struct sun6i_vchan, node); + + /* Remove from pending channels */ + list_del_init(&vchan->node); + pchan_alloc |= BIT(pchan_idx); + + /* Mark this channel allocated */ + pchan->vchan = vchan; + vchan->phy = pchan; + dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n", + pchan->idx, &vchan->vc); + } + spin_unlock_irq(&sdev->lock); + + for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) { + if (!(pchan_alloc & BIT(pchan_idx))) + continue; + + pchan = sdev->pchans + pchan_idx; + vchan = pchan->vchan; + if (vchan) { + spin_lock_irq(&vchan->vc.lock); + sun6i_dma_start_desc(vchan); + spin_unlock_irq(&vchan->vc.lock); + } + } +} + +static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id) +{ + struct sun6i_dma_dev *sdev = dev_id; + struct sun6i_vchan *vchan; + struct sun6i_pchan *pchan; + int i, j, ret = IRQ_NONE; + u32 status; + + for (i = 0; i < 2; i++) { + status = readl(sdev->base + DMA_IRQ_STAT(i)); + if (!status) + continue; + + dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n", + i ? "high" : "low", status); + + writel(status, sdev->base + DMA_IRQ_STAT(i)); + + for (j = 0; (j < 8) && status; j++) { + if (status & DMA_IRQ_QUEUE) { + pchan = sdev->pchans + j; + vchan = pchan->vchan; + + if (vchan) { + spin_lock(&vchan->vc.lock); + vchan_cookie_complete(&pchan->desc->vd); + pchan->done = pchan->desc; + spin_unlock(&vchan->vc.lock); + } + } + + status = status >> 4; + } + + if (!atomic_read(&sdev->tasklet_shutdown)) + tasklet_schedule(&sdev->task); + ret = IRQ_HANDLED; + } + + return ret; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli; + struct sun6i_desc *txd; + dma_addr_t p_lli; + int ret; + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, &dest, &src, len, flags); + + if (!len) + return NULL; + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) { + dev_err(sdev->slave.dev, "Failed to alloc lli memory\n"); + goto err_txd_free; + } + + ret = sun6i_dma_cfg_lli(v_lli, src, dest, len, sconfig); + if (ret) + goto err_dma_free; + + v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE; + + sun6i_dma_lli_add(NULL, v_lli, p_lli, txd); + + sun6i_dma_dump_lli(vchan, v_lli); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_dma_free: + dma_pool_free(sdev->pool, v_lli, p_lli); +err_txd_free: + kfree(txd); + return NULL; +} + +static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun6i_dma_lli *v_lli, *prev = NULL; + struct sun6i_desc *txd; + struct scatterlist *sg; + dma_addr_t p_lli; + int i, ret; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + txd = kzalloc(sizeof(*txd), GFP_NOWAIT); + if (!txd) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli); + if (!v_lli) + goto err_lli_free; + + if (dir == DMA_MEM_TO_DEV) { + ret = sun6i_dma_cfg_lli(v_lli, sg_dma_address(sg), + sconfig->dst_addr, sg_dma_len(sg), + sconfig); + if (ret) + goto err_cur_lli_free; + + v_lli->cfg |= DMA_CHAN_CFG_DST_IO_MODE | + DMA_CHAN_CFG_SRC_LINEAR_MODE | + DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_DST_DRQ(vchan->port); + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sconfig->dst_addr, &sg_dma_address(sg), + sg_dma_len(sg), flags); + + } else { + ret = sun6i_dma_cfg_lli(v_lli, sconfig->src_addr, + sg_dma_address(sg), sg_dma_len(sg), + sconfig); + if (ret) + goto err_cur_lli_free; + + v_lli->cfg |= DMA_CHAN_CFG_DST_LINEAR_MODE | + DMA_CHAN_CFG_SRC_IO_MODE | + DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | + DMA_CHAN_CFG_SRC_DRQ(vchan->port); + + dev_dbg(chan2dev(chan), + "%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n", + __func__, vchan->vc.chan.chan_id, + &sg_dma_address(sg), &sconfig->src_addr, + sg_dma_len(sg), flags); + } + + prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd); + } + + dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli); + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + sun6i_dma_dump_lli(vchan, prev); + + return vchan_tx_prep(&vchan->vc, &txd->vd, flags); + +err_cur_lli_free: + dma_pool_free(sdev->pool, v_lli, p_lli); +err_lli_free: + for (prev = txd->v_lli; prev; prev = prev->v_lli_next) + dma_pool_free(sdev->pool, prev, virt_to_phys(prev)); + kfree(txd); + return NULL; +} + +static int sun6i_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, + unsigned long arg) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + unsigned long flags; + int ret = 0; + + switch (cmd) { + case DMA_RESUME: + dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc); + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (pchan) { + writel(DMA_CHAN_PAUSE_RESUME, + pchan->base + DMA_CHAN_PAUSE); + } else if (!list_empty(&vchan->vc.desc_issued)) { + spin_lock(&sdev->lock); + list_add_tail(&vchan->node, &sdev->pending); + spin_unlock(&sdev->lock); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + break; + + case DMA_PAUSE: + dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc); + + if (pchan) { + writel(DMA_CHAN_PAUSE_PAUSE, + pchan->base + DMA_CHAN_PAUSE); + } else { + spin_lock(&sdev->lock); + list_del_init(&vchan->node); + spin_unlock(&sdev->lock); + } + break; + + case DMA_TERMINATE_ALL: + ret = sun6i_dma_terminate_all(vchan); + break; + case DMA_SLAVE_CONFIG: + memcpy(&vchan->cfg, (void *)arg, sizeof(struct dma_slave_config)); + break; + default: + ret = -ENXIO; + break; + } + return ret; +} + +static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + struct sun6i_pchan *pchan = vchan->phy; + struct sun6i_dma_lli *lli; + struct virt_dma_desc *vd; + struct sun6i_desc *txd; + enum dma_status ret; + unsigned long flags; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (ret == DMA_COMPLETE) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + vd = vchan_find_desc(&vchan->vc, cookie); + txd = to_sun6i_desc(&vd->tx); + + if (vd) { + for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next) + bytes += lli->len; + } else if (!pchan || !pchan->desc) { + bytes = 0; + } else { + bytes = readl(pchan->base + DMA_CHAN_CUR_CNT); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + dma_set_residue(state, bytes); + + return ret; +} + +static void sun6i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + if (vchan_issue_pending(&vchan->vc)) { + spin_lock(&sdev->lock); + + if (!vchan->phy && list_empty(&vchan->node)) { + list_add_tail(&vchan->node, &sdev->pending); + tasklet_schedule(&sdev->task); + dev_dbg(chan2dev(chan), "vchan %p: issued\n", + &vchan->vc); + } + + spin_unlock(&sdev->lock); + } else { + dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n", + &vchan->vc); + } + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static int sun6i_dma_alloc_chan_resources(struct dma_chan *chan) +{ + return 0; +} + +static void sun6i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device); + struct sun6i_vchan *vchan = to_sun6i_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&sdev->lock, flags); + list_del_init(&vchan->node); + spin_unlock_irqrestore(&sdev->lock, flags); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun6i_dma_dev *sdev = ofdma->of_dma_data; + struct sun6i_vchan *vchan; + struct dma_chan *chan; + u8 port = dma_spec->args[0]; + + if (port > NR_MAX_REQUESTS) + return NULL; + + chan = dma_get_any_slave_channel(&sdev->slave); + if (!chan) + return NULL; + + vchan = to_sun6i_vchan(chan); + vchan->port = port; + + return chan; +} + +static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev) +{ + /* Disable all interrupts from DMA */ + writel(0, sdev->base + DMA_IRQ_EN(0)); + writel(0, sdev->base + DMA_IRQ_EN(1)); + + /* Prevent spurious interrupts from scheduling the tasklet */ + atomic_inc(&sdev->tasklet_shutdown); + + /* Make sure we won't have any further interrupts */ + devm_free_irq(sdev->slave.dev, sdev->irq, sdev); + + /* Actually prevent the tasklet from being scheduled */ + tasklet_kill(&sdev->task); +} + +static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev) +{ + int i; + + for (i = 0; i < NR_MAX_VCHANS; i++) { + struct sun6i_vchan *vchan = &sdev->vchans[i]; + + list_del(&vchan->vc.chan.device_node); + tasklet_kill(&vchan->vc.task); + } +} + +static int sun6i_dma_probe(struct platform_device *pdev) +{ + struct sun6i_dma_dev *sdc; + struct resource *res; + struct clk *mux, *pll6; + int ret, i; + + sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL); + if (!sdc) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + sdc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(sdc->base)) + return PTR_ERR(sdc->base); + + sdc->irq = platform_get_irq(pdev, 0); + if (sdc->irq < 0) { + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return sdc->irq; + } + + sdc->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(sdc->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(sdc->clk); + } + + mux = clk_get(NULL, "ahb1_mux"); + if (IS_ERR(mux)) { + dev_err(&pdev->dev, "Couldn't get AHB1 Mux\n"); + return PTR_ERR(mux); + } + + pll6 = clk_get(NULL, "pll6"); + if (IS_ERR(pll6)) { + dev_err(&pdev->dev, "Couldn't get PLL6\n"); + clk_put(mux); + return PTR_ERR(pll6); + } + + ret = clk_set_parent(mux, pll6); + clk_put(pll6); + clk_put(mux); + + if (ret) { + dev_err(&pdev->dev, "Couldn't reparent AHB1 on PLL6\n"); + return ret; + } + + sdc->rstc = devm_reset_control_get(&pdev->dev, NULL); + if (IS_ERR(sdc->rstc)) { + dev_err(&pdev->dev, "No reset controller specified\n"); + return PTR_ERR(sdc->rstc); + } + + sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev, + sizeof(struct sun6i_dma_lli), 4, 0); + if (!sdc->pool) { + dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); + return -ENOMEM; + } + + platform_set_drvdata(pdev, sdc); + INIT_LIST_HEAD(&sdc->pending); + spin_lock_init(&sdc->lock); + + dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask); + dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask); + + INIT_LIST_HEAD(&sdc->slave.channels); + sdc->slave.device_alloc_chan_resources = sun6i_dma_alloc_chan_resources; + sdc->slave.device_free_chan_resources = sun6i_dma_free_chan_resources; + sdc->slave.device_tx_status = sun6i_dma_tx_status; + sdc->slave.device_issue_pending = sun6i_dma_issue_pending; + sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; + sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; + sdc->slave.device_control = sun6i_dma_control; + sdc->slave.chancnt = NR_MAX_VCHANS; + + sdc->slave.dev = &pdev->dev; + + sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS, + sizeof(struct sun6i_pchan), GFP_KERNEL); + if (!sdc->pchans) + return -ENOMEM; + + sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS, + sizeof(struct sun6i_vchan), GFP_KERNEL); + if (!sdc->vchans) + return -ENOMEM; + + tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc); + + for (i = 0; i < NR_MAX_CHANNELS; i++) { + struct sun6i_pchan *pchan = &sdc->pchans[i]; + + pchan->idx = i; + pchan->base = sdc->base + 0x100 + i * 0x40; + } + + for (i = 0; i < NR_MAX_VCHANS; i++) { + struct sun6i_vchan *vchan = &sdc->vchans[i]; + + INIT_LIST_HEAD(&vchan->node); + vchan->vc.desc_free = sun6i_dma_free_desc; + vchan_init(&vchan->vc, &sdc->slave); + } + + ret = reset_control_deassert(sdc->rstc); + if (ret) { + dev_err(&pdev->dev, "Couldn't deassert the device from reset\n"); + goto err_chan_free; + } + + ret = clk_prepare_enable(sdc->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + goto err_reset_assert; + } + + ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0, + dev_name(&pdev->dev), sdc); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&sdc->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_irq_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate, + sdc); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&sdc->slave); +err_irq_disable: + sun6i_kill_tasklet(sdc); +err_clk_disable: + clk_disable_unprepare(sdc->clk); +err_reset_assert: + reset_control_assert(sdc->rstc); +err_chan_free: + sun6i_dma_free(sdc); + return ret; +} + +static int sun6i_dma_remove(struct platform_device *pdev) +{ + struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&sdc->slave); + + sun6i_kill_tasklet(sdc); + + clk_disable_unprepare(sdc->clk); + reset_control_assert(sdc->rstc); + + sun6i_dma_free(sdc); + + return 0; +} + +static struct of_device_id sun6i_dma_match[] = { + { .compatible = "allwinner,sun6i-a31-dma" }, + { /* sentinel */ } +}; + +static struct platform_driver sun6i_dma_driver = { + .probe = sun6i_dma_probe, + .remove = sun6i_dma_remove, + .driver = { + .name = "sun6i-dma", + .of_match_table = sun6i_dma_match, + }, +}; +module_platform_driver(sun6i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver"); +MODULE_AUTHOR("Sugar <shuge@allwinnertech.com>"); +MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 03ad64e..16efa60 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -1055,7 +1055,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len, size_t period_len, enum dma_transfer_direction direction, - unsigned long flags, void *context) + unsigned long flags) { struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc); struct tegra_dma_desc *dma_desc = NULL; diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c index 374b57f..a12c855 100644 --- a/drivers/edac/cell_edac.c +++ b/drivers/edac/cell_edac.c @@ -134,8 +134,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci) int j; u32 nr_pages; - for (np = NULL; - (np = of_find_node_by_name(np, "memory")) != NULL;) { + for_each_node_by_name(np, "memory") { struct resource r; /* We "know" that the Cell firmware only creates one entry diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index d3d0e8c..d6c767a 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -382,6 +382,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index ca8430f..e67b9a5 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -1085,6 +1085,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index 22e0c92..1265164 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client) dev_err(&client->dev, "invalid gain on %s\n", node->full_name); + return -EINVAL; } } @@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client) dev_err(&client->dev, "invalid data_rate on %s\n", node->full_name); + return -EINVAL; } } diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index f960636..272fcc8 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -510,6 +510,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, err = kstrtoul(buf, 10, &val); if (err) return err; + + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c index 4ae3fff1..bea0a34 100644 --- a/drivers/hwmon/dme1737.c +++ b/drivers/hwmon/dme1737.c @@ -247,8 +247,8 @@ struct dme1737_data { u8 pwm_acz[3]; u8 pwm_freq[6]; u8 pwm_rr[2]; - u8 zone_low[3]; - u8 zone_abs[3]; + s8 zone_low[3]; + s8 zone_abs[3]; u8 zone_hyst[2]; u32 alarms; }; @@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res) return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2)); } -static inline int IN_TO_REG(int val, int nominal) +static inline int IN_TO_REG(long val, int nominal) { return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255); } @@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res) return (reg * 1000) >> (res - 8); } -static inline int TEMP_TO_REG(int val) +static inline int TEMP_TO_REG(long val) { return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127); } @@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg) return TEMP_RANGE[(reg >> 4) & 0x0f]; } -static int TEMP_RANGE_TO_REG(int val, int reg) +static int TEMP_RANGE_TO_REG(long val, int reg) { int i; @@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix) return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000; } -static inline int TEMP_HYST_TO_REG(int val, int ix, int reg) +static inline int TEMP_HYST_TO_REG(long val, int ix, int reg) { int hyst = clamp_val((val + 500) / 1000, 0, 15); @@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc) return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg; } -static inline int FAN_TO_REG(int val, int tpc) +static inline int FAN_TO_REG(long val, int tpc) { if (tpc) { return clamp_val(val / tpc, 0, 0xffff); @@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg) return (edge > 0) ? 1 << (edge - 1) : 0; } -static inline int FAN_TYPE_TO_REG(int val, int reg) +static inline int FAN_TYPE_TO_REG(long val, int reg) { int edge = (val == 4) ? 3 : val; @@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg) return 1000 + i * 500; } -static int FAN_MAX_TO_REG(int val) +static int FAN_MAX_TO_REG(long val) { int i; @@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg) return acz[(reg >> 5) & 0x07]; } -static inline int PWM_ACZ_TO_REG(int val, int reg) +static inline int PWM_ACZ_TO_REG(long val, int reg) { int acz = (val == 4) ? 2 : val - 1; @@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg) return PWM_FREQ[reg & 0x0f]; } -static int PWM_FREQ_TO_REG(int val, int reg) +static int PWM_FREQ_TO_REG(long val, int reg) { int i; @@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix) return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0; } -static int PWM_RR_TO_REG(int val, int ix, int reg) +static int PWM_RR_TO_REG(long val, int ix, int reg) { int i; @@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix) return PWM_RR_FROM_REG(reg, ix) ? 1 : 0; } -static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg) +static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg) { int en = (ix == 1) ? 0x80 : 0x08; @@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct dme1737_data *data = dev_get_drvdata(dev); - long val; + unsigned long val; int err; - err = kstrtol(buf, 10, &val); + err = kstrtoul(buf, 10, &val); if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c index e87da90..ada9071 100644 --- a/drivers/hwmon/emc6w201.c +++ b/drivers/hwmon/emc6w201.c @@ -252,12 +252,12 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *devattr, if (err < 0) return err; - val /= 1000; + val = DIV_ROUND_CLOSEST(val, 1000); reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr) : EMC6W201_REG_TEMP_HIGH(nr); mutex_lock(&data->update_lock); - data->temp[sf][nr] = clamp_val(val, -127, 128); + data->temp[sf][nr] = clamp_val(val, -127, 127); err = emc6w201_write8(client, reg, data->temp[sf][nr]); mutex_unlock(&data->update_lock); diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c index 0e01c4e..7b73d20 100644 --- a/drivers/hwmon/hih6130.c +++ b/drivers/hwmon/hih6130.c @@ -238,6 +238,9 @@ static int hih6130_probe(struct i2c_client *client, hih6130->client = client; mutex_init(&hih6130->lock); + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_QUICK)) + hih6130->write_length = 1; + hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, hih6130, hih6130_groups); diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index ba1d83d..a5e2958 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -617,6 +617,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, err = kstrtoul(buf, 10, &val); if (err) return err; + + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c index d2060e2..cfaf70b 100644 --- a/drivers/hwmon/lm92.c +++ b/drivers/hwmon/lm92.c @@ -74,12 +74,9 @@ static inline int TEMP_FROM_REG(s16 reg) return reg / 8 * 625 / 10; } -static inline s16 TEMP_TO_REG(int val) +static inline s16 TEMP_TO_REG(long val) { - if (val <= -60000) - return -60000 * 10 / 625 * 8; - if (val >= 160000) - return 160000 * 10 / 625 * 8; + val = clamp_val(val, -60000, 160000); return val * 10 / 625 * 8; } @@ -206,10 +203,12 @@ static ssize_t set_temp_hyst(struct device *dev, if (err) return err; + val = clamp_val(val, -120000, 220000); mutex_lock(&data->update_lock); - data->temp[t_hyst] = TEMP_FROM_REG(data->temp[attr->index]) - val; + data->temp[t_hyst] = + TEMP_TO_REG(TEMP_FROM_REG(data->temp[attr->index]) - val); i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST, - TEMP_TO_REG(data->temp[t_hyst])); + data->temp[t_hyst]); mutex_unlock(&data->update_lock); return count; } diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c index 988181e..145f674 100644 --- a/drivers/hwmon/pc87360.c +++ b/drivers/hwmon/pc87360.c @@ -615,6 +615,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c index c74d2da..e42964f 100644 --- a/drivers/hwmon/tmp103.c +++ b/drivers/hwmon/tmp103.c @@ -131,13 +131,6 @@ static int tmp103_probe(struct i2c_client *client, struct regmap *regmap; int ret; - if (!i2c_check_functionality(client->adapter, - I2C_FUNC_SMBUS_BYTE_DATA)) { - dev_err(&client->dev, - "adapter doesn't support SMBus byte transactions\n"); - return -ENODEV; - } - regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config); if (IS_ERR(regmap)) { dev_err(dev, "failed to allocate register map\n"); diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c index 344b22e..3ea57c3 100644 --- a/drivers/hwmon/vt1211.c +++ b/drivers/hwmon/vt1211.c @@ -879,6 +879,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index c1726be..2f55973 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -820,6 +820,9 @@ store_vrm_reg(struct device *dev, struct device_attribute *attr, const char *buf err = kstrtoul(buf, 10, &val); if (err) return err; + + if (val > 255) + return -EINVAL; data->vrm = val; return count; diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index cb3765f..001df85 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ -1181,6 +1181,9 @@ static ssize_t store_vrm_reg(struct device *dev, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index 9d63d71..816aa6c 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -353,6 +353,9 @@ store_vrm(struct device *dev, struct device_attribute *attr, if (err) return err; + if (val > 255) + return -EINVAL; + data->vrm = val; return count; } diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig index 70637d2..3612cb5 100644 --- a/drivers/hwspinlock/Kconfig +++ b/drivers/hwspinlock/Kconfig @@ -10,7 +10,7 @@ menu "Hardware Spinlock drivers" config HWSPINLOCK_OMAP tristate "OMAP Hardware Spinlock device" - depends on ARCH_OMAP4 || SOC_OMAP5 + depends on ARCH_OMAP4 || SOC_OMAP5 || SOC_DRA7XX || SOC_AM33XX || SOC_AM43XX select HWSPINLOCK help Say y here to support the OMAP Hardware Spinlock device (firstly diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c index 292869c..c1e2cd4 100644 --- a/drivers/hwspinlock/omap_hwspinlock.c +++ b/drivers/hwspinlock/omap_hwspinlock.c @@ -98,10 +98,29 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) if (!io_base) return -ENOMEM; + /* + * make sure the module is enabled and clocked before reading + * the module SYSSTATUS register + */ + pm_runtime_enable(&pdev->dev); + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&pdev->dev); + goto iounmap_base; + } + /* Determine number of locks */ i = readl(io_base + SYSSTATUS_OFFSET); i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET; + /* + * runtime PM will make sure the clock of this module is + * enabled again iff at least one lock is requested + */ + ret = pm_runtime_put(&pdev->dev); + if (ret < 0) + goto iounmap_base; + /* one of the four lsb's must be set, and nothing else */ if (hweight_long(i & 0xf) != 1 || i > 8) { ret = -EINVAL; @@ -121,12 +140,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++) hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; - /* - * runtime PM will make sure the clock of this module is - * enabled iff at least one lock is requested - */ - pm_runtime_enable(&pdev->dev); - ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, pdata->base_id, num_locks); if (ret) @@ -135,9 +148,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev) return 0; reg_fail: - pm_runtime_disable(&pdev->dev); kfree(bank); iounmap_base: + pm_runtime_disable(&pdev->dev); iounmap(io_base); return ret; } diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 2bc7f5a..f6d2961 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, port_priv = ib_get_agent_port(device, port_num); if (!port_priv) { - printk(KERN_ERR SPFX "Unable to find port agent\n"); + dev_err(&device->dev, "Unable to find port agent\n"); return; } agent = port_priv->agent[qpn]; ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); if (IS_ERR(ah)) { - printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n", + dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n", PTR_ERR(ah)); return; } @@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_KERNEL); if (IS_ERR(send_buf)) { - printk(KERN_ERR SPFX "ib_create_send_mad error\n"); + dev_err(&device->dev, "ib_create_send_mad error\n"); goto err1; } @@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh, } if (ib_post_send_mad(send_buf, NULL)) { - printk(KERN_ERR SPFX "ib_post_send_mad error\n"); + dev_err(&device->dev, "ib_post_send_mad error\n"); goto err2; } return; @@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { - printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n"); + dev_err(&device->dev, "No memory for ib_agent_port_private\n"); ret = -ENOMEM; goto error1; } @@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) port_priv->agent[0] = ib_register_mad_agent(device, port_num, IB_QPT_SMI, NULL, 0, &agent_send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(port_priv->agent[0])) { ret = PTR_ERR(port_priv->agent[0]); goto error2; @@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num) port_priv->agent[1] = ib_register_mad_agent(device, port_num, IB_QPT_GSI, NULL, 0, &agent_send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(port_priv->agent[1])) { ret = PTR_ERR(port_priv->agent[1]); goto error3; @@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num) port_priv = __ib_get_agent_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); - printk(KERN_ERR SPFX "Port %d not found\n", port_num); + dev_err(&device->dev, "Port %d not found\n", port_num); return -ENODEV; } list_del(&port_priv->port_list); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c323917..e28a494 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device) struct cm_port *port; struct ib_mad_reg_req reg_req = { .mgmt_class = IB_MGMT_CLASS_CM, - .mgmt_class_version = IB_CM_CLASS_VERSION + .mgmt_class_version = IB_CM_CLASS_VERSION, }; struct ib_port_modify port_modify = { .set_port_cap_mask = IB_PORT_CM_SUP @@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device) 0, cm_send_handler, cm_recv_handler, - port); + port, + 0); if (IS_ERR(port->mad_agent)) goto error2; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 3d2e489..ff9163d 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -46,6 +46,7 @@ #include <linux/completion.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> @@ -65,6 +66,20 @@ struct iwcm_work { struct list_head free_list; }; +static unsigned int default_backlog = 256; + +static struct ctl_table_header *iwcm_ctl_table_hdr; +static struct ctl_table iwcm_ctl_table[] = { + { + .procname = "default_backlog", + .data = &default_backlog, + .maxlen = sizeof(default_backlog), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: @@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (!backlog) + backlog = default_backlog; + ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; @@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void) if (!iwcm_wq) return -ENOMEM; + iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", + iwcm_ctl_table); + if (!iwcm_ctl_table_hdr) { + pr_err("iw_cm: couldn't register sysctl paths\n"); + destroy_workqueue(iwcm_wq); + return -ENOMEM; + } + return 0; } static void __exit iw_cm_cleanup(void) { + unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); } diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index ab31f13..74c30f4 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -33,6 +33,9 @@ * SOFTWARE. * */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/dma-mapping.h> #include <linux/slab.h> #include <linux/module.h> @@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, u8 rmpp_version, ib_mad_send_handler send_handler, ib_mad_recv_handler recv_handler, - void *context) + void *context, + u32 registration_flags) { struct ib_mad_port_private *port_priv; struct ib_mad_agent *ret = ERR_PTR(-EINVAL); @@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate parameters */ qpn = get_spl_qp_index(qp_type); - if (qpn == -1) + if (qpn == -1) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid QP Type %d\n", + qp_type); goto error1; + } - if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) + if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid RMPP Version %u\n", + rmpp_version); goto error1; + } /* Validate MAD registration request if supplied */ if (mad_reg_req) { - if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) + if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { + dev_notice(&device->dev, + "ib_register_mad_agent: invalid Class Version %u\n", + mad_reg_req->mgmt_class_version); goto error1; - if (!recv_handler) + } + if (!recv_handler) { + dev_notice(&device->dev, + "ib_register_mad_agent: no recv_handler\n"); goto error1; + } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { /* * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only * one in this range currently allowed */ if (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", + mad_reg_req->mgmt_class); goto error1; + } } else if (mad_reg_req->mgmt_class == 0) { /* * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid Mgmt Class 0\n"); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* * If class is in "new" vendor range, * ensure supplied OUI is not zero */ - if (!is_vendor_oui(mad_reg_req->oui)) + if (!is_vendor_oui(mad_reg_req->oui)) { + dev_notice(&device->dev, + "ib_register_mad_agent: No OUI specified for class 0x%x\n", + mad_reg_req->mgmt_class); goto error1; + } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { - if (rmpp_version) + if (rmpp_version) { + dev_notice(&device->dev, + "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", + mad_reg_req->mgmt_class); goto error1; + } } + /* Make sure class supplied is consistent with QP type */ if (qp_type == IB_QPT_SMI) { if ((mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", + mad_reg_req->mgmt_class); goto error1; + } } else { if ((mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == - IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) + IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { + dev_notice(&device->dev, + "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", + mad_reg_req->mgmt_class); goto error1; + } } } else { /* No registration request supplied */ if (!send_handler) goto error1; + if (registration_flags & IB_MAD_USER_RMPP) + goto error1; } /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { + dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n"); ret = ERR_PTR(-ENODEV); goto error1; } @@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Verify the QP requested is supported. For example, Ethernet devices * will not have QP0 */ if (!port_priv->qp_info[qpn].qp) { + dev_notice(&device->dev, + "ib_register_mad_agent: QP %d not supported\n", qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } @@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, mad_agent_priv->agent.context = context; mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; mad_agent_priv->agent.port_num = port_num; + mad_agent_priv->agent.flags = registration_flags; spin_lock_init(&mad_agent_priv->lock); INIT_LIST_HEAD(&mad_agent_priv->send_list); INIT_LIST_HEAD(&mad_agent_priv->wait_list); @@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, smi_handle_dr_smp_send(smp, device->node_type, port_num) == IB_SMI_DISCARD) { ret = -EINVAL; - printk(KERN_ERR PFX "Invalid directed route\n"); + dev_err(&device->dev, "Invalid directed route\n"); goto out; } @@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, local = kmalloc(sizeof *local, GFP_ATOMIC); if (!local) { ret = -ENOMEM; - printk(KERN_ERR PFX "No memory for ib_mad_local_private\n"); + dev_err(&device->dev, "No memory for ib_mad_local_private\n"); goto out; } local->mad_priv = NULL; @@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv, mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); if (!mad_priv) { ret = -ENOMEM; - printk(KERN_ERR PFX "No memory for local response MAD\n"); + dev_err(&device->dev, "No memory for local response MAD\n"); kfree(local); goto out; } @@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); if (!seg) { - printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " - "alloc failed for len %zd, gfp %#x\n", - sizeof (*seg) + seg_size, gfp_mask); + dev_err(&send_buf->mad_agent->device->dev, + "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n", + sizeof (*seg) + seg_size, gfp_mask); free_send_rmpp_list(send_wr); return -ENOMEM; } @@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr, return 0; } +int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent) +{ + return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP); +} +EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent); + struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, u32 remote_qpn, u16 pkey_index, int rmpp_active, @@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, pad = get_pad_size(hdr_len, data_len); message_size = hdr_len + data_len + pad; - if ((!mad_agent->rmpp_version && - (rmpp_active || message_size > sizeof(struct ib_mad))) || - (!rmpp_active && message_size > sizeof(struct ib_mad))) - return ERR_PTR(-EINVAL); + if (ib_mad_kernel_rmpp_agent(mad_agent)) { + if (!rmpp_active && message_size > sizeof(struct ib_mad)) + return ERR_PTR(-EINVAL); + } else + if (rmpp_active || message_size > sizeof(struct ib_mad)) + return ERR_PTR(-EINVAL); size = rmpp_active ? hdr_len : sizeof(struct ib_mad); buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); @@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf, &mad_agent_priv->send_list); spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_send_rmpp_mad(mad_send_wr); if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) ret = ib_send_mad(mad_send_wr); @@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp); int ib_process_mad_wc(struct ib_mad_agent *mad_agent, struct ib_wc *wc) { - printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n"); + dev_err(&mad_agent->device->dev, + "ib_process_mad_wc() not implemented yet\n"); return 0; } EXPORT_SYMBOL(ib_process_mad_wc); @@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method, for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { if ((*method)->agent[i]) { - printk(KERN_ERR PFX "Method %d already in use\n", i); + pr_err("Method %d already in use\n", i); return -EINVAL; } } @@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method) /* Allocate management method table */ *method = kzalloc(sizeof **method, GFP_ATOMIC); if (!*method) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_method_table\n"); + pr_err("No memory for ib_mad_mgmt_method_table\n"); return -ENOMEM; } @@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate management class table for "new" class version */ *class = kzalloc(sizeof **class, GFP_ATOMIC); if (!*class) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_class_table\n"); + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_class_table\n"); ret = -ENOMEM; goto error1; } @@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate mgmt vendor class table for "new" class version */ vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); if (!vendor) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_vendor_class_table\n"); + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_vendor_class_table\n"); goto error1; } @@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, /* Allocate table for this management vendor class */ vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); if (!vendor_class) { - printk(KERN_ERR PFX "No memory for " - "ib_mad_mgmt_vendor_class\n"); + dev_err(&agent_priv->agent.device->dev, + "No memory for ib_mad_mgmt_vendor_class\n"); goto error2; } @@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req, goto check_in_use; } } - printk(KERN_ERR PFX "All OUI slots in use\n"); + dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n"); goto error3; check_in_use: @@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv, if (mad_agent->agent.recv_handler) atomic_inc(&mad_agent->refcount); else { - printk(KERN_NOTICE PFX "No receive handler for client " - "%p on port %d\n", - &mad_agent->agent, port_priv->port_num); + dev_notice(&port_priv->device->dev, + "No receive handler for client %p on port %d\n", + &mad_agent->agent, port_priv->port_num); mad_agent = NULL; } } @@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num) /* Make sure MAD base version is understood */ if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { - printk(KERN_ERR PFX "MAD received with unsupported base " - "version %d\n", mad->mad_hdr.base_version); + pr_err("MAD received with unsupported base version %d\n", + mad->mad_hdr.base_version); goto out; } @@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv, rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; return !mad_agent_priv->agent.rmpp_version || + !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) || !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE) || (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); @@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, mad_recv_wc); if (!mad_recv_wc) { @@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); if (!mad_send_wr) { spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - ib_free_recv_mad(mad_recv_wc); - deref_mad_agent(mad_agent_priv); - return; - } - ib_mark_mad_done(mad_send_wr); - spin_unlock_irqrestore(&mad_agent_priv->lock, flags); + if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) + && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr) + & IB_MGMT_RMPP_FLAG_ACTIVE)) { + /* user rmpp is in effect + * and this is an active RMPP MAD + */ + mad_recv_wc->wc->wr_id = 0; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); + } else { + /* not user rmpp, revert to normal behavior and + * drop the mad */ + ib_free_recv_mad(mad_recv_wc); + deref_mad_agent(mad_agent_priv); + return; + } + } else { + ib_mark_mad_done(mad_send_wr); + spin_unlock_irqrestore(&mad_agent_priv->lock, flags); - /* Defined behavior is to complete response before request */ - mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; - mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, - mad_recv_wc); - atomic_dec(&mad_agent_priv->refcount); + /* Defined behavior is to complete response before request */ + mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; + mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, + mad_recv_wc); + atomic_dec(&mad_agent_priv->refcount); - mad_send_wc.status = IB_WC_SUCCESS; - mad_send_wc.vendor_err = 0; - mad_send_wc.send_buf = &mad_send_wr->send_buf; - ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + mad_send_wc.status = IB_WC_SUCCESS; + mad_send_wc.vendor_err = 0; + mad_send_wc.send_buf = &mad_send_wr->send_buf; + ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); + } } else { mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, mad_recv_wc); @@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!response) { - printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " - "for response buffer\n"); + dev_err(&port_priv->device->dev, + "ib_mad_recv_done_handler no memory for response buffer\n"); goto out; } @@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, mad_agent_priv = mad_send_wr->mad_agent_priv; spin_lock_irqsave(&mad_agent_priv->lock, flags); - if (mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) { ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); if (ret == IB_RMPP_RESULT_CONSUMED) goto done; @@ -2176,7 +2249,8 @@ retry: ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, &bad_send_wr); if (ret) { - printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); + dev_err(&port_priv->device->dev, + "ib_post_send failed: %d\n", ret); mad_send_wr = queued_send_wr; wc->status = IB_WC_LOC_QP_OP_ERR; goto retry; @@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv, IB_QP_STATE | IB_QP_CUR_STATE); kfree(attr); if (ret) - printk(KERN_ERR PFX "mad_error_handler - " - "ib_modify_qp to RTS : %d\n", ret); + dev_err(&port_priv->device->dev, + "mad_error_handler - ib_modify_qp to RTS : %d\n", + ret); else mark_sends_for_retry(qp_info); } @@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work) if (local->mad_priv) { recv_mad_agent = local->recv_mad_agent; if (!recv_mad_agent) { - printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); + dev_err(&mad_agent_priv->agent.device->dev, + "No receive MAD agent for local completion\n"); free_mad = 1; goto local_send_completion; } @@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); - if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { + if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) { ret = ib_retry_rmpp(mad_send_wr); switch (ret) { case IB_RMPP_RESULT_UNHANDLED: @@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, } else { mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); if (!mad_priv) { - printk(KERN_ERR PFX "No memory for receive buffer\n"); + dev_err(&qp_info->port_priv->device->dev, + "No memory for receive buffer\n"); ret = -ENOMEM; break; } @@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, sizeof mad_priv->header, DMA_FROM_DEVICE); kmem_cache_free(ib_mad_cache, mad_priv); - printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); + dev_err(&qp_info->port_priv->device->dev, + "ib_post_recv failed: %d\n", ret); break; } } while (post); @@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) { - printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); + dev_err(&port_priv->device->dev, + "Couldn't kmalloc ib_qp_attr\n"); return -ENOMEM; } @@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "INIT: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to INIT: %d\n", + i, ret); goto out; } attr->qp_state = IB_QPS_RTR; ret = ib_modify_qp(qp, attr, IB_QP_STATE); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "RTR: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTR: %d\n", + i, ret); goto out; } @@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) attr->sq_psn = IB_MAD_SEND_Q_PSN; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); if (ret) { - printk(KERN_ERR PFX "Couldn't change QP%d state to " - "RTS: %d\n", i, ret); + dev_err(&port_priv->device->dev, + "Couldn't change QP%d state to RTS: %d\n", + i, ret); goto out; } } ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); if (ret) { - printk(KERN_ERR PFX "Failed to request completion " - "notification: %d\n", ret); + dev_err(&port_priv->device->dev, + "Failed to request completion notification: %d\n", + ret); goto out; } @@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); if (ret) { - printk(KERN_ERR PFX "Couldn't post receive WRs\n"); + dev_err(&port_priv->device->dev, + "Couldn't post receive WRs\n"); goto out; } } @@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context) struct ib_mad_qp_info *qp_info = qp_context; /* It's worse than that! He's dead, Jim! */ - printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n", + dev_err(&qp_info->port_priv->device->dev, + "Fatal error (%d) on MAD QP (%d)\n", event->event, qp_info->qp->qp_num); } @@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info, qp_init_attr.event_handler = qp_event_handler; qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); if (IS_ERR(qp_info->qp)) { - printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n", - get_spl_qp_index(qp_type)); + dev_err(&qp_info->port_priv->device->dev, + "Couldn't create ib_mad QP%d\n", + get_spl_qp_index(qp_type)); ret = PTR_ERR(qp_info->qp); goto error; } @@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device, /* Create new device info */ port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); if (!port_priv) { - printk(KERN_ERR PFX "No memory for ib_mad_port_private\n"); + dev_err(&device->dev, "No memory for ib_mad_port_private\n"); return -ENOMEM; } @@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device, ib_mad_thread_completion_handler, NULL, port_priv, cq_size, 0); if (IS_ERR(port_priv->cq)) { - printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); + dev_err(&device->dev, "Couldn't create ib_mad CQ\n"); ret = PTR_ERR(port_priv->cq); goto error3; } port_priv->pd = ib_alloc_pd(device); if (IS_ERR(port_priv->pd)) { - printk(KERN_ERR PFX "Couldn't create ib_mad PD\n"); + dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); goto error4; } port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(port_priv->mr)) { - printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n"); + dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n"); ret = PTR_ERR(port_priv->mr); goto error5; } @@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device, ret = ib_mad_port_start(port_priv); if (ret) { - printk(KERN_ERR PFX "Couldn't start port\n"); + dev_err(&device->dev, "Couldn't start port\n"); goto error9; } @@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num) port_priv = __ib_get_mad_port(device, port_num); if (port_priv == NULL) { spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); - printk(KERN_ERR PFX "Port %d not found\n", port_num); + dev_err(&device->dev, "Port %d not found\n", port_num); return -ENODEV; } list_del_init(&port_priv->port_list); @@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device) for (i = start; i <= end; i++) { if (ib_mad_port_open(device, i)) { - printk(KERN_ERR PFX "Couldn't open %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't open port %d\n", i); goto error; } if (ib_agent_port_open(device, i)) { - printk(KERN_ERR PFX "Couldn't open %s port %d " - "for agents\n", - device->name, i); + dev_err(&device->dev, + "Couldn't open port %d for agents\n", i); goto error_agent; } } @@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device) error_agent: if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't close port %d\n", i); error: i--; while (i >= start) { if (ib_agent_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, i); + dev_err(&device->dev, + "Couldn't close port %d for agents\n", i); if (ib_mad_port_close(device, i)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, i); + dev_err(&device->dev, "Couldn't close port %d\n", i); i--; } } @@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device) } for (i = 0; i < num_ports; i++, cur_port++) { if (ib_agent_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d " - "for agents\n", - device->name, cur_port); + dev_err(&device->dev, + "Couldn't close port %d for agents\n", + cur_port); if (ib_mad_port_close(device, cur_port)) - printk(KERN_ERR PFX "Couldn't close %s port %d\n", - device->name, cur_port); + dev_err(&device->dev, "Couldn't close port %d\n", + cur_port); } } @@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void) SLAB_HWCACHE_ALIGN, NULL); if (!ib_mad_cache) { - printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); + pr_err("Couldn't create ib_mad cache\n"); ret = -ENOMEM; goto error1; } @@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void) INIT_LIST_HEAD(&ib_mad_port_list); if (ib_register_client(&mad_client)) { - printk(KERN_ERR PFX "Couldn't register ib_mad client\n"); + pr_err("Couldn't register ib_mad client\n"); ret = -EINVAL; goto error2; } diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h index 9430ab4..d1a0b0e 100644 --- a/drivers/infiniband/core/mad_priv.h +++ b/drivers/infiniband/core/mad_priv.h @@ -42,9 +42,6 @@ #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> - -#define PFX "ib_mad: " - #define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ /* QP and CQ parameters */ diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 233eaf5..c38f030 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device) sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, - recv_handler, sa_dev); + recv_handler, sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) goto err; diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 1acb991..928cdd2 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -33,6 +33,8 @@ * SOFTWARE. */ +#define pr_fmt(fmt) "user_mad: " fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/device.h> @@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); - if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { - copy_offset = IB_MGMT_MAD_HDR; - rmpp_active = 0; - } else { + + if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && ib_mad_kernel_rmpp_agent(agent)) { copy_offset = IB_MGMT_RMPP_HDR; rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & - IB_MGMT_RMPP_FLAG_ACTIVE; + IB_MGMT_RMPP_FLAG_ACTIVE; + } else { + copy_offset = IB_MGMT_MAD_HDR; + rmpp_active = 0; } data_len = count - hdr_size(file) - hdr_len; @@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, rmpp_mad->mad_hdr.tid = *tid; } - spin_lock_irq(&file->send_lock); - ret = is_duplicate(file, packet); - if (!ret) + if (!ib_mad_kernel_rmpp_agent(agent) + && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class) + && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) { + spin_lock_irq(&file->send_lock); list_add_tail(&packet->list, &file->send_list); - spin_unlock_irq(&file->send_lock); - if (ret) { - ret = -EINVAL; - goto err_msg; + spin_unlock_irq(&file->send_lock); + } else { + spin_lock_irq(&file->send_lock); + ret = is_duplicate(file, packet); + if (!ret) + list_add_tail(&packet->list, &file->send_list); + spin_unlock_irq(&file->send_lock); + if (ret) { + ret = -EINVAL; + goto err_msg; + } } ret = ib_post_send_mad(packet->msg, NULL); @@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, mutex_lock(&file->mutex); if (!file->port->ib_dev) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid device\n"); ret = -EPIPE; goto out; } @@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, } if (ureq.qpn != 0 && ureq.qpn != 1) { + dev_notice(file->port->dev, + "ib_umad_reg_agent: invalid QPN %d specified\n", + ureq.qpn); ret = -EINVAL; goto out; } @@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg, if (!__get_agent(file, agent_id)) goto found; + dev_notice(file->port->dev, + "ib_umad_reg_agent: Max Agents (%u) reached\n", + IB_UMAD_MAX_AGENTS); ret = -ENOMEM; goto out; found: if (ureq.mgmt_class) { + memset(&req, 0, sizeof(req)); req.mgmt_class = ureq.mgmt_class; req.mgmt_class_version = ureq.mgmt_class_version; memcpy(req.oui, ureq.oui, sizeof req.oui); @@ -657,7 +678,7 @@ found: ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, ureq.mgmt_class ? &req : NULL, ureq.rmpp_version, - send_handler, recv_handler, file); + send_handler, recv_handler, file, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); agent = NULL; @@ -673,10 +694,11 @@ found: if (!file->already_used) { file->already_used = 1; if (!file->use_pkey_index) { - printk(KERN_WARNING "user_mad: process %s did not enable " - "P_Key index support.\n", current->comm); - printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " - "has info on the new ABI.\n"); + dev_warn(file->port->dev, + "process %s did not enable P_Key index support.\n", + current->comm); + dev_warn(file->port->dev, + " Documentation/infiniband/user_mad.txt has info on the new ABI.\n"); } } @@ -694,6 +716,119 @@ out: return ret; } +static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg) +{ + struct ib_user_mad_reg_req2 ureq; + struct ib_mad_reg_req req; + struct ib_mad_agent *agent = NULL; + int agent_id; + int ret; + + mutex_lock(&file->port->file_mutex); + mutex_lock(&file->mutex); + + if (!file->port->ib_dev) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2: invalid device\n"); + ret = -EPIPE; + goto out; + } + + if (copy_from_user(&ureq, arg, sizeof(ureq))) { + ret = -EFAULT; + goto out; + } + + if (ureq.qpn != 0 && ureq.qpn != 1) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2: invalid QPN %d specified\n", + ureq.qpn); + ret = -EINVAL; + goto out; + } + + if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n", + ureq.flags, IB_USER_MAD_REG_FLAGS_CAP); + ret = -EINVAL; + + if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP, + (u32 __user *) (arg + offsetof(struct + ib_user_mad_reg_req2, flags)))) + ret = -EFAULT; + + goto out; + } + + for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id) + if (!__get_agent(file, agent_id)) + goto found; + + dev_notice(file->port->dev, + "ib_umad_reg_agent2: Max Agents (%u) reached\n", + IB_UMAD_MAX_AGENTS); + ret = -ENOMEM; + goto out; + +found: + if (ureq.mgmt_class) { + memset(&req, 0, sizeof(req)); + req.mgmt_class = ureq.mgmt_class; + req.mgmt_class_version = ureq.mgmt_class_version; + if (ureq.oui & 0xff000000) { + dev_notice(file->port->dev, + "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n", + ureq.oui); + ret = -EINVAL; + goto out; + } + req.oui[2] = ureq.oui & 0x0000ff; + req.oui[1] = (ureq.oui & 0x00ff00) >> 8; + req.oui[0] = (ureq.oui & 0xff0000) >> 16; + memcpy(req.method_mask, ureq.method_mask, + sizeof(req.method_mask)); + } + + agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num, + ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, + ureq.mgmt_class ? &req : NULL, + ureq.rmpp_version, + send_handler, recv_handler, file, + ureq.flags); + if (IS_ERR(agent)) { + ret = PTR_ERR(agent); + agent = NULL; + goto out; + } + + if (put_user(agent_id, + (u32 __user *)(arg + + offsetof(struct ib_user_mad_reg_req2, id)))) { + ret = -EFAULT; + goto out; + } + + if (!file->already_used) { + file->already_used = 1; + file->use_pkey_index = 1; + } + + file->agent[agent_id] = agent; + ret = 0; + +out: + mutex_unlock(&file->mutex); + + if (ret && agent) + ib_unregister_mad_agent(agent); + + mutex_unlock(&file->port->file_mutex); + + return ret; +} + + static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) { struct ib_mad_agent *agent = NULL; @@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd, return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); + case IB_USER_MAD_REGISTER_AGENT2: + return ib_umad_reg_agent2(filp->private_data, (void __user *) arg); default: return -ENOIOCTLCMD; } @@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd, return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); case IB_USER_MAD_ENABLE_PKEY: return ib_umad_enable_pkey(filp->private_data); + case IB_USER_MAD_REGISTER_AGENT2: + return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg)); default: return -ENOIOCTLCMD; } @@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO, static dev_t overflow_maj; static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); -static int find_overflow_devnum(void) +static int find_overflow_devnum(struct ib_device *device) { int ret; @@ -991,7 +1130,8 @@ static int find_overflow_devnum(void) ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { - printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); + dev_err(&device->dev, + "couldn't register dynamic device number\n"); return ret; } } @@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); if (devnum >= IB_UMAD_MAX_PORTS) { spin_unlock(&port_lock); - devnum = find_overflow_devnum(); + devnum = find_overflow_devnum(device); if (devnum < 0) return -1; @@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void) ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, "infiniband_mad"); if (ret) { - printk(KERN_ERR "user_mad: couldn't register device number\n"); + pr_err("couldn't register device number\n"); goto out; } umad_class = class_create(THIS_MODULE, "infiniband_mad"); if (IS_ERR(umad_class)) { ret = PTR_ERR(umad_class); - printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); + pr_err("couldn't create class infiniband_mad\n"); goto out_chrdev; } @@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void) ret = class_create_file(umad_class, &class_attr_abi_version.attr); if (ret) { - printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); + pr_err("couldn't create abi_version attribute\n"); goto out_class; } ret = ib_register_client(&umad_client); if (ret) { - printk(KERN_ERR "user_mad: couldn't register ib_umad client\n"); + pr_err("couldn't register ib_umad client\n"); goto out_class; } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a283274..643c08a 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(rereg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(alloc_mw); IB_UVERBS_DECLARE_CMD(dealloc_mw); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index ea6203e..0600c50 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1002,6 +1002,99 @@ err_free: return ret; } +ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_rereg_mr cmd; + struct ib_uverbs_rereg_mr_resp resp; + struct ib_udata udata; + struct ib_pd *pd = NULL; + struct ib_mr *mr; + struct ib_pd *old_pd; + int ret; + struct ib_uobject *uobj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + return -EINVAL; + + if ((cmd.flags & IB_MR_REREG_TRANS) && + (!cmd.start || !cmd.hca_va || 0 >= cmd.length || + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) + return -EINVAL; + + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, + file->ucontext); + + if (!uobj) + return -EINVAL; + + mr = uobj->object; + + if (cmd.flags & IB_MR_REREG_ACCESS) { + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + goto put_uobjs; + } + + if (cmd.flags & IB_MR_REREG_PD) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto put_uobjs; + } + } + + if (atomic_read(&mr->usecnt)) { + ret = -EBUSY; + goto put_uobj_pd; + } + + old_pd = mr->pd; + ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, + cmd.length, cmd.hca_va, + cmd.access_flags, pd, &udata); + if (!ret) { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_inc(&pd->usecnt); + mr->pd = pd; + atomic_dec(&old_pd->usecnt); + } + } else { + goto put_uobj_pd; + } + + memset(&resp, 0, sizeof(resp)); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + else + ret = in_len; + +put_uobj_pd: + if (cmd.flags & IB_MR_REREG_PD) + put_pd_read(pd); + +put_uobjs: + + put_uobj_write(mr->uobject); + + return ret; +} + ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 08219fb..c73b22a 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 49e0e85..1b63185 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) mq->msg_pool.host, dma_unmap_addr(mq, mapping)); } -static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, - int msg_size) +static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, + size_t q_size, size_t msg_size) { u8 *pool_start; + if (q_size > SIZE_MAX / msg_size) + return -EINVAL; + pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, &mq->host_dma, GFP_KERNEL); if (!pool_start) diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index fbe6051..c9df054 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) chp = get_chp(dev, qid); if (chp) { + t4_clear_cq_armed(&chp->cq); spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index c158fcc..41cd688 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int flushed; + int rq_flushed, sq_flushed; unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); @@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) { - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); - } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); if (schp != rchp) c4iw_flush_hw_cq(schp); - flushed = c4iw_flush_sq(qhp); + sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) { - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + + if (schp == rchp) { + if (t4_clear_cq_armed(&rchp->cq) && + (rq_flushed || sq_flushed)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index df5edfa..c04e513 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq) return !wq->rq.queue[wq->rq.size].status.db_off; } +enum t4_cq_flags { + CQ_ARMED = 1, +}; + struct t4_cq { struct t4_cqe *queue; dma_addr_t dma_addr; @@ -544,12 +548,19 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + unsigned long flags; }; +static inline int t4_clear_cq_armed(struct t4_cq *cq) +{ + return test_and_clear_bit(CQ_ARMED, &cq->flags); +} + static inline int t4_arm_cq(struct t4_cq *cq, int se) { u32 val; + set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_MASK) { val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | INGRESSQID(cq->cqid); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 43f2d04..e890e5b 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -726,7 +726,7 @@ bail: * @dd: the infinipath device * @pkeys: the PKEY table */ -static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port) { struct ipath_portdata *pd; int i; @@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) } if (changed) { u64 pkey; + struct ib_event event; pkey = (u64) dd->ipath_pkeys[0] | ((u64) dd->ipath_pkeys[1] << 16) | @@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) (unsigned long long) pkey); ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, pkey); + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev->ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); } return 0; } static int recv_subn_set_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev) + struct ib_device *ibdev, u8 port) { u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); __be16 *p = (__be16 *) smp->data; @@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, for (i = 0; i < n; i++) q[i] = be16_to_cpu(p[i]); - if (startpx != 0 || set_pkeys(dev->dd, q) != 0) + if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0) smp->status |= IB_SMP_INVALID_FIELD; return recv_subn_get_pkeytable(smp, ibdev); @@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, ret = recv_subn_set_portinfo(smp, ibdev, port_num); goto bail; case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_set_pkeytable(smp, ibdev); + ret = recv_subn_set_pkeytable(smp, ibdev, port_num); goto bail; case IB_SMP_ATTR_SM_INFO: if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 287ad05..82a7dd8 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) agent = ib_register_mad_agent(&dev->ib_dev, p + 1, q ? IB_QPT_GSI : IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0f7027e..e1e558a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp, const struct default_rules *pdefault_rules = default_table; u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); - for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, - pdefault_rules++) { + for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) { __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; memset(&field_types, 0, sizeof(field_types)); @@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules( int size = 0; int i; - for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ - sizeof(pdefault_rules->rules_create_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { int ret; union ib_flow_spec ib_spec; switch (pdefault_rules->rules_create_list[i]) { @@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; + ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 369da3c..e8cad39 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index cb2a872..9b0e80e 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + /* Force registering the memory as writable. */ + /* Used for memory re-registeration. HCA protects the access */ mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); + access_flags | IB_ACCESS_LOCAL_WRITE, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -183,6 +185,90 @@ err_free: return ERR_PTR(err); } +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(mr->device); + struct mlx4_ib_mr *mmr = to_mmr(mr); + struct mlx4_mpt_entry *mpt_entry; + struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; + int err; + + /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, + * we assume that the calls can't run concurrently. Otherwise, a + * race exists. + */ + err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); + + if (err) + return err; + + if (flags & IB_MR_REREG_PD) { + err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, + to_mpd(pd)->pdn); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_ACCESS) { + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, + convert_access(mr_access_flags)); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_TRANS) { + int shift; + int err; + int n; + + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + mmr->umem = ib_umem_get(mr->uobject->context, start, length, + mr_access_flags | + IB_ACCESS_LOCAL_WRITE, + 0); + if (IS_ERR(mmr->umem)) { + err = PTR_ERR(mmr->umem); + mmr->umem = NULL; + goto release_mpt_entry; + } + n = ib_umem_page_count(mmr->umem); + shift = ilog2(mmr->umem->page_size); + + mmr->mmr.iova = virt_addr; + mmr->mmr.size = length; + err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, + virt_addr, length, n, shift, + *pmpt_entry); + if (err) { + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + + err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem); + if (err) { + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + } + + /* If we couldn't transfer the MR to the HCA, just remember to + * return a failure. But dereg_mr will free the resources. + */ + err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry); + +release_mpt_entry: + mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); + + return err; +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7efe6e3..8c574b6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { - if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); err = -EINVAL; *bad_wr = wr; diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index b6f7f45..8881fa3 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev) agent = ib_register_mad_agent(&dev->ib_dev, p + 1, q ? IB_QPT_GSI : IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 19011db..b43456a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -40,7 +40,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" +#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -137,6 +137,7 @@ struct mqe_ctx { u16 cqe_status; u16 ext_status; bool cmd_done; + bool fw_error_state; }; struct ocrdma_hw_mr { @@ -235,7 +236,10 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; - u64 stag_arr[OCRDMA_MAX_STAG]; + u64 *stag_arr; + u8 sl; /* service level */ + bool pfc_state; + atomic_t update_sl; u16 pvid; u32 asic_id; @@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; } +static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio) +{ + return *(pfc + prio); +} + +static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio) +{ + return *(app_prio + prio); +} + +static inline u8 ocrdma_is_enabled_and_synced(u32 state) +{ /* May also be used to interpret TC-state, QCN-state + * Appl-state and Logical-link-state in future. + */ + return (state & OCRDMA_STATE_FLAG_ENABLED) && + (state & OCRDMA_STATE_FLAG_SYNC); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d4cc01f..40f8536 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -35,6 +35,8 @@ #include "ocrdma_ah.h" #include "ocrdma_hw.h" +#define OCRDMA_VID_PCP_SHIFT 0xD + static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, int pdid) { @@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, if (vlan_tag && (vlan_tag < 0x1000)) { eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); - vlan_tag |= (attr->sl & 7) << 13; + vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; eth.vlan_tag = cpu_to_be16(vlan_tag); eth_sz = sizeof(struct ocrdma_eth_vlan); vlan_enabled = true; @@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (atomic_cmpxchg(&dev->update_sl, 1, 0)) + ocrdma_init_service_level(dev); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 3bbf201..dd35ae5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; cmd->eqn = eq->id; - cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); + cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe); ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, cq->dma, PAGE_SIZE_4K); @@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt = { 0 }; + struct ib_event ib_evt; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + memset(&ib_evt, 0, sizeof(ib_evt)); + ib_evt.device = &dev->ibdev; switch (type) { @@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, OCRDMA_AE_PVID_MCQE_TAG_MASK) >> OCRDMA_AE_PVID_MCQE_TAG_SHIFT); break; + + case OCRDMA_ASYNC_EVENT_COS_VALUE: + atomic_set(&dev->update_sl, 1); + break; default: /* Not interested evts. */ break; @@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev) msecs_to_jiffies(30000)); if (status) return 0; - else + else { + dev->mqe_ctx.fw_error_state = true; + pr_err("%s(%d) mailbox timeout: fw not responding\n", + __func__, dev->id); return -1; + } } /* issue a mailbox command on the MQ */ @@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); + if (dev->mqe_ctx.fw_error_state) + goto mbx_err; ocrdma_post_mqe(dev, mqe); status = ocrdma_wait_mqe_cmpl(dev); if (status) @@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; - attr->max_mr_size = ~0ull; + attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) | + rsp->max_mr_size_lo; attr->max_fmr = 0; attr->max_pages_per_frmr = rsp->max_pages_per_frmr; attr->max_num_mr_pbl = rsp->max_num_mr_pbl; @@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; - dev->hba_port_num = hba_attribs->phy_port; + dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & + OCRDMA_HBA_ATTRB_PTNUM_MASK) + >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; strncpy(dev->model_number, hba_attribs->controller_model_number, 31); } @@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = rsp->phys_port_speed; + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; mbx_err: kfree(cmd); @@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) goto mbx_err; rsp = (struct ocrdma_get_phy_info_rsp *)cmd; - dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.phy_type = + (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK); + dev->phy.interface_type = + (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK) + >> OCRDMA_IF_TYPE_SHIFT; dev->phy.auto_speeds_supported = - le16_to_cpu(rsp->auto_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK); dev->phy.fixed_speeds_supported = - le16_to_cpu(rsp->fixed_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK) + >> OCRDMA_FSPEED_SUPP_SHIFT; mbx_err: kfree(cmd); return status; @@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va; for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) { - pbes[i].pa_lo = (u32) (pa & 0xffffffff); - pbes[i].pa_hi = (u32) upper_32_bits(pa); + pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff); + pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa)); pa += PAGE_SIZE; } cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF); @@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev) ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va, dev->av_tbl.pa); + dev->av_tbl.va = NULL; dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va, dev->av_tbl.pbl.pa); kfree(cmd); @@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; cq->phase_change = false; - cmd->cmd.cqe_count = (cq->len / cqe_size); + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size); } else { - cmd->cmd.cqe_count = (cq->len / cqe_size) - 1; + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1; cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID; cq->phase_change = true; } - cmd->cmd.pd_id = pd_id; /* valid only for v3 */ + /* pd_id valid only for v3 */ + cmd->cmd.pdid_cqecnt |= (pd_id << + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT); ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status) @@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && pd->num_dpp_qp) { + if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && + (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } @@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, if ((ah_attr->ah_flags & IB_AH_GRH) == 0) return -EINVAL; + if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0)) + ocrdma_init_service_level(qp->dev); cmd->params.tclass_sq_psn |= (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); cmd->params.rnt_rc_sl_fl |= @@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; + cmd->params.rnt_rc_sl_fl |= + (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } return 0; } @@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) return status; } +static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, + struct ocrdma_dcbx_cfg *dcbxcfg) +{ + int status = 0; + dma_addr_t pa; + struct ocrdma_mqe cmd; + + struct ocrdma_get_dcbx_cfg_req *req = NULL; + struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL; + struct pci_dev *pdev = dev->nic_info.pdev; + struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge; + + memset(&cmd, 0, sizeof(struct ocrdma_mqe)); + cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp), + sizeof(struct ocrdma_get_dcbx_cfg_req)); + req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL); + if (!req) { + status = -ENOMEM; + goto mem_err; + } + + cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL); + mqe_sge->pa_hi = (u32) upper_32_bits(pa); + mqe_sge->len = cmd.hdr.pyld_len; + + memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req)); + ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG, + OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len); + req->param_type = ptype; + + status = ocrdma_mbx_cmd(dev, &cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req; + ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp)); + memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg)); + +mbx_err: + dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa); +mem_err: + return status; +} + +#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08 +#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05 + +static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype, + struct ocrdma_dcbx_cfg *dcbxcfg, + u8 *srvc_lvl) +{ + int status = -EINVAL, indx, slindx; + int ventry_cnt; + struct ocrdma_app_parameter *app_param; + u8 valid, proto_sel; + u8 app_prio, pfc_prio; + u16 proto; + + if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) { + pr_info("%s ocrdma%d DCBX is disabled\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + goto out; + } + + if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) { + pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + (ptype > 0 ? "operational" : "admin"), + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ? + "enabled" : "disabled", + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ? + "" : ", not sync'ed"); + goto out; + } else { + pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + } + + ventry_cnt = (dcbxcfg->tcv_aev_opv_st >> + OCRDMA_DCBX_APP_ENTRY_SHIFT) + & OCRDMA_DCBX_STATE_MASK; + + for (indx = 0; indx < ventry_cnt; indx++) { + app_param = &dcbxcfg->app_param[indx]; + valid = (app_param->valid_proto_app >> + OCRDMA_APP_PARAM_VALID_SHIFT) + & OCRDMA_APP_PARAM_VALID_MASK; + proto_sel = (app_param->valid_proto_app + >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT) + & OCRDMA_APP_PARAM_PROTO_SEL_MASK; + proto = app_param->valid_proto_app & + OCRDMA_APP_PARAM_APP_PROTO_MASK; + + if ( + valid && proto == OCRDMA_APP_PROTO_ROCE && + proto_sel == OCRDMA_PROTO_SELECT_L2) { + for (slindx = 0; slindx < + OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) { + app_prio = ocrdma_get_app_prio( + (u8 *)app_param->app_prio, + slindx); + pfc_prio = ocrdma_get_pfc_prio( + (u8 *)dcbxcfg->pfc_prio, + slindx); + + if (app_prio && pfc_prio) { + *srvc_lvl = slindx; + status = 0; + goto out; + } + } + if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) { + pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n", + dev_name(&dev->nic_info.pdev->dev), + dev->id, proto); + } + } + } + +out: + return status; +} + +void ocrdma_init_service_level(struct ocrdma_dev *dev) +{ + int status = 0, indx; + struct ocrdma_dcbx_cfg dcbxcfg; + u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL; + int ptype = OCRDMA_PARAMETER_TYPE_OPER; + + for (indx = 0; indx < 2; indx++) { + status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg); + if (status) { + pr_err("%s(): status=%d\n", __func__, status); + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + status = ocrdma_parse_dcbxcfg_rsp(dev, ptype, + &dcbxcfg, &srvc_lvl); + if (status) { + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + break; + } + + if (status) + pr_info("%s ocrdma%d service level default\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + else + pr_info("%s ocrdma%d service level %d\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + srvc_lvl); + + dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state); + dev->sl = srvc_lvl; +} + int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) { int i; @@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) goto conf_err; status = ocrdma_mbx_get_phy_info(dev); if (status) - goto conf_err; + goto info_attrb_err; status = ocrdma_mbx_get_ctrl_attribs(dev); if (status) - goto conf_err; + goto info_attrb_err; return 0; +info_attrb_err: + ocrdma_mbx_delete_ah_tbl(dev); conf_err: ocrdma_destroy_mq(dev); mq_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index e513f72..6eed8f19 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); char *port_speed_string(struct ocrdma_dev *dev); +void ocrdma_init_service_level(struct ocrdma_dev *); + #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7c504e0..256a06b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) if (!dev->qp_tbl) goto alloc_err; } + + dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL); + if (dev->stag_arr == NULL) + goto alloc_err; + spin_lock_init(&dev->av_tbl.lock); spin_lock_init(&dev->flush_q_lock); return 0; @@ -334,6 +339,7 @@ alloc_err: static void ocrdma_free_resources(struct ocrdma_dev *dev) { + kfree(dev->stag_arr); kfree(dev->qp_tbl); kfree(dev->cq_tbl); kfree(dev->sgid_tbl); @@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct ocrdma_dev *dev = dev_get_drvdata(device); - return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); +} + +static ssize_t show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver + &dev_attr_fw_ver, + &dev_attr_hca_type }; static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) @@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } +static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ + struct in_device *in_dev; + union ib_gid gid; + in_dev = in_dev_get(net); + if (in_dev) { + for_ifa(in_dev) { + ipv6_addr_set_v4mapped(ifa->ifa_address, + (struct in6_addr *)&gid); + ocrdma_add_sgid(dev, &gid); + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + } +} + +static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; + in6_dev = in6_dev_get(net); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + ocrdma_add_sgid(dev, pgid); + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif +} + +static void ocrdma_init_gid_table(struct ocrdma_dev *dev) +{ + struct net_device *net_dev; + + for_each_netdev(&init_net, net_dev) { + struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ? + rdma_vlan_dev_real_dev(net_dev) : net_dev; + + if (real_dev == dev->nic_info.netdev) { + ocrdma_init_ipv4_gids(dev, net_dev); + ocrdma_init_ipv6_gids(dev, net_dev); + } + } +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; @@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + ocrdma_init_service_level(dev); + ocrdma_init_gid_table(dev); status = ocrdma_register_device(dev); if (status) goto alloc_err; @@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev) return 0; } +static void ocrdma_shutdown(struct ocrdma_dev *dev) +{ + ocrdma_close(dev); + ocrdma_remove(dev); +} + /* event handling via NIC driver ensures that all the NIC specific * initialization done before RoCE driver notifies * event to stack. @@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) case BE_DEV_DOWN: ocrdma_close(dev); break; + case BE_DEV_SHUTDOWN: + ocrdma_shutdown(dev); + break; } } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 96c9ee6..904989e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -44,35 +44,39 @@ enum { #define OCRDMA_SUBSYS_ROCE 10 enum { OCRDMA_CMD_QUERY_CONFIG = 1, - OCRDMA_CMD_ALLOC_PD, - OCRDMA_CMD_DEALLOC_PD, - - OCRDMA_CMD_CREATE_AH_TBL, - OCRDMA_CMD_DELETE_AH_TBL, - - OCRDMA_CMD_CREATE_QP, - OCRDMA_CMD_QUERY_QP, - OCRDMA_CMD_MODIFY_QP, - OCRDMA_CMD_DELETE_QP, - - OCRDMA_CMD_RSVD1, - OCRDMA_CMD_ALLOC_LKEY, - OCRDMA_CMD_DEALLOC_LKEY, - OCRDMA_CMD_REGISTER_NSMR, - OCRDMA_CMD_REREGISTER_NSMR, - OCRDMA_CMD_REGISTER_NSMR_CONT, - OCRDMA_CMD_QUERY_NSMR, - OCRDMA_CMD_ALLOC_MW, - OCRDMA_CMD_QUERY_MW, - - OCRDMA_CMD_CREATE_SRQ, - OCRDMA_CMD_QUERY_SRQ, - OCRDMA_CMD_MODIFY_SRQ, - OCRDMA_CMD_DELETE_SRQ, - - OCRDMA_CMD_ATTACH_MCAST, - OCRDMA_CMD_DETACH_MCAST, - OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_CMD_ALLOC_PD = 2, + OCRDMA_CMD_DEALLOC_PD = 3, + + OCRDMA_CMD_CREATE_AH_TBL = 4, + OCRDMA_CMD_DELETE_AH_TBL = 5, + + OCRDMA_CMD_CREATE_QP = 6, + OCRDMA_CMD_QUERY_QP = 7, + OCRDMA_CMD_MODIFY_QP = 8 , + OCRDMA_CMD_DELETE_QP = 9, + + OCRDMA_CMD_RSVD1 = 10, + OCRDMA_CMD_ALLOC_LKEY = 11, + OCRDMA_CMD_DEALLOC_LKEY = 12, + OCRDMA_CMD_REGISTER_NSMR = 13, + OCRDMA_CMD_REREGISTER_NSMR = 14, + OCRDMA_CMD_REGISTER_NSMR_CONT = 15, + OCRDMA_CMD_QUERY_NSMR = 16, + OCRDMA_CMD_ALLOC_MW = 17, + OCRDMA_CMD_QUERY_MW = 18, + + OCRDMA_CMD_CREATE_SRQ = 19, + OCRDMA_CMD_QUERY_SRQ = 20, + OCRDMA_CMD_MODIFY_SRQ = 21, + OCRDMA_CMD_DELETE_SRQ = 22, + + OCRDMA_CMD_ATTACH_MCAST = 23, + OCRDMA_CMD_DETACH_MCAST = 24, + + OCRDMA_CMD_CREATE_RBQ = 25, + OCRDMA_CMD_DESTROY_RBQ = 26, + + OCRDMA_CMD_GET_RDMA_STATS = 27, OCRDMA_CMD_MAX }; @@ -103,7 +107,7 @@ enum { #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 -#define OCRDMA_MAX_STAG 8192 +#define OCRDMA_MAX_STAG 16384 enum { OCRDMA_DB_RQ_OFFSET = 0xE0, @@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe { #define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 #define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 -#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 + +enum ocrdma_async_grp5_events { + OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, + OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02, + OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03 +}; enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_CQ_ERROR = 0x00, @@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config { u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; - u32 max_mr_size_lo; u32 max_mr_size_hi; + u32 max_mr_size_lo; u32 max_num_mr_pbl; u32 max_mw; u32 max_fmr; @@ -580,17 +589,26 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +enum { + OCRDMA_IF_TYPE_MASK = 0xFFFF0000, + OCRDMA_IF_TYPE_SHIFT = 0x10, + OCRDMA_PHY_TYPE_MASK = 0x0000FFFF, + OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000, + OCRDMA_FUTURE_DETAILS_SHIFT = 0x10, + OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF, + OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000, + OCRDMA_FSPEED_SUPP_SHIFT = 0x10, + OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF +}; + struct ocrdma_get_phy_info_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u16 phy_type; - u16 interface_type; + u32 ityp_ptyp; u32 misc_params; - u16 ext_phy_details; - u16 rsvd; - u16 auto_speeds_supported; - u16 fixed_speeds_supported; + u32 ftrdtl_exphydtl; + u32 fspeed_aspeed; u32 future_use[2]; }; @@ -603,19 +621,34 @@ enum { OCRDMA_PHY_SPEED_40GBPS = 0x20 }; +enum { + OCRDMA_PORT_NUM_MASK = 0x3F, + OCRDMA_PT_MASK = 0xC0, + OCRDMA_PT_SHIFT = 0x6, + OCRDMA_LINK_DUP_MASK = 0x0000FF00, + OCRDMA_LINK_DUP_SHIFT = 0x8, + OCRDMA_PHY_PS_MASK = 0x00FF0000, + OCRDMA_PHY_PS_SHIFT = 0x10, + OCRDMA_PHY_PFLT_MASK = 0xFF000000, + OCRDMA_PHY_PFLT_SHIFT = 0x18, + OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, + OCRDMA_QOS_LNKSP_SHIFT = 0x10, + OCRDMA_LLST_MASK = 0xFF, + OCRDMA_PLFC_MASK = 0x00000400, + OCRDMA_PLFC_SHIFT = 0x8, + OCRDMA_PLRFC_MASK = 0x00000200, + OCRDMA_PLRFC_SHIFT = 0x8, + OCRDMA_PLTFC_MASK = 0x00000100, + OCRDMA_PLTFC_SHIFT = 0x8 +}; struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u8 pt_port_num; - u8 link_duplex; - u8 phys_port_speed; - u8 phys_port_fault; - u16 rsvd1; - u16 qos_lnk_speed; - u8 logical_lnk_status; - u8 rsvd2[3]; + u32 pflt_pps_ld_pnum; + u32 qos_lsp; + u32 res_lls; }; enum { @@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd { u32 pgsz_pgcnt; u32 ev_cnt_flags; u32 eqn; - u16 cqe_count; - u16 pd_id; + u32 pdid_cqecnt; u32 rsvd6; struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; }; @@ -678,6 +710,10 @@ struct ocrdma_create_cq { }; enum { + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10 +}; + +enum { OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF }; @@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq { enum { OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), - OCRDMA_PD_MAX_DPP_ENABLED_QP = 8, OCRDMA_DPP_PAGE_SIZE = 4096 }; @@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp { struct ocrdma_rx_dbg_stats rx_dbg_stats; } __packed; +enum { + OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF, + OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_CV_MASK = 0xFF, + OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000, + OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000, + OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E, + OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF, + OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_NETFIL_MASK =0xFF +}; struct mgmt_hba_attribs { u8 flashrom_version_string[32]; u8 manufacturer_name[32]; u32 supported_modes; - u32 rsvd0[3]; + u32 rsvd_eprom_verhi_verlo; + u32 mbx_ds_ver; + u32 epfw_ds_ver; u8 ncsi_ver_string[12]; u32 default_extended_timeout; u8 controller_model_number[32]; @@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs { u8 driver_version_string[32]; u8 fw_on_flash_version_string[32]; u32 functionalities_supported; - u16 max_cdblength; - u8 asic_revision; - u8 generational_guid[16]; - u8 hba_port_count; - u16 default_link_down_timeout; - u8 iscsi_ver_min_max; - u8 multifunction_device; - u8 cache_valid; - u8 hba_status; - u8 max_domains_supported; - u8 phy_port; + u32 guid0_asicrev_cdblen; + u8 generational_guid[12]; + u32 portcnt_guid15; + u32 mfuncdev_iscsi_ldtout; + u32 ptpnum_maxdoms_hbast_cv; u32 firmware_post_status; u32 hba_mtu[8]; - u32 rsvd1[4]; + u32 res_asicgen_iscsi_feaures; + u32 rsvd1[3]; }; struct mgmt_controller_attrib { struct mgmt_hba_attribs hba_attribs; - u16 pci_vendor_id; - u16 pci_device_id; - u16 pci_sub_vendor_id; - u16 pci_sub_system_id; - u8 pci_bus_number; - u8 pci_device_number; - u8 pci_function_number; - u8 interface_type; - u64 unique_identifier; - u32 rsvd0[5]; + u32 pci_did_vid; + u32 pci_ssid_svid; + u32 ityp_fnum_devnum_bnum; + u32 uid_hi; + u32 uid_lo; + u32 res_nnetfil; + u32 rsvd0[4]; }; struct ocrdma_get_ctrl_attribs_rsp { @@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp { struct mgmt_controller_attrib ctrl_attribs; }; +#define OCRDMA_SUBSYS_DCBX 0x10 + +enum OCRDMA_DCBX_OPCODE { + OCRDMA_CMD_GET_DCBX_CONFIG = 0x01 +}; + +enum OCRDMA_DCBX_PARAM_TYPE { + OCRDMA_PARAMETER_TYPE_ADMIN = 0x00, + OCRDMA_PARAMETER_TYPE_OPER = 0x01, + OCRDMA_PARAMETER_TYPE_PEER = 0x02 +}; + +enum OCRDMA_DCBX_APP_PROTO { + OCRDMA_APP_PROTO_ROCE = 0x8915 +}; + +enum OCRDMA_DCBX_PROTO { + OCRDMA_PROTO_SELECT_L2 = 0x00, + OCRDMA_PROTO_SELECT_L4 = 0x01 +}; + +enum OCRDMA_DCBX_APP_PARAM { + OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF, + OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF, + OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10, + OCRDMA_APP_PARAM_VALID_MASK = 0xFF, + OCRDMA_APP_PARAM_VALID_SHIFT = 0x18 +}; + +enum OCRDMA_DCBX_STATE_FLAGS { + OCRDMA_STATE_FLAG_ENABLED = 0x01, + OCRDMA_STATE_FLAG_ADDVERTISED = 0x02, + OCRDMA_STATE_FLAG_WILLING = 0x04, + OCRDMA_STATE_FLAG_SYNC = 0x08, + OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000, + OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000 +}; + +enum OCRDMA_TCV_AEV_OPV_ST { + OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF, + OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18, + OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10, + OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08, + OCRDMA_DCBX_STATE_MASK = 0xFF +}; + +struct ocrdma_app_parameter { + u32 valid_proto_app; + u32 oui; + u32 app_prio[2]; +}; + +struct ocrdma_dcbx_cfg { + u32 tcv_aev_opv_st; + u32 tc_state; + u32 pfc_state; + u32 qcn_state; + u32 appl_state; + u32 ll_state; + u32 tc_bw[2]; + u32 tc_prio[8]; + u32 pfc_prio[2]; + struct ocrdma_app_parameter app_param[15]; +}; + +struct ocrdma_get_dcbx_cfg_req { + struct ocrdma_mbx_hdr hdr; + u32 param_type; +} __packed; + +struct ocrdma_get_dcbx_cfg_rsp { + struct ocrdma_mbx_rsp hdr; + struct ocrdma_dcbx_cfg cfg; +} __packed; #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index edf6211..acb434d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); - attr->max_mr_size = ~0ull; + attr->max_mr_size = dev->attr.max_mr_size; attr->page_size_cap = 0xffff000; attr->vendor_id = dev->nic_info.pdev->vendor; attr->vendor_part_id = dev->nic_info.pdev->device; - attr->hw_ver = 0; + attr->hw_ver = dev->asic_id; attr->max_qp = dev->attr.max_qp; attr->max_ah = OCRDMA_MAX_AH; attr->max_qp_wr = dev->attr.max_wqe; @@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = - pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + pd->dpp_enabled ? (dev->nic_info.db_page_size / + dev->attr.wqe_size) : 0; } retry: @@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) struct ocrdma_pd *pd = uctx->cntxt_pd; struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); - BUG_ON(uctx->pd_in_use); + if (uctx->pd_in_use) { + pr_err("%s(%d) Freeing in use pdid=0x%x.\n", + __func__, dev->id, pd->id); + } uctx->cntxt_pd = NULL; status = _ocrdma_dealloc_pd(dev, pd); return status; @@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) if (mr->umem) ib_umem_release(mr->umem); kfree(mr); + + /* Don't stop cleanup, in case FW is unresponsive */ + if (dev->mqe_ctx.fw_error_state) { + status = 0; + pr_err("%s(%d) fw not responding.\n", + __func__, dev->id); + } return status; } @@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } while (wr) { + if (qp->qp_type == IB_QPT_UD && + (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM)) { + *bad_wr = wr; + status = -EINVAL; + break; + } if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { *bad_wr = wr; @@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp, *stop = true; expand = false; } + } else if (is_hw_sq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); @@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, *stop = true; expand = false; } + } else if (is_hw_rq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 22c720e..636be11 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev) ibp = &dd->pport[p].ibport_data; agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, NULL, 0, send_handler, - NULL, NULL); + NULL, NULL, 0); if (IS_ERR(agent)) { ret = PTR_ERR(agent); goto err; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index c639f90..3edce61 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -86,7 +86,6 @@ enum { IPOIB_FLAG_INITIALIZED = 1, IPOIB_FLAG_ADMIN_UP = 2, IPOIB_PKEY_ASSIGNED = 3, - IPOIB_PKEY_STOP = 4, IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, @@ -312,7 +311,6 @@ struct ipoib_dev_priv { struct list_head multicast_list; struct rb_root multicast_tree; - struct delayed_work pkey_poll_task; struct delayed_work mcast_task; struct work_struct carrier_on_task; struct work_struct flush_light; @@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); -int ipoib_ib_dev_open(struct net_device *dev); +int ipoib_ib_dev_open(struct net_device *dev, int flush); int ipoib_ib_dev_up(struct net_device *dev); int ipoib_ib_dev_down(struct net_device *dev, int flush); int ipoib_ib_dev_stop(struct net_device *dev, int flush); +void ipoib_pkey_dev_check_presence(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); @@ -532,8 +531,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf); void ipoib_setup(struct net_device *dev); -void ipoib_pkey_poll(struct work_struct *work); -int ipoib_pkey_dev_delay_open(struct net_device *dev); +void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 5006185..6bd5740 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (priv->mcg_dentry) - debugfs_remove(priv->mcg_dentry); - if (priv->path_dentry) - debugfs_remove(priv->path_dentry); + debugfs_remove(priv->mcg_dentry); + debugfs_remove(priv->path_dentry); } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6a7003d..72626c34 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx) drain_tx_cq((struct net_device *)ctx); } -int ipoib_ib_dev_open(struct net_device *dev) +int ipoib_ib_dev_open(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) { - ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey); - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ipoib_pkey_dev_check_presence(dev); + + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { + ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey, + (!(priv->pkey & 0x7fff) ? "Invalid" : "not found")); return -1; } - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = ipoib_init_qp(dev); if (ret) { @@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev) dev_stop: if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) napi_enable(&priv->napi); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_stop(dev, flush); return -1; } -static void ipoib_pkey_dev_check_presence(struct net_device *dev) +void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - u16 pkey_index = 0; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) + if (!(priv->pkey & 0x7fff) || + ib_find_pkey(priv->ca, priv->port, priv->pkey, + &priv->pkey_index)) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); else set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); @@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush) clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); netif_carrier_off(dev); - /* Shutdown the P_Key thread if still active */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - set_bit(IPOIB_PKEY_STOP, &priv->flags); - cancel_delayed_work_sync(&priv->pkey_poll_task); - mutex_unlock(&pkey_mutex); - } - ipoib_mcast_stop_thread(dev, flush); ipoib_mcast_dev_flush(dev); @@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) (unsigned long) dev); if (dev->flags & IFF_UP) { - if (ipoib_ib_dev_open(dev)) { + if (ipoib_ib_dev_open(dev, 1)) { ipoib_transport_dev_cleanup(dev); return -ENODEV; } @@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv) return 1; } +/* + * returns 0 if pkey value was found in a different slot. + */ +static inline int update_child_pkey(struct ipoib_dev_priv *priv) +{ + u16 old_index = priv->pkey_index; + + priv->pkey_index = 0; + ipoib_pkey_dev_check_presence(priv->dev); + + if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && + (old_index == priv->pkey_index)) + return 1; + return 0; +} static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; - u16 new_index; int result; down_read(&priv->vlan_rwsem); @@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, up_read(&priv->vlan_rwsem); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { - /* for non-child devices must check/update the pkey value here */ - if (level == IPOIB_FLUSH_HEAVY && - !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - update_parent_pkey(priv); + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && + level != IPOIB_FLUSH_HEAVY) { ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { + /* interface is down. update pkey and leave. */ + if (level == IPOIB_FLUSH_HEAVY) { + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + update_parent_pkey(priv); + else + update_child_pkey(priv); + } ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); return; } @@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, * (parent) devices should always takes what present in pkey index 0 */ if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - ipoib_ib_dev_down(dev, 0); - ipoib_ib_dev_stop(dev, 0); - if (ipoib_pkey_dev_delay_open(dev)) - return; - } - /* restart QP only if P_Key index is changed */ - if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && - new_index == priv->pkey_index) { + result = update_child_pkey(priv); + if (result) { + /* restart QP only if P_Key index is changed */ ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n"); return; } - priv->pkey_index = new_index; + } else { result = update_parent_pkey(priv); /* restart QP only if P_Key value changed */ @@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev, 0); if (level == IPOIB_FLUSH_HEAVY) { - ipoib_ib_dev_stop(dev, 0); - ipoib_ib_dev_open(dev); + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) + ipoib_ib_dev_stop(dev, 0); + if (ipoib_ib_dev_open(dev, 0) != 0) + return; + if (netif_queue_stopped(dev)) + netif_start_queue(dev); } /* @@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) ipoib_transport_dev_cleanup(dev); } -/* - * Delayed P_Key Assigment Interim Support - * - * The following is initial implementation of delayed P_Key assigment - * mechanism. It is using the same approach implemented for the multicast - * group join. The single goal of this implementation is to quickly address - * Bug #2507. This implementation will probably be removed when the P_Key - * change async notification is available. - */ - -void ipoib_pkey_poll(struct work_struct *work) -{ - struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_poll_task.work); - struct net_device *dev = priv->dev; - - ipoib_pkey_dev_check_presence(dev); - - if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) - ipoib_open(dev); - else { - mutex_lock(&pkey_mutex); - if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - } -} - -int ipoib_pkey_dev_delay_open(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - - /* Look for the interface pkey value in the IB Port P_Key table and */ - /* set the interface pkey assigment flag */ - ipoib_pkey_dev_check_presence(dev); - /* P_Key value not assigned yet - start polling */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - clear_bit(IPOIB_PKEY_STOP, &priv->flags); - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - return 1; - } - - return 0; -} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4e675f4..1310acf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - if (ipoib_pkey_dev_delay_open(dev)) - return 0; - - if (ipoib_ib_dev_open(dev)) + if (ipoib_ib_dev_open(dev, 1)) { + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) + return 0; goto err_disable; + } if (ipoib_ib_dev_up(dev)) goto err_stop; @@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); - INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index eb79739..61ee91d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, struct iser_conn *ib_conn; struct iscsi_endpoint *ep; - ep = iscsi_create_endpoint(sizeof(*ib_conn)); + ep = iscsi_create_endpoint(0); if (!ep) return ERR_PTR(-ENOMEM); - ib_conn = ep->dd_data; + ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL); + if (!ib_conn) { + err = -ENOMEM; + goto failure; + } + + ep->dd_data = ib_conn; ib_conn->ep = ep; iser_conn_init(ib_conn); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, - non_blocking); + err = iser_connect(ib_conn, NULL, dst_addr, non_blocking); if (err) - return ERR_PTR(err); + goto failure; return ep; +failure: + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); } static int @@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) int rc; ib_conn = ep->dd_data; - rc = wait_event_interruptible_timeout(ib_conn->wait, - ib_conn->state == ISER_CONN_UP, - msecs_to_jiffies(timeout_ms)); - + rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion, + msecs_to_jiffies(timeout_ms)); /* if conn establishment failed, return error code to iscsi */ - if (!rc && - (ib_conn->state == ISER_CONN_TERMINATING || - ib_conn->state == ISER_CONN_DOWN)) - rc = -1; + if (rc == 0) { + mutex_lock(&ib_conn->state_mutex); + if (ib_conn->state == ISER_CONN_TERMINATING || + ib_conn->state == ISER_CONN_DOWN) + rc = -1; + mutex_unlock(&ib_conn->state_mutex); + } iser_info("ib conn %p rc = %d\n", ib_conn, rc); @@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) ib_conn = ep->dd_data; iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); + mutex_lock(&ib_conn->state_mutex); iser_conn_terminate(ib_conn); /* - * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop - * call and ISER_CONN_DOWN state before freeing the iser resources. - * otherwise we are safe to free resources immediately. + * if iser_conn and iscsi_conn are bound, we must wait for + * iscsi_conn_stop and flush errors completion before freeing + * the iser resources. Otherwise we are safe to free resources + * immediately. */ if (ib_conn->iscsi_conn) { INIT_WORK(&ib_conn->release_work, iser_release_work); queue_work(release_wq, &ib_conn->release_work); + mutex_unlock(&ib_conn->state_mutex); } else { + ib_conn->state = ISER_CONN_DOWN; + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); } + iscsi_destroy_endpoint(ep); } static umode_t iser_attr_is_visible(int param_type, int param) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 97cd385..c877dad 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -326,7 +326,6 @@ struct iser_conn { struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ struct ib_qp *qp; /* QP */ - wait_queue_head_t wait; /* waitq for conn/disconn */ unsigned qp_max_recv_dtos; /* num of rx buffers */ unsigned qp_max_recv_dtos_mask; /* above minus 1 */ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ @@ -335,6 +334,9 @@ struct iser_conn { char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; struct completion stop_completion; + struct mutex state_mutex; + struct completion flush_completion; + struct completion up_completion; struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -448,8 +450,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking); int iser_reg_page_vec(struct iser_conn *ib_conn, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ea01075..3ef167f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -491,10 +491,9 @@ out_err: } /** - * releases the QP objects, returns 0 on success, - * -1 on failure + * releases the QP object */ -static int iser_free_ib_conn_res(struct iser_conn *ib_conn) +static void iser_free_ib_conn_res(struct iser_conn *ib_conn) { int cq_index; BUG_ON(ib_conn == NULL); @@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) } ib_conn->qp = NULL; - - return 0; } /** @@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } +/** + * Called with state mutex held + **/ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, enum iser_ib_conn_state comp, enum iser_ib_conn_state exch) { int ret; - spin_lock_bh(&ib_conn->lock); if ((ret = (ib_conn->state == comp))) ib_conn->state = exch; - spin_unlock_bh(&ib_conn->lock); return ret; } void iser_release_work(struct work_struct *work) { struct iser_conn *ib_conn; + int rc; ib_conn = container_of(work, struct iser_conn, release_work); /* wait for .conn_stop callback */ - wait_for_completion(&ib_conn->stop_completion); + rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ); + WARN_ON(rc == 0); /* wait for the qp`s post send and post receive buffers to empty */ - wait_event_interruptible(ib_conn->wait, - ib_conn->state == ISER_CONN_DOWN); + rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ); + WARN_ON(rc == 0); + + ib_conn->state = ISER_CONN_DOWN; + + mutex_lock(&ib_conn->state_mutex); + ib_conn->state = ISER_CONN_DOWN; + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); } @@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn) { struct iser_device *device = ib_conn->device; - BUG_ON(ib_conn->state == ISER_CONN_UP); - mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); mutex_unlock(&ig.connlist_mutex); + + mutex_lock(&ib_conn->state_mutex); + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + iser_free_rx_descriptors(ib_conn); iser_free_ib_conn_res(ib_conn); ib_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); + mutex_unlock(&ib_conn->state_mutex); + /* if cma handler context, the caller actually destroy the id */ if (ib_conn->cma_id != NULL) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } - iscsi_destroy_endpoint(ib_conn->ep); + kfree(ib_conn); } /** @@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn) ib_conn,err); } +/** + * Called with state mutex held + **/ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; ib_conn = (struct iser_conn *)cma_id->context; - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); } +/** + * Called with state mutex held + **/ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; struct iser_conn *ib_conn; int ret; + ib_conn = (struct iser_conn *)cma_id->context; + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; + device = iser_device_find_by_ib_device(cma_id); if (!device) { iser_err("device lookup/creation failed\n"); @@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) return; } - ib_conn = (struct iser_conn *)cma_id->context; ib_conn->device = device; /* connection T10-PI support */ @@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } +/** + * Called with state mutex held + **/ static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; struct iser_cm_hdr req_hdr; + struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context; + struct iser_device *device = ib_conn->device; + + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); if (ret) goto failure; memset(&conn_param, 0, sizeof conn_param); - conn_param.responder_resources = 4; + conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; @@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; + ib_conn = (struct iser_conn *)cma_id->context; + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; + (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); - ib_conn = (struct iser_conn *)cma_id->context; - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) - wake_up_interruptible(&ib_conn->wait); + ib_conn->state = ISER_CONN_UP; + complete(&ib_conn->up_completion); } static void iser_disconnected_handler(struct rdma_cm_id *cma_id) @@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) iser_err("iscsi_iser connection isn't bound\n"); } - /* Complete the termination process if no posts are pending */ + /* Complete the termination process if no posts are pending. This code + * block also exists in iser_handle_comp_error(), but it is needed here + * for cases of no flushes at all, e.g. discovery over rdma. + */ if (ib_conn->post_recv_buf_count == 0 && (atomic_read(&ib_conn->post_send_buf_count) == 0)) { - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); + complete(&ib_conn->flush_completion); } } static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { + struct iser_conn *ib_conn; + + ib_conn = (struct iser_conn *)cma_id->context; iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); + mutex_lock(&ib_conn->state_mutex); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: iser_addr_handler(cma_id); @@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: iser_disconnected_handler(cma_id); break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } + mutex_unlock(&ib_conn->state_mutex); return 0; } void iser_conn_init(struct iser_conn *ib_conn) { ib_conn->state = ISER_CONN_INIT; - init_waitqueue_head(&ib_conn->wait); ib_conn->post_recv_buf_count = 0; atomic_set(&ib_conn->post_send_buf_count, 0); init_completion(&ib_conn->stop_completion); + init_completion(&ib_conn->flush_completion); + init_completion(&ib_conn->up_completion); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); + mutex_init(&ib_conn->state_mutex); } /** @@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn) * sleeps until the connection is established or rejected */ int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking) { - struct sockaddr *src, *dst; int err = 0; - sprintf(ib_conn->name, "%pI4:%d", - &dst_addr->sin_addr.s_addr, dst_addr->sin_port); + mutex_lock(&ib_conn->state_mutex); + + sprintf(ib_conn->name, "%pISp", dst_addr); + + iser_info("connecting to: %s\n", ib_conn->name); /* the device is known only --after-- address resolution */ ib_conn->device = NULL; - iser_info("connecting to: %pI4, port 0x%x\n", - &dst_addr->sin_addr, dst_addr->sin_port); - ib_conn->state = ISER_CONN_PENDING; ib_conn->cma_id = rdma_create_id(iser_cma_handler, @@ -837,23 +877,21 @@ int iser_connect(struct iser_conn *ib_conn, goto id_failure; } - src = (struct sockaddr *)src_addr; - dst = (struct sockaddr *)dst_addr; - err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); + err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000); if (err) { iser_err("rdma_resolve_addr failed: %d\n", err); goto addr_failure; } if (!non_blocking) { - wait_event_interruptible(ib_conn->wait, - (ib_conn->state != ISER_CONN_PENDING)); + wait_for_completion_interruptible(&ib_conn->up_completion); if (ib_conn->state != ISER_CONN_UP) { err = -EIO; goto connect_failure; } } + mutex_unlock(&ib_conn->state_mutex); mutex_lock(&ig.connlist_mutex); list_add(&ib_conn->conn_list, &ig.connlist); @@ -865,6 +903,7 @@ id_failure: addr_failure: ib_conn->state = ISER_CONN_DOWN; connect_failure: + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); return err; } @@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, if (ib_conn->post_recv_buf_count == 0 && atomic_read(&ib_conn->post_send_buf_count) == 0) { - /* getting here when the state is UP means that the conn is * - * being terminated asynchronously from the iSCSI layer's * - * perspective. */ - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) + /** + * getting here when the state is UP means that the conn is + * being terminated asynchronously from the iSCSI layer's + * perspective. It is safe to peek at the connection state + * since iscsi_conn_failure is allowed to be called twice. + **/ + if (ib_conn->state == ISER_CONN_UP) iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); /* no more non completed posts to the QP, complete the * termination process w.o worrying on disconnect event */ - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); + complete(&ib_conn->flush_completion); } } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e3c2c5b..62d2a18 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) SCSI_SENSE_BUFFERSIZE)); } - if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) - scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); - else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) + if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); srp_free_req(target, req, scmnd, be32_to_cpu(rsp->req_lim_delta)); @@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. */ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -3313,16 +3319,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (!srp_remove_wq) { + ret = -ENOMEM; + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -3330,13 +3342,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index fe09f27..d28a8c2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: + case IB_EVENT_GID_CHANGE: /* Refresh port data asynchronously. */ if (event->element.port_num <= sdev->device->phys_port_cnt) { sport = &sdev->port[event->element.port_num - 1]; @@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport) ®_req, 0, srpt_mad_send_handler, srpt_mad_recv_handler, - sport); + sport, 0); if (IS_ERR(sport->mad_agent)) { ret = PTR_ERR(sport->mad_agent); sport->mad_agent = NULL; diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 443d03f..8eeab72 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg) mutex_unlock(&ca->set->bucket_lock); blkdev_issue_discard(ca->bdev, bucket_to_sector(ca->set, bucket), - ca->sb.block_size, GFP_KERNEL, 0); + ca->sb.bucket_size, GFP_KERNEL, 0); mutex_lock(&ca->set->bucket_lock); } diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d2ebcf3..04f7bc2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -477,9 +477,13 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). + * + * CACHE_SET_RUNNING means all cache devices have been registered and journal + * replay is complete. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 +#define CACHE_SET_RUNNING 2 struct cache_set { struct closure cl; diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 5454164..646fe85 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, { uint64_t start_time; bool used_mempool = false; - struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, + struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT, order); if (!out) { struct page *outp; diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 5f6728d..ae96462 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l, { return (KEY_DIRTY(l) == KEY_DIRTY(r) && KEY_PTRS(l) == KEY_PTRS(r) && - KEY_CSUM(l) == KEY_CSUM(l)); + KEY_CSUM(l) == KEY_CSUM(r)); } /* Keylists */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7347b61..00cde40 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -117,9 +117,9 @@ ({ \ int _r, l = (b)->level - 1; \ bool _w = l <= (op)->lock; \ - struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\ + struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \ + _w, b); \ if (!IS_ERR(_child)) { \ - _child->parent = (b); \ _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ rw_unlock(_w, _child); \ } else \ @@ -142,7 +142,6 @@ rw_lock(_w, _b, _b->level); \ if (_b == (c)->root && \ _w == insert_lock(op, _b)) { \ - _b->parent = NULL; \ _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ } \ rw_unlock(_w, _b); \ @@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b) struct bset *i = btree_bset_first(b); struct btree_iter *iter; - iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); + iter = mempool_alloc(b->c->fill_iter, GFP_NOIO); iter->size = b->c->sb.bucket_size / b->c->sb.block_size; iter->used = 0; @@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b) SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_sector_offset(&b->keys, i)); - if (!bio_alloc_pages(b->bio, GFP_NOIO)) { + if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) { int j; struct bio_vec *bv; void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); @@ -967,7 +966,8 @@ err: * level and op->lock. */ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, - struct bkey *k, int level, bool write) + struct bkey *k, int level, bool write, + struct btree *parent) { int i = 0; struct btree *b; @@ -1002,6 +1002,7 @@ retry: BUG_ON(b->level != level); } + b->parent = parent; b->accessed = 1; for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { @@ -1022,15 +1023,16 @@ retry: return b; } -static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) +static void btree_node_prefetch(struct btree *parent, struct bkey *k) { struct btree *b; - mutex_lock(&c->bucket_lock); - b = mca_alloc(c, NULL, k, level); - mutex_unlock(&c->bucket_lock); + mutex_lock(&parent->c->bucket_lock); + b = mca_alloc(parent->c, NULL, k, parent->level - 1); + mutex_unlock(&parent->c->bucket_lock); if (!IS_ERR_OR_NULL(b)) { + b->parent = parent; bch_btree_node_read(b); rw_unlock(true, b); } @@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b) mutex_unlock(&b->c->bucket_lock); } -struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, - int level) +struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + int level, bool wait, + struct btree *parent) { BKEY_PADDED(key) k; struct btree *b = ERR_PTR(-EAGAIN); mutex_lock(&c->bucket_lock); retry: - if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL)) + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) goto err; bkey_put(c, &k.key); @@ -1085,6 +1088,7 @@ retry: } b->accessed = 1; + b->parent = parent; bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); mutex_unlock(&c->bucket_lock); @@ -1096,14 +1100,21 @@ err_free: err: mutex_unlock(&c->bucket_lock); - trace_bcache_btree_node_alloc_fail(b); + trace_bcache_btree_node_alloc_fail(c); return b; } +static struct btree *bch_btree_node_alloc(struct cache_set *c, + struct btree_op *op, int level, + struct btree *parent) +{ + return __bch_btree_node_alloc(c, op, level, op != NULL, parent); +} + static struct btree *btree_node_alloc_replacement(struct btree *b, struct btree_op *op) { - struct btree *n = bch_btree_node_alloc(b->c, op, b->level); + struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); if (!IS_ERR_OR_NULL(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); @@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, BUG_ON(btree_bset_first(new_nodes[0])->keys); btree_node_free(new_nodes[0]); rw_unlock(true, new_nodes[0]); + new_nodes[0] = NULL; for (i = 0; i < nodes; i++) { if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) @@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) { r->b = bch_btree_node_get(b->c, op, k, b->level - 1, - true); + true, b); if (IS_ERR(r->b)) { ret = PTR_ERR(r->b); break; @@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); if (k) - btree_node_prefetch(b->c, k, b->level - 1); + btree_node_prefetch(b, k); if (p) ret = btree(check_recurse, p, b, op); @@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op, trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); - n2 = bch_btree_node_alloc(b->c, op, b->level); + n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent); if (IS_ERR(n2)) goto err_free1; if (!b->parent) { - n3 = bch_btree_node_alloc(b->c, op, b->level + 1); + n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL); if (IS_ERR(n3)) goto err_free2; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 91dfa5e..5c391fa 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *); void bch_btree_node_write(struct btree *, struct closure *); void bch_btree_set_root(struct btree *); -struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int); +struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *, + int, bool, struct btree *); struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *, - struct bkey *, int, bool); + struct bkey *, int, bool, struct btree *); int bch_btree_insert_check_key(struct btree *, struct btree_op *, struct bkey *); diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 3a0de4c..243de0bf 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -474,9 +474,8 @@ out: return false; } -static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) +bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k) { - struct btree *b = container_of(bk, struct btree, keys); char buf[80]; if (!KEY_SIZE(k)) @@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) if (KEY_SIZE(k) > KEY_OFFSET(k)) goto bad; - if (__ptr_invalid(b->c, k)) + if (__ptr_invalid(c, k)) goto bad; return false; bad: bch_extent_to_text(buf, sizeof(buf), k); - cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k)); + cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k)); return true; } +static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) +{ + struct btree *b = container_of(bk, struct btree, keys); + return __bch_extent_invalid(b->c, k); +} + static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, unsigned ptr) { diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index e4e2340..e2ed540 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -9,5 +9,6 @@ struct cache_set; void bch_extent_to_text(char *, size_t, const struct bkey *); bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); +bool __bch_extent_invalid(struct cache_set *, const struct bkey *); #endif /* _BCACHE_EXTENTS_H */ diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 59e8202..fe080ad 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -7,6 +7,7 @@ #include "bcache.h" #include "btree.h" #include "debug.h" +#include "extents.h" #include <trace/events/bcache.h> @@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) if (read_bucket(l)) goto bsearch; - if (list_empty(list)) + /* no journal entries on this device? */ + if (l == ca->sb.njournal_buckets) continue; bsearch: + BUG_ON(list_empty(list)); + /* Binary search */ - m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); + m = l; + r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { @@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) for (k = i->j.start; k < bset_bkey_last(&i->j); - k = bkey_next(k)) { - unsigned j; + k = bkey_next(k)) + if (!__bch_extent_invalid(c, k)) { + unsigned j; - for (j = 0; j < KEY_PTRS(k); j++) - if (ptr_available(c, k, j)) - atomic_inc(&PTR_BUCKET(c, k, j)->pin); + for (j = 0; j < KEY_PTRS(k); j++) + if (ptr_available(c, k, j)) + atomic_inc(&PTR_BUCKET(c, k, j)->pin); - bch_initial_mark_key(c, 0, k); - } + bch_initial_mark_key(c, 0, k); + } } } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 15fff4f..62e6e98 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl) { struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); - trace_bcache_write(op->bio, op->writeback, op->bypass); + trace_bcache_write(op->c, op->inode, op->bio, + op->writeback, op->bypass); bch_keylist_init(&op->insert_keys); bio_get(op->bio); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 926ded8..d4713d0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d) static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, unsigned id) { - BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags)); - d->id = id; d->c = c; c->devices[id] = d; @@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w) list_move(&dc->list, &uncached_devices); clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); + clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags); mutex_unlock(&bch_register_lock); @@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c) */ atomic_set(&dc->count, 1); + if (bch_cached_dev_writeback_start(dc)) + return -ENOMEM; + if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { bch_sectors_dirty_init(dc); atomic_set(&dc->has_dirty, 1); @@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); cancel_delayed_work_sync(&dc->writeback_rate_update); - kthread_stop(dc->writeback_thread); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) + kthread_stop(dc->writeback_thread); mutex_lock(&bch_register_lock); @@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl) mutex_unlock(&bch_register_lock); - if (!IS_ERR_OR_NULL(dc->bdev)) { - if (dc->bdev->bd_disk) - blk_sync_queue(bdev_get_queue(dc->bdev)); - + if (!IS_ERR_OR_NULL(dc->bdev)) blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - } wake_up(&unregister_wait); @@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj) static void flash_dev_free(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); bcache_device_free(d); + mutex_unlock(&bch_register_lock); kobject_put(&d->kobj); } @@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl) { struct bcache_device *d = container_of(cl, struct bcache_device, cl); + mutex_lock(&bch_register_lock); bcache_device_unlink(d); + mutex_unlock(&bch_register_lock); kobject_del(&d->kobj); continue_at(cl, flash_dev_free, system_wq); } @@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) if (test_bit(CACHE_SET_STOPPING, &c->flags)) return -EINTR; + if (!test_bit(CACHE_SET_RUNNING, &c->flags)) + return -EPERM; + u = uuid_find_empty(c); if (!u) { pr_err("Can't create volume, no room for UUID"); @@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl) bch_journal_free(c); for_each_cache(ca, c, i) - if (ca) + if (ca) { + ca->set = NULL; + c->cache[ca->sb.nr_this_dev] = NULL; kobject_put(&ca->kobj); + } bch_bset_sort_state_free(&c->sort); free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); @@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl) if (ca->alloc_thread) kthread_stop(ca->alloc_thread); - cancel_delayed_work_sync(&c->journal.work); - /* flush last journal entry if needed */ - c->journal.work.work.func(&c->journal.work.work); + if (c->journal.cur) { + cancel_delayed_work_sync(&c->journal.work); + /* flush last journal entry if needed */ + c->journal.work.work.func(&c->journal.work.work); + } closure_return(cl); } @@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c) goto err; err = "error reading btree root"; - c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true); + c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; @@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c) goto err; err = "cannot allocate new btree root"; - c->root = bch_btree_node_alloc(c, NULL, 0); + c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); if (IS_ERR_OR_NULL(c->root)) goto err; @@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c) flash_devs_run(c); + set_bit(CACHE_SET_RUNNING, &c->flags); return; err: closure_sync(&cl); @@ -1760,6 +1772,7 @@ found: pr_debug("set version = %llu", c->sb.version); } + kobject_get(&ca->kobj); ca->set = c; ca->set->cache[ca->sb.nr_this_dev] = ca; c->cache_by_alloc[c->caches_loaded++] = ca; @@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj) struct cache *ca = container_of(kobj, struct cache, kobj); unsigned i; - if (ca->set) + if (ca->set) { + BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca); ca->set->cache[ca->sb.nr_this_dev] = NULL; + } bio_split_pool_free(&ca->bio_split_hook); @@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj) if (ca->sb_bio.bi_inline_vecs[0].bv_page) put_page(ca->sb_bio.bi_io_vec[0].bv_page); - if (!IS_ERR_OR_NULL(ca->bdev)) { - blk_sync_queue(bdev_get_queue(ca->bdev)); + if (!IS_ERR_OR_NULL(ca->bdev)) blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); - } kfree(ca); module_put(THIS_MODULE); @@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) } static void register_cache(struct cache_sb *sb, struct page *sb_page, - struct block_device *bdev, struct cache *ca) + struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; const char *err = "cannot allocate memory"; @@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page, goto err; pr_info("registered cache device %s", bdevname(bdev, name)); +out: + kobject_put(&ca->kobj); return; err: pr_notice("error opening %s: %s", bdevname(bdev, name), err); - kobject_put(&ca->kobj); + goto out; } /* Global interfaces/init */ @@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (IS_ERR(bdev)) { if (bdev == ERR_PTR(-EBUSY)) { bdev = lookup_bdev(strim(path)); + mutex_lock(&bch_register_lock); if (!IS_ERR(bdev) && bch_is_open(bdev)) err = "device already registered"; else err = "device busy"; + mutex_unlock(&bch_register_lock); } goto err; } diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index ac7d0d1..98df757 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -416,8 +416,8 @@ do { \ average_frequency, frequency_units); \ __print_time_stat(stats, name, \ average_duration, duration_units); \ - __print_time_stat(stats, name, \ - max_duration, duration_units); \ + sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \ + div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\ \ sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ ? div_s64(local_clock() - (stats)->last, \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f4300e4..f1986bc 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc) if (KEY_START(&w->key) != dc->last_read || jiffies_to_msecs(delay) > 50) while (!kthread_should_stop() && delay) - delay = schedule_timeout_uninterruptible(delay); + delay = schedule_timeout_interruptible(delay); dc->last_read = KEY_OFFSET(&w->key); @@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg) while (delay && !kthread_should_stop() && !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) - delay = schedule_timeout_uninterruptible(delay); + delay = schedule_timeout_interruptible(delay); } } @@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc) dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); } -int bch_cached_dev_writeback_init(struct cached_dev *dc) +void bch_cached_dev_writeback_init(struct cached_dev *dc) { sema_init(&dc->in_flight, 64); init_rwsem(&dc->writeback_lock); @@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc) dc->writeback_rate_d_term = 30; dc->writeback_rate_p_term_inverse = 6000; + INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); +} + +int bch_cached_dev_writeback_start(struct cached_dev *dc) +{ dc->writeback_thread = kthread_create(bch_writeback_thread, dc, "bcache_writeback"); if (IS_ERR(dc->writeback_thread)) return PTR_ERR(dc->writeback_thread); - INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate); schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); + bch_writeback_queue(dc); + return 0; } diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index e2f8598..0a9dab1 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc) void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); void bch_sectors_dirty_init(struct cached_dev *dc); -int bch_cached_dev_writeback_init(struct cached_dev *); +void bch_cached_dev_writeback_init(struct cached_dev *); +int bch_cached_dev_writeback_start(struct cached_dev *); #endif diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index d2899e7..0670925 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd) disk_super->discard_root = cpu_to_le64(cmd->discard_root); disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); - disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE); disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); disk_super->cache_blocks = cpu_to_le32(0); @@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, bool may_format_device) { int r; - cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, + cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT, CACHE_METADATA_CACHE_SIZE, CACHE_MAX_CONCURRENT_LOCKS); if (IS_ERR(cmd->bm)) { diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h index cd70a78..7383c90 100644 --- a/drivers/md/dm-cache-metadata.h +++ b/drivers/md/dm-cache-metadata.h @@ -9,19 +9,17 @@ #include "dm-cache-block-types.h" #include "dm-cache-policy-internal.h" +#include "persistent-data/dm-space-map-metadata.h" /*----------------------------------------------------------------*/ -#define DM_CACHE_METADATA_BLOCK_SIZE 4096 +#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE /* FIXME: remove this restriction */ /* * The metadata device is currently limited in size. - * - * We have one block of index, which can hold 255 index entries. Each - * index entry contains allocation info about 16k metadata blocks. */ -#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) +#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS /* * A metadata device larger than 16GB triggers a warning. diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 2c63326..1af40ee 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio) return bio->bi_rw & (REQ_FLUSH | REQ_FUA); } +/* + * You must increment the deferred set whilst the prison cell is held. To + * encourage this, we ask for 'cell' to be passed in. + */ +static void inc_ds(struct cache *cache, struct bio *bio, + struct dm_bio_prison_cell *cell) +{ + size_t pb_data_size = get_per_bio_data_size(cache); + struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); + + BUG_ON(!cell); + BUG_ON(pb->all_io_entry); + + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); +} + static void issue(struct cache *cache, struct bio *bio) { unsigned long flags; @@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio) spin_unlock_irqrestore(&cache->lock, flags); } +static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell) +{ + inc_ds(cache, bio, cell); + issue(cache, bio); +} + static void defer_writethrough_bio(struct cache *cache, struct bio *bio) { unsigned long flags; @@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); + + /* + * No need to inc_ds() here, since the cell will be held for the + * duration of the io. + */ generic_make_request(bio); } @@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache, return; INIT_LIST_HEAD(&work); - if (pb->all_io_entry) - dm_deferred_entry_dec(pb->all_io_entry, &work); + dm_deferred_entry_dec(pb->all_io_entry, &work); if (!list_empty(&work)) queue_quiesced_migrations(cache, &work); @@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) else remap_to_cache(cache, bio, 0); + /* + * REQ_FLUSH is not directed at any particular block so we don't + * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH + * by dm-core. + */ issue(cache, bio); } @@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio) &cache->stats.read_miss : &cache->stats.write_miss); } -static void issue_cache_bio(struct cache *cache, struct bio *bio, - struct per_bio_data *pb, - dm_oblock_t oblock, dm_cblock_t cblock) -{ - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - remap_to_cache_dirty(cache, bio, oblock, cblock); - issue(cache, bio); -} - static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { @@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; struct policy_result lookup_result; - size_t pb_data_size = get_per_bio_data_size(cache); - struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); bool passthrough = passthrough_mode(&cache->features); bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); @@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, } else { /* FIXME: factor out issue_origin() */ - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); remap_to_origin_clear_discard(cache, bio, block); - issue(cache, bio); + inc_and_issue(cache, bio, new_ocell); } } else { inc_hit_counter(cache, bio); @@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs, if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && !is_dirty(cache, lookup_result.cblock)) { - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - issue(cache, bio); - } else - issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); + inc_and_issue(cache, bio, new_ocell); + + } else { + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + inc_and_issue(cache, bio, new_ocell); + } } break; case POLICY_MISS: inc_miss_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); remap_to_origin_clear_discard(cache, bio, block); - issue(cache, bio); + inc_and_issue(cache, bio, new_ocell); break; case POLICY_NEW: @@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) bio_list_init(&cache->deferred_flush_bios); spin_unlock_irqrestore(&cache->lock, flags); + /* + * These bios have already been through inc_ds() + */ while ((bio = bio_list_pop(&bios))) submit_bios ? generic_make_request(bio) : bio_io_error(bio); } @@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache) bio_list_init(&cache->deferred_writethrough_bios); spin_unlock_irqrestore(&cache->lock, flags); + /* + * These bios have already been through inc_ds() + */ while ((bio = bio_list_pop(&bios))) generic_make_request(bio); } @@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws) if (commit_if_needed(cache)) { process_deferred_flush_bios(cache, false); + process_migrations(cache, &cache->need_commit_migrations, migration_failure); /* * FIXME: rollback metadata or just go into a @@ -2406,16 +2433,13 @@ out: return r; } -static int cache_map(struct dm_target *ti, struct bio *bio) +static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell) { - struct cache *cache = ti->private; - int r; dm_oblock_t block = get_bio_block(cache, bio); size_t pb_data_size = get_per_bio_data_size(cache); bool can_migrate = false; bool discarded_block; - struct dm_bio_prison_cell *cell; struct policy_result lookup_result; struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); @@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio) /* * Check to see if that block is currently migrating. */ - cell = alloc_prison_cell(cache); - if (!cell) { + *cell = alloc_prison_cell(cache); + if (!*cell) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; } - r = bio_detain(cache, block, bio, cell, + r = bio_detain(cache, block, bio, *cell, (cell_free_fn) free_prison_cell, - cache, &cell); + cache, cell); if (r) { if (r < 0) defer_bio(cache, bio); @@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio) r = policy_map(cache->policy, block, false, can_migrate, discarded_block, bio, &lookup_result); if (r == -EWOULDBLOCK) { - cell_defer(cache, cell, true); + cell_defer(cache, *cell, true); return DM_MAPIO_SUBMITTED; } else if (r) { DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); + cell_defer(cache, *cell, false); bio_io_error(bio); return DM_MAPIO_SUBMITTED; } @@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio) * We need to invalidate this block, so * defer for the worker thread. */ - cell_defer(cache, cell, true); + cell_defer(cache, *cell, true); r = DM_MAPIO_SUBMITTED; } else { - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); inc_miss_counter(cache, bio); remap_to_origin_clear_discard(cache, bio, block); - - cell_defer(cache, cell, false); } } else { inc_hit_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && !is_dirty(cache, lookup_result.cblock)) remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); else remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); - - cell_defer(cache, cell, false); } break; case POLICY_MISS: inc_miss_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); - if (pb->req_nr != 0) { /* * This is a duplicate writethrough io that is no * longer needed because the block has been demoted. */ bio_endio(bio, 0); - cell_defer(cache, cell, false); - return DM_MAPIO_SUBMITTED; - } else { + cell_defer(cache, *cell, false); + r = DM_MAPIO_SUBMITTED; + + } else remap_to_origin_clear_discard(cache, bio, block); - cell_defer(cache, cell, false); - } + break; default: DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, (unsigned) lookup_result.op); + cell_defer(cache, *cell, false); bio_io_error(bio); r = DM_MAPIO_SUBMITTED; } @@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio) return r; } +static int cache_map(struct dm_target *ti, struct bio *bio) +{ + int r; + struct dm_bio_prison_cell *cell; + struct cache *cache = ti->private; + + r = __cache_map(cache, bio, &cell); + if (r == DM_MAPIO_REMAPPED) { + inc_ds(cache, bio, cell); + cell_defer(cache, cell, false); + } + + return r; +} + static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) { struct cache *cache = ti->private; @@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", - (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), + (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, cache->sectors_per_block, @@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) */ if (io_opt_sectors < cache->sectors_per_block || do_div(io_opt_sectors, cache->sectors_per_block)) { - blk_limits_io_min(limits, 0); + blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); } set_discard_limits(cache, limits); @@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4cba2d8..2785007 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -59,7 +59,7 @@ struct dm_crypt_io { int error; sector_t sector; struct dm_crypt_io *base_io; -}; +} CRYPTO_MINALIGN_ATTR; struct dm_crypt_request { struct convert_context *ctx; @@ -162,6 +162,8 @@ struct crypt_config { */ unsigned int dmreq_start; + unsigned int per_bio_data_size; + unsigned long flags; unsigned int key_size; unsigned int key_parts; /* independent parts in key buffer */ @@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc, kcryptd_async_done, dmreq_of_req(cc, ctx->req)); } +static void crypt_free_req(struct crypt_config *cc, + struct ablkcipher_request *req, struct bio *base_bio) +{ + struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size); + + if ((struct ablkcipher_request *)(io + 1) != req) + mempool_free(req, cc->req_pool); +} + /* * Encrypt / decrypt data from one bio to another one (can be the same one) */ @@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone) } } -static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, - struct bio *bio, sector_t sector) +static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc, + struct bio *bio, sector_t sector) { - struct dm_crypt_io *io; - - io = mempool_alloc(cc->io_pool, GFP_NOIO); io->cc = cc; io->base_bio = bio; io->sector = sector; @@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, io->base_io = NULL; io->ctx.req = NULL; atomic_set(&io->io_pending, 0); - - return io; } static void crypt_inc_pending(struct dm_crypt_io *io) @@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io) return; if (io->ctx.req) - mempool_free(io->ctx.req, cc->req_pool); - mempool_free(io, cc->io_pool); + crypt_free_req(cc, io->ctx.req, base_bio); + if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size)) + mempool_free(io, cc->io_pool); if (likely(!base_io)) bio_endio(base_bio, error); @@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * between fragments, so switch to a new dm_crypt_io structure. */ if (unlikely(!crypt_finished && remaining)) { - new_io = crypt_io_alloc(io->cc, io->base_bio, - sector); + new_io = mempool_alloc(cc->io_pool, GFP_NOIO); + crypt_io_init(new_io, io->cc, io->base_bio, sector); crypt_inc_pending(new_io); crypt_convert_init(cc, &new_io->ctx, NULL, io->base_bio, sector); @@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req, if (error < 0) io->error = -EIO; - mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); + crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio); if (!atomic_dec_and_test(&ctx->cc_pending)) return; @@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } + cc->per_bio_data_size = ti->per_bio_data_size = + sizeof(struct dm_crypt_io) + cc->dmreq_start + + sizeof(struct dm_crypt_request) + cc->iv_size; + cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); if (!cc->page_pool) { ti->error = "Cannot allocate page mempool"; @@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } - io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); + io = dm_per_bio_data(bio, cc->per_bio_data_size); + crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); + io->ctx.req = (struct ablkcipher_request *)(io + 1); if (bio_data_dir(io->base_bio) == READ) { if (kcryptd_io_read(io, GFP_NOWAIT)) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index db404a0..c09359d 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -33,7 +33,6 @@ struct dm_io_client { struct io { unsigned long error_bits; atomic_t count; - struct completion *wait; struct dm_io_client *client; io_notify_fn callback; void *context; @@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, * We need an io object to keep track of the number of bios that * have been dispatched for a particular io. *---------------------------------------------------------------*/ -static void dec_count(struct io *io, unsigned int region, int error) +static void complete_io(struct io *io) { - if (error) - set_bit(region, &io->error_bits); + unsigned long error_bits = io->error_bits; + io_notify_fn fn = io->callback; + void *context = io->context; - if (atomic_dec_and_test(&io->count)) { - if (io->vma_invalidate_size) - invalidate_kernel_vmap_range(io->vma_invalidate_address, - io->vma_invalidate_size); + if (io->vma_invalidate_size) + invalidate_kernel_vmap_range(io->vma_invalidate_address, + io->vma_invalidate_size); - if (io->wait) - complete(io->wait); + mempool_free(io, io->client->pool); + fn(error_bits, context); +} - else { - unsigned long r = io->error_bits; - io_notify_fn fn = io->callback; - void *context = io->context; +static void dec_count(struct io *io, unsigned int region, int error) +{ + if (error) + set_bit(region, &io->error_bits); - mempool_free(io, io->client->pool); - fn(r, context); - } - } + if (atomic_dec_and_test(&io->count)) + complete_io(io); } static void endio(struct bio *bio, int error) @@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions, dec_count(io, 0, 0); } +struct sync_io { + unsigned long error_bits; + struct completion wait; +}; + +static void sync_io_complete(unsigned long error, void *context) +{ + struct sync_io *sio = context; + + sio->error_bits = error; + complete(&sio->wait); +} + static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, int rw, struct dpages *dp, unsigned long *error_bits) { - /* - * gcc <= 4.3 can't do the alignment for stack variables, so we must - * align it on our own. - * volatile prevents the optimizer from removing or reusing - * "io_" field from the stack frame (allowed in ANSI C). - */ - volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; - struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); - DECLARE_COMPLETION_ONSTACK(wait); + struct io *io; + struct sync_io sio; if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); return -EIO; } + init_completion(&sio.wait); + + io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->wait = &wait; io->client = client; + io->callback = sync_io_complete; + io->context = &sio; io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; dispatch_io(rw, num_regions, where, dp, io, 1); - wait_for_completion_io(&wait); + wait_for_completion_io(&sio.wait); if (error_bits) - *error_bits = io->error_bits; + *error_bits = sio.error_bits; - return io->error_bits ? -EIO : 0; + return sio.error_bits ? -EIO : 0; } static int async_io(struct dm_io_client *client, unsigned int num_regions, @@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->wait = NULL; io->client = client; io->callback = fn; io->context = context; @@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, * New collapsed (a)synchronous interface. * * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug - * the queue with blk_unplug() some time later or set REQ_SYNC in -io_req->bi_rw. If you fail to do one of these, the IO will be submitted to - * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. + * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw. + * If you fail to do one of these, the IO will be submitted to the disk after + * q->unplug_delay, which defaults to 3ms in blk-settings.c. */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, struct dm_io_region *where, unsigned long *sync_error_bits) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index f4167b0..833d7e7 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m) dm_noflush_suspending(m->ti))); } -#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required) - /* * Map cloned requests */ @@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone, if (!__must_push_back(m)) r = -EIO; /* Failed */ goto out_unlock; - } - if (!pg_ready(m)) { + } else if (m->queue_io || m->pg_init_required) { __pg_init_all_paths(m); goto out_unlock; } + if (set_mapinfo(m, map_context) < 0) /* ENOMEM, requeue */ goto out_unlock; diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 09a688b..50fca46 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr *bit *= sctx->region_table_entry_bits; } +static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr) +{ + unsigned long region_index; + unsigned bit; + + switch_get_position(sctx, region_nr, ®ion_index, &bit); + + return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & + ((1 << sctx->region_table_entry_bits) - 1); +} + /* * Find which path to use at given offset. */ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) { - unsigned long region_index; - unsigned bit, path_nr; + unsigned path_nr; sector_t p; p = offset; @@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) else sector_div(p, sctx->region_size); - switch_get_position(sctx, p, ®ion_index, &bit); - path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) & - ((1 << sctx->region_table_entry_bits) - 1); + path_nr = switch_region_table_read(sctx, p); /* This can only happen if the processor uses non-atomic stores. */ if (unlikely(path_nr >= sctx->nr_paths)) @@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string) } static int process_set_region_mappings(struct switch_ctx *sctx, - unsigned argc, char **argv) + unsigned argc, char **argv) { unsigned i; unsigned long region_index = 0; @@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx, unsigned long path_nr; const char *string = argv[i]; + if ((*string & 0xdf) == 'R') { + unsigned long cycle_length, num_write; + + string++; + if (unlikely(*string == ',')) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + cycle_length = parse_hex(&string); + if (unlikely(*string != ',')) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + string++; + if (unlikely(!*string)) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + num_write = parse_hex(&string); + if (unlikely(*string)) { + DMWARN("invalid set_region_mappings argument: '%s'", argv[i]); + return -EINVAL; + } + + if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) { + DMWARN("invalid set_region_mappings cycle length: %lu > %lu", + cycle_length - 1, region_index); + return -EINVAL; + } + if (unlikely(region_index + num_write < region_index) || + unlikely(region_index + num_write >= sctx->nr_regions)) { + DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu", + region_index, num_write, sctx->nr_regions); + return -EINVAL; + } + + while (num_write--) { + region_index++; + path_nr = switch_region_table_read(sctx, region_index - cycle_length); + switch_region_table_write(sctx, region_index, path_nr); + } + + continue; + } + if (*string == ':') region_index++; else { @@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti, static struct target_type switch_target = { .name = "switch", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = switch_ctr, .dtr = switch_dtr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5f59f1e..f9c6cb8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, return q && !blk_queue_add_random(q); } +static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); +} + static bool dm_table_all_devices_attribute(struct dm_table *t, iterate_devices_callout_fn func) { @@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t) return true; } +static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && blk_queue_discard(q); +} + +static bool dm_table_supports_discards(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + /* + * Unless any target used by the table set discards_supported, + * require at least one underlying device to support discards. + * t->devices includes internal dm devices such as mirror logs + * so we need to use iterate_devices here, which targets + * supporting discard selectively must provide. + */ + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_discard_bios) + continue; + + if (ti->discards_supported) + return 1; + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, device_discard_capable, NULL)) + return 1; + } + + return 0; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; + if (dm_table_all_devices_attribute(t, queue_supports_sg_merge)) + queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + else + queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + dm_table_set_integrity(t); /* @@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t) } EXPORT_SYMBOL(dm_table_run_md_queue_async); -static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) -{ - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && blk_queue_discard(q); -} - -bool dm_table_supports_discards(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i = 0; - - /* - * Unless any target used by the table set discards_supported, - * require at least one underlying device to support discards. - * t->devices includes internal dm devices such as mirror logs - * so we need to use iterate_devices here, which targets - * supporting discard selectively must provide. - */ - while (i < dm_table_get_num_targets(t)) { - ti = dm_table_get_target(t, i++); - - if (!ti->num_discard_bios) - continue; - - if (ti->discards_supported) - return 1; - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_discard_capable, NULL)) - return 1; - } - - return 0; -} diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index fc9c848..4843801 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -227,6 +227,7 @@ struct thin_c { struct list_head list; struct dm_dev *pool_dev; struct dm_dev *origin_dev; + sector_t origin_size; dm_thin_id dev_id; struct pool *pool; @@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, struct dm_thin_new_mapping { struct list_head list; - bool quiesced:1; - bool prepared:1; bool pass_discard:1; bool definitely_not_shared:1; + /* + * Track quiescing, copying and zeroing preparation actions. When this + * counter hits zero the block is prepared and can be inserted into the + * btree. + */ + atomic_t prepare_actions; + int err; struct thin_c *tc; dm_block_t virt_block; @@ -575,43 +581,41 @@ struct dm_thin_new_mapping { bio_end_io_t *saved_bi_end_io; }; -static void __maybe_add_mapping(struct dm_thin_new_mapping *m) +static void __complete_mapping_preparation(struct dm_thin_new_mapping *m) { struct pool *pool = m->tc->pool; - if (m->quiesced && m->prepared) { + if (atomic_dec_and_test(&m->prepare_actions)) { list_add_tail(&m->list, &pool->prepared_mappings); wake_worker(pool); } } -static void copy_complete(int read_err, unsigned long write_err, void *context) +static void complete_mapping_preparation(struct dm_thin_new_mapping *m) { unsigned long flags; - struct dm_thin_new_mapping *m = context; struct pool *pool = m->tc->pool; - m->err = read_err || write_err ? -EIO : 0; - spin_lock_irqsave(&pool->lock, flags); - m->prepared = true; - __maybe_add_mapping(m); + __complete_mapping_preparation(m); spin_unlock_irqrestore(&pool->lock, flags); } +static void copy_complete(int read_err, unsigned long write_err, void *context) +{ + struct dm_thin_new_mapping *m = context; + + m->err = read_err || write_err ? -EIO : 0; + complete_mapping_preparation(m); +} + static void overwrite_endio(struct bio *bio, int err) { - unsigned long flags; struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; - struct pool *pool = m->tc->pool; m->err = err; - - spin_lock_irqsave(&pool->lock, flags); - m->prepared = true; - __maybe_add_mapping(m); - spin_unlock_irqrestore(&pool->lock, flags); + complete_mapping_preparation(m); } /*----------------------------------------------------------------*/ @@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) return m; } +static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m, + sector_t begin, sector_t end) +{ + int r; + struct dm_io_region to; + + to.bdev = tc->pool_dev->bdev; + to.sector = begin; + to.count = end - begin; + + r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m); + if (r < 0) { + DMERR_LIMIT("dm_kcopyd_zero() failed"); + copy_complete(1, 1, m); + } +} + +/* + * A partial copy also needs to zero the uncopied region. + */ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_dev *origin, dm_block_t data_origin, dm_block_t data_dest, - struct dm_bio_prison_cell *cell, struct bio *bio) + struct dm_bio_prison_cell *cell, struct bio *bio, + sector_t len) { int r; struct pool *pool = tc->pool; @@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, m->data_block = data_dest; m->cell = cell; + /* + * quiesce action + copy action + an extra reference held for the + * duration of this function (we may need to inc later for a + * partial zero). + */ + atomic_set(&m->prepare_actions, 3); + if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) - m->quiesced = true; + complete_mapping_preparation(m); /* already quiesced */ /* * IO to pool_dev remaps to the pool target's data_dev. @@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, from.bdev = origin->bdev; from.sector = data_origin * pool->sectors_per_block; - from.count = pool->sectors_per_block; + from.count = len; to.bdev = tc->pool_dev->bdev; to.sector = data_dest * pool->sectors_per_block; - to.count = pool->sectors_per_block; + to.count = len; r = dm_kcopyd_copy(pool->copier, &from, 1, &to, 0, copy_complete, m); if (r < 0) { - mempool_free(m, pool->mapping_pool); DMERR_LIMIT("dm_kcopyd_copy() failed"); - cell_error(pool, cell); + copy_complete(1, 1, m); + + /* + * We allow the zero to be issued, to simplify the + * error path. Otherwise we'd need to start + * worrying about decrementing the prepare_actions + * counter. + */ + } + + /* + * Do we need to zero a tail region? + */ + if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) { + atomic_inc(&m->prepare_actions); + ll_zero(tc, m, + data_dest * pool->sectors_per_block + len, + (data_dest + 1) * pool->sectors_per_block); } } + + complete_mapping_preparation(m); /* drop our ref */ } static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, @@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_bio_prison_cell *cell, struct bio *bio) { schedule_copy(tc, virt_block, tc->pool_dev, - data_origin, data_dest, cell, bio); -} - -static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, - dm_block_t data_dest, - struct dm_bio_prison_cell *cell, struct bio *bio) -{ - schedule_copy(tc, virt_block, tc->origin_dev, - virt_block, data_dest, cell, bio); + data_origin, data_dest, cell, bio, + tc->pool->sectors_per_block); } static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, @@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, struct pool *pool = tc->pool; struct dm_thin_new_mapping *m = get_next_mapping(pool); - m->quiesced = true; - m->prepared = false; + atomic_set(&m->prepare_actions, 1); /* no need to quiesce */ m->tc = tc; m->virt_block = virt_block; m->data_block = data_block; @@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); - } else { - int r; - struct dm_io_region to; - to.bdev = tc->pool_dev->bdev; - to.sector = data_block * pool->sectors_per_block; - to.count = pool->sectors_per_block; + } else + ll_zero(tc, m, + data_block * pool->sectors_per_block, + (data_block + 1) * pool->sectors_per_block); +} - r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); - if (r < 0) { - mempool_free(m, pool->mapping_pool); - DMERR_LIMIT("dm_kcopyd_zero() failed"); - cell_error(pool, cell); - } - } +static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, + dm_block_t data_dest, + struct dm_bio_prison_cell *cell, struct bio *bio) +{ + struct pool *pool = tc->pool; + sector_t virt_block_begin = virt_block * pool->sectors_per_block; + sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block; + + if (virt_block_end <= tc->origin_size) + schedule_copy(tc, virt_block, tc->origin_dev, + virt_block, data_dest, cell, bio, + pool->sectors_per_block); + + else if (virt_block_begin < tc->origin_size) + schedule_copy(tc, virt_block, tc->origin_dev, + virt_block, data_dest, cell, bio, + tc->origin_size - virt_block_begin); + + else + schedule_zero(tc, virt_block, data_dest, cell, bio); } /* @@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio) inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); - remap_to_origin_and_issue(tc, bio); + if (bio_end_sector(bio) <= tc->origin_size) + remap_to_origin_and_issue(tc, bio); + + else if (bio->bi_iter.bi_sector < tc->origin_size) { + zero_fill_bio(bio); + bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT; + remap_to_origin_and_issue(tc, bio); + + } else { + zero_fill_bio(bio); + bio_endio(bio, 0); + } } else provision_block(tc, bio, block, cell); break; @@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) */ if (io_opt_sectors < pool->sectors_per_block || do_div(io_opt_sectors, pool->sectors_per_block)) { - blk_limits_io_min(limits, 0); + blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT); blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); } @@ -3141,7 +3206,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) spin_lock_irqsave(&pool->lock, flags); list_for_each_entry_safe(m, tmp, &work, list) { list_del(&m->list); - m->quiesced = true; - __maybe_add_mapping(m); + __complete_mapping_preparation(m); } spin_unlock_irqrestore(&pool->lock, flags); } @@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti) noflush_work(tc, do_noflush_stop); } +static int thin_preresume(struct dm_target *ti) +{ + struct thin_c *tc = ti->private; + + if (tc->origin_dev) + tc->origin_size = get_dev_size(tc->origin_dev->bdev); + + return 0; +} + /* * <nr mapped sectors> <highest mapped sector> */ @@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 12, 0}, + .version = {1, 13, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, .map = thin_map, .end_io = thin_endio, + .preresume = thin_preresume, .presuspend = thin_presuspend, .postsuspend = thin_postsuspend, .status = thin_status, diff --git a/drivers/md/dm.h b/drivers/md/dm.h index ed76126..e81d215 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t); unsigned dm_table_get_type(struct dm_table *t); struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); -bool dm_table_supports_discards(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); diff --git a/drivers/md/md.c b/drivers/md/md.c index 32fc19c..1294238 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5961,7 +5961,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd) int err = 0; if (mddev->pers) { - if (!mddev->pers->quiesce) + if (!mddev->pers->quiesce || !mddev->thread) return -EBUSY; if (mddev->recovery || mddev->sync_thread) return -EBUSY; @@ -6263,7 +6263,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) rv = update_raid_disks(mddev, info->raid_disks); if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) { - if (mddev->pers->quiesce == NULL) + if (mddev->pers->quiesce == NULL || mddev->thread == NULL) return -EINVAL; if (mddev->recovery || mddev->sync_thread) return -EBUSY; @@ -7376,7 +7376,7 @@ void md_do_sync(struct md_thread *thread) struct mddev *mddev2; unsigned int currspeed = 0, window; - sector_t max_sectors,j, io_sectors; + sector_t max_sectors,j, io_sectors, recovery_done; unsigned long mark[SYNC_MARKS]; unsigned long update_time; sector_t mark_cnt[SYNC_MARKS]; @@ -7652,7 +7652,8 @@ void md_do_sync(struct md_thread *thread) */ cond_resched(); - currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2 + recovery_done = io_sectors - atomic_read(&mddev->recovery_active); + currspeed = ((unsigned long)(recovery_done - mddev->resync_mark_cnt))/2 /((jiffies-mddev->resync_mark)/HZ +1) +1; if (currspeed > speed_min(mddev)) { @@ -8592,7 +8593,7 @@ static int __init md_init(void) goto err_mdp; mdp_major = ret; - blk_register_region(MKDEV(MD_MAJOR, 0), 1UL<<MINORBITS, THIS_MODULE, + blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE, md_probe, NULL, NULL); blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, md_probe, NULL, NULL); @@ -8687,7 +8688,7 @@ static __exit void md_exit(void) struct list_head *tmp; int delay = 1; - blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS); + blk_unregister_region(MKDEV(MD_MAJOR,0), 512); blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); unregister_blkdev(MD_MAJOR,"md"); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 407a99e..cf91f59 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -685,6 +685,12 @@ static void *raid0_takeover(struct mddev *mddev) * raid10 - assuming we have all necessary active disks * raid1 - with (N -1) mirror drives faulty */ + + if (mddev->bitmap) { + printk(KERN_ERR "md/raid0: %s: cannot takeover array with bitmap\n", + mdname(mddev)); + return ERR_PTR(-EBUSY); + } if (mddev->level == 4) return raid0_takeover_raid45(mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 56e24c0..d7690f8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1501,12 +1501,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) mddev->degraded++; set_bit(Faulty, &rdev->flags); spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } else set_bit(Faulty, &rdev->flags); + /* + * if recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index cb882aa..b08c188 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1684,13 +1684,12 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_unlock_irqrestore(&conf->device_lock, flags); return; } - if (test_and_clear_bit(In_sync, &rdev->flags)) { + if (test_and_clear_bit(In_sync, &rdev->flags)) mddev->degraded++; - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - } + /* + * If recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c index 6352bec..71f387c 100644 --- a/drivers/mfd/rtsx_usb.c +++ b/drivers/mfd/rtsx_usb.c @@ -744,6 +744,7 @@ static struct usb_device_id rtsx_usb_usb_ids[] = { { USB_DEVICE(0x0BDA, 0x0140) }, { } }; +MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids); static struct usb_driver rtsx_usb_driver = { .name = "rtsx_usb", diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 452782b..ede41f0 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2028,8 +2028,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) /* complete ongoing async transfer before issuing discard */ if (card->host->areq) mmc_blk_issue_rw_rq(mq, NULL); - if (req->cmd_flags & REQ_SECURE && - !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN)) + if (req->cmd_flags & REQ_SECURE) ret = mmc_blk_issue_secdiscard_rq(mq, req); else ret = mmc_blk_issue_discard_rq(mq, req); @@ -2432,6 +2431,8 @@ static int mmc_blk_probe(struct mmc_card *card) if (!(card->csd.cmdclass & CCC_BLOCK_READ)) return -ENODEV; + mmc_fixup_device(card, blk_fixups); + md = mmc_blk_alloc(card); if (IS_ERR(md)) return PTR_ERR(md); @@ -2446,7 +2447,6 @@ static int mmc_blk_probe(struct mmc_card *card) goto out; mmc_set_drvdata(card, md); - mmc_fixup_device(card, blk_fixups); if (mmc_add_disk(md)) goto out; diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index d2dbf02..8a1f124 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -180,7 +180,6 @@ static int mmc_bus_resume(struct device *dev) #endif #ifdef CONFIG_PM_RUNTIME - static int mmc_runtime_suspend(struct device *dev) { struct mmc_card *card = mmc_dev_to_card(dev); @@ -196,17 +195,10 @@ static int mmc_runtime_resume(struct device *dev) return host->bus_ops->runtime_resume(host); } - -static int mmc_runtime_idle(struct device *dev) -{ - return 0; -} - #endif /* !CONFIG_PM_RUNTIME */ static const struct dev_pm_ops mmc_bus_pm_ops = { - SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, - mmc_runtime_idle) + SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, NULL) SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume) }; diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 7dc0c85..d03a080 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2102,7 +2102,8 @@ EXPORT_SYMBOL(mmc_can_sanitize); int mmc_can_secure_erase_trim(struct mmc_card *card) { - if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) + if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) && + !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN)) return 1; return 0; } diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 793c6f7..1eda8dd 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -324,13 +324,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) } } + /* + * The EXT_CSD format is meant to be forward compatible. As long + * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV + * are authorized, see JEDEC JESD84-B50 section B.8. + */ card->ext_csd.rev = ext_csd[EXT_CSD_REV]; - if (card->ext_csd.rev > 7) { - pr_err("%s: unrecognised EXT_CSD revision %d\n", - mmc_hostname(card->host), card->ext_csd.rev); - err = -EINVAL; - goto out; - } card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0]; card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1]; diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c index 6c36fcc..dd1d1e0 100644 --- a/drivers/mmc/core/quirks.c +++ b/drivers/mmc/core/quirks.c @@ -91,7 +91,7 @@ void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table) (f->cis_device == card->cis.device || f->cis_device == (u16) SDIO_ANY_ID) && rev >= f->rev_start && rev <= f->rev_end) { - dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup); + dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup); f->vendor_fixup(card, f->data); } } diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c index 274ef00..48d0c93 100644 --- a/drivers/mmc/core/sd_ops.c +++ b/drivers/mmc/core/sd_ops.c @@ -184,6 +184,9 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr) mmc_delay(10); } + if (!i) + pr_err("%s: card never left busy state\n", mmc_hostname(host)); + if (rocr && !mmc_host_is_spi(host)) *rocr = cmd.resp[0]; diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index a565254..4511358 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -290,6 +290,18 @@ config MMC_MOXART be found on some embedded hardware such as UC-7112-LX. If you have a controller with this interface, say Y here. +config MMC_SDHCI_ST + tristate "SDHCI support on STMicroelectronics SoC" + depends on ARCH_STI + depends on MMC_SDHCI_PLTFM + select MMC_SDHCI_IO_ACCESSORS + help + This selects the Secure Digital Host Controller Interface in + STMicroelectronics SoCs. + + If you have a controller with this interface, say Y or M here. + If unsure, say N. + config MMC_OMAP tristate "TI OMAP Multimedia Card Interface support" depends on ARCH_OMAP @@ -303,6 +315,7 @@ config MMC_OMAP config MMC_OMAP_HS tristate "TI OMAP High Speed Multimedia Card Interface support" + depends on HAS_DMA depends on ARCH_OMAP2PLUS || COMPILE_TEST help This selects the TI OMAP High Speed Multimedia card Interface. @@ -343,7 +356,7 @@ config MMC_ATMELMCI config MMC_SDHCI_MSM tristate "Qualcomm SDHCI Controller Support" - depends on ARCH_QCOM + depends on ARCH_QCOM || (ARM && COMPILE_TEST) depends on MMC_SDHCI_PLTFM help This selects the Secure Digital Host Controller Interface (SDHCI) @@ -440,6 +453,7 @@ config MMC_SPI config MMC_S3C tristate "Samsung S3C SD/MMC Card Interface support" depends on ARCH_S3C24XX + depends on S3C24XX_DMAC help This selects a driver for the MCI interface found in Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs. @@ -477,15 +491,6 @@ config MMC_S3C_DMA working properly and needs to be debugged before this option is useful. -config MMC_S3C_PIODMA - bool "Support for both PIO and DMA" - help - Compile both the PIO and DMA transfer routines into the - driver and let the platform select at run-time which one - is best. - - See notes for the DMA option. - endchoice config MMC_SDRICOH_CS @@ -623,7 +628,7 @@ config MMC_DW_PCI config MMC_SH_MMCIF tristate "SuperH Internal MMCIF support" - depends on MMC_BLOCK + depends on MMC_BLOCK && HAS_DMA depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST help This selects the MMC Host Interface controller (MMCIF). @@ -697,6 +702,7 @@ config MMC_WMT config MMC_USDHI6ROL0 tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support" + depends on HAS_DMA help This selects support for the Renesas USDHI6ROL0 SD/SDIO Host Controller diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile index 7f81ddf..f211eed 100644 --- a/drivers/mmc/host/Makefile +++ b/drivers/mmc/host/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o obj-$(CONFIG_MMC_SDHCI_BCM2835) += sdhci-bcm2835.o obj-$(CONFIG_MMC_SDHCI_MSM) += sdhci-msm.o +obj-$(CONFIG_MMC_SDHCI_ST) += sdhci-st.o ifeq ($(CONFIG_CB710_DEBUG),y) CFLAGS-cb710-mmc += -DDEBUG diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 1ac227c..8f216ed 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -111,8 +111,7 @@ static const u8 tuning_blk_pattern_8bit[] = { 0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, }; -static inline bool dw_mci_fifo_reset(struct dw_mci *host); -static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host); +static bool dw_mci_reset(struct dw_mci *host); #if defined(CONFIG_DEBUG_FS) static int dw_mci_req_show(struct seq_file *s, void *v) @@ -997,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc) int gpio_ro = mmc_gpio_get_ro(mmc); /* Use platform get_ro function, else try on board write protect */ - if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) + if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) || + (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT)) read_only = 0; else if (!IS_ERR_VALUE(gpio_ro)) read_only = gpio_ro; @@ -1235,7 +1235,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data) * After an error, there may be data lingering * in the FIFO */ - dw_mci_fifo_reset(host); + dw_mci_reset(host); } else { data->bytes_xfered = data->blocks * data->blksz; data->error = 0; @@ -1352,7 +1352,7 @@ static void dw_mci_tasklet_func(unsigned long priv) /* CMD error in data command */ if (mrq->cmd->error && mrq->data) - dw_mci_fifo_reset(host); + dw_mci_reset(host); host->cmd = NULL; host->data = NULL; @@ -1963,14 +1963,8 @@ static void dw_mci_work_routine_card(struct work_struct *work) } /* Power down slot */ - if (present == 0) { - /* Clear down the FIFO */ - dw_mci_fifo_reset(host); -#ifdef CONFIG_MMC_DW_IDMAC - dw_mci_idmac_reset(host); -#endif - - } + if (present == 0) + dw_mci_reset(host); spin_unlock_bh(&host->lock); @@ -2021,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot) /* get quirks */ for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++) - if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) + if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) { + dev_warn(dev, "Slot quirk %s is deprecated\n", + of_slot_quirks[idx].quirk); quirks |= of_slot_quirks[idx].id; + } return quirks; } @@ -2208,8 +2205,11 @@ static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset) return false; } -static inline bool dw_mci_fifo_reset(struct dw_mci *host) +static bool dw_mci_reset(struct dw_mci *host) { + u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET; + bool ret = false; + /* * Reseting generates a block interrupt, hence setting * the scatter-gather pointer to NULL. @@ -2219,15 +2219,60 @@ static inline bool dw_mci_fifo_reset(struct dw_mci *host) host->sg = NULL; } - return dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET); -} + if (host->use_dma) + flags |= SDMMC_CTRL_DMA_RESET; -static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host) -{ - return dw_mci_ctrl_reset(host, - SDMMC_CTRL_FIFO_RESET | - SDMMC_CTRL_RESET | - SDMMC_CTRL_DMA_RESET); + if (dw_mci_ctrl_reset(host, flags)) { + /* + * In all cases we clear the RAWINTS register to clear any + * interrupts. + */ + mci_writel(host, RINTSTS, 0xFFFFFFFF); + + /* if using dma we wait for dma_req to clear */ + if (host->use_dma) { + unsigned long timeout = jiffies + msecs_to_jiffies(500); + u32 status; + do { + status = mci_readl(host, STATUS); + if (!(status & SDMMC_STATUS_DMA_REQ)) + break; + cpu_relax(); + } while (time_before(jiffies, timeout)); + + if (status & SDMMC_STATUS_DMA_REQ) { + dev_err(host->dev, + "%s: Timeout waiting for dma_req to " + "clear during reset\n", __func__); + goto ciu_out; + } + + /* when using DMA next we reset the fifo again */ + if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET)) + goto ciu_out; + } + } else { + /* if the controller reset bit did clear, then set clock regs */ + if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) { + dev_err(host->dev, "%s: fifo/dma reset bits didn't " + "clear but ciu was reset, doing clock update\n", + __func__); + goto ciu_out; + } + } + +#if IS_ENABLED(CONFIG_MMC_DW_IDMAC) + /* It is also recommended that we reset and reprogram idmac */ + dw_mci_idmac_reset(host); +#endif + + ret = true; + +ciu_out: + /* After a CTRL reset we need to have CIU set clock registers */ + mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0); + + return ret; } #ifdef CONFIG_OF @@ -2238,6 +2283,9 @@ static struct dw_mci_of_quirks { { .quirk = "broken-cd", .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION, + }, { + .quirk = "disable-wp", + .id = DW_MCI_QUIRK_NO_WRITE_PROTECT, }, }; @@ -2425,7 +2473,7 @@ int dw_mci_probe(struct dw_mci *host) } /* Reset all blocks */ - if (!dw_mci_ctrl_all_reset(host)) + if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) return -ENODEV; host->dma_ops = host->pdata->dma_ops; @@ -2612,7 +2660,7 @@ int dw_mci_resume(struct dw_mci *host) } } - if (!dw_mci_ctrl_all_reset(host)) { + if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) { ret = -ENODEV; return ret; } diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 738fa24..08fd956 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -129,6 +129,7 @@ #define SDMMC_CMD_INDX(n) ((n) & 0x1F) /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) +#define SDMMC_STATUS_DMA_REQ BIT(31) /* FIFOTH register defines */ #define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ ((r) & 0xFFF) << 16 | \ @@ -150,6 +151,10 @@ /* Card read threshold */ #define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x)) +/* All ctrl reset bits */ +#define SDMMC_CTRL_ALL_RESET_FLAGS \ + (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET) + /* Register access macros */ #define mci_readl(dev, reg) \ __raw_readl((dev)->regs + SDMMC_##reg) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index 7ad463e..e4d4707 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -52,34 +52,53 @@ static unsigned int fmax = 515633; * struct variant_data - MMCI variant-specific quirks * @clkreg: default value for MCICLOCK register * @clkreg_enable: enable value for MMCICLOCK register + * @clkreg_8bit_bus_enable: enable value for 8 bit bus + * @clkreg_neg_edge_enable: enable value for inverted data/cmd output * @datalength_bits: number of bits in the MMCIDATALENGTH register * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY * is asserted (likewise for RX) * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY * is asserted (likewise for RX) + * @data_cmd_enable: enable value for data commands. * @sdio: variant supports SDIO * @st_clkdiv: true if using a ST-specific clock divider algorithm + * @datactrl_mask_ddrmode: ddr mode mask in datactrl register. * @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register + * @blksz_datactrl4: true if Block size is at b4..b16 position in datactrl + * register * @pwrreg_powerup: power up value for MMCIPOWER register + * @f_max: maximum clk frequency supported by the controller. * @signal_direction: input/out direction of bus signals can be indicated * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock * @busy_detect: true if busy detection on dat0 is supported * @pwrreg_nopower: bits in MMCIPOWER don't controls ext. power supply + * @explicit_mclk_control: enable explicit mclk control in driver. + * @qcom_fifo: enables qcom specific fifo pio read logic. + * @reversed_irq_handling: handle data irq before cmd irq. */ struct variant_data { unsigned int clkreg; unsigned int clkreg_enable; + unsigned int clkreg_8bit_bus_enable; + unsigned int clkreg_neg_edge_enable; unsigned int datalength_bits; unsigned int fifosize; unsigned int fifohalfsize; + unsigned int data_cmd_enable; + unsigned int datactrl_mask_ddrmode; bool sdio; bool st_clkdiv; bool blksz_datactrl16; + bool blksz_datactrl4; u32 pwrreg_powerup; + u32 f_max; bool signal_direction; bool pwrreg_clkgate; bool busy_detect; bool pwrreg_nopower; + bool explicit_mclk_control; + bool qcom_fifo; + bool reversed_irq_handling; }; static struct variant_data variant_arm = { @@ -87,6 +106,8 @@ static struct variant_data variant_arm = { .fifohalfsize = 8 * 4, .datalength_bits = 16, .pwrreg_powerup = MCI_PWR_UP, + .f_max = 100000000, + .reversed_irq_handling = true, }; static struct variant_data variant_arm_extended_fifo = { @@ -94,6 +115,7 @@ static struct variant_data variant_arm_extended_fifo = { .fifohalfsize = 64 * 4, .datalength_bits = 16, .pwrreg_powerup = MCI_PWR_UP, + .f_max = 100000000, }; static struct variant_data variant_arm_extended_fifo_hwfc = { @@ -102,15 +124,18 @@ static struct variant_data variant_arm_extended_fifo_hwfc = { .clkreg_enable = MCI_ARM_HWFCEN, .datalength_bits = 16, .pwrreg_powerup = MCI_PWR_UP, + .f_max = 100000000, }; static struct variant_data variant_u300 = { .fifosize = 16 * 4, .fifohalfsize = 8 * 4, .clkreg_enable = MCI_ST_U300_HWFCEN, + .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS, .datalength_bits = 16, .sdio = true, .pwrreg_powerup = MCI_PWR_ON, + .f_max = 100000000, .signal_direction = true, .pwrreg_clkgate = true, .pwrreg_nopower = true, @@ -124,6 +149,7 @@ static struct variant_data variant_nomadik = { .sdio = true, .st_clkdiv = true, .pwrreg_powerup = MCI_PWR_ON, + .f_max = 100000000, .signal_direction = true, .pwrreg_clkgate = true, .pwrreg_nopower = true, @@ -134,10 +160,13 @@ static struct variant_data variant_ux500 = { .fifohalfsize = 8 * 4, .clkreg = MCI_CLK_ENABLE, .clkreg_enable = MCI_ST_UX500_HWFCEN, + .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS, + .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE, .datalength_bits = 24, .sdio = true, .st_clkdiv = true, .pwrreg_powerup = MCI_PWR_ON, + .f_max = 100000000, .signal_direction = true, .pwrreg_clkgate = true, .busy_detect = true, @@ -149,17 +178,38 @@ static struct variant_data variant_ux500v2 = { .fifohalfsize = 8 * 4, .clkreg = MCI_CLK_ENABLE, .clkreg_enable = MCI_ST_UX500_HWFCEN, + .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS, + .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE, + .datactrl_mask_ddrmode = MCI_ST_DPSM_DDRMODE, .datalength_bits = 24, .sdio = true, .st_clkdiv = true, .blksz_datactrl16 = true, .pwrreg_powerup = MCI_PWR_ON, + .f_max = 100000000, .signal_direction = true, .pwrreg_clkgate = true, .busy_detect = true, .pwrreg_nopower = true, }; +static struct variant_data variant_qcom = { + .fifosize = 16 * 4, + .fifohalfsize = 8 * 4, + .clkreg = MCI_CLK_ENABLE, + .clkreg_enable = MCI_QCOM_CLK_FLOWENA | + MCI_QCOM_CLK_SELECT_IN_FBCLK, + .clkreg_8bit_bus_enable = MCI_QCOM_CLK_WIDEBUS_8, + .datactrl_mask_ddrmode = MCI_QCOM_CLK_SELECT_IN_DDR_MODE, + .data_cmd_enable = MCI_QCOM_CSPM_DATCMD, + .blksz_datactrl4 = true, + .datalength_bits = 24, + .pwrreg_powerup = MCI_PWR_UP, + .f_max = 208000000, + .explicit_mclk_control = true, + .qcom_fifo = true, +}; + static int mmci_card_busy(struct mmc_host *mmc) { struct mmci_host *host = mmc_priv(mmc); @@ -260,7 +310,9 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired) host->cclk = 0; if (desired) { - if (desired >= host->mclk) { + if (variant->explicit_mclk_control) { + host->cclk = host->mclk; + } else if (desired >= host->mclk) { clk = MCI_CLK_BYPASS; if (variant->st_clkdiv) clk |= MCI_ST_UX500_NEG_EDGE; @@ -299,11 +351,11 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired) if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) clk |= MCI_4BIT_BUS; if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8) - clk |= MCI_ST_8BIT_BUS; + clk |= variant->clkreg_8bit_bus_enable; if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 || host->mmc->ios.timing == MMC_TIMING_MMC_DDR52) - clk |= MCI_ST_UX500_NEG_EDGE; + clk |= variant->clkreg_neg_edge_enable; mmci_write_clkreg(host, clk); } @@ -719,7 +771,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data) data->bytes_xfered = 0; clks = (unsigned long long)data->timeout_ns * host->cclk; - do_div(clks, 1000000000UL); + do_div(clks, NSEC_PER_SEC); timeout = data->timeout_clks + (unsigned int)clks; @@ -732,6 +784,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data) if (variant->blksz_datactrl16) datactrl = MCI_DPSM_ENABLE | (data->blksz << 16); + else if (variant->blksz_datactrl4) + datactrl = MCI_DPSM_ENABLE | (data->blksz << 4); else datactrl = MCI_DPSM_ENABLE | blksz_bits << 4; @@ -767,7 +821,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data) if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 || host->mmc->ios.timing == MMC_TIMING_MMC_DDR52) - datactrl |= MCI_ST_DPSM_DDRMODE; + datactrl |= variant->datactrl_mask_ddrmode; /* * Attempt to use DMA operation mode, if this @@ -812,7 +866,7 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c) if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) { writel(0, base + MMCICOMMAND); - udelay(1); + mmci_reg_delay(host); } c |= cmd->opcode | MCI_CPSM_ENABLE; @@ -824,6 +878,9 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c) if (/*interrupt*/0) c |= MCI_CPSM_INTERRUPT; + if (mmc_cmd_type(cmd) == MMC_CMD_ADTC) + c |= host->variant->data_cmd_enable; + host->cmd = cmd; writel(cmd->arg, base + MMCIARGUMENT); @@ -834,6 +891,10 @@ static void mmci_data_irq(struct mmci_host *host, struct mmc_data *data, unsigned int status) { + /* Make sure we have data to handle */ + if (!data) + return; + /* First check for errors */ if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR| MCI_TXUNDERRUN|MCI_RXOVERRUN)) { @@ -902,9 +963,17 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd, unsigned int status) { void __iomem *base = host->base; - bool sbc = (cmd == host->mrq->sbc); - bool busy_resp = host->variant->busy_detect && - (cmd->flags & MMC_RSP_BUSY); + bool sbc, busy_resp; + + if (!cmd) + return; + + sbc = (cmd == host->mrq->sbc); + busy_resp = host->variant->busy_detect && (cmd->flags & MMC_RSP_BUSY); + + if (!((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT| + MCI_CMDSENT|MCI_CMDRESPEND))) + return; /* Check if we need to wait for busy completion. */ if (host->busy_status && (status & MCI_ST_CARDBUSY)) @@ -957,15 +1026,34 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd, } } +static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain) +{ + return remain - (readl(host->base + MMCIFIFOCNT) << 2); +} + +static int mmci_qcom_get_rx_fifocnt(struct mmci_host *host, u32 status, int r) +{ + /* + * on qcom SDCC4 only 8 words are used in each burst so only 8 addresses + * from the fifo range should be used + */ + if (status & MCI_RXFIFOHALFFULL) + return host->variant->fifohalfsize; + else if (status & MCI_RXDATAAVLBL) + return 4; + + return 0; +} + static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain) { void __iomem *base = host->base; char *ptr = buffer; - u32 status; + u32 status = readl(host->base + MMCISTATUS); int host_remain = host->size; do { - int count = host_remain - (readl(base + MMCIFIFOCNT) << 2); + int count = host->get_rx_fifocnt(host, status, host_remain); if (count > remain) count = remain; @@ -1132,9 +1220,6 @@ static irqreturn_t mmci_irq(int irq, void *dev_id) spin_lock(&host->lock); do { - struct mmc_command *cmd; - struct mmc_data *data; - status = readl(host->base + MMCISTATUS); if (host->singleirq) { @@ -1154,16 +1239,13 @@ static irqreturn_t mmci_irq(int irq, void *dev_id) dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status); - cmd = host->cmd; - if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT| - MCI_CMDSENT|MCI_CMDRESPEND) && cmd) - mmci_cmd_irq(host, cmd, status); - - data = host->data; - if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR| - MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND| - MCI_DATABLOCKEND) && data) - mmci_data_irq(host, data, status); + if (host->variant->reversed_irq_handling) { + mmci_data_irq(host, host->data, status); + mmci_cmd_irq(host, host->cmd, status); + } else { + mmci_cmd_irq(host, host->cmd, status); + mmci_data_irq(host, host->data, status); + } /* Don't poll for busy completion in irq context. */ if (host->busy_status) @@ -1296,6 +1378,17 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (!ios->clock && variant->pwrreg_clkgate) pwr &= ~MCI_PWR_ON; + if (host->variant->explicit_mclk_control && + ios->clock != host->clock_cache) { + ret = clk_set_rate(host->clk, ios->clock); + if (ret < 0) + dev_err(mmc_dev(host->mmc), + "Error setting clock rate (%d)\n", ret); + else + host->mclk = clk_get_rate(host->clk); + } + host->clock_cache = ios->clock; + spin_lock_irqsave(&host->lock, flags); mmci_set_clkreg(host, ios->clock); @@ -1443,6 +1536,11 @@ static int mmci_probe(struct amba_device *dev, if (ret) goto host_free; + if (variant->qcom_fifo) + host->get_rx_fifocnt = mmci_qcom_get_rx_fifocnt; + else + host->get_rx_fifocnt = mmci_get_rx_fifocnt; + host->plat = plat; host->variant = variant; host->mclk = clk_get_rate(host->clk); @@ -1451,8 +1549,8 @@ static int mmci_probe(struct amba_device *dev, * so we try to adjust the clock down to this, * (if possible). */ - if (host->mclk > 100000000) { - ret = clk_set_rate(host->clk, 100000000); + if (host->mclk > variant->f_max) { + ret = clk_set_rate(host->clk, variant->f_max); if (ret < 0) goto clk_disable; host->mclk = clk_get_rate(host->clk); @@ -1471,9 +1569,12 @@ static int mmci_probe(struct amba_device *dev, * The ARM and ST versions of the block have slightly different * clock divider equations which means that the minimum divider * differs too. + * on Qualcomm like controllers get the nearest minimum clock to 100Khz */ if (variant->st_clkdiv) mmc->f_min = DIV_ROUND_UP(host->mclk, 257); + else if (variant->explicit_mclk_control) + mmc->f_min = clk_round_rate(host->clk, 100000); else mmc->f_min = DIV_ROUND_UP(host->mclk, 512); /* @@ -1483,9 +1584,14 @@ static int mmci_probe(struct amba_device *dev, * the block, of course. */ if (mmc->f_max) - mmc->f_max = min(host->mclk, mmc->f_max); + mmc->f_max = variant->explicit_mclk_control ? + min(variant->f_max, mmc->f_max) : + min(host->mclk, mmc->f_max); else - mmc->f_max = min(host->mclk, fmax); + mmc->f_max = variant->explicit_mclk_control ? + fmax : min(host->mclk, fmax); + + dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max); /* Get regulators and the supported OCR mask */ @@ -1752,6 +1858,12 @@ static struct amba_id mmci_ids[] = { .mask = 0xf0ffffff, .data = &variant_ux500v2, }, + /* Qualcomm variants */ + { + .id = 0x00051180, + .mask = 0x000fffff, + .data = &variant_qcom, + }, { 0, 0 }, }; diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h index 347d942..a1f5e4f 100644 --- a/drivers/mmc/host/mmci.h +++ b/drivers/mmc/host/mmci.h @@ -41,6 +41,15 @@ /* Modified PL180 on Versatile Express platform */ #define MCI_ARM_HWFCEN (1 << 12) +/* Modified on Qualcomm Integrations */ +#define MCI_QCOM_CLK_WIDEBUS_8 (BIT(10) | BIT(11)) +#define MCI_QCOM_CLK_FLOWENA BIT(12) +#define MCI_QCOM_CLK_INVERTOUT BIT(13) + +/* select in latch data and command in */ +#define MCI_QCOM_CLK_SELECT_IN_FBCLK BIT(15) +#define MCI_QCOM_CLK_SELECT_IN_DDR_MODE (BIT(14) | BIT(15)) + #define MMCIARGUMENT 0x008 #define MMCICOMMAND 0x00c #define MCI_CPSM_RESPONSE (1 << 6) @@ -54,6 +63,14 @@ #define MCI_ST_NIEN (1 << 13) #define MCI_ST_CE_ATACMD (1 << 14) +/* Modified on Qualcomm Integrations */ +#define MCI_QCOM_CSPM_DATCMD BIT(12) +#define MCI_QCOM_CSPM_MCIABORT BIT(13) +#define MCI_QCOM_CSPM_CCSENABLE BIT(14) +#define MCI_QCOM_CSPM_CCSDISABLE BIT(15) +#define MCI_QCOM_CSPM_AUTO_CMD19 BIT(16) +#define MCI_QCOM_CSPM_AUTO_CMD21 BIT(21) + #define MMCIRESPCMD 0x010 #define MMCIRESPONSE0 0x014 #define MMCIRESPONSE1 0x018 @@ -191,6 +208,8 @@ struct mmci_host { spinlock_t lock; unsigned int mclk; + /* cached value of requested clk in set_ios */ + unsigned int clock_cache; unsigned int cclk; u32 pwr_reg; u32 pwr_reg_add; @@ -210,6 +229,7 @@ struct mmci_host { /* pio stuff */ struct sg_mapping_iter sg_miter; unsigned int size; + int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain); #ifdef CONFIG_DMA_ENGINE /* DMA stuff */ diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index 74924a0..b4b1efb 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -13,7 +13,6 @@ * warranty of any kind, whether express or implied. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/init.h> #include <linux/platform_device.h> diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index babfea0..140885a 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -86,7 +86,8 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc) if (ret >= 0) return ret; - present = !(readl(ssp->base + HW_SSP_STATUS(ssp)) & + present = mmc->caps & MMC_CAP_NEEDS_POLL || + !(readl(ssp->base + HW_SSP_STATUS(ssp)) & BM_SSP_STATUS_CARD_DETECT); if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 6b7b755..9656726 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -29,6 +29,7 @@ #include <linux/timer.h> #include <linux/clk.h> #include <linux/of.h> +#include <linux/of_irq.h> #include <linux/of_gpio.h> #include <linux/of_device.h> #include <linux/omap-dmaengine.h> @@ -36,6 +37,7 @@ #include <linux/mmc/core.h> #include <linux/mmc/mmc.h> #include <linux/io.h> +#include <linux/irq.h> #include <linux/gpio.h> #include <linux/regulator/consumer.h> #include <linux/pinctrl/consumer.h> @@ -54,6 +56,7 @@ #define OMAP_HSMMC_RSP54 0x0118 #define OMAP_HSMMC_RSP76 0x011C #define OMAP_HSMMC_DATA 0x0120 +#define OMAP_HSMMC_PSTATE 0x0124 #define OMAP_HSMMC_HCTL 0x0128 #define OMAP_HSMMC_SYSCTL 0x012C #define OMAP_HSMMC_STAT 0x0130 @@ -91,7 +94,10 @@ #define BCE (1 << 1) #define FOUR_BIT (1 << 1) #define HSPE (1 << 2) +#define IWE (1 << 24) #define DDR (1 << 19) +#define CLKEXTFREE (1 << 16) +#define CTPL (1 << 11) #define DW8 (1 << 5) #define OD 0x1 #define STAT_CLEAR 0xFFFFFFFF @@ -101,11 +107,15 @@ #define SRD (1 << 26) #define SOFTRESET (1 << 1) +/* PSTATE */ +#define DLEV_DAT(x) (1 << (20 + (x))) + /* Interrupt masks for IE and ISE register */ #define CC_EN (1 << 0) #define TC_EN (1 << 1) #define BWR_EN (1 << 4) #define BRR_EN (1 << 5) +#define CIRQ_EN (1 << 8) #define ERR_EN (1 << 15) #define CTO_EN (1 << 16) #define CCRC_EN (1 << 17) @@ -140,7 +150,6 @@ #define VDD_3V0 3000000 /* 300000 uV */ #define VDD_165_195 (ffs(MMC_VDD_165_195) - 1) -#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */ /* * One controller can have multiple slots, like on some omap boards using * omap.c controller driver. Luckily this is not currently done on any known @@ -194,6 +203,7 @@ struct omap_hsmmc_host { u32 sysctl; u32 capa; int irq; + int wake_irq; int use_dma, dma_ch; struct dma_chan *tx_chan; struct dma_chan *rx_chan; @@ -206,6 +216,9 @@ struct omap_hsmmc_host { int req_in_progress; unsigned long clk_rate; unsigned int flags; +#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */ +#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */ +#define HSMMC_WAKE_IRQ_ENABLED (1 << 2) struct omap_hsmmc_next next_data; struct omap_mmc_platform_data *pdata; }; @@ -510,27 +523,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host) static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host, struct mmc_command *cmd) { - unsigned int irq_mask; + u32 irq_mask = INT_EN_MASK; + unsigned long flags; if (host->use_dma) - irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN); - else - irq_mask = INT_EN_MASK; + irq_mask &= ~(BRR_EN | BWR_EN); /* Disable timeout for erases */ if (cmd->opcode == MMC_ERASE) irq_mask &= ~DTO_EN; + spin_lock_irqsave(&host->irq_lock, flags); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* latch pending CIRQ, but don't signal MMC core */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + spin_unlock_irqrestore(&host->irq_lock, flags); } static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) { - OMAP_HSMMC_WRITE(host->base, ISE, 0); - OMAP_HSMMC_WRITE(host->base, IE, 0); + u32 irq_mask = 0; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + /* no transfer running but need to keep cirq if enabled */ + if (host->flags & HSMMC_SDIO_IRQ_ENABLED) + irq_mask |= CIRQ_EN; + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + spin_unlock_irqrestore(&host->irq_lock, flags); } /* Calculate divisor for the given clock frequency */ @@ -667,6 +693,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) capa = VS18; } + if (host->mmc->caps & MMC_CAP_SDIO_IRQ) + hctl |= IWE; + OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) | hctl); @@ -681,7 +710,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host) && time_before(jiffies, timeout)) ; - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); /* Do not initialize card-specific things if the power is off */ if (host->power_mode == MMC_POWER_OFF) @@ -1118,8 +1149,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) int status; status = OMAP_HSMMC_READ(host->base, STAT); - while (status & INT_EN_MASK && host->req_in_progress) { - omap_hsmmc_do_irq(host, status); + while (status & (INT_EN_MASK | CIRQ_EN)) { + if (host->req_in_progress) + omap_hsmmc_do_irq(host, status); + + if (status & CIRQ_EN) + mmc_signal_sdio_irq(host->mmc); /* Flush posted write */ status = OMAP_HSMMC_READ(host->base, STAT); @@ -1128,6 +1163,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id) return IRQ_HANDLED; } +static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id) +{ + struct omap_hsmmc_host *host = dev_id; + + /* cirq is level triggered, disable to avoid infinite loop */ + spin_lock(&host->irq_lock); + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + spin_unlock(&host->irq_lock); + pm_request_resume(host->dev); /* no use counter */ + + return IRQ_HANDLED; +} + static void set_sd_bus_power(struct omap_hsmmc_host *host) { unsigned long i; @@ -1639,6 +1690,103 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card) mmc_slot(host).init_card(card); } +static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) +{ + struct omap_hsmmc_host *host = mmc_priv(mmc); + u32 irq_mask, con; + unsigned long flags; + + spin_lock_irqsave(&host->irq_lock, flags); + + con = OMAP_HSMMC_READ(host->base, CON); + irq_mask = OMAP_HSMMC_READ(host->base, ISE); + if (enable) { + host->flags |= HSMMC_SDIO_IRQ_ENABLED; + irq_mask |= CIRQ_EN; + con |= CTPL | CLKEXTFREE; + } else { + host->flags &= ~HSMMC_SDIO_IRQ_ENABLED; + irq_mask &= ~CIRQ_EN; + con &= ~(CTPL | CLKEXTFREE); + } + OMAP_HSMMC_WRITE(host->base, CON, con); + OMAP_HSMMC_WRITE(host->base, IE, irq_mask); + + /* + * if enable, piggy back detection on current request + * but always disable immediately + */ + if (!host->req_in_progress || !enable) + OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); + + /* flush posted write */ + OMAP_HSMMC_READ(host->base, IE); + + spin_unlock_irqrestore(&host->irq_lock, flags); +} + +static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host) +{ + struct mmc_host *mmc = host->mmc; + int ret; + + /* + * For omaps with wake-up path, wakeirq will be irq from pinctrl and + * for other omaps, wakeirq will be from GPIO (dat line remuxed to + * gpio). wakeirq is needed to detect sdio irq in runtime suspend state + * with functional clock disabled. + */ + if (!host->dev->of_node || !host->wake_irq) + return -ENODEV; + + /* Prevent auto-enabling of IRQ */ + irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN); + ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + mmc_hostname(mmc), host); + if (ret) { + dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n"); + goto err; + } + + /* + * Some omaps don't have wake-up path from deeper idle states + * and need to remux SDIO DAT1 to GPIO for wake-up from idle. + */ + if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) { + struct pinctrl *p = devm_pinctrl_get(host->dev); + if (!p) { + ret = -ENODEV; + goto err_free_irq; + } + if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) { + dev_info(host->dev, "missing default pinctrl state\n"); + devm_pinctrl_put(p); + ret = -EINVAL; + goto err_free_irq; + } + + if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_IDLE))) { + dev_info(host->dev, "missing idle pinctrl state\n"); + devm_pinctrl_put(p); + ret = -EINVAL; + goto err_free_irq; + } + devm_pinctrl_put(p); + } + + OMAP_HSMMC_WRITE(host->base, HCTL, + OMAP_HSMMC_READ(host->base, HCTL) | IWE); + return 0; + +err_free_irq: + devm_free_irq(host->dev, host->wake_irq, host); +err: + dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n"); + host->wake_irq = 0; + return ret; +} + static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) { u32 hctl, capa, value; @@ -1691,7 +1839,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = { .get_cd = omap_hsmmc_get_cd, .get_ro = omap_hsmmc_get_ro, .init_card = omap_hsmmc_init_card, - /* NYET -- enable_sdio_irq */ + .enable_sdio_irq = omap_hsmmc_enable_sdio_irq, }; #ifdef CONFIG_DEBUG_FS @@ -1701,13 +1849,23 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data) struct mmc_host *mmc = s->private; struct omap_hsmmc_host *host = mmc_priv(mmc); - seq_printf(s, "mmc%d:\n ctx_loss:\t%d\n\nregs:\n", - mmc->index, host->context_loss); + seq_printf(s, "mmc%d:\n", mmc->index); + seq_printf(s, "sdio irq mode\t%s\n", + (mmc->caps & MMC_CAP_SDIO_IRQ) ? "interrupt" : "polling"); - pm_runtime_get_sync(host->dev); + if (mmc->caps & MMC_CAP_SDIO_IRQ) { + seq_printf(s, "sdio irq \t%s\n", + (host->flags & HSMMC_SDIO_IRQ_ENABLED) ? "enabled" + : "disabled"); + } + seq_printf(s, "ctx_loss:\t%d\n", host->context_loss); + pm_runtime_get_sync(host->dev); + seq_puts(s, "\nregs:\n"); seq_printf(s, "CON:\t\t0x%08x\n", OMAP_HSMMC_READ(host->base, CON)); + seq_printf(s, "PSTATE:\t\t0x%08x\n", + OMAP_HSMMC_READ(host->base, PSTATE)); seq_printf(s, "HCTL:\t\t0x%08x\n", OMAP_HSMMC_READ(host->base, HCTL)); seq_printf(s, "SYSCTL:\t\t0x%08x\n", @@ -1761,6 +1919,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = { static const struct omap_mmc_of_data omap4_mmc_of_data = { .reg_offset = 0x100, }; +static const struct omap_mmc_of_data am33xx_mmc_of_data = { + .reg_offset = 0x100, + .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING, +}; static const struct of_device_id omap_mmc_of_match[] = { { @@ -1777,6 +1939,10 @@ static const struct of_device_id omap_mmc_of_match[] = { .compatible = "ti,omap4-hsmmc", .data = &omap4_mmc_of_data, }, + { + .compatible = "ti,am33xx-hsmmc", + .data = &am33xx_mmc_of_data, + }, {}, }; MODULE_DEVICE_TABLE(of, omap_mmc_of_match); @@ -1850,7 +2016,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev) const struct of_device_id *match; dma_cap_mask_t mask; unsigned tx_req, rx_req; - struct pinctrl *pinctrl; const struct omap_mmc_of_data *data; void __iomem *base; @@ -1913,6 +2078,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); + if (pdev->dev.of_node) + host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1); + mmc->ops = &omap_hsmmc_ops; mmc->f_min = OMAP_MMC_MIN_CLOCK; @@ -2061,10 +2229,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev) omap_hsmmc_disable_irq(host); - pinctrl = devm_pinctrl_get_select_default(&pdev->dev); - if (IS_ERR(pinctrl)) - dev_warn(&pdev->dev, - "pins are not configured from the driver\n"); + /* + * For now, only support SDIO interrupt if we have a separate + * wake-up interrupt configured from device tree. This is because + * the wake-up interrupt is needed for idle state and some + * platforms need special quirks. And we don't want to add new + * legacy mux platform init code callbacks any longer as we + * are moving to DT based booting anyways. + */ + ret = omap_hsmmc_configure_wake_irq(host); + if (!ret) + mmc->caps |= MMC_CAP_SDIO_IRQ; omap_hsmmc_protect_card(host); @@ -2170,11 +2345,18 @@ static int omap_hsmmc_suspend(struct device *dev) pm_runtime_get_sync(host->dev); if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) { - omap_hsmmc_disable_irq(host); + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); OMAP_HSMMC_WRITE(host->base, HCTL, OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); } + /* do not wake up due to sdio irq */ + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + disable_irq(host->wake_irq); + if (host->dbclk) clk_disable_unprepare(host->dbclk); @@ -2200,6 +2382,10 @@ static int omap_hsmmc_resume(struct device *dev) omap_hsmmc_protect_card(host); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ)) + enable_irq(host->wake_irq); + pm_runtime_mark_last_busy(host->dev); pm_runtime_put_autosuspend(host->dev); return 0; @@ -2215,22 +2401,77 @@ static int omap_hsmmc_resume(struct device *dev) static int omap_hsmmc_runtime_suspend(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; + int ret = 0; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_save(host); dev_dbg(dev, "disabled\n"); - return 0; + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* disable sdio irq handling to prevent race */ + OMAP_HSMMC_WRITE(host->base, ISE, 0); + OMAP_HSMMC_WRITE(host->base, IE, 0); + + if (!(OMAP_HSMMC_READ(host->base, PSTATE) & DLEV_DAT(1))) { + /* + * dat1 line low, pending sdio irq + * race condition: possible irq handler running on + * multi-core, abort + */ + dev_dbg(dev, "pending sdio irq, abort suspend\n"); + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN); + OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN); + pm_runtime_mark_last_busy(dev); + ret = -EBUSY; + goto abort; + } + + pinctrl_pm_select_idle_state(dev); + + WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED); + enable_irq(host->wake_irq); + host->flags |= HSMMC_WAKE_IRQ_ENABLED; + } else { + pinctrl_pm_select_idle_state(dev); + } + +abort: + spin_unlock_irqrestore(&host->irq_lock, flags); + return ret; } static int omap_hsmmc_runtime_resume(struct device *dev) { struct omap_hsmmc_host *host; + unsigned long flags; host = platform_get_drvdata(to_platform_device(dev)); omap_hsmmc_context_restore(host); dev_dbg(dev, "enabled\n"); + spin_lock_irqsave(&host->irq_lock, flags); + if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) && + (host->flags & HSMMC_SDIO_IRQ_ENABLED)) { + /* sdio irq flag can't change while in runtime suspend */ + if (host->flags & HSMMC_WAKE_IRQ_ENABLED) { + disable_irq_nosync(host->wake_irq); + host->flags &= ~HSMMC_WAKE_IRQ_ENABLED; + } + + pinctrl_pm_select_default_state(host->dev); + + /* irq lost, if pinmux incorrect */ + OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); + OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN); + OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN); + } else { + pinctrl_pm_select_default_state(host->dev); + } + spin_unlock_irqrestore(&host->irq_lock, flags); return 0; } diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index f237826..e5516a2 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -12,6 +12,7 @@ */ #include <linux/module.h> +#include <linux/dmaengine.h> #include <linux/dma-mapping.h> #include <linux/clk.h> #include <linux/mmc/host.h> @@ -27,6 +28,7 @@ #include <mach/dma.h> #include <mach/gpio-samsung.h> +#include <linux/platform_data/dma-s3c24xx.h> #include <linux/platform_data/mmc-s3cmci.h> #include "s3cmci.h" @@ -140,10 +142,6 @@ static const int dbgmap_debug = dbg_err | dbg_debug; dev_dbg(&host->pdev->dev, args); \ } while (0) -static struct s3c2410_dma_client s3cmci_dma_client = { - .name = "s3c-mci", -}; - static void finalize_request(struct s3cmci_host *host); static void s3cmci_send_request(struct mmc_host *mmc); static void s3cmci_reset(struct s3cmci_host *host); @@ -256,25 +254,8 @@ static inline bool s3cmci_host_usedma(struct s3cmci_host *host) { #ifdef CONFIG_MMC_S3C_PIO return false; -#elif defined(CONFIG_MMC_S3C_DMA) +#else /* CONFIG_MMC_S3C_DMA */ return true; -#else - return host->dodma; -#endif -} - -/** - * s3cmci_host_canpio - return true if host has pio code available - * - * Return true if the driver has been compiled with the PIO support code - * available. - */ -static inline bool s3cmci_host_canpio(void) -{ -#ifdef CONFIG_MMC_S3C_PIO - return true; -#else - return false; #endif } @@ -841,60 +822,24 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id) return IRQ_HANDLED; } -static void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch, - void *buf_id, int size, - enum s3c2410_dma_buffresult result) +static void s3cmci_dma_done_callback(void *arg) { - struct s3cmci_host *host = buf_id; + struct s3cmci_host *host = arg; unsigned long iflags; - u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt; - - mci_csta = readl(host->base + S3C2410_SDICMDSTAT); - mci_dsta = readl(host->base + S3C2410_SDIDSTA); - mci_fsta = readl(host->base + S3C2410_SDIFSTA); - mci_dcnt = readl(host->base + S3C2410_SDIDCNT); BUG_ON(!host->mrq); BUG_ON(!host->mrq->data); - BUG_ON(!host->dmatogo); spin_lock_irqsave(&host->complete_lock, iflags); - if (result != S3C2410_RES_OK) { - dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x " - "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n", - mci_csta, mci_dsta, mci_fsta, - mci_dcnt, result, host->dmatogo); - - goto fail_request; - } - - host->dmatogo--; - if (host->dmatogo) { - dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] " - "DCNT:[%08x] toGo:%u\n", - size, mci_dsta, mci_dcnt, host->dmatogo); - - goto out; - } - - dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n", - size, mci_dsta, mci_dcnt); + dbg(host, dbg_dma, "DMA FINISHED\n"); host->dma_complete = 1; host->complete_what = COMPLETION_FINALIZE; -out: tasklet_schedule(&host->pio_tasklet); spin_unlock_irqrestore(&host->complete_lock, iflags); - return; -fail_request: - host->mrq->data->error = -EINVAL; - host->complete_what = COMPLETION_FINALIZE; - clear_imask(host); - - goto out; } static void finalize_request(struct s3cmci_host *host) @@ -966,7 +911,7 @@ static void finalize_request(struct s3cmci_host *host) * DMA channel and the fifo to clear out any garbage. */ if (mrq->data->error != 0) { if (s3cmci_host_usedma(host)) - s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); + dmaengine_terminate_all(host->dma); if (host->is2440) { /* Clear failure register and reset fifo. */ @@ -992,29 +937,6 @@ request_done: mmc_request_done(host->mmc, mrq); } -static void s3cmci_dma_setup(struct s3cmci_host *host, - enum dma_data_direction source) -{ - static enum dma_data_direction last_source = -1; - static int setup_ok; - - if (last_source == source) - return; - - last_source = source; - - s3c2410_dma_devconfig(host->dma, source, - host->mem->start + host->sdidata); - - if (!setup_ok) { - s3c2410_dma_config(host->dma, 4); - s3c2410_dma_set_buffdone_fn(host->dma, - s3cmci_dma_done_callback); - s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART); - setup_ok = 1; - } -} - static void s3cmci_send_command(struct s3cmci_host *host, struct mmc_command *cmd) { @@ -1162,43 +1084,45 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data) static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data) { - int dma_len, i; int rw = data->flags & MMC_DATA_WRITE; + struct dma_async_tx_descriptor *desc; + struct dma_slave_config conf = { + .src_addr = host->mem->start + host->sdidata, + .dst_addr = host->mem->start + host->sdidata, + .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + }; BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); - s3cmci_dma_setup(host, rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); - - dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, - rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - - if (dma_len == 0) - return -ENOMEM; - - host->dma_complete = 0; - host->dmatogo = dma_len; - - for (i = 0; i < dma_len; i++) { - int res; - - dbg(host, dbg_dma, "enqueue %i: %08x@%u\n", i, - sg_dma_address(&data->sg[i]), - sg_dma_len(&data->sg[i])); + /* Restore prescaler value */ + writel(host->prescaler, host->base + S3C2410_SDIPRE); - res = s3c2410_dma_enqueue(host->dma, host, - sg_dma_address(&data->sg[i]), - sg_dma_len(&data->sg[i])); + if (!rw) + conf.direction = DMA_DEV_TO_MEM; + else + conf.direction = DMA_MEM_TO_DEV; - if (res) { - s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); - return -EBUSY; - } - } + dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE); - s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START); + dmaengine_slave_config(host->dma, &conf); + desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len, + conf.direction, + DMA_CTRL_ACK | DMA_PREP_INTERRUPT); + if (!desc) + goto unmap_exit; + desc->callback = s3cmci_dma_done_callback; + desc->callback_param = host; + dmaengine_submit(desc); + dma_async_issue_pending(host->dma); return 0; + +unmap_exit: + dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, + rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + return -ENOMEM; } static void s3cmci_send_request(struct mmc_host *mmc) @@ -1676,10 +1600,6 @@ static int s3cmci_probe(struct platform_device *pdev) host->complete_what = COMPLETION_NONE; host->pio_active = XFER_NONE; -#ifdef CONFIG_MMC_S3C_PIODMA - host->dodma = host->pdata->use_dma; -#endif - host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!host->mem) { dev_err(&pdev->dev, @@ -1765,17 +1685,17 @@ static int s3cmci_probe(struct platform_device *pdev) /* depending on the dma state, get a dma channel to use. */ if (s3cmci_host_usedma(host)) { - host->dma = s3c2410_dma_request(DMACH_SDI, &s3cmci_dma_client, - host); - if (host->dma < 0) { + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + host->dma = dma_request_slave_channel_compat(mask, + s3c24xx_dma_filter, (void *)DMACH_SDI, &pdev->dev, "rx-tx"); + if (!host->dma) { dev_err(&pdev->dev, "cannot get DMA channel.\n"); - if (!s3cmci_host_canpio()) { - ret = -EBUSY; - goto probe_free_gpio_wp; - } else { - dev_warn(&pdev->dev, "falling back to PIO.\n"); - host->dodma = 0; - } + ret = -EBUSY; + goto probe_free_gpio_wp; } } @@ -1787,7 +1707,7 @@ static int s3cmci_probe(struct platform_device *pdev) goto probe_free_dma; } - ret = clk_enable(host->clk); + ret = clk_prepare_enable(host->clk); if (ret) { dev_err(&pdev->dev, "failed to enable clock source.\n"); goto clk_free; @@ -1816,7 +1736,7 @@ static int s3cmci_probe(struct platform_device *pdev) mmc->max_segs = 128; dbg(host, dbg_debug, - "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n", + "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%p.\n", (host->is2440?"2440":""), host->base, host->irq, host->irq_cd, host->dma); @@ -1845,14 +1765,14 @@ static int s3cmci_probe(struct platform_device *pdev) s3cmci_cpufreq_deregister(host); free_dmabuf: - clk_disable(host->clk); + clk_disable_unprepare(host->clk); clk_free: clk_put(host->clk); probe_free_dma: if (s3cmci_host_usedma(host)) - s3c2410_dma_free(host->dma, &s3cmci_dma_client); + dma_release_channel(host->dma); probe_free_gpio_wp: if (!host->pdata->no_wprotect) @@ -1897,7 +1817,7 @@ static void s3cmci_shutdown(struct platform_device *pdev) s3cmci_debugfs_remove(host); s3cmci_cpufreq_deregister(host); mmc_remove_host(mmc); - clk_disable(host->clk); + clk_disable_unprepare(host->clk); } static int s3cmci_remove(struct platform_device *pdev) @@ -1914,7 +1834,7 @@ static int s3cmci_remove(struct platform_device *pdev) tasklet_disable(&host->pio_tasklet); if (s3cmci_host_usedma(host)) - s3c2410_dma_free(host->dma, &s3cmci_dma_client); + dma_release_channel(host->dma); free_irq(host->irq, host); diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h index c76b53d..cc2e46c 100644 --- a/drivers/mmc/host/s3cmci.h +++ b/drivers/mmc/host/s3cmci.h @@ -26,7 +26,7 @@ struct s3cmci_host { void __iomem *base; int irq; int irq_cd; - int dma; + struct dma_chan *dma; unsigned long clk_rate; unsigned long clk_div; @@ -36,8 +36,6 @@ struct s3cmci_host { int is2440; unsigned sdiimsk; unsigned sdidata; - int dodma; - int dmatogo; bool irq_disabled; bool irq_enabled; diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 8ce3c28..8c53370 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -124,9 +124,11 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { .chip = &sdhci_acpi_chip_int, - .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET, + .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | + MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR, .caps2 = MMC_CAP2_HC_ERASE_SZ, .flags = SDHCI_ACPI_RUNTIME_PM, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, }; static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = { diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 40573a5..1a6661e 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -16,7 +16,6 @@ #include <linux/module.h> #include <linux/of_device.h> -#include <linux/regulator/consumer.h> #include <linux/delay.h> #include <linux/mmc/mmc.h> #include <linux/slab.h> diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c index 52c42fc..c3a1deb 100644 --- a/drivers/mmc/host/sdhci-pci.c +++ b/drivers/mmc/host/sdhci-pci.c @@ -103,6 +103,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = { SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, }; +static const struct sdhci_pci_fixes sdhci_intel_qrk = { + .quirks = SDHCI_QUIRK_NO_HISPD_BIT, +}; + static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot) { slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; @@ -264,7 +268,7 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host) static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) { slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | - MMC_CAP_HW_RESET; + MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR; slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ; slot->hw_reset = sdhci_pci_int_hw_reset; return 0; @@ -279,6 +283,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot) static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { .allow_runtime_pm = true, .probe_slot = byt_emmc_probe_slot, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, }; static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { @@ -753,6 +758,14 @@ static const struct pci_device_id pci_ids[] = { { .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_QRK_SD, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .driver_data = (kernel_ulong_t)&sdhci_intel_qrk, + }, + + { + .vendor = PCI_VENDOR_ID_INTEL, .device = PCI_DEVICE_ID_INTEL_MRST_SD0, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, @@ -1130,18 +1143,13 @@ static int sdhci_pci_suspend(struct device *dev) goto err_pci_suspend; } - pci_save_state(pdev); if (pm_flags & MMC_PM_KEEP_POWER) { - if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) { - pci_pme_active(pdev, true); - pci_enable_wake(pdev, PCI_D3hot, 1); - } - pci_set_power_state(pdev, PCI_D3hot); - } else { - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); - } + if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) + device_init_wakeup(dev, true); + else + device_init_wakeup(dev, false); + } else + device_init_wakeup(dev, false); return 0; @@ -1162,12 +1170,6 @@ static int sdhci_pci_resume(struct device *dev) if (!chip) return 0; - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - ret = pci_enable_device(pdev); - if (ret) - return ret; - if (chip->fixes && chip->fixes->resume) { ret = chip->fixes->resume(chip); if (ret) diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 6d71871..c101477 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -17,6 +17,7 @@ #define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb #define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5 #define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6 +#define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7 /* * PCI registers diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index f4f1289..6f842fb 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -288,15 +288,13 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) int ret; struct clk *clk; - pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); + pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL); if (!pxa) return -ENOMEM; host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0); - if (IS_ERR(host)) { - kfree(pxa); + if (IS_ERR(host)) return PTR_ERR(host); - } if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) { ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info()); @@ -308,7 +306,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev) pltfm_host = sdhci_priv(host); pltfm_host->priv = pxa; - clk = clk_get(dev, NULL); + clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) { dev_err(dev, "failed to get io clock\n"); ret = PTR_ERR(clk); @@ -389,11 +387,9 @@ err_add_host: pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); clk_disable_unprepare(clk); - clk_put(clk); err_clk_get: err_mbus_win: sdhci_pltfm_free(pdev); - kfree(pxa); return ret; } @@ -401,17 +397,14 @@ static int sdhci_pxav3_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_pxa *pxa = pltfm_host->priv; pm_runtime_get_sync(&pdev->dev); sdhci_remove_host(host, 1); pm_runtime_disable(&pdev->dev); clk_disable_unprepare(pltfm_host->clk); - clk_put(pltfm_host->clk); sdhci_pltfm_free(pdev); - kfree(pxa); return 0; } diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c new file mode 100644 index 0000000..328f348 --- /dev/null +++ b/drivers/mmc/host/sdhci-st.c @@ -0,0 +1,176 @@ +/* + * Support for SDHCI on STMicroelectronics SoCs + * + * Copyright (C) 2014 STMicroelectronics Ltd + * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com> + * Contributors: Peter Griffin <peter.griffin@linaro.org> + * + * Based on sdhci-cns3xxx.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/io.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/err.h> +#include <linux/mmc/host.h> + +#include "sdhci-pltfm.h" + +static u32 sdhci_st_readl(struct sdhci_host *host, int reg) +{ + u32 ret; + + switch (reg) { + case SDHCI_CAPABILITIES: + ret = readl_relaxed(host->ioaddr + reg); + /* Support 3.3V and 1.8V */ + ret &= ~SDHCI_CAN_VDD_300; + break; + default: + ret = readl_relaxed(host->ioaddr + reg); + } + return ret; +} + +static const struct sdhci_ops sdhci_st_ops = { + .get_max_clock = sdhci_pltfm_clk_get_max_clock, + .set_clock = sdhci_set_clock, + .set_bus_width = sdhci_set_bus_width, + .read_l = sdhci_st_readl, + .reset = sdhci_reset, +}; + +static const struct sdhci_pltfm_data sdhci_st_pdata = { + .ops = &sdhci_st_ops, + .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC | + SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, +}; + + +static int sdhci_st_probe(struct platform_device *pdev) +{ + struct sdhci_host *host; + struct sdhci_pltfm_host *pltfm_host; + struct clk *clk; + int ret = 0; + u16 host_version; + + clk = devm_clk_get(&pdev->dev, "mmc"); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "Peripheral clk not found\n"); + return PTR_ERR(clk); + } + + host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0); + if (IS_ERR(host)) { + dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n"); + return PTR_ERR(host); + } + + ret = mmc_of_parse(host->mmc); + + if (ret) { + dev_err(&pdev->dev, "Failed mmc_of_parse\n"); + return ret; + } + + clk_prepare_enable(clk); + + pltfm_host = sdhci_priv(host); + pltfm_host->clk = clk; + + ret = sdhci_add_host(host); + if (ret) { + dev_err(&pdev->dev, "Failed sdhci_add_host\n"); + goto err_out; + } + + platform_set_drvdata(pdev, host); + + host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION)); + + dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n", + ((host_version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT), + ((host_version & SDHCI_VENDOR_VER_MASK) >> + SDHCI_VENDOR_VER_SHIFT)); + + return 0; + +err_out: + clk_disable_unprepare(clk); + sdhci_pltfm_free(pdev); + + return ret; +} + +static int sdhci_st_remove(struct platform_device *pdev) +{ + struct sdhci_host *host = platform_get_drvdata(pdev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + + clk_disable_unprepare(pltfm_host->clk); + + return sdhci_pltfm_unregister(pdev); +} + +#ifdef CONFIG_PM_SLEEP +static int sdhci_st_suspend(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + int ret = sdhci_suspend_host(host); + + if (ret) + goto out; + + clk_disable_unprepare(pltfm_host->clk); +out: + return ret; +} + +static int sdhci_st_resume(struct device *dev) +{ + struct sdhci_host *host = dev_get_drvdata(dev); + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + + clk_prepare_enable(pltfm_host->clk); + + return sdhci_resume_host(host); +} +#endif + +static SIMPLE_DEV_PM_OPS(sdhci_st_pmops, sdhci_st_suspend, sdhci_st_resume); + +static const struct of_device_id st_sdhci_match[] = { + { .compatible = "st,sdhci" }, + {}, +}; + +MODULE_DEVICE_TABLE(of, st_sdhci_match); + +static struct platform_driver sdhci_st_driver = { + .probe = sdhci_st_probe, + .remove = sdhci_st_remove, + .driver = { + .name = "sdhci-st", + .pm = &sdhci_st_pmops, + .of_match_table = of_match_ptr(st_sdhci_match), + }, +}; + +module_platform_driver(sdhci_st_driver); + +MODULE_DESCRIPTION("SDHCI driver for STMicroelectronics SoCs"); +MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:st-sdhci"); diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index d93a063..33100d1 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -26,8 +26,6 @@ #include <linux/mmc/host.h> #include <linux/mmc/slot-gpio.h> -#include <asm/gpio.h> - #include "sdhci-pltfm.h" /* Tegra SDHOST controller vendor register definitions */ diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 47055f3..37b2a9a 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1223,8 +1223,16 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock); static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd) { + struct mmc_host *mmc = host->mmc; u8 pwr = 0; + if (!IS_ERR(mmc->supply.vmmc)) { + spin_unlock_irq(&host->lock); + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + spin_lock_irq(&host->lock); + return; + } + if (mode != MMC_POWER_OFF) { switch (1 << vdd) { case MMC_VDD_165_195: @@ -1283,12 +1291,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER) mdelay(10); } - - if (host->vmmc) { - spin_unlock_irq(&host->lock); - mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd); - spin_lock_irq(&host->lock); - } } /*****************************************************************************\ @@ -1440,13 +1442,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) { unsigned long flags; u8 ctrl; + struct mmc_host *mmc = host->mmc; spin_lock_irqsave(&host->lock, flags); if (host->flags & SDHCI_DEVICE_DEAD) { spin_unlock_irqrestore(&host->lock, flags); - if (host->vmmc && ios->power_mode == MMC_POWER_OFF) - mmc_regulator_set_ocr(host->mmc, host->vmmc, 0); + if (!IS_ERR(mmc->supply.vmmc) && + ios->power_mode == MMC_POWER_OFF) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); return; } @@ -1530,7 +1534,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios) host->ops->set_clock(host, host->clock); } - /* Reset SD Clock Enable */ clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); clk &= ~SDHCI_CLOCK_CARD_EN; @@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable) static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, struct mmc_ios *ios) { + struct mmc_host *mmc = host->mmc; u16 ctrl; int ret; @@ -1725,11 +1729,12 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, ctrl &= ~SDHCI_CTRL_VDD_180; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000); + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000, + 3600000); if (ret) { pr_warning("%s: Switching to 3.3V signalling voltage " - " failed\n", mmc_hostname(host->mmc)); + " failed\n", mmc_hostname(mmc)); return -EIO; } } @@ -1742,16 +1747,16 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, return 0; pr_warning("%s: 3.3V regulator output did not became stable\n", - mmc_hostname(host->mmc)); + mmc_hostname(mmc)); return -EAGAIN; case MMC_SIGNAL_VOLTAGE_180: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1700000, 1950000); if (ret) { pr_warning("%s: Switching to 1.8V signalling voltage " - " failed\n", mmc_hostname(host->mmc)); + " failed\n", mmc_hostname(mmc)); return -EIO; } } @@ -1763,24 +1768,22 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, ctrl |= SDHCI_CTRL_VDD_180; sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); - /* Wait for 5ms */ - usleep_range(5000, 5500); - /* 1.8V regulator output should be stable within 5 ms */ ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); if (ctrl & SDHCI_CTRL_VDD_180) return 0; pr_warning("%s: 1.8V regulator output did not became stable\n", - mmc_hostname(host->mmc)); + mmc_hostname(mmc)); return -EAGAIN; case MMC_SIGNAL_VOLTAGE_120: - if (host->vqmmc) { - ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000); + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000, + 1300000); if (ret) { pr_warning("%s: Switching to 1.2V signalling voltage " - " failed\n", mmc_hostname(host->mmc)); + " failed\n", mmc_hostname(mmc)); return -EIO; } } @@ -2643,7 +2646,6 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host) int sdhci_runtime_suspend_host(struct sdhci_host *host) { unsigned long flags; - int ret = 0; /* Disable tuning since we are suspending */ if (host->flags & SDHCI_USING_RETUNING_TIMER) { @@ -2663,14 +2665,14 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host) host->runtime_suspended = true; spin_unlock_irqrestore(&host->lock, flags); - return ret; + return 0; } EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host); int sdhci_runtime_resume_host(struct sdhci_host *host) { unsigned long flags; - int ret = 0, host_flags = host->flags; + int host_flags = host->flags; if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { if (host->ops->enable_dma) @@ -2709,7 +2711,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host) spin_unlock_irqrestore(&host->lock, flags); - return ret; + return 0; } EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host); @@ -2820,12 +2822,12 @@ int sdhci_add_host(struct sdhci_host *host) * (128) and potentially one alignment transfer for * each of those entries. */ - host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc), + host->adma_desc = dma_alloc_coherent(mmc_dev(mmc), ADMA_SIZE, &host->adma_addr, GFP_KERNEL); host->align_buffer = kmalloc(128 * 4, GFP_KERNEL); if (!host->adma_desc || !host->align_buffer) { - dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, + dma_free_coherent(mmc_dev(mmc), ADMA_SIZE, host->adma_desc, host->adma_addr); kfree(host->align_buffer); pr_warning("%s: Unable to allocate ADMA " @@ -2838,7 +2840,7 @@ int sdhci_add_host(struct sdhci_host *host) pr_warning("%s: unable to allocate aligned ADMA descriptor\n", mmc_hostname(mmc)); host->flags &= ~SDHCI_USE_ADMA; - dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, + dma_free_coherent(mmc_dev(mmc), ADMA_SIZE, host->adma_desc, host->adma_addr); kfree(host->align_buffer); host->adma_desc = NULL; @@ -2853,7 +2855,7 @@ int sdhci_add_host(struct sdhci_host *host) */ if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) { host->dma_mask = DMA_BIT_MASK(64); - mmc_dev(host->mmc)->dma_mask = &host->dma_mask; + mmc_dev(mmc)->dma_mask = &host->dma_mask; } if (host->version >= SDHCI_SPEC_300) @@ -2959,28 +2961,25 @@ int sdhci_add_host(struct sdhci_host *host) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) && - !(host->mmc->caps & MMC_CAP_NONREMOVABLE)) + !(mmc->caps & MMC_CAP_NONREMOVABLE)) mmc->caps |= MMC_CAP_NEEDS_POLL; + /* If there are external regulators, get them */ + if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */ - host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc"); - if (IS_ERR_OR_NULL(host->vqmmc)) { - if (PTR_ERR(host->vqmmc) < 0) { - pr_info("%s: no vqmmc regulator found\n", - mmc_hostname(mmc)); - host->vqmmc = NULL; - } - } else { - ret = regulator_enable(host->vqmmc); - if (!regulator_is_supported_voltage(host->vqmmc, 1700000, - 1950000)) + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_enable(mmc->supply.vqmmc); + if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000, + 1950000)) caps[1] &= ~(SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50); if (ret) { pr_warn("%s: Failed to enable vqmmc regulator: %d\n", mmc_hostname(mmc), ret); - host->vqmmc = NULL; + mmc->supply.vqmmc = NULL; } } @@ -3041,34 +3040,6 @@ int sdhci_add_host(struct sdhci_host *host) ocr_avail = 0; - host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc"); - if (IS_ERR_OR_NULL(host->vmmc)) { - if (PTR_ERR(host->vmmc) < 0) { - pr_info("%s: no vmmc regulator found\n", - mmc_hostname(mmc)); - host->vmmc = NULL; - } - } - -#ifdef CONFIG_REGULATOR - /* - * Voltage range check makes sense only if regulator reports - * any voltage value. - */ - if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) { - ret = regulator_is_supported_voltage(host->vmmc, 2700000, - 3600000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330))) - caps[0] &= ~SDHCI_CAN_VDD_330; - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300))) - caps[0] &= ~SDHCI_CAN_VDD_300; - ret = regulator_is_supported_voltage(host->vmmc, 1700000, - 1950000); - if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180))) - caps[0] &= ~SDHCI_CAN_VDD_180; - } -#endif /* CONFIG_REGULATOR */ - /* * According to SD Host Controller spec v3.00, if the Host System * can afford more than 150mA, Host Driver should set XPC to 1. Also @@ -3077,8 +3048,8 @@ int sdhci_add_host(struct sdhci_host *host) * value. */ max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); - if (!max_current_caps && host->vmmc) { - u32 curr = regulator_get_current_limit(host->vmmc); + if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) { + u32 curr = regulator_get_current_limit(mmc->supply.vmmc); if (curr > 0) { /* convert to SDHCI_MAX_CURRENT format */ @@ -3118,8 +3089,12 @@ int sdhci_add_host(struct sdhci_host *host) SDHCI_MAX_CURRENT_MULTIPLIER; } + /* If OCR set by external regulators, use it instead */ + if (mmc->ocr_avail) + ocr_avail = mmc->ocr_avail; + if (host->ocr_mask) - ocr_avail = host->ocr_mask; + ocr_avail &= host->ocr_mask; mmc->ocr_avail = ocr_avail; mmc->ocr_avail_sdio = ocr_avail; @@ -3273,6 +3248,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host); void sdhci_remove_host(struct sdhci_host *host, int dead) { + struct mmc_host *mmc = host->mmc; unsigned long flags; if (dead) { @@ -3282,7 +3258,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) if (host->mrq) { pr_err("%s: Controller removed during " - " transfer!\n", mmc_hostname(host->mmc)); + " transfer!\n", mmc_hostname(mmc)); host->mrq->cmd->error = -ENOMEDIUM; tasklet_schedule(&host->finish_tasklet); @@ -3293,7 +3269,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) sdhci_disable_card_detection(host); - mmc_remove_host(host->mmc); + mmc_remove_host(mmc); #ifdef SDHCI_USE_LEDS_CLASS led_classdev_unregister(&host->led); @@ -3310,18 +3286,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead) tasklet_kill(&host->finish_tasklet); - if (host->vmmc) { - regulator_disable(host->vmmc); - regulator_put(host->vmmc); - } + if (!IS_ERR(mmc->supply.vmmc)) + regulator_disable(mmc->supply.vmmc); - if (host->vqmmc) { - regulator_disable(host->vqmmc); - regulator_put(host->vqmmc); - } + if (!IS_ERR(mmc->supply.vqmmc)) + regulator_disable(mmc->supply.vqmmc); if (host->adma_desc) - dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, + dma_free_coherent(mmc_dev(mmc), ADMA_SIZE, host->adma_desc, host->adma_addr); kfree(host->align_buffer); diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index 656fbba..d11708c 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -386,7 +386,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host, struct sh_mmcif_plat_data *pdata, enum dma_transfer_direction direction) { - struct dma_slave_config cfg; + struct dma_slave_config cfg = { 0, }; struct dma_chan *chan; unsigned int slave_id; struct resource *res; @@ -417,8 +417,15 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host, /* In the OF case the driver will get the slave ID from the DT */ cfg.slave_id = slave_id; cfg.direction = direction; - cfg.dst_addr = res->start + MMCIF_CE_DATA; - cfg.src_addr = 0; + + if (direction == DMA_DEV_TO_MEM) { + cfg.src_addr = res->start + MMCIF_CE_DATA; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + } else { + cfg.dst_addr = res->start + MMCIF_CE_DATA; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + } + ret = dmaengine_slave_config(chan, &cfg); if (ret < 0) { dma_release_channel(chan); @@ -1378,26 +1385,19 @@ static int sh_mmcif_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Get irq error\n"); return -ENXIO; } + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "platform_get_resource error.\n"); - return -ENXIO; - } - reg = ioremap(res->start, resource_size(res)); - if (!reg) { - dev_err(&pdev->dev, "ioremap error.\n"); - return -ENOMEM; - } + reg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(reg)) + return PTR_ERR(reg); mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev); - if (!mmc) { - ret = -ENOMEM; - goto ealloch; - } + if (!mmc) + return -ENOMEM; ret = mmc_of_parse(mmc); if (ret < 0) - goto eofparse; + goto err_host; host = mmc_priv(mmc); host->mmc = mmc; @@ -1427,19 +1427,19 @@ static int sh_mmcif_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); host->power = false; - host->hclk = clk_get(&pdev->dev, NULL); + host->hclk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(host->hclk)) { ret = PTR_ERR(host->hclk); dev_err(&pdev->dev, "cannot get clock: %d\n", ret); - goto eclkget; + goto err_pm; } ret = sh_mmcif_clk_update(host); if (ret < 0) - goto eclkupdate; + goto err_pm; ret = pm_runtime_resume(&pdev->dev); if (ret < 0) - goto eresume; + goto err_clk; INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work); @@ -1447,65 +1447,55 @@ static int sh_mmcif_probe(struct platform_device *pdev) sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL); name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error"; - ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, name, host); + ret = devm_request_threaded_irq(&pdev->dev, irq[0], sh_mmcif_intr, + sh_mmcif_irqt, 0, name, host); if (ret) { dev_err(&pdev->dev, "request_irq error (%s)\n", name); - goto ereqirq0; + goto err_clk; } if (irq[1] >= 0) { - ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt, - 0, "sh_mmc:int", host); + ret = devm_request_threaded_irq(&pdev->dev, irq[1], + sh_mmcif_intr, sh_mmcif_irqt, + 0, "sh_mmc:int", host); if (ret) { dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n"); - goto ereqirq1; + goto err_clk; } } if (pd && pd->use_cd_gpio) { ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0); if (ret < 0) - goto erqcd; + goto err_clk; } mutex_init(&host->thread_lock); - clk_disable_unprepare(host->hclk); ret = mmc_add_host(mmc); if (ret < 0) - goto emmcaddh; + goto err_clk; dev_pm_qos_expose_latency_limit(&pdev->dev, 100); - dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION); - dev_dbg(&pdev->dev, "chip ver H'%04x\n", - sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff); + dev_info(&pdev->dev, "Chip version 0x%04x, clock rate %luMHz\n", + sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff, + clk_get_rate(host->hclk) / 1000000UL); + + clk_disable_unprepare(host->hclk); return ret; -emmcaddh: -erqcd: - if (irq[1] >= 0) - free_irq(irq[1], host); -ereqirq1: - free_irq(irq[0], host); -ereqirq0: - pm_runtime_suspend(&pdev->dev); -eresume: +err_clk: clk_disable_unprepare(host->hclk); -eclkupdate: - clk_put(host->hclk); -eclkget: +err_pm: pm_runtime_disable(&pdev->dev); -eofparse: +err_host: mmc_free_host(mmc); -ealloch: - iounmap(reg); return ret; } static int sh_mmcif_remove(struct platform_device *pdev) { struct sh_mmcif_host *host = platform_get_drvdata(pdev); - int irq[2]; host->dying = true; clk_prepare_enable(host->hclk); @@ -1523,16 +1513,6 @@ static int sh_mmcif_remove(struct platform_device *pdev) */ cancel_delayed_work_sync(&host->timeout_work); - if (host->addr) - iounmap(host->addr); - - irq[0] = platform_get_irq(pdev, 0); - irq[1] = platform_get_irq(pdev, 1); - - free_irq(irq[0], host); - if (irq[1] >= 0) - free_irq(irq[1], host); - clk_disable_unprepare(host->hclk); mmc_free_host(host->mmc); pm_runtime_put_sync(&pdev->dev); diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index 03e7b28..eb8f1d5 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -294,6 +294,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat cfg.slave_id = pdata->dma->slave_id_tx; cfg.direction = DMA_MEM_TO_DEV; cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift); + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.src_addr = 0; ret = dmaengine_slave_config(host->chan_tx, &cfg); if (ret < 0) @@ -312,6 +313,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat cfg.slave_id = pdata->dma->slave_id_rx; cfg.direction = DMA_DEV_TO_MEM; cfg.src_addr = cfg.dst_addr; + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.dst_addr = 0; ret = dmaengine_slave_config(host->chan_rx, &cfg); if (ret < 0) diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c index 282891a..54181b4 100644 --- a/drivers/mmc/host/wmt-sdmmc.c +++ b/drivers/mmc/host/wmt-sdmmc.c @@ -72,7 +72,6 @@ #define BM_SPI_CS 0x20 #define BM_SD_POWER 0x40 #define BM_SOFT_RESET 0x80 -#define BM_ONEBIT_MASK 0xFD /* SDMMC_BLKLEN bit fields */ #define BLKL_CRCERR_ABORT 0x0800 @@ -120,6 +119,8 @@ #define STS2_DATARSP_BUSY 0x20 #define STS2_DIS_FORCECLK 0x80 +/* SDMMC_EXTCTRL bit fields */ +#define EXT_EIGHTBIT 0x04 /* MMC/SD DMA Controller Registers */ #define SDDMA_GCR 0x100 @@ -672,7 +673,7 @@ static void wmt_mci_request(struct mmc_host *mmc, struct mmc_request *req) static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct wmt_mci_priv *priv; - u32 reg_tmp; + u32 busmode, extctrl; priv = mmc_priv(mmc); @@ -687,28 +688,26 @@ static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) if (ios->clock != 0) clk_set_rate(priv->clk_sdmmc, ios->clock); + busmode = readb(priv->sdmmc_base + SDMMC_BUSMODE); + extctrl = readb(priv->sdmmc_base + SDMMC_EXTCTRL); + + busmode &= ~(BM_EIGHTBIT_MODE | BM_FOURBIT_MODE); + extctrl &= ~EXT_EIGHTBIT; + switch (ios->bus_width) { case MMC_BUS_WIDTH_8: - reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL); - writeb(reg_tmp | 0x04, priv->sdmmc_base + SDMMC_EXTCTRL); + busmode |= BM_EIGHTBIT_MODE; + extctrl |= EXT_EIGHTBIT; break; case MMC_BUS_WIDTH_4: - reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE); - writeb(reg_tmp | BM_FOURBIT_MODE, priv->sdmmc_base + - SDMMC_BUSMODE); - - reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL); - writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL); + busmode |= BM_FOURBIT_MODE; break; case MMC_BUS_WIDTH_1: - reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE); - writeb(reg_tmp & BM_ONEBIT_MASK, priv->sdmmc_base + - SDMMC_BUSMODE); - - reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL); - writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL); break; } + + writeb(busmode, priv->sdmmc_base + SDMMC_BUSMODE); + writeb(extctrl, priv->sdmmc_base + SDMMC_EXTCTRL); } static int wmt_mci_get_ro(struct mmc_host *mmc) @@ -830,7 +829,7 @@ static int wmt_mci_probe(struct platform_device *pdev) goto fail3; } - ret = request_irq(dma_irq, wmt_mci_dma_isr, 32, "sdmmc", priv); + ret = request_irq(dma_irq, wmt_mci_dma_isr, 0, "sdmmc", priv); if (ret) { dev_err(&pdev->dev, "Register DMA IRQ fail\n"); goto fail4; diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 8457df7..33c6495 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -378,9 +378,11 @@ int ubiblock_create(struct ubi_volume_info *vi) { struct ubiblock *dev; struct gendisk *gd; - int disk_capacity; + u64 disk_capacity = ((u64)vi->size * vi->usable_leb_size) >> 9; int ret; + if ((sector_t)disk_capacity != disk_capacity) + return -EFBIG; /* Check that the volume isn't already handled */ mutex_lock(&devices_mutex); if (find_dev_nolock(vi->ubi_num, vi->vol_id)) { @@ -412,7 +414,6 @@ int ubiblock_create(struct ubi_volume_info *vi) gd->first_minor = dev->ubi_num * UBI_MAX_VOLUMES + dev->vol_id; gd->private_data = dev; sprintf(gd->disk_name, "ubiblock%d_%d", dev->ubi_num, dev->vol_id); - disk_capacity = (vi->size * vi->usable_leb_size) >> 9; set_capacity(gd, disk_capacity); dev->gd = gd; @@ -498,11 +499,16 @@ int ubiblock_remove(struct ubi_volume_info *vi) return 0; } -static void ubiblock_resize(struct ubi_volume_info *vi) +static int ubiblock_resize(struct ubi_volume_info *vi) { struct ubiblock *dev; - int disk_capacity; + u64 disk_capacity = ((u64)vi->size * vi->usable_leb_size) >> 9; + if ((sector_t)disk_capacity != disk_capacity) { + ubi_warn("%s: the volume is too big, cannot resize (%d LEBs)", + dev->gd->disk_name, vi->size); + return -EFBIG; + } /* * Need to lock the device list until we stop using the device, * otherwise the device struct might get released in @@ -512,15 +518,15 @@ static void ubiblock_resize(struct ubi_volume_info *vi) dev = find_dev_nolock(vi->ubi_num, vi->vol_id); if (!dev) { mutex_unlock(&devices_mutex); - return; + return -ENODEV; } mutex_lock(&dev->dev_mutex); - disk_capacity = (vi->size * vi->usable_leb_size) >> 9; set_capacity(dev->gd, disk_capacity); ubi_msg("%s resized to %d LEBs", dev->gd->disk_name, vi->size); mutex_unlock(&dev->dev_mutex); mutex_unlock(&devices_mutex); + return 0; } static int ubiblock_notify(struct notifier_block *nb, diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index d77b1c1..07cac5f 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -591,7 +591,7 @@ static int init_volumes(struct ubi_device *ubi, /* Static volumes only */ av = ubi_find_av(ai, i); - if (!av) { + if (!av || !av->leb_count) { /* * No eraseblocks belonging to this volume found. We * don't actually know whether this static volume is diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 0f3425d..20f4917 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1718,12 +1718,12 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) vol_id, lnum, ubi->works_count); while (found) { - struct ubi_work *wrk; + struct ubi_work *wrk, *tmp; found = 0; down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); - list_for_each_entry(wrk, &ubi->works, list) { + list_for_each_entry_safe(wrk, tmp, &ubi->works, list) { if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && (lnum == UBI_ALL || wrk->lnum == lnum)) { list_del(&wrk->list); diff --git a/drivers/net/arcnet/com20020_cs.c b/drivers/net/arcnet/com20020_cs.c index 326a612..1a790a2 100644 --- a/drivers/net/arcnet/com20020_cs.c +++ b/drivers/net/arcnet/com20020_cs.c @@ -112,20 +112,20 @@ static void com20020_detach(struct pcmcia_device *p_dev); /*====================================================================*/ -typedef struct com20020_dev_t { +struct com20020_dev { struct net_device *dev; -} com20020_dev_t; +}; static int com20020_probe(struct pcmcia_device *p_dev) { - com20020_dev_t *info; + struct com20020_dev *info; struct net_device *dev; struct arcnet_local *lp; dev_dbg(&p_dev->dev, "com20020_attach()\n"); /* Create new network device */ - info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) goto fail_alloc_info; @@ -160,7 +160,7 @@ fail_alloc_info: static void com20020_detach(struct pcmcia_device *link) { - struct com20020_dev_t *info = link->priv; + struct com20020_dev *info = link->priv; struct net_device *dev = info->dev; dev_dbg(&link->dev, "detach...\n"); @@ -199,7 +199,7 @@ static void com20020_detach(struct pcmcia_device *link) static int com20020_config(struct pcmcia_device *link) { struct arcnet_local *lp; - com20020_dev_t *info; + struct com20020_dev *info; struct net_device *dev; int i, ret; int ioaddr; @@ -291,7 +291,7 @@ static void com20020_release(struct pcmcia_device *link) static int com20020_suspend(struct pcmcia_device *link) { - com20020_dev_t *info = link->priv; + struct com20020_dev *info = link->priv; struct net_device *dev = info->dev; if (link->open) @@ -302,7 +302,7 @@ static int com20020_suspend(struct pcmcia_device *link) static int com20020_resume(struct pcmcia_device *link) { - com20020_dev_t *info = link->priv; + struct com20020_dev *info = link->priv; struct net_device *dev = info->dev; if (link->open) { diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 0988811..2d89bd0 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -91,7 +91,8 @@ config MCF8390 config NE2000 tristate "NE2000/NE1000 support" - depends on (ISA || (Q40 && m) || M32R || MACH_TX49XX) + depends on (ISA || (Q40 && m) || M32R || MACH_TX49XX || \ + ATARI_ETHERNEC) select CRC32 ---help--- If you have a network (Ethernet) card of this type, say Y and read diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 73c57a4..7769c05 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -108,7 +108,7 @@ static u32 axnet_msg_enable; /*====================================================================*/ -typedef struct axnet_dev_t { +struct axnet_dev { struct pcmcia_device *p_dev; caddr_t base; struct timer_list watchdog; @@ -118,9 +118,9 @@ typedef struct axnet_dev_t { int phy_id; int flags; int active_low; -} axnet_dev_t; +}; -static inline axnet_dev_t *PRIV(struct net_device *dev) +static inline struct axnet_dev *PRIV(struct net_device *dev) { void *p = (char *)netdev_priv(dev) + sizeof(struct ei_device); return p; @@ -141,13 +141,13 @@ static const struct net_device_ops axnet_netdev_ops = { static int axnet_probe(struct pcmcia_device *link) { - axnet_dev_t *info; + struct axnet_dev *info; struct net_device *dev; struct ei_device *ei_local; dev_dbg(&link->dev, "axnet_attach()\n"); - dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(axnet_dev_t)); + dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(struct axnet_dev)); if (!dev) return -ENOMEM; @@ -274,7 +274,7 @@ static int axnet_configcheck(struct pcmcia_device *p_dev, void *priv_data) static int axnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); int i, j, j2, ret; dev_dbg(&link->dev, "axnet_config(0x%p)\n", link); @@ -389,7 +389,7 @@ static int axnet_suspend(struct pcmcia_device *link) static int axnet_resume(struct pcmcia_device *link) { struct net_device *dev = link->priv; - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); if (link->open) { if (info->active_low == 1) @@ -467,7 +467,7 @@ static void mdio_write(unsigned int addr, int phy_id, int loc, int value) static int axnet_open(struct net_device *dev) { int ret; - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; @@ -497,7 +497,7 @@ static int axnet_open(struct net_device *dev) static int axnet_close(struct net_device *dev) { - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; dev_dbg(&link->dev, "axnet_close('%s')\n", dev->name); @@ -554,7 +554,7 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) static void ei_watchdog(u_long arg) { struct net_device *dev = (struct net_device *)(arg); - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + AXNET_MII_EEP; u_short link; @@ -610,7 +610,7 @@ reschedule: static int axnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); struct mii_ioctl_data *data = if_mii(rq); unsigned int mii_addr = dev->base_addr + AXNET_MII_EEP; switch (cmd) { @@ -1452,7 +1452,7 @@ static void ei_receive(struct net_device *dev) static void ei_rx_overrun(struct net_device *dev) { - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); long e8390_base = dev->base_addr; unsigned char was_txing, must_resend = 0; struct ei_device *ei_local = netdev_priv(dev); @@ -1624,7 +1624,7 @@ static void set_multicast_list(struct net_device *dev) static void AX88190_init(struct net_device *dev, int startp) { - axnet_dev_t *info = PRIV(dev); + struct axnet_dev *info = PRIV(dev); long e8390_base = dev->base_addr; struct ei_device *ei_local = netdev_priv(dev); int i; diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 58eaa8f..de566fb 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -169,6 +169,8 @@ bad_clone_list[] __initdata = { #elif defined(CONFIG_PLAT_OAKS32R) || \ defined(CONFIG_MACH_TX49XX) # define DCR_VAL 0x48 /* 8-bit mode */ +#elif defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */ +# define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49) #else # define DCR_VAL 0x49 #endif diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index ca3c2b9..9fb7b9d 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -111,11 +111,11 @@ static void pcnet_detach(struct pcmcia_device *p_dev); /*====================================================================*/ -typedef struct hw_info_t { +struct hw_info { u_int offset; u_char a0, a1, a2; u_int flags; -} hw_info_t; +}; #define DELAY_OUTPUT 0x01 #define HAS_MISC_REG 0x02 @@ -132,7 +132,7 @@ typedef struct hw_info_t { #define MII_PHYID_REG1 0x02 #define MII_PHYID_REG2 0x03 -static hw_info_t hw_info[] = { +static struct hw_info hw_info[] = { { /* Accton EN2212 */ 0x0ff0, 0x00, 0x00, 0xe8, DELAY_OUTPUT }, { /* Allied Telesis LA-PCM */ 0x0ff0, 0x00, 0x00, 0xf4, 0 }, { /* APEX MultiCard */ 0x03f4, 0x00, 0x20, 0xe5, 0 }, @@ -196,11 +196,11 @@ static hw_info_t hw_info[] = { #define NR_INFO ARRAY_SIZE(hw_info) -static hw_info_t default_info = { 0, 0, 0, 0, 0 }; -static hw_info_t dl10019_info = { 0, 0, 0, 0, IS_DL10019|HAS_MII }; -static hw_info_t dl10022_info = { 0, 0, 0, 0, IS_DL10022|HAS_MII }; +static struct hw_info default_info = { 0, 0, 0, 0, 0 }; +static struct hw_info dl10019_info = { 0, 0, 0, 0, IS_DL10019|HAS_MII }; +static struct hw_info dl10022_info = { 0, 0, 0, 0, IS_DL10022|HAS_MII }; -typedef struct pcnet_dev_t { +struct pcnet_dev { struct pcmcia_device *p_dev; u_int flags; void __iomem *base; @@ -210,12 +210,12 @@ typedef struct pcnet_dev_t { u_char eth_phy, pna_phy; u_short link_status; u_long mii_reset; -} pcnet_dev_t; +}; -static inline pcnet_dev_t *PRIV(struct net_device *dev) +static inline struct pcnet_dev *PRIV(struct net_device *dev) { char *p = netdev_priv(dev); - return (pcnet_dev_t *)(p + sizeof(struct ei_device)); + return (struct pcnet_dev *)(p + sizeof(struct ei_device)); } static const struct net_device_ops pcnet_netdev_ops = { @@ -237,13 +237,13 @@ static const struct net_device_ops pcnet_netdev_ops = { static int pcnet_probe(struct pcmcia_device *link) { - pcnet_dev_t *info; + struct pcnet_dev *info; struct net_device *dev; dev_dbg(&link->dev, "pcnet_attach()\n"); /* Create new ethernet device */ - dev = __alloc_ei_netdev(sizeof(pcnet_dev_t)); + dev = __alloc_ei_netdev(sizeof(struct pcnet_dev)); if (!dev) return -ENOMEM; info = PRIV(dev); info->p_dev = link; @@ -276,7 +276,7 @@ static void pcnet_detach(struct pcmcia_device *link) ======================================================================*/ -static hw_info_t *get_hwinfo(struct pcmcia_device *link) +static struct hw_info *get_hwinfo(struct pcmcia_device *link) { struct net_device *dev = link->priv; u_char __iomem *base, *virt; @@ -317,7 +317,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) ======================================================================*/ -static hw_info_t *get_prom(struct pcmcia_device *link) +static struct hw_info *get_prom(struct pcmcia_device *link) { struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; @@ -371,7 +371,7 @@ static hw_info_t *get_prom(struct pcmcia_device *link) ======================================================================*/ -static hw_info_t *get_dl10019(struct pcmcia_device *link) +static struct hw_info *get_dl10019(struct pcmcia_device *link) { struct net_device *dev = link->priv; int i; @@ -393,7 +393,7 @@ static hw_info_t *get_dl10019(struct pcmcia_device *link) ======================================================================*/ -static hw_info_t *get_ax88190(struct pcmcia_device *link) +static struct hw_info *get_ax88190(struct pcmcia_device *link) { struct net_device *dev = link->priv; unsigned int ioaddr = dev->base_addr; @@ -424,7 +424,7 @@ static hw_info_t *get_ax88190(struct pcmcia_device *link) ======================================================================*/ -static hw_info_t *get_hwired(struct pcmcia_device *link) +static struct hw_info *get_hwired(struct pcmcia_device *link) { struct net_device *dev = link->priv; int i; @@ -489,12 +489,12 @@ static int pcnet_confcheck(struct pcmcia_device *p_dev, void *priv_data) return try_io_port(p_dev); } -static hw_info_t *pcnet_try_config(struct pcmcia_device *link, - int *has_shmem, int try) +static struct hw_info *pcnet_try_config(struct pcmcia_device *link, + int *has_shmem, int try) { struct net_device *dev = link->priv; - hw_info_t *local_hw_info; - pcnet_dev_t *info = PRIV(dev); + struct hw_info *local_hw_info; + struct pcnet_dev *info = PRIV(dev); int priv = try; int ret; @@ -553,10 +553,10 @@ static hw_info_t *pcnet_try_config(struct pcmcia_device *link, static int pcnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); int start_pg, stop_pg, cm_offset; int has_shmem = 0; - hw_info_t *local_hw_info; + struct hw_info *local_hw_info; struct ei_device *ei_local; dev_dbg(&link->dev, "pcnet_config\n"); @@ -639,7 +639,7 @@ failed: static void pcnet_release(struct pcmcia_device *link) { - pcnet_dev_t *info = PRIV(link->priv); + struct pcnet_dev *info = PRIV(link->priv); dev_dbg(&link->dev, "pcnet_release\n"); @@ -836,7 +836,7 @@ static void write_asic(unsigned int ioaddr, int location, short asic_data) static void set_misc_reg(struct net_device *dev) { unsigned int nic_base = dev->base_addr; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); u_char tmp; if (info->flags & HAS_MISC_REG) { @@ -873,7 +873,7 @@ static void set_misc_reg(struct net_device *dev) static void mii_phy_probe(struct net_device *dev) { - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); unsigned int mii_addr = dev->base_addr + DLINK_GPIO; int i; u_int tmp, phyid; @@ -898,7 +898,7 @@ static void mii_phy_probe(struct net_device *dev) static int pcnet_open(struct net_device *dev) { int ret; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; @@ -931,7 +931,7 @@ static int pcnet_open(struct net_device *dev) static int pcnet_close(struct net_device *dev) { - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; dev_dbg(&link->dev, "pcnet_close('%s')\n", dev->name); @@ -982,7 +982,7 @@ static void pcnet_reset_8390(struct net_device *dev) static int set_config(struct net_device *dev, struct ifmap *map) { - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); if ((map->port != (u_char)(-1)) && (map->port != dev->if_port)) { if (!(info->flags & HAS_MISC_REG)) return -EOPNOTSUPP; @@ -1000,7 +1000,7 @@ static int set_config(struct net_device *dev, struct ifmap *map) static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) { struct net_device *dev = dev_id; - pcnet_dev_t *info; + struct pcnet_dev *info; irqreturn_t ret = ei_interrupt(irq, dev_id); if (ret == IRQ_HANDLED) { @@ -1013,7 +1013,7 @@ static irqreturn_t ei_irq_wrapper(int irq, void *dev_id) static void ei_watchdog(u_long arg) { struct net_device *dev = (struct net_device *)arg; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); unsigned int nic_base = dev->base_addr; unsigned int mii_addr = nic_base + DLINK_GPIO; u_short link; @@ -1101,7 +1101,7 @@ reschedule: static int ei_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); struct mii_ioctl_data *data = if_mii(rq); unsigned int mii_addr = dev->base_addr + DLINK_GPIO; @@ -1214,7 +1214,7 @@ static void dma_block_output(struct net_device *dev, int count, const u_char *buf, const int start_page) { unsigned int nic_base = dev->base_addr; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); #ifdef PCMCIA_DEBUG int retries = 0; struct ei_device *ei_local = netdev_priv(dev); @@ -1403,7 +1403,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, int stop_pg, int cm_offset) { struct net_device *dev = link->priv; - pcnet_dev_t *info = PRIV(dev); + struct pcnet_dev *info = PRIV(dev); int i, window_size, offset, ret; window_size = (stop_pg - start_pg) << 8; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index edb7186..dc7406c 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -24,6 +24,7 @@ source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" source "drivers/net/ethernet/amd/Kconfig" +source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" source "drivers/net/ethernet/arc/Kconfig" source "drivers/net/ethernet/atheros/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 58de333..224a018 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ +obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ obj-$(CONFIG_NET_VENDOR_ARC) += arc/ obj-$(CONFIG_NET_VENDOR_ATHEROS) += atheros/ diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 1f5487f..dc84f71 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -117,7 +117,6 @@ #include <linux/spinlock.h> #include <linux/tcp.h> #include <linux/if_vlan.h> -#include <linux/phy.h> #include <net/busy_poll.h> #include <linux/clk.h> #include <linux/if_ether.h> diff --git a/drivers/net/ethernet/apm/Kconfig b/drivers/net/ethernet/apm/Kconfig new file mode 100644 index 0000000..ec63d70 --- /dev/null +++ b/drivers/net/ethernet/apm/Kconfig @@ -0,0 +1 @@ +source "drivers/net/ethernet/apm/xgene/Kconfig" diff --git a/drivers/net/ethernet/apm/Makefile b/drivers/net/ethernet/apm/Makefile new file mode 100644 index 0000000..65ce32a --- /dev/null +++ b/drivers/net/ethernet/apm/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for APM X-GENE Ethernet driver. +# + +obj-$(CONFIG_NET_XGENE) += xgene/ diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig new file mode 100644 index 0000000..616dff6 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/Kconfig @@ -0,0 +1,9 @@ +config NET_XGENE + tristate "APM X-Gene SoC Ethernet Driver" + select PHYLIB + help + This is the Ethernet driver for the on-chip ethernet interface on the + APM X-Gene SoC. + + To compile this driver as a module, choose M here. This module will + be called xgene_enet. diff --git a/drivers/net/ethernet/apm/xgene/Makefile b/drivers/net/ethernet/apm/xgene/Makefile new file mode 100644 index 0000000..c643e8a --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for APM X-Gene Ethernet Driver. +# + +xgene-enet-objs := xgene_enet_hw.o xgene_enet_main.o xgene_enet_ethtool.o +obj-$(CONFIG_NET_XGENE) += xgene-enet.o diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c new file mode 100644 index 0000000..63f2aa5 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -0,0 +1,125 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/ethtool.h> +#include "xgene_enet_main.h" + +struct xgene_gstrings_stats { + char name[ETH_GSTRING_LEN]; + int offset; +}; + +#define XGENE_STAT(m) { #m, offsetof(struct xgene_enet_pdata, stats.m) } + +static const struct xgene_gstrings_stats gstrings_stats[] = { + XGENE_STAT(rx_packets), + XGENE_STAT(tx_packets), + XGENE_STAT(rx_bytes), + XGENE_STAT(tx_bytes), + XGENE_STAT(rx_errors), + XGENE_STAT(tx_errors), + XGENE_STAT(rx_length_errors), + XGENE_STAT(rx_crc_errors), + XGENE_STAT(rx_frame_errors), + XGENE_STAT(rx_fifo_errors) +}; + +#define XGENE_STATS_LEN ARRAY_SIZE(gstrings_stats) + +static void xgene_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *info) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct platform_device *pdev = pdata->pdev; + + strcpy(info->driver, "xgene_enet"); + strcpy(info->version, XGENE_DRV_VERSION); + snprintf(info->fw_version, ETHTOOL_FWVERS_LEN, "N/A"); + sprintf(info->bus_info, "%s", pdev->name); +} + +static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct phy_device *phydev = pdata->phy_dev; + + if (phydev == NULL) + return -ENODEV; + + return phy_ethtool_gset(phydev, cmd); +} + +static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct phy_device *phydev = pdata->phy_dev; + + if (phydev == NULL) + return -ENODEV; + + return phy_ethtool_sset(phydev, cmd); +} + +static void xgene_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + int i; + u8 *p = data; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < XGENE_STATS_LEN; i++) { + memcpy(p, gstrings_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } +} + +static int xgene_get_sset_count(struct net_device *ndev, int sset) +{ + if (sset != ETH_SS_STATS) + return -EINVAL; + + return XGENE_STATS_LEN; +} + +static void xgene_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *dummy, + u64 *data) +{ + void *pdata = netdev_priv(ndev); + int i; + + for (i = 0; i < XGENE_STATS_LEN; i++) + *data++ = *(u64 *)(pdata + gstrings_stats[i].offset); +} + +static const struct ethtool_ops xgene_ethtool_ops = { + .get_drvinfo = xgene_get_drvinfo, + .get_settings = xgene_get_settings, + .set_settings = xgene_set_settings, + .get_link = ethtool_op_get_link, + .get_strings = xgene_get_strings, + .get_sset_count = xgene_get_sset_count, + .get_ethtool_stats = xgene_get_ethtool_stats +}; + +void xgene_enet_set_ethtool_ops(struct net_device *ndev) +{ + ndev->ethtool_ops = &xgene_ethtool_ops; +} diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c new file mode 100644 index 0000000..812d8d6 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -0,0 +1,728 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Ravi Patel <rapatel@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" +#include "xgene_enet_hw.h" + +static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + u64 addr = ring->dma; + enum xgene_enet_ring_cfgsize cfgsize = ring->cfgsize; + + ring_cfg[4] |= (1 << SELTHRSH_POS) & + CREATE_MASK(SELTHRSH_POS, SELTHRSH_LEN); + ring_cfg[3] |= ACCEPTLERR; + ring_cfg[2] |= QCOHERENT; + + addr >>= 8; + ring_cfg[2] |= (addr << RINGADDRL_POS) & + CREATE_MASK_ULL(RINGADDRL_POS, RINGADDRL_LEN); + addr >>= RINGADDRL_LEN; + ring_cfg[3] |= addr & CREATE_MASK_ULL(RINGADDRH_POS, RINGADDRH_LEN); + ring_cfg[3] |= ((u32)cfgsize << RINGSIZE_POS) & + CREATE_MASK(RINGSIZE_POS, RINGSIZE_LEN); +} + +static void xgene_enet_ring_set_type(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + bool is_bufpool; + u32 val; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + val = (is_bufpool) ? RING_BUFPOOL : RING_REGULAR; + ring_cfg[4] |= (val << RINGTYPE_POS) & + CREATE_MASK(RINGTYPE_POS, RINGTYPE_LEN); + + if (is_bufpool) { + ring_cfg[3] |= (BUFPOOL_MODE << RINGMODE_POS) & + CREATE_MASK(RINGMODE_POS, RINGMODE_LEN); + } +} + +static void xgene_enet_ring_set_recombbuf(struct xgene_enet_desc_ring *ring) +{ + u32 *ring_cfg = ring->state; + + ring_cfg[3] |= RECOMBBUF; + ring_cfg[3] |= (0xf << RECOMTIMEOUTL_POS) & + CREATE_MASK(RECOMTIMEOUTL_POS, RECOMTIMEOUTL_LEN); + ring_cfg[4] |= 0x7 & CREATE_MASK(RECOMTIMEOUTH_POS, RECOMTIMEOUTH_LEN); +} + +static void xgene_enet_ring_wr32(struct xgene_enet_desc_ring *ring, + u32 offset, u32 data) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + iowrite32(data, pdata->ring_csr_addr + offset); +} + +static void xgene_enet_ring_rd32(struct xgene_enet_desc_ring *ring, + u32 offset, u32 *data) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + *data = ioread32(pdata->ring_csr_addr + offset); +} + +static void xgene_enet_write_ring_state(struct xgene_enet_desc_ring *ring) +{ + int i; + + xgene_enet_ring_wr32(ring, CSR_RING_CONFIG, ring->num); + for (i = 0; i < NUM_RING_CONFIG; i++) { + xgene_enet_ring_wr32(ring, CSR_RING_WR_BASE + (i * 4), + ring->state[i]); + } +} + +static void xgene_enet_clr_ring_state(struct xgene_enet_desc_ring *ring) +{ + memset(ring->state, 0, sizeof(u32) * NUM_RING_CONFIG); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_state(struct xgene_enet_desc_ring *ring) +{ + xgene_enet_ring_set_type(ring); + + if (xgene_enet_ring_owner(ring->id) == RING_OWNER_ETH0) + xgene_enet_ring_set_recombbuf(ring); + + xgene_enet_ring_init(ring); + xgene_enet_write_ring_state(ring); +} + +static void xgene_enet_set_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id_val, ring_id_buf; + bool is_bufpool; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + + ring_id_val = ring->id & GENMASK(9, 0); + ring_id_val |= OVERWRITE; + + ring_id_buf = (ring->num << 9) & GENMASK(18, 9); + ring_id_buf |= PREFETCH_BUF_EN; + if (is_bufpool) + ring_id_buf |= IS_BUFFER_POOL; + + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id_val); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, ring_id_buf); +} + +static void xgene_enet_clr_desc_ring_id(struct xgene_enet_desc_ring *ring) +{ + u32 ring_id; + + ring_id = ring->id | OVERWRITE; + xgene_enet_ring_wr32(ring, CSR_RING_ID, ring_id); + xgene_enet_ring_wr32(ring, CSR_RING_ID_BUF, 0); +} + +struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring) +{ + u32 size = ring->size; + u32 i, data; + bool is_bufpool; + + xgene_enet_clr_ring_state(ring); + xgene_enet_set_ring_state(ring); + xgene_enet_set_ring_id(ring); + + ring->slots = xgene_enet_get_numslots(ring->id, size); + + is_bufpool = xgene_enet_is_bufpool(ring->id); + if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU) + return ring; + + for (i = 0; i < ring->slots; i++) + xgene_enet_mark_desc_slot_empty(&ring->raw_desc[i]); + + xgene_enet_ring_rd32(ring, CSR_RING_NE_INT_MODE, &data); + data |= BIT(31 - xgene_enet_ring_bufnum(ring->id)); + xgene_enet_ring_wr32(ring, CSR_RING_NE_INT_MODE, data); + + return ring; +} + +void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring) +{ + u32 data; + bool is_bufpool; + + is_bufpool = xgene_enet_is_bufpool(ring->id); + if (is_bufpool || xgene_enet_ring_owner(ring->id) != RING_OWNER_CPU) + goto out; + + xgene_enet_ring_rd32(ring, CSR_RING_NE_INT_MODE, &data); + data &= ~BIT(31 - xgene_enet_ring_bufnum(ring->id)); + xgene_enet_ring_wr32(ring, CSR_RING_NE_INT_MODE, data); + +out: + xgene_enet_clr_desc_ring_id(ring); + xgene_enet_clr_ring_state(ring); +} + +void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, + struct xgene_enet_pdata *pdata, + enum xgene_enet_err_code status) +{ + struct rtnl_link_stats64 *stats = &pdata->stats; + + switch (status) { + case INGRESS_CRC: + stats->rx_crc_errors++; + break; + case INGRESS_CHECKSUM: + case INGRESS_CHECKSUM_COMPUTE: + stats->rx_errors++; + break; + case INGRESS_TRUNC_FRAME: + stats->rx_frame_errors++; + break; + case INGRESS_PKT_LEN: + stats->rx_length_errors++; + break; + case INGRESS_PKT_UNDER: + stats->rx_frame_errors++; + break; + case INGRESS_FIFO_OVERRUN: + stats->rx_fifo_errors++; + break; + default: + break; + } +} + +static void xgene_enet_wr_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->eth_csr_addr + offset; + + iowrite32(val, addr); +} + +static void xgene_enet_wr_ring_if(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->eth_ring_if_addr + offset; + + iowrite32(val, addr); +} + +static void xgene_enet_wr_diag_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->eth_diag_csr_addr + offset; + + iowrite32(val, addr); +} + +static void xgene_enet_wr_mcx_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 val) +{ + void __iomem *addr = pdata->mcx_mac_csr_addr + offset; + + iowrite32(val, addr); +} + +static bool xgene_enet_wr_indirect(void __iomem *addr, void __iomem *wr, + void __iomem *cmd, void __iomem *cmd_done, + u32 wr_addr, u32 wr_data) +{ + u32 done; + u8 wait = 10; + + iowrite32(wr_addr, addr); + iowrite32(wr_data, wr); + iowrite32(XGENE_ENET_WR_CMD, cmd); + + /* wait for write command to complete */ + while (!(done = ioread32(cmd_done)) && wait--) + udelay(1); + + if (!done) + return false; + + iowrite32(0, cmd); + + return true; +} + +static void xgene_enet_wr_mcx_mac(struct xgene_enet_pdata *pdata, + u32 wr_addr, u32 wr_data) +{ + void __iomem *addr, *wr, *cmd, *cmd_done; + + addr = pdata->mcx_mac_addr + MAC_ADDR_REG_OFFSET; + wr = pdata->mcx_mac_addr + MAC_WRITE_REG_OFFSET; + cmd = pdata->mcx_mac_addr + MAC_COMMAND_REG_OFFSET; + cmd_done = pdata->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET; + + if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data)) + netdev_err(pdata->ndev, "MCX mac write failed, addr: %04x\n", + wr_addr); +} + +static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 *val) +{ + void __iomem *addr = pdata->eth_csr_addr + offset; + + *val = ioread32(addr); +} + +static void xgene_enet_rd_diag_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 *val) +{ + void __iomem *addr = pdata->eth_diag_csr_addr + offset; + + *val = ioread32(addr); +} + +static void xgene_enet_rd_mcx_csr(struct xgene_enet_pdata *pdata, + u32 offset, u32 *val) +{ + void __iomem *addr = pdata->mcx_mac_csr_addr + offset; + + *val = ioread32(addr); +} + +static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd, + void __iomem *cmd, void __iomem *cmd_done, + u32 rd_addr, u32 *rd_data) +{ + u32 done; + u8 wait = 10; + + iowrite32(rd_addr, addr); + iowrite32(XGENE_ENET_RD_CMD, cmd); + + /* wait for read command to complete */ + while (!(done = ioread32(cmd_done)) && wait--) + udelay(1); + + if (!done) + return false; + + *rd_data = ioread32(rd); + iowrite32(0, cmd); + + return true; +} + +static void xgene_enet_rd_mcx_mac(struct xgene_enet_pdata *pdata, + u32 rd_addr, u32 *rd_data) +{ + void __iomem *addr, *rd, *cmd, *cmd_done; + + addr = pdata->mcx_mac_addr + MAC_ADDR_REG_OFFSET; + rd = pdata->mcx_mac_addr + MAC_READ_REG_OFFSET; + cmd = pdata->mcx_mac_addr + MAC_COMMAND_REG_OFFSET; + cmd_done = pdata->mcx_mac_addr + MAC_COMMAND_DONE_REG_OFFSET; + + if (!xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data)) + netdev_err(pdata->ndev, "MCX mac read failed, addr: %04x\n", + rd_addr); +} + +static int xgene_mii_phy_write(struct xgene_enet_pdata *pdata, int phy_id, + u32 reg, u16 data) +{ + u32 addr = 0, wr_data = 0; + u32 done; + u8 wait = 10; + + PHY_ADDR_SET(&addr, phy_id); + REG_ADDR_SET(&addr, reg); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_ADDRESS_ADDR, addr); + + PHY_CONTROL_SET(&wr_data, data); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONTROL_ADDR, wr_data); + do { + usleep_range(5, 10); + xgene_enet_rd_mcx_mac(pdata, MII_MGMT_INDICATORS_ADDR, &done); + } while ((done & BUSY_MASK) && wait--); + + if (done & BUSY_MASK) { + netdev_err(pdata->ndev, "MII_MGMT write failed\n"); + return -EBUSY; + } + + return 0; +} + +static int xgene_mii_phy_read(struct xgene_enet_pdata *pdata, + u8 phy_id, u32 reg) +{ + u32 addr = 0; + u32 data, done; + u8 wait = 10; + + PHY_ADDR_SET(&addr, phy_id); + REG_ADDR_SET(&addr, reg); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_ADDRESS_ADDR, addr); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_COMMAND_ADDR, READ_CYCLE_MASK); + do { + usleep_range(5, 10); + xgene_enet_rd_mcx_mac(pdata, MII_MGMT_INDICATORS_ADDR, &done); + } while ((done & BUSY_MASK) && wait--); + + if (done & BUSY_MASK) { + netdev_err(pdata->ndev, "MII_MGMT read failed\n"); + return -EBUSY; + } + + xgene_enet_rd_mcx_mac(pdata, MII_MGMT_STATUS_ADDR, &data); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_COMMAND_ADDR, 0); + + return data; +} + +void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata) +{ + u32 addr0, addr1; + u8 *dev_addr = pdata->ndev->dev_addr; + + addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | + (dev_addr[1] << 8) | dev_addr[0]; + addr1 = (dev_addr[5] << 24) | (dev_addr[4] << 16); + addr1 |= pdata->phy_addr & 0xFFFF; + + xgene_enet_wr_mcx_mac(pdata, STATION_ADDR0_ADDR, addr0); + xgene_enet_wr_mcx_mac(pdata, STATION_ADDR1_ADDR, addr1); +} + +static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata) +{ + struct net_device *ndev = pdata->ndev; + u32 data; + u8 wait = 10; + + xgene_enet_wr_diag_csr(pdata, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0x0); + do { + usleep_range(100, 110); + xgene_enet_rd_diag_csr(pdata, ENET_BLOCK_MEM_RDY_ADDR, &data); + } while ((data != 0xffffffff) && wait--); + + if (data != 0xffffffff) { + netdev_err(ndev, "Failed to release memory from shutdown\n"); + return -ENODEV; + } + + return 0; +} + +void xgene_gmac_reset(struct xgene_enet_pdata *pdata) +{ + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, SOFT_RESET1); + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, 0); +} + +void xgene_gmac_init(struct xgene_enet_pdata *pdata, int speed) +{ + u32 value, mc2; + u32 intf_ctl, rgmii; + u32 icm0, icm2; + + xgene_gmac_reset(pdata); + + xgene_enet_rd_mcx_csr(pdata, ICM_CONFIG0_REG_0_ADDR, &icm0); + xgene_enet_rd_mcx_csr(pdata, ICM_CONFIG2_REG_0_ADDR, &icm2); + xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_2_ADDR, &mc2); + xgene_enet_rd_mcx_mac(pdata, INTERFACE_CONTROL_ADDR, &intf_ctl); + xgene_enet_rd_csr(pdata, RGMII_REG_0_ADDR, &rgmii); + + switch (speed) { + case SPEED_10: + ENET_INTERFACE_MODE2_SET(&mc2, 1); + CFG_MACMODE_SET(&icm0, 0); + CFG_WAITASYNCRD_SET(&icm2, 500); + rgmii &= ~CFG_SPEED_1250; + break; + case SPEED_100: + ENET_INTERFACE_MODE2_SET(&mc2, 1); + intf_ctl |= ENET_LHD_MODE; + CFG_MACMODE_SET(&icm0, 1); + CFG_WAITASYNCRD_SET(&icm2, 80); + rgmii &= ~CFG_SPEED_1250; + break; + default: + ENET_INTERFACE_MODE2_SET(&mc2, 2); + intf_ctl |= ENET_GHD_MODE; + CFG_TXCLK_MUXSEL0_SET(&rgmii, 4); + xgene_enet_rd_csr(pdata, DEBUG_REG_ADDR, &value); + value |= CFG_BYPASS_UNISEC_TX | CFG_BYPASS_UNISEC_RX; + xgene_enet_wr_csr(pdata, DEBUG_REG_ADDR, value); + break; + } + + mc2 |= FULL_DUPLEX2; + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_2_ADDR, mc2); + xgene_enet_wr_mcx_mac(pdata, INTERFACE_CONTROL_ADDR, intf_ctl); + + xgene_gmac_set_mac_addr(pdata); + + /* Adjust MDC clock frequency */ + xgene_enet_rd_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, &value); + MGMT_CLOCK_SEL_SET(&value, 7); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, value); + + /* Enable drop if bufpool not available */ + xgene_enet_rd_csr(pdata, RSIF_CONFIG_REG_ADDR, &value); + value |= CFG_RSIF_FPBUFF_TIMEOUT_EN; + xgene_enet_wr_csr(pdata, RSIF_CONFIG_REG_ADDR, value); + + /* Rtype should be copied from FP */ + xgene_enet_wr_csr(pdata, RSIF_RAM_DBG_REG0_ADDR, 0); + xgene_enet_wr_csr(pdata, RGMII_REG_0_ADDR, rgmii); + + /* Rx-Tx traffic resume */ + xgene_enet_wr_csr(pdata, CFG_LINK_AGGR_RESUME_0_ADDR, TX_PORT0); + + xgene_enet_wr_mcx_csr(pdata, ICM_CONFIG0_REG_0_ADDR, icm0); + xgene_enet_wr_mcx_csr(pdata, ICM_CONFIG2_REG_0_ADDR, icm2); + + xgene_enet_rd_mcx_csr(pdata, RX_DV_GATE_REG_0_ADDR, &value); + value &= ~TX_DV_GATE_EN0; + value &= ~RX_DV_GATE_EN0; + value |= RESUME_RX0; + xgene_enet_wr_mcx_csr(pdata, RX_DV_GATE_REG_0_ADDR, value); + + xgene_enet_wr_csr(pdata, CFG_BYPASS_ADDR, RESUME_TX); +} + +static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *pdata) +{ + u32 val = 0xffffffff; + + xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIWQASSOC_ADDR, val); + xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIFPQASSOC_ADDR, val); + xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIQMLITEWQASSOC_ADDR, val); + xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIQMLITEFPQASSOC_ADDR, val); +} + +void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata, + u32 dst_ring_num, u16 bufpool_id) +{ + u32 cb; + u32 fpsel; + + fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20; + + xgene_enet_rd_csr(pdata, CLE_BYPASS_REG0_0_ADDR, &cb); + cb |= CFG_CLE_BYPASS_EN0; + CFG_CLE_IP_PROTOCOL0_SET(&cb, 3); + xgene_enet_wr_csr(pdata, CLE_BYPASS_REG0_0_ADDR, cb); + + xgene_enet_rd_csr(pdata, CLE_BYPASS_REG1_0_ADDR, &cb); + CFG_CLE_DSTQID0_SET(&cb, dst_ring_num); + CFG_CLE_FPSEL0_SET(&cb, fpsel); + xgene_enet_wr_csr(pdata, CLE_BYPASS_REG1_0_ADDR, cb); +} + +void xgene_gmac_rx_enable(struct xgene_enet_pdata *pdata) +{ + u32 data; + + xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data); + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data | RX_EN); +} + +void xgene_gmac_tx_enable(struct xgene_enet_pdata *pdata) +{ + u32 data; + + xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data); + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data | TX_EN); +} + +void xgene_gmac_rx_disable(struct xgene_enet_pdata *pdata) +{ + u32 data; + + xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data); + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data & ~RX_EN); +} + +void xgene_gmac_tx_disable(struct xgene_enet_pdata *pdata) +{ + u32 data; + + xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &data); + xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, data & ~TX_EN); +} + +void xgene_enet_reset(struct xgene_enet_pdata *pdata) +{ + u32 val; + + clk_prepare_enable(pdata->clk); + clk_disable_unprepare(pdata->clk); + clk_prepare_enable(pdata->clk); + xgene_enet_ecc_init(pdata); + xgene_enet_config_ring_if_assoc(pdata); + + /* Enable auto-incr for scanning */ + xgene_enet_rd_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, &val); + val |= SCAN_AUTO_INCR; + MGMT_CLOCK_SEL_SET(&val, 1); + xgene_enet_wr_mcx_mac(pdata, MII_MGMT_CONFIG_ADDR, val); +} + +void xgene_gport_shutdown(struct xgene_enet_pdata *pdata) +{ + clk_disable_unprepare(pdata->clk); +} + +static int xgene_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum) +{ + struct xgene_enet_pdata *pdata = bus->priv; + u32 val; + + val = xgene_mii_phy_read(pdata, mii_id, regnum); + netdev_dbg(pdata->ndev, "mdio_rd: bus=%d reg=%d val=%x\n", + mii_id, regnum, val); + + return val; +} + +static int xgene_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum, + u16 val) +{ + struct xgene_enet_pdata *pdata = bus->priv; + + netdev_dbg(pdata->ndev, "mdio_wr: bus=%d reg=%d val=%x\n", + mii_id, regnum, val); + return xgene_mii_phy_write(pdata, mii_id, regnum, val); +} + +static void xgene_enet_adjust_link(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct phy_device *phydev = pdata->phy_dev; + + if (phydev->link) { + if (pdata->phy_speed != phydev->speed) { + xgene_gmac_init(pdata, phydev->speed); + xgene_gmac_rx_enable(pdata); + xgene_gmac_tx_enable(pdata); + pdata->phy_speed = phydev->speed; + phy_print_status(phydev); + } + } else { + xgene_gmac_rx_disable(pdata); + xgene_gmac_tx_disable(pdata); + pdata->phy_speed = SPEED_UNKNOWN; + phy_print_status(phydev); + } +} + +static int xgene_enet_phy_connect(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct device_node *phy_np; + struct phy_device *phy_dev; + struct device *dev = &pdata->pdev->dev; + + phy_np = of_parse_phandle(dev->of_node, "phy-handle", 0); + if (!phy_np) { + netdev_dbg(ndev, "No phy-handle found\n"); + return -ENODEV; + } + + phy_dev = of_phy_connect(ndev, phy_np, &xgene_enet_adjust_link, + 0, pdata->phy_mode); + if (!phy_dev) { + netdev_err(ndev, "Could not connect to PHY\n"); + return -ENODEV; + } + + pdata->phy_speed = SPEED_UNKNOWN; + phy_dev->supported &= ~SUPPORTED_10baseT_Half & + ~SUPPORTED_100baseT_Half & + ~SUPPORTED_1000baseT_Half; + phy_dev->advertising = phy_dev->supported; + pdata->phy_dev = phy_dev; + + return 0; +} + +int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata) +{ + struct net_device *ndev = pdata->ndev; + struct device *dev = &pdata->pdev->dev; + struct device_node *child_np; + struct device_node *mdio_np = NULL; + struct mii_bus *mdio_bus; + int ret; + + for_each_child_of_node(dev->of_node, child_np) { + if (of_device_is_compatible(child_np, "apm,xgene-mdio")) { + mdio_np = child_np; + break; + } + } + + if (!mdio_np) { + netdev_dbg(ndev, "No mdio node in the dts\n"); + return -ENXIO; + } + + mdio_bus = mdiobus_alloc(); + if (!mdio_bus) + return -ENOMEM; + + mdio_bus->name = "APM X-Gene MDIO bus"; + mdio_bus->read = xgene_enet_mdio_read; + mdio_bus->write = xgene_enet_mdio_write; + snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%s-%s", "xgene-mii", + ndev->name); + + mdio_bus->priv = pdata; + mdio_bus->parent = &ndev->dev; + + ret = of_mdiobus_register(mdio_bus, mdio_np); + if (ret) { + netdev_err(ndev, "Failed to register MDIO bus\n"); + mdiobus_free(mdio_bus); + return ret; + } + pdata->mdio_bus = mdio_bus; + + ret = xgene_enet_phy_connect(ndev); + if (ret) + xgene_enet_mdio_remove(pdata); + + return ret; +} + +void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) +{ + mdiobus_unregister(pdata->mdio_bus); + mdiobus_free(pdata->mdio_bus); + pdata->mdio_bus = NULL; +} diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h new file mode 100644 index 0000000..371e7a5 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -0,0 +1,337 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Ravi Patel <rapatel@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __XGENE_ENET_HW_H__ +#define __XGENE_ENET_HW_H__ + +#include "xgene_enet_main.h" + +struct xgene_enet_pdata; +struct xgene_enet_stats; + +/* clears and then set bits */ +static inline void xgene_set_bits(u32 *dst, u32 val, u32 start, u32 len) +{ + u32 end = start + len - 1; + u32 mask = GENMASK(end, start); + + *dst &= ~mask; + *dst |= (val << start) & mask; +} + +static inline u32 xgene_get_bits(u32 val, u32 start, u32 end) +{ + return (val & GENMASK(end, start)) >> start; +} + +#define CSR_RING_ID 0x0008 +#define OVERWRITE BIT(31) +#define IS_BUFFER_POOL BIT(20) +#define PREFETCH_BUF_EN BIT(21) +#define CSR_RING_ID_BUF 0x000c +#define CSR_RING_NE_INT_MODE 0x017c +#define CSR_RING_CONFIG 0x006c +#define CSR_RING_WR_BASE 0x0070 +#define NUM_RING_CONFIG 5 +#define BUFPOOL_MODE 3 +#define RM3 3 +#define INC_DEC_CMD_ADDR 0x002c +#define UDP_HDR_SIZE 2 +#define BUF_LEN_CODE_2K 0x5000 + +#define CREATE_MASK(pos, len) GENMASK((pos)+(len)-1, (pos)) +#define CREATE_MASK_ULL(pos, len) GENMASK_ULL((pos)+(len)-1, (pos)) + +/* Empty slot soft signature */ +#define EMPTY_SLOT_INDEX 1 +#define EMPTY_SLOT ~0ULL + +#define WORK_DESC_SIZE 32 +#define BUFPOOL_DESC_SIZE 16 + +#define RING_OWNER_MASK GENMASK(9, 6) +#define RING_BUFNUM_MASK GENMASK(5, 0) + +#define SELTHRSH_POS 3 +#define SELTHRSH_LEN 3 +#define RINGADDRL_POS 5 +#define RINGADDRL_LEN 27 +#define RINGADDRH_POS 0 +#define RINGADDRH_LEN 6 +#define RINGSIZE_POS 23 +#define RINGSIZE_LEN 3 +#define RINGTYPE_POS 19 +#define RINGTYPE_LEN 2 +#define RINGMODE_POS 20 +#define RINGMODE_LEN 3 +#define RECOMTIMEOUTL_POS 28 +#define RECOMTIMEOUTL_LEN 3 +#define RECOMTIMEOUTH_POS 0 +#define RECOMTIMEOUTH_LEN 2 +#define NUMMSGSINQ_POS 1 +#define NUMMSGSINQ_LEN 16 +#define ACCEPTLERR BIT(19) +#define QCOHERENT BIT(4) +#define RECOMBBUF BIT(27) + +#define BLOCK_ETH_CSR_OFFSET 0x2000 +#define BLOCK_ETH_RING_IF_OFFSET 0x9000 +#define BLOCK_ETH_CLKRST_CSR_OFFSET 0xC000 +#define BLOCK_ETH_DIAG_CSR_OFFSET 0xD000 + +#define BLOCK_ETH_MAC_OFFSET 0x0000 +#define BLOCK_ETH_STATS_OFFSET 0x0014 +#define BLOCK_ETH_MAC_CSR_OFFSET 0x2800 + +#define MAC_ADDR_REG_OFFSET 0x00 +#define MAC_COMMAND_REG_OFFSET 0x04 +#define MAC_WRITE_REG_OFFSET 0x08 +#define MAC_READ_REG_OFFSET 0x0c +#define MAC_COMMAND_DONE_REG_OFFSET 0x10 + +#define STAT_ADDR_REG_OFFSET 0x00 +#define STAT_COMMAND_REG_OFFSET 0x04 +#define STAT_WRITE_REG_OFFSET 0x08 +#define STAT_READ_REG_OFFSET 0x0c +#define STAT_COMMAND_DONE_REG_OFFSET 0x10 + +#define MII_MGMT_CONFIG_ADDR 0x20 +#define MII_MGMT_COMMAND_ADDR 0x24 +#define MII_MGMT_ADDRESS_ADDR 0x28 +#define MII_MGMT_CONTROL_ADDR 0x2c +#define MII_MGMT_STATUS_ADDR 0x30 +#define MII_MGMT_INDICATORS_ADDR 0x34 + +#define BUSY_MASK BIT(0) +#define READ_CYCLE_MASK BIT(0) +#define PHY_CONTROL_SET(dst, val) xgene_set_bits(dst, val, 0, 16) + +#define ENET_SPARE_CFG_REG_ADDR 0x0750 +#define RSIF_CONFIG_REG_ADDR 0x0010 +#define RSIF_RAM_DBG_REG0_ADDR 0x0048 +#define RGMII_REG_0_ADDR 0x07e0 +#define CFG_LINK_AGGR_RESUME_0_ADDR 0x07c8 +#define DEBUG_REG_ADDR 0x0700 +#define CFG_BYPASS_ADDR 0x0294 +#define CLE_BYPASS_REG0_0_ADDR 0x0490 +#define CLE_BYPASS_REG1_0_ADDR 0x0494 +#define CFG_RSIF_FPBUFF_TIMEOUT_EN BIT(31) +#define RESUME_TX BIT(0) +#define CFG_SPEED_1250 BIT(24) +#define TX_PORT0 BIT(0) +#define CFG_BYPASS_UNISEC_TX BIT(2) +#define CFG_BYPASS_UNISEC_RX BIT(1) +#define CFG_CLE_BYPASS_EN0 BIT(31) +#define CFG_TXCLK_MUXSEL0_SET(dst, val) xgene_set_bits(dst, val, 29, 3) + +#define CFG_CLE_IP_PROTOCOL0_SET(dst, val) xgene_set_bits(dst, val, 16, 2) +#define CFG_CLE_DSTQID0_SET(dst, val) xgene_set_bits(dst, val, 0, 12) +#define CFG_CLE_FPSEL0_SET(dst, val) xgene_set_bits(dst, val, 16, 4) +#define CFG_MACMODE_SET(dst, val) xgene_set_bits(dst, val, 18, 2) +#define CFG_WAITASYNCRD_SET(dst, val) xgene_set_bits(dst, val, 0, 16) +#define ICM_CONFIG0_REG_0_ADDR 0x0400 +#define ICM_CONFIG2_REG_0_ADDR 0x0410 +#define RX_DV_GATE_REG_0_ADDR 0x05fc +#define TX_DV_GATE_EN0 BIT(2) +#define RX_DV_GATE_EN0 BIT(1) +#define RESUME_RX0 BIT(0) +#define ENET_CFGSSQMIWQASSOC_ADDR 0xe0 +#define ENET_CFGSSQMIFPQASSOC_ADDR 0xdc +#define ENET_CFGSSQMIQMLITEFPQASSOC_ADDR 0xf0 +#define ENET_CFGSSQMIQMLITEWQASSOC_ADDR 0xf4 +#define ENET_CFG_MEM_RAM_SHUTDOWN_ADDR 0x70 +#define ENET_BLOCK_MEM_RDY_ADDR 0x74 +#define MAC_CONFIG_1_ADDR 0x00 +#define MAC_CONFIG_2_ADDR 0x04 +#define MAX_FRAME_LEN_ADDR 0x10 +#define INTERFACE_CONTROL_ADDR 0x38 +#define STATION_ADDR0_ADDR 0x40 +#define STATION_ADDR1_ADDR 0x44 +#define PHY_ADDR_SET(dst, val) xgene_set_bits(dst, val, 8, 5) +#define REG_ADDR_SET(dst, val) xgene_set_bits(dst, val, 0, 5) +#define ENET_INTERFACE_MODE2_SET(dst, val) xgene_set_bits(dst, val, 8, 2) +#define MGMT_CLOCK_SEL_SET(dst, val) xgene_set_bits(dst, val, 0, 3) +#define SOFT_RESET1 BIT(31) +#define TX_EN BIT(0) +#define RX_EN BIT(2) +#define ENET_LHD_MODE BIT(25) +#define ENET_GHD_MODE BIT(26) +#define FULL_DUPLEX2 BIT(0) +#define SCAN_AUTO_INCR BIT(5) +#define TBYT_ADDR 0x38 +#define TPKT_ADDR 0x39 +#define TDRP_ADDR 0x45 +#define TFCS_ADDR 0x47 +#define TUND_ADDR 0x4a + +#define TSO_IPPROTO_TCP 1 +#define FULL_DUPLEX 2 + +#define USERINFO_POS 0 +#define USERINFO_LEN 32 +#define FPQNUM_POS 32 +#define FPQNUM_LEN 12 +#define LERR_POS 60 +#define LERR_LEN 3 +#define STASH_POS 52 +#define STASH_LEN 2 +#define BUFDATALEN_POS 48 +#define BUFDATALEN_LEN 12 +#define DATAADDR_POS 0 +#define DATAADDR_LEN 42 +#define COHERENT_POS 63 +#define HENQNUM_POS 48 +#define HENQNUM_LEN 12 +#define TYPESEL_POS 44 +#define TYPESEL_LEN 4 +#define ETHHDR_POS 12 +#define ETHHDR_LEN 8 +#define IC_POS 35 /* Insert CRC */ +#define TCPHDR_POS 0 +#define TCPHDR_LEN 6 +#define IPHDR_POS 6 +#define IPHDR_LEN 6 +#define EC_POS 22 /* Enable checksum */ +#define EC_LEN 1 +#define IS_POS 24 /* IP protocol select */ +#define IS_LEN 1 +#define TYPE_ETH_WORK_MESSAGE_POS 44 + +struct xgene_enet_raw_desc { + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; +}; + +struct xgene_enet_raw_desc16 { + __le64 m0; + __le64 m1; +}; + +static inline void xgene_enet_mark_desc_slot_empty(void *desc_slot_ptr) +{ + __le64 *desc_slot = desc_slot_ptr; + + desc_slot[EMPTY_SLOT_INDEX] = cpu_to_le64(EMPTY_SLOT); +} + +static inline bool xgene_enet_is_desc_slot_empty(void *desc_slot_ptr) +{ + __le64 *desc_slot = desc_slot_ptr; + + return (desc_slot[EMPTY_SLOT_INDEX] == cpu_to_le64(EMPTY_SLOT)); +} + +enum xgene_enet_ring_cfgsize { + RING_CFGSIZE_512B, + RING_CFGSIZE_2KB, + RING_CFGSIZE_16KB, + RING_CFGSIZE_64KB, + RING_CFGSIZE_512KB, + RING_CFGSIZE_INVALID +}; + +enum xgene_enet_ring_type { + RING_DISABLED, + RING_REGULAR, + RING_BUFPOOL +}; + +enum xgene_ring_owner { + RING_OWNER_ETH0, + RING_OWNER_CPU = 15, + RING_OWNER_INVALID +}; + +enum xgene_enet_ring_bufnum { + RING_BUFNUM_REGULAR = 0x0, + RING_BUFNUM_BUFPOOL = 0x20, + RING_BUFNUM_INVALID +}; + +enum xgene_enet_cmd { + XGENE_ENET_WR_CMD = BIT(31), + XGENE_ENET_RD_CMD = BIT(30) +}; + +enum xgene_enet_err_code { + HBF_READ_DATA = 3, + HBF_LL_READ = 4, + BAD_WORK_MSG = 6, + BUFPOOL_TIMEOUT = 15, + INGRESS_CRC = 16, + INGRESS_CHECKSUM = 17, + INGRESS_TRUNC_FRAME = 18, + INGRESS_PKT_LEN = 19, + INGRESS_PKT_UNDER = 20, + INGRESS_FIFO_OVERRUN = 21, + INGRESS_CHECKSUM_COMPUTE = 26, + ERR_CODE_INVALID +}; + +static inline enum xgene_ring_owner xgene_enet_ring_owner(u16 id) +{ + return (id & RING_OWNER_MASK) >> 6; +} + +static inline u8 xgene_enet_ring_bufnum(u16 id) +{ + return id & RING_BUFNUM_MASK; +} + +static inline bool xgene_enet_is_bufpool(u16 id) +{ + return ((id & RING_BUFNUM_MASK) >= 0x20) ? true : false; +} + +static inline u16 xgene_enet_get_numslots(u16 id, u32 size) +{ + bool is_bufpool = xgene_enet_is_bufpool(id); + + return (is_bufpool) ? size / BUFPOOL_DESC_SIZE : + size / WORK_DESC_SIZE; +} + +struct xgene_enet_desc_ring *xgene_enet_setup_ring( + struct xgene_enet_desc_ring *ring); +void xgene_enet_clear_ring(struct xgene_enet_desc_ring *ring); +void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring, + struct xgene_enet_pdata *pdata, + enum xgene_enet_err_code status); + +void xgene_enet_reset(struct xgene_enet_pdata *priv); +void xgene_gmac_reset(struct xgene_enet_pdata *priv); +void xgene_gmac_init(struct xgene_enet_pdata *priv, int speed); +void xgene_gmac_tx_enable(struct xgene_enet_pdata *priv); +void xgene_gmac_rx_enable(struct xgene_enet_pdata *priv); +void xgene_gmac_tx_disable(struct xgene_enet_pdata *priv); +void xgene_gmac_rx_disable(struct xgene_enet_pdata *priv); +void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata); +void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata, + u32 dst_ring_num, u16 bufpool_id); +void xgene_gport_shutdown(struct xgene_enet_pdata *priv); +void xgene_gmac_get_tx_stats(struct xgene_enet_pdata *pdata); + +int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata); +void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata); + +#endif /* __XGENE_ENET_HW_H__ */ diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c new file mode 100644 index 0000000..e1a8f4e --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -0,0 +1,954 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Ravi Patel <rapatel@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "xgene_enet_main.h" +#include "xgene_enet_hw.h" + +static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool) +{ + struct xgene_enet_raw_desc16 *raw_desc; + int i; + + for (i = 0; i < buf_pool->slots; i++) { + raw_desc = &buf_pool->raw_desc16[i]; + + /* Hardware expects descriptor in little endian format */ + raw_desc->m0 = cpu_to_le64(i | + SET_VAL(FPQNUM, buf_pool->dst_ring_num) | + SET_VAL(STASH, 3)); + } +} + +static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, + u32 nbuf) +{ + struct sk_buff *skb; + struct xgene_enet_raw_desc16 *raw_desc; + struct net_device *ndev; + struct device *dev; + dma_addr_t dma_addr; + u32 tail = buf_pool->tail; + u32 slots = buf_pool->slots - 1; + u16 bufdatalen, len; + int i; + + ndev = buf_pool->ndev; + dev = ndev_to_dev(buf_pool->ndev); + bufdatalen = BUF_LEN_CODE_2K | (SKB_BUFFER_SIZE & GENMASK(11, 0)); + len = XGENE_ENET_MAX_MTU; + + for (i = 0; i < nbuf; i++) { + raw_desc = &buf_pool->raw_desc16[tail]; + + skb = netdev_alloc_skb_ip_align(ndev, len); + if (unlikely(!skb)) + return -ENOMEM; + buf_pool->rx_skb[tail] = skb; + + dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + netdev_err(ndev, "DMA mapping error\n"); + dev_kfree_skb_any(skb); + return -EINVAL; + } + + raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) | + SET_VAL(BUFDATALEN, bufdatalen) | + SET_BIT(COHERENT)); + tail = (tail + 1) & slots; + } + + iowrite32(nbuf, buf_pool->cmd); + buf_pool->tail = tail; + + return 0; +} + +static u16 xgene_enet_dst_ring_num(struct xgene_enet_desc_ring *ring) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + + return ((u16)pdata->rm << 10) | ring->num; +} + +static u8 xgene_enet_hdr_len(const void *data) +{ + const struct ethhdr *eth = data; + + return (eth->h_proto == htons(ETH_P_8021Q)) ? VLAN_ETH_HLEN : ETH_HLEN; +} + +static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring) +{ + u32 __iomem *cmd_base = ring->cmd_base; + u32 ring_state, num_msgs; + + ring_state = ioread32(&cmd_base[1]); + num_msgs = ring_state & CREATE_MASK(NUMMSGSINQ_POS, NUMMSGSINQ_LEN); + + return num_msgs >> NUMMSGSINQ_POS; +} + +static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) +{ + struct xgene_enet_raw_desc16 *raw_desc; + u32 slots = buf_pool->slots - 1; + u32 tail = buf_pool->tail; + u32 userinfo; + int i, len; + + len = xgene_enet_ring_len(buf_pool); + for (i = 0; i < len; i++) { + tail = (tail - 1) & slots; + raw_desc = &buf_pool->raw_desc16[tail]; + + /* Hardware stores descriptor in little endian format */ + userinfo = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); + dev_kfree_skb_any(buf_pool->rx_skb[userinfo]); + } + + iowrite32(-len, buf_pool->cmd); + buf_pool->tail = tail; +} + +static irqreturn_t xgene_enet_rx_irq(const int irq, void *data) +{ + struct xgene_enet_desc_ring *rx_ring = data; + + if (napi_schedule_prep(&rx_ring->napi)) { + disable_irq_nosync(irq); + __napi_schedule(&rx_ring->napi); + } + + return IRQ_HANDLED; +} + +static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, + struct xgene_enet_raw_desc *raw_desc) +{ + struct sk_buff *skb; + struct device *dev; + u16 skb_index; + u8 status; + int ret = 0; + + skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); + skb = cp_ring->cp_skb[skb_index]; + + dev = ndev_to_dev(cp_ring->ndev); + dma_unmap_single(dev, GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)), + GET_VAL(BUFDATALEN, le64_to_cpu(raw_desc->m1)), + DMA_TO_DEVICE); + + /* Checking for error */ + status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0)); + if (unlikely(status > 2)) { + xgene_enet_parse_error(cp_ring, netdev_priv(cp_ring->ndev), + status); + ret = -EIO; + } + + if (likely(skb)) { + dev_kfree_skb_any(skb); + } else { + netdev_err(cp_ring->ndev, "completion skb is NULL\n"); + ret = -EIO; + } + + return ret; +} + +static u64 xgene_enet_work_msg(struct sk_buff *skb) +{ + struct iphdr *iph; + u8 l3hlen, l4hlen = 0; + u8 csum_enable = 0; + u8 proto = 0; + u8 ethhdr; + u64 hopinfo; + + if (unlikely(skb->protocol != htons(ETH_P_IP)) && + unlikely(skb->protocol != htons(ETH_P_8021Q))) + goto out; + + if (unlikely(!(skb->dev->features & NETIF_F_IP_CSUM))) + goto out; + + iph = ip_hdr(skb); + if (unlikely(ip_is_fragment(iph))) + goto out; + + if (likely(iph->protocol == IPPROTO_TCP)) { + l4hlen = tcp_hdrlen(skb) >> 2; + csum_enable = 1; + proto = TSO_IPPROTO_TCP; + } else if (iph->protocol == IPPROTO_UDP) { + l4hlen = UDP_HDR_SIZE; + csum_enable = 1; + } +out: + l3hlen = ip_hdrlen(skb) >> 2; + ethhdr = xgene_enet_hdr_len(skb->data); + hopinfo = SET_VAL(TCPHDR, l4hlen) | + SET_VAL(IPHDR, l3hlen) | + SET_VAL(ETHHDR, ethhdr) | + SET_VAL(EC, csum_enable) | + SET_VAL(IS, proto) | + SET_BIT(IC) | + SET_BIT(TYPE_ETH_WORK_MESSAGE); + + return hopinfo; +} + +static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring, + struct sk_buff *skb) +{ + struct device *dev = ndev_to_dev(tx_ring->ndev); + struct xgene_enet_raw_desc *raw_desc; + dma_addr_t dma_addr; + u16 tail = tx_ring->tail; + u64 hopinfo; + + raw_desc = &tx_ring->raw_desc[tail]; + memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc)); + + dma_addr = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_addr)) { + netdev_err(tx_ring->ndev, "DMA mapping error\n"); + return -EINVAL; + } + + /* Hardware expects descriptor in little endian format */ + raw_desc->m0 = cpu_to_le64(tail); + raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) | + SET_VAL(BUFDATALEN, skb->len) | + SET_BIT(COHERENT)); + hopinfo = xgene_enet_work_msg(skb); + raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) | + hopinfo); + tx_ring->cp_ring->cp_skb[tail] = skb; + + return 0; +} + +static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct xgene_enet_desc_ring *tx_ring = pdata->tx_ring; + struct xgene_enet_desc_ring *cp_ring = tx_ring->cp_ring; + u32 tx_level, cq_level; + + tx_level = xgene_enet_ring_len(tx_ring); + cq_level = xgene_enet_ring_len(cp_ring); + if (unlikely(tx_level > pdata->tx_qcnt_hi || + cq_level > pdata->cp_qcnt_hi)) { + netif_stop_queue(ndev); + return NETDEV_TX_BUSY; + } + + if (xgene_enet_setup_tx_desc(tx_ring, skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + iowrite32(1, tx_ring->cmd); + skb_tx_timestamp(skb); + tx_ring->tail = (tx_ring->tail + 1) & (tx_ring->slots - 1); + + pdata->stats.tx_packets++; + pdata->stats.tx_bytes += skb->len; + + return NETDEV_TX_OK; +} + +static void xgene_enet_skip_csum(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + if (!ip_is_fragment(iph) || + (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, + struct xgene_enet_raw_desc *raw_desc) +{ + struct net_device *ndev; + struct xgene_enet_pdata *pdata; + struct device *dev; + struct xgene_enet_desc_ring *buf_pool; + u32 datalen, skb_index; + struct sk_buff *skb; + u8 status; + int ret = 0; + + ndev = rx_ring->ndev; + pdata = netdev_priv(ndev); + dev = ndev_to_dev(rx_ring->ndev); + buf_pool = rx_ring->buf_pool; + + dma_unmap_single(dev, GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)), + XGENE_ENET_MAX_MTU, DMA_FROM_DEVICE); + skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); + skb = buf_pool->rx_skb[skb_index]; + + /* checking for error */ + status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0)); + if (unlikely(status > 2)) { + dev_kfree_skb_any(skb); + xgene_enet_parse_error(rx_ring, netdev_priv(rx_ring->ndev), + status); + pdata->stats.rx_dropped++; + ret = -EIO; + goto out; + } + + /* strip off CRC as HW isn't doing this */ + datalen = GET_VAL(BUFDATALEN, le64_to_cpu(raw_desc->m1)); + datalen -= 4; + prefetch(skb->data - NET_IP_ALIGN); + skb_put(skb, datalen); + + skb_checksum_none_assert(skb); + skb->protocol = eth_type_trans(skb, ndev); + if (likely((ndev->features & NETIF_F_IP_CSUM) && + skb->protocol == htons(ETH_P_IP))) { + xgene_enet_skip_csum(skb); + } + + pdata->stats.rx_packets++; + pdata->stats.rx_bytes += datalen; + napi_gro_receive(&rx_ring->napi, skb); +out: + if (--rx_ring->nbufpool == 0) { + ret = xgene_enet_refill_bufpool(buf_pool, NUM_BUFPOOL); + rx_ring->nbufpool = NUM_BUFPOOL; + } + + return ret; +} + +static bool is_rx_desc(struct xgene_enet_raw_desc *raw_desc) +{ + return GET_VAL(FPQNUM, le64_to_cpu(raw_desc->m0)) ? true : false; +} + +static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring, + int budget) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ring->ndev); + struct xgene_enet_raw_desc *raw_desc; + u16 head = ring->head; + u16 slots = ring->slots - 1; + int ret, count = 0; + + do { + raw_desc = &ring->raw_desc[head]; + if (unlikely(xgene_enet_is_desc_slot_empty(raw_desc))) + break; + + if (is_rx_desc(raw_desc)) + ret = xgene_enet_rx_frame(ring, raw_desc); + else + ret = xgene_enet_tx_completion(ring, raw_desc); + xgene_enet_mark_desc_slot_empty(raw_desc); + + head = (head + 1) & slots; + count++; + + if (ret) + break; + } while (--budget); + + if (likely(count)) { + iowrite32(-count, ring->cmd); + ring->head = head; + + if (netif_queue_stopped(ring->ndev)) { + if (xgene_enet_ring_len(ring) < pdata->cp_qcnt_low) + netif_wake_queue(ring->ndev); + } + } + + return budget; +} + +static int xgene_enet_napi(struct napi_struct *napi, const int budget) +{ + struct xgene_enet_desc_ring *ring; + int processed; + + ring = container_of(napi, struct xgene_enet_desc_ring, napi); + processed = xgene_enet_process_ring(ring, budget); + + if (processed != budget) { + napi_complete(napi); + enable_irq(ring->irq); + } + + return processed; +} + +static void xgene_enet_timeout(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + + xgene_gmac_reset(pdata); +} + +static int xgene_enet_register_irq(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct device *dev = ndev_to_dev(ndev); + int ret; + + ret = devm_request_irq(dev, pdata->rx_ring->irq, xgene_enet_rx_irq, + IRQF_SHARED, ndev->name, pdata->rx_ring); + if (ret) { + netdev_err(ndev, "rx%d interrupt request failed\n", + pdata->rx_ring->irq); + } + + return ret; +} + +static void xgene_enet_free_irq(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata; + struct device *dev; + + pdata = netdev_priv(ndev); + dev = ndev_to_dev(ndev); + devm_free_irq(dev, pdata->rx_ring->irq, pdata->rx_ring); +} + +static int xgene_enet_open(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + int ret; + + xgene_gmac_tx_enable(pdata); + xgene_gmac_rx_enable(pdata); + + ret = xgene_enet_register_irq(ndev); + if (ret) + return ret; + napi_enable(&pdata->rx_ring->napi); + + if (pdata->phy_dev) + phy_start(pdata->phy_dev); + + netif_start_queue(ndev); + + return ret; +} + +static int xgene_enet_close(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + + netif_stop_queue(ndev); + + if (pdata->phy_dev) + phy_stop(pdata->phy_dev); + + napi_disable(&pdata->rx_ring->napi); + xgene_enet_free_irq(ndev); + xgene_enet_process_ring(pdata->rx_ring, -1); + + xgene_gmac_tx_disable(pdata); + xgene_gmac_rx_disable(pdata); + + return 0; +} + +static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring) +{ + struct xgene_enet_pdata *pdata; + struct device *dev; + + pdata = netdev_priv(ring->ndev); + dev = ndev_to_dev(ring->ndev); + + xgene_enet_clear_ring(ring); + dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); +} + +static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata) +{ + struct xgene_enet_desc_ring *buf_pool; + + if (pdata->tx_ring) { + xgene_enet_delete_ring(pdata->tx_ring); + pdata->tx_ring = NULL; + } + + if (pdata->rx_ring) { + buf_pool = pdata->rx_ring->buf_pool; + xgene_enet_delete_bufpool(buf_pool); + xgene_enet_delete_ring(buf_pool); + xgene_enet_delete_ring(pdata->rx_ring); + pdata->rx_ring = NULL; + } +} + +static int xgene_enet_get_ring_size(struct device *dev, + enum xgene_enet_ring_cfgsize cfgsize) +{ + int size = -EINVAL; + + switch (cfgsize) { + case RING_CFGSIZE_512B: + size = 0x200; + break; + case RING_CFGSIZE_2KB: + size = 0x800; + break; + case RING_CFGSIZE_16KB: + size = 0x4000; + break; + case RING_CFGSIZE_64KB: + size = 0x10000; + break; + case RING_CFGSIZE_512KB: + size = 0x80000; + break; + default: + dev_err(dev, "Unsupported cfg ring size %d\n", cfgsize); + break; + } + + return size; +} + +static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring) +{ + struct device *dev; + + if (!ring) + return; + + dev = ndev_to_dev(ring->ndev); + + if (ring->desc_addr) { + xgene_enet_clear_ring(ring); + dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma); + } + devm_kfree(dev, ring); +} + +static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata) +{ + struct device *dev = &pdata->pdev->dev; + struct xgene_enet_desc_ring *ring; + + ring = pdata->tx_ring; + if (ring && ring->cp_ring && ring->cp_ring->cp_skb) + devm_kfree(dev, ring->cp_ring->cp_skb); + xgene_enet_free_desc_ring(ring); + + ring = pdata->rx_ring; + if (ring && ring->buf_pool && ring->buf_pool->rx_skb) + devm_kfree(dev, ring->buf_pool->rx_skb); + xgene_enet_free_desc_ring(ring->buf_pool); + xgene_enet_free_desc_ring(ring); +} + +static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring( + struct net_device *ndev, u32 ring_num, + enum xgene_enet_ring_cfgsize cfgsize, u32 ring_id) +{ + struct xgene_enet_desc_ring *ring; + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct device *dev = ndev_to_dev(ndev); + int size; + + size = xgene_enet_get_ring_size(dev, cfgsize); + if (size < 0) + return NULL; + + ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring), + GFP_KERNEL); + if (!ring) + return NULL; + + ring->ndev = ndev; + ring->num = ring_num; + ring->cfgsize = cfgsize; + ring->id = ring_id; + + ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma, + GFP_KERNEL); + if (!ring->desc_addr) { + devm_kfree(dev, ring); + return NULL; + } + ring->size = size; + + ring->cmd_base = pdata->ring_cmd_addr + (ring->num << 6); + ring->cmd = ring->cmd_base + INC_DEC_CMD_ADDR; + pdata->rm = RM3; + ring = xgene_enet_setup_ring(ring); + netdev_dbg(ndev, "ring info: num=%d size=%d id=%d slots=%d\n", + ring->num, ring->size, ring->id, ring->slots); + + return ring; +} + +static u16 xgene_enet_get_ring_id(enum xgene_ring_owner owner, u8 bufnum) +{ + return (owner << 6) | (bufnum & GENMASK(5, 0)); +} + +static int xgene_enet_create_desc_rings(struct net_device *ndev) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct device *dev = ndev_to_dev(ndev); + struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring; + struct xgene_enet_desc_ring *buf_pool = NULL; + u8 cpu_bufnum = 0, eth_bufnum = 0; + u8 bp_bufnum = 0x20; + u16 ring_id, ring_num = 0; + int ret; + + /* allocate rx descriptor ring */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_CPU, cpu_bufnum++); + rx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, ring_id); + if (!rx_ring) { + ret = -ENOMEM; + goto err; + } + + /* allocate buffer pool for receiving packets */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, bp_bufnum++); + buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_2KB, ring_id); + if (!buf_pool) { + ret = -ENOMEM; + goto err; + } + + rx_ring->nbufpool = NUM_BUFPOOL; + rx_ring->buf_pool = buf_pool; + rx_ring->irq = pdata->rx_irq; + buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots, + sizeof(struct sk_buff *), GFP_KERNEL); + if (!buf_pool->rx_skb) { + ret = -ENOMEM; + goto err; + } + + buf_pool->dst_ring_num = xgene_enet_dst_ring_num(buf_pool); + rx_ring->buf_pool = buf_pool; + pdata->rx_ring = rx_ring; + + /* allocate tx descriptor ring */ + ring_id = xgene_enet_get_ring_id(RING_OWNER_ETH0, eth_bufnum++); + tx_ring = xgene_enet_create_desc_ring(ndev, ring_num++, + RING_CFGSIZE_16KB, ring_id); + if (!tx_ring) { + ret = -ENOMEM; + goto err; + } + pdata->tx_ring = tx_ring; + + cp_ring = pdata->rx_ring; + cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots, + sizeof(struct sk_buff *), GFP_KERNEL); + if (!cp_ring->cp_skb) { + ret = -ENOMEM; + goto err; + } + pdata->tx_ring->cp_ring = cp_ring; + pdata->tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring); + + pdata->tx_qcnt_hi = pdata->tx_ring->slots / 2; + pdata->cp_qcnt_hi = pdata->rx_ring->slots / 2; + pdata->cp_qcnt_low = pdata->cp_qcnt_hi / 2; + + return 0; + +err: + xgene_enet_free_desc_rings(pdata); + return ret; +} + +static struct rtnl_link_stats64 *xgene_enet_get_stats64( + struct net_device *ndev, + struct rtnl_link_stats64 *storage) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + struct rtnl_link_stats64 *stats = &pdata->stats; + + stats->rx_errors += stats->rx_length_errors + + stats->rx_crc_errors + + stats->rx_frame_errors + + stats->rx_fifo_errors; + memcpy(storage, &pdata->stats, sizeof(struct rtnl_link_stats64)); + + return storage; +} + +static int xgene_enet_set_mac_address(struct net_device *ndev, void *addr) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + int ret; + + ret = eth_mac_addr(ndev, addr); + if (ret) + return ret; + xgene_gmac_set_mac_addr(pdata); + + return ret; +} + +static const struct net_device_ops xgene_ndev_ops = { + .ndo_open = xgene_enet_open, + .ndo_stop = xgene_enet_close, + .ndo_start_xmit = xgene_enet_start_xmit, + .ndo_tx_timeout = xgene_enet_timeout, + .ndo_get_stats64 = xgene_enet_get_stats64, + .ndo_change_mtu = eth_change_mtu, + .ndo_set_mac_address = xgene_enet_set_mac_address, +}; + +static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) +{ + struct platform_device *pdev; + struct net_device *ndev; + struct device *dev; + struct resource *res; + void __iomem *base_addr; + const char *mac; + int ret; + + pdev = pdata->pdev; + dev = &pdev->dev; + ndev = pdata->ndev; + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "enet_csr"); + if (!res) { + dev_err(dev, "Resource enet_csr not defined\n"); + return -ENODEV; + } + pdata->base_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->base_addr)) { + dev_err(dev, "Unable to retrieve ENET Port CSR region\n"); + return PTR_ERR(pdata->base_addr); + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_csr"); + if (!res) { + dev_err(dev, "Resource ring_csr not defined\n"); + return -ENODEV; + } + pdata->ring_csr_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->ring_csr_addr)) { + dev_err(dev, "Unable to retrieve ENET Ring CSR region\n"); + return PTR_ERR(pdata->ring_csr_addr); + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ring_cmd"); + if (!res) { + dev_err(dev, "Resource ring_cmd not defined\n"); + return -ENODEV; + } + pdata->ring_cmd_addr = devm_ioremap_resource(dev, res); + if (IS_ERR(pdata->ring_cmd_addr)) { + dev_err(dev, "Unable to retrieve ENET Ring command region\n"); + return PTR_ERR(pdata->ring_cmd_addr); + } + + ret = platform_get_irq(pdev, 0); + if (ret <= 0) { + dev_err(dev, "Unable to get ENET Rx IRQ\n"); + ret = ret ? : -ENXIO; + return ret; + } + pdata->rx_irq = ret; + + mac = of_get_mac_address(dev->of_node); + if (mac) + memcpy(ndev->dev_addr, mac, ndev->addr_len); + else + eth_hw_addr_random(ndev); + memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len); + + pdata->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (pdata->phy_mode < 0) { + dev_err(dev, "Incorrect phy-connection-type in DTS\n"); + return -EINVAL; + } + + pdata->clk = devm_clk_get(&pdev->dev, NULL); + ret = IS_ERR(pdata->clk); + if (IS_ERR(pdata->clk)) { + dev_err(&pdev->dev, "can't get clock\n"); + ret = PTR_ERR(pdata->clk); + return ret; + } + + base_addr = pdata->base_addr; + pdata->eth_csr_addr = base_addr + BLOCK_ETH_CSR_OFFSET; + pdata->eth_ring_if_addr = base_addr + BLOCK_ETH_RING_IF_OFFSET; + pdata->eth_diag_csr_addr = base_addr + BLOCK_ETH_DIAG_CSR_OFFSET; + pdata->mcx_mac_addr = base_addr + BLOCK_ETH_MAC_OFFSET; + pdata->mcx_stats_addr = base_addr + BLOCK_ETH_STATS_OFFSET; + pdata->mcx_mac_csr_addr = base_addr + BLOCK_ETH_MAC_CSR_OFFSET; + pdata->rx_buff_cnt = NUM_PKT_BUF; + + return ret; +} + +static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) +{ + struct net_device *ndev = pdata->ndev; + struct xgene_enet_desc_ring *buf_pool; + u16 dst_ring_num; + int ret; + + xgene_gmac_tx_disable(pdata); + xgene_gmac_rx_disable(pdata); + + ret = xgene_enet_create_desc_rings(ndev); + if (ret) { + netdev_err(ndev, "Error in ring configuration\n"); + return ret; + } + + /* setup buffer pool */ + buf_pool = pdata->rx_ring->buf_pool; + xgene_enet_init_bufpool(buf_pool); + ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt); + if (ret) { + xgene_enet_delete_desc_rings(pdata); + return ret; + } + + dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring); + xgene_enet_cle_bypass(pdata, dst_ring_num, buf_pool->id); + + return ret; +} + +static int xgene_enet_probe(struct platform_device *pdev) +{ + struct net_device *ndev; + struct xgene_enet_pdata *pdata; + struct device *dev = &pdev->dev; + struct napi_struct *napi; + int ret; + + ndev = alloc_etherdev(sizeof(struct xgene_enet_pdata)); + if (!ndev) + return -ENOMEM; + + pdata = netdev_priv(ndev); + + pdata->pdev = pdev; + pdata->ndev = ndev; + SET_NETDEV_DEV(ndev, dev); + platform_set_drvdata(pdev, pdata); + ndev->netdev_ops = &xgene_ndev_ops; + xgene_enet_set_ethtool_ops(ndev); + ndev->features |= NETIF_F_IP_CSUM | + NETIF_F_GSO | + NETIF_F_GRO; + + ret = xgene_enet_get_resources(pdata); + if (ret) + goto err; + + xgene_enet_reset(pdata); + xgene_gmac_init(pdata, SPEED_1000); + + ret = register_netdev(ndev); + if (ret) { + netdev_err(ndev, "Failed to register netdev\n"); + goto err; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (ret) { + netdev_err(ndev, "No usable DMA configuration\n"); + goto err; + } + + ret = xgene_enet_init_hw(pdata); + if (ret) + goto err; + + napi = &pdata->rx_ring->napi; + netif_napi_add(ndev, napi, xgene_enet_napi, NAPI_POLL_WEIGHT); + ret = xgene_enet_mdio_config(pdata); + + return ret; +err: + free_netdev(ndev); + return ret; +} + +static int xgene_enet_remove(struct platform_device *pdev) +{ + struct xgene_enet_pdata *pdata; + struct net_device *ndev; + + pdata = platform_get_drvdata(pdev); + ndev = pdata->ndev; + + xgene_gmac_rx_disable(pdata); + xgene_gmac_tx_disable(pdata); + + netif_napi_del(&pdata->rx_ring->napi); + xgene_enet_mdio_remove(pdata); + xgene_enet_delete_desc_rings(pdata); + unregister_netdev(ndev); + xgene_gport_shutdown(pdata); + free_netdev(ndev); + + return 0; +} + +static struct of_device_id xgene_enet_match[] = { + {.compatible = "apm,xgene-enet",}, + {}, +}; + +MODULE_DEVICE_TABLE(of, xgene_enet_match); + +static struct platform_driver xgene_enet_driver = { + .driver = { + .name = "xgene-enet", + .of_match_table = xgene_enet_match, + }, + .probe = xgene_enet_probe, + .remove = xgene_enet_remove, +}; + +module_platform_driver(xgene_enet_driver); + +MODULE_DESCRIPTION("APM X-Gene SoC Ethernet driver"); +MODULE_VERSION(XGENE_DRV_VERSION); +MODULE_AUTHOR("Keyur Chudgar <kchudgar@apm.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h new file mode 100644 index 0000000..0815866 --- /dev/null +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -0,0 +1,135 @@ +/* Applied Micro X-Gene SoC Ethernet Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Authors: Iyappan Subramanian <isubramanian@apm.com> + * Ravi Patel <rapatel@apm.com> + * Keyur Chudgar <kchudgar@apm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __XGENE_ENET_MAIN_H__ +#define __XGENE_ENET_MAIN_H__ + +#include <linux/clk.h> +#include <linux/of_platform.h> +#include <linux/of_net.h> +#include <linux/of_mdio.h> +#include <linux/module.h> +#include <net/ip.h> +#include <linux/prefetch.h> +#include <linux/if_vlan.h> +#include <linux/phy.h> +#include "xgene_enet_hw.h" + +#define XGENE_DRV_VERSION "v1.0" +#define XGENE_ENET_MAX_MTU 1536 +#define SKB_BUFFER_SIZE (XGENE_ENET_MAX_MTU - NET_IP_ALIGN) +#define NUM_PKT_BUF 64 +#define NUM_BUFPOOL 32 + +/* software context of a descriptor ring */ +struct xgene_enet_desc_ring { + struct net_device *ndev; + u16 id; + u16 num; + u16 head; + u16 tail; + u16 slots; + u16 irq; + u32 size; + u32 state[NUM_RING_CONFIG]; + void __iomem *cmd_base; + void __iomem *cmd; + dma_addr_t dma; + u16 dst_ring_num; + u8 nbufpool; + struct sk_buff *(*rx_skb); + struct sk_buff *(*cp_skb); + enum xgene_enet_ring_cfgsize cfgsize; + struct xgene_enet_desc_ring *cp_ring; + struct xgene_enet_desc_ring *buf_pool; + struct napi_struct napi; + union { + void *desc_addr; + struct xgene_enet_raw_desc *raw_desc; + struct xgene_enet_raw_desc16 *raw_desc16; + }; +}; + +/* ethernet private data */ +struct xgene_enet_pdata { + struct net_device *ndev; + struct mii_bus *mdio_bus; + struct phy_device *phy_dev; + int phy_speed; + struct clk *clk; + struct platform_device *pdev; + struct xgene_enet_desc_ring *tx_ring; + struct xgene_enet_desc_ring *rx_ring; + char *dev_name; + u32 rx_buff_cnt; + u32 tx_qcnt_hi; + u32 cp_qcnt_hi; + u32 cp_qcnt_low; + u32 rx_irq; + void __iomem *eth_csr_addr; + void __iomem *eth_ring_if_addr; + void __iomem *eth_diag_csr_addr; + void __iomem *mcx_mac_addr; + void __iomem *mcx_stats_addr; + void __iomem *mcx_mac_csr_addr; + void __iomem *base_addr; + void __iomem *ring_csr_addr; + void __iomem *ring_cmd_addr; + u32 phy_addr; + int phy_mode; + u32 speed; + u16 rm; + struct rtnl_link_stats64 stats; +}; + +/* Set the specified value into a bit-field defined by its starting position + * and length within a single u64. + */ +static inline u64 xgene_enet_set_field_value(int pos, int len, u64 val) +{ + return (val & ((1ULL << len) - 1)) << pos; +} + +#define SET_VAL(field, val) \ + xgene_enet_set_field_value(field ## _POS, field ## _LEN, val) + +#define SET_BIT(field) \ + xgene_enet_set_field_value(field ## _POS, 1, 1) + +/* Get the value from a bit-field defined by its starting position + * and length within the specified u64. + */ +static inline u64 xgene_enet_get_field_value(int pos, int len, u64 src) +{ + return (src >> pos) & ((1ULL << len) - 1); +} + +#define GET_VAL(field, src) \ + xgene_enet_get_field_value(field ## _POS, field ## _LEN, src) + +static inline struct device *ndev_to_dev(struct net_device *ndev) +{ + return ndev->dev.parent; +} + +void xgene_enet_set_ethtool_ops(struct net_device *netdev); + +#endif /* __XGENE_ENET_MAIN_H__ */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ce455ae..3f9d4de 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -739,7 +739,6 @@ static void bcmgenet_power_down(struct bcmgenet_priv *priv, case GENET_POWER_PASSIVE: /* Power down LED */ - bcmgenet_mii_reset(priv->dev); if (priv->hw_params->flags & GENET_HAS_EXT) { reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); reg |= (EXT_PWR_DOWN_PHY | @@ -779,7 +778,9 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - bcmgenet_mii_reset(priv->dev); + + if (mode == GENET_POWER_PASSIVE) + bcmgenet_mii_reset(priv->dev); } /* ioctl handle special commands that are not present in ethtool. */ @@ -1961,7 +1962,8 @@ static void bcmgenet_set_hw_addr(struct bcmgenet_priv *priv, static int bcmgenet_wol_resume(struct bcmgenet_priv *priv) { /* From WOL-enabled suspend, switch to regular clock */ - clk_disable_unprepare(priv->clk_wol); + if (priv->wolopts) + clk_disable_unprepare(priv->clk_wol); phy_init_hw(priv->phydev); /* Speed settings must be restored */ @@ -2164,6 +2166,10 @@ static void bcmgenet_netif_stop(struct net_device *dev) * disabled no new work will be scheduled. */ cancel_work_sync(&priv->bcmgenet_irq_work); + + priv->old_pause = -1; + priv->old_link = -1; + priv->old_duplex = -1; } static int bcmgenet_close(struct net_device *dev) @@ -2533,6 +2539,13 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->pdev = pdev; priv->version = (enum bcmgenet_version)of_id->data; + priv->clk = devm_clk_get(&priv->pdev->dev, "enet"); + if (IS_ERR(priv->clk)) + dev_warn(&priv->pdev->dev, "failed to get enet clock\n"); + + if (!IS_ERR(priv->clk)) + clk_prepare_enable(priv->clk); + bcmgenet_set_hw_params(priv); /* Mii wait queue */ @@ -2541,17 +2554,10 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->rx_buf_len = RX_BUF_LENGTH; INIT_WORK(&priv->bcmgenet_irq_work, bcmgenet_irq_task); - priv->clk = devm_clk_get(&priv->pdev->dev, "enet"); - if (IS_ERR(priv->clk)) - dev_warn(&priv->pdev->dev, "failed to get enet clock\n"); - priv->clk_wol = devm_clk_get(&priv->pdev->dev, "enet-wol"); if (IS_ERR(priv->clk_wol)) dev_warn(&priv->pdev->dev, "failed to get enet-wol clock\n"); - if (!IS_ERR(priv->clk)) - clk_prepare_enable(priv->clk); - err = reset_umac(priv); if (err) goto err_clk_disable; @@ -2611,6 +2617,8 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); + phy_suspend(priv->phydev); + netif_device_detach(dev); /* Disable MAC receive */ @@ -2661,9 +2669,7 @@ static int bcmgenet_resume(struct device *d) if (ret) goto out_clk_disable; - if (priv->wolopts) - ret = bcmgenet_wol_resume(priv); - + ret = bcmgenet_wol_resume(priv); if (ret) goto out_clk_disable; @@ -2678,6 +2684,9 @@ static int bcmgenet_resume(struct device *d) bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); } + if (priv->wolopts) + bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); + /* Disable RX/TX DMA and flush TX queues */ dma_ctrl = bcmgenet_dma_disable(priv); @@ -2693,6 +2702,8 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); + phy_resume(priv->phydev); + bcmgenet_netif_start(dev); return 0; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 1896161..c88f7ae 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -129,7 +129,10 @@ static void bcmgenet_mii_setup(struct net_device *dev) cmd_bits |= CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE; } - if (status_changed) { + if (!status_changed) + return; + + if (phydev->link) { reg = bcmgenet_umac_readl(priv, UMAC_CMD); reg &= ~((CMD_SPEED_MASK << CMD_SPEED_SHIFT) | CMD_HD_EN | @@ -137,8 +140,9 @@ static void bcmgenet_mii_setup(struct net_device *dev) reg |= cmd_bits; bcmgenet_umac_writel(priv, reg, UMAC_CMD); - phy_print_status(phydev); } + + phy_print_status(phydev); } void bcmgenet_mii_reset(struct net_device *dev) @@ -303,12 +307,12 @@ static int bcmgenet_mii_probe(struct net_device *dev) /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ - if (of_phy_is_fixed_link(dn)) { + if (!priv->phy_dn && of_phy_is_fixed_link(dn)) { ret = of_phy_register_fixed_link(dn); if (ret) return ret; - priv->phy_dn = dn; + priv->phy_dn = of_node_get(dn); } phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, 0, @@ -444,6 +448,7 @@ int bcmgenet_mii_init(struct net_device *dev) return 0; out: + of_node_put(priv->phy_dn); mdiobus_unregister(priv->mii_bus); out_free: kfree(priv->mii_bus->irq); @@ -455,6 +460,7 @@ void bcmgenet_mii_exit(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); + of_node_put(priv->phy_dn); mdiobus_unregister(priv->mii_bus); kfree(priv->mii_bus->irq); mdiobus_free(priv->mii_bus); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ba3cead..3ac5d23 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14093,8 +14093,9 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, spin_lock_bh(&tp->lock); if (!tp->hw_stats) { + *stats = tp->net_stats_prev; spin_unlock_bh(&tp->lock); - return &tp->net_stats_prev; + return stats; } tg3_get_nstats(tp, stats); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c9b922c..d572821 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -50,13 +50,13 @@ #include "cxgb4_uld.h" #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x09 -#define T4FW_VERSION_MICRO 0x17 +#define T4FW_VERSION_MINOR 0x0B +#define T4FW_VERSION_MICRO 0x1B #define T4FW_VERSION_BUILD 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x09 -#define T5FW_VERSION_MICRO 0x17 +#define T5FW_VERSION_MINOR 0x0B +#define T5FW_VERSION_MICRO 0x1B #define T5FW_VERSION_BUILD 0x00 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) @@ -522,6 +522,9 @@ struct sge_txq { struct sge_eth_txq { /* state for an SGE Ethernet Tx queue */ struct sge_txq q; struct netdev_queue *txq; /* associated netdev TX queue */ +#ifdef CONFIG_CHELSIO_T4_DCB + u8 dcb_prio; /* DCB Priority bound to queue */ +#endif unsigned long tso; /* # of TSO requests */ unsigned long tx_cso; /* # of Tx checksum offloads */ unsigned long vlan_ins; /* # of Tx VLAN insertions */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 0d3a9df..8edf0f5 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -20,6 +20,17 @@ #include "cxgb4.h" +/* DCBx version control + */ +char *dcb_ver_array[] = { + "Unknown", + "DCBx-CIN", + "DCBx-CEE 1.01", + "DCBx-IEEE", + "", "", "", + "Auto Negotiated" +}; + /* Initialize a port's Data Center Bridging state. Typically used after a * Link Down event. */ @@ -27,25 +38,45 @@ void cxgb4_dcb_state_init(struct net_device *dev) { struct port_info *pi = netdev2pinfo(dev); struct port_dcb_info *dcb = &pi->dcb; + int version_temp = dcb->dcb_version; memset(dcb, 0, sizeof(struct port_dcb_info)); dcb->state = CXGB4_DCB_STATE_START; + if (version_temp) + dcb->dcb_version = version_temp; + + netdev_dbg(dev, "%s: Initializing DCB state for port[%d]\n", + __func__, pi->port_id); +} + +void cxgb4_dcb_version_init(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + struct port_dcb_info *dcb = &pi->dcb; + + /* Any writes here are only done on kernels that exlicitly need + * a specific version, say < 2.6.38 which only support CEE + */ + dcb->dcb_version = FW_PORT_DCB_VER_AUTO; } /* Finite State machine for Data Center Bridging. */ void cxgb4_dcb_state_fsm(struct net_device *dev, - enum cxgb4_dcb_state_input input) + enum cxgb4_dcb_state_input transition_to) { struct port_info *pi = netdev2pinfo(dev); struct port_dcb_info *dcb = &pi->dcb; struct adapter *adap = pi->adapter; + enum cxgb4_dcb_state current_state = dcb->state; - switch (input) { - case CXGB4_DCB_INPUT_FW_DISABLED: { - /* Firmware tells us it's not doing DCB */ - switch (dcb->state) { - case CXGB4_DCB_STATE_START: { + netdev_dbg(dev, "%s: State change from %d to %d for %s\n", + __func__, dcb->state, transition_to, dev->name); + + switch (current_state) { + case CXGB4_DCB_STATE_START: { + switch (transition_to) { + case CXGB4_DCB_INPUT_FW_DISABLED: { /* we're going to use Host DCB */ dcb->state = CXGB4_DCB_STATE_HOST; dcb->supported = CXGB4_DCBX_HOST_SUPPORT; @@ -53,48 +84,62 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, break; } - case CXGB4_DCB_STATE_HOST: { - /* we're alreaady in Host DCB mode */ + case CXGB4_DCB_INPUT_FW_ENABLED: { + /* we're going to use Firmware DCB */ + dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; + dcb->supported = CXGB4_DCBX_FW_SUPPORT; + break; + } + + case CXGB4_DCB_INPUT_FW_INCOMPLETE: { + /* expected transition */ + break; + } + + case CXGB4_DCB_INPUT_FW_ALLSYNCED: { + dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED; break; } default: - goto bad_state_transition; + goto bad_state_input; } break; } - case CXGB4_DCB_INPUT_FW_ENABLED: { - /* Firmware tells us that it is doing DCB */ - switch (dcb->state) { - case CXGB4_DCB_STATE_START: { - /* we're going to use Firmware DCB */ - dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE; - dcb->supported = CXGB4_DCBX_FW_SUPPORT; + case CXGB4_DCB_STATE_FW_INCOMPLETE: { + switch (transition_to) { + case CXGB4_DCB_INPUT_FW_ENABLED: { + /* we're alreaady in firmware DCB mode */ break; } - case CXGB4_DCB_STATE_FW_INCOMPLETE: - case CXGB4_DCB_STATE_FW_ALLSYNCED: { - /* we're alreaady in firmware DCB mode */ + case CXGB4_DCB_INPUT_FW_INCOMPLETE: { + /* we're already incomplete */ + break; + } + + case CXGB4_DCB_INPUT_FW_ALLSYNCED: { + dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED; + dcb->enabled = 1; + linkwatch_fire_event(dev); break; } default: - goto bad_state_transition; + goto bad_state_input; } break; } - case CXGB4_DCB_INPUT_FW_INCOMPLETE: { - /* Firmware tells us that its DCB state is incomplete */ - switch (dcb->state) { - case CXGB4_DCB_STATE_FW_INCOMPLETE: { - /* we're already incomplete */ + case CXGB4_DCB_STATE_FW_ALLSYNCED: { + switch (transition_to) { + case CXGB4_DCB_INPUT_FW_ENABLED: { + /* we're alreaady in firmware DCB mode */ break; } - case CXGB4_DCB_STATE_FW_ALLSYNCED: { + case CXGB4_DCB_INPUT_FW_INCOMPLETE: { /* We were successfully running with firmware DCB but * now it's telling us that it's in an "incomplete * state. We need to reset back to a ground state @@ -107,46 +152,48 @@ void cxgb4_dcb_state_fsm(struct net_device *dev, break; } - default: - goto bad_state_transition; - } - break; - } - - case CXGB4_DCB_INPUT_FW_ALLSYNCED: { - /* Firmware tells us that its DCB state is complete */ - switch (dcb->state) { - case CXGB4_DCB_STATE_FW_INCOMPLETE: { - dcb->state = CXGB4_DCB_STATE_FW_ALLSYNCED; + case CXGB4_DCB_INPUT_FW_ALLSYNCED: { + /* we're already all sync'ed + * this is only applicable for IEEE or + * when another VI already completed negotiaton + */ dcb->enabled = 1; linkwatch_fire_event(dev); break; } - case CXGB4_DCB_STATE_FW_ALLSYNCED: { - /* we're already all sync'ed */ + default: + goto bad_state_input; + } + break; + } + + case CXGB4_DCB_STATE_HOST: { + switch (transition_to) { + case CXGB4_DCB_INPUT_FW_DISABLED: { + /* we're alreaady in Host DCB mode */ break; } default: - goto bad_state_transition; + goto bad_state_input; } break; } default: - goto bad_state_input; + goto bad_state_transition; } return; bad_state_input: dev_err(adap->pdev_dev, "cxgb4_dcb_state_fsm: illegal input symbol %d\n", - input); + transition_to); return; bad_state_transition: dev_err(adap->pdev_dev, "cxgb4_dcb_state_fsm: bad state transition, state = %d, input = %d\n", - dcb->state, input); + current_state, transition_to); } /* Handle a DCB/DCBX update message from the firmware. @@ -160,6 +207,7 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, struct port_info *pi = netdev_priv(dev); struct port_dcb_info *dcb = &pi->dcb; int dcb_type = pcmd->u.dcb.pgid.type; + int dcb_running_version; /* Handle Firmware DCB Control messages separately since they drive * our state machine. @@ -171,6 +219,25 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, ? CXGB4_DCB_STATE_FW_ALLSYNCED : CXGB4_DCB_STATE_FW_INCOMPLETE); + if (dcb->dcb_version != FW_PORT_DCB_VER_UNKNOWN) { + dcb_running_version = FW_PORT_CMD_DCB_VERSION_GET( + be16_to_cpu( + pcmd->u.dcb.control.dcb_version_to_app_state)); + if (dcb_running_version == FW_PORT_DCB_VER_CEE1D01 || + dcb_running_version == FW_PORT_DCB_VER_IEEE) { + dcb->dcb_version = dcb_running_version; + dev_warn(adap->pdev_dev, "Interface %s is running %s\n", + dev->name, + dcb_ver_array[dcb->dcb_version]); + } else { + dev_warn(adap->pdev_dev, + "Something screwed up, requested firmware for %s, but firmware returned %s instead\n", + dcb_ver_array[dcb->dcb_version], + dcb_ver_array[dcb_running_version]); + dcb->dcb_version = FW_PORT_DCB_VER_UNKNOWN; + } + } + cxgb4_dcb_state_fsm(dev, input); return; } @@ -199,7 +266,11 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, dcb->pg_num_tcs_supported = fwdcb->pgrate.num_tcs_supported; memcpy(dcb->pgrate, &fwdcb->pgrate.pgrate, sizeof(dcb->pgrate)); + memcpy(dcb->tsa, &fwdcb->pgrate.tsa, + sizeof(dcb->tsa)); dcb->msgs |= CXGB4_DCB_FW_PGRATE; + if (dcb->msgs & CXGB4_DCB_FW_PGID) + IEEE_FAUX_SYNC(dev, dcb); break; case FW_PORT_DCB_TYPE_PRIORATE: @@ -212,6 +283,7 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, dcb->pfcen = fwdcb->pfc.pfcen; dcb->pfc_num_tcs_supported = fwdcb->pfc.max_pfc_tcs; dcb->msgs |= CXGB4_DCB_FW_PFC; + IEEE_FAUX_SYNC(dev, dcb); break; case FW_PORT_DCB_TYPE_APP_ID: { @@ -220,13 +292,25 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, struct app_priority *ap = &dcb->app_priority[idx]; struct dcb_app app = { - .selector = fwap->sel_field, .protocol = be16_to_cpu(fwap->protocolid), - .priority = fwap->user_prio_map, }; int err; - err = dcb_setapp(dev, &app); + /* Convert from firmware format to relevant format + * when using app selector + */ + if (dcb->dcb_version == FW_PORT_DCB_VER_IEEE) { + app.selector = (fwap->sel_field + 1); + app.priority = ffs(fwap->user_prio_map) - 1; + err = dcb_ieee_setapp(dev, &app); + IEEE_FAUX_SYNC(dev, dcb); + } else { + /* Default is CEE */ + app.selector = !!(fwap->sel_field); + app.priority = fwap->user_prio_map; + err = dcb_setapp(dev, &app); + } + if (err) dev_err(adap->pdev_dev, "Failed DCB Set Application Priority: sel=%d, prot=%d, prio=%d, err=%d\n", @@ -408,9 +492,10 @@ static void cxgb4_getpgbwgcfg(struct net_device *dev, int pgid, u8 *bw_per, if (err != FW_PORT_DCB_CFG_SUCCESS) { dev_err(adap->pdev_dev, "DCB read PGRATE failed with %d\n", -err); - } else { - *bw_per = pcmd.u.dcb.pgrate.pgrate[pgid]; + return; } + + *bw_per = pcmd.u.dcb.pgrate.pgrate[pgid]; } static void cxgb4_getpgbwgcfg_tx(struct net_device *dev, int pgid, u8 *bw_per) @@ -637,7 +722,8 @@ static int __cxgb4_getapp(struct net_device *dev, u8 app_idtype, u16 app_id, return err; } if (be16_to_cpu(pcmd.u.dcb.app_priority.protocolid) == app_id) - return pcmd.u.dcb.app_priority.user_prio_map; + if (pcmd.u.dcb.app_priority.sel_field == app_idtype) + return pcmd.u.dcb.app_priority.user_prio_map; /* exhausted app list */ if (!pcmd.u.dcb.app_priority.protocolid) @@ -657,8 +743,8 @@ static int cxgb4_getapp(struct net_device *dev, u8 app_idtype, u16 app_id) /* Write a new Application User Priority Map for the specified Application ID */ -static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, - u8 app_prio) +static int __cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, + u8 app_prio) { struct fw_port_cmd pcmd; struct port_info *pi = netdev2pinfo(dev); @@ -673,10 +759,6 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, if (!netif_carrier_ok(dev)) return -ENOLINK; - if (app_idtype != DCB_APP_IDTYPE_ETHTYPE && - app_idtype != DCB_APP_IDTYPE_PORTNUM) - return -EINVAL; - for (i = 0; i < CXGB4_MAX_DCBX_APP_SUPPORTED; i++) { INIT_PORT_DCB_READ_LOCAL_CMD(pcmd, pi->port_id); pcmd.u.dcb.app_priority.type = FW_PORT_DCB_TYPE_APP_ID; @@ -725,6 +807,30 @@ static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, return 0; } +/* Priority for CEE inside dcb_app is bitmask, with 0 being an invalid value */ +static int cxgb4_setapp(struct net_device *dev, u8 app_idtype, u16 app_id, + u8 app_prio) +{ + int ret; + struct dcb_app app = { + .selector = app_idtype, + .protocol = app_id, + .priority = app_prio, + }; + + if (app_idtype != DCB_APP_IDTYPE_ETHTYPE && + app_idtype != DCB_APP_IDTYPE_PORTNUM) + return -EINVAL; + + /* Convert app_idtype to a format that firmware understands */ + ret = __cxgb4_setapp(dev, app_idtype == DCB_APP_IDTYPE_ETHTYPE ? + app_idtype : 3, app_id, app_prio); + if (ret) + return ret; + + return dcb_setapp(dev, &app); +} + /* Return whether IEEE Data Center Bridging has been negotiated. */ static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev) @@ -738,6 +844,7 @@ static inline int cxgb4_ieee_negotiation_complete(struct net_device *dev) /* Fill in the Application User Priority Map associated with the * specified Application. + * Priority for IEEE dcb_app is an integer, with 0 being a valid value */ static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app) { @@ -748,28 +855,39 @@ static int cxgb4_ieee_getapp(struct net_device *dev, struct dcb_app *app) if (!(app->selector && app->protocol)) return -EINVAL; - prio = dcb_getapp(dev, app); - if (prio == 0) { - /* If app doesn't exist in dcb_app table, try firmware - * directly. - */ - prio = __cxgb4_getapp(dev, app->selector, app->protocol, 0); - } + /* Try querying firmware first, use firmware format */ + prio = __cxgb4_getapp(dev, app->selector - 1, app->protocol, 0); + + if (prio < 0) + prio = dcb_ieee_getapp_mask(dev, app); - app->priority = prio; + app->priority = ffs(prio) - 1; return 0; } -/* Write a new Application User Priority Map for the specified App id. */ +/* Write a new Application User Priority Map for the specified Application ID. + * Priority for IEEE dcb_app is an integer, with 0 being a valid value + */ static int cxgb4_ieee_setapp(struct net_device *dev, struct dcb_app *app) { + int ret; + if (!cxgb4_ieee_negotiation_complete(dev)) return -EINVAL; - if (!(app->selector && app->protocol && app->priority)) + if (!(app->selector && app->protocol)) + return -EINVAL; + + if (!(app->selector > IEEE_8021QAZ_APP_SEL_ETHERTYPE && + app->selector < IEEE_8021QAZ_APP_SEL_ANY)) return -EINVAL; - cxgb4_setapp(dev, app->selector, app->protocol, app->priority); - return dcb_setapp(dev, app); + /* change selector to a format that firmware understands */ + ret = __cxgb4_setapp(dev, app->selector - 1, app->protocol, + (1 << app->priority)); + if (ret) + return ret; + + return dcb_ieee_setapp(dev, app); } /* Return our DCBX parameters. @@ -794,8 +912,9 @@ static u8 cxgb4_setdcbx(struct net_device *dev, u8 dcb_request) != dcb_request) return 1; - /* Can't set DCBX capabilities if DCBX isn't enabled. */ - if (!pi->dcb.state) + /* Can't enable DCB if we haven't successfully negotiated it. + */ + if (pi->dcb.state != CXGB4_DCB_STATE_FW_ALLSYNCED) return 1; /* There's currently no mechanism to allow for the firmware DCBX @@ -874,7 +993,8 @@ static int cxgb4_getpeerapp_tbl(struct net_device *dev, struct dcb_app *table) table[i].selector = pcmd.u.dcb.app_priority.sel_field; table[i].protocol = be16_to_cpu(pcmd.u.dcb.app_priority.protocolid); - table[i].priority = pcmd.u.dcb.app_priority.user_prio_map; + table[i].priority = + ffs(pcmd.u.dcb.app_priority.user_prio_map) - 1; } return err; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index 1ec1d83..2a6aa889 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -63,6 +63,13 @@ #define INIT_PORT_DCB_WRITE_CMD(__pcmd, __port) \ INIT_PORT_DCB_CMD(__pcmd, __port, EXEC, FW_PORT_ACTION_L2_DCB_CFG) +#define IEEE_FAUX_SYNC(__dev, __dcb) \ + do { \ + if ((__dcb)->dcb_version == FW_PORT_DCB_VER_IEEE) \ + cxgb4_dcb_state_fsm((__dev), \ + CXGB4_DCB_STATE_FW_ALLSYNCED); \ + } while (0) + /* States we can be in for a port's Data Center Bridging. */ enum cxgb4_dcb_state { @@ -108,11 +115,13 @@ struct port_dcb_info { * Native Endian format). */ u32 pgid; /* Priority Group[0..7] */ + u8 dcb_version; /* Running DCBx version */ u8 pfcen; /* Priority Flow Control[0..7] */ u8 pg_num_tcs_supported; /* max PG Traffic Classes */ u8 pfc_num_tcs_supported; /* max PFC Traffic Classes */ u8 pgrate[8]; /* Priority Group Rate[0..7] */ u8 priorate[8]; /* Priority Rate[0..7] */ + u8 tsa[8]; /* TSA Algorithm[0..7] */ struct app_priority { /* Application Information */ u8 user_prio_map; /* Priority Map bitfield */ u8 sel_field; /* Protocol ID interpretation */ @@ -121,6 +130,7 @@ struct port_dcb_info { }; void cxgb4_dcb_state_init(struct net_device *); +void cxgb4_dcb_version_init(struct net_device *); void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input); void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *); void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 8903e68..1afee70 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -522,6 +522,8 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) dev_err(adap->pdev_dev, "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n", enable ? "set" : "unset", pi->port_id, i, -err); + else + txq->dcb_prio = value; } } #endif /* CONFIG_CHELSIO_T4_DCB */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ff709e3..0549170 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1629,6 +1629,14 @@ enum fw_port_l2cfg_ctlbf { FW_PORT_L2_CTLBF_TXIPG = 0x20 }; +enum fw_port_dcb_versions { + FW_PORT_DCB_VER_UNKNOWN, + FW_PORT_DCB_VER_CEE1D0, + FW_PORT_DCB_VER_CEE1D01, + FW_PORT_DCB_VER_IEEE, + FW_PORT_DCB_VER_AUTO = 7 +}; + enum fw_port_dcb_cfg { FW_PORT_DCB_CFG_PG = 0x01, FW_PORT_DCB_CFG_PFC = 0x02, @@ -1709,6 +1717,7 @@ struct fw_port_cmd { __u8 r10_lo[5]; __u8 num_tcs_supported; __u8 pgrate[8]; + __u8 tsa[8]; } pgrate; struct fw_port_dcb_priorate { __u8 type; @@ -1735,7 +1744,7 @@ struct fw_port_cmd { struct fw_port_dcb_control { __u8 type; __u8 all_syncd_pkd; - __be16 pfc_state_to_app_state; + __be16 dcb_version_to_app_state; __be32 r11; __be64 r12; } control; @@ -1778,6 +1787,7 @@ struct fw_port_cmd { #define FW_PORT_CMD_DCBXDIS (1U << 7) #define FW_PORT_CMD_APPLY (1U << 7) #define FW_PORT_CMD_ALL_SYNCD (1U << 7) +#define FW_PORT_CMD_DCB_VERSION_GET(x) (((x) >> 8) & 0xf) #define FW_PORT_CMD_PPPEN(x) ((x) << 31) #define FW_PORT_CMD_TPSRC(x) ((x) << 28) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 23084fb..9b33057 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -93,7 +93,7 @@ enum dm9000_type { }; /* Structure/enum declaration ------------------------------- */ -typedef struct board_info { +struct board_info { void __iomem *io_addr; /* Register I/O base address */ void __iomem *io_data; /* Data I/O address */ @@ -141,7 +141,7 @@ typedef struct board_info { u32 wake_state; int ip_summed; -} board_info_t; +}; /* debug code */ @@ -151,7 +151,7 @@ typedef struct board_info { } \ } while (0) -static inline board_info_t *to_dm9000_board(struct net_device *dev) +static inline struct board_info *to_dm9000_board(struct net_device *dev) { return netdev_priv(dev); } @@ -162,7 +162,7 @@ static inline board_info_t *to_dm9000_board(struct net_device *dev) * Read a byte from I/O port */ static u8 -ior(board_info_t *db, int reg) +ior(struct board_info *db, int reg) { writeb(reg, db->io_addr); return readb(db->io_data); @@ -173,14 +173,14 @@ ior(board_info_t *db, int reg) */ static void -iow(board_info_t *db, int reg, int value) +iow(struct board_info *db, int reg, int value) { writeb(reg, db->io_addr); writeb(value, db->io_data); } static void -dm9000_reset(board_info_t *db) +dm9000_reset(struct board_info *db) { dev_dbg(db->dev, "resetting device\n"); @@ -272,7 +272,7 @@ static void dm9000_dumpblk_32bit(void __iomem *reg, int count) * Sleep, either by using msleep() or if we are suspending, then * use mdelay() to sleep. */ -static void dm9000_msleep(board_info_t *db, unsigned int ms) +static void dm9000_msleep(struct board_info *db, unsigned int ms) { if (db->in_suspend || db->in_timeout) mdelay(ms); @@ -284,7 +284,7 @@ static void dm9000_msleep(board_info_t *db, unsigned int ms) static int dm9000_phy_read(struct net_device *dev, int phy_reg_unused, int reg) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned long flags; unsigned int reg_save; int ret; @@ -330,7 +330,7 @@ static void dm9000_phy_write(struct net_device *dev, int phyaddr_unused, int reg, int value) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned long flags; unsigned long reg_save; @@ -408,7 +408,7 @@ static void dm9000_set_io(struct board_info *db, int byte_width) } } -static void dm9000_schedule_poll(board_info_t *db) +static void dm9000_schedule_poll(struct board_info *db) { if (db->type == TYPE_DM9000E) schedule_delayed_work(&db->phy_poll, HZ * 2); @@ -416,7 +416,7 @@ static void dm9000_schedule_poll(board_info_t *db) static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); if (!netif_running(dev)) return -EINVAL; @@ -425,7 +425,7 @@ static int dm9000_ioctl(struct net_device *dev, struct ifreq *req, int cmd) } static unsigned int -dm9000_read_locked(board_info_t *db, int reg) +dm9000_read_locked(struct board_info *db, int reg) { unsigned long flags; unsigned int ret; @@ -437,7 +437,7 @@ dm9000_read_locked(board_info_t *db, int reg) return ret; } -static int dm9000_wait_eeprom(board_info_t *db) +static int dm9000_wait_eeprom(struct board_info *db) { unsigned int status; int timeout = 8; /* wait max 8msec */ @@ -474,7 +474,7 @@ static int dm9000_wait_eeprom(board_info_t *db) * Read a word data from EEPROM */ static void -dm9000_read_eeprom(board_info_t *db, int offset, u8 *to) +dm9000_read_eeprom(struct board_info *db, int offset, u8 *to) { unsigned long flags; @@ -514,7 +514,7 @@ dm9000_read_eeprom(board_info_t *db, int offset, u8 *to) * Write a word data to SROM */ static void -dm9000_write_eeprom(board_info_t *db, int offset, u8 *data) +dm9000_write_eeprom(struct board_info *db, int offset, u8 *data) { unsigned long flags; @@ -546,7 +546,7 @@ dm9000_write_eeprom(board_info_t *db, int offset, u8 *data) static void dm9000_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); strlcpy(info->driver, CARDNAME, sizeof(info->driver)); strlcpy(info->version, DRV_VERSION, sizeof(info->version)); @@ -556,21 +556,21 @@ static void dm9000_get_drvinfo(struct net_device *dev, static u32 dm9000_get_msglevel(struct net_device *dev) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); return dm->msg_enable; } static void dm9000_set_msglevel(struct net_device *dev, u32 value) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); dm->msg_enable = value; } static int dm9000_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); mii_ethtool_gset(&dm->mii, cmd); return 0; @@ -578,21 +578,21 @@ static int dm9000_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) static int dm9000_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); return mii_ethtool_sset(&dm->mii, cmd); } static int dm9000_nway_reset(struct net_device *dev) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); return mii_nway_restart(&dm->mii); } static int dm9000_set_features(struct net_device *dev, netdev_features_t features) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); netdev_features_t changed = dev->features ^ features; unsigned long flags; @@ -608,7 +608,7 @@ static int dm9000_set_features(struct net_device *dev, static u32 dm9000_get_link(struct net_device *dev) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); u32 ret; if (dm->flags & DM9000_PLATF_EXT_PHY) @@ -629,7 +629,7 @@ static int dm9000_get_eeprom_len(struct net_device *dev) static int dm9000_get_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); int offset = ee->offset; int len = ee->len; int i; @@ -653,7 +653,7 @@ static int dm9000_get_eeprom(struct net_device *dev, static int dm9000_set_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); int offset = ee->offset; int len = ee->len; int done; @@ -691,7 +691,7 @@ static int dm9000_set_eeprom(struct net_device *dev, static void dm9000_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); memset(w, 0, sizeof(struct ethtool_wolinfo)); @@ -702,7 +702,7 @@ static void dm9000_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) static int dm9000_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); unsigned long flags; u32 opts = w->wolopts; u32 wcr = 0; @@ -752,7 +752,7 @@ static const struct ethtool_ops dm9000_ethtool_ops = { .set_eeprom = dm9000_set_eeprom, }; -static void dm9000_show_carrier(board_info_t *db, +static void dm9000_show_carrier(struct board_info *db, unsigned carrier, unsigned nsr) { int lpa; @@ -775,7 +775,7 @@ static void dm9000_poll_work(struct work_struct *w) { struct delayed_work *dw = to_delayed_work(w); - board_info_t *db = container_of(dw, board_info_t, phy_poll); + struct board_info *db = container_of(dw, struct board_info, phy_poll); struct net_device *ndev = db->ndev; if (db->flags & DM9000_PLATF_SIMPLE_PHY && @@ -843,7 +843,7 @@ static unsigned char dm9000_type_to_char(enum dm9000_type type) static void dm9000_hash_table_unlocked(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); struct netdev_hw_addr *ha; int i, oft; u32 hash_val; @@ -879,7 +879,7 @@ dm9000_hash_table_unlocked(struct net_device *dev) static void dm9000_hash_table(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned long flags; spin_lock_irqsave(&db->lock, flags); @@ -888,13 +888,13 @@ dm9000_hash_table(struct net_device *dev) } static void -dm9000_mask_interrupts(board_info_t *db) +dm9000_mask_interrupts(struct board_info *db) { iow(db, DM9000_IMR, IMR_PAR); } static void -dm9000_unmask_interrupts(board_info_t *db) +dm9000_unmask_interrupts(struct board_info *db) { iow(db, DM9000_IMR, db->imr_all); } @@ -905,7 +905,7 @@ dm9000_unmask_interrupts(board_info_t *db) static void dm9000_init_dm9000(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned int imr; unsigned int ncr; @@ -970,7 +970,7 @@ dm9000_init_dm9000(struct net_device *dev) /* Our watchdog timed out. Called by the networking layer */ static void dm9000_timeout(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); u8 reg_save; unsigned long flags; @@ -996,7 +996,7 @@ static void dm9000_send_packet(struct net_device *dev, int ip_summed, u16 pkt_len) { - board_info_t *dm = to_dm9000_board(dev); + struct board_info *dm = to_dm9000_board(dev); /* The DM9000 is not smart enough to leave fragmented packets alone. */ if (dm->ip_summed != ip_summed) { @@ -1023,7 +1023,7 @@ static int dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned long flags; - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); dm9000_dbg(db, 3, "%s:\n", __func__); @@ -1062,7 +1062,7 @@ dm9000_start_xmit(struct sk_buff *skb, struct net_device *dev) * receive the packet to upper layer, free the transmitted packet */ -static void dm9000_tx_done(struct net_device *dev, board_info_t *db) +static void dm9000_tx_done(struct net_device *dev, struct board_info *db) { int tx_status = ior(db, DM9000_NSR); /* Got TX status */ @@ -1094,7 +1094,7 @@ struct dm9000_rxhdr { static void dm9000_rx(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); struct dm9000_rxhdr rxhdr; struct sk_buff *skb; u8 rxbyte, *rdptr; @@ -1196,7 +1196,7 @@ dm9000_rx(struct net_device *dev) static irqreturn_t dm9000_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); int int_status; unsigned long flags; u8 reg_save; @@ -1246,7 +1246,7 @@ static irqreturn_t dm9000_interrupt(int irq, void *dev_id) static irqreturn_t dm9000_wol_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned long flags; unsigned nsr, wcr; @@ -1296,7 +1296,7 @@ static void dm9000_poll_controller(struct net_device *dev) static int dm9000_open(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); unsigned long irqflags = db->irq_res->flags & IRQF_TRIGGER_MASK; if (netif_msg_ifup(db)) @@ -1342,7 +1342,7 @@ dm9000_open(struct net_device *dev) static void dm9000_shutdown(struct net_device *dev) { - board_info_t *db = netdev_priv(dev); + struct board_info *db = netdev_priv(dev); /* RESET device */ dm9000_phy_write(dev, 0, MII_BMCR, BMCR_RESET); /* PHY RESET */ @@ -1358,7 +1358,7 @@ dm9000_shutdown(struct net_device *dev) static int dm9000_stop(struct net_device *ndev) { - board_info_t *db = netdev_priv(ndev); + struct board_info *db = netdev_priv(ndev); if (netif_msg_ifdown(db)) dev_dbg(db->dev, "shutting down %s\n", ndev->name); @@ -1681,7 +1681,7 @@ dm9000_drv_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - board_info_t *db; + struct board_info *db; if (ndev) { db = netdev_priv(ndev); @@ -1704,7 +1704,7 @@ dm9000_drv_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - board_info_t *db = netdev_priv(ndev); + struct board_info *db = netdev_priv(ndev); if (ndev) { if (netif_running(ndev)) { diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 811f135..43e08d0 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -897,5 +897,6 @@ void be_roce_dev_remove(struct be_adapter *); */ void be_roce_dev_open(struct be_adapter *); void be_roce_dev_close(struct be_adapter *); +void be_roce_dev_shutdown(struct be_adapter *); #endif /* BE_H */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b06e540..93ff8ef 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -5014,6 +5014,7 @@ static void be_shutdown(struct pci_dev *pdev) if (!adapter) return; + be_roce_dev_shutdown(adapter); cancel_delayed_work_sync(&adapter->work); cancel_delayed_work_sync(&adapter->func_recovery_work); diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 5bf1660..ef4672d 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter) { if (ocrdma_drv && adapter->ocrdma_dev && ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0); + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_UP); } void be_roce_dev_open(struct be_adapter *adapter) @@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter) { if (ocrdma_drv && adapter->ocrdma_dev && ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1); + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_DOWN); } void be_roce_dev_close(struct be_adapter *adapter) @@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter) } } +void be_roce_dev_shutdown(struct be_adapter *adapter) +{ + if (be_roce_supported(adapter)) { + mutex_lock(&be_adapter_list_lock); + if (ocrdma_drv && adapter->ocrdma_dev && + ocrdma_drv->state_change_handler) + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_SHUTDOWN); + mutex_unlock(&be_adapter_list_lock); + } +} + int be_roce_register_driver(struct ocrdma_driver *drv) { struct be_adapter *dev; diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index a3d9e96..e6f7eb1 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -62,7 +62,8 @@ struct ocrdma_driver { enum { BE_DEV_UP = 0, - BE_DEV_DOWN = 1 + BE_DEV_DOWN = 1, + BE_DEV_SHUTDOWN = 2 }; /* APIs for RoCE driver to register callback handlers, diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index bd53caf..9f7fa64 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -310,6 +310,7 @@ struct fec_enet_private { int mii_timeout; uint phy_speed; phy_interface_t phy_interface; + struct device_node *phy_node; int link; int full_duplex; int speed; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 66fe1f6..4f87dff 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -52,6 +52,7 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_gpio.h> +#include <linux/of_mdio.h> #include <linux/of_net.h> #include <linux/regulator/consumer.h> #include <linux/if_vlan.h> @@ -1648,29 +1649,37 @@ static int fec_enet_mii_probe(struct net_device *ndev) fep->phy_dev = NULL; - /* check for attached phy */ - for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { - if ((fep->mii_bus->phy_mask & (1 << phy_id))) - continue; - if (fep->mii_bus->phy_map[phy_id] == NULL) - continue; - if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) - continue; - if (dev_id--) - continue; - strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); - break; - } + if (fep->phy_node) { + phy_dev = of_phy_connect(ndev, fep->phy_node, + &fec_enet_adjust_link, 0, + fep->phy_interface); + } else { + /* check for attached phy */ + for (phy_id = 0; (phy_id < PHY_MAX_ADDR); phy_id++) { + if ((fep->mii_bus->phy_mask & (1 << phy_id))) + continue; + if (fep->mii_bus->phy_map[phy_id] == NULL) + continue; + if (fep->mii_bus->phy_map[phy_id]->phy_id == 0) + continue; + if (dev_id--) + continue; + strncpy(mdio_bus_id, fep->mii_bus->id, MII_BUS_ID_SIZE); + break; + } - if (phy_id >= PHY_MAX_ADDR) { - netdev_info(ndev, "no PHY, assuming direct connection to switch\n"); - strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); - phy_id = 0; + if (phy_id >= PHY_MAX_ADDR) { + netdev_info(ndev, "no PHY, assuming direct connection to switch\n"); + strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); + phy_id = 0; + } + + snprintf(phy_name, sizeof(phy_name), + PHY_ID_FMT, mdio_bus_id, phy_id); + phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, + fep->phy_interface); } - snprintf(phy_name, sizeof(phy_name), PHY_ID_FMT, mdio_bus_id, phy_id); - phy_dev = phy_connect(ndev, phy_name, &fec_enet_adjust_link, - fep->phy_interface); if (IS_ERR(phy_dev)) { netdev_err(ndev, "could not attach to PHY\n"); return PTR_ERR(phy_dev); @@ -1707,6 +1716,7 @@ static int fec_enet_mii_init(struct platform_device *pdev) struct fec_enet_private *fep = netdev_priv(ndev); const struct platform_device_id *id_entry = platform_get_device_id(fep->pdev); + struct device_node *node; int err = -ENXIO, i; /* @@ -1774,7 +1784,15 @@ static int fec_enet_mii_init(struct platform_device *pdev) for (i = 0; i < PHY_MAX_ADDR; i++) fep->mii_bus->irq[i] = PHY_POLL; - if (mdiobus_register(fep->mii_bus)) + node = of_get_child_by_name(pdev->dev.of_node, "mdio"); + if (node) { + err = of_mdiobus_register(fep->mii_bus, node); + of_node_put(node); + } else { + err = mdiobus_register(fep->mii_bus); + } + + if (err) goto err_out_free_mdio_irq; mii_cnt++; @@ -2527,6 +2545,7 @@ fec_probe(struct platform_device *pdev) struct resource *r; const struct of_device_id *of_id; static int dev_id; + struct device_node *np = pdev->dev.of_node, *phy_node; of_id = of_match_device(fec_dt_ids, &pdev->dev); if (of_id) @@ -2566,6 +2585,18 @@ fec_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); + phy_node = of_parse_phandle(np, "phy-handle", 0); + if (!phy_node && of_phy_is_fixed_link(np)) { + ret = of_phy_register_fixed_link(np); + if (ret < 0) { + dev_err(&pdev->dev, + "broken fixed-link specification\n"); + goto failed_phy; + } + phy_node = of_node_get(np); + } + fep->phy_node = phy_node; + ret = of_get_phy_mode(pdev->dev.of_node); if (ret < 0) { pdata = dev_get_platdata(&pdev->dev); @@ -2670,6 +2701,8 @@ failed_init: failed_regulator: fec_enet_clk_enable(ndev, false); failed_clk: +failed_phy: + of_node_put(phy_node); failed_ioremap: free_netdev(ndev); @@ -2691,6 +2724,7 @@ fec_drv_remove(struct platform_device *pdev) if (fep->ptp_clock) ptp_clock_unregister(fep->ptp_clock); fec_enet_clk_enable(ndev, false); + of_node_put(fep->phy_node); free_netdev(ndev); return 0; diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 9947765..ff55fbb 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -1015,8 +1015,7 @@ mpc52xx_fec_remove(struct platform_device *op) unregister_netdev(ndev); - if (priv->phy_node) - of_node_put(priv->phy_node); + of_node_put(priv->phy_node); priv->phy_node = NULL; irq_dispose_mapping(ndev->irq); diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index cfaf17b..748fd24 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -1033,7 +1033,7 @@ static int fs_enet_probe(struct platform_device *ofdev) /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ - fpi->phy_node = ofdev->dev.of_node; + fpi->phy_node = of_node_get(ofdev->dev.of_node); } if (of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc5125-fec")) { diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a6cf40e..fb29d04 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -892,12 +892,12 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev) /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ - if (of_phy_is_fixed_link(np)) { + if (!priv->phy_node && of_phy_is_fixed_link(np)) { err = of_phy_register_fixed_link(np); if (err) goto err_grp_init; - priv->phy_node = np; + priv->phy_node = of_node_get(np); } /* Find the TBI PHY. If it's not there, we don't support SGMII */ @@ -1435,10 +1435,8 @@ register_fail: unmap_group_regs(priv); gfar_free_rx_queues(priv); gfar_free_tx_queues(priv); - if (priv->phy_node) - of_node_put(priv->phy_node); - if (priv->tbi_node) - of_node_put(priv->tbi_node); + of_node_put(priv->phy_node); + of_node_put(priv->tbi_node); free_gfar_dev(priv); return err; } @@ -1447,10 +1445,8 @@ static int gfar_remove(struct platform_device *ofdev) { struct gfar_private *priv = platform_get_drvdata(ofdev); - if (priv->phy_node) - of_node_put(priv->phy_node); - if (priv->tbi_node) - of_node_put(priv->tbi_node); + of_node_put(priv->phy_node); + of_node_put(priv->tbi_node); unregister_netdev(priv->ndev); unmap_group_regs(priv); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 8ceaf7a..3cf0478 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3785,16 +3785,15 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info->uf_info.irq = irq_of_parse_and_map(np, 0); ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0); - if (!ug_info->phy_node) { - /* In the case of a fixed PHY, the DT node associated + if (!ug_info->phy_node && of_phy_is_fixed_link(np)) { + /* + * In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ - if (of_phy_is_fixed_link(np)) { - err = of_phy_register_fixed_link(np); - if (err) - return err; - } - ug_info->phy_node = np; + err = of_phy_register_fixed_link(np); + if (err) + return err; + ug_info->phy_node = of_node_get(np); } /* Find the TBI PHY node. If it's not there, we don't support SGMII */ @@ -3862,8 +3861,11 @@ static int ucc_geth_probe(struct platform_device* ofdev) /* Create an ethernet device instance */ dev = alloc_etherdev(sizeof(*ugeth)); - if (dev == NULL) + if (dev == NULL) { + of_node_put(ug_info->tbi_node); + of_node_put(ug_info->phy_node); return -ENOMEM; + } ugeth = netdev_priv(dev); spin_lock_init(&ugeth->lock); @@ -3897,6 +3899,8 @@ static int ucc_geth_probe(struct platform_device* ofdev) pr_err("%s: Cannot register net device, aborting\n", dev->name); free_netdev(dev); + of_node_put(ug_info->tbi_node); + of_node_put(ug_info->phy_node); return err; } @@ -3920,6 +3924,8 @@ static int ucc_geth_remove(struct platform_device* ofdev) unregister_netdev(dev); free_netdev(dev); ucc_geth_memclean(ugeth); + of_node_put(ugeth->ug_info->tbi_node); + of_node_put(ugeth->ug_info->phy_node); return 0; } diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index cfe7a74..a7139f5 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -99,23 +99,23 @@ static const struct ethtool_ops netdev_ethtool_ops; /* card type */ -typedef enum { MBH10302, MBH10304, TDK, CONTEC, LA501, UNGERMANN, +enum cardtype { MBH10302, MBH10304, TDK, CONTEC, LA501, UNGERMANN, XXX10304, NEC, KME -} cardtype_t; +}; /* driver specific data structure */ -typedef struct local_info_t { +struct local_info { struct pcmcia_device *p_dev; long open_time; uint tx_started:1; uint tx_queue; u_short tx_queue_len; - cardtype_t cardtype; + enum cardtype cardtype; u_short sent; u_char __iomem *base; -} local_info_t; +}; #define MC_FILTERBREAK 64 @@ -232,13 +232,13 @@ static const struct net_device_ops fjn_netdev_ops = { static int fmvj18x_probe(struct pcmcia_device *link) { - local_info_t *lp; + struct local_info *lp; struct net_device *dev; dev_dbg(&link->dev, "fmvj18x_attach()\n"); /* Make up a FMVJ18x specific data structure */ - dev = alloc_etherdev(sizeof(local_info_t)); + dev = alloc_etherdev(sizeof(struct local_info)); if (!dev) return -ENOMEM; lp = netdev_priv(dev); @@ -327,10 +327,10 @@ static int fmvj18x_ioprobe(struct pcmcia_device *p_dev, void *priv_data) static int fmvj18x_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); int i, ret; unsigned int ioaddr; - cardtype_t cardtype; + enum cardtype cardtype; char *card_name = "unknown"; u8 *buf; size_t len; @@ -584,7 +584,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) int i; struct net_device *dev = link->priv; unsigned int ioaddr; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); /* Allocate a small memory window */ link->resource[3]->flags = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; @@ -626,7 +626,7 @@ static void fmvj18x_release(struct pcmcia_device *link) { struct net_device *dev = link->priv; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); u_char __iomem *tmp; dev_dbg(&link->dev, "fmvj18x_release\n"); @@ -711,7 +711,7 @@ module_pcmcia_driver(fmvj18x_cs_driver); static irqreturn_t fjn_interrupt(int dummy, void *dev_id) { struct net_device *dev = dev_id; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr; unsigned short tx_stat, rx_stat; @@ -772,7 +772,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) static void fjn_tx_timeout(struct net_device *dev) { - struct local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; netdev_notice(dev, "transmit timed out with status %04x, %s?\n", @@ -802,7 +802,7 @@ static void fjn_tx_timeout(struct net_device *dev) static netdev_tx_t fjn_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; short length = skb->len; @@ -874,7 +874,7 @@ static netdev_tx_t fjn_start_xmit(struct sk_buff *skb, static void fjn_reset(struct net_device *dev) { - struct local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; int i; @@ -1058,7 +1058,7 @@ static int fjn_config(struct net_device *dev, struct ifmap *map){ static int fjn_open(struct net_device *dev) { - struct local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; pr_debug("fjn_open('%s').\n", dev->name); @@ -1083,7 +1083,7 @@ static int fjn_open(struct net_device *dev) static int fjn_close(struct net_device *dev) { - struct local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile index 775d996..cd473e2 100644 --- a/drivers/net/ethernet/ibm/ehea/Makefile +++ b/drivers/net/ethernet/ibm/ehea/Makefile @@ -1,6 +1,6 @@ # # Makefile for the eHEA ethernet device driver for IBM eServer System p # -ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o +ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o obj-$(CONFIG_EHEA) += ehea.o diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c index 5885603..06edfca 100644 --- a/drivers/net/ethernet/intel/e1000e/manage.c +++ b/drivers/net/ethernet/intel/e1000e/manage.c @@ -47,7 +47,7 @@ static u8 e1000_calculate_checksum(u8 *buffer, u32 length) * e1000_mng_enable_host_if - Checks host interface is enabled * @hw: pointer to the HW structure * - * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND + * Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND * * This function checks whether the HOST IF is enabled for command operation * and also checks whether the previous command is completed. It busy waits @@ -78,7 +78,7 @@ static s32 e1000_mng_enable_host_if(struct e1000_hw *hw) } if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { - e_dbg("Previous command timeout failed .\n"); + e_dbg("Previous command timeout failed.\n"); return -E1000_ERR_HOST_INTERFACE_COMMAND; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 6938fc1a..5d01db1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -33,6 +33,7 @@ #include <scsi/fc/fc_fcoe.h> #include <scsi/libfc.h> #include <scsi/libfcoe.h> +#include <uapi/linux/dcbnl.h> #include "i40e.h" #include "i40e_fcoe.h" diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d9f4972..eddec6b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4415,13 +4415,13 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) switch (vsi->back->hw.phy.link_info.link_speed) { case I40E_LINK_SPEED_40GB: - strncpy(speed, "40 Gbps", SPEED_SIZE); + strlcpy(speed, "40 Gbps", SPEED_SIZE); break; case I40E_LINK_SPEED_10GB: - strncpy(speed, "10 Gbps", SPEED_SIZE); + strlcpy(speed, "10 Gbps", SPEED_SIZE); break; case I40E_LINK_SPEED_1GB: - strncpy(speed, "1000 Mbps", SPEED_SIZE); + strlcpy(speed, "1000 Mbps", SPEED_SIZE); break; default: break; @@ -4429,16 +4429,16 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup) switch (vsi->back->hw.fc.current_mode) { case I40E_FC_FULL: - strncpy(fc, "RX/TX", FC_SIZE); + strlcpy(fc, "RX/TX", FC_SIZE); break; case I40E_FC_TX_PAUSE: - strncpy(fc, "TX", FC_SIZE); + strlcpy(fc, "TX", FC_SIZE); break; case I40E_FC_RX_PAUSE: - strncpy(fc, "RX", FC_SIZE); + strlcpy(fc, "RX", FC_SIZE); break; default: - strncpy(fc, "None", FC_SIZE); + strlcpy(fc, "None", FC_SIZE); break; } @@ -5839,7 +5839,7 @@ static void i40e_send_version(struct i40e_pf *pf) dv.minor_version = DRV_VERSION_MINOR; dv.build_version = DRV_VERSION_BUILD; dv.subbuild_version = 0; - strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string)); + strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string)); i40e_aq_send_driver_version(&pf->hw, &dv, NULL); } @@ -6293,7 +6293,7 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors) if (alloc_qvectors) { /* allocate memory for q_vector pointers */ - size = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors; + size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors; vsi->q_vectors = kzalloc(size, GFP_KERNEL); if (!vsi->q_vectors) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 97bda3d..25c4f9a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -251,9 +251,9 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, * * Writes a 16 bit words buffer to the Shadow RAM using the admin command. **/ -i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer, - u32 offset, u16 words, void *data, - bool last_command) +static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer, + u32 offset, u16 words, void *data, + bool last_command) { i40e_status ret_code = I40E_ERR_NVM; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index dadd9a5..c9f1d1b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2969,14 +2969,14 @@ static int mvneta_probe(struct platform_device *pdev) /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ - phy_node = dn; + phy_node = of_node_get(dn); } phy_mode = of_get_phy_mode(dn); if (phy_mode < 0) { dev_err(&pdev->dev, "incorrect phy-mode\n"); err = -EINVAL; - goto err_free_irq; + goto err_put_phy_node; } dev->tx_queue_len = MVNETA_MAX_TXD; @@ -2992,7 +2992,7 @@ static int mvneta_probe(struct platform_device *pdev) pp->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pp->clk)) { err = PTR_ERR(pp->clk); - goto err_free_irq; + goto err_put_phy_node; } clk_prepare_enable(pp->clk); @@ -3071,6 +3071,8 @@ err_free_stats: free_percpu(pp->stats); err_clk: clk_disable_unprepare(pp->clk); +err_put_phy_node: + of_node_put(phy_node); err_free_irq: irq_dispose_mapping(dev->irq); err_free_netdev: @@ -3088,6 +3090,7 @@ static int mvneta_remove(struct platform_device *pdev) clk_disable_unprepare(pp->clk); free_percpu(pp->stats); irq_dispose_mapping(dev->irq); + of_node_put(pp->phy_node); free_netdev(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5d940a2..65a4a0f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = { .wrapper = mlx4_MAD_IFC_wrapper }, { + .opcode = MLX4_CMD_MAD_DEMUX, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, + { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, .has_outbox = true, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 688e1ea..494753e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", [9] = "Device managed flow steering IPoIB support", - [10] = "TCP/IP offloads/flow-steering for VXLAN support" + [10] = "TCP/IP offloads/flow-steering for VXLAN support", + [11] = "MAD DEMUX (Secure-Host) support" }; int i; @@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e +#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, + QUERY_DEV_CAP_MAD_DEMUX_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work) out: mlx4_free_cmd_mailbox(dev, mailbox); } + +static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ +#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 +#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 +#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 +#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 + + u32 set_attr_mask, getresp_attr_mask; + u32 trap_attr_mask, traprepress_attr_mask; + + MLX4_GET(set_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", + set_attr_mask); + + MLX4_GET(getresp_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", + getresp_attr_mask); + + MLX4_GET(trap_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", + trap_attr_mask); + + MLX4_GET(traprepress_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n", + traprepress_attr_mask); + + if (set_attr_mask && getresp_attr_mask && trap_attr_mask && + traprepress_attr_mask) + return 1; + + return 0; +} + +int mlx4_config_mad_demux(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + int secure_host_active; + int err; + + /* Check if mad_demux is supported */ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); + return -ENOMEM; + } + + /* Query mad_demux to find out which MADs are handled by internal sma */ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", + err); + goto out; + } + + secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + + /* Config mad_demux to handle all MADs returned by the query above */ + err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); + goto out; + } + + if (secure_host_active) + mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 110e020..7e2d5d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1853,6 +1853,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); goto err_mr_table_free; } + err = mlx4_config_mad_demux(dev); + if (err) { + mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); + goto err_mcg_table_free; + } } err = mlx4_init_eq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 13fbcd0..b508c78 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -274,6 +274,8 @@ struct mlx4_icm_table { #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) +#define MLX4_MPT_PD_MASK (0x1FFFFUL) +#define MLX4_MPT_PD_VF_MASK (0xFE0000UL) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) @@ -1306,5 +1308,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); +int mlx4_config_mad_demux(struct mlx4_dev *dev); #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 2839abb..7d717ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry) +{ + int err; + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + struct mlx4_cmd_mailbox *mailbox = NULL; + + /* Make sure that at this point we have single-threaded access only */ + + if (mmr->enabled != MLX4_MPT_EN_HW) + return -EINVAL; + + err = mlx4_HW2SW_MPT(dev, NULL, key); + + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } + + mmr->enabled = MLX4_MPT_EN_SW; + + if (!mlx4_is_mfunc(dev)) { + **mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + key, NULL); + } else { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR_OR_NULL(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, key, + 0, MLX4_CMD_QUERY_MPT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + if (err) + goto free_mailbox; + + *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf; + } + + if (!(*mpt_entry) || !(**mpt_entry)) { + err = -ENOMEM; + goto free_mailbox; + } + + return 0; + +free_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt); + +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry) +{ + int err; + + if (!mlx4_is_mfunc(dev)) { + /* Make sure any changes to this entry are flushed */ + wmb(); + + *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW; + + /* Make sure the new status is written */ + wmb(); + + err = mlx4_SYNC_TPT(dev); + } else { + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + + err = mlx4_SW2HW_MPT(dev, mailbox, key); + } + + mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK; + if (!err) + mmr->enabled = MLX4_MPT_EN_HW; + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt); + +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry) +{ + if (mlx4_is_mfunc(dev)) { + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + mlx4_free_cmd_mailbox(dev, mailbox); + } +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt); + +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn) +{ + u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags); + /* The wrapper function will put the slave's id here */ + if (mlx4_is_mfunc(dev)) + pd_flags &= ~MLX4_MPT_PD_VF_MASK; + mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) | + (pdn & MLX4_MPT_PD_MASK) + | MLX4_MPT_PD_FLAG_EN_INV); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd); + +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access) +{ + u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) | + (access & MLX4_PERM_MASK); + + mpt_entry->flags = cpu_to_be32(flags); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access); + static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) @@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) } EXPORT_SYMBOL_GPL(mlx4_mr_free); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + mlx4_mtt_cleanup(dev, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); + +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry) +{ + int err; + + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + return err; + + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + if (mr->mtt.page_shift == 0) + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } + mr->enabled = MLX4_MPT_EN_SW; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write); + int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 0efc136..1089367 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - if (mpt->com.from_state != RES_MPT_HW) { + if (mpt->com.from_state == RES_MPT_MAPPED) { + /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do + * that, the VF must read the MPT. But since the MPT entry memory is not + * in the VF's virtual memory space, it must use QUERY_MPT to obtain the + * entry contents. To guarantee that the MPT cannot be changed, the driver + * must perform HW2SW_MPT before this query and return the MPT entry to HW + * ownership fofollowing the change. The change here allows the VF to + * perform QUERY_MPT also when the entry is in SW ownership. + */ + struct mlx4_mpt_entry *mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + mpt->key, NULL); + + if (NULL == mpt_entry || NULL == outbox->buf) { + err = -EINVAL; + goto out; + } + + memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry)); + + err = 0; + } else if (mpt->com.from_state == RES_MPT_HW) { + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + } else { err = -EBUSY; goto out; } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 4421004..9e7e3f1 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -574,6 +574,7 @@ myri10ge_validate_firmware(struct myri10ge_priv *mgp, /* save firmware version for ethtool */ strncpy(mgp->fw_version, hdr->version, sizeof(mgp->fw_version)); + mgp->fw_version[sizeof(mgp->fw_version) - 1] = '\0'; sscanf(mgp->fw_version, "%d.%d.%d", &mgp->fw_ver_major, &mgp->fw_ver_minor, &mgp->fw_ver_tiny); @@ -872,6 +873,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type) return -ENOMEM; dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) { + __free_page(dmatest_page); + return -ENOMEM; + } /* Run a small DMA test. * The magic multipliers to the length tell the firmware @@ -1293,6 +1298,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, int bytes, int watchdog) { struct page *page; + dma_addr_t bus; int idx; #if MYRI10GE_ALLOC_SIZE > 4096 int end_offset; @@ -1317,11 +1323,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, rx->watchdog_needed = 1; return; } + + bus = pci_map_page(mgp->pdev, page, 0, + MYRI10GE_ALLOC_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { + __free_pages(page, MYRI10GE_ALLOC_ORDER); + if (rx->fill_cnt - rx->cnt < 16) + rx->watchdog_needed = 1; + return; + } + rx->page = page; rx->page_offset = 0; - rx->bus = pci_map_page(mgp->pdev, page, 0, - MYRI10GE_ALLOC_SIZE, - PCI_DMA_FROMDEVICE); + rx->bus = bus; + } rx->info[idx].page = rx->page; rx->info[idx].page_offset = rx->page_offset; @@ -2763,6 +2779,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, mb(); } +static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp, + struct myri10ge_tx_buf *tx, int idx) +{ + unsigned int len; + int last_idx; + + /* Free any DMA resources we've alloced and clear out the skb slot */ + last_idx = (idx + 1) & tx->mask; + idx = tx->req & tx->mask; + do { + len = dma_unmap_len(&tx->info[idx], len); + if (len) { + if (tx->info[idx].skb != NULL) + pci_unmap_single(mgp->pdev, + dma_unmap_addr(&tx->info[idx], + bus), len, + PCI_DMA_TODEVICE); + else + pci_unmap_page(mgp->pdev, + dma_unmap_addr(&tx->info[idx], + bus), len, + PCI_DMA_TODEVICE); + dma_unmap_len_set(&tx->info[idx], len, 0); + tx->info[idx].skb = NULL; + } + idx = (idx + 1) & tx->mask; + } while (idx != last_idx); +} + /* * Transmit a packet. We need to split the packet so that a single * segment does not cross myri10ge->tx_boundary, so this makes segment @@ -2786,7 +2831,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, u32 low; __be32 high_swapped; unsigned int len; - int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; + int idx, avail, frag_cnt, frag_idx, count, mss, max_segments; u16 pseudo_hdr_offset, cksum_offset, queue; int cum_len, seglen, boundary, rdma_count; u8 flags, odd_flag; @@ -2883,9 +2928,12 @@ again: /* map the skb for DMA */ len = skb_headlen(skb); + bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) + goto drop; + idx = tx->req & tx->mask; tx->info[idx].skb = skb; - bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE); dma_unmap_addr_set(&tx->info[idx], bus, bus); dma_unmap_len_set(&tx->info[idx], len, len); @@ -2984,12 +3032,16 @@ again: break; /* map next fragment for DMA */ - idx = (count + tx->req) & tx->mask; frag = &skb_shinfo(skb)->frags[frag_idx]; frag_idx++; len = skb_frag_size(frag); bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, DMA_TO_DEVICE); + if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) { + myri10ge_unmap_tx_dma(mgp, tx, idx); + goto drop; + } + idx = (count + tx->req) & tx->mask; dma_unmap_addr_set(&tx->info[idx], bus, bus); dma_unmap_len_set(&tx->info[idx], len, len); } @@ -3020,31 +3072,8 @@ again: return NETDEV_TX_OK; abort_linearize: - /* Free any DMA resources we've alloced and clear out the skb - * slot so as to not trip up assertions, and to avoid a - * double-free if linearizing fails */ + myri10ge_unmap_tx_dma(mgp, tx, idx); - last_idx = (idx + 1) & tx->mask; - idx = tx->req & tx->mask; - tx->info[idx].skb = NULL; - do { - len = dma_unmap_len(&tx->info[idx], len); - if (len) { - if (tx->info[idx].skb != NULL) - pci_unmap_single(mgp->pdev, - dma_unmap_addr(&tx->info[idx], - bus), len, - PCI_DMA_TODEVICE); - else - pci_unmap_page(mgp->pdev, - dma_unmap_addr(&tx->info[idx], - bus), len, - PCI_DMA_TODEVICE); - dma_unmap_len_set(&tx->info[idx], len, 0); - tx->info[idx].skb = NULL; - } - idx = (idx + 1) & tx->mask; - } while (idx != last_idx); if (skb_is_gso(skb)) { netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); goto drop; diff --git a/drivers/net/ethernet/qlogic/qlcnic/Makefile b/drivers/net/ethernet/qlogic/qlcnic/Makefile index a848d29..3c2c2c7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/Makefile +++ b/drivers/net/ethernet/qlogic/qlcnic/Makefile @@ -8,7 +8,7 @@ qlcnic-y := qlcnic_hw.o qlcnic_main.o qlcnic_init.o \ qlcnic_ethtool.o qlcnic_ctx.o qlcnic_io.o \ qlcnic_sysfs.o qlcnic_minidump.o qlcnic_83xx_hw.o \ qlcnic_83xx_init.o qlcnic_83xx_vnic.o \ - qlcnic_minidump.o qlcnic_sriov_common.o + qlcnic_sriov_common.o qlcnic-$(CONFIG_QLCNIC_SRIOV) += qlcnic_sriov_pf.o diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h index 2395395..54d6489 100644 --- a/drivers/net/ethernet/smsc/smsc911x.h +++ b/drivers/net/ethernet/smsc/smsc911x.h @@ -51,7 +51,7 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define SMSC_ASSERT_MAC_LOCK(pdata) \ - WARN_ON(!spin_is_locked(&pdata->mac_lock)) + WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock)) #else #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) #endif /* CONFIG_DEBUG_SPINLOCK */ diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index d813bfb..23c89ab 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -32,6 +32,11 @@ MODULE_DESCRIPTION("Sun LDOM virtual network driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); +/* Heuristic for the number of times to exponentially backoff and + * retry sending an LDC trigger when EAGAIN is encountered + */ +#define VNET_MAX_RETRIES 10 + /* Ordered from largest major to lowest */ static struct vio_version vnet_versions[] = { { .major = 1, .minor = 0 }, @@ -260,6 +265,7 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, .state = vio_dring_state, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -272,6 +278,13 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr, udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VNET_MAX_RETRIES) { + pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n", + port->raddr[0], port->raddr[1], + port->raddr[2], port->raddr[3], + port->raddr[4], port->raddr[5]); + err = -ECONNRESET; + } } while (err == -EAGAIN); return err; @@ -475,8 +488,9 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf) return 0; } -static void maybe_tx_wakeup(struct vnet *vp) +static void maybe_tx_wakeup(unsigned long param) { + struct vnet *vp = (struct vnet *)param; struct net_device *dev = vp->dev; netif_tx_lock(dev); @@ -573,8 +587,13 @@ static void vnet_event(void *arg, int event) break; } spin_unlock(&vio->lock); + /* Kick off a tasklet to wake the queue. We cannot call + * maybe_tx_wakeup directly here because we could deadlock on + * netif_tx_lock() with dev_watchdog() + */ if (unlikely(tx_wakeup && err != -ECONNRESET)) - maybe_tx_wakeup(port->vp); + tasklet_schedule(&port->vp->vnet_tx_wakeup); + local_irq_restore(flags); } @@ -593,6 +612,7 @@ static int __vnet_tx_trigger(struct vnet_port *port) .end_idx = (u32) -1, }; int err, delay; + int retries = 0; hdr.seq = dr->snd_nxt; delay = 1; @@ -605,6 +625,8 @@ static int __vnet_tx_trigger(struct vnet_port *port) udelay(delay); if ((delay <<= 1) > 128) delay = 128; + if (retries++ > VNET_MAX_RETRIES) + break; } while (err == -EAGAIN); return err; @@ -691,7 +713,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len); } - d->hdr.ack = VIO_ACK_ENABLE; + /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), + * thus it is safe to not set VIO_ACK_ENABLE for each transmission: + * the protocol itself does not require it as long as the peer + * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED. + * + * An ACK for every packet in the ring is expensive as the + * sending of LDC messages is slow and affects performance. + */ + d->hdr.ack = VIO_ACK_DISABLE; d->size = len; d->ncookies = port->tx_bufs[dr->prod].ncookies; for (i = 0; i < d->ncookies; i++) @@ -1046,6 +1076,7 @@ static struct vnet *vnet_new(const u64 *local_mac) vp = netdev_priv(dev); spin_lock_init(&vp->lock); + tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp); vp->dev = dev; INIT_LIST_HEAD(&vp->port_list); @@ -1105,6 +1136,7 @@ static void vnet_cleanup(void) vp = list_first_entry(&vnet_list, struct vnet, list); list_del(&vp->list); dev = vp->dev; + tasklet_kill(&vp->vnet_tx_wakeup); /* vio_unregister_driver() should have cleaned up port_list */ BUG_ON(!list_empty(&vp->port_list)); unregister_netdev(dev); diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h index d347a5b..de5c2c6 100644 --- a/drivers/net/ethernet/sun/sunvnet.h +++ b/drivers/net/ethernet/sun/sunvnet.h @@ -1,6 +1,8 @@ #ifndef _SUNVNET_H #define _SUNVNET_H +#include <linux/interrupt.h> + #define DESC_NCOOKIES(entry_size) \ ((entry_size) - sizeof(struct vio_net_desc)) @@ -78,6 +80,8 @@ struct vnet { struct list_head list; u64 local_mac; + + struct tasklet_struct vnet_tx_wakeup; }; #endif /* _SUNVNET_H */ diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index 3809f4e..f9bcf7a 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1130,6 +1130,7 @@ static int cpmac_probe(struct platform_device *pdev) strncpy(mdio_bus_id, "fixed-0", MII_BUS_ID_SIZE); /* fixed phys bus */ phy_id = pdev->id; } + mdio_bus_id[sizeof(mdio_bus_id) - 1] = '\0'; dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES); if (!dev) diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 8a6e5c2..fda5891 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1148,8 +1148,7 @@ static int temac_of_remove(struct platform_device *op) temac_mdio_teardown(lp); unregister_netdev(ndev); sysfs_remove_group(&lp->dev->kobj, &temac_attr_group); - if (lp->phy_node) - of_node_put(lp->phy_node); + of_node_put(lp->phy_node); lp->phy_node = NULL; iounmap(lp->regs); if (lp->sdma_regs) @@ -1171,7 +1170,6 @@ static struct platform_driver temac_of_driver = { .probe = temac_of_probe, .remove = temac_of_remove, .driver = { - .owner = THIS_MODULE, .name = "xilinx_temac", .of_match_table = temac_of_match, }, diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 7b0a735..c8fd941 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1630,8 +1630,7 @@ static int axienet_of_remove(struct platform_device *op) axienet_mdio_teardown(lp); unregister_netdev(ndev); - if (lp->phy_node) - of_node_put(lp->phy_node); + of_node_put(lp->phy_node); lp->phy_node = NULL; iounmap(lp->regs); @@ -1646,7 +1645,6 @@ static struct platform_driver axienet_of_driver = { .probe = axienet_of_probe, .remove = axienet_of_remove, .driver = { - .owner = THIS_MODULE, .name = "xilinx_axienet", .of_match_table = axienet_of_match, }, diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 782bb93..28dbbdc 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1245,7 +1245,6 @@ MODULE_DEVICE_TABLE(of, xemaclite_of_match); static struct platform_driver xemaclite_of_driver = { .driver = { .name = DRIVER_NAME, - .owner = THIS_MODULE, .of_match_table = xemaclite_of_match, }, .probe = xemaclite_of_probe, diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index 7c81ffb..d56f869 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -266,7 +266,7 @@ static void xirc2ps_detach(struct pcmcia_device *p_dev); static irqreturn_t xirc2ps_interrupt(int irq, void *dev_id); -typedef struct local_info_t { +struct local_info { struct net_device *dev; struct pcmcia_device *p_dev; @@ -281,7 +281,7 @@ typedef struct local_info_t { unsigned last_ptr_value; /* last packets transmitted value */ const char *manf_str; struct work_struct tx_timeout_task; -} local_info_t; +}; /**************** * Some more prototypes @@ -475,12 +475,12 @@ static int xirc2ps_probe(struct pcmcia_device *link) { struct net_device *dev; - local_info_t *local; + struct local_info *local; dev_dbg(&link->dev, "attach()\n"); /* Allocate the device structure */ - dev = alloc_etherdev(sizeof(local_info_t)); + dev = alloc_etherdev(sizeof(struct local_info)); if (!dev) return -ENOMEM; local = netdev_priv(dev); @@ -536,7 +536,7 @@ static int set_card_type(struct pcmcia_device *link) { struct net_device *dev = link->priv; - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); u8 *buf; unsigned int cisrev, mediaid, prodid; size_t len; @@ -690,7 +690,7 @@ static int xirc2ps_config(struct pcmcia_device * link) { struct net_device *dev = link->priv; - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); unsigned int ioaddr; int err; u8 *buf; @@ -931,7 +931,7 @@ xirc2ps_release(struct pcmcia_device *link) if (link->resource[2]->end) { struct net_device *dev = link->priv; - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); if (local->dingo) iounmap(local->dingo_ccr - 0x0800); } @@ -975,7 +975,7 @@ static irqreturn_t xirc2ps_interrupt(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr; u_char saved_page; unsigned bytes_rcvd; @@ -1194,8 +1194,8 @@ xirc2ps_interrupt(int irq, void *dev_id) static void xirc2ps_tx_timeout_task(struct work_struct *work) { - local_info_t *local = - container_of(work, local_info_t, tx_timeout_task); + struct local_info *local = + container_of(work, struct local_info, tx_timeout_task); struct net_device *dev = local->dev; /* reset the card */ do_reset(dev,1); @@ -1206,7 +1206,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work) static void xirc_tx_timeout(struct net_device *dev) { - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); dev->stats.tx_errors++; netdev_notice(dev, "transmit timed out\n"); schedule_work(&lp->tx_timeout_task); @@ -1215,7 +1215,7 @@ xirc_tx_timeout(struct net_device *dev) static netdev_tx_t do_start_xmit(struct sk_buff *skb, struct net_device *dev) { - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; int okay; unsigned freespace; @@ -1300,7 +1300,7 @@ static void set_address(struct set_address_info *sa_info, char *addr) static void set_addresses(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); struct netdev_hw_addr *ha; struct set_address_info sa_info; int i; @@ -1362,7 +1362,7 @@ set_multicast_list(struct net_device *dev) static int do_config(struct net_device *dev, struct ifmap *map) { - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); pr_debug("do_config(%p)\n", dev); if (map->port != 255 && map->port != dev->if_port) { @@ -1387,7 +1387,7 @@ do_config(struct net_device *dev, struct ifmap *map) static int do_open(struct net_device *dev) { - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; dev_dbg(&link->dev, "do_open(%p)\n", dev); @@ -1421,7 +1421,7 @@ static const struct ethtool_ops netdev_ethtool_ops = { static int do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; struct mii_ioctl_data *data = if_mii(rq); @@ -1453,7 +1453,7 @@ do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static void hardreset(struct net_device *dev) { - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; SelectPage(4); @@ -1470,7 +1470,7 @@ hardreset(struct net_device *dev) static void do_reset(struct net_device *dev, int full) { - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; unsigned value; @@ -1631,7 +1631,7 @@ do_reset(struct net_device *dev, int full) static int init_mii(struct net_device *dev) { - local_info_t *local = netdev_priv(dev); + struct local_info *local = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; unsigned control, status, linkpartner; int i; @@ -1715,7 +1715,7 @@ static int do_stop(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; - local_info_t *lp = netdev_priv(dev); + struct local_info *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; dev_dbg(&link->dev, "do_stop(%p)\n", dev); diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c index 9fd0c20..a87a82c 100644 --- a/drivers/net/irda/donauboe.c +++ b/drivers/net/irda/donauboe.c @@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = { .resume = toshoboe_wakeup }; -static int __init -donauboe_init (void) -{ - return pci_register_driver(&donauboe_pci_driver); -} - -static void __exit -donauboe_cleanup (void) -{ - pci_unregister_driver(&donauboe_pci_driver); -} - -module_init(donauboe_init); -module_exit(donauboe_cleanup); +module_pci_driver(donauboe_pci_driver); diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index ef8a5c2..60e4ca0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -45,10 +45,9 @@ struct macvlan_port { struct sk_buff_head bc_queue; struct work_struct bc_work; bool passthru; + int count; }; -#define MACVLAN_PORT_IS_EMPTY(port) list_empty(&port->vlans) - struct macvlan_skb_cb { const struct macvlan_dev *src; }; @@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_device *dev) free_percpu(vlan->pcpu_stats); - if (MACVLAN_PORT_IS_EMPTY(port)) + port->count -= 1; + if (!port->count) macvlan_port_destroy(port->dev); } @@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); if (vlan->mode == MACVLAN_MODE_PASSTHRU) { - if (!MACVLAN_PORT_IS_EMPTY(port)) + if (port->count) return -EINVAL; port->passthru = true; eth_hw_addr_inherit(dev, lowerdev); } + port->count += 1; err = register_netdevice(dev); if (err < 0) goto destroy_port; @@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, unregister_netdev: unregister_netdevice(dev); destroy_port: - if (MACVLAN_PORT_IS_EMPTY(port)) + port->count -= 1; + if (!port->count) macvlan_port_destroy(lowerdev); return err; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index 7cc64ea..e5c7e61 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -90,7 +90,7 @@ #define LMI_ANSI_LENGTH 14 -typedef struct { +struct fr_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) unsigned ea1: 1; unsigned cr: 1; @@ -112,14 +112,14 @@ typedef struct { unsigned de: 1; unsigned ea2: 1; #endif -}__packed fr_hdr; +} __packed; -typedef struct pvc_device_struct { +struct pvc_device { struct net_device *frad; struct net_device *main; struct net_device *ether; /* bridged Ethernet interface */ - struct pvc_device_struct *next; /* Sorted in ascending DLCI order */ + struct pvc_device *next; /* Sorted in ascending DLCI order */ int dlci; int open_count; @@ -132,11 +132,11 @@ typedef struct pvc_device_struct { unsigned int becn: 1; unsigned int bandwidth; /* Cisco LMI reporting only */ }state; -}pvc_device; +}; struct frad_state { fr_proto settings; - pvc_device *first_pvc; + struct pvc_device *first_pvc; int dce_pvc_count; struct timer_list timer; @@ -174,9 +174,9 @@ static inline struct frad_state* state(hdlc_device *hdlc) } -static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci) +static inline struct pvc_device *find_pvc(hdlc_device *hdlc, u16 dlci) { - pvc_device *pvc = state(hdlc)->first_pvc; + struct pvc_device *pvc = state(hdlc)->first_pvc; while (pvc) { if (pvc->dlci == dlci) @@ -190,10 +190,10 @@ static inline pvc_device* find_pvc(hdlc_device *hdlc, u16 dlci) } -static pvc_device* add_pvc(struct net_device *dev, u16 dlci) +static struct pvc_device *add_pvc(struct net_device *dev, u16 dlci) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc; + struct pvc_device *pvc, **pvc_p = &state(hdlc)->first_pvc; while (*pvc_p) { if ((*pvc_p)->dlci == dlci) @@ -203,7 +203,7 @@ static pvc_device* add_pvc(struct net_device *dev, u16 dlci) pvc_p = &(*pvc_p)->next; } - pvc = kzalloc(sizeof(pvc_device), GFP_ATOMIC); + pvc = kzalloc(sizeof(*pvc), GFP_ATOMIC); #ifdef DEBUG_PVC printk(KERN_DEBUG "add_pvc: allocated pvc %p, frad %p\n", pvc, dev); #endif @@ -218,13 +218,13 @@ static pvc_device* add_pvc(struct net_device *dev, u16 dlci) } -static inline int pvc_is_used(pvc_device *pvc) +static inline int pvc_is_used(struct pvc_device *pvc) { return pvc->main || pvc->ether; } -static inline void pvc_carrier(int on, pvc_device *pvc) +static inline void pvc_carrier(int on, struct pvc_device *pvc) { if (on) { if (pvc->main) @@ -246,11 +246,11 @@ static inline void pvc_carrier(int on, pvc_device *pvc) static inline void delete_unused_pvcs(hdlc_device *hdlc) { - pvc_device **pvc_p = &state(hdlc)->first_pvc; + struct pvc_device **pvc_p = &state(hdlc)->first_pvc; while (*pvc_p) { if (!pvc_is_used(*pvc_p)) { - pvc_device *pvc = *pvc_p; + struct pvc_device *pvc = *pvc_p; #ifdef DEBUG_PVC printk(KERN_DEBUG "freeing unused pvc: %p\n", pvc); #endif @@ -263,7 +263,8 @@ static inline void delete_unused_pvcs(hdlc_device *hdlc) } -static inline struct net_device** get_dev_p(pvc_device *pvc, int type) +static inline struct net_device **get_dev_p(struct pvc_device *pvc, + int type) { if (type == ARPHRD_ETHER) return &pvc->ether; @@ -342,7 +343,7 @@ static int fr_hard_header(struct sk_buff **skb_p, u16 dlci) static int pvc_open(struct net_device *dev) { - pvc_device *pvc = dev->ml_priv; + struct pvc_device *pvc = dev->ml_priv; if ((pvc->frad->flags & IFF_UP) == 0) return -EIO; /* Frad must be UP in order to activate PVC */ @@ -362,7 +363,7 @@ static int pvc_open(struct net_device *dev) static int pvc_close(struct net_device *dev) { - pvc_device *pvc = dev->ml_priv; + struct pvc_device *pvc = dev->ml_priv; if (--pvc->open_count == 0) { hdlc_device *hdlc = dev_to_hdlc(pvc->frad); @@ -381,7 +382,7 @@ static int pvc_close(struct net_device *dev) static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - pvc_device *pvc = dev->ml_priv; + struct pvc_device *pvc = dev->ml_priv; fr_proto_pvc_info info; if (ifr->ifr_settings.type == IF_GET_PROTO) { @@ -409,7 +410,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) { - pvc_device *pvc = dev->ml_priv; + struct pvc_device *pvc = dev->ml_priv; if (pvc->state.active) { if (dev->type == ARPHRD_ETHER) { @@ -444,7 +445,7 @@ static netdev_tx_t pvc_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static inline void fr_log_dlci_active(pvc_device *pvc) +static inline void fr_log_dlci_active(struct pvc_device *pvc) { netdev_info(pvc->frad, "DLCI %d [%s%s%s]%s %s\n", pvc->dlci, @@ -469,7 +470,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) { hdlc_device *hdlc = dev_to_hdlc(dev); struct sk_buff *skb; - pvc_device *pvc = state(hdlc)->first_pvc; + struct pvc_device *pvc = state(hdlc)->first_pvc; int lmi = state(hdlc)->settings.lmi; int dce = state(hdlc)->settings.dce; int len = lmi == LMI_ANSI ? LMI_ANSI_LENGTH : LMI_CCITT_CISCO_LENGTH; @@ -566,7 +567,7 @@ static void fr_lmi_send(struct net_device *dev, int fullrep) static void fr_set_link_state(int reliable, struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc = state(hdlc)->first_pvc; + struct pvc_device *pvc = state(hdlc)->first_pvc; state(hdlc)->reliable = reliable; if (reliable) { @@ -652,7 +653,7 @@ static void fr_timer(unsigned long arg) static int fr_lmi_recv(struct net_device *dev, struct sk_buff *skb) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc; + struct pvc_device *pvc; u8 rxseq, txseq; int lmi = state(hdlc)->settings.lmi; int dce = state(hdlc)->settings.dce; @@ -869,10 +870,10 @@ static int fr_rx(struct sk_buff *skb) { struct net_device *frad = skb->dev; hdlc_device *hdlc = dev_to_hdlc(frad); - fr_hdr *fh = (fr_hdr*)skb->data; + struct fr_hdr *fh = (struct fr_hdr *)skb->data; u8 *data = skb->data; u16 dlci; - pvc_device *pvc; + struct pvc_device *pvc; struct net_device *dev = NULL; if (skb->len <= 4 || fh->ea1 || data[2] != FR_UI) @@ -1028,7 +1029,7 @@ static void fr_stop(struct net_device *dev) static void fr_close(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - pvc_device *pvc = state(hdlc)->first_pvc; + struct pvc_device *pvc = state(hdlc)->first_pvc; while (pvc) { /* Shutdown all PVCs for this FRAD */ if (pvc->main) @@ -1060,7 +1061,7 @@ static const struct net_device_ops pvc_ops = { static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) { hdlc_device *hdlc = dev_to_hdlc(frad); - pvc_device *pvc; + struct pvc_device *pvc; struct net_device *dev; int used; @@ -1117,7 +1118,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type) { - pvc_device *pvc; + struct pvc_device *pvc; struct net_device *dev; if ((pvc = find_pvc(hdlc, dlci)) == NULL) @@ -1145,13 +1146,13 @@ static int fr_del_pvc(hdlc_device *hdlc, unsigned int dlci, int type) static void fr_destroy(struct net_device *frad) { hdlc_device *hdlc = dev_to_hdlc(frad); - pvc_device *pvc = state(hdlc)->first_pvc; + struct pvc_device *pvc = state(hdlc)->first_pvc; state(hdlc)->first_pvc = NULL; /* All PVCs destroyed */ state(hdlc)->dce_pvc_count = 0; state(hdlc)->dce_changed = 1; while (pvc) { - pvc_device *next = pvc->next; + struct pvc_device *next = pvc->next; /* destructors will free_netdev() main and ether */ if (pvc->main) unregister_netdevice(pvc->main); diff --git a/drivers/net/wan/wanxl.c b/drivers/net/wan/wanxl.c index dc2fc23..e73f138 100644 --- a/drivers/net/wan/wanxl.c +++ b/drivers/net/wan/wanxl.c @@ -54,24 +54,24 @@ static const char* version = "wanXL serial card driver version: 0.48"; #define MBX2_MEMSZ_MASK 0xFFFF0000 /* PUTS Memory Size Register mask */ -typedef struct { +struct port { struct net_device *dev; - struct card_t *card; + struct card *card; spinlock_t lock; /* for wanxl_xmit */ int node; /* physical port #0 - 3 */ unsigned int clock_type; int tx_in, tx_out; struct sk_buff *tx_skbs[TX_BUFFERS]; -}port_t; +}; -typedef struct { +struct card_status { desc_t rx_descs[RX_QUEUE_LENGTH]; port_status_t port_status[4]; -}card_status_t; +}; -typedef struct card_t { +struct card { int n_ports; /* 1, 2 or 4 ports */ u8 irq; @@ -79,20 +79,20 @@ typedef struct card_t { struct pci_dev *pdev; /* for pci_name(pdev) */ int rx_in; struct sk_buff *rx_skbs[RX_QUEUE_LENGTH]; - card_status_t *status; /* shared between host and card */ + struct card_status *status; /* shared between host and card */ dma_addr_t status_address; - port_t ports[0]; /* 1 - 4 port_t structures follow */ -}card_t; + struct port ports[0]; /* 1 - 4 port structures follow */ +}; -static inline port_t* dev_to_port(struct net_device *dev) +static inline struct port *dev_to_port(struct net_device *dev) { - return (port_t *)dev_to_hdlc(dev)->priv; + return (struct port *)dev_to_hdlc(dev)->priv; } -static inline port_status_t* get_status(port_t *port) +static inline port_status_t *get_status(struct port *port) { return &port->card->status->port_status[port->node]; } @@ -115,7 +115,7 @@ static inline dma_addr_t pci_map_single_debug(struct pci_dev *pdev, void *ptr, /* Cable and/or personality module change interrupt service */ -static inline void wanxl_cable_intr(port_t *port) +static inline void wanxl_cable_intr(struct port *port) { u32 value = get_status(port)->cable; int valid = 1; @@ -160,7 +160,7 @@ static inline void wanxl_cable_intr(port_t *port) /* Transmit complete interrupt service */ -static inline void wanxl_tx_intr(port_t *port) +static inline void wanxl_tx_intr(struct port *port) { struct net_device *dev = port->dev; while (1) { @@ -193,7 +193,7 @@ static inline void wanxl_tx_intr(port_t *port) /* Receive complete interrupt service */ -static inline void wanxl_rx_intr(card_t *card) +static inline void wanxl_rx_intr(struct card *card) { desc_t *desc; while (desc = &card->status->rx_descs[card->rx_in], @@ -203,7 +203,7 @@ static inline void wanxl_rx_intr(card_t *card) pci_name(card->pdev)); else { struct sk_buff *skb = card->rx_skbs[card->rx_in]; - port_t *port = &card->ports[desc->stat & + struct port *port = &card->ports[desc->stat & PACKET_PORT_MASK]; struct net_device *dev = port->dev; @@ -245,7 +245,7 @@ static inline void wanxl_rx_intr(card_t *card) static irqreturn_t wanxl_intr(int irq, void* dev_id) { - card_t *card = dev_id; + struct card *card = dev_id; int i; u32 stat; int handled = 0; @@ -272,7 +272,7 @@ static irqreturn_t wanxl_intr(int irq, void* dev_id) static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev) { - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); desc_t *desc; spin_lock(&port->lock); @@ -319,7 +319,7 @@ static netdev_tx_t wanxl_xmit(struct sk_buff *skb, struct net_device *dev) static int wanxl_attach(struct net_device *dev, unsigned short encoding, unsigned short parity) { - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); if (encoding != ENCODING_NRZ && encoding != ENCODING_NRZI) @@ -343,7 +343,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { const size_t size = sizeof(sync_serial_settings); sync_serial_settings line; - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); if (cmd != SIOCWANDEV) return hdlc_ioctl(dev, ifr, cmd); @@ -393,7 +393,7 @@ static int wanxl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int wanxl_open(struct net_device *dev) { - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); u8 __iomem *dbr = port->card->plx + PLX_DOORBELL_TO_CARD; unsigned long timeout; int i; @@ -429,7 +429,7 @@ static int wanxl_open(struct net_device *dev) static int wanxl_close(struct net_device *dev) { - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); unsigned long timeout; int i; @@ -467,7 +467,7 @@ static int wanxl_close(struct net_device *dev) static struct net_device_stats *wanxl_get_stats(struct net_device *dev) { - port_t *port = dev_to_port(dev); + struct port *port = dev_to_port(dev); dev->stats.rx_over_errors = get_status(port)->rx_overruns; dev->stats.rx_frame_errors = get_status(port)->rx_frame_errors; @@ -478,7 +478,7 @@ static struct net_device_stats *wanxl_get_stats(struct net_device *dev) -static int wanxl_puts_command(card_t *card, u32 cmd) +static int wanxl_puts_command(struct card *card, u32 cmd) { unsigned long timeout = jiffies + 5 * HZ; @@ -495,7 +495,7 @@ static int wanxl_puts_command(card_t *card, u32 cmd) -static void wanxl_reset(card_t *card) +static void wanxl_reset(struct card *card) { u32 old_value = readl(card->plx + PLX_CONTROL) & ~PLX_CTL_RESET; @@ -511,7 +511,7 @@ static void wanxl_reset(card_t *card) static void wanxl_pci_remove_one(struct pci_dev *pdev) { - card_t *card = pci_get_drvdata(pdev); + struct card *card = pci_get_drvdata(pdev); int i; for (i = 0; i < card->n_ports; i++) { @@ -537,7 +537,7 @@ static void wanxl_pci_remove_one(struct pci_dev *pdev) iounmap(card->plx); if (card->status) - pci_free_consistent(pdev, sizeof(card_status_t), + pci_free_consistent(pdev, sizeof(struct card_status), card->status, card->status_address); pci_release_regions(pdev); @@ -560,7 +560,7 @@ static const struct net_device_ops wanxl_ops = { static int wanxl_pci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - card_t *card; + struct card *card; u32 ramsize, stat; unsigned long timeout; u32 plx_phy; /* PLX PCI base address */ @@ -601,7 +601,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, default: ports = 4; } - alloc_size = sizeof(card_t) + ports * sizeof(port_t); + alloc_size = sizeof(struct card) + ports * sizeof(struct port); card = kzalloc(alloc_size, GFP_KERNEL); if (card == NULL) { pci_release_regions(pdev); @@ -612,7 +612,8 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, card); card->pdev = pdev; - card->status = pci_alloc_consistent(pdev, sizeof(card_status_t), + card->status = pci_alloc_consistent(pdev, + sizeof(struct card_status), &card->status_address); if (card->status == NULL) { wanxl_pci_remove_one(pdev); @@ -766,7 +767,7 @@ static int wanxl_pci_init_one(struct pci_dev *pdev, for (i = 0; i < ports; i++) { hdlc_device *hdlc; - port_t *port = &card->ports[i]; + struct port *port = &card->ports[i]; struct net_device *dev = alloc_hdlcdev(port); if (!dev) { pr_err("%s: unable to allocate memory\n", diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 7e9ede6..d9ed22b 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -56,18 +56,18 @@ static void airo_release(struct pcmcia_device *link); static void airo_detach(struct pcmcia_device *p_dev); -typedef struct local_info_t { +struct local_info { struct net_device *eth_dev; -} local_info_t; +}; static int airo_probe(struct pcmcia_device *p_dev) { - local_info_t *local; + struct local_info *local; dev_dbg(&p_dev->dev, "airo_attach()\n"); /* Allocate space for private device-specific data */ - local = kzalloc(sizeof(local_info_t), GFP_KERNEL); + local = kzalloc(sizeof(*local), GFP_KERNEL); if (!local) return -ENOMEM; @@ -82,10 +82,11 @@ static void airo_detach(struct pcmcia_device *link) airo_release(link); - if (((local_info_t *)link->priv)->eth_dev) { - stop_airo_card(((local_info_t *)link->priv)->eth_dev, 0); + if (((struct local_info *)link->priv)->eth_dev) { + stop_airo_card(((struct local_info *)link->priv)->eth_dev, + 0); } - ((local_info_t *)link->priv)->eth_dev = NULL; + ((struct local_info *)link->priv)->eth_dev = NULL; kfree(link->priv); } /* airo_detach */ @@ -101,7 +102,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, void *priv_data) static int airo_config(struct pcmcia_device *link) { - local_info_t *dev; + struct local_info *dev; int ret; dev = link->priv; @@ -121,10 +122,10 @@ static int airo_config(struct pcmcia_device *link) ret = pcmcia_enable_device(link); if (ret) goto failed; - ((local_info_t *)link->priv)->eth_dev = + ((struct local_info *)link->priv)->eth_dev = init_airo_card(link->irq, link->resource[0]->start, 1, &link->dev); - if (!((local_info_t *)link->priv)->eth_dev) + if (!((struct local_info *)link->priv)->eth_dev) goto failed; return 0; @@ -142,7 +143,7 @@ static void airo_release(struct pcmcia_device *link) static int airo_suspend(struct pcmcia_device *link) { - local_info_t *local = link->priv; + struct local_info *local = link->priv; netif_device_detach(local->eth_dev); @@ -151,7 +152,7 @@ static int airo_suspend(struct pcmcia_device *link) static int airo_resume(struct pcmcia_device *link) { - local_info_t *local = link->priv; + struct local_info *local = link->priv; if (link->open) { reset_airo_card(local->eth_dev); diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h index 8596aba..237d0cd 100644 --- a/drivers/net/wireless/ath/carl9170/carl9170.h +++ b/drivers/net/wireless/ath/carl9170/carl9170.h @@ -256,6 +256,7 @@ struct ar9170 { atomic_t rx_work_urbs; atomic_t rx_pool_urbs; kernel_ulong_t features; + bool usb_ep_cmd_is_bulk; /* firmware settings */ struct completion fw_load_wait; diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index f35c7f3..c9f9331 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd, goto err_free; } - usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev, - AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4, - carl9170_usb_cmd_complete, ar, 1); + if (ar->usb_ep_cmd_is_bulk) + usb_fill_bulk_urb(urb, ar->udev, + usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD), + cmd, cmd->hdr.len + 4, + carl9170_usb_cmd_complete, ar); + else + usb_fill_int_urb(urb, ar->udev, + usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD), + cmd, cmd->hdr.len + 4, + carl9170_usb_cmd_complete, ar, 1); if (free_buf) urb->transfer_flags |= URB_FREE_BUFFER; @@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw, static int carl9170_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct usb_endpoint_descriptor *ep; struct ar9170 *ar; struct usb_device *udev; - int err; + int i, err; err = usb_reset_device(interface_to_usbdev(intf)); if (err) @@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf, ar->intf = intf; ar->features = id->driver_info; + /* We need to remember the type of endpoint 4 because it differs + * between high- and full-speed configuration. The high-speed + * configuration specifies it as interrupt and the full-speed + * configuration as bulk endpoint. This information is required + * later when sending urbs to that endpoint. + */ + for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) { + ep = &intf->cur_altsetting->endpoint[i].desc; + + if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD && + usb_endpoint_dir_out(ep) && + usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK) + ar->usb_ep_cmd_is_bulk = true; + } + usb_set_intfdata(intf, ar); SET_IEEE80211_DEV(ar->hw, &intf->dev); diff --git a/drivers/net/wireless/atmel.c b/drivers/net/wireless/atmel.c index 1fe41af..9183f1c 100644 --- a/drivers/net/wireless/atmel.c +++ b/drivers/net/wireless/atmel.c @@ -2598,11 +2598,11 @@ static const iw_handler atmel_private_handler[] = NULL, /* SIOCIWFIRSTPRIV */ }; -typedef struct atmel_priv_ioctl { +struct atmel_priv_ioctl { char id[32]; unsigned char __user *data; unsigned short len; -} atmel_priv_ioctl; +}; #define ATMELFWL SIOCIWFIRSTPRIV #define ATMELIDIFC ATMELFWL + 1 @@ -2615,7 +2615,7 @@ static const struct iw_priv_args atmel_private_args[] = { .cmd = ATMELFWL, .set_args = IW_PRIV_TYPE_BYTE | IW_PRIV_SIZE_FIXED - | sizeof (atmel_priv_ioctl), + | sizeof(struct atmel_priv_ioctl), .get_args = IW_PRIV_TYPE_NONE, .name = "atmelfwl" }, { @@ -2645,7 +2645,7 @@ static int atmel_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { int i, rc = 0; struct atmel_private *priv = netdev_priv(dev); - atmel_priv_ioctl com; + struct atmel_priv_ioctl com; struct iwreq *wrq = (struct iwreq *) rq; unsigned char *new_firmware; char domain[REGDOMAINSZ + 1]; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c index 535c7eb..8f8b937 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c @@ -1318,6 +1318,8 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr) msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings; msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings * sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC); + if (!msgbuf->flowring_dma_handle) + goto fail; msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset; msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost; @@ -1362,6 +1364,7 @@ fail: kfree(msgbuf->flow_map); kfree(msgbuf->txstatus_done_map); brcmf_msgbuf_release_pktids(msgbuf); + kfree(msgbuf->flowring_dma_handle); if (msgbuf->ioctbuf) dma_free_coherent(drvr->bus_if->dev, BRCMF_TX_IOCTL_MAX_MSG_SIZE, @@ -1391,6 +1394,7 @@ void brcmf_proto_msgbuf_detach(struct brcmf_pub *drvr) BRCMF_TX_IOCTL_MAX_MSG_SIZE, msgbuf->ioctbuf, msgbuf->ioctbuf_handle); brcmf_msgbuf_release_pktids(msgbuf); + kfree(msgbuf->flowring_dma_handle); kfree(msgbuf); drvr->proto->pd = NULL; } diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c index bc972c0..e5101b2 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c @@ -591,12 +591,13 @@ static void brcmf_pcie_handle_mb_data(struct brcmf_pciedev_info *devinfo) } if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE) brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n"); - if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) + if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) { brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n"); if (waitqueue_active(&devinfo->mbdata_resp_wait)) { devinfo->mbdata_completed = true; wake_up(&devinfo->mbdata_resp_wait); } + } } diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 800b62b..a42f9c3 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -9853,6 +9853,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev, strncpy(extra, "unknown", MAX_WX_STRING); break; } + extra[MAX_WX_STRING - 1] = '\0'; IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra); diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 0d6a8b7..7c87965 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -396,7 +396,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) else hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; - hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; + /* TODO: enable that only for firmwares that don't crash */ + /* hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; */ hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX; hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES; /* we create the 802.11 header and zero length SSID IE. */ diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index ef3026f..d4eb8d2 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ u16 dealloc_ring[MAX_PENDING_REQS]; struct task_struct *dealloc_task; wait_queue_head_t dealloc_wq; + atomic_t inflight_packets; /* Use kthread for guest RX */ struct task_struct *task; @@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues; extern struct dentry *xen_netback_dbg_root; #endif +void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, + struct sk_buff *skb); +void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue); + #endif /* __XEN_NETBACK__COMMON_H__ */ diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 48a55cd..e29e15d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -43,6 +43,23 @@ #define XENVIF_QUEUE_LENGTH 32 #define XENVIF_NAPI_WEIGHT 64 +/* This function is used to set SKBTX_DEV_ZEROCOPY as well as + * increasing the inflight counter. We need to increase the inflight + * counter because core driver calls into xenvif_zerocopy_callback + * which calls xenvif_skb_zerocopy_complete. + */ +void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, + struct sk_buff *skb) +{ + skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + atomic_inc(&queue->inflight_packets); +} + +void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) +{ + atomic_dec(&queue->inflight_packets); +} + static inline void xenvif_stop_queue(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -78,12 +95,8 @@ int xenvif_poll(struct napi_struct *napi, int budget) /* This vif is rogue, we pretend we've there is nothing to do * for this vif to deschedule it from NAPI. But this interface * will be turned off in thread context later. - * Also, if a guest doesn't post enough slots to receive data on one of - * its queues, the carrier goes down and NAPI is descheduled here so - * the guest can't send more packets until it's ready to receive. */ - if (unlikely(queue->vif->disabled || - !netif_carrier_ok(queue->vif->dev))) { + if (unlikely(queue->vif->disabled)) { napi_complete(napi); return 0; } @@ -528,9 +541,6 @@ int xenvif_init_queue(struct xenvif_queue *queue) init_timer(&queue->rx_stalled); - netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll, - XENVIF_NAPI_WEIGHT); - return 0; } @@ -564,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, init_waitqueue_head(&queue->wq); init_waitqueue_head(&queue->dealloc_wq); + atomic_set(&queue->inflight_packets, 0); if (tx_evtchn == rx_evtchn) { /* feature-split-event-channels == 0 */ @@ -618,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref, wake_up_process(queue->task); wake_up_process(queue->dealloc_task); + netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll, + XENVIF_NAPI_WEIGHT); + return 0; err_rx_unbind: @@ -646,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif) rtnl_unlock(); } -static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue, - unsigned int worst_case_skb_lifetime) -{ - int i, unmap_timeout = 0; - - for (i = 0; i < MAX_PENDING_REQS; ++i) { - if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) { - unmap_timeout++; - schedule_timeout(msecs_to_jiffies(1000)); - if (unmap_timeout > worst_case_skb_lifetime && - net_ratelimit()) - netdev_err(queue->vif->dev, - "Page still granted! Index: %x\n", - i); - i = -1; - } - } -} - void xenvif_disconnect(struct xenvif *vif) { struct xenvif_queue *queue = NULL; @@ -676,6 +671,8 @@ void xenvif_disconnect(struct xenvif *vif) for (queue_index = 0; queue_index < num_queues; ++queue_index) { queue = &vif->queues[queue_index]; + netif_napi_del(&queue->napi); + if (queue->task) { del_timer_sync(&queue->rx_stalled); kthread_stop(queue->task); @@ -708,7 +705,6 @@ void xenvif_disconnect(struct xenvif *vif) void xenvif_deinit_queue(struct xenvif_queue *queue) { free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); - netif_napi_del(&queue->napi); } void xenvif_free(struct xenvif *vif) @@ -716,21 +712,11 @@ void xenvif_free(struct xenvif *vif) struct xenvif_queue *queue = NULL; unsigned int num_queues = vif->num_queues; unsigned int queue_index; - /* Here we want to avoid timeout messages if an skb can be legitimately - * stuck somewhere else. Realistically this could be an another vif's - * internal or QDisc queue. That another vif also has this - * rx_drain_timeout_msecs timeout, so give it time to drain out. - * Although if that other guest wakes up just before its timeout happens - * and takes only one skb from QDisc, it can hold onto other skbs for a - * longer period. - */ - unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000); unregister_netdev(vif->dev); for (queue_index = 0; queue_index < num_queues; ++queue_index) { queue = &vif->queues[queue_index]; - xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime); xenvif_deinit_queue(queue); } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index aa20933..08f6599 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s /* remove traces of mapped pages and frag_list */ skb_frag_list_init(skb); uarg = skb_shinfo(skb)->destructor_arg; + /* increase inflight counter to offset decrement in callback */ + atomic_inc(&queue->inflight_packets); uarg->callback(uarg, true); skb_shinfo(skb)->destructor_arg = NULL; - skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, nskb); kfree_skb(nskb); return 0; @@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) if (net_ratelimit()) netdev_err(queue->vif->dev, "Not enough memory to consolidate frag_list!\n"); - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); kfree_skb(skb); continue; } @@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) "Can't setup checksum in net_tx_action\n"); /* We have to set this flag to trigger the callback */ if (skb_shinfo(skb)->destructor_arg) - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); kfree_skb(skb); continue; } @@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) * skb. E.g. the __pskb_pull_tail earlier can do such thing. */ if (skb_shinfo(skb)->destructor_arg) { - skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; + xenvif_skb_zerocopy_prepare(queue, skb); queue->stats.tx_zerocopy_sent++; } @@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) queue->stats.tx_zerocopy_success++; else queue->stats.tx_zerocopy_fail++; + xenvif_skb_zerocopy_complete(queue); } static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) @@ -2025,9 +2028,15 @@ int xenvif_kthread_guest_rx(void *data) * context so we defer it here, if this thread is * associated with queue 0. */ - if (unlikely(queue->vif->disabled && queue->id == 0)) + if (unlikely(queue->vif->disabled && queue->id == 0)) { xenvif_carrier_off(queue->vif); - else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, + } else if (unlikely(queue->vif->disabled)) { + /* kthread_stop() would be called upon this thread soon, + * be a bit proactive + */ + skb_queue_purge(&queue->rx_queue); + queue->rx_last_skb_slots = 0; + } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status))) { xenvif_rx_purge_event(queue); } else if (!netif_carrier_ok(queue->vif->dev)) { @@ -2052,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data) return 0; } +static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue) +{ + /* Dealloc thread must remain running until all inflight + * packets complete. + */ + return kthread_should_stop() && + !atomic_read(&queue->inflight_packets); +} + int xenvif_dealloc_kthread(void *data) { struct xenvif_queue *queue = data; - while (!kthread_should_stop()) { + for (;;) { wait_event_interruptible(queue->dealloc_wq, tx_dealloc_work_todo(queue) || - kthread_should_stop()); - if (kthread_should_stop()) + xenvif_dealloc_kthread_should_stop(queue)); + if (xenvif_dealloc_kthread_should_stop(queue)) break; xenvif_tx_dealloc_action(queue); diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 580517d..9c47b89 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v) } #define XENVIF_KICK_STR "kick" +#define BUFFER_SIZE 32 static ssize_t xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, @@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, struct xenvif_queue *queue = ((struct seq_file *)filp->private_data)->private; int len; - char write[sizeof(XENVIF_KICK_STR)]; + char write[BUFFER_SIZE]; /* don't allow partial writes and check the length */ if (*ppos != 0) return 0; - if (count < sizeof(XENVIF_KICK_STR) - 1) + if (count >= sizeof(write)) return -ENOSPC; len = simple_write_to_buffer(write, - sizeof(write), + sizeof(write) - 1, ppos, buf, count); if (len < 0) return len; + write[len] = '\0'; + if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1)) xenvif_interrupt(0, (void *)queue); else { @@ -171,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = { .write = xenvif_write_io_ring, }; -static void xenvif_debugfs_addif(struct xenvif_queue *queue) +static void xenvif_debugfs_addif(struct xenvif *vif) { struct dentry *pfile; - struct xenvif *vif = queue->vif; int i; if (IS_ERR_OR_NULL(xen_netback_dbg_root)) @@ -733,10 +735,11 @@ static void connect(struct backend_info *be) be->vif->num_queues = queue_index; goto err; } + } + #ifdef CONFIG_DEBUG_FS - xenvif_debugfs_addif(queue); + xenvif_debugfs_addif(be->vif); #endif /* CONFIG_DEBUG_FS */ - } /* Initialisation completed, tell core driver the number of * active queues. diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 28204bc..ca82f54 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -628,9 +628,10 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) slots = DIV_ROUND_UP(offset + len, PAGE_SIZE) + xennet_count_skb_frag_slots(skb); if (unlikely(slots > MAX_SKB_FRAGS + 1)) { - net_alert_ratelimited( - "xennet: skb rides the rocket: %d slots\n", slots); - goto drop; + net_dbg_ratelimited("xennet: skb rides the rocket: %d slots, %d bytes\n", + slots, skb->len); + if (skb_linearize(skb)) + goto drop; } spin_lock_irqsave(&queue->tx_lock, flags); diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index 2dcb054..5160c4e 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -9,7 +9,8 @@ menu "Device Tree and Open Firmware support" config OF_SELFTEST bool "Device Tree Runtime self tests" - depends on OF_IRQ + depends on OF_IRQ && OF_EARLY_FLATTREE + select OF_DYNAMIC help This option builds in test cases for the device tree infrastructure that are executed once at boot time, and the results dumped to the diff --git a/drivers/of/Makefile b/drivers/of/Makefile index 099b1fb..2b6a7b1 100644 --- a/drivers/of/Makefile +++ b/drivers/of/Makefile @@ -1,11 +1,13 @@ obj-y = base.o device.o platform.o +obj-$(CONFIG_OF_DYNAMIC) += dynamic.o obj-$(CONFIG_OF_FLATTREE) += fdt.o obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o obj-$(CONFIG_OF_PROMTREE) += pdt.o obj-$(CONFIG_OF_ADDRESS) += address.o obj-$(CONFIG_OF_IRQ) += irq.o obj-$(CONFIG_OF_NET) += of_net.o -obj-$(CONFIG_OF_SELFTEST) += selftest.o +obj-$(CONFIG_OF_SELFTEST) += of_selftest.o +of_selftest-objs := selftest.o testcase-data/testcases.dtb.o obj-$(CONFIG_OF_MDIO) += of_mdio.o obj-$(CONFIG_OF_PCI) += of_pci.o obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o diff --git a/drivers/of/base.c b/drivers/of/base.c index b986480..d8574ad 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -17,6 +17,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/console.h> #include <linux/ctype.h> #include <linux/cpu.h> #include <linux/module.h> @@ -35,15 +36,17 @@ struct device_node *of_allnodes; EXPORT_SYMBOL(of_allnodes); struct device_node *of_chosen; struct device_node *of_aliases; -static struct device_node *of_stdout; +struct device_node *of_stdout; -static struct kset *of_kset; +struct kset *of_kset; /* - * Used to protect the of_aliases; but also overloaded to hold off addition of - * nodes to sysfs + * Used to protect the of_aliases, to hold off addition of nodes to sysfs. + * This mutex must be held whenever modifications are being made to the + * device tree. The of_{attach,detach}_node() and + * of_{add,remove,update}_property() helpers make sure this happens. */ -DEFINE_MUTEX(of_aliases_mutex); +DEFINE_MUTEX(of_mutex); /* use when traversing tree through the allnext, child, sibling, * or parent members of struct device_node. @@ -89,79 +92,7 @@ int __weak of_node_to_nid(struct device_node *np) } #endif -#if defined(CONFIG_OF_DYNAMIC) -/** - * of_node_get - Increment refcount of a node - * @node: Node to inc refcount, NULL is supported to - * simplify writing of callers - * - * Returns node. - */ -struct device_node *of_node_get(struct device_node *node) -{ - if (node) - kobject_get(&node->kobj); - return node; -} -EXPORT_SYMBOL(of_node_get); - -static inline struct device_node *kobj_to_device_node(struct kobject *kobj) -{ - return container_of(kobj, struct device_node, kobj); -} - -/** - * of_node_release - release a dynamically allocated node - * @kref: kref element of the node to be released - * - * In of_node_put() this function is passed to kref_put() - * as the destructor. - */ -static void of_node_release(struct kobject *kobj) -{ - struct device_node *node = kobj_to_device_node(kobj); - struct property *prop = node->properties; - - /* We should never be releasing nodes that haven't been detached. */ - if (!of_node_check_flag(node, OF_DETACHED)) { - pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name); - dump_stack(); - return; - } - - if (!of_node_check_flag(node, OF_DYNAMIC)) - return; - - while (prop) { - struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); - kfree(prop); - prop = next; - - if (!prop) { - prop = node->deadprops; - node->deadprops = NULL; - } - } - kfree(node->full_name); - kfree(node->data); - kfree(node); -} - -/** - * of_node_put - Decrement refcount of a node - * @node: Node to dec refcount, NULL is supported to - * simplify writing of callers - * - */ -void of_node_put(struct device_node *node) -{ - if (node) - kobject_put(&node->kobj); -} -EXPORT_SYMBOL(of_node_put); -#else +#ifndef CONFIG_OF_DYNAMIC static void of_node_release(struct kobject *kobj) { /* Without CONFIG_OF_DYNAMIC, no nodes gets freed */ @@ -200,13 +131,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name) return name; } -static int __of_add_property_sysfs(struct device_node *np, struct property *pp) +int __of_add_property_sysfs(struct device_node *np, struct property *pp) { int rc; /* Important: Don't leak passwords */ bool secure = strncmp(pp->name, "security-", 9) == 0; + if (!of_kset || !of_node_is_attached(np)) + return 0; + sysfs_bin_attr_init(&pp->attr); pp->attr.attr.name = safe_name(&np->kobj, pp->name); pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; @@ -218,12 +152,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp) return rc; } -static int __of_node_add(struct device_node *np) +int __of_attach_node_sysfs(struct device_node *np) { const char *name; struct property *pp; int rc; + if (!of_kset) + return 0; + np->kobj.kset = of_kset; if (!np->parent) { /* Nodes without parents are new top level trees */ @@ -245,59 +182,20 @@ static int __of_node_add(struct device_node *np) return 0; } -int of_node_add(struct device_node *np) -{ - int rc = 0; - - BUG_ON(!of_node_is_initialized(np)); - - /* - * Grab the mutex here so that in a race condition between of_init() and - * of_node_add(), node addition will still be consistent. - */ - mutex_lock(&of_aliases_mutex); - if (of_kset) - rc = __of_node_add(np); - else - /* This scenario may be perfectly valid, but report it anyway */ - pr_info("of_node_add(%s) before of_init()\n", np->full_name); - mutex_unlock(&of_aliases_mutex); - return rc; -} - -#if defined(CONFIG_OF_DYNAMIC) -static void of_node_remove(struct device_node *np) -{ - struct property *pp; - - BUG_ON(!of_node_is_initialized(np)); - - /* only remove properties if on sysfs */ - if (of_node_is_attached(np)) { - for_each_property_of_node(np, pp) - sysfs_remove_bin_file(&np->kobj, &pp->attr); - kobject_del(&np->kobj); - } - - /* finally remove the kobj_init ref */ - of_node_put(np); -} -#endif - static int __init of_init(void) { struct device_node *np; /* Create the kset, and register existing nodes */ - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); if (!of_kset) { - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); return -ENOMEM; } for_each_of_allnodes(np) - __of_node_add(np); - mutex_unlock(&of_aliases_mutex); + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); /* Symlink in /proc as required by userspace ABI */ if (of_allnodes) @@ -369,8 +267,8 @@ EXPORT_SYMBOL(of_find_all_nodes); * Find a property with a given name for a given node * and return the value. */ -static const void *__of_get_property(const struct device_node *np, - const char *name, int *lenp) +const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp) { struct property *pp = __of_find_property(np, name, lenp); @@ -1748,32 +1646,10 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na } EXPORT_SYMBOL(of_count_phandle_with_args); -#if defined(CONFIG_OF_DYNAMIC) -static int of_property_notify(int action, struct device_node *np, - struct property *prop) -{ - struct of_prop_reconfig pr; - - /* only call notifiers if the node is attached */ - if (!of_node_is_attached(np)) - return 0; - - pr.dn = np; - pr.prop = prop; - return of_reconfig_notify(action, &pr); -} -#else -static int of_property_notify(int action, struct device_node *np, - struct property *prop) -{ - return 0; -} -#endif - /** * __of_add_property - Add a property to a node without lock operations */ -static int __of_add_property(struct device_node *np, struct property *prop) +int __of_add_property(struct device_node *np, struct property *prop) { struct property **next; @@ -1799,22 +1675,49 @@ int of_add_property(struct device_node *np, struct property *prop) unsigned long flags; int rc; - rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); - if (rc) - return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); rc = __of_add_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; - if (of_node_is_attached(np)) + if (!rc) __of_add_property_sysfs(np, prop); + mutex_unlock(&of_mutex); + + if (!rc) + of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL); + return rc; } +int __of_remove_property(struct device_node *np, struct property *prop) +{ + struct property **next; + + for (next = &np->properties; *next; next = &(*next)->next) { + if (*next == prop) + break; + } + if (*next == NULL) + return -ENODEV; + + /* found the node */ + *next = prop->next; + prop->next = np->deadprops; + np->deadprops = prop; + + return 0; +} + +void __of_remove_property_sysfs(struct device_node *np, struct property *prop) +{ + /* at early boot, bail here and defer setup to of_init() */ + if (of_kset && of_node_is_attached(np)) + sysfs_remove_bin_file(&np->kobj, &prop->attr); +} + /** * of_remove_property - Remove a property from a node. * @@ -1825,211 +1728,98 @@ int of_add_property(struct device_node *np, struct property *prop) */ int of_remove_property(struct device_node *np, struct property *prop) { - struct property **next; unsigned long flags; - int found = 0; int rc; - rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); - if (rc) - return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - while (*next) { - if (*next == prop) { - /* found the node */ - *next = prop->next; - prop->next = np->deadprops; - np->deadprops = prop; - found = 1; - break; - } - next = &(*next)->next; - } + rc = __of_remove_property(np, prop); raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (!found) - return -ENODEV; + if (!rc) + __of_remove_property_sysfs(np, prop); - /* at early boot, bail hear and defer setup to of_init() */ - if (!of_kset) - return 0; + mutex_unlock(&of_mutex); - sysfs_remove_bin_file(&np->kobj, &prop->attr); + if (!rc) + of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL); - return 0; + return rc; } -/* - * of_update_property - Update a property in a node, if the property does - * not exist, add it. - * - * Note that we don't actually remove it, since we have given out - * who-knows-how-many pointers to the data using get-property. - * Instead we just move the property to the "dead properties" list, - * and add the new property to the property list - */ -int of_update_property(struct device_node *np, struct property *newprop) +int __of_update_property(struct device_node *np, struct property *newprop, + struct property **oldpropp) { struct property **next, *oldprop; - unsigned long flags; - int rc; - - rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop); - if (rc) - return rc; - if (!newprop->name) - return -EINVAL; + for (next = &np->properties; *next; next = &(*next)->next) { + if (of_prop_cmp((*next)->name, newprop->name) == 0) + break; + } + *oldpropp = oldprop = *next; - raw_spin_lock_irqsave(&devtree_lock, flags); - next = &np->properties; - oldprop = __of_find_property(np, newprop->name, NULL); - if (!oldprop) { - /* add the new node */ - rc = __of_add_property(np, newprop); - } else while (*next) { + if (oldprop) { /* replace the node */ - if (*next == oldprop) { - newprop->next = oldprop->next; - *next = newprop; - oldprop->next = np->deadprops; - np->deadprops = oldprop; - break; - } - next = &(*next)->next; + newprop->next = oldprop->next; + *next = newprop; + oldprop->next = np->deadprops; + np->deadprops = oldprop; + } else { + /* new node */ + newprop->next = NULL; + *next = newprop; } - raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (rc) - return rc; + return 0; +} + +void __of_update_property_sysfs(struct device_node *np, struct property *newprop, + struct property *oldprop) +{ /* At early boot, bail out and defer setup to of_init() */ if (!of_kset) - return 0; + return; - /* Update the sysfs attribute */ if (oldprop) sysfs_remove_bin_file(&np->kobj, &oldprop->attr); __of_add_property_sysfs(np, newprop); - - return 0; } -#if defined(CONFIG_OF_DYNAMIC) /* - * Support for dynamic device trees. + * of_update_property - Update a property in a node, if the property does + * not exist, add it. * - * On some platforms, the device tree can be manipulated at runtime. - * The routines in this section support adding, removing and changing - * device tree nodes. - */ - -static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain); - -int of_reconfig_notifier_register(struct notifier_block *nb) -{ - return blocking_notifier_chain_register(&of_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(of_reconfig_notifier_register); - -int of_reconfig_notifier_unregister(struct notifier_block *nb) -{ - return blocking_notifier_chain_unregister(&of_reconfig_chain, nb); -} -EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); - -int of_reconfig_notify(unsigned long action, void *p) -{ - int rc; - - rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); - return notifier_to_errno(rc); -} - -/** - * of_attach_node - Plug a device node into the tree and global list. + * Note that we don't actually remove it, since we have given out + * who-knows-how-many pointers to the data using get-property. + * Instead we just move the property to the "dead properties" list, + * and add the new property to the property list */ -int of_attach_node(struct device_node *np) +int of_update_property(struct device_node *np, struct property *newprop) { + struct property *oldprop; unsigned long flags; int rc; - rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); - if (rc) - return rc; - - raw_spin_lock_irqsave(&devtree_lock, flags); - np->sibling = np->parent->child; - np->allnext = np->parent->allnext; - np->parent->allnext = np; - np->parent->child = np; - of_node_clear_flag(np, OF_DETACHED); - raw_spin_unlock_irqrestore(&devtree_lock, flags); - - of_node_add(np); - return 0; -} - -/** - * of_detach_node - "Unplug" a node from the device tree. - * - * The caller must hold a reference to the node. The memory associated with - * the node is not freed until its refcount goes to zero. - */ -int of_detach_node(struct device_node *np) -{ - struct device_node *parent; - unsigned long flags; - int rc = 0; + if (!newprop->name) + return -EINVAL; - rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); - if (rc) - return rc; + mutex_lock(&of_mutex); raw_spin_lock_irqsave(&devtree_lock, flags); + rc = __of_update_property(np, newprop, &oldprop); + raw_spin_unlock_irqrestore(&devtree_lock, flags); - if (of_node_check_flag(np, OF_DETACHED)) { - /* someone already detached it */ - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } - - parent = np->parent; - if (!parent) { - raw_spin_unlock_irqrestore(&devtree_lock, flags); - return rc; - } + if (!rc) + __of_update_property_sysfs(np, newprop, oldprop); - if (of_allnodes == np) - of_allnodes = np->allnext; - else { - struct device_node *prev; - for (prev = of_allnodes; - prev->allnext != np; - prev = prev->allnext) - ; - prev->allnext = np->allnext; - } + mutex_unlock(&of_mutex); - if (parent->child == np) - parent->child = np->sibling; - else { - struct device_node *prevsib; - for (prevsib = np->parent->child; - prevsib->sibling != np; - prevsib = prevsib->sibling) - ; - prevsib->sibling = np->sibling; - } - - of_node_set_flag(np, OF_DETACHED); - raw_spin_unlock_irqrestore(&devtree_lock, flags); + if (!rc) + of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop); - of_node_remove(np); return rc; } -#endif /* defined(CONFIG_OF_DYNAMIC) */ static void of_alias_add(struct alias_prop *ap, struct device_node *np, int id, const char *stem, int stem_len) @@ -2062,9 +1852,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align)) of_chosen = of_find_node_by_path("/chosen@0"); if (of_chosen) { + /* linux,stdout-path and /aliases/stdout are for legacy compatibility */ const char *name = of_get_property(of_chosen, "stdout-path", NULL); if (!name) name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (IS_ENABLED(CONFIG_PPC) && !name) + name = of_get_property(of_aliases, "stdout", NULL); if (name) of_stdout = of_find_node_by_path(name); } @@ -2122,7 +1915,7 @@ int of_alias_get_id(struct device_node *np, const char *stem) struct alias_prop *app; int id = -ENODEV; - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (strcmp(app->stem, stem) != 0) continue; @@ -2132,7 +1925,7 @@ int of_alias_get_id(struct device_node *np, const char *stem) break; } } - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); return id; } @@ -2180,20 +1973,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur) EXPORT_SYMBOL_GPL(of_prop_next_string); /** - * of_device_is_stdout_path - check if a device node matches the - * linux,stdout-path property - * - * Check if this device node matches the linux,stdout-path property - * in the chosen node. return true if yes, false otherwise. + * of_console_check() - Test and setup console for DT setup + * @dn - Pointer to device node + * @name - Name to use for preferred console without index. ex. "ttyS" + * @index - Index to use for preferred console. + * + * Check if the given device node matches the stdout-path property in the + * /chosen node. If it does then register it as the preferred console and return + * TRUE. Otherwise return FALSE. */ -int of_device_is_stdout_path(struct device_node *dn) +bool of_console_check(struct device_node *dn, char *name, int index) { - if (!of_stdout) + if (!dn || dn != of_stdout || console_set_on_cmdline) return false; - - return of_stdout == dn; + return add_preferred_console(name, index, NULL); } -EXPORT_SYMBOL_GPL(of_device_is_stdout_path); +EXPORT_SYMBOL_GPL(of_console_check); /** * of_find_next_cache_node - Find a node's subsidiary cache diff --git a/drivers/of/device.c b/drivers/of/device.c index dafb973..46d6c75c 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen); seen = 0; - mutex_lock(&of_aliases_mutex); + mutex_lock(&of_mutex); list_for_each_entry(app, &aliases_lookup, link) { if (dev->of_node == app->np) { add_uevent_var(env, "OF_ALIAS_%d=%s", seen, @@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env) seen++; } } - mutex_unlock(&of_aliases_mutex); + mutex_unlock(&of_mutex); } int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c new file mode 100644 index 0000000..54fecc4 --- /dev/null +++ b/drivers/of/dynamic.c @@ -0,0 +1,660 @@ +/* + * Support for dynamic device trees. + * + * On some platforms, the device tree can be manipulated at runtime. + * The routines in this section support adding, removing and changing + * device tree nodes. + */ + +#include <linux/of.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/proc_fs.h> + +#include "of_private.h" + +/** + * of_node_get() - Increment refcount of a node + * @node: Node to inc refcount, NULL is supported to simplify writing of + * callers + * + * Returns node. + */ +struct device_node *of_node_get(struct device_node *node) +{ + if (node) + kobject_get(&node->kobj); + return node; +} +EXPORT_SYMBOL(of_node_get); + +/** + * of_node_put() - Decrement refcount of a node + * @node: Node to dec refcount, NULL is supported to simplify writing of + * callers + */ +void of_node_put(struct device_node *node) +{ + if (node) + kobject_put(&node->kobj); +} +EXPORT_SYMBOL(of_node_put); + +void __of_detach_node_sysfs(struct device_node *np) +{ + struct property *pp; + + BUG_ON(!of_node_is_initialized(np)); + if (!of_kset) + return; + + /* only remove properties if on sysfs */ + if (of_node_is_attached(np)) { + for_each_property_of_node(np, pp) + sysfs_remove_bin_file(&np->kobj, &pp->attr); + kobject_del(&np->kobj); + } + + /* finally remove the kobj_init ref */ + of_node_put(np); +} + +static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain); + +int of_reconfig_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&of_reconfig_chain, nb); +} +EXPORT_SYMBOL_GPL(of_reconfig_notifier_register); + +int of_reconfig_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&of_reconfig_chain, nb); +} +EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister); + +int of_reconfig_notify(unsigned long action, void *p) +{ + int rc; + + rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p); + return notifier_to_errno(rc); +} + +int of_property_notify(int action, struct device_node *np, + struct property *prop, struct property *oldprop) +{ + struct of_prop_reconfig pr; + + /* only call notifiers if the node is attached */ + if (!of_node_is_attached(np)) + return 0; + + pr.dn = np; + pr.prop = prop; + pr.old_prop = oldprop; + return of_reconfig_notify(action, &pr); +} + +void __of_attach_node(struct device_node *np) +{ + const __be32 *phandle; + int sz; + + np->name = __of_get_property(np, "name", NULL) ? : "<NULL>"; + np->type = __of_get_property(np, "device_type", NULL) ? : "<NULL>"; + + phandle = __of_get_property(np, "phandle", &sz); + if (!phandle) + phandle = __of_get_property(np, "linux,phandle", &sz); + if (IS_ENABLED(PPC_PSERIES) && !phandle) + phandle = __of_get_property(np, "ibm,phandle", &sz); + np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0; + + np->child = NULL; + np->sibling = np->parent->child; + np->allnext = np->parent->allnext; + np->parent->allnext = np; + np->parent->child = np; + of_node_clear_flag(np, OF_DETACHED); +} + +/** + * of_attach_node() - Plug a device node into the tree and global list. + */ +int of_attach_node(struct device_node *np) +{ + unsigned long flags; + + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); + __of_attach_node(np); + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); + + return 0; +} + +void __of_detach_node(struct device_node *np) +{ + struct device_node *parent; + + if (WARN_ON(of_node_check_flag(np, OF_DETACHED))) + return; + + parent = np->parent; + if (WARN_ON(!parent)) + return; + + if (of_allnodes == np) + of_allnodes = np->allnext; + else { + struct device_node *prev; + for (prev = of_allnodes; + prev->allnext != np; + prev = prev->allnext) + ; + prev->allnext = np->allnext; + } + + if (parent->child == np) + parent->child = np->sibling; + else { + struct device_node *prevsib; + for (prevsib = np->parent->child; + prevsib->sibling != np; + prevsib = prevsib->sibling) + ; + prevsib->sibling = np->sibling; + } + + of_node_set_flag(np, OF_DETACHED); +} + +/** + * of_detach_node() - "Unplug" a node from the device tree. + * + * The caller must hold a reference to the node. The memory associated with + * the node is not freed until its refcount goes to zero. + */ +int of_detach_node(struct device_node *np) +{ + unsigned long flags; + int rc = 0; + + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); + __of_detach_node(np); + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + __of_detach_node_sysfs(np); + mutex_unlock(&of_mutex); + + of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); + + return rc; +} + +/** + * of_node_release() - release a dynamically allocated node + * @kref: kref element of the node to be released + * + * In of_node_put() this function is passed to kref_put() as the destructor. + */ +void of_node_release(struct kobject *kobj) +{ + struct device_node *node = kobj_to_device_node(kobj); + struct property *prop = node->properties; + + /* We should never be releasing nodes that haven't been detached. */ + if (!of_node_check_flag(node, OF_DETACHED)) { + pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name); + dump_stack(); + return; + } + + if (!of_node_check_flag(node, OF_DYNAMIC)) + return; + + while (prop) { + struct property *next = prop->next; + kfree(prop->name); + kfree(prop->value); + kfree(prop); + prop = next; + + if (!prop) { + prop = node->deadprops; + node->deadprops = NULL; + } + } + kfree(node->full_name); + kfree(node->data); + kfree(node); +} + +/** + * __of_prop_dup - Copy a property dynamically. + * @prop: Property to copy + * @allocflags: Allocation flags (typically pass GFP_KERNEL) + * + * Copy a property by dynamically allocating the memory of both the + * property stucture and the property name & contents. The property's + * flags have the OF_DYNAMIC bit set so that we can differentiate between + * dynamically allocated properties and not. + * Returns the newly allocated property or NULL on out of memory error. + */ +struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags) +{ + struct property *new; + + new = kzalloc(sizeof(*new), allocflags); + if (!new) + return NULL; + + /* + * NOTE: There is no check for zero length value. + * In case of a boolean property, this will allocate a value + * of zero bytes. We do this to work around the use + * of of_get_property() calls on boolean values. + */ + new->name = kstrdup(prop->name, allocflags); + new->value = kmemdup(prop->value, prop->length, allocflags); + new->length = prop->length; + if (!new->name || !new->value) + goto err_free; + + /* mark the property as dynamic */ + of_property_set_flag(new, OF_DYNAMIC); + + return new; + + err_free: + kfree(new->name); + kfree(new->value); + kfree(new); + return NULL; +} + +/** + * __of_node_alloc() - Create an empty device node dynamically. + * @full_name: Full name of the new device node + * @allocflags: Allocation flags (typically pass GFP_KERNEL) + * + * Create an empty device tree node, suitable for further modification. + * The node data are dynamically allocated and all the node flags + * have the OF_DYNAMIC & OF_DETACHED bits set. + * Returns the newly allocated node or NULL on out of memory error. + */ +struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags) +{ + struct device_node *node; + + node = kzalloc(sizeof(*node), allocflags); + if (!node) + return NULL; + + node->full_name = kstrdup(full_name, allocflags); + of_node_set_flag(node, OF_DYNAMIC); + of_node_set_flag(node, OF_DETACHED); + if (!node->full_name) + goto err_free; + + of_node_init(node); + + return node; + + err_free: + kfree(node->full_name); + kfree(node); + return NULL; +} + +static void __of_changeset_entry_destroy(struct of_changeset_entry *ce) +{ + of_node_put(ce->np); + list_del(&ce->node); + kfree(ce); +} + +#ifdef DEBUG +static void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + switch (ce->action) { + case OF_RECONFIG_ADD_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "ADD_PROPERTY ", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "REMOVE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + pr_debug("%p: %s %s/%s\n", + ce, "UPDATE_PROPERTY", ce->np->full_name, + ce->prop->name); + break; + case OF_RECONFIG_ATTACH_NODE: + pr_debug("%p: %s %s\n", + ce, "ATTACH_NODE ", ce->np->full_name); + break; + case OF_RECONFIG_DETACH_NODE: + pr_debug("%p: %s %s\n", + ce, "DETACH_NODE ", ce->np->full_name); + break; + } +} +#else +static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce) +{ + /* empty */ +} +#endif + +static void __of_changeset_entry_invert(struct of_changeset_entry *ce, + struct of_changeset_entry *rce) +{ + memcpy(rce, ce, sizeof(*rce)); + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + rce->action = OF_RECONFIG_DETACH_NODE; + break; + case OF_RECONFIG_DETACH_NODE: + rce->action = OF_RECONFIG_ATTACH_NODE; + break; + case OF_RECONFIG_ADD_PROPERTY: + rce->action = OF_RECONFIG_REMOVE_PROPERTY; + break; + case OF_RECONFIG_REMOVE_PROPERTY: + rce->action = OF_RECONFIG_ADD_PROPERTY; + break; + case OF_RECONFIG_UPDATE_PROPERTY: + rce->old_prop = ce->prop; + rce->prop = ce->old_prop; + break; + } +} + +static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert) +{ + struct of_changeset_entry ce_inverted; + int ret; + + if (revert) { + __of_changeset_entry_invert(ce, &ce_inverted); + ce = &ce_inverted; + } + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + case OF_RECONFIG_DETACH_NODE: + ret = of_reconfig_notify(ce->action, ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + case OF_RECONFIG_REMOVE_PROPERTY: + case OF_RECONFIG_UPDATE_PROPERTY: + ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop); + break; + default: + pr_err("%s: invalid devicetree changeset action: %i\n", __func__, + (int)ce->action); + return; + } + + if (ret) + pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name); +} + +static int __of_changeset_entry_apply(struct of_changeset_entry *ce) +{ + struct property *old_prop, **propp; + unsigned long flags; + int ret = 0; + + __of_changeset_entry_dump(ce); + + raw_spin_lock_irqsave(&devtree_lock, flags); + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_add_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: add_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + case OF_RECONFIG_REMOVE_PROPERTY: + ret = __of_remove_property(ce->np, ce->prop); + if (ret) { + pr_err("%s: remove_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + + case OF_RECONFIG_UPDATE_PROPERTY: + /* If the property is in deadprops then it must be removed */ + for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) { + if (*propp == ce->prop) { + *propp = ce->prop->next; + ce->prop->next = NULL; + break; + } + } + + ret = __of_update_property(ce->np, ce->prop, &old_prop); + if (ret) { + pr_err("%s: update_property failed @%s/%s\n", + __func__, ce->np->full_name, + ce->prop->name); + break; + } + break; + default: + ret = -EINVAL; + } + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + if (ret) + return ret; + + switch (ce->action) { + case OF_RECONFIG_ATTACH_NODE: + __of_attach_node_sysfs(ce->np); + break; + case OF_RECONFIG_DETACH_NODE: + __of_detach_node_sysfs(ce->np); + break; + case OF_RECONFIG_ADD_PROPERTY: + /* ignore duplicate names */ + __of_add_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_REMOVE_PROPERTY: + __of_remove_property_sysfs(ce->np, ce->prop); + break; + case OF_RECONFIG_UPDATE_PROPERTY: + __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop); + break; + } + + return 0; +} + +static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce) +{ + struct of_changeset_entry ce_inverted; + + __of_changeset_entry_invert(ce, &ce_inverted); + return __of_changeset_entry_apply(&ce_inverted); +} + +/** + * of_changeset_init - Initialize a changeset for use + * + * @ocs: changeset pointer + * + * Initialize a changeset structure + */ +void of_changeset_init(struct of_changeset *ocs) +{ + memset(ocs, 0, sizeof(*ocs)); + INIT_LIST_HEAD(&ocs->entries); +} + +/** + * of_changeset_destroy - Destroy a changeset + * + * @ocs: changeset pointer + * + * Destroys a changeset. Note that if a changeset is applied, + * its changes to the tree cannot be reverted. + */ +void of_changeset_destroy(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce, *cen; + + list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node) + __of_changeset_entry_destroy(ce); +} + +/** + * of_changeset_apply - Applies a changeset + * + * @ocs: changeset pointer + * + * Applies a changeset to the live tree. + * Any side-effects of live tree state changes are applied here on + * sucess, like creation/destruction of devices and side-effects + * like creation of sysfs properties and directories. + * Returns 0 on success, a negative error value in case of an error. + * On error the partially applied effects are reverted. + */ +int of_changeset_apply(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + /* perform the rest of the work */ + pr_debug("of_changeset: applying...\n"); + list_for_each_entry(ce, &ocs->entries, node) { + ret = __of_changeset_entry_apply(ce); + if (ret) { + pr_err("%s: Error applying changeset (%d)\n", __func__, ret); + list_for_each_entry_continue_reverse(ce, &ocs->entries, node) + __of_changeset_entry_revert(ce); + return ret; + } + } + pr_debug("of_changeset: applied, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 0); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_revert - Reverts an applied changeset + * + * @ocs: changeset pointer + * + * Reverts a changeset returning the state of the tree to what it + * was before the application. + * Any side-effects like creation/destruction of devices and + * removal of sysfs properties and directories are applied. + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_revert(struct of_changeset *ocs) +{ + struct of_changeset_entry *ce; + int ret; + + pr_debug("of_changeset: reverting...\n"); + list_for_each_entry_reverse(ce, &ocs->entries, node) { + ret = __of_changeset_entry_revert(ce); + if (ret) { + pr_err("%s: Error reverting changeset (%d)\n", __func__, ret); + list_for_each_entry_continue(ce, &ocs->entries, node) + __of_changeset_entry_apply(ce); + return ret; + } + } + pr_debug("of_changeset: reverted, emitting notifiers.\n"); + + /* drop the global lock while emitting notifiers */ + mutex_unlock(&of_mutex); + list_for_each_entry_reverse(ce, &ocs->entries, node) + __of_changeset_entry_notify(ce, 1); + mutex_lock(&of_mutex); + pr_debug("of_changeset: notifiers sent.\n"); + + return 0; +} + +/** + * of_changeset_action - Perform a changeset action + * + * @ocs: changeset pointer + * @action: action to perform + * @np: Pointer to device node + * @prop: Pointer to property + * + * On action being one of: + * + OF_RECONFIG_ATTACH_NODE + * + OF_RECONFIG_DETACH_NODE, + * + OF_RECONFIG_ADD_PROPERTY + * + OF_RECONFIG_REMOVE_PROPERTY, + * + OF_RECONFIG_UPDATE_PROPERTY + * Returns 0 on success, a negative error value in case of an error. + */ +int of_changeset_action(struct of_changeset *ocs, unsigned long action, + struct device_node *np, struct property *prop) +{ + struct of_changeset_entry *ce; + + ce = kzalloc(sizeof(*ce), GFP_KERNEL); + if (!ce) { + pr_err("%s: Failed to allocate\n", __func__); + return -ENOMEM; + } + /* get a reference to the node */ + ce->action = action; + ce->np = of_node_get(np); + ce->prop = prop; + + if (action == OF_RECONFIG_UPDATE_PROPERTY && prop) + ce->old_prop = of_find_property(np, prop->name, NULL); + + /* add it to the list */ + list_add_tail(&ce->node, &ocs->entries); + return 0; +} diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 9aa012e..f46a24f 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -923,24 +923,24 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK +#define MAX_PHYS_ADDR ((phys_addr_t)~0) + void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { const u64 phys_offset = __pa(PAGE_OFFSET); base &= PAGE_MASK; size &= PAGE_MASK; - if (sizeof(phys_addr_t) < sizeof(u64)) { - if (base > ULONG_MAX) { - pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", - base, base + size); - return; - } + if (base > MAX_PHYS_ADDR) { + pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", + base, base + size); + return; + } - if (base + size > ULONG_MAX) { - pr_warning("Ignoring memory range 0x%lx - 0x%llx\n", - ULONG_MAX, base + size); - size = ULONG_MAX - base; - } + if (base + size > MAX_PHYS_ADDR) { + pr_warning("Ignoring memory range 0x%lx - 0x%llx\n", + ULONG_MAX, base + size); + size = MAX_PHYS_ADDR - base; } if (base + size < phys_offset) { diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index ff350c8..858e0a5 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -31,6 +31,63 @@ struct alias_prop { char stem[0]; }; -extern struct mutex of_aliases_mutex; +extern struct mutex of_mutex; extern struct list_head aliases_lookup; +extern struct kset *of_kset; + + +static inline struct device_node *kobj_to_device_node(struct kobject *kobj) +{ + return container_of(kobj, struct device_node, kobj); +} + +#if defined(CONFIG_OF_DYNAMIC) +extern int of_property_notify(int action, struct device_node *np, + struct property *prop, struct property *old_prop); +extern void of_node_release(struct kobject *kobj); +#else /* CONFIG_OF_DYNAMIC */ +static inline int of_property_notify(int action, struct device_node *np, + struct property *prop, struct property *old_prop) +{ + return 0; +} +#endif /* CONFIG_OF_DYNAMIC */ + +/** + * General utilities for working with live trees. + * + * All functions with two leading underscores operate + * without taking node references, so you either have to + * own the devtree lock or work on detached trees only. + */ +struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags); +struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags); + +extern const void *__of_get_property(const struct device_node *np, + const char *name, int *lenp); +extern int __of_add_property(struct device_node *np, struct property *prop); +extern int __of_add_property_sysfs(struct device_node *np, + struct property *prop); +extern int __of_remove_property(struct device_node *np, struct property *prop); +extern void __of_remove_property_sysfs(struct device_node *np, + struct property *prop); +extern int __of_update_property(struct device_node *np, + struct property *newprop, struct property **oldprop); +extern void __of_update_property_sysfs(struct device_node *np, + struct property *newprop, struct property *oldprop); + +extern void __of_attach_node(struct device_node *np); +extern int __of_attach_node_sysfs(struct device_node *np); +extern void __of_detach_node(struct device_node *np); +extern void __of_detach_node_sysfs(struct device_node *np); + +/* iterators for transactions, used for overlays */ +/* forward iterator */ +#define for_each_transaction_entry(_oft, _te) \ + list_for_each_entry(_te, &(_oft)->te_list, node) + +/* reverse iterator */ +#define for_each_transaction_entry_reverse(_oft, _te) \ + list_for_each_entry_reverse(_te, &(_oft)->te_list, node) + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 632aae8..59fb12e 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void) for (i = 0; i < reserved_mem_count; i++) { struct reserved_mem *rmem = &reserved_mem[i]; unsigned long node = rmem->fdt_node; + int len; + const __be32 *prop; int err = 0; + prop = of_get_flat_dt_prop(node, "phandle", &len); + if (!prop) + prop = of_get_flat_dt_prop(node, "linux,phandle", &len); + if (prop) + rmem->phandle = of_read_number(prop, len/4); + if (rmem->size == 0) err = __reserved_mem_alloc_size(node, rmem->name, &rmem->base, &rmem->size); @@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void) __reserved_mem_init_node(rmem); } } + +static inline struct reserved_mem *__find_rmem(struct device_node *node) +{ + unsigned int i; + + if (!node->phandle) + return NULL; + + for (i = 0; i < reserved_mem_count; i++) + if (reserved_mem[i].phandle == node->phandle) + return &reserved_mem[i]; + return NULL; +} + +/** + * of_reserved_mem_device_init() - assign reserved memory region to given device + * + * This function assign memory region pointed by "memory-region" device tree + * property to the given device. + */ +void of_reserved_mem_device_init(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_init) + return; + + rmem->ops->device_init(rmem, dev); + dev_info(dev, "assigned reserved memory node %s\n", rmem->name); +} + +/** + * of_reserved_mem_device_release() - release reserved memory device structures + * + * This function releases structures allocated for memory region handling for + * the given device. + */ +void of_reserved_mem_device_release(struct device *dev) +{ + struct reserved_mem *rmem; + struct device_node *np; + + np = of_parse_phandle(dev->of_node, "memory-region", 0); + if (!np) + return; + + rmem = __find_rmem(np); + of_node_put(np); + + if (!rmem || !rmem->ops || !rmem->ops->device_release) + return; + + rmem->ops->device_release(rmem, dev); +} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 500436f..0197725 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus, break; } } + of_node_set_flag(bus, OF_POPULATED_BUS); return rc; } @@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate); static int of_platform_device_destroy(struct device *dev, void *data) { - bool *children_left = data; - /* Do not touch devices not populated from the device tree */ - if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { - *children_left = true; + if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) return 0; - } - /* Recurse, but don't touch this device if it has any children left */ - if (of_platform_depopulate(dev) != 0) { - *children_left = true; - return 0; - } + /* Recurse for any nodes that were treated as busses */ + if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS)) + device_for_each_child(dev, NULL, of_platform_device_destroy); if (dev->bus == &platform_bus_type) platform_device_unregister(to_platform_device(dev)); @@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data) else if (dev->bus == &amba_bustype) amba_device_unregister(to_amba_device(dev)); #endif - else { - *children_left = true; - return 0; - } of_node_clear_flag(dev->of_node, OF_POPULATED); - + of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; } /** * of_platform_depopulate() - Remove devices populated from device tree - * @parent: device which childred will be removed + * @parent: device which children will be removed * * Complementary to of_platform_populate(), this function removes children * of the given device (and, recurrently, their children) that have been @@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data) * Returns 0 when all children devices have been removed or * -EBUSY when some children remained. */ -int of_platform_depopulate(struct device *parent) +void of_platform_depopulate(struct device *parent) { - bool children_left = false; - - device_for_each_child(parent, &children_left, - of_platform_device_destroy); - - return children_left ? -EBUSY : 0; + device_for_each_child(parent, NULL, of_platform_device_destroy); } EXPORT_SYMBOL_GPL(of_platform_depopulate); diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 077314e..d410026 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/of_irq.h> #include <linux/of_platform.h> #include <linux/list.h> @@ -16,11 +17,17 @@ #include <linux/slab.h> #include <linux/device.h> +#include "of_private.h" + static struct selftest_results { int passed; int failed; } selftest_results; +#define NO_OF_NODES 2 +static struct device_node *nodes[NO_OF_NODES]; +static int last_node_index; + #define selftest(result, fmt, ...) { \ if (!(result)) { \ selftest_results.failed++; \ @@ -266,6 +273,81 @@ static void __init of_selftest_property_match_string(void) selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc); } +#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \ + (p1)->value && (p2)->value && \ + !memcmp((p1)->value, (p2)->value, (p1)->length) && \ + !strcmp((p1)->name, (p2)->name)) +static void __init of_selftest_property_copy(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property p1 = { .name = "p1", .length = 0, .value = "" }; + struct property p2 = { .name = "p2", .length = 5, .value = "abcd" }; + struct property *new; + + new = __of_prop_dup(&p1, GFP_KERNEL); + selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n"); + kfree(new->value); + kfree(new->name); + kfree(new); + + new = __of_prop_dup(&p2, GFP_KERNEL); + selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n"); + kfree(new->value); + kfree(new->name); + kfree(new); +#endif +} + +static void __init of_selftest_changeset(void) +{ +#ifdef CONFIG_OF_DYNAMIC + struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" }; + struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" }; + struct property *ppremove; + struct device_node *n1, *n2, *n21, *nremove, *parent; + struct of_changeset chgset; + + of_changeset_init(&chgset); + n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL); + selftest(n1, "testcase setup failure\n"); + n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL); + selftest(n2, "testcase setup failure\n"); + n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL); + selftest(n21, "testcase setup failure %p\n", n21); + nremove = of_find_node_by_path("/testcase-data/changeset/node-remove"); + selftest(nremove, "testcase setup failure\n"); + ppadd = __of_prop_dup(&padd, GFP_KERNEL); + selftest(ppadd, "testcase setup failure\n"); + ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL); + selftest(ppupdate, "testcase setup failure\n"); + parent = nremove->parent; + n1->parent = parent; + n2->parent = parent; + n21->parent = n2; + n2->child = n21; + ppremove = of_find_property(parent, "prop-remove", NULL); + selftest(ppremove, "failed to find removal prop"); + + of_changeset_init(&chgset); + selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n"); + selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n"); + selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n"); + selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n"); + selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n"); + selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n"); + selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n"); + mutex_lock(&of_mutex); + selftest(!of_changeset_apply(&chgset), "apply failed\n"); + mutex_unlock(&of_mutex); + + mutex_lock(&of_mutex); + selftest(!of_changeset_revert(&chgset), "revert failed\n"); + mutex_unlock(&of_mutex); + + of_changeset_destroy(&chgset); +#endif +} + static void __init of_selftest_parse_interrupts(void) { struct device_node *np; @@ -517,9 +599,156 @@ static void __init of_selftest_platform_populate(void) } } +/** + * update_node_properties - adds the properties + * of np into dup node (present in live tree) and + * updates parent of children of np to dup. + * + * @np: node already present in live tree + * @dup: node present in live tree to be updated + */ +static void update_node_properties(struct device_node *np, + struct device_node *dup) +{ + struct property *prop; + struct device_node *child; + + for_each_property_of_node(np, prop) + of_add_property(dup, prop); + + for_each_child_of_node(np, child) + child->parent = dup; +} + +/** + * attach_node_and_children - attaches nodes + * and its children to live tree + * + * @np: Node to attach to live tree + */ +static int attach_node_and_children(struct device_node *np) +{ + struct device_node *next, *root = np, *dup; + + if (!np) { + pr_warn("%s: No tree to attach; not running tests\n", + __func__); + return -ENODATA; + } + + + /* skip root node */ + np = np->child; + /* storing a copy in temporary node */ + dup = np; + + while (dup) { + nodes[last_node_index++] = dup; + dup = dup->sibling; + } + dup = NULL; + + while (np) { + next = np->allnext; + dup = of_find_node_by_path(np->full_name); + if (dup) + update_node_properties(np, dup); + else { + np->child = NULL; + if (np->parent == root) + np->parent = of_allnodes; + of_attach_node(np); + } + np = next; + } + + return 0; +} + +/** + * selftest_data_add - Reads, copies data from + * linked tree and attaches it to the live tree + */ +static int __init selftest_data_add(void) +{ + void *selftest_data; + struct device_node *selftest_data_node; + extern uint8_t __dtb_testcases_begin[]; + extern uint8_t __dtb_testcases_end[]; + const int size = __dtb_testcases_end - __dtb_testcases_begin; + + if (!size || !of_allnodes) { + pr_warn("%s: No testcase data to attach; not running tests\n", + __func__); + return -ENODATA; + } + + /* creating copy */ + selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL); + + if (!selftest_data) { + pr_warn("%s: Failed to allocate memory for selftest_data; " + "not running tests\n", __func__); + return -ENOMEM; + } + of_fdt_unflatten_tree(selftest_data, &selftest_data_node); + + /* attach the sub-tree to live tree */ + return attach_node_and_children(selftest_data_node); +} + +/** + * detach_node_and_children - detaches node + * and its children from live tree + * + * @np: Node to detach from live tree + */ +static void detach_node_and_children(struct device_node *np) +{ + while (np->child) + detach_node_and_children(np->child); + + while (np->sibling) + detach_node_and_children(np->sibling); + + of_detach_node(np); +} + +/** + * selftest_data_remove - removes the selftest data + * nodes from the live tree + */ +static void selftest_data_remove(void) +{ + struct device_node *np; + struct property *prop; + + while (last_node_index >= 0) { + if (nodes[last_node_index]) { + np = of_find_node_by_path(nodes[last_node_index]->full_name); + if (strcmp(np->full_name, "/aliases") != 0) { + detach_node_and_children(np->child); + of_detach_node(np); + } else { + for_each_property_of_node(np, prop) { + if (strcmp(prop->name, "testcase-alias") == 0) + of_remove_property(np, prop); + } + } + } + last_node_index--; + } +} + static int __init of_selftest(void) { struct device_node *np; + int res; + + /* adding data for selftest */ + res = selftest_data_add(); + if (res) + return res; np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); if (!np) { @@ -533,12 +762,18 @@ static int __init of_selftest(void) of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); of_selftest_property_match_string(); + of_selftest_property_copy(); + of_selftest_changeset(); of_selftest_parse_interrupts(); of_selftest_parse_interrupts_extended(); of_selftest_match_node(); of_selftest_platform_populate(); pr_info("end of selftest - %i passed, %i failed\n", selftest_results.passed, selftest_results.failed); + + /* removing selftest data from live tree */ + selftest_data_remove(); + return 0; } late_initcall(of_selftest); diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts new file mode 100644 index 0000000..219ef93 --- /dev/null +++ b/drivers/of/testcase-data/testcases.dts @@ -0,0 +1,15 @@ +/dts-v1/; +/ { + testcase-data { + changeset { + prop-update = "hello"; + prop-remove = "world"; + node-remove { + }; + }; + }; +}; +#include "tests-phandle.dtsi" +#include "tests-interrupts.dtsi" +#include "tests-match.dtsi" +#include "tests-platform.dtsi" diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi deleted file mode 100644 index 6d8d980a..0000000 --- a/drivers/of/testcase-data/testcases.dtsi +++ /dev/null @@ -1,4 +0,0 @@ -#include "tests-phandle.dtsi" -#include "tests-interrupts.dtsi" -#include "tests-match.dtsi" -#include "tests-platform.dtsi" diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 93aa29f..f2945fa 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -375,11 +375,11 @@ static void __exit cleanup_slots(void) static int __init rpaphp_init(void) { - struct device_node *dn = NULL; + struct device_node *dn; info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); - while ((dn = of_find_node_by_name(dn, "pci"))) + for_each_node_by_name(dn, "pci") rpaphp_add_slot(dn); return 0; diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig index 6bbc36f..e460398 100644 --- a/drivers/scsi/cxgbi/cxgb3i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig @@ -1,6 +1,6 @@ config SCSI_CXGB3_ISCSI tristate "Chelsio T3 iSCSI support" - depends on PCI && INET + depends on PCI && INET && (IPV6 || IPV6=n) select NETDEVICES select ETHERNET select NET_VENDOR_CHELSIO diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig index 16b2c7d..8c4e423 100644 --- a/drivers/scsi/cxgbi/cxgb4i/Kconfig +++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig @@ -1,6 +1,6 @@ config SCSI_CXGB4_ISCSI tristate "Chelsio T4 iSCSI support" - depends on PCI && INET + depends on PCI && INET && (IPV6 || IPV6=n) select NETDEVICES select ETHERNET select NET_VENDOR_CHELSIO diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 43fea22..ae45bd9 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -472,7 +472,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport) if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, 1UL * delay * HZ); - if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { + if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) && + srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), rport->state); scsi_target_block(&shost->shost_gendev); diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index f9a1386..693208e 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -151,7 +151,7 @@ config KIRKWOOD_THERMAL config DOVE_THERMAL tristate "Temperature sensor on Marvell Dove SoCs" - depends on ARCH_DOVE + depends on ARCH_DOVE || MACH_DOVE depends on OF help Support for the Dove thermal sensor driver in the Linux thermal @@ -243,4 +243,9 @@ depends on ARCH_EXYNOS source "drivers/thermal/samsung/Kconfig" endmenu +menu "STMicroelectronics thermal drivers" +depends on ARCH_STI && OF +source "drivers/thermal/st/Kconfig" +endmenu + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index de0636a..31e232f 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_ACPI_INT3403_THERMAL) += int3403_thermal.o +obj-$(CONFIG_ST_THERMAL) += st/ diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 84a75f8..1ab0018 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -305,7 +305,7 @@ static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device, * @event: value showing cpufreq event for which this function invoked. * @data: callback-specific data * - * Callback to highjack the notification on cpufreq policy transition. + * Callback to hijack the notification on cpufreq policy transition. * Every time there is a change in policy, we will intercept and * update the cpufreq policy with thermal constraints. * diff --git a/drivers/thermal/int3403_thermal.c b/drivers/thermal/int3403_thermal.c index e93f025..17554ee 100644 --- a/drivers/thermal/int3403_thermal.c +++ b/drivers/thermal/int3403_thermal.c @@ -33,6 +33,10 @@ struct int3403_sensor { struct thermal_zone_device *tzone; unsigned long *thresholds; + unsigned long crit_temp; + int crit_trip_id; + unsigned long psv_temp; + int psv_trip_id; }; static int sys_get_curr_temp(struct thermal_zone_device *tzone, @@ -79,12 +83,18 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzone, struct acpi_device *device = tzone->devdata; struct int3403_sensor *obj = acpi_driver_data(device); - /* - * get_trip_temp is a mandatory callback but - * PATx method doesn't return any value, so return - * cached value, which was last set from user space. - */ - *temp = obj->thresholds[trip]; + if (trip == obj->crit_trip_id) + *temp = obj->crit_temp; + else if (trip == obj->psv_trip_id) + *temp = obj->psv_temp; + else { + /* + * get_trip_temp is a mandatory callback but + * PATx method doesn't return any value, so return + * cached value, which was last set from user space. + */ + *temp = obj->thresholds[trip]; + } return 0; } @@ -92,8 +102,14 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzone, static int sys_get_trip_type(struct thermal_zone_device *thermal, int trip, enum thermal_trip_type *type) { + struct acpi_device *device = thermal->devdata; + struct int3403_sensor *obj = acpi_driver_data(device); + /* Mandatory callback, may not mean much here */ - *type = THERMAL_TRIP_PASSIVE; + if (trip == obj->crit_trip_id) + *type = THERMAL_TRIP_CRITICAL; + else + *type = THERMAL_TRIP_PASSIVE; return 0; } @@ -155,6 +171,34 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event) } } +static int sys_get_trip_crt(struct acpi_device *device, unsigned long *temp) +{ + unsigned long long crt; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_CRT", NULL, &crt); + if (ACPI_FAILURE(status)) + return -EIO; + + *temp = DECI_KELVIN_TO_MILLI_CELSIUS(crt, KELVIN_OFFSET); + + return 0; +} + +static int sys_get_trip_psv(struct acpi_device *device, unsigned long *temp) +{ + unsigned long long psv; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_PSV", NULL, &psv); + if (ACPI_FAILURE(status)) + return -EIO; + + *temp = DECI_KELVIN_TO_MILLI_CELSIUS(psv, KELVIN_OFFSET); + + return 0; +} + static int acpi_int3403_add(struct acpi_device *device) { int result = 0; @@ -194,6 +238,15 @@ static int acpi_int3403_add(struct acpi_device *device) return -ENOMEM; trip_mask = BIT(trip_cnt) - 1; } + + obj->psv_trip_id = -1; + if (!sys_get_trip_psv(device, &obj->psv_temp)) + obj->psv_trip_id = trip_cnt++; + + obj->crit_trip_id = -1; + if (!sys_get_trip_crt(device, &obj->crit_temp)) + obj->crit_trip_id = trip_cnt++; + obj->tzone = thermal_zone_device_register(acpi_device_bid(device), trip_cnt, trip_mask, device, &tzone_ops, NULL, 0, 0); diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index d7ca9f4..acbff14 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -505,6 +505,10 @@ static irqreturn_t exynos_tmu_irq(int irq, void *id) static const struct of_device_id exynos_tmu_match[] = { { + .compatible = "samsung,exynos3250-tmu", + .data = (void *)EXYNOS3250_TMU_DRV_DATA, + }, + { .compatible = "samsung,exynos4210-tmu", .data = (void *)EXYNOS4210_TMU_DRV_DATA, }, @@ -677,7 +681,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk_sec; } - if (pdata->type == SOC_ARCH_EXYNOS4210 || + if (pdata->type == SOC_ARCH_EXYNOS3250 || + pdata->type == SOC_ARCH_EXYNOS4210 || pdata->type == SOC_ARCH_EXYNOS4412 || pdata->type == SOC_ARCH_EXYNOS5250 || pdata->type == SOC_ARCH_EXYNOS5260 || @@ -759,10 +764,10 @@ static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); - exynos_tmu_control(pdev, false); - exynos_unregister_thermal(data->reg_conf); + exynos_tmu_control(pdev, false); + clk_unprepare(data->clk); if (!IS_ERR(data->clk_sec)) clk_unprepare(data->clk_sec); diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h index edd08cf..1b4a644 100644 --- a/drivers/thermal/samsung/exynos_tmu.h +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -40,7 +40,8 @@ enum calibration_mode { }; enum soc_type { - SOC_ARCH_EXYNOS4210 = 1, + SOC_ARCH_EXYNOS3250 = 1, + SOC_ARCH_EXYNOS4210, SOC_ARCH_EXYNOS4412, SOC_ARCH_EXYNOS5250, SOC_ARCH_EXYNOS5260, diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c index c1d81dc..aa8e0de 100644 --- a/drivers/thermal/samsung/exynos_tmu_data.c +++ b/drivers/thermal/samsung/exynos_tmu_data.c @@ -90,6 +90,95 @@ struct exynos_tmu_init_data const exynos4210_default_tmu_data = { }; #endif +#if defined(CONFIG_SOC_EXYNOS3250) +static const struct exynos_tmu_registers exynos3250_tmu_registers = { + .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, + .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT, + .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT, + .tmu_ctrl = EXYNOS_TMU_REG_CONTROL, + .test_mux_addr_shift = EXYNOS4412_MUX_ADDR_SHIFT, + .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT, + .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK, + .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT, + .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK, + .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT, + .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT, + .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK, + .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT, + .tmu_status = EXYNOS_TMU_REG_STATUS, + .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP, + .threshold_th0 = EXYNOS_THD_TEMP_RISE, + .threshold_th1 = EXYNOS_THD_TEMP_FALL, + .tmu_inten = EXYNOS_TMU_REG_INTEN, + .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT, + .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT, + .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT, + .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT, + .tmu_intstat = EXYNOS_TMU_REG_INTSTAT, + .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR, + .intclr_fall_shift = EXYNOS_TMU_CLEAR_FALL_INT_SHIFT, + .intclr_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT, + .intclr_rise_mask = EXYNOS_TMU_RISE_INT_MASK, + .intclr_fall_mask = EXYNOS_TMU_FALL_INT_MASK, + .emul_con = EXYNOS_EMUL_CON, + .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT, + .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT, + .emul_time_mask = EXYNOS_EMUL_TIME_MASK, +}; + +#define EXYNOS3250_TMU_DATA \ + .threshold_falling = 10, \ + .trigger_levels[0] = 70, \ + .trigger_levels[1] = 95, \ + .trigger_levels[2] = 110, \ + .trigger_levels[3] = 120, \ + .trigger_enable[0] = true, \ + .trigger_enable[1] = true, \ + .trigger_enable[2] = true, \ + .trigger_enable[3] = false, \ + .trigger_type[0] = THROTTLE_ACTIVE, \ + .trigger_type[1] = THROTTLE_ACTIVE, \ + .trigger_type[2] = SW_TRIP, \ + .trigger_type[3] = HW_TRIP, \ + .max_trigger_level = 4, \ + .gain = 8, \ + .reference_voltage = 16, \ + .noise_cancel_mode = 4, \ + .cal_type = TYPE_TWO_POINT_TRIMMING, \ + .efuse_value = 55, \ + .min_efuse_value = 40, \ + .max_efuse_value = 100, \ + .first_point_trim = 25, \ + .second_point_trim = 85, \ + .default_temp_offset = 50, \ + .freq_tab[0] = { \ + .freq_clip_max = 800 * 1000, \ + .temp_level = 70, \ + }, \ + .freq_tab[1] = { \ + .freq_clip_max = 400 * 1000, \ + .temp_level = 95, \ + }, \ + .freq_tab_count = 2, \ + .registers = &exynos3250_tmu_registers, \ + .features = (TMU_SUPPORT_EMULATION | \ + TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \ + TMU_SUPPORT_EMUL_TIME) +#endif + +#if defined(CONFIG_SOC_EXYNOS3250) +struct exynos_tmu_init_data const exynos3250_default_tmu_data = { + .tmu_data = { + { + EXYNOS3250_TMU_DATA, + .type = SOC_ARCH_EXYNOS3250, + .test_mux = EXYNOS4412_MUX_ADDR_VALUE, + }, + }, + .tmu_count = 1, +}; +#endif + #if defined(CONFIG_SOC_EXYNOS4412) || defined(CONFIG_SOC_EXYNOS5250) static const struct exynos_tmu_registers exynos4412_tmu_registers = { .triminfo_data = EXYNOS_TMU_REG_TRIMINFO, diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h index d268981..f0979e5 100644 --- a/drivers/thermal/samsung/exynos_tmu_data.h +++ b/drivers/thermal/samsung/exynos_tmu_data.h @@ -148,6 +148,13 @@ #define EXYNOS5440_TMU_TH_RISE4_SHIFT 24 #define EXYNOS5440_EFUSE_SWAP_OFFSET 8 +#if defined(CONFIG_SOC_EXYNOS3250) +extern struct exynos_tmu_init_data const exynos3250_default_tmu_data; +#define EXYNOS3250_TMU_DRV_DATA (&exynos3250_default_tmu_data) +#else +#define EXYNOS3250_TMU_DRV_DATA (NULL) +#endif + #if defined(CONFIG_CPU_EXYNOS4210) extern struct exynos_tmu_init_data const exynos4210_default_tmu_data; #define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data) diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig new file mode 100644 index 0000000..490fdbe --- /dev/null +++ b/drivers/thermal/st/Kconfig @@ -0,0 +1,12 @@ +config ST_THERMAL + tristate "Thermal sensors on STMicroelectronics STi series of SoCs" + help + Support for thermal sensors on STMicroelectronics STi series of SoCs. + +config ST_THERMAL_SYSCFG + select ST_THERMAL + tristate "STi series syscfg register access based thermal sensors" + +config ST_THERMAL_MEMMAP + select ST_THERMAL + tristate "STi series memory mapped access based thermal sensors" diff --git a/drivers/thermal/st/Makefile b/drivers/thermal/st/Makefile new file mode 100644 index 0000000..b388789 --- /dev/null +++ b/drivers/thermal/st/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_ST_THERMAL) := st_thermal.o +obj-$(CONFIG_ST_THERMAL_SYSCFG) += st_thermal_syscfg.o +obj-$(CONFIG_ST_THERMAL_MEMMAP) += st_thermal_memmap.o diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c new file mode 100644 index 0000000..90163b3 --- /dev/null +++ b/drivers/thermal/st/st_thermal.c @@ -0,0 +1,313 @@ +/* + * ST Thermal Sensor Driver core routines + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * + * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/clk.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> + +#include "st_thermal.h" + +/* The Thermal Framework expects millidegrees */ +#define mcelsius(temp) ((temp) * 1000) + +/* + * Function to allocate regfields which are common + * between syscfg and memory mapped based sensors + */ +int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor) +{ + struct device *dev = sensor->dev; + struct regmap *regmap = sensor->regmap; + const struct reg_field *reg_fields = sensor->cdata->reg_fields; + + sensor->dcorrect = devm_regmap_field_alloc(dev, regmap, + reg_fields[DCORRECT]); + + sensor->overflow = devm_regmap_field_alloc(dev, regmap, + reg_fields[OVERFLOW]); + + sensor->temp_data = devm_regmap_field_alloc(dev, regmap, + reg_fields[DATA]); + + if (IS_ERR(sensor->dcorrect) || + IS_ERR(sensor->overflow) || + IS_ERR(sensor->temp_data)) { + dev_err(dev, "failed to allocate common regfields\n"); + return -EINVAL; + } + + return sensor->ops->alloc_regfields(sensor); +} + +static int st_thermal_sensor_on(struct st_thermal_sensor *sensor) +{ + int ret; + struct device *dev = sensor->dev; + + ret = clk_prepare_enable(sensor->clk); + if (ret) { + dev_err(dev, "failed to enable clk\n"); + return ret; + } + + ret = sensor->ops->power_ctrl(sensor, POWER_ON); + if (ret) { + dev_err(dev, "failed to power on sensor\n"); + clk_disable_unprepare(sensor->clk); + } + + return ret; +} + +static int st_thermal_sensor_off(struct st_thermal_sensor *sensor) +{ + int ret; + + ret = sensor->ops->power_ctrl(sensor, POWER_OFF); + if (ret) + return ret; + + clk_disable_unprepare(sensor->clk); + + return 0; +} + +static int st_thermal_calibration(struct st_thermal_sensor *sensor) +{ + int ret; + unsigned int val; + struct device *dev = sensor->dev; + + /* Check if sensor calibration data is already written */ + ret = regmap_field_read(sensor->dcorrect, &val); + if (ret) { + dev_err(dev, "failed to read calibration data\n"); + return ret; + } + + if (!val) { + /* + * Sensor calibration value not set by bootloader, + * default calibration data to be used + */ + ret = regmap_field_write(sensor->dcorrect, + sensor->cdata->calibration_val); + if (ret) + dev_err(dev, "failed to set calibration data\n"); + } + + return ret; +} + +/* Callback to get temperature from HW*/ +static int st_thermal_get_temp(struct thermal_zone_device *th, + unsigned long *temperature) +{ + struct st_thermal_sensor *sensor = th->devdata; + struct device *dev = sensor->dev; + unsigned int temp; + unsigned int overflow; + int ret; + + ret = regmap_field_read(sensor->overflow, &overflow); + if (ret) + return ret; + if (overflow) + return -EIO; + + ret = regmap_field_read(sensor->temp_data, &temp); + if (ret) + return ret; + + temp += sensor->cdata->temp_adjust_val; + temp = mcelsius(temp); + + dev_dbg(dev, "temperature: %d\n", temp); + + *temperature = temp; + + return 0; +} + +static int st_thermal_get_trip_type(struct thermal_zone_device *th, + int trip, enum thermal_trip_type *type) +{ + struct st_thermal_sensor *sensor = th->devdata; + struct device *dev = sensor->dev; + + switch (trip) { + case 0: + *type = THERMAL_TRIP_CRITICAL; + break; + default: + dev_err(dev, "invalid trip point\n"); + return -EINVAL; + } + + return 0; +} + +static int st_thermal_get_trip_temp(struct thermal_zone_device *th, + int trip, unsigned long *temp) +{ + struct st_thermal_sensor *sensor = th->devdata; + struct device *dev = sensor->dev; + + switch (trip) { + case 0: + *temp = mcelsius(sensor->cdata->crit_temp); + break; + default: + dev_err(dev, "Invalid trip point\n"); + return -EINVAL; + } + + return 0; +} + +static struct thermal_zone_device_ops st_tz_ops = { + .get_temp = st_thermal_get_temp, + .get_trip_type = st_thermal_get_trip_type, + .get_trip_temp = st_thermal_get_trip_temp, +}; + +int st_thermal_register(struct platform_device *pdev, + const struct of_device_id *st_thermal_of_match) +{ + struct st_thermal_sensor *sensor; + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + const struct of_device_id *match; + + int polling_delay; + int ret; + + if (!np) { + dev_err(dev, "device tree node not found\n"); + return -EINVAL; + } + + sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL); + if (!sensor) + return -ENOMEM; + + sensor->dev = dev; + + match = of_match_device(st_thermal_of_match, dev); + if (!(match && match->data)) + return -EINVAL; + + sensor->cdata = match->data; + if (!sensor->cdata->ops) + return -EINVAL; + + sensor->ops = sensor->cdata->ops; + + ret = sensor->ops->regmap_init(sensor); + if (ret) + return ret; + + ret = st_thermal_alloc_regfields(sensor); + if (ret) + return ret; + + sensor->clk = devm_clk_get(dev, "thermal"); + if (IS_ERR(sensor->clk)) { + dev_err(dev, "failed to fetch clock\n"); + return PTR_ERR(sensor->clk); + } + + if (sensor->ops->register_enable_irq) { + ret = sensor->ops->register_enable_irq(sensor); + if (ret) + return ret; + } + + ret = st_thermal_sensor_on(sensor); + if (ret) + return ret; + + ret = st_thermal_calibration(sensor); + if (ret) + goto sensor_off; + + polling_delay = sensor->ops->register_enable_irq ? 0 : 1000; + + sensor->thermal_dev = + thermal_zone_device_register(dev_name(dev), 1, 0, sensor, + &st_tz_ops, NULL, 0, polling_delay); + if (IS_ERR(sensor->thermal_dev)) { + dev_err(dev, "failed to register thermal zone device\n"); + ret = PTR_ERR(sensor->thermal_dev); + goto sensor_off; + } + + platform_set_drvdata(pdev, sensor); + + return 0; + +sensor_off: + st_thermal_sensor_off(sensor); + + return ret; +} +EXPORT_SYMBOL_GPL(st_thermal_register); + +int st_thermal_unregister(struct platform_device *pdev) +{ + struct st_thermal_sensor *sensor = platform_get_drvdata(pdev); + + st_thermal_sensor_off(sensor); + thermal_zone_device_unregister(sensor->thermal_dev); + + return 0; +} +EXPORT_SYMBOL_GPL(st_thermal_unregister); + +static int st_thermal_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct st_thermal_sensor *sensor = platform_get_drvdata(pdev); + + return st_thermal_sensor_off(sensor); +} + +static int st_thermal_resume(struct device *dev) +{ + int ret; + struct platform_device *pdev = to_platform_device(dev); + struct st_thermal_sensor *sensor = platform_get_drvdata(pdev); + + ret = st_thermal_sensor_on(sensor); + if (ret) + return ret; + + ret = st_thermal_calibration(sensor); + if (ret) + return ret; + + if (sensor->ops->enable_irq) { + ret = sensor->ops->enable_irq(sensor); + if (ret) + return ret; + } + + return 0; +} +SIMPLE_DEV_PM_OPS(st_thermal_pm_ops, st_thermal_suspend, st_thermal_resume); +EXPORT_SYMBOL_GPL(st_thermal_pm_ops); + +MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/st/st_thermal.h b/drivers/thermal/st/st_thermal.h new file mode 100644 index 0000000..fecafbe --- /dev/null +++ b/drivers/thermal/st/st_thermal.h @@ -0,0 +1,104 @@ +/* + * ST Thermal Sensor Driver for STi series of SoCs + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * + * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __STI_THERMAL_SYSCFG_H +#define __STI_THERMAL_SYSCFG_H + +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/thermal.h> + +enum st_thermal_regfield_ids { + INT_THRESH_HI = 0, /* Top two regfield IDs are mutually exclusive */ + TEMP_PWR = 0, + DCORRECT, + OVERFLOW, + DATA, + INT_ENABLE, + + MAX_REGFIELDS +}; + +/* Thermal sensor power states */ +enum st_thermal_power_state { + POWER_OFF = 0, + POWER_ON +}; + +struct st_thermal_sensor; + +/** + * Description of private thermal sensor ops. + * + * @power_ctrl: Function for powering on/off a sensor. Clock to the + * sensor is also controlled from this function. + * @alloc_regfields: Allocate regmap register fields, specific to a sensor. + * @do_memmap_regmap: Memory map the thermal register space and init regmap + * instance or find regmap instance. + * @register_irq: Register an interrupt handler for a sensor. + */ +struct st_thermal_sensor_ops { + int (*power_ctrl)(struct st_thermal_sensor *, enum st_thermal_power_state); + int (*alloc_regfields)(struct st_thermal_sensor *); + int (*regmap_init)(struct st_thermal_sensor *); + int (*register_enable_irq)(struct st_thermal_sensor *); + int (*enable_irq)(struct st_thermal_sensor *); +}; + +/** + * Description of thermal driver compatible data. + * + * @reg_fields: Pointer to the regfields array for a sensor. + * @sys_compat: Pointer to the syscon node compatible string. + * @ops: Pointer to private thermal ops for a sensor. + * @calibration_val: Default calibration value to be written to the DCORRECT + * register field for a sensor. + * @temp_adjust_val: Value to be added/subtracted from the data read from + * the sensor. If value needs to be added please provide a + * positive value and if it is to be subtracted please + * provide a negative value. + * @crit_temp: The temperature beyond which the SoC should be shutdown + * to prevent damage. + */ +struct st_thermal_compat_data { + char *sys_compat; + const struct reg_field *reg_fields; + const struct st_thermal_sensor_ops *ops; + unsigned int calibration_val; + int temp_adjust_val; + int crit_temp; +}; + +struct st_thermal_sensor { + struct device *dev; + struct thermal_zone_device *thermal_dev; + const struct st_thermal_sensor_ops *ops; + const struct st_thermal_compat_data *cdata; + struct clk *clk; + struct regmap *regmap; + struct regmap_field *pwr; + struct regmap_field *dcorrect; + struct regmap_field *overflow; + struct regmap_field *temp_data; + struct regmap_field *int_thresh_hi; + struct regmap_field *int_enable; + int irq; + void __iomem *mmio_base; +}; + +extern int st_thermal_register(struct platform_device *pdev, + const struct of_device_id *st_thermal_of_match); +extern int st_thermal_unregister(struct platform_device *pdev); +extern const struct dev_pm_ops st_thermal_pm_ops; + +#endif /* __STI_RESET_SYSCFG_H */ diff --git a/drivers/thermal/st/st_thermal_memmap.c b/drivers/thermal/st/st_thermal_memmap.c new file mode 100644 index 0000000..39896ce --- /dev/null +++ b/drivers/thermal/st/st_thermal_memmap.c @@ -0,0 +1,209 @@ +/* + * ST Thermal Sensor Driver for memory mapped sensors. + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * + * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/of.h> +#include <linux/module.h> + +#include "st_thermal.h" + +#define STIH416_MPE_CONF 0x0 +#define STIH416_MPE_STATUS 0x4 +#define STIH416_MPE_INT_THRESH 0x8 +#define STIH416_MPE_INT_EN 0xC + +/* Power control bits for the memory mapped thermal sensor */ +#define THERMAL_PDN BIT(4) +#define THERMAL_SRSTN BIT(10) + +static const struct reg_field st_mmap_thermal_regfields[MAX_REGFIELDS] = { + /* + * According to the STIH416 MPE temp sensor data sheet - + * the PDN (Power Down Bit) and SRSTN (Soft Reset Bit) need to be + * written simultaneously for powering on and off the temperature + * sensor. regmap_update_bits() will be used to update the register. + */ + [INT_THRESH_HI] = REG_FIELD(STIH416_MPE_INT_THRESH, 0, 7), + [DCORRECT] = REG_FIELD(STIH416_MPE_CONF, 5, 9), + [OVERFLOW] = REG_FIELD(STIH416_MPE_STATUS, 9, 9), + [DATA] = REG_FIELD(STIH416_MPE_STATUS, 11, 18), + [INT_ENABLE] = REG_FIELD(STIH416_MPE_INT_EN, 0, 0), +}; + +static irqreturn_t st_mmap_thermal_trip_handler(int irq, void *sdata) +{ + struct st_thermal_sensor *sensor = sdata; + + thermal_zone_device_update(sensor->thermal_dev); + + return IRQ_HANDLED; +} + +/* Private ops for the Memory Mapped based thermal sensors */ +static int st_mmap_power_ctrl(struct st_thermal_sensor *sensor, + enum st_thermal_power_state power_state) +{ + const unsigned int mask = (THERMAL_PDN | THERMAL_SRSTN); + const unsigned int val = power_state ? mask : 0; + + return regmap_update_bits(sensor->regmap, STIH416_MPE_CONF, mask, val); +} + +static int st_mmap_alloc_regfields(struct st_thermal_sensor *sensor) +{ + struct device *dev = sensor->dev; + struct regmap *regmap = sensor->regmap; + const struct reg_field *reg_fields = sensor->cdata->reg_fields; + + sensor->int_thresh_hi = devm_regmap_field_alloc(dev, regmap, + reg_fields[INT_THRESH_HI]); + sensor->int_enable = devm_regmap_field_alloc(dev, regmap, + reg_fields[INT_ENABLE]); + + if (IS_ERR(sensor->int_thresh_hi) || IS_ERR(sensor->int_enable)) { + dev_err(dev, "failed to alloc mmap regfields\n"); + return -EINVAL; + } + + return 0; +} + +static int st_mmap_enable_irq(struct st_thermal_sensor *sensor) +{ + int ret; + + /* Set upper critical threshold */ + ret = regmap_field_write(sensor->int_thresh_hi, + sensor->cdata->crit_temp - + sensor->cdata->temp_adjust_val); + if (ret) + return ret; + + return regmap_field_write(sensor->int_enable, 1); +} + +static int st_mmap_register_enable_irq(struct st_thermal_sensor *sensor) +{ + struct device *dev = sensor->dev; + struct platform_device *pdev = to_platform_device(dev); + int ret; + + sensor->irq = platform_get_irq(pdev, 0); + if (sensor->irq < 0) { + dev_err(dev, "failed to register IRQ\n"); + return sensor->irq; + } + + ret = devm_request_threaded_irq(dev, sensor->irq, + NULL, st_mmap_thermal_trip_handler, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + dev->driver->name, sensor); + if (ret) { + dev_err(dev, "failed to register IRQ %d\n", sensor->irq); + return ret; + } + + return st_mmap_enable_irq(sensor); +} + +static const struct regmap_config st_416mpe_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, +}; + +static int st_mmap_regmap_init(struct st_thermal_sensor *sensor) +{ + struct device *dev = sensor->dev; + struct platform_device *pdev = to_platform_device(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no memory resources defined\n"); + return -ENODEV; + } + + sensor->mmio_base = devm_ioremap_resource(dev, res); + if (IS_ERR(sensor->mmio_base)) { + dev_err(dev, "failed to remap IO\n"); + return PTR_ERR(sensor->mmio_base); + } + + sensor->regmap = devm_regmap_init_mmio(dev, sensor->mmio_base, + &st_416mpe_regmap_config); + if (IS_ERR(sensor->regmap)) { + dev_err(dev, "failed to initialise regmap\n"); + return PTR_ERR(sensor->regmap); + } + + return 0; +} + +static const struct st_thermal_sensor_ops st_mmap_sensor_ops = { + .power_ctrl = st_mmap_power_ctrl, + .alloc_regfields = st_mmap_alloc_regfields, + .regmap_init = st_mmap_regmap_init, + .register_enable_irq = st_mmap_register_enable_irq, + .enable_irq = st_mmap_enable_irq, +}; + +/* Compatible device data stih416 mpe thermal sensor */ +const struct st_thermal_compat_data st_416mpe_cdata = { + .reg_fields = st_mmap_thermal_regfields, + .ops = &st_mmap_sensor_ops, + .calibration_val = 14, + .temp_adjust_val = -95, + .crit_temp = 120, +}; + +/* Compatible device data stih407 thermal sensor */ +const struct st_thermal_compat_data st_407_cdata = { + .reg_fields = st_mmap_thermal_regfields, + .ops = &st_mmap_sensor_ops, + .calibration_val = 16, + .temp_adjust_val = -95, + .crit_temp = 120, +}; + +static struct of_device_id st_mmap_thermal_of_match[] = { + { .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata }, + { .compatible = "st,stih407-thermal", .data = &st_407_cdata }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match); + +int st_mmap_probe(struct platform_device *pdev) +{ + return st_thermal_register(pdev, st_mmap_thermal_of_match); +} + +int st_mmap_remove(struct platform_device *pdev) +{ + return st_thermal_unregister(pdev); +} + +static struct platform_driver st_mmap_thermal_driver = { + .driver = { + .name = "st_thermal_mmap", + .owner = THIS_MODULE, + .pm = &st_thermal_pm_ops, + .of_match_table = st_mmap_thermal_of_match, + }, + .probe = st_mmap_probe, + .remove = st_mmap_remove, +}; + +module_platform_driver(st_mmap_thermal_driver); + +MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/st/st_thermal_syscfg.c b/drivers/thermal/st/st_thermal_syscfg.c new file mode 100644 index 0000000..888b58e --- /dev/null +++ b/drivers/thermal/st/st_thermal_syscfg.c @@ -0,0 +1,179 @@ +/* + * ST Thermal Sensor Driver for syscfg based sensors. + * Author: Ajit Pal Singh <ajitpal.singh@st.com> + * + * Copyright (C) 2003-2014 STMicroelectronics (R&D) Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/of.h> +#include <linux/module.h> +#include <linux/mfd/syscon.h> + +#include "st_thermal.h" + +/* STiH415 */ +#define STIH415_SYSCFG_FRONT(num) ((num - 100) * 4) +#define STIH415_SAS_THSENS_CONF STIH415_SYSCFG_FRONT(178) +#define STIH415_SAS_THSENS_STATUS STIH415_SYSCFG_FRONT(198) +#define STIH415_SYSCFG_MPE(num) ((num - 600) * 4) +#define STIH415_MPE_THSENS_CONF STIH415_SYSCFG_MPE(607) +#define STIH415_MPE_THSENS_STATUS STIH415_SYSCFG_MPE(667) + +/* STiH416 */ +#define STIH416_SYSCFG_FRONT(num) ((num - 1000) * 4) +#define STIH416_SAS_THSENS_CONF STIH416_SYSCFG_FRONT(1552) +#define STIH416_SAS_THSENS_STATUS1 STIH416_SYSCFG_FRONT(1554) +#define STIH416_SAS_THSENS_STATUS2 STIH416_SYSCFG_FRONT(1594) + +/* STiD127 */ +#define STID127_SYSCFG_CPU(num) ((num - 700) * 4) +#define STID127_THSENS_CONF STID127_SYSCFG_CPU(743) +#define STID127_THSENS_STATUS STID127_SYSCFG_CPU(767) + +static const struct reg_field st_415sas_regfields[MAX_REGFIELDS] = { + [TEMP_PWR] = REG_FIELD(STIH415_SAS_THSENS_CONF, 9, 9), + [DCORRECT] = REG_FIELD(STIH415_SAS_THSENS_CONF, 4, 8), + [OVERFLOW] = REG_FIELD(STIH415_SAS_THSENS_STATUS, 8, 8), + [DATA] = REG_FIELD(STIH415_SAS_THSENS_STATUS, 10, 16), +}; + +static const struct reg_field st_415mpe_regfields[MAX_REGFIELDS] = { + [TEMP_PWR] = REG_FIELD(STIH415_MPE_THSENS_CONF, 8, 8), + [DCORRECT] = REG_FIELD(STIH415_MPE_THSENS_CONF, 3, 7), + [OVERFLOW] = REG_FIELD(STIH415_MPE_THSENS_STATUS, 9, 9), + [DATA] = REG_FIELD(STIH415_MPE_THSENS_STATUS, 11, 18), +}; + +static const struct reg_field st_416sas_regfields[MAX_REGFIELDS] = { + [TEMP_PWR] = REG_FIELD(STIH416_SAS_THSENS_CONF, 9, 9), + [DCORRECT] = REG_FIELD(STIH416_SAS_THSENS_CONF, 4, 8), + [OVERFLOW] = REG_FIELD(STIH416_SAS_THSENS_STATUS1, 8, 8), + [DATA] = REG_FIELD(STIH416_SAS_THSENS_STATUS2, 10, 16), +}; + +static const struct reg_field st_127_regfields[MAX_REGFIELDS] = { + [TEMP_PWR] = REG_FIELD(STID127_THSENS_CONF, 7, 7), + [DCORRECT] = REG_FIELD(STID127_THSENS_CONF, 2, 6), + [OVERFLOW] = REG_FIELD(STID127_THSENS_STATUS, 9, 9), + [DATA] = REG_FIELD(STID127_THSENS_STATUS, 11, 18), +}; + +/* Private OPs for System Configuration Register based thermal sensors */ +static int st_syscfg_power_ctrl(struct st_thermal_sensor *sensor, + enum st_thermal_power_state power_state) +{ + return regmap_field_write(sensor->pwr, power_state); +} + +static int st_syscfg_alloc_regfields(struct st_thermal_sensor *sensor) +{ + struct device *dev = sensor->dev; + + sensor->pwr = devm_regmap_field_alloc(dev, sensor->regmap, + sensor->cdata->reg_fields[TEMP_PWR]); + + if (IS_ERR(sensor->pwr)) { + dev_err(dev, "failed to alloc syscfg regfields\n"); + return PTR_ERR(sensor->pwr); + } + + return 0; +} + +static int st_syscfg_regmap_init(struct st_thermal_sensor *sensor) +{ + sensor->regmap = + syscon_regmap_lookup_by_compatible(sensor->cdata->sys_compat); + if (IS_ERR(sensor->regmap)) { + dev_err(sensor->dev, "failed to find syscfg regmap\n"); + return PTR_ERR(sensor->regmap); + } + + return 0; +} + +static const struct st_thermal_sensor_ops st_syscfg_sensor_ops = { + .power_ctrl = st_syscfg_power_ctrl, + .alloc_regfields = st_syscfg_alloc_regfields, + .regmap_init = st_syscfg_regmap_init, +}; + +/* Compatible device data for stih415 sas thermal sensor */ +const struct st_thermal_compat_data st_415sas_cdata = { + .sys_compat = "st,stih415-front-syscfg", + .reg_fields = st_415sas_regfields, + .ops = &st_syscfg_sensor_ops, + .calibration_val = 16, + .temp_adjust_val = 20, + .crit_temp = 120, +}; + +/* Compatible device data for stih415 mpe thermal sensor */ +const struct st_thermal_compat_data st_415mpe_cdata = { + .sys_compat = "st,stih415-system-syscfg", + .reg_fields = st_415mpe_regfields, + .ops = &st_syscfg_sensor_ops, + .calibration_val = 16, + .temp_adjust_val = -103, + .crit_temp = 120, +}; + +/* Compatible device data for stih416 sas thermal sensor */ +const struct st_thermal_compat_data st_416sas_cdata = { + .sys_compat = "st,stih416-front-syscfg", + .reg_fields = st_416sas_regfields, + .ops = &st_syscfg_sensor_ops, + .calibration_val = 16, + .temp_adjust_val = 20, + .crit_temp = 120, +}; + +/* Compatible device data for stid127 thermal sensor */ +const struct st_thermal_compat_data st_127_cdata = { + .sys_compat = "st,stid127-cpu-syscfg", + .reg_fields = st_127_regfields, + .ops = &st_syscfg_sensor_ops, + .calibration_val = 8, + .temp_adjust_val = -103, + .crit_temp = 120, +}; + +static struct of_device_id st_syscfg_thermal_of_match[] = { + { .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata }, + { .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata }, + { .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata }, + { .compatible = "st,stid127-thermal", .data = &st_127_cdata }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match); + +int st_syscfg_probe(struct platform_device *pdev) +{ + return st_thermal_register(pdev, st_syscfg_thermal_of_match); +} + +int st_syscfg_remove(struct platform_device *pdev) +{ + return st_thermal_unregister(pdev); +} + +static struct platform_driver st_syscfg_thermal_driver = { + .driver = { + .name = "st_syscfg_thermal", + .owner = THIS_MODULE, + .pm = &st_thermal_pm_ops, + .of_match_table = st_syscfg_thermal_of_match, + }, + .probe = st_syscfg_probe, + .remove = st_syscfg_remove, +}; +module_platform_driver(st_syscfg_thermal_driver); + +MODULE_AUTHOR("STMicroelectronics (R&D) Limited <ajitpal.singh@st.com>"); +MODULE_DESCRIPTION("STMicroelectronics STi SoC Thermal Sensor Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c index 0419b69..4f485e8 100644 --- a/drivers/tty/ehv_bytechan.c +++ b/drivers/tty/ehv_bytechan.c @@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc) * * The byte channel to be used for the console is specified via a "stdout" * property in the /chosen node. - * - * For compatible with legacy device trees, we also look for a "stdout" alias. */ static int find_console_handle(void) { - struct device_node *np, *np2; + struct device_node *np = of_stdout; const char *sprop = NULL; const uint32_t *iprop; - np = of_find_node_by_path("/chosen"); - if (np) - sprop = of_get_property(np, "stdout-path", NULL); - - if (!np || !sprop) { - of_node_put(np); - np = of_find_node_by_name(NULL, "aliases"); - if (np) - sprop = of_get_property(np, "stdout", NULL); - } - - if (!sprop) { - of_node_put(np); - return 0; - } - /* We don't care what the aliased node is actually called. We only * care if it's compatible with "epapr,hv-byte-channel", because that - * indicates that it's a byte channel node. We use a temporary - * variable, 'np2', because we can't release 'np' until we're done with - * 'sprop'. + * indicates that it's a byte channel node. */ - np2 = of_find_node_by_path(sprop); - of_node_put(np); - np = np2; - if (!np) { - pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop); - return 0; - } - - /* Is it a byte channel? */ - if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) { - of_node_put(np); + if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel")) return 0; - } stdout_irq = irq_of_parse_and_map(np, 0); if (stdout_irq == NO_IRQ) { - pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop); - of_node_put(np); + pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name); return 0; } @@ -167,12 +135,9 @@ static int find_console_handle(void) if (!iprop) { pr_err("ehv-bc: no 'hv-handle' property in %s node\n", np->name); - of_node_put(np); return 0; } stdout_bc = be32_to_cpu(*iprop); - - of_node_put(np); return 1; } diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index a585079..a2cc5f8 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -342,22 +342,13 @@ static void udbg_init_opal_common(void) void __init hvc_opal_init_early(void) { - struct device_node *stdout_node = NULL; + struct device_node *stdout_node = of_node_get(of_stdout); const __be32 *termno; - const char *name = NULL; const struct hv_ops *ops; u32 index; - /* find the boot console from /chosen/stdout */ - if (of_chosen) - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name) { - stdout_node = of_find_node_by_path(name); - if (!stdout_node) { - pr_err("hvc_opal: Failed to locate default console!\n"); - return; - } - } else { + /* If the console wasn't in /chosen, try /ibm,opal */ + if (!stdout_node) { struct device_node *opal, *np; /* Current OPAL takeover doesn't provide the stdout diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index b594abf..5618b5f 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -404,42 +404,35 @@ module_exit(hvc_vio_exit); void __init hvc_vio_init_early(void) { - struct device_node *stdout_node; const __be32 *termno; const char *name; const struct hv_ops *ops; /* find the boot console from /chosen/stdout */ - if (!of_chosen) + if (!of_stdout) return; - name = of_get_property(of_chosen, "linux,stdout-path", NULL); - if (name == NULL) - return; - stdout_node = of_find_node_by_path(name); - if (!stdout_node) - return; - name = of_get_property(stdout_node, "name", NULL); + name = of_get_property(of_stdout, "name", NULL); if (!name) { printk(KERN_WARNING "stdout node missing 'name' property!\n"); - goto out; + return; } /* Check if it's a virtual terminal */ if (strncmp(name, "vty", 3) != 0) - goto out; - termno = of_get_property(stdout_node, "reg", NULL); + return; + termno = of_get_property(of_stdout, "reg", NULL); if (termno == NULL) - goto out; + return; hvterm_priv0.termno = of_read_number(termno, 1); spin_lock_init(&hvterm_priv0.buf_lock); hvterm_privs[0] = &hvterm_priv0; /* Check the protocol */ - if (of_device_is_compatible(stdout_node, "hvterm1")) { + if (of_device_is_compatible(of_stdout, "hvterm1")) { hvterm_priv0.proto = HV_PROTOCOL_RAW; ops = &hvterm_raw_ops; } - else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) { + else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) { hvterm_priv0.proto = HV_PROTOCOL_HVSI; ops = &hvterm_hvsi_ops; hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars, @@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void) /* HVSI, perform the handshake now */ hvsilib_establish(&hvterm_priv0.hvsi); } else - goto out; + return; udbg_putc = udbg_hvc_putc; udbg_getc = udbg_hvc_getc; udbg_getc_poll = udbg_hvc_getc_poll; @@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void) * backend for HVSI, only do udbg */ if (hvterm_priv0.proto == HV_PROTOCOL_HVSI) - goto out; + return; #endif /* Check whether the user has requested a different console. */ if (!strstr(cmd_line, "console=")) add_preferred_console("hvc", 0, NULL); hvc_instantiate(0, 0, ops); -out: - of_node_put(stdout_node); } /* call this from early_init() for a working debug console on diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c index f7ad5b9..abbfedb 100644 --- a/drivers/tty/serial/pmac_zilog.c +++ b/drivers/tty/serial/pmac_zilog.c @@ -1653,8 +1653,7 @@ static int __init pmz_probe(void) /* * Find all escc chips in the system */ - node_p = of_find_node_by_name(NULL, "escc"); - while (node_p) { + for_each_node_by_name(node_p, "escc") { /* * First get channel A/B node pointers * @@ -1672,7 +1671,7 @@ static int __init pmz_probe(void) of_node_put(node_b); printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n", (!node_a) ? 'a' : 'b', node_p->full_name); - goto next; + continue; } /* @@ -1699,11 +1698,9 @@ static int __init pmz_probe(void) of_node_put(node_b); memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port)); memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port)); - goto next; + continue; } count += 2; -next: - node_p = of_find_node_by_name(node_p, "escc"); } pmz_ports_count = count; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 8bb19da..29a7be4 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/console.h> +#include <linux/of.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/device.h> @@ -2611,6 +2612,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) spin_lock_init(&uport->lock); lockdep_set_class(&uport->lock, &port_lock_key); } + if (uport->cons && uport->dev) + of_console_check(uport->dev->of_node, uport->cons->name, uport->line); uart_configure_port(drv, state, uport); diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204..d8c5763 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO && SPAPR_TCE_IOMMU default n +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc..0b035b1 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o +obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e2ee80f..f782533 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +static DEFINE_MUTEX(driver_lock); + +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); + static int vfio_pci_enable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; @@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + /* Don't allow our initial saved state to include busmaster */ + pci_clear_master(pdev); + ret = pci_enable_device(pdev); if (ret) return ret; @@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(pdev); + /* Stop the device from further DMA */ + pci_clear_master(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vdev->barmap[bar] = NULL; } + vdev->needs_reset = true; + /* * If we have saved state, restore it. If we can reset the device, * even better. Resetting with current state seems better than @@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); if (!vdev->reset_works) - return; + goto out; pci_save_state(pdev); } @@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) if (ret) pr_warn("%s: Failed to reset device %s (%d)\n", __func__, dev_name(&pdev->dev), ret); + else + vdev->needs_reset = false; } pci_restore_state(pdev); +out: + pci_disable_device(pdev); + + vfio_pci_try_bus_reset(vdev); } static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) { + mutex_lock(&driver_lock); + + if (!(--vdev->refcnt)) { vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); } + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); } static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; - int ret; + int ret = 0; if (!try_module_get(THIS_MODULE)) return -ENODEV; - if (atomic_inc_return(&vdev->refcnt) == 1) { + mutex_lock(&driver_lock); + + if (!vdev->refcnt) { ret = vfio_pci_enable(vdev); if (ret) goto error; - ret = vfio_spapr_pci_eeh_open(vdev->pdev); - if (ret) { - vfio_pci_disable(vdev); - goto error; - } + vfio_spapr_pci_eeh_open(vdev->pdev); } - - return 0; + vdev->refcnt++; error: - module_put(THIS_MODULE); + mutex_unlock(&driver_lock); + if (ret) + module_put(THIS_MODULE); return ret; } @@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); - atomic_set(&vdev->refcnt, 0); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { @@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev) { struct vfio_pci_device *vdev; + mutex_lock(&driver_lock); + vdev = vfio_del_group_dev(&pdev->dev); - if (!vdev) - return; + if (vdev) { + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + } - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + mutex_unlock(&driver_lock); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; +/* + * Test whether a reset is necessary and possible. We mark devices as + * needs_reset when they are released, but don't have a function-local reset + * available. If any of these exist in the affected devices, we want to do + * a bus/slot reset. We also need all of the affected devices to be unused, + * so we abort if any device has a non-zero refcnt. driver_lock prevents a + * device from being opened during the scan or unbound from vfio-pci. + */ +static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) +{ + bool *needs_reset = data; + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + int ret = -EBUSY; + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return ret; + + vdev = vfio_device_data(device); + if (vdev) { + if (vdev->needs_reset) + *needs_reset = true; + + if (!vdev->refcnt) + ret = 0; + } + + vfio_device_put(device); + } + + /* + * TODO: vfio-core considers groups to be viable even if some devices + * are attached to known drivers, like pci-stub or pcieport. We can't + * freeze devices from being unbound to those drivers like we can + * here though, so it would be racy to test for them. We also can't + * use device_lock() to prevent changes as that would interfere with + * PCI-core taking device_lock during bus reset. For now, we require + * devices to be bound to vfio-pci to get a bus/slot reset on release. + */ + + return ret; +} + +/* Clear needs_reset on all affected devices after successful bus/slot reset */ +static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) +{ + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return 0; + + vdev = vfio_device_data(device); + if (vdev) + vdev->needs_reset = false; + + vfio_device_put(device); + } + + return 0; +} + +/* + * Attempt to do a bus/slot reset if there are devices affected by a reset for + * this device that are needs_reset and all of the affected devices are unused + * (!refcnt). Callers of this function are required to hold driver_lock such + * that devices can not be unbound from vfio-pci or opened by a user while we + * test for and perform a bus/slot reset. + */ +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) +{ + bool needs_reset = false, slot = false; + int ret; + + if (!pci_probe_reset_slot(vdev->pdev->slot)) + slot = true; + else if (pci_probe_reset_bus(vdev->pdev->bus)) + return; + + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_test_bus_reset, + &needs_reset, slot) || !needs_reset) + return; + + if (slot) + ret = pci_try_reset_slot(vdev->pdev->slot); + else + ret = pci_try_reset_bus(vdev->pdev->bus); + + if (ret) + return; + + vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_clear_needs_reset, NULL, slot); +} + static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9c6d5d0..671c17a 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -54,8 +54,9 @@ struct vfio_pci_device { bool extended_caps; bool bardirty; bool has_vga; + bool needs_reset; struct pci_saved_state *pci_saved_state; - atomic_t refcnt; + int refcnt; struct eventfd_ctx *err_trigger; }; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4c..86dfceb 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -9,20 +9,27 @@ * published by the Free Software Foundation. */ +#include <linux/module.h> #include <linux/uaccess.h> #include <linux/vfio.h> #include <asm/eeh.h> +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC "VFIO IOMMU SPAPR EEH" + /* We might build address mapping here for "fast" path later */ -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return eeh_dev_open(pdev); + eeh_dev_open(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { eeh_dev_release(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) @@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } +EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); |