diff options
-rw-r--r-- | drivers/misc/mic/Kconfig | 1 | ||||
-rw-r--r-- | drivers/misc/mic/scif/Makefile | 5 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_api.c | 33 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_debugfs.c | 85 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_epd.c | 26 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_epd.h | 28 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_fd.c | 169 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_main.c | 23 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_main.h | 30 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_map.h | 25 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_nm.c | 10 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_nodeqp.c | 65 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_nodeqp.h | 42 | ||||
-rw-r--r-- | drivers/misc/mic/scif/scif_peer_bus.c | 14 |
14 files changed, 516 insertions, 40 deletions
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 1488232..60376fb 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -75,6 +75,7 @@ comment "SCIF Driver" config SCIF tristate "SCIF Driver" depends on 64BIT && PCI && X86 && SCIF_BUS + select IOMMU_IOVA help This enables SCIF Driver support for the Intel Many Integrated Core (MIC) family of PCIe form factor coprocessor devices that diff --git a/drivers/misc/mic/scif/Makefile b/drivers/misc/mic/scif/Makefile index bf10bb7..29cfc3e 100644 --- a/drivers/misc/mic/scif/Makefile +++ b/drivers/misc/mic/scif/Makefile @@ -13,3 +13,8 @@ scif-objs += scif_epd.o scif-objs += scif_rb.o scif-objs += scif_nodeqp.o scif-objs += scif_nm.o +scif-objs += scif_dma.o +scif-objs += scif_fence.o +scif-objs += scif_mmap.o +scif-objs += scif_rma.o +scif-objs += scif_rma_list.o diff --git a/drivers/misc/mic/scif/scif_api.c b/drivers/misc/mic/scif/scif_api.c index b47d56d..ddc9e4b 100644 --- a/drivers/misc/mic/scif/scif_api.c +++ b/drivers/misc/mic/scif/scif_api.c @@ -70,6 +70,7 @@ scif_epd_t scif_open(void) mutex_init(&ep->sendlock); mutex_init(&ep->recvlock); + scif_rma_ep_init(ep); ep->state = SCIFEP_UNBOUND; dev_dbg(scif_info.mdev.this_device, "SCIFAPI open: ep %p success\n", ep); @@ -184,8 +185,11 @@ int scif_close(scif_epd_t epd) switch (oldstate) { case SCIFEP_ZOMBIE: + dev_err(scif_info.mdev.this_device, + "SCIFAPI close: zombie state unexpected\n"); case SCIFEP_DISCONNECTED: spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); /* Remove from the disconnected list */ mutex_lock(&scif_info.connlock); list_for_each_safe(pos, tmpq, &scif_info.disconnected) { @@ -207,6 +211,7 @@ int scif_close(scif_epd_t epd) case SCIFEP_CLOSING: { spin_unlock(&ep->lock); + scif_unregister_all_windows(epd); scif_disconnect_ep(ep); break; } @@ -218,7 +223,7 @@ int scif_close(scif_epd_t epd) struct scif_endpt *aep; spin_unlock(&ep->lock); - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); /* remove from listen list */ list_for_each_safe(pos, tmpq, &scif_info.listen) { @@ -240,7 +245,7 @@ int scif_close(scif_epd_t epd) break; } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); mutex_lock(&scif_info.connlock); list_for_each_safe(pos, tmpq, &scif_info.connected) { tmpep = list_entry(pos, @@ -260,13 +265,13 @@ int scif_close(scif_epd_t epd) } mutex_unlock(&scif_info.connlock); scif_teardown_ep(aep); - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD); ep->acceptcnt--; } spin_lock(&ep->lock); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); /* Remove and reject any pending connection requests. */ while (ep->conreqcnt) { @@ -428,9 +433,9 @@ int scif_listen(scif_epd_t epd, int backlog) scif_teardown_ep(ep); ep->qp_info.qp = NULL; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_add_tail(&ep->list, &scif_info.listen); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return 0; } EXPORT_SYMBOL_GPL(scif_listen); @@ -469,6 +474,13 @@ static int scif_conn_func(struct scif_endpt *ep) struct scifmsg msg; struct device *spdev; + err = scif_reserve_dma_chan(ep); + if (err) { + dev_err(&ep->remote_dev->sdev->dev, + "%s %d err %d\n", __func__, __LINE__, err); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } /* Initiate the first part of the endpoint QP setup */ err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, SCIF_ENDPT_QP_SIZE, ep->remote_dev); @@ -804,6 +816,15 @@ retry_connection: cep->remote_dev = &scif_dev[peer->node]; cep->remote_ep = conreq->msg.payload[0]; + scif_rma_ep_init(cep); + + err = scif_reserve_dma_chan(cep); + if (err) { + dev_err(scif_info.mdev.this_device, + "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL); if (!cep->qp_info.qp) { err = -ENOMEM; diff --git a/drivers/misc/mic/scif/scif_debugfs.c b/drivers/misc/mic/scif/scif_debugfs.c index 51f14e2..6884dad 100644 --- a/drivers/misc/mic/scif/scif_debugfs.c +++ b/drivers/misc/mic/scif/scif_debugfs.c @@ -62,10 +62,87 @@ static const struct file_operations scif_dev_ops = { .release = scif_dev_test_release }; -void __init scif_init_debugfs(void) +static void scif_display_window(struct scif_window *window, struct seq_file *s) +{ + int j; + struct scatterlist *sg; + scif_pinned_pages_t pin = window->pinned_pages; + + seq_printf(s, "window %p type %d temp %d offset 0x%llx ", + window, window->type, window->temp, window->offset); + seq_printf(s, "nr_pages 0x%llx nr_contig_chunks 0x%x prot %d ", + window->nr_pages, window->nr_contig_chunks, window->prot); + seq_printf(s, "ref_count %d magic 0x%llx peer_window 0x%llx ", + window->ref_count, window->magic, window->peer_window); + seq_printf(s, "unreg_state 0x%x va_for_temp 0x%lx\n", + window->unreg_state, window->va_for_temp); + + for (j = 0; j < window->nr_contig_chunks; j++) + seq_printf(s, "page[%d] dma_addr 0x%llx num_pages 0x%llx\n", j, + window->dma_addr[j], window->num_pages[j]); + + if (window->type == SCIF_WINDOW_SELF && pin) + for (j = 0; j < window->nr_pages; j++) + seq_printf(s, "page[%d] = pinned_pages %p address %p\n", + j, pin->pages[j], + page_address(pin->pages[j])); + + if (window->st) + for_each_sg(window->st->sgl, sg, window->st->nents, j) + seq_printf(s, "sg[%d] dma addr 0x%llx length 0x%x\n", + j, sg_dma_address(sg), sg_dma_len(sg)); +} + +static void scif_display_all_windows(struct list_head *head, struct seq_file *s) { - struct dentry *d; + struct list_head *item; + struct scif_window *window; + list_for_each(item, head) { + window = list_entry(item, struct scif_window, list); + scif_display_window(window, s); + } +} + +static int scif_rma_test(struct seq_file *s, void *unused) +{ + struct scif_endpt *ep; + struct list_head *pos; + + mutex_lock(&scif_info.connlock); + list_for_each(pos, &scif_info.connected) { + ep = list_entry(pos, struct scif_endpt, list); + seq_printf(s, "ep %p self windows\n", ep); + mutex_lock(&ep->rma_info.rma_lock); + scif_display_all_windows(&ep->rma_info.reg_list, s); + seq_printf(s, "ep %p remote windows\n", ep); + scif_display_all_windows(&ep->rma_info.remote_reg_list, s); + mutex_unlock(&ep->rma_info.rma_lock); + } + mutex_unlock(&scif_info.connlock); + return 0; +} + +static int scif_rma_test_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_test, inode->i_private); +} + +static int scif_rma_test_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static const struct file_operations scif_rma_ops = { + .owner = THIS_MODULE, + .open = scif_rma_test_open, + .read = seq_read, + .llseek = seq_lseek, + .release = scif_rma_test_release +}; + +void __init scif_init_debugfs(void) +{ scif_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL); if (!scif_dbg) { dev_err(scif_info.mdev.this_device, @@ -73,8 +150,8 @@ void __init scif_init_debugfs(void) return; } - d = debugfs_create_file("scif_dev", 0444, scif_dbg, - NULL, &scif_dev_ops); + debugfs_create_file("scif_dev", 0444, scif_dbg, NULL, &scif_dev_ops); + debugfs_create_file("scif_rma", 0444, scif_dbg, NULL, &scif_rma_ops); debugfs_create_u8("en_msg_log", 0666, scif_dbg, &scif_info.en_msg_log); debugfs_create_u8("p2p_enable", 0666, scif_dbg, &scif_info.p2p_enable); } diff --git a/drivers/misc/mic/scif/scif_epd.c b/drivers/misc/mic/scif/scif_epd.c index b4bfbb0..00e5d6d 100644 --- a/drivers/misc/mic/scif/scif_epd.c +++ b/drivers/misc/mic/scif/scif_epd.c @@ -65,14 +65,14 @@ void scif_teardown_ep(void *endpt) void scif_add_epd_to_zombie_list(struct scif_endpt *ep, bool eplock_held) { if (!eplock_held) - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); spin_lock(&ep->lock); ep->state = SCIFEP_ZOMBIE; spin_unlock(&ep->lock); list_add_tail(&ep->list, &scif_info.zombie); scif_info.nr_zombies++; if (!eplock_held) - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); schedule_work(&scif_info.misc_work); } @@ -81,16 +81,15 @@ static struct scif_endpt *scif_find_listen_ep(u16 port) struct scif_endpt *ep = NULL; struct list_head *pos, *tmpq; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.listen) { ep = list_entry(pos, struct scif_endpt, list); if (ep->port.port == port) { - spin_lock(&ep->lock); - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return ep; } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return NULL; } @@ -99,14 +98,17 @@ void scif_cleanup_zombie_epd(void) struct list_head *pos, *tmpq; struct scif_endpt *ep; - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.zombie) { ep = list_entry(pos, struct scif_endpt, list); - list_del(pos); - scif_info.nr_zombies--; - kfree(ep); + if (scif_rma_ep_can_uninit(ep)) { + list_del(pos); + scif_info.nr_zombies--; + put_iova_domain(&ep->rma_info.iovad); + kfree(ep); + } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); } /** @@ -137,6 +139,8 @@ void scif_cnctreq(struct scif_dev *scifdev, struct scifmsg *msg) if (!ep) /* Send reject due to no listening ports */ goto conreq_sendrej_free; + else + spin_lock(&ep->lock); if (ep->backlog <= ep->conreqcnt) { /* Send reject due to too many pending requests */ diff --git a/drivers/misc/mic/scif/scif_epd.h b/drivers/misc/mic/scif/scif_epd.h index 4dc4ccd..1771d7a 100644 --- a/drivers/misc/mic/scif/scif_epd.h +++ b/drivers/misc/mic/scif/scif_epd.h @@ -98,6 +98,8 @@ struct scif_endpt_qp_info { * @conn_async_state: Async connection * @conn_pend_wq: Used by poll while waiting for incoming connections * @conn_list: List of async connection requests + * @rma_info: Information for triggering SCIF RMA and DMA operations + * @mmu_list: link to list of MMU notifier cleanup work * @anon: anonymous file for use in kernel mode scif poll */ struct scif_endpt { @@ -129,6 +131,8 @@ struct scif_endpt { int conn_async_state; wait_queue_head_t conn_pend_wq; struct list_head conn_list; + struct scif_endpt_rma_info rma_info; + struct list_head mmu_list; struct file *anon; }; @@ -137,6 +141,27 @@ static inline int scifdev_alive(struct scif_endpt *ep) return _scifdev_alive(ep->remote_dev); } +/* + * scif_verify_epd: + * ep: SCIF endpoint + * + * Checks several generic error conditions and returns the + * appropriate error. + */ +static inline int scif_verify_epd(struct scif_endpt *ep) +{ + if (ep->state == SCIFEP_DISCONNECTED) + return -ECONNRESET; + + if (ep->state != SCIFEP_CONNECTED) + return -ENOTCONN; + + if (!scifdev_alive(ep)) + return -ENODEV; + + return 0; +} + static inline int scif_anon_inode_getfile(scif_epd_t epd) { epd->anon = anon_inode_getfile("scif", &scif_anon_fops, NULL, 0); @@ -177,6 +202,9 @@ void scif_clientsend(struct scif_dev *scifdev, struct scifmsg *msg); void scif_clientrcvd(struct scif_dev *scifdev, struct scifmsg *msg); int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block); int __scif_flush(scif_epd_t epd); +int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd); unsigned int __scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep); +int __scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages); #endif /* SCIF_EPD_H */ diff --git a/drivers/misc/mic/scif/scif_fd.c b/drivers/misc/mic/scif/scif_fd.c index 24e47f7..f7e8261 100644 --- a/drivers/misc/mic/scif/scif_fd.c +++ b/drivers/misc/mic/scif/scif_fd.c @@ -34,6 +34,13 @@ static int scif_fdclose(struct inode *inode, struct file *f) return scif_close(priv); } +static int scif_fdmmap(struct file *f, struct vm_area_struct *vma) +{ + struct scif_endpt *priv = f->private_data; + + return scif_mmap(vma, priv); +} + static unsigned int scif_fdpoll(struct file *f, poll_table *wait) { struct scif_endpt *priv = f->private_data; @@ -147,12 +154,12 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) * Add to the list of user mode eps where the second half * of the accept is not yet completed. */ - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_add_tail(&((*ep)->miacceptlist), &scif_info.uaccept); list_add_tail(&((*ep)->liacceptlist), &priv->li_accept); (*ep)->listenep = priv; priv->acceptcnt++; - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return 0; } @@ -170,7 +177,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EFAULT; /* Remove form the user accept queue */ - spin_lock(&scif_info.eplock); + mutex_lock(&scif_info.eplock); list_for_each_safe(pos, tmpq, &scif_info.uaccept) { tmpep = list_entry(pos, struct scif_endpt, miacceptlist); @@ -182,7 +189,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) } if (!fep) { - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); return -ENOENT; } @@ -197,7 +204,7 @@ static long scif_fdioctl(struct file *f, unsigned int cmd, unsigned long arg) } } - spin_unlock(&scif_info.eplock); + mutex_unlock(&scif_info.eplock); /* Free the resources automatically created from the open. */ scif_anon_inode_fput(priv); @@ -298,6 +305,157 @@ getnodes_err1: getnodes_err2: return err; } + case SCIF_REG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_reg reg; + off_t ret; + + if (copy_from_user(®, argp, sizeof(reg))) { + err = -EFAULT; + goto reg_err; + } + if (reg.flags & SCIF_MAP_KERNEL) { + err = -EINVAL; + goto reg_err; + } + ret = scif_register(priv, (void *)reg.addr, reg.len, + reg.offset, reg.prot, reg.flags); + if (ret < 0) { + err = (int)ret; + goto reg_err; + } + + if (copy_to_user(&((struct scifioctl_reg __user *)argp) + ->out_offset, &ret, sizeof(reg.out_offset))) { + err = -EFAULT; + goto reg_err; + } + err = 0; +reg_err: + scif_err_debug(err, "scif_register"); + return err; + } + case SCIF_UNREG: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_unreg unreg; + + if (copy_from_user(&unreg, argp, sizeof(unreg))) { + err = -EFAULT; + goto unreg_err; + } + err = scif_unregister(priv, unreg.offset, unreg.len); +unreg_err: + scif_err_debug(err, "scif_unregister"); + return err; + } + case SCIF_READFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto readfrom_err; + } + err = scif_readfrom(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +readfrom_err: + scif_err_debug(err, "scif_readfrom"); + return err; + } + case SCIF_WRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto writeto_err; + } + err = scif_writeto(priv, copy.loffset, copy.len, copy.roffset, + copy.flags); +writeto_err: + scif_err_debug(err, "scif_writeto"); + return err; + } + case SCIF_VREADFROM: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vreadfrom_err; + } + err = scif_vreadfrom(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vreadfrom_err: + scif_err_debug(err, "scif_vreadfrom"); + return err; + } + case SCIF_VWRITETO: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vwriteto_err; + } + err = scif_vwriteto(priv, (void __force *)copy.addr, copy.len, + copy.roffset, copy.flags); +vwriteto_err: + scif_err_debug(err, "scif_vwriteto"); + return err; + } + case SCIF_FENCE_MARK: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_mark mark; + int tmp_mark = 0; + + if (copy_from_user(&mark, argp, sizeof(mark))) { + err = -EFAULT; + goto fence_mark_err; + } + err = scif_fence_mark(priv, mark.flags, &tmp_mark); + if (err) + goto fence_mark_err; + if (copy_to_user((void __user *)mark.mark, &tmp_mark, + sizeof(tmp_mark))) { + err = -EFAULT; + goto fence_mark_err; + } +fence_mark_err: + scif_err_debug(err, "scif_fence_mark"); + return err; + } + case SCIF_FENCE_WAIT: + { + struct scif_endpt *priv = f->private_data; + + err = scif_fence_wait(priv, arg); + scif_err_debug(err, "scif_fence_wait"); + return err; + } + case SCIF_FENCE_SIGNAL: + { + struct scif_endpt *priv = f->private_data; + struct scifioctl_fence_signal signal; + + if (copy_from_user(&signal, argp, sizeof(signal))) { + err = -EFAULT; + goto fence_signal_err; + } + + err = scif_fence_signal(priv, signal.loff, signal.lval, + signal.roff, signal.rval, signal.flags); +fence_signal_err: + scif_err_debug(err, "scif_fence_signal"); + return err; + } } return -EINVAL; } @@ -306,6 +464,7 @@ const struct file_operations scif_fops = { .open = scif_fdopen, .release = scif_fdclose, .unlocked_ioctl = scif_fdioctl, + .mmap = scif_fdmmap, .poll = scif_fdpoll, .flush = scif_fdflush, .owner = THIS_MODULE, diff --git a/drivers/misc/mic/scif/scif_main.c b/drivers/misc/mic/scif/scif_main.c index f90bd06..36d847a 100644 --- a/drivers/misc/mic/scif/scif_main.c +++ b/drivers/misc/mic/scif/scif_main.c @@ -34,6 +34,7 @@ struct scif_info scif_info = { }; struct scif_dev *scif_dev; +struct kmem_cache *unaligned_cache; static atomic_t g_loopb_cnt; /* Runs in the context of intr_wq */ @@ -263,27 +264,44 @@ static int _scif_init(void) { int rc; - spin_lock_init(&scif_info.eplock); + mutex_init(&scif_info.eplock); + spin_lock_init(&scif_info.rmalock); spin_lock_init(&scif_info.nb_connect_lock); spin_lock_init(&scif_info.port_lock); mutex_init(&scif_info.conflock); mutex_init(&scif_info.connlock); + mutex_init(&scif_info.fencelock); INIT_LIST_HEAD(&scif_info.uaccept); INIT_LIST_HEAD(&scif_info.listen); INIT_LIST_HEAD(&scif_info.zombie); INIT_LIST_HEAD(&scif_info.connected); INIT_LIST_HEAD(&scif_info.disconnected); + INIT_LIST_HEAD(&scif_info.rma); + INIT_LIST_HEAD(&scif_info.rma_tc); + INIT_LIST_HEAD(&scif_info.mmu_notif_cleanup); + INIT_LIST_HEAD(&scif_info.fence); INIT_LIST_HEAD(&scif_info.nb_connect_list); init_waitqueue_head(&scif_info.exitwq); + scif_info.rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; scif_info.en_msg_log = 0; scif_info.p2p_enable = 1; rc = scif_setup_scifdev(); if (rc) goto error; + unaligned_cache = kmem_cache_create("Unaligned_DMA", + SCIF_KMEM_UNALIGNED_BUF_SIZE, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!unaligned_cache) { + rc = -ENOMEM; + goto free_sdev; + } INIT_WORK(&scif_info.misc_work, scif_misc_handler); + INIT_WORK(&scif_info.mmu_notif_work, scif_mmu_notif_handler); INIT_WORK(&scif_info.conn_work, scif_conn_handler); idr_init(&scif_ports); return 0; +free_sdev: + scif_destroy_scifdev(); error: return rc; } @@ -291,6 +309,7 @@ error: static void _scif_exit(void) { idr_destroy(&scif_ports); + kmem_cache_destroy(unaligned_cache); scif_destroy_scifdev(); } @@ -300,6 +319,7 @@ static int __init scif_init(void) int rc; _scif_init(); + iova_cache_get(); rc = scif_peer_bus_init(); if (rc) goto exit; @@ -326,6 +346,7 @@ static void __exit scif_exit(void) misc_deregister(&scif_info.mdev); scif_unregister_driver(&scif_driver); scif_peer_bus_exit(); + iova_cache_put(); _scif_exit(); } diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index b0795b0..a08f0b6 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -22,16 +22,18 @@ #include <linux/pci.h> #include <linux/miscdevice.h> #include <linux/dmaengine.h> +#include <linux/iova.h> #include <linux/anon_inodes.h> #include <linux/file.h> +#include <linux/vmalloc.h> #include <linux/scif.h> - #include "../common/mic_dev.h" #define SCIF_MGMT_NODE 0 #define SCIF_DEFAULT_WATCHDOG_TO 30 #define SCIF_NODE_ACCEPT_TIMEOUT (3 * HZ) #define SCIF_NODE_ALIVE_TIMEOUT (SCIF_DEFAULT_WATCHDOG_TO * HZ) +#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 /* * Generic state used for certain node QP message exchanges @@ -74,13 +76,21 @@ enum scif_msg_state { * @loopb_work: Used for submitting work to loopb_wq * @loopb_recv_q: List of messages received on the loopb_wq * @card_initiated_exit: set when the card has initiated the exit + * @rmalock: Synchronize access to RMA operations + * @fencelock: Synchronize access to list of remote fences requested. + * @rma: List of temporary registered windows to be destroyed. + * @rma_tc: List of temporary registered & cached Windows to be destroyed + * @fence: List of remote fence requests + * @mmu_notif_work: Work for registration caching MMU notifier workqueue + * @mmu_notif_cleanup: List of temporary cached windows for reg cache + * @rma_tc_limit: RMA temporary cache limit */ struct scif_info { u8 nodeid; u8 maxid; u8 total; u32 nr_zombies; - spinlock_t eplock; + struct mutex eplock; struct mutex connlock; spinlock_t nb_connect_lock; spinlock_t port_lock; @@ -103,6 +113,14 @@ struct scif_info { struct work_struct loopb_work; struct list_head loopb_recv_q; bool card_initiated_exit; + spinlock_t rmalock; + struct mutex fencelock; + struct list_head rma; + struct list_head rma_tc; + struct list_head fence; + struct work_struct mmu_notif_work; + struct list_head mmu_notif_cleanup; + unsigned long rma_tc_limit; }; /* @@ -153,6 +171,8 @@ struct scif_p2p_info { * @disconn_rescnt: Keeps track of number of node remove requests sent * @exit: Status of exit message * @qp_dma_addr: Queue pair DMA address passed to the peer + * @dma_ch_idx: Round robin index for DMA channels + * @signal_pool: DMA pool used for scheduling scif_fence_signal DMA's */ struct scif_dev { u8 node; @@ -179,8 +199,12 @@ struct scif_dev { atomic_t disconn_rescnt; enum scif_msg_state exit; dma_addr_t qp_dma_addr; + int dma_ch_idx; + struct dma_pool *signal_pool; }; +extern bool scif_reg_cache_enable; +extern bool scif_ulimit_check; extern struct scif_info scif_info; extern struct idr scif_ports; extern struct bus_type scif_peer_bus; @@ -192,6 +216,8 @@ extern const struct file_operations scif_anon_fops; #define SCIF_NODE_QP_SIZE 0x10000 #include "scif_nodeqp.h" +#include "scif_rma.h" +#include "scif_rma_list.h" /* * scifdev_self: diff --git a/drivers/misc/mic/scif/scif_map.h b/drivers/misc/mic/scif/scif_map.h index 20e50b4..3e86360 100644 --- a/drivers/misc/mic/scif/scif_map.h +++ b/drivers/misc/mic/scif/scif_map.h @@ -80,7 +80,7 @@ scif_unmap_single(dma_addr_t local, struct scif_dev *scifdev, size_t size) { if (!scifdev_self(scifdev)) { - if (scifdev_is_p2p(scifdev) && local > scifdev->base_addr) + if (scifdev_is_p2p(scifdev)) local = local - scifdev->base_addr; dma_unmap_single(&scifdev->sdev->dev, local, size, DMA_BIDIRECTIONAL); @@ -110,4 +110,27 @@ scif_iounmap(void *virt, size_t len, struct scif_dev *scifdev) sdev->hw_ops->iounmap(sdev, (void __force __iomem *)virt); } } + +static __always_inline int +scif_map_page(dma_addr_t *dma_handle, struct page *page, + struct scif_dev *scifdev) +{ + int err = 0; + + if (scifdev_self(scifdev)) { + *dma_handle = page_to_phys(page); + } else { + struct scif_hw_dev *sdev = scifdev->sdev; + *dma_handle = dma_map_page(&sdev->dev, + page, 0x0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&sdev->dev, *dma_handle)) + err = -ENOMEM; + else if (scifdev_is_p2p(scifdev)) + *dma_handle = *dma_handle + scifdev->base_addr; + } + if (err) + *dma_handle = 0; + return err; +} #endif /* SCIF_MAP_H */ diff --git a/drivers/misc/mic/scif/scif_nm.c b/drivers/misc/mic/scif/scif_nm.c index f1b1b97..79f26a0 100644 --- a/drivers/misc/mic/scif/scif_nm.c +++ b/drivers/misc/mic/scif/scif_nm.c @@ -34,6 +34,7 @@ static void scif_invalidate_ep(int node) list_for_each_safe(pos, tmpq, &scif_info.disconnected) { ep = list_entry(pos, struct scif_endpt, list); if (ep->remote_dev->node == node) { + scif_unmap_all_windows(ep); spin_lock(&ep->lock); scif_cleanup_ep_qp(ep); spin_unlock(&ep->lock); @@ -50,6 +51,7 @@ static void scif_invalidate_ep(int node) wake_up_interruptible(&ep->sendwq); wake_up_interruptible(&ep->recvwq); spin_unlock(&ep->lock); + scif_unmap_all_windows(ep); } } mutex_unlock(&scif_info.connlock); @@ -61,8 +63,8 @@ void scif_free_qp(struct scif_dev *scifdev) if (!qp) return; - scif_free_coherent((void *)qp->inbound_q.rb_base, - qp->local_buf, scifdev, qp->inbound_q.size); + scif_unmap_single(qp->local_buf, scifdev, qp->inbound_q.size); + kfree(qp->inbound_q.rb_base); scif_unmap_single(qp->local_qp, scifdev, sizeof(struct scif_qp)); kfree(scifdev->qpairs); scifdev->qpairs = NULL; @@ -125,8 +127,12 @@ void scif_cleanup_scifdev(struct scif_dev *dev) } scif_destroy_intr_wq(dev); } + flush_work(&scif_info.misc_work); scif_destroy_p2p(dev); scif_invalidate_ep(dev->node); + scif_zap_mmaps(dev->node); + scif_cleanup_rma_for_zombies(dev->node); + flush_work(&scif_info.misc_work); scif_send_acks(dev); if (!dev->node && scif_info.card_initiated_exit) { /* diff --git a/drivers/misc/mic/scif/scif_nodeqp.c b/drivers/misc/mic/scif/scif_nodeqp.c index 5cdb9f0..7180d56 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.c +++ b/drivers/misc/mic/scif/scif_nodeqp.c @@ -105,18 +105,22 @@ int scif_setup_qp_connect(struct scif_qp *qp, dma_addr_t *qp_offset, int local_size, struct scif_dev *scifdev) { - void *local_q = NULL; + void *local_q = qp->inbound_q.rb_base; int err = 0; u32 tmp_rd = 0; spin_lock_init(&qp->send_lock); spin_lock_init(&qp->recv_lock); - local_q = kzalloc(local_size, GFP_KERNEL); + /* Allocate rb only if not already allocated */ if (!local_q) { - err = -ENOMEM; - return err; + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + err = -ENOMEM; + return err; + } } + err = scif_map_single(&qp->local_buf, local_q, scifdev, local_size); if (err) goto kfree; @@ -552,7 +556,29 @@ static char *message_types[] = {"BAD", "DISCNT_ACK", "CLIENT_SENT", "CLIENT_RCVD", - "SCIF_GET_NODE_INFO"}; + "SCIF_GET_NODE_INFO", + "REGISTER", + "REGISTER_ACK", + "REGISTER_NACK", + "UNREGISTER", + "UNREGISTER_ACK", + "UNREGISTER_NACK", + "ALLOC_REQ", + "ALLOC_GNT", + "ALLOC_REJ", + "FREE_PHYS", + "FREE_VIRT", + "MUNMAP", + "MARK", + "MARK_ACK", + "MARK_NACK", + "WAIT", + "WAIT_ACK", + "WAIT_NACK", + "SIGNAL_LOCAL", + "SIGNAL_REMOTE", + "SIG_ACK", + "SIG_NACK"}; static void scif_display_message(struct scif_dev *scifdev, struct scifmsg *msg, @@ -646,10 +672,16 @@ int scif_nodeqp_send(struct scif_dev *scifdev, struct scifmsg *msg) * * Work queue handler for servicing miscellaneous SCIF tasks. * Examples include: - * 1) Cleanup of zombie endpoints. + * 1) Remote fence requests. + * 2) Destruction of temporary registered windows + * created during scif_vreadfrom()/scif_vwriteto(). + * 3) Cleanup of zombie endpoints. */ void scif_misc_handler(struct work_struct *work) { + scif_rma_handle_remote_fences(); + scif_rma_destroy_windows(); + scif_rma_destroy_tcw_invalid(); scif_cleanup_zombie_epd(); } @@ -995,6 +1027,27 @@ static void (*scif_intr_func[SCIF_MAX_MSG + 1]) scif_clientsend, /* SCIF_CLIENT_SENT */ scif_clientrcvd, /* SCIF_CLIENT_RCVD */ scif_get_node_info_resp,/* SCIF_GET_NODE_INFO */ + scif_recv_reg, /* SCIF_REGISTER */ + scif_recv_reg_ack, /* SCIF_REGISTER_ACK */ + scif_recv_reg_nack, /* SCIF_REGISTER_NACK */ + scif_recv_unreg, /* SCIF_UNREGISTER */ + scif_recv_unreg_ack, /* SCIF_UNREGISTER_ACK */ + scif_recv_unreg_nack, /* SCIF_UNREGISTER_NACK */ + scif_alloc_req, /* SCIF_ALLOC_REQ */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_GNT */ + scif_alloc_gnt_rej, /* SCIF_ALLOC_REJ */ + scif_free_virt, /* SCIF_FREE_VIRT */ + scif_recv_munmap, /* SCIF_MUNMAP */ + scif_recv_mark, /* SCIF_MARK */ + scif_recv_mark_resp, /* SCIF_MARK_ACK */ + scif_recv_mark_resp, /* SCIF_MARK_NACK */ + scif_recv_wait, /* SCIF_WAIT */ + scif_recv_wait_resp, /* SCIF_WAIT_ACK */ + scif_recv_wait_resp, /* SCIF_WAIT_NACK */ + scif_recv_sig_local, /* SCIF_SIG_LOCAL */ + scif_recv_sig_remote, /* SCIF_SIG_REMOTE */ + scif_recv_sig_resp, /* SCIF_SIG_ACK */ + scif_recv_sig_resp, /* SCIF_SIG_NACK */ }; /** diff --git a/drivers/misc/mic/scif/scif_nodeqp.h b/drivers/misc/mic/scif/scif_nodeqp.h index 6c0ed67..9589627 100644 --- a/drivers/misc/mic/scif/scif_nodeqp.h +++ b/drivers/misc/mic/scif/scif_nodeqp.h @@ -74,7 +74,28 @@ #define SCIF_CLIENT_SENT 16 /* Notify the peer that data has been written */ #define SCIF_CLIENT_RCVD 17 /* Notify the peer that data has been read */ #define SCIF_GET_NODE_INFO 18 /* Get current node mask from the mgmt node*/ -#define SCIF_MAX_MSG SCIF_GET_NODE_INFO +#define SCIF_REGISTER 19 /* Tell peer about a new registered window */ +#define SCIF_REGISTER_ACK 20 /* Notify peer about unregistration success */ +#define SCIF_REGISTER_NACK 21 /* Notify peer about registration success */ +#define SCIF_UNREGISTER 22 /* Tell peer about unregistering a window */ +#define SCIF_UNREGISTER_ACK 23 /* Notify peer about registration failure */ +#define SCIF_UNREGISTER_NACK 24 /* Notify peer about unregistration failure */ +#define SCIF_ALLOC_REQ 25 /* Request a mapped buffer */ +#define SCIF_ALLOC_GNT 26 /* Notify peer about allocation success */ +#define SCIF_ALLOC_REJ 27 /* Notify peer about allocation failure */ +#define SCIF_FREE_VIRT 28 /* Free previously allocated virtual memory */ +#define SCIF_MUNMAP 29 /* Acknowledgment for a SCIF_MMAP request */ +#define SCIF_MARK 30 /* SCIF Remote Fence Mark Request */ +#define SCIF_MARK_ACK 31 /* SCIF Remote Fence Mark Success */ +#define SCIF_MARK_NACK 32 /* SCIF Remote Fence Mark Failure */ +#define SCIF_WAIT 33 /* SCIF Remote Fence Wait Request */ +#define SCIF_WAIT_ACK 34 /* SCIF Remote Fence Wait Success */ +#define SCIF_WAIT_NACK 35 /* SCIF Remote Fence Wait Failure */ +#define SCIF_SIG_LOCAL 36 /* SCIF Remote Fence Local Signal Request */ +#define SCIF_SIG_REMOTE 37 /* SCIF Remote Fence Remote Signal Request */ +#define SCIF_SIG_ACK 38 /* SCIF Remote Fence Remote Signal Success */ +#define SCIF_SIG_NACK 39 /* SCIF Remote Fence Remote Signal Failure */ +#define SCIF_MAX_MSG SCIF_SIG_NACK /* * struct scifmsg - Node QP message format @@ -92,6 +113,24 @@ struct scifmsg { } __packed; /* + * struct scif_allocmsg - Used with SCIF_ALLOC_REQ to request + * the remote note to allocate memory + * + * phys_addr: Physical address of the buffer + * vaddr: Virtual address of the buffer + * size: Size of the buffer + * state: Current state + * allocwq: wait queue for status + */ +struct scif_allocmsg { + dma_addr_t phys_addr; + unsigned long vaddr; + size_t size; + enum scif_msg_state state; + wait_queue_head_t allocwq; +}; + +/* * struct scif_qp - Node Queue Pair * * Interesting structure -- a little difficult because we can only @@ -158,7 +197,6 @@ int scif_setup_qp_connect_response(struct scif_dev *scifdev, int scif_setup_loopback_qp(struct scif_dev *scifdev); int scif_destroy_loopback_qp(struct scif_dev *scifdev); void scif_poll_qp_state(struct work_struct *work); -void scif_qp_response_ack(struct work_struct *work); void scif_destroy_p2p(struct scif_dev *scifdev); void scif_send_exit(struct scif_dev *scifdev); static inline struct device *scif_get_peer_dev(struct scif_dev *scifdev) diff --git a/drivers/misc/mic/scif/scif_peer_bus.c b/drivers/misc/mic/scif/scif_peer_bus.c index 547bf7b..6ffa3bd 100644 --- a/drivers/misc/mic/scif/scif_peer_bus.c +++ b/drivers/misc/mic/scif/scif_peer_bus.c @@ -72,6 +72,7 @@ err: static int scif_peer_add_device(struct scif_dev *scifdev) { struct scif_peer_dev *spdev = rcu_dereference(scifdev->spdev); + char pool_name[16]; int ret; ret = device_add(&spdev->dev); @@ -81,8 +82,21 @@ static int scif_peer_add_device(struct scif_dev *scifdev) "dnode %d: peer device_add failed\n", scifdev->node); goto put_spdev; } + + scnprintf(pool_name, sizeof(pool_name), "scif-%d", spdev->dnode); + scifdev->signal_pool = dmam_pool_create(pool_name, &scifdev->sdev->dev, + sizeof(struct scif_status), 1, + 0); + if (!scifdev->signal_pool) { + dev_err(&scifdev->sdev->dev, + "dnode %d: dmam_pool_create failed\n", scifdev->node); + ret = -ENOMEM; + goto del_spdev; + } dev_dbg(&spdev->dev, "Added peer dnode %d\n", spdev->dnode); return 0; +del_spdev: + device_del(&spdev->dev); put_spdev: RCU_INIT_POINTER(scifdev->spdev, NULL); synchronize_rcu(); |