diff options
author | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
commit | fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch) | |
tree | 22962a4387943edc841c72a4e636a068c66d58fd /net/9p | |
download | ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz |
Initial import of modified Linux 2.6.28 tree
Original upstream URL:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'net/9p')
-rw-r--r-- | net/9p/Kconfig | 36 | ||||
-rw-r--r-- | net/9p/Makefile | 17 | ||||
-rw-r--r-- | net/9p/client.c | 1286 | ||||
-rw-r--r-- | net/9p/error.c | 247 | ||||
-rw-r--r-- | net/9p/mod.c | 174 | ||||
-rw-r--r-- | net/9p/protocol.c | 567 | ||||
-rw-r--r-- | net/9p/protocol.h | 34 | ||||
-rw-r--r-- | net/9p/trans_fd.c | 1125 | ||||
-rw-r--r-- | net/9p/trans_rdma.c | 713 | ||||
-rw-r--r-- | net/9p/trans_virtio.c | 390 | ||||
-rw-r--r-- | net/9p/util.c | 145 |
11 files changed, 4734 insertions, 0 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig new file mode 100644 index 0000000..0663f99 --- /dev/null +++ b/net/9p/Kconfig @@ -0,0 +1,36 @@ +# +# 9P protocol configuration +# + +menuconfig NET_9P + depends on NET && EXPERIMENTAL + tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + help + If you say Y here, you will get experimental support for + Plan 9 resource sharing via the 9P2000 protocol. + + See <http://v9fs.sf.net> for more information. + + If unsure, say N. + +if NET_9P + +config NET_9P_VIRTIO + depends on EXPERIMENTAL && VIRTIO + tristate "9P Virtio Transport (Experimental)" + help + This builds support for a transports between + guest partitions and a host partition. + +config NET_9P_RDMA + depends on INET && INFINIBAND && EXPERIMENTAL + tristate "9P RDMA Transport (Experimental)" + help + This builds support for an RDMA transport. + +config NET_9P_DEBUG + bool "Debug information" + help + Say Y if you want the 9P subsystem to log debug information. + +endif diff --git a/net/9p/Makefile b/net/9p/Makefile new file mode 100644 index 0000000..198a640 --- /dev/null +++ b/net/9p/Makefile @@ -0,0 +1,17 @@ +obj-$(CONFIG_NET_9P) := 9pnet.o +obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o +obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o + +9pnet-objs := \ + mod.o \ + client.o \ + error.o \ + util.o \ + protocol.o \ + trans_fd.o \ + +9pnet_virtio-objs := \ + trans_virtio.o \ + +9pnet_rdma-objs := \ + trans_rdma.o \ diff --git a/net/9p/client.c b/net/9p/client.c new file mode 100644 index 0000000..4b52945 --- /dev/null +++ b/net/9p/client.c @@ -0,0 +1,1286 @@ +/* + * net/9p/clnt.c + * + * 9P Client + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/idr.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/uaccess.h> +#include <net/9p/9p.h> +#include <linux/parser.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include "protocol.h" + +/* + * Client Option Parsing (code inspired by NFS code) + * - a little lazy - parse all client options + */ + +enum { + Opt_msize, + Opt_trans, + Opt_legacy, + Opt_err, +}; + +static const match_table_t tokens = { + {Opt_msize, "msize=%u"}, + {Opt_legacy, "noextend"}, + {Opt_trans, "trans=%s"}, + {Opt_err, NULL}, +}; + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + +/** + * v9fs_parse_options - parse mount options into session structure + * @options: options string passed from mount + * @v9ses: existing v9fs session information + * + * Return 0 upon success, -ERRNO upon failure + */ + +static int parse_opts(char *opts, struct p9_client *clnt) +{ + char *options; + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + int ret = 0; + + clnt->dotu = 1; + clnt->msize = 8192; + + if (!opts) + return 0; + + options = kstrdup(opts, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + token = match_token(p, tokens, args); + if (token < Opt_trans) { + int r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + } + switch (token) { + case Opt_msize: + clnt->msize = option; + break; + case Opt_trans: + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); + break; + case Opt_legacy: + clnt->dotu = 0; + break; + default: + continue; + } + } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + + kfree(options); + return ret; +} + +/** + * p9_tag_alloc - lookup/allocate a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * this code relies on the client spinlock to manage locks, its + * possible we should switch to something else, but I'd rather + * stick with something low-overhead for the common case. + * + */ + +static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +{ + unsigned long flags; + int row, col; + struct p9_req_t *req; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + if (tag >= c->max_tag) { + spin_lock_irqsave(&c->lock, flags); + /* check again since original check was outside of lock */ + while (tag >= c->max_tag) { + row = (tag / P9_ROW_MAXTAG); + c->reqs[row] = kcalloc(P9_ROW_MAXTAG, + sizeof(struct p9_req_t), GFP_ATOMIC); + + if (!c->reqs[row]) { + printk(KERN_ERR "Couldn't grow tag array\n"); + spin_unlock_irqrestore(&c->lock, flags); + return ERR_PTR(-ENOMEM); + } + for (col = 0; col < P9_ROW_MAXTAG; col++) { + c->reqs[row][col].status = REQ_STATUS_IDLE; + c->reqs[row][col].tc = NULL; + } + c->max_tag += P9_ROW_MAXTAG; + } + spin_unlock_irqrestore(&c->lock, flags); + } + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + req = &c->reqs[row][col]; + if (!req->tc) { + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + if (!req->wq) { + printk(KERN_ERR "Couldn't grow tag array\n"); + return ERR_PTR(-ENOMEM); + } + init_waitqueue_head(req->wq); + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + if ((!req->tc) || (!req->rc)) { + printk(KERN_ERR "Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); + } + req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); + req->tc->capacity = c->msize; + req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); + req->rc->capacity = c->msize; + } + + p9pdu_reset(req->tc); + p9pdu_reset(req->rc); + + req->flush_tag = 0; + req->tc->tag = tag-1; + req->status = REQ_STATUS_ALLOC; + + return &c->reqs[row][col]; +} + +/** + * p9_tag_lookup - lookup a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + */ + +struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) +{ + int row, col; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + BUG_ON(tag >= c->max_tag); + + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + return &c->reqs[row][col]; +} +EXPORT_SYMBOL(p9_tag_lookup); + +/** + * p9_tag_init - setup tags structure and contents + * @tags: tags structure from the client struct + * + * This initializes the tags structure for each client instance. + * + */ + +static int p9_tag_init(struct p9_client *c) +{ + int err = 0; + + c->tagpool = p9_idpool_create(); + if (IS_ERR(c->tagpool)) { + err = PTR_ERR(c->tagpool); + c->tagpool = NULL; + goto error; + } + + p9_idpool_get(c->tagpool); /* reserve tag 0 */ + + c->max_tag = 0; +error: + return err; +} + +/** + * p9_tag_cleanup - cleans up tags structure and reclaims resources + * @tags: tags structure from the client struct + * + * This frees resources associated with the tags structure + * + */ +static void p9_tag_cleanup(struct p9_client *c) +{ + int row, col; + + /* check to insure all requests are idle */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + if (c->reqs[row][col].status != REQ_STATUS_IDLE) { + P9_DPRINTK(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); + /* TODO: delay execution of cleanup */ + return; + } + } + } + + if (c->tagpool) + p9_idpool_destroy(c->tagpool); + + /* free requests associated with tags */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + kfree(c->reqs[row][col].wq); + kfree(c->reqs[row][col].tc); + kfree(c->reqs[row][col].rc); + } + kfree(c->reqs[row]); + } + c->max_tag = 0; +} + +/** + * p9_free_req - free a request and clean-up as necessary + * c: client state + * r: request to release + * + */ + +static void p9_free_req(struct p9_client *c, struct p9_req_t *r) +{ + int tag = r->tc->tag; + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + + r->status = REQ_STATUS_IDLE; + if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) + p9_idpool_put(tag, c->tagpool); +} + +/** + * p9_client_cb - call back from transport to client + * c: client state + * req: request received + * + */ +void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +{ + struct p9_req_t *other_req; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + if (req->status == REQ_STATUS_ERROR) + wake_up(req->wq); + + if (req->flush_tag) { /* flush receive path */ + P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + other_req = p9_tag_lookup(c, req->flush_tag); + if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ + spin_unlock_irqrestore(&c->lock, flags); + else { + other_req->status = REQ_STATUS_FLSHD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(other_req->wq); + } + p9_free_req(c, req); + } else { /* normal receive path */ + P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + if (req->status != REQ_STATUS_FLSHD) + req->status = REQ_STATUS_RCVD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + } +} +EXPORT_SYMBOL(p9_client_cb); + +/** + * p9_parse_header - parse header arguments out of a packet + * @pdu: packet to parse + * @size: size of packet + * @type: type of request + * @tag: tag of packet + * @rewind: set if we need to rewind offset afterwards + */ + +int +p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, + int rewind) +{ + int8_t r_type; + int16_t r_tag; + int32_t r_size; + int offset = pdu->offset; + int err; + + pdu->offset = 0; + if (pdu->size == 0) + pdu->size = 7; + + err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag); + if (err) + goto rewind_and_exit; + + pdu->size = r_size; + pdu->id = r_type; + pdu->tag = r_tag; + + P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + if (type) + *type = r_type; + if (tag) + *tag = r_tag; + if (size) + *size = r_size; + + +rewind_and_exit: + if (rewind) + pdu->offset = offset; + return err; +} +EXPORT_SYMBOL(p9_parse_header); + +/** + * p9_check_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +{ + int8_t type; + int err; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type == P9_RERROR) { + int ecode; + char *ename; + + err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", + err); + return err; + } + + if (c->dotu) + err = -ecode; + + if (!err) { + err = p9_errstr2errno(ename, strlen(ename)); + + /* string match failed */ + if (!err) + err = -ESERVERFAULT; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + + kfree(ename); + } else + err = 0; + + return err; +} + +/** + * p9_client_flush - flush (cancel) a request + * c: client state + * req: request to cancel + * + * This sents a flush for a particular requests and links + * the flush request to the original request. The current + * code only supports a single flush request although the protocol + * allows for multiple flush requests to be sent for a single request. + * + */ + +static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) +{ + struct p9_req_t *req; + int16_t oldtag; + int err; + + err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1); + if (err) + return err; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + + req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->flush_tag = oldtag; + + /* we don't free anything here because RPC isn't complete */ + return 0; +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int tag, err; + struct p9_req_t *req; + unsigned long flags; + int sigpending; + int flushed = 0; + + P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + + if (c->status != Connected) + return ERR_PTR(-EIO); + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + tag = P9_NOTAG; + if (type != P9_TVERSION) { + tag = p9_idpool_get(c->tagpool); + if (tag < 0) + return ERR_PTR(-ENOMEM); + } + + req = p9_tag_alloc(c, tag); + if (IS_ERR(req)) + return req; + + /* marshall the data */ + p9pdu_prepare(req->tc, tag, type); + va_start(ap, fmt); + err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + va_end(ap); + p9pdu_finalize(req->tc); + + err = c->trans_mod->request(c, req); + if (err < 0) { + c->status = Disconnected; + goto reterr; + } + + /* if it was a flush we just transmitted, return our tag */ + if (type == P9_TFLUSH) + return req; +again: + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", + req->wq, tag, err, flushed); + + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } else if (err == -ERESTARTSYS && flushed) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); + goto again; + } else if (req->status == REQ_STATUS_FLSHD) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); + err = -ERESTARTSYS; + } + + if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + spin_lock_irqsave(&c->lock, flags); + if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; + spin_unlock_irqrestore(&c->lock, flags); + sigpending = 1; + flushed = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) { + err = p9_client_flush(c, req); + if (err == 0) + goto again; + } + } + + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + + if (err < 0) + goto reterr; + + err = p9_check_errors(c, req); + if (!err) { + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + return req; + } + +reterr: + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, + err); + p9_free_req(c, req); + return ERR_PTR(err); +} + +static struct p9_fid *p9_fid_create(struct p9_client *clnt) +{ + int ret; + struct p9_fid *fid; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + ret = p9_idpool_get(clnt->fidpool); + if (fid->fid < 0) { + ret = -ENOSPC; + goto error; + } + fid->fid = ret; + + memset(&fid->qid, 0, sizeof(struct p9_qid)); + fid->mode = -1; + fid->rdir_fpos = 0; + fid->uid = current->fsuid; + fid->clnt = clnt; + fid->aux = NULL; + + spin_lock_irqsave(&clnt->lock, flags); + list_add(&fid->flist, &clnt->fidlist); + spin_unlock_irqrestore(&clnt->lock, flags); + + return fid; + +error: + kfree(fid); + return ERR_PTR(ret); +} + +static void p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + clnt = fid->clnt; + p9_idpool_put(fid->fid, clnt->fidpool); + spin_lock_irqsave(&clnt->lock, flags); + list_del(&fid->flist); + spin_unlock_irqrestore(&clnt->lock, flags); + kfree(fid); +} + +int p9_client_version(struct p9_client *c) +{ + int err = 0; + struct p9_req_t *req; + char *version; + int msize; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", + c->msize, c->dotu); + req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, + c->dotu ? "9P2000.u" : "9P2000"); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + if (err) { + P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); + p9pdu_dump(1, req->rc); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + if (!memcmp(version, "9P2000.u", 8)) + c->dotu = 1; + else if (!memcmp(version, "9P2000", 6)) + c->dotu = 0; + else { + err = -EREMOTEIO; + goto error; + } + + if (msize < c->msize) + c->msize = msize; + +error: + kfree(version); + p9_free_req(c, req); + + return err; +} +EXPORT_SYMBOL(p9_client_version); + +struct p9_client *p9_client_create(const char *dev_name, char *options) +{ + int err; + struct p9_client *clnt; + + err = 0; + clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); + if (!clnt) + return ERR_PTR(-ENOMEM); + + clnt->trans_mod = NULL; + clnt->trans = NULL; + spin_lock_init(&clnt->lock); + INIT_LIST_HEAD(&clnt->fidlist); + clnt->fidpool = p9_idpool_create(); + if (IS_ERR(clnt->fidpool)) { + err = PTR_ERR(clnt->fidpool); + clnt->fidpool = NULL; + goto error; + } + + p9_tag_init(clnt); + + err = parse_opts(options, clnt); + if (err < 0) + goto error; + + if (clnt->trans_mod == NULL) { + err = -EPROTONOSUPPORT; + P9_DPRINTK(P9_DEBUG_ERROR, + "No transport defined or default transport\n"); + goto error; + } + + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", + clnt, clnt->trans_mod, clnt->msize, clnt->dotu); + + err = clnt->trans_mod->create(clnt, dev_name, options); + if (err) + goto error; + + if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) + clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; + + err = p9_client_version(clnt); + if (err) + goto error; + + return clnt; + +error: + p9_client_destroy(clnt); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_create); + +void p9_client_destroy(struct p9_client *clnt) +{ + struct p9_fid *fid, *fidptr; + + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); + + if (clnt->trans_mod) + clnt->trans_mod->close(clnt); + + v9fs_put_trans(clnt->trans_mod); + + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) + p9_fid_destroy(fid); + + if (clnt->fidpool) + p9_idpool_destroy(clnt->fidpool); + + p9_tag_cleanup(clnt); + + kfree(clnt); +} +EXPORT_SYMBOL(p9_client_destroy); + +void p9_client_disconnect(struct p9_client *clnt) +{ + P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + clnt->status = Disconnected; +} +EXPORT_SYMBOL(p9_client_disconnect); + +struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, + char *uname, u32 n_uname, char *aname) +{ + int err; + struct p9_req_t *req; + struct p9_fid *fid; + struct p9_qid qid; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); + err = 0; + + fid = p9_fid_create(clnt); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, + (unsigned long long)qid.path, + qid.version); + + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); + + p9_free_req(clnt, req); + return fid; + +error: + if (fid) + p9_fid_destroy(fid); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_attach); + +struct p9_fid * +p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) +{ + int err; + struct p9_req_t *req; + struct p9_qid qid; + struct p9_fid *afid; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname); + err = 0; + + afid = p9_fid_create(clnt); + if (IS_ERR(afid)) { + err = PTR_ERR(afid); + afid = NULL; + goto error; + } + + req = p9_client_rpc(clnt, P9_TAUTH, "dss?d", + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", + qid.type, + (unsigned long long)qid.path, + qid.version); + + memmove(&afid->qid, &qid, sizeof(struct p9_qid)); + p9_free_req(clnt, req); + return afid; + +error: + if (afid) + p9_fid_destroy(afid); + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_auth); + +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, + int clone) +{ + int err; + struct p9_client *clnt; + struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + int16_t nwqids, count; + + err = 0; + clnt = oldfid->clnt; + if (clone) { + fid = p9_fid_create(clnt); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + fid->uid = oldfid->uid; + } else + fid = oldfid; + + + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + + req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, + nwname, wnames); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; + } + p9_free_req(clnt, req); + + P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + + if (nwqids != nwname) { + err = -ENOENT; + goto clunk_fid; + } + + for (count = 0; count < nwqids; count++) + P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); + + if (nwname) + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); + else + fid->qid = oldfid->qid; + + return fid; + +clunk_fid: + p9_client_clunk(fid); + fid = NULL; + +error: + if (fid && (fid != oldfid)) + p9_fid_destroy(fid); + + return ERR_PTR(err); +} +EXPORT_SYMBOL(p9_client_walk); + +int p9_client_open(struct p9_fid *fid, int mode) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); + err = 0; + clnt = fid->clnt; + + if (fid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); + + fid->mode = mode; + fid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_open); + +int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, + char *extension) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); + err = 0; + clnt = fid->clnt; + + if (fid->mode != -1) + return -EINVAL; + + req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, + mode, extension); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); + + fid->mode = mode; + fid->iounit = iounit; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_fcreate); + +int p9_client_clunk(struct p9_fid *fid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + + p9_free_req(clnt, req); + p9_fid_destroy(fid); + +error: + return err; +} +EXPORT_SYMBOL(p9_client_clunk); + +int p9_client_remove(struct p9_fid *fid) +{ + int err; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + + p9_free_req(clnt, req); + p9_fid_destroy(fid); + +error: + return err; +} +EXPORT_SYMBOL(p9_client_remove); + +int +p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, + u32 count) +{ + int err, rsize, total; + struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, + (long long unsigned) offset, count); + err = 0; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + if (count < rsize) + rsize = count; + + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); + + if (data) { + memmove(data, dataptr, count); + data += count; + } + + if (udata) { + err = copy_to_user(udata, dataptr, count); + if (err) { + err = -EFAULT; + goto free_and_error; + } + } + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_read); + +int +p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, + u64 offset, u32 count) +{ + int err, rsize, total; + struct p9_client *clnt; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); + err = 0; + clnt = fid->clnt; + total = 0; + + rsize = fid->iounit; + if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) + rsize = clnt->msize - P9_IOHDRSZ; + + if (count < rsize) + rsize = count; + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, + rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, + rsize, udata); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); + + p9_free_req(clnt, req); + return count; + +free_and_error: + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_write); + +struct p9_wstat *p9_client_stat(struct p9_fid *fid) +{ + int err; + struct p9_client *clnt; + struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL); + struct p9_req_t *req; + u16 ignored; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + + if (!ret) + return ERR_PTR(-ENOMEM); + + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + if (err) { + ret = ERR_PTR(err); + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, + "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + "<<< uid=%d gid=%d n_muid=%d\n", + ret->size, ret->type, ret->dev, ret->qid.type, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, + ret->n_uid, ret->n_gid, ret->n_muid); + +free_and_error: + p9_free_req(clnt, req); +error: + return ret; +} +EXPORT_SYMBOL(p9_client_stat); + +int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) +{ + int err; + struct p9_req_t *req; + struct p9_client *clnt; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + " uid=%d gid=%d n_muid=%d\n", + wst->size, wst->type, wst->dev, wst->qid.type, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, + wst->n_uid, wst->n_gid, wst->n_muid); + err = 0; + clnt = fid->clnt; + + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + + p9_free_req(clnt, req); +error: + return err; +} +EXPORT_SYMBOL(p9_client_wstat); diff --git a/net/9p/error.c b/net/9p/error.c new file mode 100644 index 0000000..fdebe43 --- /dev/null +++ b/net/9p/error.c @@ -0,0 +1,247 @@ +/* + * linux/fs/9p/error.c + * + * Error string handling + * + * Plan 9 uses error strings, Unix uses error numbers. These functions + * try to help manage that and provide for dynamically adding error + * mappings. + * + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/errno.h> +#include <net/9p/9p.h> + +/** + * struct errormap - map string errors from Plan 9 to Linux numeric ids + * @name: string sent over 9P + * @val: numeric id most closely representing @name + * @namelen: length of string + * @list: hash-table list for string lookup + */ +struct errormap { + char *name; + int val; + + int namelen; + struct hlist_node list; +}; + +#define ERRHASHSZ 32 +static struct hlist_head hash_errmap[ERRHASHSZ]; + +/* FixMe - reduce to a reasonable size */ +static struct errormap errmap[] = { + {"Operation not permitted", EPERM}, + {"wstat prohibited", EPERM}, + {"No such file or directory", ENOENT}, + {"directory entry not found", ENOENT}, + {"file not found", ENOENT}, + {"Interrupted system call", EINTR}, + {"Input/output error", EIO}, + {"No such device or address", ENXIO}, + {"Argument list too long", E2BIG}, + {"Bad file descriptor", EBADF}, + {"Resource temporarily unavailable", EAGAIN}, + {"Cannot allocate memory", ENOMEM}, + {"Permission denied", EACCES}, + {"Bad address", EFAULT}, + {"Block device required", ENOTBLK}, + {"Device or resource busy", EBUSY}, + {"File exists", EEXIST}, + {"Invalid cross-device link", EXDEV}, + {"No such device", ENODEV}, + {"Not a directory", ENOTDIR}, + {"Is a directory", EISDIR}, + {"Invalid argument", EINVAL}, + {"Too many open files in system", ENFILE}, + {"Too many open files", EMFILE}, + {"Text file busy", ETXTBSY}, + {"File too large", EFBIG}, + {"No space left on device", ENOSPC}, + {"Illegal seek", ESPIPE}, + {"Read-only file system", EROFS}, + {"Too many links", EMLINK}, + {"Broken pipe", EPIPE}, + {"Numerical argument out of domain", EDOM}, + {"Numerical result out of range", ERANGE}, + {"Resource deadlock avoided", EDEADLK}, + {"File name too long", ENAMETOOLONG}, + {"No locks available", ENOLCK}, + {"Function not implemented", ENOSYS}, + {"Directory not empty", ENOTEMPTY}, + {"Too many levels of symbolic links", ELOOP}, + {"No message of desired type", ENOMSG}, + {"Identifier removed", EIDRM}, + {"No data available", ENODATA}, + {"Machine is not on the network", ENONET}, + {"Package not installed", ENOPKG}, + {"Object is remote", EREMOTE}, + {"Link has been severed", ENOLINK}, + {"Communication error on send", ECOMM}, + {"Protocol error", EPROTO}, + {"Bad message", EBADMSG}, + {"File descriptor in bad state", EBADFD}, + {"Streams pipe error", ESTRPIPE}, + {"Too many users", EUSERS}, + {"Socket operation on non-socket", ENOTSOCK}, + {"Message too long", EMSGSIZE}, + {"Protocol not available", ENOPROTOOPT}, + {"Protocol not supported", EPROTONOSUPPORT}, + {"Socket type not supported", ESOCKTNOSUPPORT}, + {"Operation not supported", EOPNOTSUPP}, + {"Protocol family not supported", EPFNOSUPPORT}, + {"Network is down", ENETDOWN}, + {"Network is unreachable", ENETUNREACH}, + {"Network dropped connection on reset", ENETRESET}, + {"Software caused connection abort", ECONNABORTED}, + {"Connection reset by peer", ECONNRESET}, + {"No buffer space available", ENOBUFS}, + {"Transport endpoint is already connected", EISCONN}, + {"Transport endpoint is not connected", ENOTCONN}, + {"Cannot send after transport endpoint shutdown", ESHUTDOWN}, + {"Connection timed out", ETIMEDOUT}, + {"Connection refused", ECONNREFUSED}, + {"Host is down", EHOSTDOWN}, + {"No route to host", EHOSTUNREACH}, + {"Operation already in progress", EALREADY}, + {"Operation now in progress", EINPROGRESS}, + {"Is a named type file", EISNAM}, + {"Remote I/O error", EREMOTEIO}, + {"Disk quota exceeded", EDQUOT}, +/* errors from fossil, vacfs, and u9fs */ + {"fid unknown or out of range", EBADF}, + {"permission denied", EACCES}, + {"file does not exist", ENOENT}, + {"authentication failed", ECONNREFUSED}, + {"bad offset in directory read", ESPIPE}, + {"bad use of fid", EBADF}, + {"wstat can't convert between files and directories", EPERM}, + {"directory is not empty", ENOTEMPTY}, + {"file exists", EEXIST}, + {"file already exists", EEXIST}, + {"file or directory already exists", EEXIST}, + {"fid already in use", EBADF}, + {"file in use", ETXTBSY}, + {"i/o error", EIO}, + {"file already open for I/O", ETXTBSY}, + {"illegal mode", EINVAL}, + {"illegal name", ENAMETOOLONG}, + {"not a directory", ENOTDIR}, + {"not a member of proposed group", EPERM}, + {"not owner", EACCES}, + {"only owner can change group in wstat", EACCES}, + {"read only file system", EROFS}, + {"no access to special file", EPERM}, + {"i/o count too large", EIO}, + {"unknown group", EINVAL}, + {"unknown user", EINVAL}, + {"bogus wstat buffer", EPROTO}, + {"exclusive use file already open", EAGAIN}, + {"corrupted directory entry", EIO}, + {"corrupted file entry", EIO}, + {"corrupted block label", EIO}, + {"corrupted meta data", EIO}, + {"illegal offset", EINVAL}, + {"illegal path element", ENOENT}, + {"root of file system is corrupted", EIO}, + {"corrupted super block", EIO}, + {"protocol botch", EPROTO}, + {"file system is full", ENOSPC}, + {"file is in use", EAGAIN}, + {"directory entry is not allocated", ENOENT}, + {"file is read only", EROFS}, + {"file has been removed", EIDRM}, + {"only support truncation to zero length", EPERM}, + {"cannot remove root", EPERM}, + {"file too big", EFBIG}, + {"venti i/o error", EIO}, + /* these are not errors */ + {"u9fs rhostsauth: no authentication required", 0}, + {"u9fs authnone: no authentication required", 0}, + {NULL, -1} +}; + +/** + * p9_error_init - preload mappings into hash list + * + */ + +int p9_error_init(void) +{ + struct errormap *c; + int bucket; + + /* initialize hash table */ + for (bucket = 0; bucket < ERRHASHSZ; bucket++) + INIT_HLIST_HEAD(&hash_errmap[bucket]); + + /* load initial error map into hash table */ + for (c = errmap; c->name != NULL; c++) { + c->namelen = strlen(c->name); + bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ; + INIT_HLIST_NODE(&c->list); + hlist_add_head(&c->list, &hash_errmap[bucket]); + } + + return 1; +} +EXPORT_SYMBOL(p9_error_init); + +/** + * errstr2errno - convert error string to error number + * @errstr: error string + * @len: length of error string + * + */ + +int p9_errstr2errno(char *errstr, int len) +{ + int errno; + struct hlist_node *p; + struct errormap *c; + int bucket; + + errno = 0; + p = NULL; + c = NULL; + bucket = jhash(errstr, len, 0) % ERRHASHSZ; + hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + if (c->namelen == len && !memcmp(c->name, errstr, len)) { + errno = c->val; + break; + } + } + + if (errno == 0) { + /* TODO: if error isn't found, add it dynamically */ + errstr[len] = 0; + printk(KERN_ERR "%s: server reported unknown error %s\n", + __func__, errstr); + errno = 1; + } + + return -errno; +} +EXPORT_SYMBOL(p9_errstr2errno); diff --git a/net/9p/mod.c b/net/9p/mod.c new file mode 100644 index 0000000..cf8a412 --- /dev/null +++ b/net/9p/mod.c @@ -0,0 +1,174 @@ +/* + * net/9p/9p.c + * + * 9P entry point + * + * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <net/9p/9p.h> +#include <linux/fs.h> +#include <linux/parser.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <linux/list.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_NET_9P_DEBUG +unsigned int p9_debug_level = 0; /* feature-rific global debug level */ +EXPORT_SYMBOL(p9_debug_level); +module_param_named(debug, p9_debug_level, uint, 0); +MODULE_PARM_DESC(debug, "9P debugging level"); +#endif + +/* + * Dynamic Transport Registration Routines + * + */ + +static DEFINE_SPINLOCK(v9fs_trans_lock); +static LIST_HEAD(v9fs_trans_list); + +/** + * v9fs_register_trans - register a new transport with 9p + * @m: structure describing the transport module and entry points + * + */ +void v9fs_register_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_add_tail(&m->list, &v9fs_trans_list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_register_trans); + +/** + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name + * @name: string identifying transport + * + */ +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) +{ + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; +} +EXPORT_SYMBOL(v9fs_get_trans_by_name); + +/** + * v9fs_get_default_trans - get the default transport + * + */ + +struct p9_trans_module *v9fs_get_default_trans(void) +{ + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; +} +EXPORT_SYMBOL(v9fs_get_default_trans); + +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} + +/** + * v9fs_init - Initialize module + * + */ +static int __init init_p9(void) +{ + int ret = 0; + + p9_error_init(); + printk(KERN_INFO "Installing 9P2000 support\n"); + p9_trans_fd_init(); + + return ret; +} + +/** + * v9fs_init - shutdown module + * + */ + +static void __exit exit_p9(void) +{ + printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); +} + +module_init(init_p9) +module_exit(exit_p9) + +MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); +MODULE_LICENSE("GPL"); diff --git a/net/9p/protocol.c b/net/9p/protocol.c new file mode 100644 index 0000000..dcd7666 --- /dev/null +++ b/net/9p/protocol.c @@ -0,0 +1,567 @@ +/* + * net/9p/protocol.c + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include "protocol.h" + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef offset_of +#define offset_of(type, memb) \ + ((unsigned long)(&((type *)0)->memb)) +#endif +#ifndef container_of +#define container_of(obj, type, memb) \ + ((type *)(((char *)obj) - offset_of(type, memb))) +#endif + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); + +#ifdef CONFIG_NET_9P_DEBUG +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ + int i, n; + u8 *data = pdu->sdata; + int datalen = pdu->size; + char buf[255]; + int buflen = 255; + + i = n = 0; + if (datalen > (buflen-16)) + datalen = buflen-16; + while (i < datalen) { + n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); + if (i%4 == 3) + n += scnprintf(buf + n, buflen - n, " "); + if (i%32 == 31) + n += scnprintf(buf + n, buflen - n, "\n"); + + i++; + } + n += scnprintf(buf + n, buflen - n, "\n"); + + if (way) + P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); + else + P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); +} +#else +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ +} +#endif +EXPORT_SYMBOL(p9pdu_dump); + +void p9stat_free(struct p9_wstat *stbuf) +{ + kfree(stbuf->name); + kfree(stbuf->uid); + kfree(stbuf->gid); + kfree(stbuf->muid); + kfree(stbuf->extension); +} +EXPORT_SYMBOL(p9stat_free); + +static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +{ + size_t len = MIN(pdu->size - pdu->offset, size); + memcpy(data, &pdu->sdata[pdu->offset], len); + pdu->offset += len; + return size - len; +} + +static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + memcpy(&pdu->sdata[pdu->size], data, len); + pdu->size += len; + return size - len; +} + +static size_t +pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + int err = copy_from_user(&pdu->sdata[pdu->size], udata, len); + if (err) + printk(KERN_WARNING "pdu_write_u returning: %d\n", err); + + pdu->size += len; + return size - len; +} + +/* + b - int8_t + w - int16_t + d - int32_t + q - int64_t + s - string + S - stat + Q - qid + D - data blob (int32_t size followed by void *, results are not freed) + T - array of strings (int16_t count, followed by strings) + R - array of qids (int16_t count, followed by qids) + ? - if optional = 1, continue parsing +*/ + +static int +p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t *val = va_arg(ap, int8_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + } + break; + case 'w':{ + int16_t *val = va_arg(ap, int16_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le16(*val); + } + break; + case 'd':{ + int32_t *val = va_arg(ap, int32_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le32(*val); + } + break; + case 'q':{ + int64_t *val = va_arg(ap, int64_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le64(*val); + } + break; + case 's':{ + char **sptr = va_arg(ap, char **); + int16_t len; + int size; + + errcode = p9pdu_readf(pdu, optional, "w", &len); + if (errcode) + break; + + size = MAX(len, 0); + + *sptr = kmalloc(size + 1, GFP_KERNEL); + if (*sptr == NULL) { + errcode = -EFAULT; + break; + } + if (pdu_read(pdu, *sptr, size)) { + errcode = -EFAULT; + kfree(*sptr); + *sptr = NULL; + } else + (*sptr)[size] = 0; + } + break; + case 'Q':{ + struct p9_qid *qid = + va_arg(ap, struct p9_qid *); + + errcode = p9pdu_readf(pdu, optional, "bdq", + &qid->type, &qid->version, + &qid->path); + } + break; + case 'S':{ + struct p9_wstat *stbuf = + va_arg(ap, struct p9_wstat *); + + memset(stbuf, 0, sizeof(struct p9_wstat)); + stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = + -1; + errcode = + p9pdu_readf(pdu, optional, + "wwdQdddqssss?sddd", + &stbuf->size, &stbuf->type, + &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, + &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, + &stbuf->gid, &stbuf->muid, + &stbuf->extension, + &stbuf->n_uid, &stbuf->n_gid, + &stbuf->n_muid); + if (errcode) + p9stat_free(stbuf); + } + break; + case 'D':{ + int32_t *count = va_arg(ap, int32_t *); + void **data = va_arg(ap, void **); + + errcode = + p9pdu_readf(pdu, optional, "d", count); + if (!errcode) { + *count = + MIN(*count, + pdu->size - pdu->offset); + *data = &pdu->sdata[pdu->offset]; + } + } + break; + case 'T':{ + int16_t *nwname = va_arg(ap, int16_t *); + char ***wnames = va_arg(ap, char ***); + + errcode = + p9pdu_readf(pdu, optional, "w", nwname); + if (!errcode) { + *wnames = + kmalloc(sizeof(char *) * *nwname, + GFP_KERNEL); + if (!*wnames) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwname; i++) { + errcode = + p9pdu_readf(pdu, optional, + "s", + &(*wnames)[i]); + if (errcode) + break; + } + } + + if (errcode) { + if (*wnames) { + int i; + + for (i = 0; i < *nwname; i++) + kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; + } + } + break; + case 'R':{ + int16_t *nwqid = va_arg(ap, int16_t *); + struct p9_qid **wqids = + va_arg(ap, struct p9_qid **); + + *wqids = NULL; + + errcode = + p9pdu_readf(pdu, optional, "w", nwqid); + if (!errcode) { + *wqids = + kmalloc(*nwqid * + sizeof(struct p9_qid), + GFP_KERNEL); + if (*wqids == NULL) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwqid; i++) { + errcode = + p9pdu_readf(pdu, optional, + "Q", + &(*wqids)[i]); + if (errcode) + break; + } + } + + if (errcode) { + kfree(*wqids); + *wqids = NULL; + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'w':{ + int16_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'd':{ + int32_t val = va_arg(ap, int32_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'q':{ + int64_t val = va_arg(ap, int64_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 's':{ + const char *sptr = va_arg(ap, const char *); + int16_t len = 0; + if (sptr) + len = MIN(strlen(sptr), USHORT_MAX); + + errcode = p9pdu_writef(pdu, optional, "w", len); + if (!errcode && pdu_write(pdu, sptr, len)) + errcode = -EFAULT; + } + break; + case 'Q':{ + const struct p9_qid *qid = + va_arg(ap, const struct p9_qid *); + errcode = + p9pdu_writef(pdu, optional, "bdq", + qid->type, qid->version, + qid->path); + } break; + case 'S':{ + const struct p9_wstat *stbuf = + va_arg(ap, const struct p9_wstat *); + errcode = + p9pdu_writef(pdu, optional, + "wwdQdddqssss?sddd", + stbuf->size, stbuf->type, + stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, + stbuf->mtime, stbuf->length, + stbuf->name, stbuf->uid, + stbuf->gid, stbuf->muid, + stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + } break; + case 'D':{ + int32_t count = va_arg(ap, int32_t); + const void *data = va_arg(ap, const void *); + + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write(pdu, data, count)) + errcode = -EFAULT; + } + break; + case 'U':{ + int32_t count = va_arg(ap, int32_t); + const char __user *udata = + va_arg(ap, const void __user *); + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write_u(pdu, udata, count)) + errcode = -EFAULT; + } + break; + case 'T':{ + int16_t nwname = va_arg(ap, int); + const char **wnames = va_arg(ap, const char **); + + errcode = + p9pdu_writef(pdu, optional, "w", nwname); + if (!errcode) { + int i; + + for (i = 0; i < nwname; i++) { + errcode = + p9pdu_writef(pdu, optional, + "s", + wnames[i]); + if (errcode) + break; + } + } + } + break; + case 'R':{ + int16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = + va_arg(ap, struct p9_qid *); + + errcode = + p9pdu_writef(pdu, optional, "w", nwqid); + if (!errcode) { + int i; + + for (i = 0; i < nwqid; i++) { + errcode = + p9pdu_writef(pdu, optional, + "Q", + &wqids[i]); + if (errcode) + break; + } + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vreadf(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vwritef(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +{ + struct p9_fcall fake_pdu; + int ret; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + } + + return ret; +} +EXPORT_SYMBOL(p9stat_read); + +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) +{ + return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); +} + +int p9pdu_finalize(struct p9_fcall *pdu) +{ + int size = pdu->size; + int err; + + pdu->size = 0; + err = p9pdu_writef(pdu, 0, "d", size); + pdu->size = size; + +#ifdef CONFIG_NET_9P_DEBUG + if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) + p9pdu_dump(0, pdu); +#endif + + P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + return err; +} + +void p9pdu_reset(struct p9_fcall *pdu) +{ + pdu->offset = 0; + pdu->size = 0; +} diff --git a/net/9p/protocol.h b/net/9p/protocol.h new file mode 100644 index 0000000..ccde462 --- /dev/null +++ b/net/9p/protocol.h @@ -0,0 +1,34 @@ +/* + * net/9p/protocol.h + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); +int p9pdu_finalize(struct p9_fcall *pdu); +void p9pdu_dump(int, struct p9_fcall *); +void p9pdu_reset(struct p9_fcall *pdu); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c new file mode 100644 index 0000000..1df0356 --- /dev/null +++ b/net/9p/trans_fd.c @@ -0,0 +1,1125 @@ +/* + * linux/fs/9p/trans_fd.c + * + * Fd transport layer. Includes deprecated socket layer. + * + * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/parser.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> + +#define P9_PORT 564 +#define MAX_SOCK_BUF (64*1024) +#define MAXPOLLWADDR 2 + +/** + * struct p9_fd_opts - per-transport options + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * + */ + +struct p9_fd_opts { + int rfd; + int wfd; + u16 port; +}; + +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + +struct p9_trans_fd { + struct file *rd; + struct file *wr; + struct p9_conn *conn; +}; + +/* + * Option Parsing (code inspired by NFS code) + * - a little lazy - parse all fd-transport options + */ + +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, +}; + +static const match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_rfdno, "rfdno=%u"}, + {Opt_wfdno, "wfdno=%u"}, + {Opt_err, NULL}, +}; + +enum { + Rworksched = 1, /* read work scheduled or running */ + Rpending = 2, /* can read */ + Wworksched = 4, /* write work scheduled or running */ + Wpending = 8, /* can write */ +}; + +struct p9_poll_wait { + struct p9_conn *conn; + wait_queue_t wait; + wait_queue_head_t *wait_addr; +}; + +/** + * struct p9_conn - fd mux connection state information + * @mux_list: list link for mux to manage multiple connections (?) + * @client: reference to client instance for this connection + * @err: error state + * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @req: current request being processed (if any) + * @tmp_buf: temporary buffer to read in header + * @rsize: amount to read for current frame + * @rpos: read position in current frame + * @rbuf: current read buffer + * @wpos: write position for current frame + * @wsize: amount of data to write for current frame + * @wbuf: current write buffer + * @poll_wait: array of wait_q's for various worker threads + * @poll_waddr: ???? + * @pt: poll state + * @rq: current read work + * @wq: current write work + * @wsched: ???? + * + */ + +struct p9_conn { + struct list_head mux_list; + struct p9_client *client; + int err; + struct list_head req_list; + struct list_head unsent_req_list; + struct p9_req_t *req; + char tmp_buf[7]; + int rsize; + int rpos; + char *rbuf; + int wpos; + int wsize; + char *wbuf; + struct list_head poll_pending_link; + struct p9_poll_wait poll_wait[MAXPOLLWADDR]; + poll_table pt; + struct work_struct rq; + struct work_struct wq; + unsigned long wsched; +}; + +static DEFINE_SPINLOCK(p9_poll_lock); +static LIST_HEAD(p9_poll_pending_list); +static struct workqueue_struct *p9_mux_wq; +static struct task_struct *p9_poll_task; + +static void p9_mux_poll_stop(struct p9_conn *m) +{ + unsigned long flags; + int i; + + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + struct p9_poll_wait *pwait = &m->poll_wait[i]; + + if (pwait->wait_addr) { + remove_wait_queue(pwait->wait_addr, &pwait->wait); + pwait->wait_addr = NULL; + } + } + + spin_lock_irqsave(&p9_poll_lock, flags); + list_del_init(&m->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); +} + +/** + * p9_conn_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code + * + */ + +static void p9_conn_cancel(struct p9_conn *m, int err) +{ + struct p9_req_t *req, *rtmp; + unsigned long flags; + LIST_HEAD(cancel_list); + + P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); + + spin_lock_irqsave(&m->client->lock, flags); + + if (m->err) { + spin_unlock_irqrestore(&m->client->lock, flags); + return; + } + + m->err = err; + + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); + } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); + } + spin_unlock_irqrestore(&m->client->lock, flags); + + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_client_cb(m->client, req); + } +} + +static unsigned int +p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) +{ + int ret, n; + struct p9_trans_fd *ts = NULL; + + if (client && client->status == Connected) + ts = client->trans; + + if (!ts) + return -EREMOTEIO; + + if (!ts->rd->f_op || !ts->rd->f_op->poll) + return -EIO; + + if (!ts->wr->f_op || !ts->wr->f_op->poll) + return -EIO; + + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + return ret; + + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) + return n; + ret = (ret & ~POLLOUT) | (n & ~POLLIN); + } + + return ret; +} + +/** + * p9_fd_read- read from a fd + * @client: client instance + * @v: buffer to receive data into + * @len: size of receive buffer + * + */ + +static int p9_fd_read(struct p9_client *client, void *v, int len) +{ + int ret; + struct p9_trans_fd *ts = NULL; + + if (client && client->status != Disconnected) + ts = client->trans; + + if (!ts) + return -EREMOTEIO; + + if (!(ts->rd->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; +} + +/** + * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done + * + */ + +static void p9_read_work(struct work_struct *work) +{ + int n, err; + struct p9_conn *m; + + m = container_of(work, struct p9_conn, rq); + + if (m->err < 0) + return; + + P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); + + if (!m->rbuf) { + m->rbuf = m->tmp_buf; + m->rpos = 0; + m->rsize = 7; /* start by reading header */ + } + + clear_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, + m->rpos, m->rsize, m->rsize-m->rpos); + err = p9_fd_read(m->client, m->rbuf + m->rpos, + m->rsize - m->rpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); + return; + } + + if (err <= 0) + goto error; + + m->rpos += err; + + if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ + u16 tag; + P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); + + n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ + if (n >= m->client->msize) { + P9_DPRINTK(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } + + tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ + P9_DPRINTK(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); + + m->req = p9_tag_lookup(m->client, tag); + if (!m->req) { + P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); + err = -EIO; + goto error; + } + + if (m->req->rc == NULL) { + m->req->rc = kmalloc(sizeof(struct p9_fcall) + + m->client->msize, GFP_KERNEL); + if (!m->req->rc) { + m->req = NULL; + err = -ENOMEM; + goto error; + } + } + m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall); + memcpy(m->rbuf, m->tmp_buf, m->rsize); + m->rsize = n; + } + + /* not an else because some packets (like clunk) have no payload */ + if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ + P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + spin_lock(&m->client->lock); + list_del(&m->req->req_list); + spin_unlock(&m->client->lock); + p9_client_cb(m->client, m->req); + + m->rbuf = NULL; + m->rpos = 0; + m->rsize = 0; + m->req = NULL; + } + + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = p9_fd_poll(m->client, NULL); + + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); + + return; +error: + p9_conn_cancel(m, err); + clear_bit(Rworksched, &m->wsched); +} + +/** + * p9_fd_write - write to a socket + * @client: client instance + * @v: buffer to send data from + * @len: size of send buffer + * + */ + +static int p9_fd_write(struct p9_client *client, void *v, int len) +{ + int ret; + mm_segment_t oldfs; + struct p9_trans_fd *ts = NULL; + + if (client && client->status != Disconnected) + ts = client->trans; + + if (!ts) + return -EREMOTEIO; + + if (!(ts->wr->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); + + oldfs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); + set_fs(oldfs); + + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; +} + +/** + * p9_write_work - called when a transport can send some data + * @work: container for work to be done + * + */ + +static void p9_write_work(struct work_struct *work) +{ + int n, err; + struct p9_conn *m; + struct p9_req_t *req; + + m = container_of(work, struct p9_conn, wq); + + if (m->err < 0) { + clear_bit(Wworksched, &m->wsched); + return; + } + + if (!m->wsize) { + if (list_empty(&m->unsent_req_list)) { + clear_bit(Wworksched, &m->wsched); + return; + } + + spin_lock(&m->client->lock); + req = list_entry(m->unsent_req_list.next, struct p9_req_t, + req_list); + req->status = REQ_STATUS_SENT; + list_move_tail(&req->req_list, &m->req_list); + + m->wbuf = req->tc->sdata; + m->wsize = req->tc->size; + m->wpos = 0; + spin_unlock(&m->client->lock); + } + + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, + m->wsize); + clear_bit(Wpending, &m->wsched); + err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Wworksched, &m->wsched); + return; + } + + if (err < 0) + goto error; + else if (err == 0) { + err = -EREMOTEIO; + goto error; + } + + m->wpos += err; + if (m->wpos == m->wsize) + m->wpos = m->wsize = 0; + + if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = p9_fd_poll(m->client, NULL); + + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); + } else + clear_bit(Wworksched, &m->wsched); + } else + clear_bit(Wworksched, &m->wsched); + + return; + +error: + p9_conn_cancel(m, err); + clear_bit(Wworksched, &m->wsched); +} + +static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct p9_poll_wait *pwait = + container_of(wait, struct p9_poll_wait, wait); + struct p9_conn *m = pwait->conn; + unsigned long flags; + DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); + + spin_lock_irqsave(&p9_poll_lock, flags); + if (list_empty(&m->poll_pending_link)) + list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); + spin_unlock_irqrestore(&p9_poll_lock, flags); + + /* perform the default wake up operation */ + return default_wake_function(&dummy_wait, mode, sync, key); +} + +/** + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state + * + * called by files poll operation to add v9fs-poll task to files wait queue + */ + +static void +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +{ + struct p9_conn *m = container_of(p, struct p9_conn, pt); + struct p9_poll_wait *pwait = NULL; + int i; + + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + if (m->poll_wait[i].wait_addr == NULL) { + pwait = &m->poll_wait[i]; + break; + } + } + + if (!pwait) { + P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + return; + } + + pwait->conn = m; + pwait->wait_addr = wait_address; + init_waitqueue_func_entry(&pwait->wait, p9_pollwake); + add_wait_queue(wait_address, &pwait->wait); +} + +/** + * p9_conn_create - allocate and initialize the per-session mux data + * @client: client instance + * + * Note: Creates the polling task if this is the first session. + */ + +static struct p9_conn *p9_conn_create(struct p9_client *client) +{ + int n; + struct p9_conn *m; + + P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, + client->msize); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); + if (!m) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&m->mux_list); + m->client = client; + + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + INIT_WORK(&m->rq, p9_read_work); + INIT_WORK(&m->wq, p9_write_work); + INIT_LIST_HEAD(&m->poll_pending_link); + init_poll_funcptr(&m->pt, p9_pollwait); + + n = p9_fd_poll(client, &m->pt); + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); + } + + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } + + return m; +} + +/** + * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * + */ + +static void p9_poll_mux(struct p9_conn *m) +{ + int n; + + if (m->err < 0) + return; + + n = p9_fd_poll(m->client, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + p9_conn_cancel(m, n); + } + + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } + } + + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); + } + } +} + +/** + * p9_fd_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent successfully. + * + * @client: client instance + * @req: request to be sent + * + */ + +static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) +{ + int n; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = ts->conn; + + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, + current, req->tc, req->tc->id); + if (m->err < 0) + return m->err; + + spin_lock(&client->lock); + req->status = REQ_STATUS_UNSENT; + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&client->lock); + + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; + else + n = p9_fd_poll(m->client, NULL); + + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(p9_mux_wq, &m->wq); + + return 0; +} + +static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) +{ + int ret = 1; + + P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + + spin_lock(&client->lock); + list_del(&req->req_list); + + if (req->status == REQ_STATUS_UNSENT) { + req->status = REQ_STATUS_FLSHD; + ret = 0; + } + + spin_unlock(&client->lock); + + return ret; +} + +/** + * parse_options - parse mount options into session structure + * @options: options string passed from mount + * @opts: transport-specific structure to parse options into + * + * Returns 0 upon success, -ERRNO upon failure + */ + +static int parse_opts(char *params, struct p9_fd_opts *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *options; + int ret; + + opts->port = P9_PORT; + opts->rfd = ~0; + opts->wfd = ~0; + + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + + while ((p = strsep(&options, ",")) != NULL) { + int token; + int r; + if (!*p) + continue; + token = match_token(p, tokens, args); + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_rfdno: + opts->rfd = option; + break; + case Opt_wfdno: + opts->wfd = option; + break; + default: + continue; + } + } + kfree(options); + return 0; +} + +static int p9_fd_open(struct p9_client *client, int rfd, int wfd) +{ + struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + GFP_KERNEL); + if (!ts) + return -ENOMEM; + + ts->rd = fget(rfd); + ts->wr = fget(wfd); + if (!ts->rd || !ts->wr) { + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + kfree(ts); + return -EIO; + } + + client->trans = ts; + client->status = Connected; + + return 0; +} + +static int p9_socket_open(struct p9_client *client, struct socket *csocket) +{ + int fd, ret; + + csocket->sk->sk_allocation = GFP_NOIO; + fd = sock_map_fd(csocket, 0); + if (fd < 0) { + P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); + return fd; + } + + ret = p9_fd_open(client, fd, fd); + if (ret < 0) { + P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); + sockfd_put(csocket); + return ret; + } + + ((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK; + + return 0; +} + +/** + * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy + * + */ + +static void p9_conn_destroy(struct p9_conn *m) +{ + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); + + p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); + + p9_conn_cancel(m, -ECONNRESET); + + m->client = NULL; + kfree(m); +} + +/** + * p9_fd_close - shutdown file descriptor transport + * @client: client instance + * + */ + +static void p9_fd_close(struct p9_client *client) +{ + struct p9_trans_fd *ts; + + if (!client) + return; + + ts = client->trans; + if (!ts) + return; + + client->status = Disconnected; + + p9_conn_destroy(ts->conn); + + if (ts->rd) + fput(ts->rd); + if (ts->wr) + fput(ts->wr); + + kfree(ts); +} + +/* + * stolen from NFS - maybe should be made a generic function? + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + +static int +p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct socket *csocket; + struct sockaddr_in sin_server; + struct p9_fd_opts opts; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ + + err = parse_opts(args, &opts); + if (err < 0) + return err; + + if (valid_ipaddr4(addr) < 0) + return -EINVAL; + + csocket = NULL; + + sin_server.sin_family = AF_INET; + sin_server.sin_addr.s_addr = in_aton(addr); + sin_server.sin_port = htons(opts.port); + sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); + + if (!csocket) { + P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); + err = -EIO; + goto error; + } + + err = csocket->ops->connect(csocket, + (struct sockaddr *)&sin_server, + sizeof(struct sockaddr_in), 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_tcp: problem connecting socket to %s\n", + addr); + goto error; + } + + err = p9_socket_open(client, csocket); + if (err < 0) + goto error; + + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); + if (IS_ERR(p->conn)) { + err = PTR_ERR(p->conn); + p->conn = NULL; + goto error; + } + + return 0; + +error: + if (csocket) + sock_release(csocket); + + kfree(p); + + return err; +} + +static int +p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct socket *csocket; + struct sockaddr_un sun_server; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ + + csocket = NULL; + + if (strlen(addr) > UNIX_PATH_MAX) { + P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", + addr); + err = -ENAMETOOLONG; + goto error; + } + + sun_server.sun_family = PF_UNIX; + strcpy(sun_server.sun_path, addr); + sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); + err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server, + sizeof(struct sockaddr_un) - 1, 0); + if (err < 0) { + P9_EPRINTK(KERN_ERR, + "p9_trans_unix: problem connecting socket: %s: %d\n", + addr, err); + goto error; + } + + err = p9_socket_open(client, csocket); + if (err < 0) + goto error; + + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); + if (IS_ERR(p->conn)) { + err = PTR_ERR(p->conn); + p->conn = NULL; + goto error; + } + + return 0; + +error: + if (csocket) + sock_release(csocket); + + kfree(p); + return err; +} + +static int +p9_fd_create(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct p9_fd_opts opts; + struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */ + + parse_opts(args, &opts); + + if (opts.rfd == ~0 || opts.wfd == ~0) { + printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); + return -ENOPROTOOPT; + } + + err = p9_fd_open(client, opts.rfd, opts.wfd); + if (err < 0) + goto error; + + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); + if (IS_ERR(p->conn)) { + err = PTR_ERR(p->conn); + p->conn = NULL; + goto error; + } + + return 0; + +error: + kfree(p); + return err; +} + +static struct p9_trans_module p9_tcp_trans = { + .name = "tcp", + .maxsize = MAX_SOCK_BUF, + .def = 1, + .create = p9_fd_create_tcp, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .owner = THIS_MODULE, +}; + +static struct p9_trans_module p9_unix_trans = { + .name = "unix", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_fd_create_unix, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .owner = THIS_MODULE, +}; + +static struct p9_trans_module p9_fd_trans = { + .name = "fd", + .maxsize = MAX_SOCK_BUF, + .def = 0, + .create = p9_fd_create, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, + .owner = THIS_MODULE, +}; + +/** + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * + */ + +static int p9_poll_proc(void *a) +{ + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); + repeat: + spin_lock_irqsave(&p9_poll_lock, flags); + while (!list_empty(&p9_poll_pending_list)) { + struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, + struct p9_conn, + poll_pending_link); + list_del_init(&conn->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); + + p9_poll_mux(conn); + + spin_lock_irqsave(&p9_poll_lock, flags); + } + spin_unlock_irqrestore(&p9_poll_lock, flags); + + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&p9_poll_pending_list)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); + schedule(); + } + __set_current_state(TASK_RUNNING); + + if (!kthread_should_stop()) + goto repeat; + + P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); + return 0; +} + +int p9_trans_fd_init(void) +{ + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; + } + + p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); + if (IS_ERR(p9_poll_task)) { + destroy_workqueue(p9_mux_wq); + printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); + return PTR_ERR(p9_poll_task); + } + + v9fs_register_trans(&p9_tcp_trans); + v9fs_register_trans(&p9_unix_trans); + v9fs_register_trans(&p9_fd_trans); + + return 0; +} + +void p9_trans_fd_exit(void) +{ + kthread_stop(p9_poll_task); + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); + + destroy_workqueue(p9_mux_wq); +} diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c new file mode 100644 index 0000000..2f1fe5f --- /dev/null +++ b/net/9p/trans_rdma.c @@ -0,0 +1,713 @@ +/* + * linux/fs/9p/trans_rdma.c + * + * RDMA transport layer based on the trans_fd.c implementation. + * + * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> + * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/parser.h> +#include <linux/semaphore.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> + +#define P9_PORT 5640 +#define P9_RDMA_SQ_DEPTH 32 +#define P9_RDMA_RQ_DEPTH 32 +#define P9_RDMA_SEND_SGE 4 +#define P9_RDMA_RECV_SGE 4 +#define P9_RDMA_IRD 0 +#define P9_RDMA_ORD 0 +#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ +#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can + * safely advertise a maxsize + * of 64k */ + +#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) +/** + * struct p9_trans_rdma - RDMA transport instance + * + * @state: tracks the transport state machine for connection setup and tear down + * @cm_id: The RDMA CM ID + * @pd: Protection Domain pointer + * @qp: Queue Pair pointer + * @cq: Completion Queue pointer + * @lkey: The local access only memory region key + * @timeout: Number of uSecs to wait for connection management events + * @sq_depth: The depth of the Send Queue + * @sq_sem: Semaphore for the SQ + * @rq_depth: The depth of the Receive Queue. + * @addr: The remote peer's address + * @req_lock: Protects the active request list + * @send_wait: Wait list when the SQ fills up + * @cm_done: Completion event for connection management tracking + */ +struct p9_trans_rdma { + enum { + P9_RDMA_INIT, + P9_RDMA_ADDR_RESOLVED, + P9_RDMA_ROUTE_RESOLVED, + P9_RDMA_CONNECTED, + P9_RDMA_FLUSHING, + P9_RDMA_CLOSING, + P9_RDMA_CLOSED, + } state; + struct rdma_cm_id *cm_id; + struct ib_pd *pd; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_mr *dma_mr; + u32 lkey; + long timeout; + int sq_depth; + struct semaphore sq_sem; + int rq_depth; + atomic_t rq_count; + struct sockaddr_in addr; + spinlock_t req_lock; + + struct completion cm_done; +}; + +/** + * p9_rdma_context - Keeps track of in-process WR + * + * @wc_op: The original WR op for when the CQE completes in error. + * @busa: Bus address to unmap when the WR completes + * @req: Keeps track of requests (send) + * @rc: Keepts track of replies (receive) + */ +struct p9_rdma_req; +struct p9_rdma_context { + enum ib_wc_opcode wc_op; + dma_addr_t busa; + union { + struct p9_req_t *req; + struct p9_fcall *rc; + }; +}; + +/** + * p9_rdma_opts - Collection of mount options + * @port: port of connection + * @sq_depth: The requested depth of the SQ. This really doesn't need + * to be any deeper than the number of threads used in the client + * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth + * @timeout: Time to wait in msecs for CM events + */ +struct p9_rdma_opts { + short port; + int sq_depth; + int rq_depth; + long timeout; +}; + +/* + * Option Parsing (code inspired by NFS code) + */ +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, +}; + +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_sq_depth, "sq=%u"}, + {Opt_rq_depth, "rq=%u"}, + {Opt_timeout, "timeout=%u"}, + {Opt_err, NULL}, +}; + +/** + * parse_options - parse mount options into session structure + * @options: options string passed from mount + * @opts: transport-specific structure to parse options into + * + * Returns 0 upon success, -ERRNO upon failure + */ +static int parse_opts(char *params, struct p9_rdma_opts *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *options; + int ret; + + opts->port = P9_PORT; + opts->sq_depth = P9_RDMA_SQ_DEPTH; + opts->rq_depth = P9_RDMA_RQ_DEPTH; + opts->timeout = P9_RDMA_TIMEOUT; + + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + + while ((p = strsep(&options, ",")) != NULL) { + int token; + int r; + if (!*p) + continue; + token = match_token(p, tokens, args); + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_sq_depth: + opts->sq_depth = option; + break; + case Opt_rq_depth: + opts->rq_depth = option; + break; + case Opt_timeout: + opts->timeout = option; + break; + default: + continue; + } + } + /* RQ must be at least as large as the SQ */ + opts->rq_depth = max(opts->rq_depth, opts->sq_depth); + kfree(options); + return 0; +} + +static int +p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct p9_client *c = id->context; + struct p9_trans_rdma *rdma = c->trans; + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_INIT); + rdma->state = P9_RDMA_ADDR_RESOLVED; + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); + rdma->state = P9_RDMA_ROUTE_RESOLVED; + break; + + case RDMA_CM_EVENT_ESTABLISHED: + BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); + rdma->state = P9_RDMA_CONNECTED; + break; + + case RDMA_CM_EVENT_DISCONNECTED: + if (rdma) + rdma->state = P9_RDMA_CLOSED; + if (c) + c->status = Disconnected; + break; + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + break; + + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_CONNECT_REQUEST: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + c->status = Disconnected; + rdma_disconnect(rdma->cm_id); + break; + default: + BUG(); + } + complete(&rdma->cm_done); + return 0; +} + +static void +handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + struct p9_req_t *req; + int err = 0; + int16_t tag; + + req = NULL; + ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, + DMA_FROM_DEVICE); + + if (status != IB_WC_SUCCESS) + goto err_out; + + err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); + if (err) + goto err_out; + + req = p9_tag_lookup(client, tag); + if (!req) + goto err_out; + + req->rc = c->rc; + p9_client_cb(client, req); + + return; + + err_out: + P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", + req, err, status); + rdma->state = P9_RDMA_FLUSHING; + client->status = Disconnected; + return; +} + +static void +handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + ib_dma_unmap_single(rdma->cm_id->device, + c->busa, c->req->tc->size, + DMA_TO_DEVICE); +} + +static void qp_event_handler(struct ib_event *event, void *context) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, + context); +} + +static void cq_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct p9_client *client = cq_context; + struct p9_trans_rdma *rdma = client->trans; + int ret; + struct ib_wc wc; + + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { + struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; + + switch (c->wc_op) { + case IB_WC_RECV: + atomic_dec(&rdma->rq_count); + handle_recv(client, rdma, c, wc.status, wc.byte_len); + break; + + case IB_WC_SEND: + handle_send(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->sq_sem); + break; + + default: + printk(KERN_ERR "9prdma: unexpected completion type, " + "c->wc_op=%d, wc.opcode=%d, status=%d\n", + c->wc_op, wc.opcode, wc.status); + break; + } + kfree(c); + } +} + +static void cq_event_handler(struct ib_event *e, void *v) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); +} + +static void rdma_destroy_trans(struct p9_trans_rdma *rdma) +{ + if (!rdma) + return; + + if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) + ib_dereg_mr(rdma->dma_mr); + + if (rdma->qp && !IS_ERR(rdma->qp)) + ib_destroy_qp(rdma->qp); + + if (rdma->pd && !IS_ERR(rdma->pd)) + ib_dealloc_pd(rdma->pd); + + if (rdma->cq && !IS_ERR(rdma->cq)) + ib_destroy_cq(rdma->cq); + + if (rdma->cm_id && !IS_ERR(rdma->cm_id)) + rdma_destroy_id(rdma->cm_id); + + kfree(rdma); +} + +static int +post_recv(struct p9_client *client, struct p9_rdma_context *c) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_recv_wr wr, *bad_wr; + struct ib_sge sge; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->rc->sdata, client->msize, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = client->msize; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_RECV; + wr.wr_id = (unsigned long) c; + wr.sg_list = &sge; + wr.num_sge = 1; + return ib_post_recv(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; +} + +static int rdma_request(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_send_wr wr, *bad_wr; + struct ib_sge sge; + int err = 0; + unsigned long flags; + struct p9_rdma_context *c = NULL; + struct p9_rdma_context *rpl_context = NULL; + + /* Allocate an fcall for the reply */ + rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); + if (!rpl_context) + goto err_close; + + /* + * If the request has a buffer, steal it, otherwise + * allocate a new one. Typically, requests should already + * have receive buffers allocated and just swap them around + */ + if (!req->rc) { + req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, + GFP_KERNEL); + if (req->rc) { + req->rc->sdata = (char *) req->rc + + sizeof(struct p9_fcall); + req->rc->capacity = client->msize; + } + } + rpl_context->rc = req->rc; + if (!rpl_context->rc) { + kfree(rpl_context); + goto err_close; + } + + /* + * Post a receive buffer for this request. We need to ensure + * there is a reply buffer available for every outstanding + * request. A flushed request can result in no reply for an + * outstanding request, so we must keep a count to avoid + * overflowing the RQ. + */ + if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { + err = post_recv(client, rpl_context); + if (err) { + kfree(rpl_context->rc); + kfree(rpl_context); + goto err_close; + } + } else + atomic_dec(&rdma->rq_count); + + /* remove posted receive buffer from request structure */ + req->rc = NULL; + + /* Post the request */ + c = kmalloc(sizeof *c, GFP_KERNEL); + if (!c) + goto err_close; + c->req = req; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->req->tc->sdata, c->req->tc->size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = c->req->tc->size; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_SEND; + wr.wr_id = (unsigned long) c; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + wr.sg_list = &sge; + wr.num_sge = 1; + + if (down_interruptible(&rdma->sq_sem)) + goto error; + + return ib_post_send(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; + + err_close: + spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_CLOSING) { + rdma->state = P9_RDMA_CLOSING; + spin_unlock_irqrestore(&rdma->req_lock, flags); + rdma_disconnect(rdma->cm_id); + } else + spin_unlock_irqrestore(&rdma->req_lock, flags); + return err; +} + +static void rdma_close(struct p9_client *client) +{ + struct p9_trans_rdma *rdma; + + if (!client) + return; + + rdma = client->trans; + if (!rdma) + return; + + client->status = Disconnected; + rdma_disconnect(rdma->cm_id); + rdma_destroy_trans(rdma); +} + +/** + * alloc_rdma - Allocate and initialize the rdma transport structure + * @msize: MTU + * @dotu: Extension attribute + * @opts: Mount options structure + */ +static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) +{ + struct p9_trans_rdma *rdma; + + rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); + if (!rdma) + return NULL; + + rdma->sq_depth = opts->sq_depth; + rdma->rq_depth = opts->rq_depth; + rdma->timeout = opts->timeout; + spin_lock_init(&rdma->req_lock); + init_completion(&rdma->cm_done); + sema_init(&rdma->sq_sem, rdma->sq_depth); + atomic_set(&rdma->rq_count, 0); + + return rdma; +} + +/* its not clear to me we can do anything after send has been posted */ +static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * trans_create_rdma - Transport method for creating atransport instance + * @client: client instance + * @addr: IP address string + * @args: Mount options string + */ +static int +rdma_create_trans(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct p9_rdma_opts opts; + struct p9_trans_rdma *rdma; + struct rdma_conn_param conn_param; + struct ib_qp_init_attr qp_attr; + struct ib_device_attr devattr; + + /* Parse the transport specific mount options */ + err = parse_opts(args, &opts); + if (err < 0) + return err; + + /* Create and initialize the RDMA transport structure */ + rdma = alloc_rdma(&opts); + if (!rdma) + return -ENOMEM; + + /* Create the RDMA CM ID */ + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + if (IS_ERR(rdma->cm_id)) + goto error; + + /* Associate the client with the transport */ + client->trans = rdma; + + /* Resolve the server's address */ + rdma->addr.sin_family = AF_INET; + rdma->addr.sin_addr.s_addr = in_aton(addr); + rdma->addr.sin_port = htons(opts.port); + err = rdma_resolve_addr(rdma->cm_id, NULL, + (struct sockaddr *)&rdma->addr, + rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) + goto error; + + /* Resolve the route to the server */ + err = rdma_resolve_route(rdma->cm_id, rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) + goto error; + + /* Query the device attributes */ + err = ib_query_device(rdma->cm_id->device, &devattr); + if (err) + goto error; + + /* Create the Completion Queue */ + rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, + cq_event_handler, client, + opts.sq_depth + opts.rq_depth + 1, 0); + if (IS_ERR(rdma->cq)) + goto error; + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + + /* Create the Protection Domain */ + rdma->pd = ib_alloc_pd(rdma->cm_id->device); + if (IS_ERR(rdma->pd)) + goto error; + + /* Cache the DMA lkey in the transport */ + rdma->dma_mr = NULL; + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + rdma->lkey = rdma->cm_id->device->local_dma_lkey; + else { + rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(rdma->dma_mr)) + goto error; + rdma->lkey = rdma->dma_mr->lkey; + } + + /* Create the Queue Pair */ + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.event_handler = qp_event_handler; + qp_attr.qp_context = client; + qp_attr.cap.max_send_wr = opts.sq_depth; + qp_attr.cap.max_recv_wr = opts.rq_depth; + qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; + qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = rdma->cq; + qp_attr.recv_cq = rdma->cq; + err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); + if (err) + goto error; + rdma->qp = rdma->cm_id->qp; + + /* Request a connection */ + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + conn_param.responder_resources = P9_RDMA_IRD; + conn_param.initiator_depth = P9_RDMA_ORD; + err = rdma_connect(rdma->cm_id, &conn_param); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_CONNECTED)) + goto error; + + client->status = Connected; + + return 0; + +error: + rdma_destroy_trans(rdma); + return -ENOTCONN; +} + +static struct p9_trans_module p9_rdma_trans = { + .name = "rdma", + .maxsize = P9_RDMA_MAXSIZE, + .def = 0, + .owner = THIS_MODULE, + .create = rdma_create_trans, + .close = rdma_close, + .request = rdma_request, + .cancel = rdma_cancel, +}; + +/** + * p9_trans_rdma_init - Register the 9P RDMA transport driver + */ +static int __init p9_trans_rdma_init(void) +{ + v9fs_register_trans(&p9_rdma_trans); + return 0; +} + +static void __exit p9_trans_rdma_exit(void) +{ + v9fs_unregister_trans(&p9_rdma_trans); +} + +module_init(p9_trans_rdma_init); +module_exit(p9_trans_rdma_exit); + +MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); +MODULE_DESCRIPTION("RDMA Transport for 9P"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c new file mode 100644 index 0000000..2d7781e --- /dev/null +++ b/net/9p/trans_virtio.c @@ -0,0 +1,390 @@ +/* + * The Virtio 9p transport driver + * + * This is a block based transport driver based on the lguest block driver + * code. + * + * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation + * + * Based on virtio console driver + * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <net/9p/9p.h> +#include <linux/parser.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <linux/scatterlist.h> +#include <linux/virtio.h> +#include <linux/virtio_9p.h> + +#define VIRTQUEUE_NUM 128 + +/* a single mutex to manage channel initialization and attachment */ +static DEFINE_MUTEX(virtio_9p_lock); +/* global which tracks highest initialized channel */ +static int chan_index; + +/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @tagpool: accounting for tag ids (and request slots) + * @reqs: array of request slots + * @max_tag: current number of request_slots allocated + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. + * This structure is allocated within the devices dev->mem space. + * A pointer to the structure will get put in the transport private. + * + */ + +static struct virtio_chan { + bool initialized; + bool inuse; + + spinlock_t lock; + + struct p9_client *client; + struct virtio_device *vdev; + struct virtqueue *vq; + + /* Scatterlist: can be too big for stack. */ + struct scatterlist sg[VIRTQUEUE_NUM]; +} channels[MAX_9P_CHAN]; + +/* How many bytes left in this page. */ +static unsigned int rest_of_page(void *data) +{ + return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); +} + +/** + * p9_virtio_close - reclaim resources of a channel + * @trans: transport state + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ + +static void p9_virtio_close(struct p9_client *client) +{ + struct virtio_chan *chan = client->trans; + + mutex_lock(&virtio_9p_lock); + chan->inuse = false; + mutex_unlock(&virtio_9p_lock); +} + +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + +static void req_done(struct virtqueue *vq) +{ + struct virtio_chan *chan = vq->vdev->priv; + struct p9_fcall *rc; + unsigned int len; + struct p9_req_t *req; + + P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + + while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { + P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); + P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + p9_client_cb(chan->client, req); + } +} + +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + +static int +pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, + int count) +{ + int s; + int index = start; + + while (count) { + s = rest_of_page(data); + if (s > count) + s = count; + sg_set_buf(&sg[index++], data, s); + count -= s; + data += s; + BUG_ON(index > limit); + } + + return index-start; +} + +/* We don't currently allow canceling of virtio requests */ +static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * p9_virtio_request - issue a request + * @t: transport state + * @tc: &p9_fcall request to transmit + * @rc: &p9_fcall to put reponse into + * + */ + +static int +p9_virtio_request(struct p9_client *client, struct p9_req_t *req) +{ + int in, out; + struct virtio_chan *chan = client->trans; + char *rdata = (char *)req->rc+sizeof(struct p9_fcall); + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); + + out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, + req->tc->size); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, + client->msize); + + req->status = REQ_STATUS_SENT; + + if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) { + P9_DPRINTK(P9_DEBUG_TRANS, + "9p debug: virtio rpc add_buf returned failure"); + return -EIO; + } + + chan->vq->vq_ops->kick(chan->vq); + + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); + return 0; +} + +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. At present only + * a single channel is in use, so in the future more work may need + * to be done here. + * + */ + +static int p9_virtio_probe(struct virtio_device *vdev) +{ + int err; + struct virtio_chan *chan; + int index; + + mutex_lock(&virtio_9p_lock); + index = chan_index++; + chan = &channels[index]; + mutex_unlock(&virtio_9p_lock); + + if (chan_index > MAX_9P_CHAN) { + printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); + BUG(); + err = -ENOMEM; + goto fail; + } + + chan->vdev = vdev; + + /* We expect one virtqueue, for requests. */ + chan->vq = vdev->config->find_vq(vdev, 0, req_done); + if (IS_ERR(chan->vq)) { + err = PTR_ERR(chan->vq); + goto out_free_vq; + } + chan->vq->vdev->priv = chan; + spin_lock_init(&chan->lock); + + sg_init_table(chan->sg, VIRTQUEUE_NUM); + + chan->inuse = false; + chan->initialized = true; + return 0; + +out_free_vq: + vdev->config->del_vq(chan->vq); +fail: + mutex_lock(&virtio_9p_lock); + chan_index--; + mutex_unlock(&virtio_9p_lock); + return err; +} + + +/** + * p9_virtio_create - allocate a new virtio channel + * @client: client instance invoking this transport + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * + * This sets up a transport channel for 9p communication. Right now + * we only match the first available channel, but eventually we couldlook up + * alternate channels by matching devname versus a virtio_config entry. + * We use a simple reference count mechanism to ensure that only a single + * mount has a channel open at a time. + * + * Bugs: doesn't allow identification of a specific channel + * to allocate, channels are allocated sequentially. This was + * a pragmatic decision to get things rolling, but ideally some + * way of identifying the channel to attach to would be nice + * if we are going to support multiple channels. + * + */ + +static int +p9_virtio_create(struct p9_client *client, const char *devname, char *args) +{ + struct virtio_chan *chan = channels; + int index = 0; + + mutex_lock(&virtio_9p_lock); + while (index < MAX_9P_CHAN) { + if (chan->initialized && !chan->inuse) { + chan->inuse = true; + break; + } else { + index++; + chan = &channels[index]; + } + } + mutex_unlock(&virtio_9p_lock); + + if (index >= MAX_9P_CHAN) { + printk(KERN_ERR "9p: no channels available\n"); + return -ENODEV; + } + + client->trans = (void *)chan; + chan->client = client; + + return 0; +} + +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + +static void p9_virtio_remove(struct virtio_device *vdev) +{ + struct virtio_chan *chan = vdev->priv; + + BUG_ON(chan->inuse); + + if (chan->initialized) { + vdev->config->del_vq(chan->vq); + chan->initialized = false; + } +} + +#define VIRTIO_ID_9P 9 + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +/* The standard "struct lguest_driver": */ +static struct virtio_driver p9_virtio_drv = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = p9_virtio_probe, + .remove = p9_virtio_remove, +}; + +static struct p9_trans_module p9_virtio_trans = { + .name = "virtio", + .create = p9_virtio_create, + .close = p9_virtio_close, + .request = p9_virtio_request, + .cancel = p9_virtio_cancel, + .maxsize = PAGE_SIZE*16, + .def = 0, + .owner = THIS_MODULE, +}; + +/* The standard init function */ +static int __init p9_virtio_init(void) +{ + int count; + + for (count = 0; count < MAX_9P_CHAN; count++) + channels[count].initialized = false; + + v9fs_register_trans(&p9_virtio_trans); + return register_virtio_driver(&p9_virtio_drv); +} + +static void __exit p9_virtio_cleanup(void) +{ + unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); +} + +module_init(p9_virtio_init); +module_exit(p9_virtio_cleanup); + +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); +MODULE_DESCRIPTION("Virtio 9p Transport"); +MODULE_LICENSE("GPL"); diff --git a/net/9p/util.c b/net/9p/util.c new file mode 100644 index 0000000..dc4ec05 --- /dev/null +++ b/net/9p/util.c @@ -0,0 +1,145 @@ +/* + * net/9p/util.c + * + * This file contains some helper functions + * + * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/parser.h> +#include <linux/idr.h> +#include <net/9p/9p.h> + +/** + * struct p9_idpool - per-connection accounting for tag idpool + * @lock: protects the pool + * @pool: idr to allocate tag id from + * + */ + +struct p9_idpool { + spinlock_t lock; + struct idr pool; +}; + +/** + * p9_idpool_create - create a new per-connection id pool + * + */ + +struct p9_idpool *p9_idpool_create(void) +{ + struct p9_idpool *p; + + p = kmalloc(sizeof(struct p9_idpool), GFP_KERNEL); + if (!p) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&p->lock); + idr_init(&p->pool); + + return p; +} +EXPORT_SYMBOL(p9_idpool_create); + +/** + * p9_idpool_destroy - create a new per-connection id pool + * @p: idpool to destory + */ + +void p9_idpool_destroy(struct p9_idpool *p) +{ + idr_destroy(&p->pool); + kfree(p); +} +EXPORT_SYMBOL(p9_idpool_destroy); + +/** + * p9_idpool_get - allocate numeric id from pool + * @p: pool to allocate from + * + * Bugs: This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +int p9_idpool_get(struct p9_idpool *p) +{ + int i = 0; + int error; + unsigned long flags; + +retry: + if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) + return 0; + + spin_lock_irqsave(&p->lock, flags); + + /* no need to store exactly p, we just need something non-null */ + error = idr_get_new(&p->pool, p, &i); + spin_unlock_irqrestore(&p->lock, flags); + + if (error == -EAGAIN) + goto retry; + else if (error) + return -1; + + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); + return i; +} +EXPORT_SYMBOL(p9_idpool_get); + +/** + * p9_idpool_put - release numeric id from pool + * @id: numeric id which is being released + * @p: pool to release id into + * + * Bugs: This seems to be an awful generic function, should it be in idr.c with + * the lock included in struct idr? + */ + +void p9_idpool_put(int id, struct p9_idpool *p) +{ + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + + spin_lock_irqsave(&p->lock, flags); + idr_remove(&p->pool, id); + spin_unlock_irqrestore(&p->lock, flags); +} +EXPORT_SYMBOL(p9_idpool_put); + +/** + * p9_idpool_check - check if the specified id is available + * @id: id to check + * @p: pool to check + */ + +int p9_idpool_check(int id, struct p9_idpool *p) +{ + return idr_find(&p->pool, id) != NULL; +} +EXPORT_SYMBOL(p9_idpool_check); + |