Diffstat (limited to 'net')
207 files changed, 2417 insertions, 1818 deletions
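The first file touched below, net/8021q/vlan.c, teaches the VLAN code to follow offload-feature changes on the real device through a new NETDEV_FEAT_CHANGE case. As a rough sketch of how that path is exercised (a hypothetical driver toggle, not part of this patch), the driver only has to update its feature bits and fire the notification; vlan_transfer_features() below then re-derives each VLAN device's features from dev->features & dev->vlan_features:

#include <linux/netdevice.h>

/* hypothetical driver-side ethtool toggle; the example_ name is illustrative */
static int example_set_tx_csum(struct net_device *dev, u32 on)
{
	if (on)
		dev->features |= NETIF_F_IP_CSUM;
	else
		dev->features &= ~NETIF_F_IP_CSUM;

	/* raises NETDEV_FEAT_CHANGE, which vlan_device_event() below now
	 * handles by calling vlan_transfer_features() for each VLAN */
	netdev_features_change(dev);
	return 0;
}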
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739ad..ab2225d 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -382,6 +382,18 @@ static void vlan_sync_address(struct net_device *dev, memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } +static void vlan_transfer_features(struct net_device *dev, + struct net_device *vlandev) +{ + unsigned long old_features = vlandev->features; + + vlandev->features &= ~dev->vlan_features; + vlandev->features |= dev->features & dev->vlan_features; + + if (old_features != vlandev->features) + netdev_features_change(vlandev); +} + static void __vlan_device_event(struct net_device *dev, unsigned long event) { switch (event) { @@ -410,10 +422,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; - if (is_vlan_dev(dev)) { + if (is_vlan_dev(dev)) __vlan_device_event(dev, event); - goto out; - } grp = __vlan_find_group(dev); if (!grp) @@ -450,6 +460,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; + case NETDEV_FEAT_CHANGE: + /* Propagate device features to underlying device */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(grp, i); + if (!vlandev) + continue; + + vlan_transfer_features(dev, vlandev); + } + + break; + case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c961f08..5d055c2 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -663,6 +663,8 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); + dev->features |= real_dev->features & real_dev->vlan_features; + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index daad006..08b54b5 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -176,12 +176,11 @@ int vlan_proc_add_dev(struct net_device *vlandev) struct vlan_dev_info *dev_info = vlan_dev_info(vlandev); struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id); - dev_info->dent = proc_create(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR, - vn->proc_vlan_dir, &vlandev_fops); + dev_info->dent = + proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR, + vn->proc_vlan_dir, &vlandev_fops, vlandev); if (!dev_info->dent) return -ENOBUFS; - - dev_info->dent->data = vlandev; return 0; } diff --git a/net/9p/Kconfig b/net/9p/Kconfig index bafc50c..ff34c5a 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,16 +13,6 @@ menuconfig NET_9P If unsure, say N. -config NET_9P_FD - depends on NET_9P - default y if NET_9P - tristate "9P File Descriptor Transports (Experimental)" - help - This builds support for file descriptor transports for 9p - which includes support for TCP/IP, named pipes, or passed - file descriptors. TCP/IP is the default transport for 9p, - so if you are going to use 9p, you'll likely want this. 
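With CONFIG_NET_9P_FD gone, the fd/tcp transport stops being a standalone module: the Makefile hunk below folds trans_fd.o into 9pnet.o, and the mod.c and trans_fd.c hunks further down swap its module_init() for an exported p9_trans_fd_init() called from the core. A condensed sketch of the resulting init order, abridged from this patch with error handling omitted:

#include <linux/module.h>
#include <net/9p/9p.h>
#include <net/9p/transport.h>

static int __init init_p9(void)
{
	p9_error_init();	/* preload the Plan 9 error-string map */
	printk(KERN_INFO "Installing 9P2000 support\n");
	p9_trans_fd_init();	/* built in now, not its own module_init() */
	return 0;
}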
- config NET_9P_VIRTIO depends on NET_9P && EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" diff --git a/net/9p/Makefile b/net/9p/Makefile index 8a10511..5192194 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_NET_9P) := 9pnet.o -obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet-objs := \ @@ -9,8 +8,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o error.o \ fcprint.o \ util.o \ - -9pnet_fd-objs := \ trans_fd.o \ 9pnet_virtio-objs := \ diff --git a/net/9p/client.c b/net/9p/client.c index 84e087e..2ffe40c 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -64,21 +64,30 @@ static match_table_t tokens = { * @options: options string passed from mount * @v9ses: existing v9fs session information * + * Return 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_client *clnt) +static int parse_opts(char *opts, struct p9_client *clnt) { + char *options; char *p; substring_t args[MAX_OPT_ARGS]; int option; - int ret; + int ret = 0; clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; - if (!options) - return; + if (!opts) + return 0; + + options = kstrdup(opts, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } while ((p = strsep(&options, ",")) != NULL) { int token; @@ -86,10 +95,11 @@ static void parse_opts(char *options, struct p9_client *clnt) continue; token = match_token(p, tokens, args); if (token < Opt_trans) { - ret = match_int(&args[0], &option); - if (ret < 0) { + int r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } } @@ -107,6 +117,8 @@ static void parse_opts(char *options, struct p9_client *clnt) continue; } } + kfree(options); + return ret; } @@ -138,16 +150,20 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); clnt->fidpool = p9_idpool_create(); - if (!clnt->fidpool) { + if (IS_ERR(clnt->fidpool)) { err = PTR_ERR(clnt->fidpool); clnt->fidpool = NULL; goto error; } - parse_opts(options, clnt); + err = parse_opts(options, clnt); + if (err < 0) + goto error; + if (clnt->trans_mod == NULL) { err = -EPROTONOSUPPORT; P9_DPRINTK(P9_DEBUG_ERROR, diff --git a/net/9p/conv.c b/net/9p/conv.c index 3fe35d5..4454720 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -197,7 +197,7 @@ static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) /** * p9_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure + * @wstat: metadata (stat) structure * @dotu: non-zero if 9P2000.u * */ @@ -511,6 +511,12 @@ p9_create_common(struct cbuf *bufp, u32 size, u8 id) return fc; } +/** + * p9_set_tag - set the tag field of an &p9_fcall structure + * @fc: fcall structure to set tag within + * @tag: tag id to set + */ + void p9_set_tag(struct p9_fcall *fc, u16 tag) { fc->tag = tag; @@ -518,6 +524,12 @@ void p9_set_tag(struct p9_fcall *fc, u16 tag) } EXPORT_SYMBOL(p9_set_tag); +/** + * p9_create_tversion - allocates and creates a T_VERSION request + * @msize: requested maximum data size + * @version: version string to negotiate + * + */ struct p9_fcall *p9_create_tversion(u32 msize, char *version) { int size; @@ -542,6 +554,16 @@ error: } EXPORT_SYMBOL(p9_create_tversion); +/** + * p9_create_tauth - allocates and 
creates a T_AUTH request + * @afid: handle to use for authentication protocol + * @uname: user name attempting to authenticate + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to authenticate + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, u32 n_uname, int dotu) { @@ -580,6 +602,18 @@ error: } EXPORT_SYMBOL(p9_create_tauth); +/** + * p9_create_tattach - allocates and creates a T_ATTACH request + * @fid: handle to use for the new mount point + * @afid: handle to use for authentication protocol + * @uname: user name attempting to attach + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to attach + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall * p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, u32 n_uname, int dotu) @@ -616,6 +650,12 @@ error: } EXPORT_SYMBOL(p9_create_tattach); +/** + * p9_create_tflush - allocates and creates a T_FLUSH request + * @oldtag: tag id for the transaction we are attempting to cancel + * + */ + struct p9_fcall *p9_create_tflush(u16 oldtag) { int size; @@ -639,6 +679,15 @@ error: } EXPORT_SYMBOL(p9_create_tflush); +/** + * p9_create_twalk - allocates and creates a T_WALK request + * @fid: handle we are traversing from + * @newfid: a new handle for this transaction + * @nwname: number of path elements to traverse + * @wnames: array of path elements + * + */ + struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, char **wnames) { @@ -677,6 +726,13 @@ error: } EXPORT_SYMBOL(p9_create_twalk); +/** + * p9_create_topen - allocates and creates a T_OPEN request + * @fid: handle we are trying to open + * @mode: what mode we are trying to open the file in + * + */ + struct p9_fcall *p9_create_topen(u32 fid, u8 mode) { int size; @@ -701,6 +757,19 @@ error: } EXPORT_SYMBOL(p9_create_topen); +/** + * p9_create_tcreate - allocates and creates a T_CREATE request + * @fid: handle of directory we are trying to create in + * @name: name of the file we are trying to create + * @perm: permissions for the file we are trying to create + * @mode: what mode we are trying to open the file in + * @extension: 9p2000.u extension string (for special files) + * @dotu: 9p2000.u enabled flag + * + * Note: Plan 9 create semantics include opening the resulting file + * which is why mode is included. + */ + struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, char *extension, int dotu) { @@ -736,6 +805,13 @@ error: } EXPORT_SYMBOL(p9_create_tcreate); +/** + * p9_create_tread - allocates and creates a T_READ request + * @fid: handle of the file we are trying to read + * @offset: offset to start reading from + * @count: how many bytes to read + */ + struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) { int size; @@ -761,6 +837,17 @@ error: } EXPORT_SYMBOL(p9_create_tread); +/** + * p9_create_twrite - allocates and creates a T_WRITE request from the kernel + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a request with data buffers from the kernel + * such as the page cache.
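+ * (For illustration: a kernel-side caller would typically hand this a
+ * page-cache backed buffer, while p9_create_twrite_u below builds the
+ * same request from a userspace pointer instead.)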
+ */ + struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, const char *data) { @@ -794,6 +881,16 @@ error: } EXPORT_SYMBOL(p9_create_twrite); +/** + * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a request with data buffers from userspace + */ + struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, const char __user *data) { @@ -827,6 +924,14 @@ error: } EXPORT_SYMBOL(p9_create_twrite_u); +/** + * p9_create_tclunk - allocate a request to forget about a file handle + * @fid: handle of the file we are closing or forgetting about + * + * clunk is used both to close open files and to discard transient handles + * which may be created during meta-data operations and hierarchy traversal. + */ + struct p9_fcall *p9_create_tclunk(u32 fid) { int size; @@ -850,6 +955,12 @@ error: } EXPORT_SYMBOL(p9_create_tclunk); +/** + * p9_create_tremove - allocate and create a request to remove a file + * @fid: handle of the file or directory we are removing + * + */ + struct p9_fcall *p9_create_tremove(u32 fid) { int size; @@ -873,6 +984,12 @@ error: } EXPORT_SYMBOL(p9_create_tremove); +/** + * p9_create_tstat - allocate and populate a request for attributes + * @fid: handle of the file or directory we are trying to get the attributes of + * + */ + struct p9_fcall *p9_create_tstat(u32 fid) { int size; @@ -896,6 +1013,14 @@ error: } EXPORT_SYMBOL(p9_create_tstat); +/** + * p9_create_twstat - allocate and populate a request to change attributes + * @fid: handle of the file or directory we are trying to change + * @wstat: &p9_stat structure with attributes we wish to set + * @dotu: 9p2000.u enabled flag + * + */ + struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, int dotu) { @@ -922,3 +1047,4 @@ error: return fc; } EXPORT_SYMBOL(p9_create_twstat); + diff --git a/net/9p/error.c b/net/9p/error.c index 64104b9..fdebe43 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -33,6 +33,13 @@ #include <linux/errno.h> #include <net/9p/9p.h> +/** + * struct errormap - map string errors from Plan 9 to Linux numeric ids + * @name: string sent over 9P + * @val: numeric id most closely representing @name + * @namelen: length of string + * @list: hash-table list for string lookup + */ struct errormap { char *name; int val; @@ -177,8 +184,7 @@ static struct errormap errmap[] = { }; /** - * p9_error_init - preload - * @errstr: error string + * p9_error_init - preload mappings into hash list * */ @@ -206,6 +212,7 @@ EXPORT_SYMBOL(p9_error_init); /** * errstr2errno - convert error string to error number * @errstr: error string + * @len: length of error string * */ @@ -230,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len) if (errno == 0) { /* TODO: if error isn't found, add it dynamically */ errstr[len] = 0; - printk(KERN_ERR "%s: errstr :%s: not found\n", __func__, - errstr); + printk(KERN_ERR "%s: server reported unknown error %s\n", + __func__, errstr); errno = 1; } diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c index 40244fb..53dd8e2 100644 --- a/net/9p/fcprint.c +++ b/net/9p/fcprint.c @@ -142,6 +142,14 @@ p9_printdata(char *buf, int buflen, u8 *data, int datalen) return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); } +/** + * p9_printfcall - decode and print a protocol structure into a buffer + * @buf: buffer to deposit decoded structure into + * @buflen:
available space in buffer + * @fc: protocol rpc structure of type &p9_fcall + * @extended: whether or not session is operating with extended protocol + */ + int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) { diff --git a/net/9p/mod.c b/net/9p/mod.c index c285aab..bdee1fb 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -39,9 +39,6 @@ module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); #endif -extern int p9_mux_global_init(void); -extern void p9_mux_global_exit(void); - /* * Dynamic Transport Registration Routines * @@ -52,7 +49,7 @@ static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p - * @m - structure describing the transport module and entry points + * @m: structure describing the transport module and entry points * */ void v9fs_register_trans(struct p9_trans_module *m) @@ -65,7 +62,7 @@ EXPORT_SYMBOL(v9fs_register_trans); /** * v9fs_match_trans - match transport versus registered transports - * @arg: string identifying transport + * @name: string identifying transport * */ struct p9_trans_module *v9fs_match_trans(const substring_t *name) @@ -110,6 +107,7 @@ static int __init init_p9(void) p9_error_init(); printk(KERN_INFO "Installing 9P2000 support\n"); + p9_trans_fd_init(); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f624dff..4507f74 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -47,12 +47,29 @@ #define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 +/** + * struct p9_fd_opts - per-transport options + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * + */ + struct p9_fd_opts { int rfd; int wfd; u16 port; }; + +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + struct p9_trans_fd { struct file *rd; struct file *wr; @@ -90,10 +107,24 @@ enum { }; struct p9_req; - typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); + +/** + * struct p9_req - fd mux encoding of an rpc transaction + * @lock: protects req_list + * @tag: numeric tag for rpc transaction + * @tcall: request &p9_fcall structure + * @rcall: response &p9_fcall structure + * @err: error state + * @cb: callback for when response is received + * @cba: argument to pass to callback + * @flush: flag to indicate RPC has been flushed + * @req_list: list link for higher level objects to chain requests + * + */ + struct p9_req { - spinlock_t lock; /* protect request structure */ + spinlock_t lock; int tag; struct p9_fcall *tcall; struct p9_fcall *rcall; @@ -104,7 +135,39 @@ struct p9_req { struct list_head req_list; }; -struct p9_mux_poll_task; +struct p9_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +/** + * struct p9_conn - fd mux connection state information + * @lock: protects mux_list (?) + * @mux_list: list link for mux to manage multiple connections (?) + * @poll_task: task polling on this connection + * @msize: maximum size for connection (dup) + * @extended: 9p2000.u flag (dup) + * @trans: reference to transport instance for this connection + * @tagpool: id accounting for transactions + * @err: error state + * @equeue: event wait_q (?) 
+ * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @rcall: current response &p9_fcall structure + * @rpos: read position in current frame + * @rbuf: current read buffer + * @wpos: write position for current frame + * @wsize: amount of data to write for current frame + * @wbuf: current write buffer + * @poll_wait: array of wait_q's for various worker threads + * @poll_waddr: ???? + * @pt: poll state + * @rq: current read work + * @wq: current write work + * @wsched: ???? + * + */ struct p9_conn { spinlock_t lock; /* protect lock structure */ @@ -132,11 +195,16 @@ struct p9_conn { unsigned long wsched; }; -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; +/** + * struct p9_mux_rpc - fd mux rpc accounting structure + * @m: connection this request was issued on + * @err: error state + * @tcall: request &p9_fcall + * @rcall: response &p9_fcall + * @wqueue: wait queue that client is blocked on for this rpc + * + * Bug: isn't this information duplicated elsewhere like &p9_req + */ struct p9_mux_rpc { struct p9_conn *m; @@ -207,10 +275,12 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag) /** * p9_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. + * @muxnum: number of mounts * + * Calculation is based on the number of mounted v9fs filesystems. * The current implementation returns sqrt of the number of mounts. */ + static int p9_mux_calc_poll_procs(int muxnum) { int n; @@ -331,12 +401,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) /** * p9_conn_create - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. + * @trans: transport structure * - * @trans - transport structure - * @msize - maximum message size - * @extended - extended flag + * Note: Creates the polling task if this is the first session. 
*/ + static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; @@ -406,7 +475,10 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) /** * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy + * */ + static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, @@ -429,9 +501,14 @@ static void p9_conn_destroy(struct p9_conn *m) } /** - * p9_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state + * + * called by files poll operation to add v9fs-poll task to files wait queue */ + static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { @@ -462,7 +539,10 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) /** * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * */ + static void p9_poll_mux(struct p9_conn *m) { int n; @@ -499,9 +579,14 @@ static void p9_poll_mux(struct p9_conn *m) } /** - * p9_poll_proc - polls all v9fs transports for new events and queues - * the appropriate work to the work queue + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * */ + static int p9_poll_proc(void *a) { struct p9_conn *m, *mtmp; @@ -527,7 +612,10 @@ static int p9_poll_proc(void *a) /** * p9_write_work - called when a transport can send some data + * @work: container for work to be done + * */ + static void p9_write_work(struct work_struct *work) { int n, err; @@ -638,7 +726,10 @@ static void process_request(struct p9_conn *m, struct p9_req *req) /** * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done + * */ + static void p9_read_work(struct work_struct *work) { int n, err; @@ -793,7 +884,9 @@ error: * @tc: request to be sent * @cb: callback function to call when response is received * @cba: parameter to pass to the callback function + * */ + static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *cba) @@ -961,10 +1054,12 @@ p9_conn_rpc_cb(struct p9_req *req, void *a) /** * p9_fd_rpc- sends 9P request and waits until a response is available. * The function can be interrupted. 
- * @m: mux data + * @t: transport data * @tc: request to be sent * @rc: pointer where a pointer to the response is stored + * */ + int p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -1041,8 +1136,10 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) * @m: mux data * @tc: request to be sent * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function + * @a: value to pass to the callback function + * */ + int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *a) { @@ -1065,7 +1162,9 @@ int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code + * */ + void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req *req, *rtmp; @@ -1097,35 +1196,46 @@ void p9_conn_cancel(struct p9_conn *m, int err) } /** - * v9fs_parse_options - parse mount options into session structure + * parse_options - parse mount options into session structure * @options: options string passed from mount - * @v9ses: existing v9fs session information + * @opts: transport-specific structure to parse options into * + * Returns 0 upon success, -ERRNO upon failure */ -static void parse_opts(char *options, struct p9_fd_opts *opts) +static int parse_opts(char *params, struct p9_fd_opts *opts) { char *p; substring_t args[MAX_OPT_ARGS]; int option; + char *options; int ret; opts->port = P9_PORT; opts->rfd = ~0; opts->wfd = ~0; - if (!options) - return; + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } while ((p = strsep(&options, ",")) != NULL) { int token; + int r; if (!*p) continue; token = match_token(p, tokens, args); - ret = match_int(&args[0], &option); - if (ret < 0) { + r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } switch (token) { @@ -1142,6 +1252,8 @@ static void parse_opts(char *options, struct p9_fd_opts *opts) continue; } } + kfree(options); + return 0; } static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) @@ -1193,11 +1305,12 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) /** * p9_fd_read- read from a fd - * @v9ses: session information + * @trans: transport instance state * @v: buffer to receive data into * @len: size of receive buffer * */ + static int p9_fd_read(struct p9_trans *trans, void *v, int len) { int ret; @@ -1220,11 +1333,12 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len) /** * p9_fd_write - write to a socket - * @v9ses: session information + * @trans: transport instance state * @v: buffer to send data from * @len: size of send buffer * */ + static int p9_fd_write(struct p9_trans *trans, void *v, int len) { int ret; @@ -1296,6 +1410,7 @@ end: * @trans: private socket structure * */ + static void p9_fd_close(struct p9_trans *trans) { struct p9_trans_fd *ts; @@ -1318,6 +1433,23 @@ static void p9_fd_close(struct p9_trans *trans) kfree(ts); } +/* + * stolen from NFS - maybe should be made a generic function? 
+ */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + static struct p9_trans * p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) { @@ -1328,7 +1460,12 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) struct p9_fd_opts opts; struct p9_trans_fd *p; - parse_opts(args, &opts); + err = parse_opts(args, &opts); + if (err < 0) + return ERR_PTR(err); + + if (valid_ipaddr4(addr) < 0) + return ERR_PTR(-EINVAL); csocket = NULL; trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); @@ -1508,7 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .create = p9_trans_create_fd, }; -static int __init p9_trans_fd_init(void) +int p9_trans_fd_init(void) { int ret = p9_mux_global_init(); if (ret) { @@ -1522,9 +1659,4 @@ static int __init p9_trans_fd_init(void) return 0; } - -module_init(p9_trans_fd_init); - -MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); -MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(p9_trans_fd_init); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de7a9f5..42adc05 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -49,29 +49,75 @@ #define VIRTQUEUE_NUM 128 /* a single mutex to manage channel initialization and attachment */ -static DECLARE_MUTEX(virtio_9p_lock); +static DEFINE_MUTEX(virtio_9p_lock); /* global which tracks highest initialized channel */ static int chan_index; #define P9_INIT_MAXTAG 16 -#define REQ_STATUS_IDLE 0 -#define REQ_STATUS_SENT 1 -#define REQ_STATUS_RCVD 2 -#define REQ_STATUS_FLSH 3 + +/** + * enum p9_req_status_t - virtio request status + * @REQ_STATUS_IDLE: request slot unused + * @REQ_STATUS_SENT: request sent to server + * @REQ_STATUS_RCVD: response received from server + * @REQ_STATUS_FLSH: request has been flushed + * + * The @REQ_STATUS_IDLE state is used to mark a request slot as unused + * but use is actually tracked by the idpool structure which handles tag + * id allocation. + * + */ + +enum p9_req_status_t { + REQ_STATUS_IDLE, + REQ_STATUS_SENT, + REQ_STATUS_RCVD, + REQ_STATUS_FLSH, +}; + +/** + * struct p9_req_t - virtio request slots + * @status: status of this request slot + * @wq: wait_queue for the client to block on for this request + * + * The virtio transport uses an array to track outstanding requests + * instead of a list. While this may incurr overhead during initial + * allocation or expansion, it makes request lookup much easier as the + * tag id is a index into an array. (We use tag+1 so that we can accomodate + * the -1 tag for the T_VERSION request). + * This also has the nice effect of only having to allocate wait_queues + * once, instead of constantly allocating and freeing them. Its possible + * other resources could benefit from this scheme as well. + * + */ struct p9_req_t { int status; wait_queue_head_t *wq; }; -/* We keep all per-channel information in a structure. 
+/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @tagpool: accounting for tag ids (and request slots) + * @reqs: array of request slots + * @max_tag: current number of request_slots allocated + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. * This structure is allocated within the devices dev->mem space. * A pointer to the structure will get put in the transport private. + * */ + static struct virtio_chan { - bool initialized; /* channel is initialized */ - bool inuse; /* channel is in use */ + bool initialized; + bool inuse; spinlock_t lock; @@ -86,7 +132,19 @@ static struct virtio_chan { struct scatterlist sg[VIRTQUEUE_NUM]; } channels[MAX_9P_CHAN]; -/* Lookup requests by tag */ +/** + * p9_lookup_tag - Lookup requests by tag + * @c: virtio channel to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * Bugs: there is currently no upper limit on request slots set + * here, but that should be constrained by the id accounting. + */ + static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) { /* This looks up the original request by tag so we know which @@ -130,11 +188,20 @@ static unsigned int rest_of_page(void *data) return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); } +/** + * p9_virtio_close - reclaim resources of a channel + * @trans: transport state + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ + static void p9_virtio_close(struct p9_trans *trans) { struct virtio_chan *chan = trans->priv; int count; - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&chan->lock, flags); p9_idpool_destroy(chan->tagpool); @@ -144,13 +211,26 @@ static void p9_virtio_close(struct p9_trans *trans) chan->max_tag = 0; spin_unlock_irqrestore(&chan->lock, flags); - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); chan->inuse = false; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); kfree(trans); } +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + static void req_done(struct virtqueue *vq) { struct virtio_chan *chan = vq->vdev->priv; @@ -169,6 +249,20 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); } +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. 
This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + static int pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, int count) @@ -189,6 +283,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, return index-start; } +/** + * p9_virtio_rpc - issue a request and wait for a response + * @t: transport state + * @tc: &p9_fcall request to transmit + * @rc: &p9_fcall to put reponse into + * + */ + static int p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -263,16 +365,26 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) return 0; } +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. At present only + * a single channel is in use, so in the future more work may need + * to be done here. + * + */ + static int p9_virtio_probe(struct virtio_device *vdev) { int err; struct virtio_chan *chan; int index; - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); index = chan_index++; chan = &channels[index]; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); if (chan_index > MAX_9P_CHAN) { printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); @@ -301,17 +413,34 @@ static int p9_virtio_probe(struct virtio_device *vdev) out_free_vq: vdev->config->del_vq(chan->vq); fail: - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); chan_index--; - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); return err; } -/* This sets up a transport channel for 9p communication. Right now + +/** + * p9_virtio_create - allocate a new virtio channel + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * @msize: requested maximum packet size + * @extended: 9p2000.u enabled flag + * + * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single - * mount has a channel open at a time. */ + * mount has a channel open at a time. + * + * Bugs: doesn't allow identification of a specific channel + * to allocate, channels are allocated sequentially. This was + * a pragmatic decision to get things rolling, but ideally some + * way of identifying the channel to attach to would be nice + * if we are going to support multiple channels. 
+ * + */ + static struct p9_trans * p9_virtio_create(const char *devname, char *args, int msize, unsigned char extended) { @@ -320,7 +449,7 @@ p9_virtio_create(const char *devname, char *args, int msize, struct virtio_chan *chan = channels; int index = 0; - down(&virtio_9p_lock); + mutex_lock(&virtio_9p_lock); while (index < MAX_9P_CHAN) { if (chan->initialized && !chan->inuse) { chan->inuse = true; @@ -330,7 +459,7 @@ p9_virtio_create(const char *devname, char *args, int msize, chan = &channels[index]; } } - up(&virtio_9p_lock); + mutex_unlock(&virtio_9p_lock); if (index >= MAX_9P_CHAN) { printk(KERN_ERR "9p: no channels available\n"); @@ -360,6 +489,12 @@ p9_virtio_create(const char *devname, char *args, int msize, return trans; } +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; diff --git a/net/9p/util.c b/net/9p/util.c index ef72155..958fc58 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -32,11 +32,23 @@ #include <linux/idr.h> #include <net/9p/9p.h> +/** + * struct p9_idpool - per-connection accounting for tag idpool + * @lock: protects the pool + * @pool: idr to allocate tag id from + * + */ + struct p9_idpool { spinlock_t lock; struct idr pool; }; +/** + * p9_idpool_create - create a new per-connection id pool + * + */ + struct p9_idpool *p9_idpool_create(void) { struct p9_idpool *p; @@ -52,6 +64,11 @@ struct p9_idpool *p9_idpool_create(void) } EXPORT_SYMBOL(p9_idpool_create); +/** + * p9_idpool_destroy - destroy a per-connection id pool + * @p: idpool to destroy + */ + void p9_idpool_destroy(struct p9_idpool *p) { idr_destroy(&p->pool); @@ -61,9 +78,9 @@ EXPORT_SYMBOL(p9_idpool_destroy); /** * p9_idpool_get - allocate numeric id from pool - * @p - pool to allocate from + * @p: pool to allocate from * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ @@ -71,7 +88,7 @@ int p9_idpool_get(struct p9_idpool *p) { int i = 0; int error; - unsigned int flags; + unsigned long flags; retry: if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) @@ -94,15 +111,16 @@ EXPORT_SYMBOL(p9_idpool_get); /** * p9_idpool_put - release numeric id from pool - * @p - pool to allocate from + * @id: numeric id which is being released + * @p: pool to release id into * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr?
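 * (In this client the pool backs the 16-bit rpc tags: the fd transport's
 * p9_mux_get_tag() draws an id for each outgoing request and
 * p9_mux_put_tag() returns it once the response has been consumed.)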
*/ void p9_idpool_put(int id, struct p9_idpool *p) { - unsigned int flags; + unsigned long flags; spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); spin_unlock_irqrestore(&p->lock, flags); @@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put); /** * p9_idpool_check - check if the specified id is available - * @id - id to check - * @p - pool + * @id: id to check + * @p: pool to check */ + int p9_idpool_check(int id, struct p9_idpool *p) { return idr_find(&p->pool, id) != NULL; } EXPORT_SYMBOL(p9_idpool_check); + diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 1b22806..05fafdc 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -188,10 +188,13 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, return 0; } } - } else { - skb_push(skb, 2); - if (brdev->payload == p_bridged) + } else { /* e_vc */ + if (brdev->payload == p_bridged) { + skb_push(skb, 2); memset(skb->data, 0, 2); + } else { /* p_routed */ + skb_pull(skb, ETH_HLEN); + } } skb_debug(skb); @@ -346,9 +349,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) /* skb==NULL means VCC is being destroyed */ br2684_close_vcc(brvcc); if (list_empty(&brdev->brvccs)) { - read_lock(&devs_lock); + write_lock_irq(&devs_lock); list_del(&brdev->br2684_devs); - read_unlock(&devs_lock); + write_unlock_irq(&devs_lock); unregister_netdev(net_dev); free_netdev(net_dev); } @@ -377,11 +380,8 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) skb->protocol = __constant_htons(ETH_P_IP); - else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + else + goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); skb_reset_network_header(skb); skb->pkt_type = PACKET_HOST; @@ -394,44 +394,56 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) (memcmp(skb->data, llc_oui_pid_pad, 7) == 0)) { skb_pull(skb, sizeof(llc_oui_pid_pad)); skb->protocol = eth_type_trans(skb, net_dev); - } else { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; - } + } else + goto error; - } else { - /* first 2 chars should be 0 */ - if (*((u16 *) (skb->data)) != 0) { - brdev->stats.rx_errors++; - dev_kfree_skb(skb); - return; + } else { /* e_vc */ + if (brdev->payload == p_routed) { + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = ip_hdr(skb); + if (iph->version == 4) + skb->protocol = __constant_htons(ETH_P_IP); + else if (iph->version == 6) + skb->protocol = __constant_htons(ETH_P_IPV6); + else + goto error; + skb->pkt_type = PACKET_HOST; + } else { /* p_bridged */ + /* first 2 chars should be 0 */ + if (*((u16 *) (skb->data)) != 0) + goto error; + skb_pull(skb, BR2684_PAD_LEN); + skb->protocol = eth_type_trans(skb, net_dev); } - skb_pull(skb, BR2684_PAD_LEN + ETH_HLEN); /* pad, dstmac, srcmac, ethtype */ - skb->protocol = eth_type_trans(skb, net_dev); } #ifdef CONFIG_ATM_BR2684_IPFILTER - if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) { - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + if (unlikely(packet_fails_filter(skb->protocol, brvcc, skb))) + goto dropped; #endif /* CONFIG_ATM_BR2684_IPFILTER */ skb->dev = net_dev; ATM_SKB(skb)->vcc = atmvcc; /* needed ? */ pr_debug("received packet's protocol: %x\n", ntohs(skb->protocol)); skb_debug(skb); - if (unlikely(!(net_dev->flags & IFF_UP))) { - /* sigh, interface is down */ - brdev->stats.rx_dropped++; - dev_kfree_skb(skb); - return; - } + /* sigh, interface is down? 
*/ + if (unlikely(!(net_dev->flags & IFF_UP))) + goto dropped; brdev->stats.rx_packets++; brdev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); + return; + +dropped: + brdev->stats.rx_dropped++; + goto free_skb; +error: + brdev->stats.rx_errors++; +free_skb: + dev_kfree_skb(skb); + return; } /* @@ -518,9 +530,9 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) struct sk_buff *next = skb->next; skb->next = skb->prev = NULL; + br2684_push(atmvcc, skb); BRPRIV(skb->dev)->stats.rx_bytes -= skb->len; BRPRIV(skb->dev)->stats.rx_packets--; - br2684_push(atmvcc, skb); skb = next; } diff --git a/net/atm/proc.c b/net/atm/proc.c index 5c9f3d1..49487b3 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -417,12 +417,10 @@ int atm_proc_dev_register(struct atm_dev *dev) goto err_out; sprintf(dev->proc_name,"%s:%d",dev->type, dev->number); - dev->proc_entry = proc_create(dev->proc_name, 0, atm_proc_root, - &proc_atm_dev_ops); + dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root, + &proc_atm_dev_ops, dev); if (!dev->proc_entry) goto err_free_name; - dev->proc_entry->data = dev; - dev->proc_entry->owner = THIS_MODULE; return 0; err_free_name: kfree(dev->proc_name); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index d8f2157..034aa10 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) void ax25_requeue_frames(ax25_cb *ax25) { - struct sk_buff *skb, *skb_prev = NULL; + struct sk_buff *skb; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by ax25_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ - while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&ax25->write_queue, skb); - else - skb_append(skb_prev, skb, &ax25->write_queue); - skb_prev = skb; - } + while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) + skb_queue_head(&ax25->write_queue, skb); } /* diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 347e935..f85d946 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -135,7 +135,7 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len if (len < 2) return -EILSEQ; - n = ntohs(get_unaligned(data)); + n = get_unaligned_be16(data); data++; len -= 2; if (len < n) @@ -150,8 +150,8 @@ static int bnep_ctrl_set_netfilter(struct bnep_session *s, __be16 *data, int len int i; for (i = 0; i < n; i++) { - f[i].start = ntohs(get_unaligned(data++)); - f[i].end = ntohs(get_unaligned(data++)); + f[i].start = get_unaligned_be16(data++); + f[i].end = get_unaligned_be16(data++); BT_DBG("proto filter start %d end %d", f[i].start, f[i].end); @@ -180,7 +180,7 @@ static int bnep_ctrl_set_mcfilter(struct bnep_session *s, u8 *data, int len) if (len < 2) return -EILSEQ; - n = ntohs(get_unaligned((__be16 *) data)); + n = get_unaligned_be16(data); data += 2; len -= 2; if (len < n) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 46df2e4..6aef8f2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -129,8 +129,7 @@ static void hci_cc_write_link_policy(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(rp->handle)); if (conn) { - __le16 policy = get_unaligned((__le16 *) (sent + 2)); - conn->link_policy = __le16_to_cpu(policy); + conn->link_policy = 
get_unaligned_le16(sent + 2); } hci_dev_unlock(hdev); @@ -313,7 +312,7 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev, struct sk_buff *skb return; if (!status) { - __u16 setting = __le16_to_cpu(get_unaligned((__le16 *) sent)); + __u16 setting = get_unaligned_le16(sent); if (hdev->voice_setting != setting) { hdev->voice_setting = setting; @@ -1152,8 +1151,8 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s struct hci_conn *conn; __u16 handle, count; - handle = __le16_to_cpu(get_unaligned(ptr++)); - count = __le16_to_cpu(get_unaligned(ptr++)); + handle = get_unaligned_le16(ptr++); + count = get_unaligned_le16(ptr++); conn = hci_conn_hash_lookup_handle(hdev, handle); if (conn) { diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 1d36c09..747fabd 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -440,7 +440,7 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, skb->dev = (void *) hdev; if (bt_cb(skb)->pkt_type == HCI_COMMAND_PKT) { - u16 opcode = __le16_to_cpu(get_unaligned((__le16 *) skb->data)); + u16 opcode = get_unaligned_le16(skb->data); u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index a4849f2..6e180d2 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1827,7 +1827,7 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm del_timer(&conn->info_timer); if (type == L2CAP_IT_FEAT_MASK) - conn->feat_mask = __le32_to_cpu(get_unaligned((__le32 *) rsp->data)); + conn->feat_mask = get_unaligned_le32(rsp->data); l2cap_conn_start(conn); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index eb62558..0c2c937 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -423,8 +423,8 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) rfcomm_dlc_lock(d); d->state = BT_CLOSED; - rfcomm_dlc_unlock(d); d->state_change(d, err); + rfcomm_dlc_unlock(d); skb_queue_purge(&d->tx_queue); rfcomm_dlc_unlink(d); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c3f749a..c919187 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -566,11 +566,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) if (dlc->state == BT_CLOSED) { if (!dev->tty) { if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - if (rfcomm_dev_get(dev->id) == NULL) + /* Drop DLC lock here to avoid deadlock + * 1. rfcomm_dev_get will take rfcomm_dev_lock + * but in rfcomm_dev_add there's lock order: + * rfcomm_dev_lock -> dlc lock + * 2. 
rfcomm_dev_put will deadlock if it's + * the last reference + */ + rfcomm_dlc_unlock(dlc); + if (rfcomm_dev_get(dev->id) == NULL) { + rfcomm_dlc_lock(dlc); return; + } rfcomm_dev_del(dev); rfcomm_dev_put(dev); + rfcomm_dlc_lock(dlc); } } else tty_hangup(dev->tty); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9326c37..72c5976 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -285,7 +285,11 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, /* convert from internal format to API */ memcpy(fe->mac_addr, f->addr.addr, ETH_ALEN); + + /* due to ABI compat need to split into hi/lo */ fe->port_no = f->dst->port_no; + fe->port_hi = f->dst->port_no >> 8; + fe->is_local = f->is_local; if (!f->is_static) fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->ageing_timer); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 77a981a..f38cc53 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -273,15 +273,13 @@ int br_add_bridge(const char *name) rtnl_lock(); if (strchr(dev->name, '%')) { ret = dev_alloc_name(dev, dev->name); - if (ret < 0) { - free_netdev(dev); - goto out; - } + if (ret < 0) + goto out_free; } ret = register_netdevice(dev); if (ret) - goto out; + goto out_free; ret = br_sysfs_addbr(dev); if (ret) @@ -289,6 +287,10 @@ int br_add_bridge(const char *name) out: rtnl_unlock(); return ret; + +out_free: + free_netdev(dev); + goto out; } int br_del_bridge(const char *name) @@ -440,12 +442,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) void __exit br_cleanup_bridges(void) { - struct net_device *dev, *nxt; + struct net_device *dev; rtnl_lock(); - for_each_netdev_safe(&init_net, dev, nxt) - if (dev->priv_flags & IFF_EBRIDGE) +restart: + for_each_netdev(&init_net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); + goto restart; + } + } rtnl_unlock(); } diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8deab64..ddeb6e5 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -58,12 +58,12 @@ static inline void br_set_ticks(unsigned char *dest, int j) { unsigned long ticks = (STP_HZ * j)/ HZ; - put_unaligned(htons(ticks), (__be16 *)dest); + put_unaligned_be16(ticks, dest); } static inline int br_get_ticks(const unsigned char *src) { - unsigned long ticks = ntohs(get_unaligned((__be16 *)src)); + unsigned long ticks = get_unaligned_be16(src); return DIV_ROUND_UP(ticks * HZ, STP_HZ); } diff --git a/net/can/af_can.c b/net/can/af_can.c index 2759b76..484bbf6 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -205,11 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol) * -ENOBUFS on full driver queue (see net_xmit_errno()) * -ENOMEM when local loopback failed at calling skb_clone() * -EPERM when trying to send on a non-CAN interface + * -EINVAL when the skb->data does not contain a valid CAN frame */ int can_send(struct sk_buff *skb, int loop) { + struct sk_buff *newskb = NULL; + struct can_frame *cf = (struct can_frame *)skb->data; int err; + if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) { + kfree_skb(skb); + return -EINVAL; + } + if (skb->dev->type != ARPHRD_CAN) { kfree_skb(skb); return -EPERM; @@ -244,8 +252,7 @@ int can_send(struct sk_buff *skb, int loop) * If the interface is not capable to do loopback * itself, we do it here. 
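 * (After this patch the clone is handed to netif_rx() only once the
 * transmit path has accepted the frame, so a queueing error no longer
 * loops back a frame that was never actually sent.)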
*/ - struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); - + newskb = skb_clone(skb, GFP_ATOMIC); if (!newskb) { kfree_skb(skb); return -ENOMEM; @@ -254,7 +261,6 @@ int can_send(struct sk_buff *skb, int loop) newskb->sk = skb->sk; newskb->ip_summed = CHECKSUM_UNNECESSARY; newskb->pkt_type = PACKET_BROADCAST; - netif_rx(newskb); } } else { /* indication for the CAN driver: no loopback required */ @@ -266,11 +272,20 @@ int can_send(struct sk_buff *skb, int loop) if (err > 0) err = net_xmit_errno(err); + if (err) { + if (newskb) + kfree_skb(newskb); + return err; + } + + if (newskb) + netif_rx(newskb); + /* update statistics */ can_stats.tx_frames++; can_stats.tx_frames_delta++; - return err; + return 0; } EXPORT_SYMBOL(can_send); @@ -597,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dev_rcv_lists *d; + struct can_frame *cf = (struct can_frame *)skb->data; int matches; if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) { @@ -604,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, return 0; } + BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8); + /* update statistics */ can_stats.rx_frames++; can_stats.rx_frames_delta++; diff --git a/net/can/bcm.c b/net/can/bcm.c index 74fd2d3..72c2ce9 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, if (head->nframes) { /* can_frames starting here */ - firstframe = (struct can_frame *) skb_tail_pointer(skb); + firstframe = (struct can_frame *)skb_tail_pointer(skb); memcpy(skb_put(skb, datalen), frames, datalen); @@ -412,12 +412,6 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data) bcm_send_to_user(op, &head, data, 1); } -/* TODO: move to linux/hrtimer.h */ -static inline int hrtimer_callback_running(struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_CALLBACK; -} - /* * bcm_rx_update_and_send - process a detected relevant receive content change * 1. 
update the last received data @@ -832,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) return err; @@ -864,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, for (i = 0; i < msg_head->nframes; i++) { err = memcpy_fromiovec((u8 *)&op->frames[i], msg->msg_iov, CFSIZ); + + if (op->frames[i].can_dlc > 8) + err = -EINVAL; + if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); @@ -1170,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) skb->dev = dev; skb->sk = sk; - can_send(skb, 1); /* send with loopback */ + err = can_send(skb, 1); /* send with loopback */ dev_put(dev); + if (err) + return err; + return CFSIZ + MHSIZ; } @@ -1191,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, if (!bo->bound) return -ENOTCONN; + /* check for valid message length from userspace */ + if (size < MHSIZ || (size - MHSIZ) % CFSIZ) + return -EINVAL; + /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { @@ -1265,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, break; case TX_SEND: - /* we need at least one can_frame */ - if (msg_head.nframes < 1) + /* we need exactly one can_frame behind the msg head */ + if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk); diff --git a/net/can/raw.c b/net/can/raw.c index 69877b8..3e46ee3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, } else ifindex = ro->ifindex; + if (size != sizeof(struct can_frame)) + return -EINVAL; + dev = dev_get_by_index(&init_net, ifindex); if (!dev) return -ENXIO; diff --git a/net/core/dev.c b/net/core/dev.c index ed49da5..fca23a3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -119,6 +119,7 @@ #include <linux/err.h> #include <linux/ctype.h> #include <linux/if_arp.h> +#include <linux/if_vlan.h> #include "net-sysfs.h" @@ -162,7 +163,7 @@ struct net_dma { struct dma_client client; spinlock_t lock; cpumask_t channel_mask; - struct dma_chan *channels[NR_CPUS]; + struct dma_chan **channels; }; static enum dma_state_client @@ -453,7 +454,7 @@ static int netdev_boot_setup_add(char *name, struct ifmap *map) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { memset(s[i].name, 0, sizeof(s[i].name)); - strcpy(s[i].name, name); + strlcpy(s[i].name, name, IFNAMSIZ); memcpy(&s[i].map, map, sizeof(s[i].map)); break; } @@ -478,7 +479,7 @@ int netdev_boot_setup_check(struct net_device *dev) for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strncmp(dev->name, s[i].name, strlen(s[i].name))) { + !strcmp(dev->name, s[i].name)) { dev->irq = s[i].map.irq; dev->base_addr = s[i].map.base_addr; dev->mem_start = s[i].map.mem_start; @@ -903,7 +904,11 @@ int dev_change_name(struct net_device *dev, char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - device_rename(&dev->dev, dev->name); + err = device_rename(&dev->dev, dev->name); + if (err) { + memcpy(dev->name, oldname, IFNAMSIZ); + return err; + } write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); @@ -994,6 +999,8 @@ int dev_open(struct net_device *dev) { int ret = 0; + ASSERT_RTNL(); + /* 
* Is it already up? */ @@ -1060,6 +1067,8 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + ASSERT_RTNL(); + might_sleep(); if (!(dev->flags & IFF_UP)) @@ -1354,6 +1363,29 @@ void netif_device_attach(struct net_device *dev) } EXPORT_SYMBOL(netif_device_attach); +static bool can_checksum_protocol(unsigned long features, __be16 protocol) +{ + return ((features & NETIF_F_GEN_CSUM) || + ((features & NETIF_F_IP_CSUM) && + protocol == htons(ETH_P_IP)) || + ((features & NETIF_F_IPV6_CSUM) && + protocol == htons(ETH_P_IPV6))); +} + +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) +{ + if (can_checksum_protocol(dev->features, skb->protocol)) + return true; + + if (skb->protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + if (can_checksum_protocol(dev->features & dev->vlan_features, + veh->h_vlan_encapsulated_proto)) + return true; + } + + return false; +} /* * Invalidate hardware checksum when packet is to be mangled, and @@ -1632,14 +1664,8 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { skb_set_transport_header(skb, skb->csum_start - skb_headroom(skb)); - - if (!(dev->features & NETIF_F_GEN_CSUM) && - !((dev->features & NETIF_F_IP_CSUM) && - skb->protocol == htons(ETH_P_IP)) && - !((dev->features & NETIF_F_IPV6_CSUM) && - skb->protocol == htons(ETH_P_IPV6))) - if (skb_checksum_help(skb)) - goto out_kfree_skb; + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb)) + goto out_kfree_skb; } gso: @@ -2051,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); @@ -2444,7 +2474,7 @@ static struct netif_rx_stats *softnet_get_online(loff_t *pos) { struct netif_rx_stats *rc = NULL; - while (*pos < NR_CPUS) + while (*pos < nr_cpu_ids) if (cpu_online(*pos)) { rc = &per_cpu(netdev_rx_stat, *pos); break; @@ -2943,7 +2973,7 @@ EXPORT_SYMBOL(dev_unicast_delete); /** * dev_unicast_add - add a secondary unicast address * @dev: device - * @addr: address to delete + * @addr: address to add * @alen: length of @addr * * Add a secondary unicast address to the device or increase @@ -3133,7 +3163,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. 
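 * (Note the fix below: dev->flags has already been updated by this
 * point, so XOR-ing it with the new flags could never detect a toggled
 * IFF_MULTICAST bit; comparing against the saved old_flags restores the
 * change_rx_flags() callback.)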
*/ - if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) dev->change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3776,6 +3806,7 @@ int register_netdevice(struct net_device *dev) } } + netdev_initialize_kobject(dev); ret = netdev_register_kobject(dev); if (ret) goto err_uninit; @@ -4208,7 +4239,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - err = device_rename(&dev->dev, dev->name); + netdev_unregister_kobject(dev); + err = netdev_register_kobject(dev); WARN_ON(err); /* Add the device back in the hashes */ @@ -4324,7 +4356,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, spin_lock(&net_dma->lock); switch (state) { case DMA_RESOURCE_AVAILABLE: - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) if (net_dma->channels[i] == chan) { found = 1; break; @@ -4339,7 +4371,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } break; case DMA_RESOURCE_REMOVED: - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) if (net_dma->channels[i] == chan) { found = 1; pos = i; @@ -4366,6 +4398,13 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, */ static int __init netdev_dma_register(void) { + net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma), + GFP_KERNEL); + if (unlikely(!net_dma.channels)) { + printk(KERN_NOTICE + "netdev_dma: no memory for net_dma.channels\n"); + return -ENOMEM; + } spin_lock_init(&net_dma.lock); dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); dma_async_client_register(&net_dma.client); @@ -4471,17 +4510,19 @@ static void __net_exit default_device_exit(struct net *net) rtnl_lock(); for_each_netdev_safe(net, dev, next) { int err; + char fb_name[IFNAMSIZ]; /* Ignore unmoveable devices (i.e. loopback) */ if (dev->features & NETIF_F_NETNS_LOCAL) continue; /* Push remaing network devices to init_net */ - err = dev_change_net_namespace(dev, &init_net, "dev%d"); + snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); + err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { - printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", + printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n", __func__, dev->name, err); - unregister_netdevice(dev); + BUG(); } } rtnl_unlock(); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e3e9ab0..277a230 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -226,7 +226,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } @@ -365,7 +365,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) ops = lookup_rules_ops(net, frh->family); if (ops == NULL) { - err = EAFNOSUPPORT; + err = -EAFNOSUPPORT; goto errout; } diff --git a/net/core/filter.c b/net/core/filter.c index f5f3cf60..df37443 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -68,7 +68,6 @@ static inline void *load_pointer(struct sk_buff *skb, int k, * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter - * @needlock: set to 1 if the sock is not locked by caller. * * Run the filter code and then cut skb->data to correct size returned by * sk_run_filter. If pkt_len is 0 we toss packet. 
If skb->len is smaller @@ -213,7 +212,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int load_w: ptr = load_pointer(skb, k, 4, &tmp); if (ptr != NULL) { - A = ntohl(get_unaligned((__be32 *)ptr)); + A = get_unaligned_be32(ptr); continue; } break; @@ -222,7 +221,7 @@ load_w: load_h: ptr = load_pointer(skb, k, 2, &tmp); if (ptr != NULL) { - A = ntohs(get_unaligned((__be16 *)ptr)); + A = get_unaligned_be16(ptr); continue; } break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 75075c3..65f01f7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1430,11 +1430,10 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops); + tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl); if (!tbl->pde) panic("cannot create neighbour proc dir entry"); - tbl->pde->data = tbl; #endif tbl->hash_mask = 1; @@ -1715,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, @@ -2058,9 +2058,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, goto nla_put_failure; } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4e7b847..90e2177 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -449,7 +449,6 @@ int netdev_register_kobject(struct net_device *net) struct device *dev = &(net->dev); struct attribute_group **groups = net->sysfs_groups; - device_initialize(dev); dev->class = &net_class; dev->platform_data = net; dev->groups = groups; @@ -470,6 +469,12 @@ int netdev_register_kobject(struct net_device *net) return device_add(dev); } +void netdev_initialize_kobject(struct net_device *net) +{ + struct device *device = &(net->dev); + device_initialize(device); +} + int netdev_kobject_init(void) { return class_register(&net_class); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index f5f108d..14e7524 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,5 +4,5 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); - +void netdev_initialize_kobject(struct net_device *); #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72b4c18..7c52fe2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; + /* Be very certain incoming network packets will not find us */ + rcu_barrier(); + net = container_of(work, struct net, work); mutex_lock(&net_mutex); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index b04d643..8fb134d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -419,7 +419,7 @@ static void arp_reply(struct sk_buff *skb) 
return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); if (!send_skb) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a803b44..fdf5377 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -390,6 +390,7 @@ struct pktgen_thread { int cpu; wait_queue_head_t queue; + struct completion start_done; }; #define REMOVE 1 @@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg) BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); + complete(&t->start_done); pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); @@ -3570,15 +3572,14 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) if (err) goto out1; - pkt_dev->entry = proc_create(ifname, 0600, - pg_proc_dir, &pktgen_if_fops); + pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, + &pktgen_if_fops, pkt_dev); if (!pkt_dev->entry) { printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, ifname); err = -EINVAL; goto out2; } - pkt_dev->entry->data = pkt_dev; #ifdef CONFIG_XFRM pkt_dev->ipsmode = XFRM_MODE_TRANSPORT; pkt_dev->ipsproto = IPPROTO_ESP; @@ -3616,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); list_add_tail(&t->th_list, &pktgen_threads); + init_completion(&t->start_done); p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { @@ -3628,7 +3630,8 @@ static int __init pktgen_create_thread(int cpu) kthread_bind(p, cpu); t->tsk = p; - pe = proc_create(t->tsk->comm, 0600, pg_proc_dir, &pktgen_thread_fops); + pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, + &pktgen_thread_fops, t); if (!pe) { printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", PG_PROC_DIR, t->tsk->comm); @@ -3638,9 +3641,8 @@ static int __init pktgen_create_thread(int cpu) return -EINVAL; } - pe->data = t; - wake_up_process(p); + wait_for_completion(&t->start_done); return 0; } @@ -3716,8 +3718,6 @@ static int __init pg_init(void) return -EINVAL; } - pe->data = NULL; - /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4..a9a7721 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) return nla_nest_end(skb, mx); nla_put_failure: - return nla_nest_cancel(skb, mx); + nla_nest_cancel(skb, mx); + return -EMSGSIZE; } int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4fe605f..3666216 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -200,7 +200,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto nodata; /* - * See comment in sk_buff definition, just before the 'tail' member + * Only clear those fields we need to clear, not those that we will + * actually initialise below. Hence, don't put any more fields after + * the tail pointer in struct sk_buff! 
*/ memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = size + sizeof(struct sk_buff); @@ -1290,12 +1292,14 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, { unsigned int nr_pages = spd->nr_pages; unsigned int poff, plen, len, toff, tlen; - int headlen, seg; + int headlen, seg, error = 0; toff = *offset; tlen = *total_len; - if (!tlen) + if (!tlen) { + error = 1; goto err; + } /* * if the offset is greater than the linear part, go directly to @@ -1337,7 +1341,8 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, * just jump directly to update and return, no point * in going over fragments when the output is full. */ - if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb)) + error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb); + if (error) goto done; tlen -= plen; @@ -1367,7 +1372,8 @@ map_frag: if (!plen) break; - if (spd_fill_page(spd, f->page, plen, poff, skb)) + error = spd_fill_page(spd, f->page, plen, poff, skb); + if (error) break; tlen -= plen; @@ -1380,7 +1386,10 @@ done: return 0; } err: - return 1; + /* update the offset to reflect the linear part skip, if any */ + if (!error) + *offset = toff; + return error; } /* @@ -1443,6 +1452,7 @@ done: if (spd.nr_pages) { int ret; + struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -1453,9 +1463,9 @@ done: * we call into ->sendpage() with the i_mutex lock held * and networking will grab the socket lock. */ - release_sock(__skb->sk); + release_sock(sk); ret = splice_to_pipe(pipe, &spd); - lock_sock(__skb->sk); + lock_sock(sk); return ret; } diff --git a/net/core/sock.c b/net/core/sock.c index 5dbb81b..88094cb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -228,11 +228,12 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) static int warned __read_mostly; *timeo_p = 0; - if (warned < 10 && net_ratelimit()) + if (warned < 10 && net_ratelimit()) { warned++; printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " "tries to set negative timeout\n", current->comm, task_pid_nr(current)); + } return 0; } *timeo_p = MAX_SCHEDULE_TIMEOUT; @@ -269,7 +270,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int err = 0; int skb_len; - /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces + /* Cast sk->rcvbuf to unsigned... It's pointless, but reduces number of warnings when compiling with -W --ANK */ if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd5..c77aff9 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -75,7 +75,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; - if ((copy = end - offset) > 0) { + if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = frag->page; diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 6de4bd1..1e8be24 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -290,12 +290,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, while (1) { const u8 len = dccp_ackvec_len(av, index); - const u8 state = dccp_ackvec_state(av, index); + const u8 av_state = dccp_ackvec_state(av, index); /* * valid packets not yet in av_buf have a reserved * entry, with a len equal to 0. */ - if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && + if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED && len == 0 && delta == 0) { /* Found our reserved seat! 
*/ dccp_pr_debug("Found %llu reserved seat!\n", @@ -325,31 +325,6 @@ out_duplicate: return -EILSEQ; } -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) -{ - dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - - dccp_pr_debug_cat("%d,%d|", state, rl); - ++vector; - } - - dccp_pr_debug_cat("\n"); -} - -void dccp_ackvec_print(const struct dccp_ackvec *av) -{ - dccp_ackvector_print(av->av_buf_ackno, - av->av_buf + av->av_buf_head, - av->av_vec_len); -} -#endif - static void dccp_ackvec_throw_record(struct dccp_ackvec *av, struct dccp_ackvec_record *avr) { diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index b5b52eb..8e95808 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -716,7 +716,7 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) * packets for new connections, following the rules from [RFC3390]". * We need to convert the bytes of RFC3390 into the packets of RFC 4341. */ - hctx->ccid2hctx_cwnd = min(4U, max(2U, 4380U / dp->dccps_mss_cache)); + hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2); diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index e76f460..a1929f3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -88,8 +88,8 @@ static void ccid3_hc_tx_set_state(struct sock *sk, static inline u64 rfc3390_initial_rate(struct sock *sk) { const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const __u32 w_init = min_t(__u32, 4 * hctx->ccid3hctx_s, - max_t(__u32, 2 * hctx->ccid3hctx_s, 4380)); + const __u32 w_init = clamp_t(__u32, 4380U, + 2 * hctx->ccid3hctx_s, 4 * hctx->ccid3hctx_s); return scaled_div(w_init << 6, hctx->ccid3hctx_rtt); } @@ -159,8 +159,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp) } else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld) - (s64)hctx->ccid3hctx_rtt >= 0) { - hctx->ccid3hctx_x = - max(min(2 * hctx->ccid3hctx_x, min_rate), + hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate); + hctx->ccid3hctx_x = max(hctx->ccid3hctx_x, scaled_div(((__u64)hctx->ccid3hctx_s) << 6, hctx->ccid3hctx_rtt)); hctx->ccid3hctx_t_ld = now; @@ -193,22 +193,17 @@ static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hctx, int len) /* * Update Window Counter using the algorithm from [RFC 4342, 8.1]. - * The algorithm is not applicable if RTT < 4 microseconds. + * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt(). 
*/ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, ktime_t now) { - u32 quarter_rtts; - - if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ - return; - - quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); - quarter_rtts /= hctx->ccid3hctx_rtt / 4; + u32 delta = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count), + quarter_rtts = (4 * delta) / hctx->ccid3hctx_rtt; if (quarter_rtts > 0) { hctx->ccid3hctx_t_last_win_count = now; - hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count += min(quarter_rtts, 5U); hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ } } @@ -334,8 +329,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb) hctx->ccid3hctx_x = rfc3390_initial_rate(sk); hctx->ccid3hctx_t_ld = now; } else { - /* Sender does not have RTT sample: X_pps = 1 pkt/sec */ - hctx->ccid3hctx_x = hctx->ccid3hctx_s; + /* + * Sender does not have RTT sample: + * - set fallback RTT (RFC 4340, 3.4) since a RTT value + * is needed in several parts (e.g. window counter); + * - set sending rate X_pps = 1pps as per RFC 3448, 4.2. + */ + hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT; + hctx->ccid3hctx_x = hctx->ccid3hctx_s; hctx->ccid3hctx_x <<= 6; } ccid3_update_send_interval(hctx); diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index d1dfbb8..97ecec0 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -14,14 +14,6 @@ module_param(tfrc_debug, bool, 0444); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif -extern int tfrc_tx_packet_history_init(void); -extern void tfrc_tx_packet_history_exit(void); -extern int tfrc_rx_packet_history_init(void); -extern void tfrc_rx_packet_history_exit(void); - -extern int tfrc_li_init(void); -extern void tfrc_li_exit(void); - static int __init tfrc_module_init(void) { int rc = tfrc_li_init(); diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 1fb1187..ed98575 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -15,7 +15,7 @@ * (at your option) any later version. */ #include <linux/types.h> -#include <asm/div64.h> +#include <linux/math64.h> #include "../../dccp.h" /* internal includes that this module exports: */ #include "loss_interval.h" @@ -29,21 +29,19 @@ extern int tfrc_debug; #endif /* integer-arithmetic divisions of type (a * 1000000)/b */ -static inline u64 scaled_div(u64 a, u32 b) +static inline u64 scaled_div(u64 a, u64 b) { BUG_ON(b==0); - a *= 1000000; - do_div(a, b); - return a; + return div64_u64(a * 1000000, b); } -static inline u32 scaled_div32(u64 a, u32 b) +static inline u32 scaled_div32(u64 a, u64 b) { u64 result = scaled_div(a, b); if (result > UINT_MAX) { - DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U", - (unsigned long long)a, b); + DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX", + (unsigned long long)a, (unsigned long long)b); return UINT_MAX; } return result; @@ -58,7 +56,14 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight) return avg ? 
(weight * avg + (10 - weight) * newval) / 10 : newval; } -extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); -extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); +extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); +extern int tfrc_tx_packet_history_init(void); +extern void tfrc_tx_packet_history_exit(void); +extern int tfrc_rx_packet_history_init(void); +extern void tfrc_rx_packet_history_exit(void); + +extern int tfrc_li_init(void); +extern void tfrc_li_exit(void); #endif /* _TFRC_H_ */ diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index e4e64b7..2f20a29 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -661,7 +661,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p) EXPORT_SYMBOL_GPL(tfrc_calc_x); -/* +/** * tfrc_calc_x_reverse_lookup - try to find p given f(p) * * @fvalue: function value to match, scaled by 1000000 @@ -676,11 +676,11 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue) /* Error cases. */ if (fvalue < tfrc_calc_x_lookup[0][1]) { - DCCP_WARN("fvalue %d smaller than resolution\n", fvalue); - return tfrc_calc_x_lookup[0][1]; + DCCP_WARN("fvalue %u smaller than resolution\n", fvalue); + return TFRC_SMALLEST_P; } if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) { - DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue); + DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue); return 1000000; } diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4a4f6ce..933a0ec 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -32,7 +32,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, if (len > 3) { DCCP_WARN("invalid length %d\n", len); - return 1; + return -EINVAL; } /* XXX add further sanity checks */ diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index b348dd7..37d27bc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&dccp_request_sock_ops); + req = inet_reqsk_alloc(&dccp_request_sock_ops); if (req == NULL) goto drop; @@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; /* * Step 3: Process LISTEN state @@ -739,8 +738,8 @@ int dccp_invalid_packet(struct sk_buff *skb) * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_type >= DCCP_PKT_DATA && - dh->dccph_type <= DCCP_PKT_DATAACK && dh->dccph_x == 0) { + if ((dh->dccph_type < DCCP_PKT_DATA || + dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", dccp_packet_name(dh->dccph_type)); return 1; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9b1129b..f7fe2a5 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) ireq6 = inet6_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); - ireq6->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 33ad483..66dca5b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -165,12 +165,12 @@ out_free: /* See 
dccp_v4_conn_request */ newdmsk->dccpms_sequence_window = req->rcv_wnd; - newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; - dccp_update_gsr(newsk, dreq->dreq_isr); - - newdp->dccps_iss = dreq->dreq_iss; + newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); + newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + /* * SWL and AWL are initially adjusted so that they are not less than * the initial Sequence Numbers received and sent, respectively: diff --git a/net/dccp/options.c b/net/dccp/options.c index d2a84a2..43bc24e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -107,9 +107,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, * * CCID-specific options are ignored during connection setup, as * negotiation may still be in progress (see RFC 4340, 10.3). + * The same applies to Ack Vectors, as these depend on the CCID. * */ - if (dreq != NULL && opt >= 128) + if (dreq != NULL && (opt >= 128 || + opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) goto ignore_option; switch (opt) { diff --git a/net/dccp/output.c b/net/dccp/output.c index 1f8a9b6..fe20068 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -508,6 +508,7 @@ void dccp_send_ack(struct sock *sk) EXPORT_SYMBOL_GPL(dccp_send_ack); +#if 0 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */ void dccp_send_delayed_ack(struct sock *sk) { @@ -538,6 +539,7 @@ void dccp_send_delayed_ack(struct sock *sk) icsk->icsk_ack.timeout = timeout; sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } +#endif void dccp_send_sync(struct sock *sk, const u64 ackno, const enum dccp_pkt_type pkt_type) diff --git a/net/dccp/probe.c b/net/dccp/probe.c index 0bcdc92..81368a7 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -42,7 +42,7 @@ static int bufsize = 64 * 1024; static const char procname[] = "dccpprobe"; -struct { +static struct { struct kfifo *fifo; spinlock_t lock; wait_queue_head_t wait; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2f665a5..f50e88b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -235,14 +235,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) else min_mtu -= 21; - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) { + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { dst->metrics[RTAX_MTU-1] = mtu; dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - if (dst->metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(dst, RTAX_ADVMSS) > mss) dst->metrics[RTAX_ADVMSS-1] = mss; } } @@ -805,12 +805,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) rt->u.dst.neighbour = n; } - if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || - rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu) + if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || + dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 || - rt->u.dst.metrics[RTAX_ADVMSS-1] > mss) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || + dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; return 0; } diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 68d1544..7c9bb13 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -340,7 +340,7 @@ static 
int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_hold(dev); - skb = sock_alloc_send_skb(sk, len+LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len+LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 68b72a7..9b539fa 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -570,7 +570,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; @@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq, struct neighbour *n) { char hbuffer[HBUFFERLEN]; - const char hexbuf[] = "0123456789ABCDEF"; int k, j; char tbuf[16]; struct net_device *dev = n->dev; @@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq, else { #endif for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { - hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; - hbuffer[k++] = hexbuf[n->ha[j] & 15]; + hbuffer[k++] = hex_asc_hi(n->ha[j]); + hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } hbuffer[--k] = 0; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 4637ded..2c0e457 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -338,7 +338,7 @@ static int cipso_v4_cache_check(const unsigned char *key, return -ENOENT; hash = cipso_v4_map_cache_hash(key, key_len); - bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + bkt = hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { if (entry->hash == hash && @@ -417,7 +417,7 @@ int cipso_v4_cache_add(const struct sk_buff *skb, atomic_inc(&secattr->cache->refcount); entry->lsm_data = secattr->cache; - bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); + bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { list_add(&entry->list, &cipso_v4_cache[bkt].list); @@ -983,7 +983,7 @@ static int cipso_v4_map_cat_enum_valid(const struct cipso_v4_doi *doi_def, return -EFAULT; for (iter = 0; iter < enumcat_len; iter += 2) { - cat = ntohs(get_unaligned((__be16 *)&enumcat[iter])); + cat = get_unaligned_be16(&enumcat[iter]); if (cat <= cat_prev) return -EFAULT; cat_prev = cat; @@ -1052,7 +1052,7 @@ static int cipso_v4_map_cat_enum_ntoh(const struct cipso_v4_doi *doi_def, for (iter = 0; iter < net_cat_len; iter += 2) { ret_val = netlbl_secattr_catmap_setbit(secattr->attr.mls.cat, - ntohs(get_unaligned((__be16 *)&net_cat[iter])), + get_unaligned_be16(&net_cat[iter]), GFP_ATOMIC); if (ret_val != 0) return ret_val; @@ -1086,10 +1086,9 @@ static int cipso_v4_map_cat_rng_valid(const struct cipso_v4_doi *doi_def, return -EFAULT; for (iter = 0; iter < rngcat_len; iter += 4) { - cat_high = ntohs(get_unaligned((__be16 *)&rngcat[iter])); + cat_high = get_unaligned_be16(&rngcat[iter]); if ((iter + 4) <= rngcat_len) - cat_low = ntohs( - get_unaligned((__be16 *)&rngcat[iter + 2])); + cat_low = get_unaligned_be16(&rngcat[iter + 2]); else cat_low = 0; @@ -1188,10 +1187,9 @@ static int cipso_v4_map_cat_rng_ntoh(const struct cipso_v4_doi *doi_def, u16 cat_high; for (net_iter = 0; net_iter < net_cat_len; net_iter += 4) { - cat_high = ntohs(get_unaligned((__be16 *)&net_cat[net_iter])); + cat_high = get_unaligned_be16(&net_cat[net_iter]); if 
((net_iter + 4) <= net_cat_len) - cat_low = ntohs( - get_unaligned((__be16 *)&net_cat[net_iter + 2])); + cat_low = get_unaligned_be16(&net_cat[net_iter + 2]); else cat_low = 0; @@ -1562,7 +1560,7 @@ int cipso_v4_validate(unsigned char **option) } rcu_read_lock(); - doi_def = cipso_v4_doi_search(ntohl(get_unaligned((__be32 *)&opt[2]))); + doi_def = cipso_v4_doi_search(get_unaligned_be32(&opt[2])); if (doi_def == NULL) { err_offset = 2; goto validate_return_locked; @@ -1843,7 +1841,7 @@ static int cipso_v4_getattr(const unsigned char *cipso, if (cipso_v4_cache_check(cipso, cipso[1], secattr) == 0) return 0; - doi = ntohl(get_unaligned((__be32 *)&cipso[2])); + doi = get_unaligned_be32(&cipso[2]); rcu_read_lock(); doi_def = cipso_v4_doi_search(doi); if (doi_def == NULL) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6848e47..79a7ef6 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, - [IFA_ANYCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; @@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); - if (tb[IFA_ANYCAST]) - ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); - if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else @@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; - ifa->ifa_anycast = 0; ifa->ifa_scope = 0; } @@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(4) /* IFA_ANYCAST */ + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } @@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, if (ifa->ifa_broadcast) NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - if (ifa->ifa_anycast) - NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); - if (ifa->ifa_label[0]) NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1557a..0b2ac6a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, - [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3b83c34..0d4d728 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -960,7 +960,10 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; - rtm->rtm_table = tb_id; + if (tb_id < 256) + rtm->rtm_table = tb_id; + else + rtm->rtm_table = RT_TABLE_COMPAT; NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4b02d14..e1600ad 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1359,17 +1359,17 @@ static int check_leaf(struct trie *t, struct leaf *l, t->stats.semantic_match_miss++; #endif if (err <= 0) - return 
plen; + return err; } - return -1; + return 1; } static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { struct trie *t = (struct trie *) tb->tb_data; - int plen, ret = 0; + int ret; struct node *n; struct tnode *pn; int pos, bits; @@ -1393,10 +1393,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) - goto failed; - ret = 0; + ret = check_leaf(t, (struct leaf *)n, key, flp, res); goto found; } @@ -1421,11 +1418,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - plen = check_leaf(t, (struct leaf *)n, key, flp, res); - if (plen < 0) + ret = check_leaf(t, (struct leaf *)n, key, flp, res); + if (ret > 0) goto backtrace; - - ret = 0; goto found; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6250f42..2769dc4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -292,7 +292,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct iphdr *pip; struct igmpv3_report *pig; - skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) return NULL; @@ -653,7 +653,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, return -1; } - skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC); + skb=alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 828ea21..ec83448 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, struct inet_connection_sock *icsk = inet_csk(parent); struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct listen_sock *lopt = queue->listen_opt; - int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; int i, budget; @@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } } + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; @@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if (req->retrans < thresh && - !req->rsk_ops->rtx_syn_ack(parent, req)) { + if ((req->retrans < thresh || + (inet_rsk(req)->acked && req->retrans < max_retries)) + && !req->rsk_ops->rtx_syn_ack(parent, req)) { unsigned long timeo; if (req->retrans++ == 0) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 4ed429b..0546a0b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -192,14 +192,21 @@ EXPORT_SYMBOL(inet_frag_evictor); static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, struct inet_frag_queue *qp_in, struct inet_frags *f, - unsigned int hash, void *arg) + void *arg) { struct inet_frag_queue *qp; #ifdef CONFIG_SMP struct hlist_node *n; #endif + unsigned int hash; write_lock(&f->lock); + /* + * While we stayed w/o the lock other CPU could update + * the rnd seed, so we need to re-calculate the hash + * chain. 
Fortunatelly the qp_in can be used to get one. + */ + hash = f->hashfn(qp_in); #ifdef CONFIG_SMP /* With SMP race we have to recheck hash table, because * such entry could be created on other cpu, while we @@ -247,7 +254,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, } static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, - struct inet_frags *f, void *arg, unsigned int hash) + struct inet_frags *f, void *arg) { struct inet_frag_queue *q; @@ -255,7 +262,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, if (q == NULL) return NULL; - return inet_frag_intern(nf, q, f, hash, arg); + return inet_frag_intern(nf, q, f, arg); } struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, @@ -264,7 +271,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *q; struct hlist_node *n; - read_lock(&f->lock); hlist_for_each_entry(q, n, &f->hash[hash], list) { if (q->net == nf && f->match(q, key)) { atomic_inc(&q->refcnt); @@ -274,6 +280,6 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, } read_unlock(&f->lock); - return inet_frag_create(nf, f, key, hash); + return inet_frag_create(nf, f, key); } EXPORT_SYMBOL(inet_frag_find); diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 4a4d49f..cfd034a 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -383,8 +383,7 @@ static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, out2: /* send aggregated SKBs to stack */ lro_flush(lro_mgr, lro_desc); -out: /* Original SKB has to be posted to stack */ - skb->ip_summed = lro_mgr->ip_summed; +out: return 1; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6a..37221f6 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -229,6 +229,8 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) arg.iph = iph; arg.user = user; + + read_lock(&ip4_frags.lock); hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ada033..4342cba 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev) static void ipgre_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. 
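The inet_fragment hunks above are worth a note: the hash is recomputed after f->lock is taken because another CPU may reseed the hash function (the "rnd" the new comment refers to) between the caller's lookup and the insert, so a bucket index computed outside the lock can point at the wrong chain. A minimal sketch of the pattern, assuming simplified stand-ins for the kernel's inet_frags machinery (struct frags, struct frag_queue and frag_hash() below are illustrative, not the kernel API):

#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct frag_queue {
	u32 id;
	struct hlist_node list;
};

struct frags {
	rwlock_t lock;
	u32 rnd;			/* hash seed; may be reseeded at any time */
	struct hlist_head hash[64];
};

/* Illustrative helper: bucket index from an id and the current seed. */
static unsigned int frag_hash(const struct frags *f, u32 id)
{
	return jhash_1word(id, f->rnd) & 63;
}

static void frag_insert(struct frags *f, struct frag_queue *q)
{
	unsigned int h;

	write_lock(&f->lock);
	/* Recompute under the lock: f->rnd may have changed since the
	 * caller last hashed this id, so a precomputed index is stale. */
	h = frag_hash(f, q->id);
	hlist_add_head(&q->list, &f->hash[h]);
	write_unlock(&f->lock);
}

The same reasoning explains the ip_fragment.c hunk that takes read_lock(&ip4_frags.lock) before calling ipqhashfn(): the seed must not move between hashing and the hash-table walk.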
@@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipgre_lock); return; -#else - struct iphdr *iph = (struct iphdr*)dp; - struct iphdr *eiph; - __be16 *p = (__be16*)(dp+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - __be16 flags; - int grehlen = (iph->ihl<<2) + 4; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (p[1] != htons(ETH_P_IP)) - return; - - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_CSUM) - grehlen += 4; - if (flags&GRE_KEY) - grehlen += 4; - if (flags&GRE_SEQ) - grehlen += 4; - } - if (len < grehlen + sizeof(struct iphdr)) - return; - eiph = (struct iphdr*)(dp + grehlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < (iph->ihl<<2)) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - n -= grehlen; - rel_info = htonl(n << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < grehlen+68) - return; - n -= grehlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. 
--ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_GRE; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { - kfree_skb(skb2); - return; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_IPGRE) { - ip_rt_put(rt); - kfree_skb(skb2); - return; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_IPGRE) { - kfree_skb(skb2); - return; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); -#endif } static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 7b4bad6..ff77a4a 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -397,7 +397,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = ip_hdr(skb); /* - * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? * diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 89dee43..ed45037 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -710,14 +710,14 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct net_device *dev = d->dev; struct sk_buff *skb; struct bootp_pkt *b; - int hh_len = LL_RESERVED_SPACE(dev); struct iphdr *h; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + hh_len + 15, GFP_KERNEL); + skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + GFP_KERNEL); if (!skb) return; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(dev)); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 149111f..af5cb53 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev) static int ipip_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. 
*/ @@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct iphdr *eiph; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (len < hlen + sizeof(struct iphdr)) - return 0; - eiph = (struct iphdr*)(dp + hlen); - - switch (type) { - default: - return 0; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < hlen) - return 0; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < hlen+68) - return 0; - n -= hlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return 0; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_daddr = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { - kfree_skb(skb2); - return 0; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_daddr = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); - kfree_skb(skb2); - return 0; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) { - kfree_skb(skb2); - return 0; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return 0; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); - return 0; -#endif } static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 22d8e7c..1819ad7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -169,14 +169,14 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, /* create proc dir entry */ 
sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = proc_create(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir, &clusterip_proc_fops); + c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir, + &clusterip_proc_fops, c); if (!c->pde) { kfree(c); return NULL; } } - c->pde->data = c; #endif write_lock_bh(&clusterip_lock); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 0457859..d2a887f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -556,7 +556,6 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) spin_lock_bh(&nf_nat_lock); hlist_del_rcu(&nat->bysource); - nat->ct = NULL; spin_unlock_bh(&nf_nat_lock); } @@ -570,8 +569,8 @@ static void nf_nat_move_storage(void *new, void *old) return; spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); new_nat->ct = ct; + hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); spin_unlock_bh(&nf_nat_lock); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 5daefad..ffeaffc 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -232,6 +232,11 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } @@ -250,6 +255,10 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx, if (!asn1_length_decode(ctx, &def, &len)) return 0; + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -430,10 +439,15 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, unsigned int *len) { unsigned long subid; - unsigned int size; unsigned long *optr; + size_t size; size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); if (*oid == NULL) { if (net_ratelimit()) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 11d7f75..37a1ecd 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -322,7 +322,6 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - int hh_len; struct iphdr *iph; struct sk_buff *skb; unsigned int iphlen; @@ -336,13 +335,12 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -610,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw_destroy(struct sock *sk) +{ + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* This gets rid of all the nasties in af_inet. 
-DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -822,6 +828,7 @@ struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, + .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = raw_ioctl, @@ -927,7 +934,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) srcp = inet->num; seq_printf(seq, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d\n", i, src, srcp, dest, destp, sp->sk_state, atomic_read(&sp->sk_wmem_alloc), atomic_read(&sp->sk_rmem_alloc), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5e3685c..96be336 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, - .local_out = ip_local_out, + .local_out = __ip_local_out, .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -1468,14 +1468,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, /* BSD 4.2 compatibility hack :-( */ if (mtu == 0 && - old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] && + old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) && old_mtu >= 68 + (iph->ihl << 2)) old_mtu -= iph->ihl << 2; mtu = guess_mtu(old_mtu); } - if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) { - if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { + if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) { + if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) { dst_confirm(&rth->u.dst); if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1497,7 +1497,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { - if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 && + if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { mtu = ip_rt_min_pmtu; @@ -1613,7 +1613,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) sizeof(rt->u.dst.metrics)); if (fi->fib_mtu == 0) { rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) && + if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && rt->u.dst.dev->mtu > 576) rt->u.dst.metrics[RTAX_MTU-1] = 576; @@ -1624,14 +1624,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) } else rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) + if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU) rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, ip_rt_min_advmss); - if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40) + if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; #ifdef CONFIG_NET_CLS_ROUTE @@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb, if (err) flags |= RTCF_DIRECTSRC; - if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && + if (out_dev == in_dev && err && (IN_DEV_SHARED_MEDIA(out_dev) || 
inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) flags |= RTCF_DOREDIRECT; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 73ba989..d182a2a 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, cookie_check_timestamp(&tcp_opt); ret = NULL; - req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ if (!req) goto out; @@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; - ireq->opt = NULL; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->rcv_wscale = tcp_opt.rcv_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f886531..1d723de1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -255,11 +255,14 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/skbuff.h> +#include <linux/scatterlist.h> #include <linux/splice.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/random.h> #include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/swap.h> #include <linux/cache.h> #include <linux/err.h> #include <linux/crypto.h> @@ -1206,7 +1209,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, return -ENOTCONN; while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { if (offset < skb->len) { - size_t used, len; + int used; + size_t len; len = skb->len - offset; /* Stop reading if we hit a patch of urgent data */ @@ -1227,7 +1231,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, copied += used; offset += used; } - if (offset != skb->len) + /* + * If recv_actor drops the lock (e.g. TCP splice + * receive) the skb pointer might be invalid when + * getting here: tcp_collapse might have deleted it + * while aggregating skbs from the socket queue. + */ + skb = tcp_recv_skb(sk, seq-1, &offset); + if (!skb || (offset+1 != skb->len)) break; } if (tcp_hdr(skb)->fin) { @@ -2105,12 +2116,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - if (val < 0) { - err = -EINVAL; - } else { - if (val > MAX_TCP_ACCEPT_DEFERRED) - val = MAX_TCP_ACCEPT_DEFERRED; - icsk->icsk_accept_queue.rskq_defer_accept = val; + icsk->icsk_accept_queue.rskq_defer_accept = 0; + if (val > 0) { + /* Translate value in seconds to number of + * retransmits */ + while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && + val > ((TCP_TIMEOUT_INIT / HZ) << + icsk->icsk_accept_queue.rskq_defer_accept)) + icsk->icsk_accept_queue.rskq_defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept++; } break; @@ -2292,7 +2306,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = icsk->icsk_accept_queue.rskq_defer_accept; + val = !icsk->icsk_accept_queue.rskq_defer_accept ? 
0 : + ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -2609,7 +2624,7 @@ __setup("thash_entries=", set_thash_entries); void __init tcp_init(void) { struct sk_buff *skb = NULL; - unsigned long limit; + unsigned long nr_pages, limit; int order, i, max_share; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -2678,8 +2693,9 @@ void __init tcp_init(void) * is up to 1/2 at 256 MB, decreasing toward zero with the amount of * memory, with a floor of 128 pages. */ - limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); - limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + nr_pages = totalram_pages - totalhigh_pages; + limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); limit = max(limit, 128UL); sysctl_tcp_mem[0] = limit / 4 * 3; sysctl_tcp_mem[1] = limit; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 44618b6..bfcbd14 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -101,8 +101,10 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (!tcp_is_cwnd_limited(sk, in_flight)) return; - if (!ca->hybla_en) - return tcp_reno_cong_avoid(sk, ack, in_flight); + if (!ca->hybla_en) { + tcp_reno_cong_avoid(sk, ack, in_flight); + return; + } if (ca->rho == 0) hybla_recalc_param(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0298f80..cad73b7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -66,6 +66,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/sysctl.h> +#include <net/dst.h> #include <net/tcp.h> #include <net/inet_common.h> #include <linux/ipsec.h> @@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) -#define IsSackFrto() (sysctl_tcp_frto == 0x2) - #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk) u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst->metrics[RTAX_RTO_MIN - 1]; + rto_min = dst_metric(dst, RTAX_RTO_MIN); return rto_min; } @@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_RTTVAR - 1] = m; else dst->metrics[RTAX_RTTVAR-1] -= - (dst->metrics[RTAX_RTTVAR-1] - m)>>2; + (dst_metric(dst, RTAX_RTTVAR) - m)>>2; } if (tp->snd_ssthresh >= 0xFFFF) { @@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk) dst->metrics[RTAX_SSTHRESH-1] = max(tp->snd_cwnd >> 1, tp->snd_ssthresh); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1; + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. 
*/ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1; - if (dst->metrics[RTAX_SSTHRESH-1] && + dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && - tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1]) + tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; } if (!dst_metric_locked(dst, RTAX_REORDERING)) { - if (dst->metrics[RTAX_REORDERING-1] < tp->reordering && + if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) dst->metrics[RTAX_REORDERING-1] = tp->reordering; } @@ -1172,8 +1171,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { - u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq)); - u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq)); + u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq); + u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq); int dup_sack = 0; if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) { @@ -1181,8 +1180,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb, tcp_dsack_seen(tp); NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV); } else if (num_sacks > 1) { - u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq)); - u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq)); + u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq); + u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq); if (!after(end_seq_0, end_seq_1) && !before(start_seq_0, start_seq_1)) { @@ -1393,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, if (before(next_dup->start_seq, skip_to_seq)) { skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_walk(skb, sk, NULL, + next_dup->start_seq, next_dup->end_seq, + 1, fack_count, reord, flag); } return skb; @@ -1453,8 +1452,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, for (i = 0; i < num_sacks; i++) { int dup_sack = !i && found_dup_sack; - sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq)); - sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq)); + sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq); + sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq); if (!tcp_is_sackblock_valid(tp, dup_sack, sp[used_sacks].start_seq, @@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } +static int tcp_is_sackfrto(const struct tcp_sock *tp) +{ + return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); +} + /* F-RTO can only be used if TCP has never retransmitted anything other than * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) */ @@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk) if (icsk->icsk_mtup.probe_size) return 0; - if (IsSackFrto()) + if (tcp_is_sackfrto(tp)) return 1; /* Avoid expensive walking of rexmit queue if possible */ @@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk) /* Earlier loss recovery underway (see RFC4138; Appendix B). * The last condition is necessary at least in tp->frto_counter case. 
*/ - if (IsSackFrto() && (tp->frto_counter || + if (tcp_is_sackfrto(tp) && (tp->frto_counter || ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && after(tp->high_seq, tp->snd_una)) { tp->frto_highmark = tp->high_seq; @@ -1838,9 +1842,16 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; } - /* Don't lost mark skbs that were fwd transmitted after RTO */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) && - !after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { + /* Marking forward transmissions that were made after RTO lost + * can cause unnecessary retransmissions in some scenarios, + * SACK blocks will mitigate that in some but not in all cases. + * We used to not mark them but it was causing break-ups with + * receivers that do only in-order receival. + * + * TODO: we could detect presence of such receiver and select + * different behavior per flow. + */ + if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); } @@ -1856,7 +1867,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->frto_highmark; + tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); tcp_clear_retrans_hints_partial(tp); @@ -2472,28 +2483,34 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +static void tcp_try_keep_open(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int state = TCP_CA_Open; + + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + state = TCP_CA_Disorder; + + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); + tp->high_seq = tp->snd_nxt; + } +} + static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); tcp_verify_left_out(tp); - if (tp->retrans_out == 0) + if (!tp->frto_counter && tp->retrans_out == 0) tp->retrans_stamp = 0; if (flag & FLAG_ECE) tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { - int state = TCP_CA_Open; - - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) - state = TCP_CA_Disorder; - - if (inet_csk(sk)->icsk_ca_state != state) { - tcp_set_ca_state(sk, state); - tp->high_seq = tp->snd_nxt; - } + tcp_try_keep_open(sk); tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); @@ -3123,7 +3140,7 @@ static int tcp_process_frto(struct sock *sk, int flag) return 1; } - if (!IsSackFrto() || tcp_is_reno(tp)) { + if (!tcp_is_sackfrto(tp)) { /* RFC4138 shortcoming in step 2; should also have case c): * ACK isn't duplicate nor advances window, e.g., opposite dir * data, winupdate @@ -3299,8 +3316,11 @@ no_queue: return 1; old_ack: - if (TCP_SKB_CB(skb)->sacked) + if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); + if (icsk->icsk_ca_state == TCP_CA_Open) + tcp_try_keep_open(sk); + } uninteresting_ack: SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3340,7 +3360,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, switch (opcode) { case TCPOPT_MSS: if (opsize == TCPOLEN_MSS && th->syn && !estab) { - u16 in_mss = ntohs(get_unaligned((__be16 *)ptr)); + u16 in_mss = get_unaligned_be16(ptr); if (in_mss) { if (opt_rx->user_mss && opt_rx->user_mss < in_mss) @@ -3369,8 +3389,8 @@ void tcp_parse_options(struct sk_buff *skb, 
struct tcp_options_received *opt_rx, ((estab && opt_rx->tstamp_ok) || (!estab && sysctl_tcp_timestamps))) { opt_rx->saw_tstamp = 1; - opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); - opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); + opt_rx->rcv_tsval = get_unaligned_be32(ptr); + opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4); } break; case TCPOPT_SACK_PERM: @@ -4521,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) } } -static int tcp_defer_accept_check(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (tp->defer_tcp_accept.request) { - int queued_data = tp->rcv_nxt - tp->copied_seq; - int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ? - tcp_hdr((struct sk_buff *) - sk->sk_receive_queue.prev)->fin : 0; - - if (queued_data && hasfin) - queued_data--; - - if (queued_data && - tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) { - if (sock_flag(sk, SOCK_KEEPOPEN)) { - inet_csk_reset_keepalive_timer(sk, - keepalive_time_when(tp)); - } else { - inet_csk_delete_keepalive_timer(sk); - } - - inet_csk_reqsk_queue_add( - tp->defer_tcp_accept.listen_sk, - tp->defer_tcp_accept.request, - sk); - - tp->defer_tcp_accept.listen_sk->sk_data_ready( - tp->defer_tcp_accept.listen_sk, 0); - - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } else if (hasfin || - tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) { - tcp_reset(sk); - return -1; - } - } - return 0; -} - static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) { struct tcp_sock *tp = tcp_sk(sk); @@ -4924,8 +4901,6 @@ step5: tcp_data_snd_check(sk); tcp_ack_snd_check(sk); - - tcp_defer_accept_check(sk); return 0; csum_error: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0e9bc12..ffe869a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,10 +85,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -/* Check TCP sequence numbers in ICMP packets. 
*/ -#define ICMP_MIN_LENGTH 8 - -void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, @@ -1285,7 +1281,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = reqsk_alloc(&tcp_request_sock_ops); + req = inet_reqsk_alloc(&tcp_request_sock_ops); if (!req) goto drop; @@ -1918,14 +1914,6 @@ int tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - if (tp->defer_tcp_accept.request) { - reqsk_free(tp->defer_tcp_accept.request); - sock_put(tp->defer_tcp_accept.listen_sk); - sock_put(sk); - tp->defer_tcp_accept.listen_sk = NULL; - tp->defer_tcp_accept.request = NULL; - } - atomic_dec(&tcp_sockets_allocated); return 0; @@ -2214,9 +2202,6 @@ static int tcp_seq_open(struct inode *inode, struct file *file) struct tcp_iter_state *s; int err; - if (unlikely(afinfo == NULL)) - return -EINVAL; - err = seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct tcp_iter_state)); if (err < 0) @@ -2241,10 +2226,9 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) afinfo->seq_ops.next = tcp_seq_next; afinfo->seq_ops.stop = tcp_seq_stop; - p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops); - if (p) - p->data = afinfo; - else + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + &afinfo->seq_fops, afinfo); + if (!p) rc = -ENOMEM; return rc; } @@ -2307,7 +2291,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) } seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5d %8d %lu %d %p %u %u %u %u %d%n", + "%08X %5d %8d %lu %d %p %lu %lu %u %u %d%n", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : @@ -2319,8 +2303,8 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 019c8c1..8245247 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. - Both ends (listening sockets) accept the new incoming - connection and try to talk to each other. 8-) + If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this + bare ACK. Otherwise, we create an established connection. Both + ends (listening sockets) accept the new incoming connection and try + to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; + /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + inet_rsk(req)->acked = 1; + return NULL; + } + /* OK, ACK is valid, create big socket and * feed this segment to it. It will repeat all * the tests. 
THIS SEGMENT MUST MOVE SOCKET TO @@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_removed(sk, req); - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && - TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { - - /* the accept queue handling is done is est recv slow - * path so lets make sure to start there - */ - tcp_sk(child)->pred_flags = 0; - sock_hold(sk); - sock_hold(child); - tcp_sk(child)->defer_tcp_accept.listen_sk = sk; - tcp_sk(child)->defer_tcp_accept.request = req; - - inet_csk_reset_keepalive_timer(child, - inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ); - } else { - inet_csk_reqsk_queue_add(sk, req, child); - } - + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index debf235..ad993ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - unsigned int cur_mss = tcp_current_mss(sk, 0); + unsigned int cur_mss; int err; /* Inconslusive MTU probe */ @@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; } + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + cur_mss = tcp_current_mss(sk, 0); + /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case @@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) - return -EHOSTUNREACH; /* Routing failure or similar. */ - /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off * since it is cheap to do so and saves bytes on the network. 
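The TCP_DEFER_ACCEPT rework in the tcp.c and tcp_minisocks.c hunks above changes the stored representation: do_tcp_setsockopt() no longer keeps the user's value in seconds but rounds it up to a count of SYN-ACK retransmit periods (each period doubling from TCP_TIMEOUT_INIT), do_tcp_getsockopt() translates that count back into seconds, and tcp_check_req() silently drops a bare ACK while the count is set, leaving the request on the SYN queue. Below is a minimal user-space sketch of that rounding, not kernel code: secs_to_retrans() is a hypothetical name, and the 3-second initial timeout is an assumption about TCP_TIMEOUT_INIT / HZ.

#include <stdio.h>

#define TIMEOUT_INIT_SECS 3LL	/* assumed value of TCP_TIMEOUT_INIT / HZ */

/* Round a TCP_DEFER_ACCEPT value in seconds up to the number of
 * exponentially backed-off SYN-ACK retransmit periods it spans,
 * mirroring the loop added to do_tcp_setsockopt(). */
static unsigned int secs_to_retrans(int secs)
{
	unsigned int n = 0;

	if (secs > 0) {
		while (n < 32 && secs > (TIMEOUT_INIT_SECS << n))
			n++;
		n++;
	}
	return n;
}

int main(void)
{
	/* 10s exceeds the 3s and 6s periods but not 12s, so three periods
	 * are stored and getsockopt() would report 3 << (3 - 1) = 12s. */
	printf("TCP_DEFER_ACCEPT 10s -> %u retransmit periods\n",
	       secs_to_retrans(10));
	return 0;
}

Keeping only a retransmit count is what allows the defer_tcp_accept bookkeeping and its keepalive-timer plumbing to be deleted elsewhere in this patch: a deferred request now simply rides the normal SYN-ACK backoff. The tcp_retransmit_skb() hunk directly above makes a related ordering fix, calling rebuild_header() before tcp_current_mss() is sampled, presumably so a retransmit cannot compute its MSS from a stale route.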
@@ -2129,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + + TCP_INC_STATS(TCP_MIB_OUTRSTS); } /* WARNING: This routine must only be called when we have already sent diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 5ff0ce6..7ddc30f 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -224,7 +224,7 @@ static __init int tcpprobe_init(void) if (bufsize < 0) return -EINVAL; - tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL); + tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL); if (!tcp_probe.log) goto err0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4de68cf..63ed9d6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { - tcp_send_active_reset(sk, GFP_ATOMIC); - goto death; - } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) goto out; diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 0e1a8c9..14504da 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -167,8 +167,10 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) struct tcp_sock *tp = tcp_sk(sk); struct vegas *vegas = inet_csk_ca(sk); - if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(sk, ack, in_flight); + if (!vegas->doing_vegas_now) { + tcp_reno_cong_avoid(sk, ack, in_flight); + return; + } /* The key players are v_beg_snd_una and v_beg_snd_nxt. * diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 2bf618a3..d08b2e8 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -119,8 +119,10 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); - if (!veno->doing_veno_now) - return tcp_reno_cong_avoid(sk, ack, in_flight); + if (!veno->doing_veno_now) { + tcp_reno_cong_avoid(sk, ack, in_flight); + return; + } /* limited by applications */ if (!tcp_is_cwnd_limited(sk, in_flight)) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d3b709a..cb1f0e8 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; for (handler = tunnel64_handlers; handler; handler = handler->next) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f535e31..56fcda3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info) /* * Throw away all pending data and cancel the corking. Socket is locked. 
*/ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } +EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum_outgoing - handle outgoing HW checksumming @@ -1605,10 +1606,9 @@ int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) afinfo->seq_ops.next = udp_seq_next; afinfo->seq_ops.stop = udp_seq_stop; - p = proc_net_fops_create(net, afinfo->name, S_IRUGO, &afinfo->seq_fops); - if (p) - p->data = afinfo; - else + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + &afinfo->seq_fops, afinfo); + if (!p) rc = -ENOMEM; return rc; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 584e6d7..7135279 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -52,7 +52,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) IP_ECN_clear(top_iph); top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? - 0 : XFRM_MODE_SKB_CB(skb)->frag_off; + 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e591e09..ff61a5c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) onlink = -1; spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ if (time_before(expires, ifa->tstamp + lifetime * HZ)) expires = ifa->tstamp + lifetime * HZ; @@ -744,12 +749,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); + addrconf_del_timer(ifp); + ipv6_ifa_notify(RTM_DELADDR, ifp); atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); - addrconf_del_timer(ifp); - /* * Purge or update corresponding prefix * @@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 valid_lft; __u32 prefered_lft; int addr_type; - unsigned long rt_expires; struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -1764,41 +1768,49 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - /* Avoid arithmetic overflow. Really, we could - save rt_expires in seconds, likely valid_lft, - but it would require division in fib gc, that it - not good. - */ - if (valid_lft >= 0x7FFFFFFF/HZ) - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - else - rt_expires = valid_lft * HZ; - - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. - */ - if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; - if (pinfo->onlink) { struct rt6_info *rt; + unsigned long rt_expires; + + /* Avoid arithmetic overflow. Really, we could + * save rt_expires in seconds, likely valid_lft, + * but it would require division in fib gc, that it + * not good. 
+ */ + if (HZ > USER_HZ) + rt_expires = addrconf_timeout_fixup(valid_lft, HZ); + else + rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); + + if (addrconf_finite_timeout(rt_expires)) + rt_expires *= HZ; + rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (rt->rt6i_flags&RTF_EXPIRES) { - if (valid_lft == 0) { - ip6_del_rt(rt); - rt = NULL; - } else { - rt->rt6i_expires = jiffies + rt_expires; - } + /* Autoconf prefix route */ + if (valid_lft == 0) { + ip6_del_rt(rt); + rt = NULL; + } else if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + rt->rt6i_expires = jiffies + rt_expires; + rt->rt6i_flags |= RTF_EXPIRES; + } else { + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->rt6i_expires = 0; } } else if (valid_lft) { + clock_t expires = 0; + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + if (addrconf_finite_timeout(rt_expires)) { + /* not infinity */ + flags |= RTF_EXPIRES; + expires = jiffies_to_clock_t(rt_expires); + } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); + dev, expires, flags); } if (rt) dst_release(&rt->u.dst); @@ -2014,17 +2026,22 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, - int plen, __u8 ifa_flags, __u32 prefered_lft, + unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; int scope; - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; + unsigned long timeout; ASSERT_RTNL(); + if (plen > 128) + return -EINVAL; + /* check the lifetime */ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; @@ -2038,17 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, scope = ipv6_addr_scope(pfx); - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; + flags = RTF_EXPIRES; + } else { + expires = 0; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + ifa_flags |= IFA_F_PERMANENT; + } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); @@ -2060,7 +2083,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -2076,12 +2099,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, } static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, - int plen) + unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + if (plen > 128) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; @@ -3148,22 +3174,30 @@ 
inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, u32 prefered_lft, u32 valid_lft) { - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; + unsigned long timeout; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; + flags = RTF_EXPIRES; + } else { + expires = 0; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + ifa_flags |= IFA_F_PERMANENT; + } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } spin_lock_bh(&ifp->lock); ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; @@ -3176,7 +3210,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, ipv6_ifa_notify(0, ifp); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); addrconf_verify(0); return 0; @@ -4242,7 +4276,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, - NULL); + ndisc_ifinfo_sysctl_strategy); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3c6aafb..e84b3fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -191,7 +191,7 @@ lookup_protocol: np->mcast_hops = -1; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->ipv6only = init_net.ipv6.sysctl.bindv6only; + np->ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. 
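A pattern worth noting across the addrconf.c hunks above (and the net/ipv6/route.c ones further down): raw 32-bit lifetimes, whether from manual address configuration or router advertisements, are now funneled through addrconf_timeout_fixup(), which passes the all-ones "infinite" value through and clamps anything finite so a later multiplication by HZ (or USER_HZ) cannot overflow a long; addrconf_finite_timeout() then picks between a real RTF_EXPIRES expiry and the permanent/no-expiry case. The sketch below is a self-contained user-space rendering of those assumed semantics; the bodies are illustrative stand-ins, not the real helpers.

#include <stdio.h>
#include <limits.h>

#define INFINITY_LIFE_TIME 0xffffffffUL

/* Stand-in for addrconf_timeout_fixup(): infinity passes through,
 * finite lifetimes are clamped so (timeout * unit) fits in a long. */
static unsigned long timeout_fixup(unsigned long lifetime, unsigned int unit)
{
	if (lifetime == INFINITY_LIFE_TIME)
		return ~0UL;
	if (lifetime > LONG_MAX / unit)
		return LONG_MAX / unit;
	return lifetime;
}

/* Stand-in for addrconf_finite_timeout(). */
static int finite_timeout(unsigned long timeout)
{
	return timeout != ~0UL;
}

int main(void)
{
	unsigned long t = timeout_fixup(3600, 100);	/* one hour at HZ=100 */

	if (finite_timeout(t))
		printf("finite: expires in %lu s -> RTF_EXPIRES\n", t);

	t = timeout_fixup(INFINITY_LIFE_TIME, 100);
	if (!finite_timeout(t))
		printf("infinite -> IFA_F_PERMANENT, no expiry\n");
	return 0;
}

This is the shape of inet6_addr_add() and inet6_addr_modify() above: an infinite valid_lft becomes IFA_F_PERMANENT with expires = 0 and no RTF_EXPIRES, a finite one becomes RTF_EXPIRES plus a clock_t expiry, and a preferred lifetime that fixes up to zero marks the address IFA_F_DEPRECATED.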
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 94fa6ae..0f0f94a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -509,7 +510,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { int addr_type; - struct net_device *dev = NULL; if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; @@ -522,6 +522,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: case IPV6_2292PKTINFO: + { + struct net_device *dev = NULL; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -535,32 +538,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->oif = src_info->ipi6_ifindex; } - addr_type = ipv6_addr_type(&src_info->ipi6_addr); + addr_type = __ipv6_addr_type(&src_info->ipi6_addr); - if (addr_type == IPV6_ADDR_ANY) - break; + if (fl->oif) { + dev = dev_get_by_index(net, fl->oif); + if (!dev) + return -ENODEV; + } else if (addr_type & IPV6_ADDR_LINKLOCAL) + return -EINVAL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (!src_info->ipi6_ifindex) - return -EINVAL; - else { - dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); - if (!dev) - return -ENODEV; - } - } - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, - dev, 0)) { - if (dev) - dev_put(dev); - err = -EINVAL; - goto exit_f; + if (addr_type != IPV6_ADDR_ANY) { + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; + if (!ipv6_chk_addr(net, &src_info->ipi6_addr, + strict ? 
dev : NULL, 0)) + err = -EINVAL; + else + ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } + if (dev) dev_put(dev); - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + if (err) + goto exit_f; + break; + } case IPV6_FLOWINFO: if (cmsg->cmsg_len < CMSG_LEN(4)) { @@ -702,6 +705,11 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, } *hlimit = *(int *)CMSG_DATA(cmsg); + if (*hlimit < -1 || *hlimit > 0xff) { + err = -EINVAL; + goto exit_f; + } + break; case IPV6_TCLASS: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cd1c99..dcf94fd 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -445,7 +445,7 @@ looped_back: kfree_skb(skb); return -1; } - if (!ipv6_chk_home_addr(&init_net, addr)) { + if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eb7a9403..37a4e77 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4e5c861..17eb48b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + /* + * RFC4291 2.5.3 + * A packet received on an interface with a destination address + * of loopback must be dropped. + */ + if (!(dev->flags & IFF_LOOPBACK) && + ipv6_addr_loopback(&hdr->daddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0af2e05..48cdce9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -780,7 +780,7 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2de3c46..1479618 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -197,7 +197,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, - "%2Zd %-10s %8ld %7ld %8ld %7ld %05X\n", + "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", vif - vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56d55fe..86e28a7 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -67,7 +67,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) /* RA packet may be delivered ONLY to IPPROTO_RAW socket */ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW) - return -EINVAL; + return -ENOPROTOOPT; new_ra = (sel>=0) ? 
kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; @@ -161,9 +161,17 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; struct sk_buff *pktopt; - if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && - sk->sk_protocol != IPPROTO_TCP) + if (sk->sk_type == SOCK_RAW) + break; + + if (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_UDPLITE) { + struct udp_sock *up = udp_sk(sk); + if (up->pending == AF_INET6) { + retv = -EBUSY; + break; + } + } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { @@ -337,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + + /* remove any sticky options header with a zero option + * length, per RFC3542. + */ if (optlen == 0) optval = NULL; + else if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || optlen > 8 * 255) + goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); @@ -416,7 +427,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -438,7 +449,7 @@ done: case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) @@ -450,13 +461,15 @@ done: case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; + if (val != valbool) + goto e_inval; np->mc_loop = valbool; retv = 0; break; case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) - goto e_inval; + break; if (optlen < sizeof(int)) goto e_inval; @@ -832,7 +845,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_user(optval, hdr, len)) return -EFAULT; - return ipv6_optlen(hdr); + return len; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, @@ -852,7 +865,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) - return -EINVAL; + return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) return -ENOTCONN; val = sk->sk_family; @@ -866,6 +879,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) return -EFAULT; + if (gsf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; lock_sock(sk); err = ip6_mc_msfget(sk, &gsf, (struct group_filter __user *)optval, optlen); @@ -975,6 +990,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); + /* check if ipv6_getsockopt_sticky() returns err code */ + if (len < 0) + return len; return put_user(len, optlen); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54f91ef..fd632dd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1411,7 +1411,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) 
here */ - skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err); + skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); if (!skb) return NULL; @@ -1790,7 +1790,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); + skb = sock_alloc_send_skb(sk, LL_ALLOCATED_SPACE(dev) + full_len, 1, &err); if (skb == NULL) { rcu_read_lock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2c74885..282fdb3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void __ndisc_send(struct net_device *dev, skb = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (!skb) { ND_PRINTK0(KERN_ERR @@ -1521,7 +1521,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff = sock_alloc_send_skb(sk, (MAX_HEADER + sizeof(struct ipv6hdr) + - len + LL_RESERVED_SPACE(dev)), + len + LL_ALLOCATED_SPACE(dev)), 1, &err); if (buff == NULL) { ND_PRINTK0(KERN_ERR @@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 27a5e8b..f405cea 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -129,7 +129,7 @@ static struct nf_hook_ops ip6t_ops[] __read_mostly = { .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_route_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2dccad4..cf20bc4 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -207,9 +207,12 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.id = id; arg.src = src; arg.dst = dst; + + read_lock_bh(&nf_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + local_bh_enable(); if (q == NULL) goto oom; @@ -638,10 +641,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); fq_put(fq); goto ret_orig; @@ -653,7 +656,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); fq_put(fq); return ret_skb; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index ca8b82f..df0736a 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -247,13 +247,11 @@ int snmp6_register_dev(struct inet6_dev *idev) if (!proc_net_devsnmp6) return -ENOENT; - p = proc_create(idev->dev->name, S_IRUGO, - proc_net_devsnmp6, 
&snmp6_seq_fops); + p = proc_create_data(idev->dev->name, S_IRUGO, + proc_net_devsnmp6, &snmp6_seq_fops, idev); if (!p) return -ENOMEM; - p->data = idev; - idev->stats.proc_dir_entry = p; return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 396f0ea..3aee123 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -609,7 +609,6 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *iph; struct sk_buff *skb; - unsigned int hh_len; int err; if (length > rt->u.dst.dev->mtu) { @@ -619,13 +618,12 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (flags&MSG_PROBE) goto out; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - skb = sock_alloc_send_skb(sk, length+hh_len+15, - flags&MSG_DONTWAIT, &err); + skb = sock_alloc_send_skb(sk, + length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, + flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, hh_len); + skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; @@ -815,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1166,6 +1164,15 @@ static void rawv6_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw6_destroy(struct sock *sk) +{ + lock_sock(sk); + ip6_flush_pending_frames(sk); + release_sock(sk); + + return inet6_destroy_sock(sk); +} + static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); @@ -1189,11 +1196,11 @@ struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, + .destroy = raw6_destroy, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, - .destroy = inet6_destroy_sock, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7b247e3..a60d7d1 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -197,6 +197,7 @@ static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq; struct net_device *dev = NULL; + struct net *net; fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); @@ -207,7 +208,8 @@ static void ip6_frag_expire(unsigned long data) fq_kill(fq); - dev = dev_get_by_index(&init_net, fq->iif); + net = container_of(fq->q.net, struct net, ipv6.frags); + dev = dev_get_by_index(net, fq->iif); if (!dev) goto out; @@ -245,6 +247,8 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.id = id; arg.src = src; arg.dst = dst; + + read_lock(&ip6_frags.lock); hash = ip6qhashfn(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a493ad9..7ff6870 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, + .local_out = __ip6_local_out, .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -240,7 +240,7 @@ static inline int rt6_need_strict(struct in6_addr *daddr) static 
inline struct rt6_info *rt6_device_match(struct net *net, struct rt6_info *rt, int oif, - int strict) + int flags) { struct rt6_info *local = NULL; struct rt6_info *sprt; @@ -253,7 +253,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (sprt->rt6i_idev == NULL || sprt->rt6i_idev->dev->ifindex != oif) { - if (strict && oif) + if (flags & RT6_LOOKUP_F_IFACE && oif) continue; if (local && (!oif || local->rt6i_idev->dev->ifindex == oif)) @@ -266,7 +266,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (local) return local; - if (strict) + if (flags & RT6_LOOKUP_F_IFACE) return net->ipv6.ip6_null_entry; } return rt; @@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; @@ -1106,7 +1100,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -1243,11 +1239,11 @@ install_route: } } - if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) + if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; - if (!rt->u.dst.metrics[RTAX_MTU-1]) + if (!dst_metric(&rt->u.dst, RTAX_MTU)) rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); - if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) + if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; @@ -2200,7 +2196,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? 
rt->rt6i_expires - jiffies : 0; + if (!(rt->rt6i_flags & RTF_EXPIRES)) + expires = 0; + else if (rt->rt6i_expires - jiffies < INT_MAX) + expires = rt->rt6i_expires - jiffies; + else + expires = INT_MAX; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 4b2f103..32e871a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -222,15 +222,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) +static int ipip6_tunnel_get_prl(struct ip_tunnel *t, + struct ip_tunnel_prl __user *a) { - struct ip_tunnel_prl *kp; + struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; - cmax = a->datalen / sizeof(*a); - if (cmax > 1 && a->addr != htonl(INADDR_ANY)) + if (copy_from_user(&kprl, a, sizeof(kprl))) + return -EFAULT; + cmax = kprl.datalen / sizeof(kprl); + if (cmax > 1 && kprl.addr != htonl(INADDR_ANY)) cmax = 1; /* For simple GET or for root users, @@ -261,26 +264,25 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a) for (prl = t->prl; prl; prl = prl->next) { if (c > cmax) break; - if (a->addr != htonl(INADDR_ANY) && prl->addr != a->addr) + if (kprl.addr != htonl(INADDR_ANY) && prl->addr != kprl.addr) continue; kp[c].addr = prl->addr; kp[c].flags = prl->flags; c++; - if (a->addr != htonl(INADDR_ANY)) + if (kprl.addr != htonl(INADDR_ANY)) break; } out: read_unlock(&ipip6_lock); len = sizeof(*kp) * c; - ret = len ? copy_to_user(a->data, kp, len) : 0; + ret = 0; + if ((len && copy_to_user(a + 1, kp, len)) || put_user(len, &a->datalen)) + ret = -EFAULT; kfree(kp); - if (ret) - return -EFAULT; - a->datalen = len; - return 0; + return ret; } static int @@ -403,9 +405,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev) static int ipip6_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -462,92 +463,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip6_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct ipv6hdr *iph6; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - int rel_info = 0; - struct sk_buff *skb2; - struct rt6_info *rt6i; - - if (len < hlen + sizeof(struct ipv6hdr)) - return; - iph6 = (struct ipv6hdr*)(dp + hlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - if (icmp_hdr(skb)->un.gateway < hlen) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMPV6_PARAMPROB; - rel_info = icmp_hdr(skb)->un.gateway - hlen; - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* Too complicated case ... */ - return; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. 
--ANK - */ - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - rel_type = ICMPV6_TIME_EXCEED; - rel_code = ICMPV6_EXC_HOPLIMIT; - break; - } - - /* Prepare fake skb to feed it to icmpv6_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)iph6); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0); - if (rt6i && rt6i->rt6i_dev) { - skb2->dev = rt6i->rt6i_dev; - - rt6i = rt6_lookup(dev_net(skb->dev), - &iph6->daddr, &iph6->saddr, NULL, 0); - - if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); - if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - } - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); - } - } - kfree_skb(skb2); - return 0; -#endif } static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) @@ -596,9 +511,9 @@ static int ipip6_rcv(struct sk_buff *skb) } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - kfree_skb(skb); read_unlock(&ipip6_lock); out: + kfree_skb(skb); return 0; } @@ -960,11 +875,20 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) break; case SIOCGETPRL: + err = -EINVAL; + if (dev == sitn->fb_tunnel_dev) + goto done; + err = -ENOENT; + if (!(t = netdev_priv(dev))) + goto done; + err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); + break; + case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: err = -EPERM; - if (cmd != SIOCGETPRL && !capable(CAP_NET_ADMIN)) + if (!capable(CAP_NET_ADMIN)) goto done; err = -EINVAL; if (dev == sitn->fb_tunnel_dev) @@ -977,12 +901,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; switch (cmd) { - case SIOCGETPRL: - err = ipip6_tunnel_get_prl(t, &prl); - if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, - &prl, sizeof(prl))) - err = -EFAULT; - break; case SIOCDELPRL: err = ipip6_tunnel_del_prl(t, &prl); break; @@ -991,8 +909,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } - if (cmd != SIOCGETPRL) - netdev_state_change(dev); + netdev_state_change(dev); break; default: diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 938ce4e..3ecc115 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); - ireq6->pktopts = NULL; if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 715965f..40ea9c3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) treq = inet6_rsk(req); ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); - treq->pktopts = NULL; if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -2037,7 +2036,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p 
%u %u %u %u %d\n", + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, @@ -2053,8 +2052,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - icsk->icsk_rto, - icsk->icsk_ack.ato, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 6323921..669f280 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; for (handler = tunnel46_handlers; handler; handler = handler->next) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1fd784f..dd30962 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); - if (up->pending) { + if (up->pending == AF_INET) + udp_flush_pending_frames(sk); + else if (up->pending) { up->len = 0; up->pending = 0; ip6_flush_pending_frames(sk); @@ -731,7 +733,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -848,12 +850,14 @@ do_append_data: } else { dst_release(dst); } + dst = NULL; } if (err > 0) err = np->recverr ? 
net_xmit_errno(err) : 0; release_sock(sk); out: + dst_release(dst); fl6_sock_release(flowlabel); if (!err) return len; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ae54b20..3eb5bcc 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) init_waitqueue_head(&self->query_wait); - /* Initialise networking socket struct */ - sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ - sk->sk_family = PF_IRDA; - sk->sk_protocol = protocol; - switch (sock->type) { case SOCK_STREAM: sock->ops = &irda_stream_ops; @@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) self->max_sdu_size_rx = TTP_SAR_UNBOUND; break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } + /* Initialise networking socket struct */ + sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ + sk->sk_family = PF_IRDA; + sk->sk_protocol = protocol; + /* Register as a client with IrLMP */ self->ckey = irlmp_register_client(0, NULL, NULL, NULL); self->mask.word = 0xffff; diff --git a/net/irda/discovery.c b/net/irda/discovery.c index bfacef8..a6f99b5 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -40,6 +40,8 @@ #include <net/irda/discovery.h> +#include <asm/unaligned.h> + /* * Function irlmp_add_discovery (cachelog, discovery) * @@ -87,7 +89,7 @@ void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new) */ hashbin_remove_this(cachelog, (irda_queue_t *) node); /* Check if hints bits are unchanged */ - if(u16ho(node->data.hints) == u16ho(new->data.hints)) + if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints)) /* Set time of first discovery for this node */ new->firststamp = node->firststamp; kfree(node); @@ -281,9 +283,9 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, /* Mask out the ones we don't want : * We want to match the discovery mask, and to get only * the most recent one (unless we want old ones) */ - if ((u16ho(discovery->data.hints) & mask) && + if ((get_unaligned((__u16 *)discovery->data.hints) & mask) && ((old_entries) || - ((jiffies - discovery->firststamp) < j_timeout)) ) { + ((jiffies - discovery->firststamp) < j_timeout))) { /* Create buffer as needed. 
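
The discovery.c and irlmp.c hunks nearby replace IrDA's private u16ho() macro with get_unaligned()/put_unaligned() when touching the two hint bytes, because hints[] is a plain byte array with no alignment guarantee and some architectures fault on misaligned 16-bit loads. A minimal userspace sketch of the same pattern (the helper names here are illustrative, not the kernel API):

#include <stdint.h>
#include <string.h>

/* Load a host-order u16 from a possibly unaligned byte pointer; memcpy
 * lets the compiler pick a safe load sequence on strict-alignment CPUs. */
static uint16_t load_u16_unaligned(const uint8_t *p)
{
	uint16_t v;

	memcpy(&v, p, sizeof(v));
	return v;
}

/* As in irlmp_add_discovery(): compare two 2-byte hint fields. */
static int hints_equal(const uint8_t a[2], const uint8_t b[2])
{
	return load_u16_unaligned(a) == load_u16_unaligned(b);
}
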
* As this function get called a lot and most time * we don't have anything to put in the log (we are diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 9e15c82..4a105dc 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -451,12 +451,14 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, n = 2; /* Get length, MSB first */ - len = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2; + len = get_unaligned_be16(fp + n); + n += 2; IRDA_DEBUG(4, "%s(), len=%d\n", __func__, len); /* Get object ID, MSB first */ - obj_id = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); n += 2; + obj_id = get_unaligned_be16(fp + n); + n += 2; type = fp[n++]; IRDA_DEBUG(4, "%s(), Value type = %d\n", __func__, type); @@ -506,7 +508,7 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, value = irias_new_string_value(fp+n); break; case IAS_OCT_SEQ: - value_len = be16_to_cpu(get_unaligned((__be16 *)(fp+n))); + value_len = get_unaligned_be16(fp + n); n += 2; /* Will truncate to IAS_MAX_OCTET_STRING bytes */ diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 1f81f8e..7bf5b91 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -1062,7 +1062,8 @@ void irlmp_discovery_expiry(discinfo_t *expiries, int number) for(i = 0; i < number; i++) { /* Check if we should notify client */ if ((client->expir_callback) && - (client->hint_mask.word & u16ho(expiries[i].hints) + (client->hint_mask.word & + get_unaligned((__u16 *)expiries[i].hints) & 0x7f7f) ) client->expir_callback(&(expiries[i]), EXPIRY_TIMEOUT, @@ -1086,7 +1087,7 @@ discovery_t *irlmp_get_discovery_response(void) IRDA_ASSERT(irlmp != NULL, return NULL;); - u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word; + put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints); /* * Set character set for device name (we use ASCII), and diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index 75497e5..cf9a4b5 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -10,6 +10,7 @@ #include "irnet_irda.h" /* Private header */ #include <linux/seq_file.h> +#include <asm/unaligned.h> /* * PPP disconnect work: we need to make sure we're in @@ -1673,7 +1674,7 @@ irnet_discovery_indication(discinfo_t * discovery, /* Notify the control channel */ irnet_post_event(NULL, IRNET_DISCOVER, discovery->saddr, discovery->daddr, discovery->info, - u16ho(discovery->hints)); + get_unaligned((__u16 *)discovery->hints)); DEXIT(IRDA_OCB_TRACE, "\n"); } @@ -1704,7 +1705,7 @@ irnet_expiry_indication(discinfo_t * expiry, /* Notify the control channel */ irnet_post_event(NULL, IRNET_EXPIRE, expiry->saddr, expiry->daddr, expiry->info, - u16ho(expiry->hints)); + get_unaligned((__u16 *)expiry->hints)); DEXIT(IRDA_OCB_TRACE, "\n"); } diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 9e1fb82..2f05ec1 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -101,8 +101,8 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); - if (IS_ERR(hdr)) { - ret = PTR_ERR(hdr); + if (hdr == NULL) { + ret = -EMSGSIZE; goto err_out; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 9e7236f..7470e36 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1251,7 +1251,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->sel.family) x->sel.family = 
x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { @@ -3030,6 +3030,9 @@ static int key_notify_sa_expire(struct xfrm_state *x, struct km_event *c) static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) { + if (atomic_read(&pfkey_socks_nr) == 0) + return 0; + switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index e2ddde7..008de1f 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -286,12 +286,14 @@ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, * * Sends received pdus to the sap state machine. */ -static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb) +static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, + struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; + skb->sk = sk; llc_sap_state_process(sap, skb); } @@ -360,8 +362,7 @@ static void llc_sap_mcast(struct llc_sap *sap, break; sock_hold(sk); - skb_set_owner_r(skb1, sk); - llc_sap_rcv(sap, skb1); + llc_sap_rcv(sap, skb1, sk); sock_put(sk); } read_unlock_bh(&sap->sk_list.lock); @@ -381,8 +382,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) } else { struct sock *sk = llc_lookup_dgram(sap, &laddr); if (sk) { - skb_set_owner_r(skb, sk); - llc_sap_rcv(sap, skb); + llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 699d97b..a9fce4a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -672,7 +672,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && sdata->vif.type != IEEE80211_IF_TYPE_AP) return -EINVAL; } else @@ -760,7 +760,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN || + if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { rcu_read_unlock(); return -EINVAL; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 879e721..19efc3a 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -255,14 +255,23 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata) { char buf[50]; + struct ieee80211_key *key; if (!sdata->debugfsdir) return; - sprintf(buf, "../keys/%d", sdata->default_key->debugfs.cnt); - sdata->debugfs.default_key = - debugfs_create_symlink("default_key", sdata->debugfsdir, buf); + /* this is running under the key lock */ + + key = sdata->default_key; + if (key) { + sprintf(buf, "../keys/%d", key->debugfs.cnt); + sdata->debugfs.default_key = + debugfs_create_symlink("default_key", + sdata->debugfsdir, buf); + } else + ieee80211_debugfs_key_remove_default(sdata); } + void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) { if (!sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8e53ce7..006486b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -354,7 +354,7 @@ struct ieee80211_if_sta { int preq_queue_len; struct mesh_stats mshstats; struct mesh_config mshcfg; - u8 mesh_seqnum[3]; + u32 mesh_seqnum; bool 
accepting_plinks; #endif u16 aid; @@ -899,7 +899,7 @@ extern const struct iw_handler_def ieee80211_iw_handler_def; /* ieee80211_ioctl.c */ -int ieee80211_set_freq(struct ieee80211_local *local, int freq); +int ieee80211_set_freq(struct net_device *dev, int freq); /* ieee80211_sta.c */ void ieee80211_sta_timer(unsigned long data); void ieee80211_sta_work(struct work_struct *work); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 80954a5..06e88a5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -54,6 +54,15 @@ int ieee80211_if_add(struct net_device *dev, const char *name, if (!ndev) return -ENOMEM; + ndev->needed_headroom = local->tx_headroom + + 4*6 /* four MAC addresses */ + + 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */ + + 6 /* mesh */ + + 8 /* rfc1042/bridge tunnel */ + - ETH_HLEN /* ethernet hard_header_len */ + + IEEE80211_ENCRYPT_HEADROOM; + ndev->needed_tailroom = IEEE80211_ENCRYPT_TAILROOM; + ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) goto fail; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 150d66d..220e83b 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -380,6 +380,15 @@ void ieee80211_key_free(struct ieee80211_key *key) if (!key) return; + if (!key->sdata) { + /* The key has not been linked yet, simply free it + * and don't Oops */ + if (key->conf.alg == ALG_CCMP) + ieee80211_aes_key_free(key->u.ccmp.tfm); + kfree(key); + return; + } + spin_lock_irqsave(&key->sdata->local->key_lock, flags); __ieee80211_key_free(key); spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e9a9789..df0836f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -255,22 +255,8 @@ static int ieee80211_open(struct net_device *dev) switch (sdata->vif.type) { case IEEE80211_IF_TYPE_WDS: - if (is_zero_ether_addr(sdata->u.wds.remote_addr)) + if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) return -ENOLINK; - - /* Create STA entry for the WDS peer */ - sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, - GFP_KERNEL); - if (!sta) - return -ENOMEM; - - sta->flags |= WLAN_STA_AUTHORIZED; - - res = sta_info_insert(sta); - if (res) { - /* STA has been freed */ - return res; - } break; case IEEE80211_IF_TYPE_VLAN: if (!sdata->u.vlan.ap) @@ -337,10 +323,8 @@ static int ieee80211_open(struct net_device *dev) conf.type = sdata->vif.type; conf.mac_addr = dev->dev_addr; res = local->ops->add_interface(local_to_hw(local), &conf); - if (res && !local->open_count && local->ops->stop) - local->ops->stop(local_to_hw(local)); if (res) - return res; + goto err_stop; ieee80211_if_config(dev); ieee80211_reset_erp_info(dev); @@ -353,9 +337,29 @@ static int ieee80211_open(struct net_device *dev) netif_carrier_on(dev); } + if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + /* Create STA entry for the WDS peer */ + sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, + GFP_KERNEL); + if (!sta) { + res = -ENOMEM; + goto err_del_interface; + } + + sta->flags |= WLAN_STA_AUTHORIZED; + + res = sta_info_insert(sta); + if (res) { + /* STA has been freed */ + goto err_del_interface; + } + } + if (local->open_count == 0) { res = dev_open(local->mdev); WARN_ON(res); + if (res) + goto err_del_interface; tasklet_enable(&local->tx_pending_tasklet); tasklet_enable(&local->tasklet); } @@ -390,6 +394,12 @@ static int ieee80211_open(struct net_device *dev) netif_start_queue(dev); return 0; + err_del_interface: + local->ops->remove_interface(local_to_hw(local), &conf); + err_stop: + if (!local->open_count && 
local->ops->stop) + local->ops->stop(local_to_hw(local)); + return res; } static int ieee80211_stop(struct net_device *dev) @@ -501,6 +511,7 @@ static int ieee80211_stop(struct net_device *dev) case IEEE80211_IF_TYPE_STA: case IEEE80211_IF_TYPE_IBSS: sdata->u.sta.state = IEEE80211_DISABLED; + memset(sdata->u.sta.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.sta.timer); /* * When we get here, the interface is marked down. @@ -519,8 +530,6 @@ static int ieee80211_stop(struct net_device *dev) local->sta_hw_scanning = 0; } - flush_workqueue(local->hw.workqueue); - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; kfree(sdata->u.sta.extra_ie); sdata->u.sta.extra_ie = NULL; @@ -544,6 +553,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_led_radio(local, 0); + flush_workqueue(local->hw.workqueue); + tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); } @@ -975,6 +986,7 @@ static int __ieee80211_if_config(struct net_device *dev, conf.ssid_len = sdata->u.sta.ssid_len; } else if (ieee80211_vif_is_mesh(&sdata->vif)) { conf.beacon = beacon; + conf.beacon_control = control; ieee80211_start_mesh(dev); } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { conf.ssid = sdata->u.ap.ssid; @@ -1302,7 +1314,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, /* * Clear the TX filter mask for this STA when sending the next * packet. If the STA went to power save mode, this will happen - * happen when it wakes up for the next time. + * when it wakes up for the next time. */ sta->flags |= WLAN_STA_CLEAR_PS_FILT; @@ -1755,6 +1767,7 @@ fail_wep: fail_rate: ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev)); unregister_netdevice(local->mdev); + local->mdev = NULL; fail_dev: rtnl_unlock(); sta_info_stop(local); @@ -1762,8 +1775,10 @@ fail_sta_info: debugfs_hw_del(local); destroy_workqueue(local->hw.workqueue); fail_workqueue: - ieee80211_if_free(local->mdev); - local->mdev = NULL; + if (local->mdev != NULL) { + ieee80211_if_free(local->mdev); + local->mdev = NULL; + } fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); return result; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 594a335..697ef67 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. 
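
The mesh.c hunks just below swap a hand-carried 3-byte sequence counter (with explicit ripple carry) for a flat u32 stored little-endian via put_unaligned(cpu_to_le32(...)). A userspace sketch of the store half, assuming a 4-byte seqnum field in the header (names illustrative):

#include <stdint.h>

/* Portable equivalent of put_unaligned(cpu_to_le32(v), p): emit the
 * value byte by byte, least significant byte first. */
static void put_le32_unaligned(uint32_t v, uint8_t *p)
{
	p[0] = (uint8_t)v;
	p[1] = (uint8_t)(v >> 8);
	p[2] = (uint8_t)(v >> 16);
	p[3] = (uint8_t)(v >> 24);
}

/* Stamping a frame then needs no carry logic at all: */
static void stamp_seqnum(uint8_t *hdr_seq, uint32_t *counter)
{
	put_le32_unaligned((*counter)++, hdr_seq);
}
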
*/ +#include <asm/unaligned.h> #include "ieee80211_i.h" #include "mesh.h" @@ -167,8 +168,8 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, struct rmc_entry *p, *n; /* Don't care about endianness since only match matters */ - memcpy(&seqnum, mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); - idx = mesh_hdr->seqnum[0] & rmc->idx_mask; + memcpy(&seqnum, &mesh_hdr->seqnum, sizeof(mesh_hdr->seqnum)); + idx = le32_to_cpu(mesh_hdr->seqnum) & rmc->idx_mask; list_for_each_entry_safe(p, n, &rmc->bucket[idx].list, list) { ++entries; if (time_after(jiffies, p->exp_time) || @@ -393,18 +394,10 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, { meshhdr->flags = 0; meshhdr->ttl = sdata->u.sta.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum); + sdata->u.sta.mesh_seqnum++; - meshhdr->seqnum[0] = sdata->u.sta.mesh_seqnum[0]++; - meshhdr->seqnum[1] = sdata->u.sta.mesh_seqnum[1]; - meshhdr->seqnum[2] = sdata->u.sta.mesh_seqnum[2]; - - if (sdata->u.sta.mesh_seqnum[0] == 0) { - sdata->u.sta.mesh_seqnum[1]++; - if (sdata->u.sta.mesh_seqnum[1] == 0) - sdata->u.sta.mesh_seqnum[2]++; - } - - return 5; + return 6; } void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 9ee3aff..2e161f6 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -140,7 +140,7 @@ struct rmc_entry { struct mesh_rmc { struct rmc_entry bucket[RMC_BUCKETS]; - u8 idx_mask; + u32 idx_mask; }; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 3df8092..af0cd1e 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -120,7 +120,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, *pos++ = WLAN_EID_PREP; break; default: - kfree(skb); + kfree_skb(skb); return -ENOTSUPP; break; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 5845dc2..99c2d36 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -158,19 +158,25 @@ int mesh_path_add(u8 *dst, struct net_device *dev) if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0) return -ENOSPC; - read_lock(&pathtbl_resize_lock); - new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); if (!new_mpath) { atomic_dec(&sdata->u.sta.mpaths); err = -ENOMEM; goto endadd2; } + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); + if (!new_node) { + kfree(new_mpath); + atomic_dec(&sdata->u.sta.mpaths); + err = -ENOMEM; + goto endadd2; + } + + read_lock(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); new_mpath->dev = dev; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); - new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); new_node->mpath = new_mpath; new_mpath->timer.data = (unsigned long) new_mpath; new_mpath->timer.function = mesh_path_timer; @@ -202,7 +208,6 @@ int mesh_path_add(u8 *dst, struct net_device *dev) endadd: spin_unlock(&mesh_paths->hashwlock[hash_idx]); -endadd2: read_unlock(&pathtbl_resize_lock); if (!err && grow) { struct mesh_table *oldtbl, *newtbl; @@ -215,10 +220,12 @@ endadd2: return -ENOMEM; } rcu_assign_pointer(mesh_paths, newtbl); + write_unlock(&pathtbl_resize_lock); + synchronize_rcu(); mesh_table_free(oldtbl, false); - write_unlock(&pathtbl_resize_lock); } +endadd2: return err; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a5e5c31..b404537 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -44,7 +44,7 @@ #define 
IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) -#define IEEE80211_IBSS_JOIN_TIMEOUT (20 * HZ) +#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_PROBE_DELAY (HZ / 33) #define IEEE80211_CHANNEL_TIME (HZ / 33) @@ -547,15 +547,14 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; } - netif_carrier_on(dev); ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(dev, ifsta); } else { + netif_carrier_off(dev); ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - netif_carrier_off(dev); ieee80211_reset_erp_info(dev); sdata->bss_conf.assoc_ht = 0; @@ -569,6 +568,10 @@ static void ieee80211_set_associated(struct net_device *dev, sdata->bss_conf.assoc = assoc; ieee80211_bss_info_change_notify(sdata, changed); + + if (assoc) + netif_carrier_on(dev); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } @@ -665,6 +668,26 @@ static void ieee80211_authenticate(struct net_device *dev, mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } +static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss, + struct ieee80211_supported_band *sband, + u64 *rates) +{ + int i, j, count; + *rates = 0; + count = 0; + for (i = 0; i < bss->supp_rates_len; i++) { + int rate = (bss->supp_rates[i] & 0x7F) * 5; + + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + count++; + break; + } + } + + return count; +} static void ieee80211_send_assoc(struct net_device *dev, struct ieee80211_if_sta *ifsta) @@ -673,11 +696,12 @@ static void ieee80211_send_assoc(struct net_device *dev, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, *ies; - int i, len; + int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_sta_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; + u64 rates = 0; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + ifsta->extra_ie_len + @@ -709,7 +733,17 @@ static void ieee80211_send_assoc(struct net_device *dev, if (bss->wmm_ie) { wmm = 1; } + + /* get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. 
D-Link DAP 1353 in + * b-only mode) */ + rates_len = ieee80211_compatible_rates(bss, sband, &rates); + ieee80211_rx_bss_put(dev, bss); + } else { + rates = ~0; + rates_len = sband->n_bitrates; } mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -740,24 +774,36 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = ifsta->ssid_len; memcpy(pos, ifsta->ssid, ifsta->ssid_len); + /* add all rates which were marked to be used above */ + supp_rates_len = rates_len; + if (supp_rates_len > 8) + supp_rates_len = 8; + len = sband->n_bitrates; - if (len > 8) - len = 8; - pos = skb_put(skb, len + 2); + pos = skb_put(skb, supp_rates_len + 2); *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = len; - for (i = 0; i < len; i++) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } + *pos++ = supp_rates_len; - if (sband->n_bitrates > len) { - pos = skb_put(skb, sband->n_bitrates - len + 2); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = sband->n_bitrates - len; - for (i = len; i < sband->n_bitrates; i++) { + count = 0; + for (i = 0; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { int rate = sband->bitrates[i].bitrate; *pos++ = (u8) (rate / 5); + if (++count == 8) + break; + } + } + + if (count == 8) { + pos = skb_put(skb, rates_len - count + 2); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates_len - count; + + for (i++; i < sband->n_bitrates; i++) { + if (BIT(i) & rates) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } } } @@ -1282,7 +1328,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buf *), GFP_ATOMIC); + kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); if (!tid_agg_rx->reorder_buf) { if (net_ratelimit()) printk(KERN_ERR "can not allocate reordering buffer " @@ -1291,7 +1337,7 @@ static void ieee80211_sta_process_addba_request(struct net_device *dev, goto end; } memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buf *)); + buf_size * sizeof(struct sk_buff *)); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, @@ -1571,7 +1617,7 @@ void sta_addba_resp_timer_expired(unsigned long data) * only one argument, and both sta_info and TID are needed, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ - u16 tid = *(int *)data; + u16 tid = *(u8 *)data; struct sta_info *temp_sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); @@ -1619,7 +1665,7 @@ timer_expired_exit: void sta_rx_agg_session_timer_expired(unsigned long data) { /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and verious sta_info are needed here, so init + * only one argument, and various sta_info are needed here, so init * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ u8 *ptid = (u8 *)data; @@ -2293,6 +2339,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, u8 *pos; struct ieee80211_sub_if_data *sdata; struct ieee80211_supported_band *sband; + union iwreq_data wrqu; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -2315,13 +2362,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, sdata->drop_unencrypted = bss->capability & WLAN_CAPABILITY_PRIVACY ? 
1 : 0; - res = ieee80211_set_freq(local, bss->freq); + res = ieee80211_set_freq(dev, bss->freq); - if (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", dev->name, local->oper_channel->center_freq); - return -1; - } + if (res) + return res; /* Set beacon template */ skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); @@ -2436,7 +2480,9 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, ifsta->state = IEEE80211_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - ieee80211_rx_bss_put(dev, bss); + memset(&wrqu, 0, sizeof(wrqu)); + memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); return res; } @@ -3410,21 +3456,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { - ifsta->state = IEEE80211_AUTHENTICATE; - ieee80211_sta_reset_auth(dev, ifsta); - return 0; - } - spin_lock_bh(&local->sta_bss_lock); freq = local->oper_channel->center_freq; list_for_each_entry(bss, &local->sta_bss_list, list) { if (!(bss->capability & WLAN_CAPABILITY_ESS)) continue; - if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key) + if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) && + (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ + !!sdata->default_key)) continue; if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && @@ -3449,7 +3491,7 @@ static int ieee80211_sta_config_auth(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); if (selected) { - ieee80211_set_freq(local, selected->freq); + ieee80211_set_freq(dev, selected->freq); if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(dev, selected->ssid, selected->ssid_len); @@ -3484,6 +3526,7 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, struct ieee80211_supported_band *sband; u8 bssid[ETH_ALEN], *pos; int i; + int ret; DECLARE_MAC_BUF(mac); #if 0 @@ -3528,7 +3571,9 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, *pos++ = (u8) (rate / 5); } - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(dev, bss); + return ret; } @@ -3569,17 +3614,22 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, spin_unlock_bh(&local->sta_bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " sta_find_ibss: selected %s current " - "%s\n", print_mac(mac, bssid), print_mac(mac2, ifsta->bssid)); + if (found) + printk(KERN_DEBUG " sta_find_ibss: selected %s current " + "%s\n", print_mac(mac, bssid), + print_mac(mac2, ifsta->bssid)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0 && (bss = ieee80211_rx_bss_get(dev, bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len))) { + int ret; printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", dev->name, print_mac(mac, bssid)); - return ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ieee80211_rx_bss_put(dev, bss); + return ret; } #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); @@ -4056,18 +4106,17 @@ ieee80211_sta_scan_result(struct net_device *dev, memset(&iwe, 0, 
sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); + iwe.u.freq.e = 0; current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); memset(&iwe, 0, sizeof(iwe)); iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; + iwe.u.freq.m = bss->freq; + iwe.u.freq.e = 6; current_ev = iwe_stream_add_event(current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVQUAL; iwe.u.qual.qual = bss->signal; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 04afc13..4ea7b97 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -141,7 +141,6 @@ struct rc_pid_events_file_info { * rate behaviour values (lower means we should trust more what we learnt * about behaviour of rates, higher means we should trust more the natural * ordering of rates) - * @fast_start: if Y, push high rates right after initialization */ struct rc_pid_debugfs_entries { struct dentry *dir; @@ -154,7 +153,6 @@ struct rc_pid_debugfs_entries { struct dentry *sharpen_factor; struct dentry *sharpen_duration; struct dentry *norm_offset; - struct dentry *fast_start; }; void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, @@ -267,9 +265,6 @@ struct rc_pid_info { /* Normalization offset. */ unsigned int norm_offset; - /* Fast starst parameter. */ - unsigned int fast_start; - /* Rates information. */ struct rc_pid_rateinfo *rinfo; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a849b74..bcd27c1d 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -398,13 +398,25 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) return NULL; } + pinfo->target = RC_PID_TARGET_PF; + pinfo->sampling_period = RC_PID_INTERVAL; + pinfo->coeff_p = RC_PID_COEFF_P; + pinfo->coeff_i = RC_PID_COEFF_I; + pinfo->coeff_d = RC_PID_COEFF_D; + pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; + pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; + pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; + pinfo->norm_offset = RC_PID_NORM_OFFSET; + pinfo->rinfo = rinfo; + pinfo->oldrate = 0; + /* Sort the rates. This is optimized for the most common case (i.e. * almost-sorted CCK+OFDM rates). Kind of bubble-sort with reversed * mapping too. 
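
Moving the pinfo defaults above the sort loop in rate_control_pid_alloc() is not cosmetic: the loop seeds rinfo[i].diff from pinfo->norm_offset (and formerly pinfo->fast_start), so with the assignments placed after the loop it was reading fields that had not been initialized yet. A compressed sketch of the fixed ordering (struct and field names illustrative):

struct pid_state {
	unsigned int norm_offset;
	int diff[8];
};

static void pid_state_init(struct pid_state *ps, int n_rates)
{
	int i;

	ps->norm_offset = 3;			/* set defaults first ...      */
	for (i = 0; i < n_rates && i < 8; i++)	/* ... then derive per-rate state */
		ps->diff[i] = i * ps->norm_offset;
}
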
*/ for (i = 0; i < sband->n_bitrates; i++) { rinfo[i].index = i; rinfo[i].rev_index = i; - if (pinfo->fast_start) + if (RC_PID_FAST_START) rinfo[i].diff = 0; else rinfo[i].diff = i * pinfo->norm_offset; @@ -425,19 +437,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) break; } - pinfo->target = RC_PID_TARGET_PF; - pinfo->sampling_period = RC_PID_INTERVAL; - pinfo->coeff_p = RC_PID_COEFF_P; - pinfo->coeff_i = RC_PID_COEFF_I; - pinfo->coeff_d = RC_PID_COEFF_D; - pinfo->smoothing_shift = RC_PID_SMOOTHING_SHIFT; - pinfo->sharpen_factor = RC_PID_SHARPENING_FACTOR; - pinfo->sharpen_duration = RC_PID_SHARPENING_DURATION; - pinfo->norm_offset = RC_PID_NORM_OFFSET; - pinfo->fast_start = RC_PID_FAST_START; - pinfo->rinfo = rinfo; - pinfo->oldrate = 0; - #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; de->dir = debugfs_create_dir("rc80211_pid", @@ -465,9 +464,6 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) de->norm_offset = debugfs_create_u32("norm_offset", S_IRUSR | S_IWUSR, de->dir, &pinfo->norm_offset); - de->fast_start = debugfs_create_bool("fast_start", - S_IRUSR | S_IWUSR, de->dir, - &pinfo->fast_start); #endif return pinfo; @@ -479,7 +475,6 @@ static void rate_control_pid_free(void *priv) #ifdef CONFIG_MAC80211_DEBUGFS struct rc_pid_debugfs_entries *de = &pinfo->dentries; - debugfs_remove(de->fast_start); debugfs_remove(de->norm_offset); debugfs_remove(de->sharpen_duration); debugfs_remove(de->sharpen_factor); diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index ae75d41..ff5c380f 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -85,7 +85,7 @@ static int rate_control_pid_events_open(struct inode *inode, struct file *file) struct rc_pid_sta_info *sinfo = inode->i_private; struct rc_pid_event_buffer *events = &sinfo->events; struct rc_pid_events_file_info *file_info; - unsigned int status; + unsigned long status; /* Allocate a state struct */ file_info = kmalloc(sizeof(*file_info), GFP_KERNEL); @@ -135,7 +135,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, char pb[RC_PID_PRINT_BUF_SIZE]; int ret; int p; - unsigned int status; + unsigned long status; /* Check if there is something to read. 
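
The unsigned int to unsigned long change in the debugfs event file above is the usual spin_lock_irqsave() fix: the macro stores the saved interrupt state into its second argument, which must be a full unsigned long. The idiom, as a kernel-style sketch:

#include <linux/spinlock.h>

static void touch_under_lock(spinlock_t *lock)
{
	unsigned long flags;	/* not unsigned int: irqsave writes here */

	spin_lock_irqsave(lock, flags);
	/* ... copy an event out of the shared ring buffer ... */
	spin_unlock_irqrestore(lock, flags);
}
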
*/ if (events->next_entry == file_info->next_entry) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 02f436a..0941e5d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1091,7 +1091,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) u16 fc, hdrlen, ethertype; u8 *payload; u8 dst[ETH_ALEN]; - u8 src[ETH_ALEN]; + u8 src[ETH_ALEN] __aligned(2); struct sk_buff *skb = rx->skb; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); DECLARE_MAC_BUF(mac); @@ -1234,7 +1234,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) */ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) { - static const u8 pae_group_addr[ETH_ALEN] + static const u8 pae_group_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; @@ -1305,11 +1305,11 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(skb->data)) { if (*mesh_ttl > 0) { xmit_skb = skb_copy(skb, GFP_ATOMIC); - if (!xmit_skb && net_ratelimit()) + if (xmit_skb) + xmit_skb->pkt_type = PACKET_OTHERHOST; + else if (net_ratelimit()) printk(KERN_DEBUG "%s: failed to clone " "multicast frame\n", dev->name); - else - xmit_skb->pkt_type = PACKET_OTHERHOST; } else IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.sta, dropped_frames_ttl); @@ -1395,7 +1395,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) padding = ((4 - subframe_len) & 0x3); /* the last MSDU has no padding */ if (subframe_len > remaining) { - printk(KERN_DEBUG "%s: wrong buffer size", dev->name); + printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name); return RX_DROP_UNUSABLE; } @@ -1418,7 +1418,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) eth = (struct ethhdr *) skb_pull(skb, ntohs(len) + padding); if (!eth) { - printk(KERN_DEBUG "%s: wrong buffer size ", + printk(KERN_DEBUG "%s: wrong buffer size\n", dev->name); dev_kfree_skb(frame); return RX_DROP_UNUSABLE; @@ -1952,7 +1952,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!skb_new) { if (net_ratelimit()) printk(KERN_DEBUG "%s: failed to copy " - "multicast frame for %s", + "multicast frame for %s\n", wiphy_name(local->hw.wiphy), prev->dev->name); continue; diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index dddbfd6..09093da 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -230,10 +230,8 @@ void ieee80211_get_tkip_key(struct ieee80211_key_conf *keyconf, iv16 = data[hdr_len] << 8; iv16 += data[hdr_len + 2]; - iv32 = data[hdr_len + 4] + - (data[hdr_len + 5] >> 8) + - (data[hdr_len + 6] >> 16) + - (data[hdr_len + 7] >> 24); + iv32 = data[hdr_len + 4] | (data[hdr_len + 5] << 8) | + (data[hdr_len + 6] << 16) | (data[hdr_len + 7] << 24); #ifdef CONFIG_TKIP_DEBUG printk(KERN_DEBUG "TKIP encrypt: iv16 = 0x%04x, iv32 = 0x%08x\n", diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f35eaea9..c80d589 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; ieee80211_tx_result res = TX_DROP, res_prepare; - int ret, i; + int ret, i, retries = 0; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1216,6 +1216,13 @@ retry: if (!__ieee80211_queue_stopped(local, control->queue)) { clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[control->queue]); + retries++; + /* + * Driver bug, it's rejecting packets but + * not stopping queues. 
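
The retry counter added to ieee80211_tx() above bounds what used to be a potentially endless requeue loop: a driver that keeps rejecting frames without stopping its queues now trips a one-shot warning and the frame is dropped. The shape of that defensive pattern, as a standalone sketch:

#include <stdio.h>

#define MAX_TX_RETRIES 5	/* same cap as the hunk above */

static int submit_bounded(int (*submit)(void *), void *frame)
{
	int retries = 0;

	while (submit(frame) != 0) {
		/* A correct backend signals backpressure by stopping the
		 * queue rather than failing repeatedly; cap the retries so
		 * a buggy one cannot wedge us in this loop. */
		if (++retries > MAX_TX_RETRIES) {
			fprintf(stderr, "backend rejects but never stops\n");
			return -1;	/* caller drops the frame */
		}
	}
	return 0;
}
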
+ */ + if (WARN_ON_ONCE(retries > 5)) + goto drop; goto retry; } memcpy(&store->control, control, @@ -1898,6 +1905,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, control->flags |= IEEE80211_TXCTL_SHORT_PREAMBLE; control->antenna_sel_tx = local->hw.conf.antenna_sel_tx; control->flags |= IEEE80211_TXCTL_NO_ACK; + control->flags |= IEEE80211_TXCTL_DO_NOT_ENCRYPT; control->retry_limit = 1; control->flags |= IEEE80211_TXCTL_CLEAR_PS_FILT; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index cc9f715..4e97b26 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -34,11 +34,11 @@ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -const unsigned char rfc1042_header[] = +const unsigned char rfc1042_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; /* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -const unsigned char bridge_tunnel_header[] = +const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; @@ -153,15 +153,15 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) /* 7.1.3.5a.2 */ switch (ae) { case 0: - return 5; + return 6; case 1: - return 11; + return 12; case 2: - return 17; + return 18; case 3: - return 23; + return 24; default: - return 5; + return 6; } } @@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces( struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; + rtnl_lock(); + + list_for_each_entry(sdata, &local->interfaces, list) { + switch (sdata->vif.type) { + case IEEE80211_IF_TYPE_INVALID: + case IEEE80211_IF_TYPE_MNTR: + case IEEE80211_IF_TYPE_VLAN: + continue; + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + case IEEE80211_IF_TYPE_WDS: + case IEEE80211_IF_TYPE_MESH_POINT: + break; + } + if (sdata->dev == local->mdev) + continue; + if (netif_running(sdata->dev)) + iterator(data, sdata->dev->dev_addr, + &sdata->vif); + } + + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); + +void ieee80211_iterate_active_interfaces_atomic( + struct ieee80211_hw *hw, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces( rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 76e1de1..e840421 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -95,6 +95,13 @@ static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, } } + if (alg == ALG_WEP && + key_len != LEN_WEP40 && key_len != LEN_WEP104) { + ieee80211_key_free(key); + err = -EINVAL; + goto out_unlock; + } + ieee80211_key_link(key, sdata, sta); if (set_tx_key || (!sta && !sdata->default_key && key)) @@ -209,7 +216,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_frequency = c; IW_EVENT_CAPA_SET_KERNEL(range->event_capa); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); @@ -291,14 +297,22 @@ static int ieee80211_ioctl_giwmode(struct 
net_device *dev, return 0; } -int ieee80211_set_freq(struct ieee80211_local *local, int freqMHz) +int ieee80211_set_freq(struct net_device *dev, int freqMHz) { int ret = -EINVAL; struct ieee80211_channel *chan; + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { + if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && + chan->flags & IEEE80211_CHAN_NO_IBSS) { + printk(KERN_DEBUG "%s: IBSS not allowed on frequency " + "%d MHz\n", dev->name, chan->center_freq); + return ret; + } local->oper_channel = chan; if (local->sta_sw_scanning || local->sta_hw_scanning) @@ -316,7 +330,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA) @@ -330,14 +343,14 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(local, + return ieee80211_set_freq(dev, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div > 0) - return ieee80211_set_freq(local, freq->m / div); + return ieee80211_set_freq(dev, freq->m / div); else return -EINVAL; } @@ -490,9 +503,15 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == IEEE80211_IF_TYPE_STA || sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); - return 0; + if (sdata->u.sta.state == IEEE80211_ASSOCIATED || + sdata->u.sta.state == IEEE80211_IBSS_JOINED) { + ap_addr->sa_family = ARPHRD_ETHER; + memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); + return 0; + } else { + memset(&ap_addr->sa_data, 0, ETH_ALEN); + return 0; + } } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 64faa3d..5d09e86 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -323,8 +323,7 @@ static void wme_qdiscop_destroy(struct Qdisc* qd) struct ieee80211_hw *hw = &local->hw; int queue; - tcf_destroy_chain(q->filter_list); - q->filter_list = NULL; + tcf_destroy_chain(&q->filter_list); for (queue=0; queue < hw->queues; queue++) { skb_queue_purge(&q->requeued[queue]); @@ -394,7 +393,8 @@ static int wme_qdiscop_init(struct Qdisc *qd, struct nlattr *opt) qd->handle); if (!q->queues[i]) { q->queues[i] = &noop_qdisc; - printk(KERN_ERR "%s child qdisc %i creation failed", dev->name, i); + printk(KERN_ERR "%s child qdisc %i creation failed\n", + dev->name, i); } } @@ -672,7 +672,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "allocated aggregation queue" - " %d tid %d addr %s pool=0x%lX", + " %d tid %d addr %s pool=0x%lX\n", i, tid, print_mac(mac, sta->addr), q->qdisc_pool[0]); #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c1fc0f1..aa8d80c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -90,6 +90,7 @@ config NF_CT_PROTO_DCCP tristate 'DCCP 
protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK depends on NETFILTER_ADVANCED + default IP_DCCP help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on DCCP connections. @@ -104,6 +105,7 @@ config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on EXPERIMENTAL && NF_CONNTRACK depends on NETFILTER_ADVANCED + default IP_SCTP help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -532,6 +534,7 @@ config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + default IP_DCCP help With this option enabled, you will be able to use the iptables `dccp' match in order to match on DCCP source/destination ports @@ -725,6 +728,7 @@ config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' depends on NETFILTER_XTABLES && EXPERIMENTAL depends on NETFILTER_ADVANCED + default IP_SCTP help With this option enabled, you will be able to use the `sctp' match in order to match on SCTP source/destination ports diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4b1799..662c1cc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -196,8 +196,6 @@ destroy_conntrack(struct nf_conntrack *nfct) if (l4proto && l4proto->destroy) l4proto->destroy(ct); - nf_ct_ext_destroy(ct); - rcu_read_unlock(); spin_lock_bh(&nf_conntrack_lock); @@ -520,6 +518,7 @@ static void nf_conntrack_free_rcu(struct rcu_head *head) void nf_conntrack_free(struct nf_conn *ct) { + nf_ct_ext_destroy(ct); call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e31beeb..e8f0dea 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -587,10 +587,10 @@ int __init nf_conntrack_expect_init(void) return 0; err3: + kmem_cache_destroy(nf_ct_expect_cachep); +err2: nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, nf_ct_expect_hsize); -err2: - kmem_cache_destroy(nf_ct_expect_cachep); err1: return err; } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bcc19fa..8a3f8b3 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -59,12 +59,19 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) if (!*ext) return NULL; + INIT_RCU_HEAD(&(*ext)->rcu); (*ext)->offset[id] = off; (*ext)->len = len; return (void *)(*ext) + off; } +static void __nf_ct_ext_free_rcu(struct rcu_head *head) +{ + struct nf_ct_ext *ext = container_of(head, struct nf_ct_ext, rcu); + kfree(ext); +} + void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) { struct nf_ct_ext *new; @@ -106,7 +113,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) (void *)ct->ext + ct->ext->offset[i]); rcu_read_unlock(); } - kfree(ct->ext); + call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); ct->ext = new; } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 95da1a2..2f83c15 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -619,6 +619,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { 
static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, .expect_policy = &h245_exp_policy, @@ -1765,6 +1766,7 @@ static void __exit nf_conntrack_h323_fini(void) nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); kfree(h323_buffer); pr_debug("nf_ct_h323: fini\n"); } @@ -1777,28 +1779,34 @@ static int __init nf_conntrack_h323_init(void) h323_buffer = kmalloc(65536, GFP_KERNEL); if (!h323_buffer) return -ENOMEM; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_h245); if (ret < 0) goto err1; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[0]); if (ret < 0) goto err2; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_q931[1]); if (ret < 0) goto err3; - ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[0]); if (ret < 0) goto err4; + ret = nf_conntrack_helper_register(&nf_conntrack_helper_ras[1]); + if (ret < 0) + goto err5; pr_debug("nf_ct_h323: init success\n"); return 0; -err4: +err5: nf_conntrack_helper_unregister(&nf_conntrack_helper_ras[0]); -err3: +err4: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[1]); -err2: +err3: nf_conntrack_helper_unregister(&nf_conntrack_helper_q931[0]); +err2: + nf_conntrack_helper_unregister(&nf_conntrack_helper_h245); err1: + kfree(h323_buffer); return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 16774ec..0edefcf 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -472,6 +472,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (ctnetlink_dump_id(skb, ct) < 0) + goto nla_put_failure; + if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004..dd28fb2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -331,12 +331,13 @@ static unsigned int get_conntrack_index(const struct tcphdr *tcph) I. Upper bound for valid data: seq <= sender.td_maxend II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW + III. Upper bound for valid (s)ack: sack <= receiver.td_end + IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW - where sack is the highest right edge of sack block found in the packet. + where sack is the highest right edge of sack block found in the packet + or ack in the case of packet without SACK option. - The upper bound limit for a valid ack is not ignored - + The upper bound limit for a valid (s)ack is not ignored - we doesn't have to deal with fragments. 
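
The reworded comment above, together with the tcp_in_window() change a little further down, makes bound IV symmetric with bound III: both now test sack -- the highest right edge of any SACK block, or the plain ack when the packet carries no SACK option -- against the receiver's window. In wrap-safe sequence arithmetic the two checks look like this (a sketch; before()/after() follow the kernel's definitions):

#include <stdint.h>

/* Wrap-safe 32-bit sequence-number comparisons. */
static int before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int after(uint32_t a, uint32_t b) { return before(b, a); }

/* Bounds III and IV: sack <= td_end, and
 * sack >= td_end - MAXACKWINDOW(sender). */
static int sack_ok(uint32_t sack, uint32_t td_end, uint32_t maxackwin)
{
	return before(sack, td_end + 1) &&
	       after(sack, td_end - maxackwin - 1);
}
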
*/ @@ -606,12 +607,12 @@ static bool tcp_in_window(const struct nf_conn *ct, before(seq, sender->td_maxend + 1), after(end, sender->td_end - receiver->td_maxwin - 1), before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && after(end, sender->td_end - receiver->td_maxwin - 1) && before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender))) { + after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ @@ -843,9 +844,15 @@ static int tcp_packet(struct nf_conn *ct, /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&ct->timeout)) + /* Only repeat if we can actually remove the timer. + * Destruction may already be in progress in process + * context and we must give it a chance to terminate. + */ + if (del_timer(&ct->timeout)) { ct->timeout.function((unsigned long)ct); - return -NF_REPEAT; + return -NF_REPEAT; + } + return -NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 9f49000..2f9bbc0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -870,6 +870,7 @@ static int process_sdp(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -959,6 +960,9 @@ static int process_sdp(struct sk_buff *skb, if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + if (ret == NF_ACCEPT && i > 0) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; } static int process_invite_response(struct sk_buff *skb, @@ -967,14 +971,14 @@ static int process_invite_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_update_response(struct sk_buff *skb, @@ -983,14 +987,14 @@ static int process_update_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_prack_response(struct sk_buff *skb, @@ -999,14 +1003,14 @@ static int process_prack_response(struct sk_buff *skb, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dptr, datalen, cseq); - else { + else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); - return NF_ACCEPT; - } + return NF_ACCEPT; } static int process_bye_request(struct sk_buff *skb, diff --git 
a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b59871f..46ea542 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -296,11 +296,11 @@ static int nf_conntrack_standalone_init_proc(void) pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!pde) goto out_nf_conntrack; - pde = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); + + pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat, + &ct_cpu_seq_fops); if (!pde) goto out_stat_nf_conntrack; - pde->proc_fops = &ct_cpu_seq_fops; - pde->owner = THIS_MODULE; return 0; out_stat_nf_conntrack: diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bc11d70..9fda6ee 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -92,10 +92,6 @@ void nf_log_packet(int pf, vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); - } else if (net_ratelimit()) { - printk(KERN_WARNING "nf_log_packet: can\'t log since " - "no backend logging module loaded in! Please either " - "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 11b22ab..5d75cd8 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -936,25 +936,24 @@ int xt_proto_init(struct net *net, int af) #ifdef CONFIG_PROC_FS strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); - proc = proc_net_fops_create(net, buf, 0440, &xt_table_ops); + proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops, + (void *)(unsigned long)af); if (!proc) goto out; - proc->data = (void *)(unsigned long)af; - strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); - proc = proc_net_fops_create(net, buf, 0440, &xt_match_ops); + proc = proc_create_data(buf, 0440, net->proc_net, &xt_match_ops, + (void *)(unsigned long)af); if (!proc) goto out_remove_tables; - proc->data = (void *)(unsigned long)af; strlcpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); - proc = proc_net_fops_create(net, buf, 0440, &xt_target_ops); + proc = proc_create_data(buf, 0440, net->proc_net, &xt_target_ops, + (void *)(unsigned long)af); if (!proc) goto out_remove_matches; - proc->data = (void *)(unsigned long)af; #endif return 0; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2e89a00..70907f6b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, static inline bool already_closed(const struct nf_conn *conn) { if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; else return 0; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 40d344b..6809af5 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -237,15 +237,15 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create(minfo->name, 0, + hinfo->pde = + proc_create_data(minfo->name, 0, family == AF_INET ? 
hashlimit_procdir4 : hashlimit_procdir6, - &dl_file_ops); + &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); return -1; } - hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); @@ -301,15 +301,15 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create(minfo->name, 0, + hinfo->pde = + proc_create_data(minfo->name, 0, family == AF_INET ? hashlimit_procdir4 : hashlimit_procdir6, - &dl_file_ops); + &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); return -1; } - hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 500528d..c63e933 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -179,3 +179,5 @@ module_exit(iprange_mt_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>, Jan Engelhardt <jengelh@computergmbh.de>"); MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching"); +MODULE_ALIAS("ipt_iprange"); +MODULE_ALIAS("ip6t_iprange"); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index fdc14a0..9080c61 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -584,12 +584,7 @@ list_start: rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - - return 0; + return genlmsg_reply(ans_skb, info); list_retry: /* XXX - this limit is a guesstimate */ diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 22c1912..44be5d5 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -386,11 +386,7 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto listdef_failure; - return 0; + return genlmsg_reply(ans_skb, info); listdef_failure_lock: rcu_read_unlock(); @@ -501,11 +497,7 @@ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) goto version_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto version_failure; - return 0; + return genlmsg_reply(ans_skb, info); version_failure: kfree_skb(ans_skb); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 0099da5b..56f8087 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1107,11 +1107,7 @@ static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) goto list_failure; genlmsg_end(ans_skb, data); - - ret_val = genlmsg_reply(ans_skb, info); - if (ret_val != 0) - goto list_failure; - return 0; + return genlmsg_reply(ans_skb, info); list_failure: kfree_skb(ans_skb); @@ -1534,7 +1530,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, } } list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (addr6->valid || iter_addr6++ < skip_addr6) + if (!addr6->valid || iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, iface, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f80..349aba1 100644 --- a/net/netlink/af_netlink.c 
+++ b/net/netlink/af_netlink.c @@ -886,7 +886,7 @@ retry: return netlink_unicast_kernel(sk, skb); if (sk_filter(sk, skb)) { - int err = skb->len; + err = skb->len; kfree_skb(skb); sock_put(sk); return err; diff --git a/net/netlink/attr.c index feb326f..2d106cf 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,7 @@ errout: * @maxtype: maximum attribute type to be expected * @head: head of attribute stream * @len: length of attribute stream + * @policy: validation policy * * Parses a stream of attributes and stores a pointer to each attribute in * the tb array accessible via the attribute type. Attributes with a type @@ -194,7 +195,7 @@ struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) /** * nla_strlcpy - Copy string attribute payload into a sized buffer * @dst: where to copy the string to - * @src: attribute to copy the string from + * @nla: attribute to copy the string from * @dstsize: size of destination buffer * * Copies at most dstsize - 1 bytes into the destination buffer. @@ -340,9 +341,9 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) } /** - * nla_reserve - reserve room for attribute without header + * nla_reserve_nohdr - reserve room for attribute without header * @skb: socket buffer to reserve room on - * @len: length of attribute payload + * @attrlen: length of attribute payload * * Reserves room for attribute payload without a header. * @@ -400,13 +401,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -1; + return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; @@ -418,13 +419,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; @@ -436,13 +437,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. 
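 *
 * Editor's sketch (hypothetical caller, not part of this patch): with a
 * real errno the failure can now be propagated directly,
 *
 *	err = nla_append(skb, sizeof(val), &val);
 *	if (err < 0)
 *		return err;
 *
 * so callers see -EMSGSIZE rather than a bare -1; err and val are
 * hypothetical locals used only for illustration.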
*/ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; memcpy(skb_put(skb, attrlen), data, attrlen); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d16929c..3e1191c 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -444,8 +444,11 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(genl_sock, skb, nlh, - ops->dumpit, ops->done); + genl_unlock(); + err = netlink_dump_start(genl_sock, skb, nlh, + ops->dumpit, ops->done); + genl_lock(); + return err; } if (ops->doit == NULL) @@ -554,7 +557,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, @@ -590,7 +594,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) @@ -601,9 +606,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - if (chains_to_skip != 0) - genl_lock(); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -621,9 +623,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: - if (chains_to_skip != 0) - genl_unlock(); - cb->args[0] = i; cb->args[1] = n; @@ -768,7 +767,7 @@ static int __init genl_init(void) /* we'll bump the group number right afterwards */ genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, - genl_rcv, NULL, THIS_MODULE); + genl_rcv, &genl_mutex, THIS_MODULE); if (genl_sock == NULL) panic("GENL: Cannot initialize generic netlink\n"); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2507024..2cee87d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -743,7 +743,7 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (len > dev->mtu+reserve) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), + skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), msg->msg_flags & MSG_DONTWAIT, &err); if (skb==NULL) goto out_unlock; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 5053a53..bd59387 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1066,12 +1066,12 @@ out: #ifdef CONFIG_PROC_FS static void *rose_node_start(struct seq_file *seq, loff_t *pos) - __acquires(rose_neigh_list_lock) + __acquires(rose_node_list_lock) { struct rose_node *rose_node; int i = 1; - spin_lock_bh(&rose_neigh_list_lock); + spin_lock_bh(&rose_node_list_lock); if (*pos == 0) return SEQ_START_TOKEN; @@ -1090,9 +1090,9 @@ static void *rose_node_next(struct seq_file *seq, void *v, loff_t *pos) } static void rose_node_stop(struct seq_file *seq, void *v) - __releases(rose_neigh_list_lock) + __releases(rose_node_list_lock) { - spin_unlock_bh(&rose_neigh_list_lock); + spin_unlock_bh(&rose_node_list_lock); } static int rose_node_show(struct seq_file *seq, void *v) diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index bb282a6..64069c8 100644 --- 
a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -184,12 +184,13 @@ void rxrpc_put_transport(struct rxrpc_transport *trans) ASSERTCMP(atomic_read(&trans->usage), >, 0); trans->put_time = get_seconds(); - if (unlikely(atomic_dec_and_test(&trans->usage))) + if (unlikely(atomic_dec_and_test(&trans->usage))) { _debug("zombie"); /* let the reaper determine the timeout to avoid a race with * overextending the timeout if the reaper is running at the * same time */ rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0); + } _leave(""); } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 82adfe6..9437b27 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,17 +106,6 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. -config NET_SCH_RR - tristate "Multi Band Round Robin Queuing (RR)" - select NET_SCH_PRIO - ---help--- - Say Y here if you want to use an n-band round robin packet - scheduler. - - The module uses sch_prio for its framework and is aliased as - sch_rr, so it will load sch_prio, although it is referred - to using sch_rr. - config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 64b2d13..1d421d0 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Authors: Jamal Hadi Salim (2005) + * Authors: Jamal Hadi Salim (2005-8) * */ @@ -34,6 +34,7 @@ static struct tcf_hashinfo simp_hash_info = { .lock = &simp_lock, }; +#define SIMP_MAX_DATA 32 static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { struct tcf_defact *d = a->priv; @@ -69,23 +70,28 @@ static int tcf_simp_release(struct tcf_defact *d, int bind) return ret; } -static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +static int alloc_defdata(struct tcf_defact *d, char *defdata) { - d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL); + d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; - d->tcfd_datalen = datalen; + return 0; } -static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +static void reset_policy(struct tcf_defact *d, char *defdata, + struct tc_defact *p) { - kfree(d->tcfd_defdata); - return alloc_defdata(d, datalen, defdata); + spin_lock_bh(&d->tcf_lock); + d->tcf_action = p->action; + memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); + strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); + spin_unlock_bh(&d->tcf_lock); } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) }, + [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, @@ -95,28 +101,24 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, struct tc_defact *parm; struct tcf_defact *d; struct tcf_common *pc; - void *defdata; - u32 datalen = 0; + char *defdata; int ret = 0, err; if (nla == NULL) return -EINVAL; - err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL); + err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy); if (err < 0) return err; if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; - parm = nla_data(tb[TCA_DEF_PARMS]); - defdata = nla_data(tb[TCA_DEF_DATA]); - if (defdata == NULL) + if (tb[TCA_DEF_DATA] == NULL) return -EINVAL; - datalen = 
nla_len(tb[TCA_DEF_DATA]); - if (datalen == 0) - return -EINVAL; + parm = nla_data(tb[TCA_DEF_PARMS]); + defdata = nla_data(tb[TCA_DEF_DATA]); pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); if (!pc) { @@ -126,11 +128,12 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, return -ENOMEM; d = to_defact(pc); - ret = alloc_defdata(d, datalen, defdata); + ret = alloc_defdata(d, defdata); if (ret < 0) { kfree(pc); return ret; } + d->tcf_action = parm->action; ret = ACT_P_CREATED; } else { d = to_defact(pc); @@ -138,13 +141,9 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, tcf_simp_release(d, bind); return -EEXIST; } - realloc_defdata(d, datalen, defdata); + reset_policy(d, defdata, parm); } - spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; - spin_unlock_bh(&d->tcf_lock); - if (ret == ACT_P_CREATED) tcf_hash_insert(pc, &simp_hash_info); return ret; @@ -172,7 +171,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, opt.bindcnt = d->tcf_bindcnt - bind; opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); - NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); + NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(d->tcf_tm.expires); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1086df7..9360fc8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -220,7 +220,7 @@ replay: tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) goto errout; - err = -EINVAL; + err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { #ifdef CONFIG_KMOD diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c40773c..10f01ad 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1252,12 +1252,12 @@ void tcf_destroy(struct tcf_proto *tp) kfree(tp); } -void tcf_destroy_chain(struct tcf_proto *fl) +void tcf_destroy_chain(struct tcf_proto **fl) { struct tcf_proto *tp; - while ((tp = fl) != NULL) { - fl = tp->next; + while ((tp = *fl) != NULL) { + *fl = tp->next; tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 3352734..db0e23a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -160,7 +160,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) *prev = flow->next; pr_debug("atm_tc_put: qdisc %p\n", flow->q); qdisc_destroy(flow->q); - tcf_destroy_chain(flow->filter_list); + tcf_destroy_chain(&flow->filter_list); if (flow->sock) { pr_debug("atm_tc_put: f_count %d\n", file_count(flow->sock->file)); @@ -586,10 +586,11 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); + for (flow = p->flows; flow; flow = flow->next) + tcf_destroy_chain(&flow->filter_list); + /* races ? 
*/ while ((flow = p->flows)) { - tcf_destroy_chain(flow->filter_list); - flow->filter_list = NULL; if (flow->ref > 1) printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, flow->ref); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 09969c1..2a3c97f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1704,7 +1704,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) BUG_TRAP(!cl->filters); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->bstats, &cl->rate_est); @@ -1728,10 +1728,8 @@ cbq_destroy(struct Qdisc* sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < 16; h++) { - for (cl = q->classes[h]; cl; cl = cl->next) { - tcf_destroy_chain(cl->filter_list); - cl->filter_list = NULL; - } + for (cl = q->classes[h]; cl; cl = cl->next) + tcf_destroy_chain(&cl->filter_list); } for (h = 0; h < 16; h++) { struct cbq_class *next; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0df911f..c4c1317 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -416,7 +416,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("dsmark_destroy(sch %p,[qdisc %p])\n", sch, p); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); kfree(p->mask); } @@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static const struct Qdisc_class_ops dsmark_class_ops = { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b741618..13afa72 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -219,6 +219,7 @@ static void dev_watchdog(unsigned long arg) printk(KERN_INFO "NETDEV WATCHDOG: %s: transmit timed out\n", dev->name); dev->tx_timeout(dev); + WARN_ON_ONCE(1); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + dev->watchdog_timeo))) dev_hold(dev); @@ -467,7 +468,7 @@ struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops) return sch; errout: - return ERR_PTR(-err); + return ERR_PTR(err); } struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops, diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 3a9d226..c89fba5 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -582,7 +582,8 @@ append_opt: return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 87293d0..e817aa0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1123,7 +1123,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); qdisc_destroy(cl->qdisc); gen_kill_estimator(&cl->bstats, &cl->rate_est); if (cl != &q->root) @@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, nla_put_failure: 
nla_nest_cancel(skb, nest); - return -1; + return -EMSGSIZE; } static int @@ -1541,6 +1541,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < HFSC_HSIZE; i++) { + list_for_each_entry(cl, &q->clhash[i], hlist) + tcf_destroy_chain(&cl->filter_list); + } + for (i = 0; i < HFSC_HSIZE; i++) { list_for_each_entry_safe(cl, next, &q->clhash[i], hlist) hfsc_destroy_class(sch, cl); } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 66148cc..3fb58f4 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,6 +28,7 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> @@ -53,13 +54,17 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ +static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number supplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif +/* Module parameter and sysfs export */ +module_param (htb_hysteresis, int, 0640); +MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); + /* used internally to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -462,19 +467,21 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) htb_remove_class_from_row(q, cl, mask); } -#if HTB_HYSTERESIS static inline long htb_lowater(const struct htb_class *cl) { - return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + if (htb_hysteresis) + return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; + else + return 0; } static inline long htb_hiwater(const struct htb_class *cl) { - return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; + if (htb_hysteresis) + return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; + else + return 0; } -#else -#define htb_lowater(cl) (0) -#define htb_hiwater(cl) (0) -#endif + /** * htb_class_mode - computes and returns current class mode @@ -1197,12 +1204,16 @@ static inline int htb_parent_last_child(struct htb_class *cl) return 1; } -static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q) +static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, + struct Qdisc *new_q) { struct htb_class *parent = cl->parent; BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity); + if (parent->cmode != HTB_CAN_SEND) + htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); + parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); INIT_LIST_HEAD(&parent->un.leaf.drop_list); @@ -1227,7 +1238,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - tcf_destroy_chain(cl->filter_list); + tcf_destroy_chain(&cl->filter_list); while (!list_empty(&cl->children)) htb_destroy_class(sch, list_entry(cl->children.next, @@ -1256,7 +1267,7 @@ static void htb_destroy(struct Qdisc *sch) and surprisingly it worked in 2.4. But it must precede it because the filter needs its target class alive to be able to call unbind_filter on it (without Oops). 
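 *
 * (Editor's note, taken from the sch_api.c hunk above: tcf_destroy_chain()
 * now receives a struct tcf_proto ** and clears the caller's list head
 * while it walks,
 *
 *	while ((tp = *fl) != NULL) {
 *		*fl = tp->next;
 *		tcf_destroy(tp);
 *	}
 *
 * which is why every call site in this patch passes &cl->filter_list or
 * &q->filter_list instead of the bare pointer.)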
*/ - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); while (!list_empty(&q->root)) htb_destroy_class(sch, list_entry(q->root.next, @@ -1300,7 +1311,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) htb_deactivate(q, cl); if (last_child) - htb_parent_to_leaf(cl, new_q); + htb_parent_to_leaf(q, cl, new_q); if (--cl->refcnt == 0) htb_destroy_class(sch, cl); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 274b1dd..956c80a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -104,7 +104,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); - tcf_destroy_chain(p->filter_list); + tcf_destroy_chain(&p->filter_list); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4aa2b45..5532f10 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -219,7 +219,7 @@ prio_destroy(struct Qdisc* sch) int prio; struct prio_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); for (prio=0; prio<q->bands; prio++) qdisc_destroy(q->queues[prio]); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3dcd493f4..5c56985 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f0463d7..6a97afb 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -520,7 +520,7 @@ static void sfq_destroy(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); - tcf_destroy_chain(q->filter_list); + tcf_destroy_chain(&q->filter_list); q->perturb_period = 0; del_timer_sync(&q->perturb_timer); } diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b4cd2b7..024c3eb 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -474,6 +474,15 @@ static void sctp_association_destroy(struct sctp_association *asoc) void sctp_assoc_set_primary(struct sctp_association *asoc, struct sctp_transport *transport) { + int changeover = 0; + + /* it's a changeover only if we already have a primary path + * that we are changing + */ + if (asoc->peer.primary_path != NULL && + asoc->peer.primary_path != transport) + changeover = 1 ; + asoc->peer.primary_path = transport; /* Set a default msg_name for events. */ @@ -499,12 +508,12 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, * double switch to the same destination address. */ if (transport->cacc.changeover_active) - transport->cacc.cycling_changeover = 1; + transport->cacc.cycling_changeover = changeover; /* 2) The sender MUST set CHANGEOVER_ACTIVE to indicate that * a changeover has occurred. */ - transport->cacc.changeover_active = 1; + transport->cacc.changeover_active = changeover; /* 3) The sender MUST store the next TSN to be sent in * next_tsn_at_change. @@ -1203,6 +1212,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) struct list_head *head = &asoc->peer.transport_addr_list; struct list_head *pos; + if (asoc->peer.transport_count == 1) + return; + /* Find the next transport in a round-robin fashion. 
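 *
 * (Editor's note: the associola.c hunk above adds an early return for
 * single-homed associations,
 *
 *	if (asoc->peer.transport_count == 1)
 *		return;
 *
 * since with only one peer transport there is no alternate path to
 * rotate the retransmission path to.)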
*/ t = asoc->peer.retran_path; pos = &t->transports; @@ -1217,6 +1229,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) t = list_entry(pos, struct sctp_transport, transports); + /* We have exhausted the list, but didn't find any + * other active transports. If so, use the next + * transport. + */ + if (t == asoc->peer.retran_path) { + t = next; + break; + } + /* Try to find an active transport. */ if ((t->state == SCTP_ACTIVE) || @@ -1229,15 +1250,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (!next) next = t; } - - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; - break; - } } asoc->peer.retran_path = t; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e45e44c..a2f4d4d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ -static void sctp_v6_get_saddr(struct sctp_association *asoc, +static void sctp_v6_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, - inet6_sk(asoc->base.sk)->srcprefs, + inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); @@ -726,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); } +static void sctp_v6_ecn_capable(struct sock *sk) +{ + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + /* Initialize a PF_INET6 socket msg_name. */ static void sctp_inet6_msgname(char *msgname, int *addr_len) { @@ -996,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = { .skb_iif = sctp_v6_skb_iif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, + .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT diff --git a/net/sctp/output.c b/net/sctp/output.c index cf4f9fb..6d45bae 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - INET_ECN_xmit(nskb->sk); + (*tp->af_specific->ecn_capable)(nskb->sk); /* Set up the IP options. 
*/ /* BUG: not implemented diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 59edfd2..ace6770 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + q->fast_rtx = 0; q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; @@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); + q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); @@ -518,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by * following the procedures outlined in C1 - C5. */ - sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); + if (reason == SCTP_RTXR_T3_RTX) + sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); - error = sctp_outq_flush(q, /* rtx_timeout */ 1); + /* Flush the queues only on timeout, since fast_rtx is only + * triggered during sack processing and the queue + * will be flushed at the end. + */ + if (reason != SCTP_RTXR_FAST_RTX) + error = sctp_outq_flush(q, /* rtx_timeout */ 1); if (error) q->asoc->base.sk->sk_err = -error; @@ -538,17 +546,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { struct list_head *lqueue; - struct list_head *lchunk; struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; struct sctp_association *asoc; + int fast_rtx; int error = 0; + int timer = 0; + int done = 0; asoc = q->asoc; lqueue = &q->retransmit; + fast_rtx = q->fast_rtx; - /* RFC 2960 6.3.3 Handle T3-rtx Expiration + /* This loop handles time-out retransmissions, fast retransmissions, + * and retransmissions due to the opening of the window. + * + * RFC 2960 6.3.3 Handle T3-rtx Expiration * * E3) Determine how many of the earliest (i.e., lowest TSN) * outstanding DATA chunks for the address for which the @@ -563,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * [Just to be painfully clear, if we are retransmitting * because a timeout just happened, we should send only ONE * packet of retransmitted data.] + * + * For fast retransmissions we also send only ONE packet. However, + * if we are just flushing the queue due to open window, we'll + * try to send as much as possible. */ - lchunk = sctp_list_dequeue(lqueue); - - while (lchunk) { - chunk = list_entry(lchunk, struct sctp_chunk, - transmitted_list); + list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the @@ -576,58 +590,60 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. */ if (chunk->tsn_gap_acked) { - list_add_tail(lchunk, &transport->transmitted); - lchunk = sctp_list_dequeue(lqueue); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } + /* If we are doing fast retransmit, ignore non-fast_retransmit + * chunks + */ + if (fast_rtx && !chunk->fast_retransmit) + continue; + /* Attempt to append this chunk to the packet. 
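 *
 * (Editor's note: list_for_each_entry_safe() is used above because the
 * loop body unlinks entries while iterating; the _safe variant caches
 * the next element in chunk1, so
 *
 *	list_del(&chunk->transmitted_list);
 *	list_add_tail(&chunk->transmitted_list, &transport->transmitted);
 *
 * cannot invalidate the traversal.)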
*/ status = sctp_packet_append_chunk(pkt, chunk); switch (status) { case SCTP_XMIT_PMTU_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout) { - list_add(lchunk, lqueue); - lchunk = NULL; - } + if (rtx_timeout || fast_rtx) + done = 1; - /* Bundle lchunk in the next round. */ + /* Bundle next chunk in the next round. */ break; case SCTP_XMIT_RWND_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA as there is no more room * at the receiver. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; case SCTP_XMIT_NAGLE_DELAY: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; default: /* The append was successful, so add this chunk to * the transmitted list. */ - list_add_tail(lchunk, &transport->transmitted); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. @@ -635,27 +651,44 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit > 0) chunk->fast_retransmit = -1; - *start_timer = 1; - q->empty = 0; + /* Force start T3-rtx timer when fast retransmitting + * the earliest outstanding TSN + */ + if (!timer && fast_rtx && + ntohl(chunk->subh.data_hdr->tsn) == + asoc->ctsn_ack_point + 1) + timer = 2; - /* Retrieve a new chunk to bundle. */ - lchunk = sctp_list_dequeue(lqueue); + q->empty = 0; break; } - /* If we are here due to a retransmit timeout or a fast - * retransmit and if there are any chunks left in the retransmit - * queue that could not fit in the PMTU sized packet, they need - * to be marked as ineligible for a subsequent fast retransmit. - */ - if (rtx_timeout && !lchunk) { - list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; - } + /* Set the timer if there were no errors */ + if (!error && !timer) + timer = 1; + + if (done) + break; + } + + /* If we are here due to a retransmit timeout or a fast + * retransmit and if there are any chunks left in the retransmit + * queue that could not fit in the PMTU sized packet, they need + * to be marked as ineligible for a subsequent fast retransmit. + */ + if (rtx_timeout || fast_rtx) { + list_for_each_entry(chunk1, lqueue, transmitted_list) { + if (chunk1->fast_retransmit > 0) + chunk1->fast_retransmit = -1; } } + *start_timer = timer; + + /* Clear fast retransmit hint */ + if (fast_rtx) + q->fast_rtx = 0; + return error; } @@ -862,7 +895,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, + start_timer-1); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. 
@@ -977,7 +1011,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, start_timer-1); q->empty = 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0ec234b..9258dfe 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -108,14 +108,23 @@ static __init int sctp_proc_init(void) } if (sctp_snmp_proc_init()) - goto out_nomem; + goto out_snmp_proc_init; if (sctp_eps_proc_init()) - goto out_nomem; + goto out_eps_proc_init; if (sctp_assocs_proc_init()) - goto out_nomem; + goto out_assocs_proc_init; return 0; +out_assocs_proc_init: + sctp_eps_proc_exit(); +out_eps_proc_init: + sctp_snmp_proc_exit(); +out_snmp_proc_init: + if (proc_net_sctp) { + proc_net_sctp = NULL; + remove_proc_entry("sctp", init_net.proc_net); + } out_nomem: return -ENOMEM; } @@ -470,11 +479,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ + sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) continue; - sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } @@ -519,7 +528,8 @@ out: /* For v4, the source address is cached in the route entry(dst). So no need * to cache it separately and hence this is an empty routine. */ -static void sctp_v4_get_saddr(struct sctp_association *asoc, +static void sctp_v4_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -616,6 +626,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } +static void sctp_v4_ecn_capable(struct sock *sk) +{ + INET_ECN_xmit(sk); +} + /* Event handler for inet address addition/deletion events. 
* The sctp_local_addr_list needs to be protected by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -934,6 +949,7 @@ static struct sctp_af sctp_af_inet = { .skb_iif = sctp_v4_skb_iif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, + .ecn_capable = sctp_v4_ecn_capable, .net_header_len = sizeof(struct iphdr), .sockaddr_len = sizeof(struct sockaddr_in), #ifdef CONFIG_COMPAT diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 81b6064..bbc7107 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2418,7 +2418,8 @@ static int sctp_process_param(struct sctp_association *asoc, break; case SCTP_PARAM_IPV6_ADDRESS: - asoc->peer.ipv6_address = 1; + if (PF_INET6 == asoc->base.sk->sk_family) + asoc->peer.ipv6_address = 1; break; case SCTP_PARAM_HOST_NAME_ADDRESS: @@ -2829,6 +2830,19 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, addr_param = (union sctp_addr_param *) ((void *)asconf_param + sizeof(sctp_addip_param_t)); + switch (addr_param->v4.param_hdr.type) { + case SCTP_PARAM_IPV6_ADDRESS: + if (!asoc->peer.ipv6_address) + return SCTP_ERROR_INV_PARAM; + break; + case SCTP_PARAM_IPV4_ADDRESS: + if (!asoc->peer.ipv4_address) + return SCTP_ERROR_INV_PARAM; + break; + default: + return SCTP_ERROR_INV_PARAM; + } + af = sctp_get_af_specific(param_type2af(addr_param->v4.param_hdr.type)); if (unlikely(!af)) return SCTP_ERROR_INV_PARAM; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 0c9d5a6..fcdb45d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -5899,12 +5899,6 @@ static int sctp_eat_data(const struct sctp_association *asoc, return SCTP_IERROR_NO_DATA; } - /* If definitely accepting the DATA chunk, record its TSN, otherwise - * wait for renege processing. - */ - if (SCTP_CMD_CHUNK_ULP == deliver) - sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); - chunk->data_accepted = 1; /* Note: Some chunks may get overcounted (if we drop) or overcounted @@ -5924,6 +5918,9 @@ static int sctp_eat_data(const struct sctp_association *asoc, * and discard the DATA chunk. */ if (ntohs(data_hdr->stream) >= asoc->c.sinit_max_instreams) { + /* Mark tsn as received even though we drop it */ + sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn)); + err = sctp_make_op_error(asoc, chunk, SCTP_ERROR_INV_STRM, &data_hdr->stream, sizeof(data_hdr->stream)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e7e3baf..0dbcde6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; - if (getaddrs.addr_num <= 0) return -EINVAL; + if (getaddrs.addr_num <= 0 || + getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) + return -EINVAL; /* * For UDP-style sockets, id specifies the association to query. 
* If the id field is set to the value '0' then the locally bound diff --git a/net/sctp/transport.c b/net/sctp/transport.c index f4938f6..3f34f61 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; + peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_used = jiffies; @@ -190,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_timers(struct sctp_transport *transport, int force) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -200,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) * address. */ - if (!timer_pending(&transport->T3_rtx_timer)) + if (force || !timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); @@ -291,7 +292,7 @@ void sctp_transport_route(struct sctp_transport *transport, if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else - af->get_saddr(asoc, dst, daddr, &transport->saddr); + af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); transport->dst = dst; if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { @@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd = transport->cwnd; flight_size = transport->flight_size; + /* See if we need to exit Fast Recovery first */ + if (transport->fast_recovery && + TSN_lte(transport->fast_recovery_exit, sack_ctsn)) + transport->fast_recovery = 0; + /* The appropriate cwnd increase algorithm is performed if, and only - * if the cumulative TSN has advanced and the congestion window is + * if the cumulative TSN would have advanced and the congestion window is * being fully utilized. */ - if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || + if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || (flight_size < cwnd)) return; @@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { - /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less - * than or equal to ssthresh an SCTP endpoint MUST use the - * slow start algorithm to increase cwnd only if the current - * congestion window is being fully utilized and an incoming - * SACK advances the Cumulative TSN Ack Point. Only when these - * two conditions are met can the cwnd be increased otherwise - * the cwnd MUST not be increased. If these conditions are met - * then cwnd MUST be increased by at most the lesser of - * 1) the total size of the previously outstanding DATA - * chunk(s) acknowledged, and 2) the destination's path MTU. + /* RFC 4960 7.2.1 + * o When cwnd is less than or equal to ssthresh, an SCTP + * endpoint MUST use the slow-start algorithm to increase + * cwnd only if the current congestion window is being fully + * utilized, an incoming SACK advances the Cumulative TSN + * Ack Point, and the data sender is not in Fast Recovery. + * Only when these three conditions are met can the cwnd be + * increased; otherwise, the cwnd MUST not be increased. 
+ * If these conditions are met, then cwnd MUST be increased + * by, at most, the lesser of 1) the total size of the + * previously outstanding DATA chunk(s) acknowledged, and + * 2) the destination's path MTU. This upper bound protects + * against the ACK-Splitting attack outlined in [SAVAGE99]. */ + if (transport->fast_recovery) + return; + if (bytes_acked > pmtu) cwnd += pmtu; else @@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ + if (transport->fast_recovery) + return; + + /* Mark Fast recovery */ + transport->fast_recovery = 1; + transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; @@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; + t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ce6cda6..a1f654a 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -710,6 +710,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, if (!skb) goto fail; + /* Now that all memory allocations for this chunk succeeded, we + * can mark it as received so the tsn_map is updated correctly. + */ + sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); + /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. * diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index d927d9f..744b79f 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -17,8 +17,8 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define RPC_ANONYMOUS_USERID ((uid_t)-2) -#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) +#define RPC_MACHINE_CRED_USERID ((uid_t)0) +#define RPC_MACHINE_CRED_GROUPID ((gid_t)0) struct generic_cred { struct rpc_cred gc_base; @@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred); struct rpc_cred *rpc_lookup_machine_cred(void) { struct auth_cred acred = { - .uid = RPC_ANONYMOUS_USERID, - .gid = RPC_ANONYMOUS_GROUPID, + .uid = RPC_MACHINE_CRED_USERID, + .gid = RPC_MACHINE_CRED_GROUPID, .machine_cred = 1, }; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5905d567..81ae3d6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1144,20 +1144,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) case RPC_GSS_SVC_NONE: break; case RPC_GSS_SVC_INTEGRITY: + /* placeholders for length and seq. number: */ + svc_putnl(resv, 0); + svc_putnl(resv, 0); if (unwrap_integ_data(&rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; + break; + case RPC_GSS_SVC_PRIVACY: /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - break; - case RPC_GSS_SVC_PRIVACY: if (unwrap_priv_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; - /* placeholders for length and seq. 
number: */ - svc_putnl(resv, 0); - svc_putnl(resv, 0); break; default: goto auth_err; @@ -1170,8 +1170,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) goto out; } garbage_args: - /* Restore write pointer to its original value: */ - xdr_ressize_check(rqstp, reject_stat); ret = SVC_GARBAGE; goto out; auth_err: diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index d75530f..c996671 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -316,31 +316,28 @@ static int create_cache_proc_entries(struct cache_detail *cd) cd->proc_ent->owner = cd->owner; cd->channel_ent = cd->content_ent = NULL; - p = proc_create("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations); + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &cache_flush_operations, cd); cd->flush_ent = p; if (p == NULL) goto out_nomem; p->owner = cd->owner; - p->data = cd; if (cd->cache_request || cd->cache_parse) { - p = proc_create("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations); + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &cache_file_operations, cd); cd->channel_ent = p; if (p == NULL) goto out_nomem; p->owner = cd->owner; - p->data = cd; } if (cd->cache_show) { - p = proc_create("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations); + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->proc_ent, &content_file_operations, cd); cd->content_ent = p; if (p == NULL) goto out_nomem; p->owner = cd->owner; - p->data = cd; } return 0; out_nomem: diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0517967..e6fb21b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -243,10 +243,10 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) } EXPORT_SYMBOL_GPL(rpcb_getport_sync); -static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version) +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc) { struct rpc_message msg = { - .rpc_proc = rpcb_next_version[version].rpc_proc, + .rpc_proc = proc, .rpc_argp = map, .rpc_resp = &map->r_port, }; @@ -271,6 +271,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi void rpcb_getport_async(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_procinfo *proc; u32 bind_version; struct rpc_xprt *xprt = task->tk_xprt; struct rpc_clnt *rpcb_clnt; @@ -280,7 +281,6 @@ void rpcb_getport_async(struct rpc_task *task) struct sockaddr *sap = (struct sockaddr *)&addr; size_t salen; int status; - struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, @@ -313,10 +313,12 @@ void rpcb_getport_async(struct rpc_task *task) /* Don't ever use rpcbind v2 for AF_INET6 requests */ switch (sap->sa_family) { case AF_INET: - info = rpcb_next_version; + proc = rpcb_next_version[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; break; case AF_INET6: - info = rpcb_next_version6; + proc = rpcb_next_version6[xprt->bind_index].rpc_proc; + bind_version = rpcb_next_version6[xprt->bind_index].rpc_vers; break; default: status = -EAFNOSUPPORT; @@ -324,14 +326,13 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); goto bailout_nofree; } - if (info[xprt->bind_index].rpc_proc == NULL) { + if (proc == NULL) { xprt->bind_index = 0; status = -EPFNOSUPPORT; dprintk("RPC: %5u %s: 
no more getport versions available\n", task->tk_pid, __func__); goto bailout_nofree; } - bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); @@ -361,22 +362,20 @@ void rpcb_getport_async(struct rpc_task *task) map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index); + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; + /* rpcb_map_release() has freed the arguments */ dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __func__); - goto bailout; + goto bailout_nofree; } rpc_put_task(child); task->tk_xprt->stat.bind_count++; return; -bailout: - kfree(map); - xprt_put(xprt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); bailout_nowake: diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index c6061a4..50b049c 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -224,16 +224,10 @@ EXPORT_SYMBOL_GPL(rpc_print_iostats); static inline struct proc_dir_entry * do_register(const char *name, void *data, const struct file_operations *fops) { - struct proc_dir_entry *ent; - rpc_proc_init(); dprintk("RPC: registering /proc/net/rpc/%s\n", name); - ent = proc_create(name, 0, proc_net_rpc, fops); - if (ent) { - ent->data = data; - } - return ent; + return proc_create_data(name, 0, proc_net_rpc, fops, data); } struct proc_dir_entry * diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d74c2d2..01c7e31 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -18,7 +18,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/module.h> -#include <linux/sched.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/xdr.h> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d8e8d79..e46c825 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -6,30 +6,9 @@ #include <linux/sched.h> #include <linux/errno.h> -#include <linux/fcntl.h> -#include <linux/net.h> -#include <linux/in.h> -#include <linux/inet.h> -#include <linux/udp.h> -#include <linux/tcp.h> -#include <linux/unistd.h> -#include <linux/slab.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/file.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <net/sock.h> -#include <net/checksum.h> -#include <net/ip.h> -#include <net/ipv6.h> -#include <net/tcp_states.h> -#include <linux/uaccess.h> -#include <asm/ioctls.h> - -#include <linux/sunrpc/types.h> -#include <linux/sunrpc/clnt.h> -#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/svc_xprt.h> @@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (!(xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) return; - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) - return; cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 3f30ee6..f24800f 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m, dom = im->m_client->h.name; if (ipv6_addr_v4mapped(&addr)) { - seq_printf(m, "%s" NIPQUAD_FMT "%s\n", + seq_printf(m, "%s " NIPQUAD_FMT " %s\n", im->m_class, ntohl(addr.s6_addr32[3]) >> 24 & 0xff, ntohl(addr.s6_addr32[3]) >> 16 & 0xff, @@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m, 
ntohl(addr.s6_addr32[3]) >> 0 & 0xff, dom); } else { - seq_printf(m, "%s" NIP6_FMT "%s\n", + seq_printf(m, "%s " NIP6_FMT " %s\n", im->m_class, NIP6(addr), dom); } return 0; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index c22d6b6..06ab484 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) * On our side, we need to read into a pagelist. The first page immediately * follows the RPC header. * - * This function returns 1 to indicate success. The data is not yet in + * This function returns: + * 0 - No error and no read-list found. + * + * 1 - Successful read-list processing. The data is not yet in * the pagelist and therefore the RPC request must be deferred. The * I/O completion will enqueue the transport again and * svc_rdma_recvfrom will complete the request. * + * <0 - Error processing/posting read-list. + * * NOTE: The ctxt must not be touched after the last WR has been posted * because the I/O completion processing may occur on another * processor and free / modify the context. Do not touch! */ @@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, u64 sgl_offset; struct rpcrdma_read_chunk *ch; struct svc_rdma_op_ctxt *ctxt = NULL; - struct svc_rdma_op_ctxt *head; struct svc_rdma_op_ctxt *tmp_sge_ctxt; struct svc_rdma_op_ctxt *tmp_ch_ctxt; struct chunk_sge *ch_sge_ary; @@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); + if (ch_count > RPCSVC_MAXPAGES) + return -EINVAL; sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, sge, ch_sge_ary, ch_count, byte_count); - head = svc_rdma_get_context(xprt); sgl_offset = 0; ch_no = 0; for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++, ch_no++) { next_sge: - if (!ctxt) - ctxt = head; - else { - ctxt->next = svc_rdma_get_context(xprt); - ctxt = ctxt->next; - } - ctxt->next = NULL; + ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; - clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); /* Prepare READ WR */ @@ -347,20 +345,15 @@ next_sge: * the client and the RPC needs to be enqueued. 
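 *
 * (Editor's illustration of the tri-state return contract documented in
 * the rdma_read_xdr() comment above,
 *
 *	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
 *	if (ret > 0)
 *		return defer_read();
 *	if (ret < 0)
 *		return drop_read();
 *
 * where defer_read() and drop_read() are hypothetical stand-ins for the
 * svc_xprt_received() and svc_rdma_put_context() handling that
 * svc_rdma_recvfrom() gains later in this patch.)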
*/ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); - ctxt->next = hdr_ctxt; - hdr_ctxt->next = head; + ctxt->read_hdr = hdr_ctxt; } /* Post the read */ err = svc_rdma_send(xprt, &read_wr); if (err) { - printk(KERN_ERR "svcrdma: Error posting send = %d\n", + printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n", err); - /* - * Break the circular list so free knows when - * to stop if the error happened to occur on - * the last read - */ - ctxt->next = NULL; + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_rdma_put_context(ctxt, 0); goto out; } atomic_inc(&rdma_stat_read); @@ -371,7 +364,7 @@ next_sge: goto next_sge; } sgl_offset = 0; - err = 0; + err = 1; } out: @@ -389,25 +382,12 @@ next_sge: while (rqstp->rq_resused) rqstp->rq_respages[--rqstp->rq_resused] = NULL; - if (err) { - printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err); - set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); - /* Free the linked list of read contexts */ - while (head != NULL) { - ctxt = head->next; - svc_rdma_put_context(head, 1); - head = ctxt; - } - return 0; - } - - return 1; + return err; } static int rdma_read_complete(struct svc_rqst *rqstp, - struct svc_rdma_op_ctxt *data) + struct svc_rdma_op_ctxt *head) { - struct svc_rdma_op_ctxt *head = data->next; int page_no; int ret; @@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_arg.len = head->arg.len; rqstp->rq_arg.buflen = head->arg.buflen; + /* Free the context */ + svc_rdma_put_context(head, 0); + /* XXX: What should this be? */ rqstp->rq_prot = IPPROTO_MAX; - - /* - * Free the contexts we used to build the RDMA_READ. We have - * to be careful here because the context list uses the same - * next pointer used to chain the contexts associated with the - * RDMA_READ - */ - data->next = NULL; /* terminate circular list */ - do { - data = head->next; - svc_rdma_put_context(head, 0); - head = data; - } while (head != NULL); + svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt); ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len @@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp, ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, rqstp->rq_arg.head[0].iov_len); - /* Indicate that we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; svc_xprt_received(rqstp->rq_xprt); return ret; } @@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) dprintk("svcrdma: rqstp=%p\n", rqstp); - /* - * The rq_xprt_ctxt indicates if we've consumed an RQ credit - * or not. It is used in the rdma xpo_release_rqst function to - * determine whether or not to return an RQ WQE to the RQ. - */ - rqstp->rq_xprt_ctxt = NULL; - spin_lock_bh(&rdma_xprt->sc_read_complete_lock); if (!list_empty(&rdma_xprt->sc_read_complete_q)) { ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, @@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) /* If the request is invalid, reply with an error */ if (len < 0) { if (len == -ENOSYS) - (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); + svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); goto close_out; } - /* Read read-list data. If we would need to wait, defer - * it. Not that in this case, we don't return the RQ credit - * until after the read completes. - */ - if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { + /* Read read-list data. */ + ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt); + if (ret > 0) { + /* read-list posted, defer until data received from client. 
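In place of the old circular context chain, only the last posted RDMA_READ context now carries a pointer back to the header context (ctxt->read_hdr), flagged by RDMACTXT_F_LAST_CTXT. For reference, the consuming half of that handoff, condensed from the sq_cq_reap() hunk later in this patch rather than new code:

        /* Completion side: only the last read ctxt hands the header off. */
        if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
                struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;

                BUG_ON(!read_hdr);
                spin_lock_bh(&xprt->sc_read_complete_lock);
                list_add_tail(&read_hdr->dto_q, &xprt->sc_read_complete_q);
                spin_unlock_bh(&xprt->sc_read_complete_lock);
                svc_xprt_enqueue(&xprt->sc_xprt);
        }
        svc_rdma_put_context(ctxt, 0);  /* each read ctxt is freed on completion */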
*/ svc_xprt_received(xprt); return 0; } - - /* Indicate we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; + if (ret < 0) { + /* Post of read-list failed, free context. */ + svc_rdma_put_context(ctxt, 1); + return 0; + } ret = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len @@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) return ret; close_out: - if (ctxt) { + if (ctxt) svc_rdma_put_context(ctxt, 1); - /* Indicate we've consumed an RQ credit */ - rqstp->rq_xprt_ctxt = rqstp->rq_xprt; - } dprintk("svcrdma: transport %p is closing\n", xprt); /* * Set the close bit and enqueue it. svc_recv will see the diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 981f190..fb82b1b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma, int page_no; int ret; + /* Post a recv buffer to handle another request. */ + ret = svc_rdma_post_recv(rdma); + if (ret) { + printk(KERN_INFO + "svcrdma: could not post a receive buffer, err=%d." + "Closing transport %p.\n", ret, rdma); + set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); + svc_rdma_put_context(ctxt, 0); + return -ENOTCONN; + } + /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index af408fc..e132509 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) spin_lock_bh(&xprt->sc_ctxt_lock); if (ctxt) { at_least_one = 1; - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; + INIT_LIST_HEAD(&ctxt->free_list); + list_add(&ctxt->free_list, &xprt->sc_ctxt_free); } else { /* kmalloc failed...give up for now */ xprt->sc_ctxt_cnt--; @@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) while (1) { spin_lock_bh(&xprt->sc_ctxt_lock); - if (unlikely(xprt->sc_ctxt_head == NULL)) { + if (unlikely(list_empty(&xprt->sc_ctxt_free))) { /* Try to bump my cache. 
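Here begins the conversion of the hand-rolled sc_ctxt_head singly-linked free list to a standard struct list_head free list; the following hunks finish it. The pattern in self-contained form, with generic foo_* names rather than the svcrdma structures:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo_ctxt {
        struct list_head free_list;     /* links idle contexts together */
        /* ... payload ... */
};

static LIST_HEAD(foo_free);
static DEFINE_SPINLOCK(foo_free_lock);

static struct foo_ctxt *foo_get_ctxt(void)
{
        struct foo_ctxt *ctxt = NULL;

        spin_lock_bh(&foo_free_lock);
        if (!list_empty(&foo_free)) {
                ctxt = list_entry(foo_free.next, struct foo_ctxt, free_list);
                list_del_init(&ctxt->free_list);
        }
        spin_unlock_bh(&foo_free_lock);
        if (!ctxt)
                ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
        return ctxt;
}

static void foo_put_ctxt(struct foo_ctxt *ctxt)
{
        spin_lock_bh(&foo_free_lock);
        list_add(&ctxt->free_list, &foo_free);
        spin_unlock_bh(&foo_free_lock);
}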
*/ spin_unlock_bh(&xprt->sc_ctxt_lock); @@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); continue; } - ctxt = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt->next; + ctxt = list_entry(xprt->sc_ctxt_free.next, + struct svc_rdma_op_ctxt, + free_list); + list_del_init(&ctxt->free_list); spin_unlock_bh(&xprt->sc_ctxt_lock); ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; + atomic_inc(&xprt->sc_ctxt_used); break; } return ctxt; @@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) put_page(ctxt->pages[i]); for (i = 0; i < ctxt->count; i++) - dma_unmap_single(xprt->sc_cm_id->device->dma_device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); + spin_lock_bh(&xprt->sc_ctxt_lock); - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; + list_add(&ctxt->free_list, &xprt->sc_ctxt_free); spin_unlock_bh(&xprt->sc_ctxt_lock); + atomic_dec(&xprt->sc_ctxt_used); } /* ib_cq event handler */ @@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data) list_del_init(&xprt->sc_dto_q); spin_unlock_irqrestore(&dto_lock, flags); - if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { - ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); - rq_cq_reap(xprt); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - /* - * If data arrived before established event, - * don't enqueue. This defers RPC I/O until the - * RDMA connection is complete. - */ - if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) - svc_xprt_enqueue(&xprt->sc_xprt); - } - - if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { - ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); - sq_cq_reap(xprt); - } + rq_cq_reap(xprt); + sq_cq_reap(xprt); svc_xprt_put(&xprt->sc_xprt); spin_lock_irqsave(&dto_lock, flags); @@ -263,11 +252,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; unsigned long flags; + /* Guard against unconditional flush call for destroyed QP */ + if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) + return; + /* * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an SQ * completion. - */ + */ set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); /* @@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context) * * Take all completing WC off the CQE and enqueue the associated DTO * context on the dto_q for the transport. + * + * Note that caller must hold a transport reference. 
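With dto_tasklet_func() reduced to calling the two reap routines, each routine takes over its own pending bit: return unless the bit was set, re-arm the CQ, then drain it, so a completion that arrives mid-drain signals a fresh pass. A condensed fragment mirroring the rq_cq_reap() hunk just below, not new logic:

        if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
                return;         /* nothing was signalled for this CQ */

        /* Re-arm first so a completion arriving mid-drain re-signals. */
        ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(xprt->sc_rq_cq, 1, &wc) > 0) {
                /* ... move each work completion onto the dto queue ... */
        }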
*/ static void rq_cq_reap(struct svcxprt_rdma *xprt) { @@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) struct ib_wc wc; struct svc_rdma_op_ctxt *ctxt = NULL; + if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) + return; + + ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_rq_poll); - spin_lock_bh(&xprt->sc_rq_dto_lock); while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; ctxt->wc_status = wc.status; ctxt->byte_len = wc.byte_len; if (wc.status != IB_WC_SUCCESS) { /* Close the transport */ + dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); + svc_xprt_put(&xprt->sc_xprt); continue; } + spin_lock_bh(&xprt->sc_rq_dto_lock); list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_put(&xprt->sc_xprt); } - spin_unlock_bh(&xprt->sc_rq_dto_lock); if (ctxt) atomic_inc(&rdma_stat_rq_prod); + + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + /* + * If data arrived before established event, + * don't enqueue. This defers RPC I/O until the + * RDMA connection is complete. + */ + if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) + svc_xprt_enqueue(&xprt->sc_xprt); } /* * Send Queue Completion Handler - potentially called on interrupt context. + * + * Note that caller must hold a transport reference. */ static void sq_cq_reap(struct svcxprt_rdma *xprt) { @@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; + + if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) + return; + + ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; @@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) case IB_WR_RDMA_READ: if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); spin_lock_bh(&xprt->sc_read_complete_lock); - list_add_tail(&ctxt->dto_q, + list_add_tail(&read_hdr->dto_q, &xprt->sc_read_complete_q); spin_unlock_bh(&xprt->sc_read_complete_lock); svc_xprt_enqueue(&xprt->sc_xprt); } + svc_rdma_put_context(ctxt, 0); break; default: @@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) wc.opcode, wc.status); break; } + svc_xprt_put(&xprt->sc_xprt); } if (ctxt) @@ -376,11 +397,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context) struct svcxprt_rdma *xprt = cq_context; unsigned long flags; + /* Guard against unconditional flush call for destroyed QP */ + if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0) + return; + /* * Set the bit regardless of whether or not it's on the list * because it may be on the list already due to an RQ * completion. 
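The svc_xprt_put() added per reaped completion pairs with an svc_xprt_get() taken when the WR is posted (see the svc_rdma_post_recv() and svc_rdma_send() hunks further on), so the transport cannot be freed while work requests are in flight. The pairing in isolation, condensed from those hunks:

        /* Post side: pin the transport for the lifetime of the WR. */
        svc_xprt_get(&xprt->sc_xprt);
        ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
        if (ret)
                svc_xprt_put(&xprt->sc_xprt);   /* HW never saw it; unpin */

        /* Completion side, once per reaped WC: */
        svc_xprt_put(&xprt->sc_xprt);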
- */ + */ set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); /* @@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt, xprt->sc_ctxt_max = ctxt_max; xprt->sc_ctxt_bump = ctxt_bump; xprt->sc_ctxt_cnt = 0; - xprt->sc_ctxt_head = NULL; + atomic_set(&xprt->sc_ctxt_used, 0); + + INIT_LIST_HEAD(&xprt->sc_ctxt_free); for (i = 0; i < ctxt_count; i++) { ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); if (ctxt) { - ctxt->next = xprt->sc_ctxt_head; - xprt->sc_ctxt_head = ctxt; + INIT_LIST_HEAD(&ctxt->free_list); + list_add(&ctxt->free_list, &xprt->sc_ctxt_free); xprt->sc_ctxt_cnt++; } } } -static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) +static void destroy_context_cache(struct svcxprt_rdma *xprt) { - struct svc_rdma_op_ctxt *next; - if (!ctxt) - return; - - do { - next = ctxt->next; + while (!list_empty(&xprt->sc_ctxt_free)) { + struct svc_rdma_op_ctxt *ctxt; + ctxt = list_entry(xprt->sc_ctxt_free.next, + struct svc_rdma_op_ctxt, + free_list); + list_del_init(&ctxt->free_list); kfree(ctxt); - ctxt = next; - } while (next); + } } static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, @@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, reqs + cma_xprt->sc_sq_depth + RPCRDMA_MAX_THREADS + 1); /* max */ - if (!cma_xprt->sc_ctxt_head) { + if (list_empty(&cma_xprt->sc_ctxt_free)) { kfree(cma_xprt); return NULL; } @@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) recv_wr.num_sge = ctxt->count; recv_wr.wr_id = (u64)(unsigned long)ctxt; + svc_xprt_get(&xprt->sc_xprt); ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); + if (ret) { + svc_xprt_put(&xprt->sc_xprt); + svc_rdma_put_context(ctxt, 1); + } return ret; } @@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id) { struct svcxprt_rdma *listen_xprt = new_cma_id->context; struct svcxprt_rdma *newxprt; + struct sockaddr *sa; /* Create a new transport */ newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); @@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id) dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", newxprt, newxprt->sc_cm_id, listen_xprt); + /* Set the local and remote addresses in the transport */ + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + /* * Enqueue the new transport on the accept queue of the listening * transport @@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id, if (xprt) { set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + svc_xprt_put(xprt); } break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, cma_xprt = rdma_create_xprt(serv, 1); if (!cma_xprt) - return ERR_PTR(ENOMEM); + return ERR_PTR(-ENOMEM); xprt = &cma_xprt->sc_xprt; listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); if (IS_ERR(listen_id)) { - svc_xprt_put(&cma_xprt->sc_xprt); - dprintk("svcrdma: rdma_create_id failed = %ld\n", - PTR_ERR(listen_id)); - return (void *)listen_id; + ret = PTR_ERR(listen_id); + dprintk("svcrdma: rdma_create_id failed = %d\n", ret); + goto err0; } + ret = rdma_bind_addr(listen_id, sa); if (ret) { - rdma_destroy_id(listen_id); - svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); - 
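Together with the ERR_PTR(-ENOMEM) sign fix, svc_rdma_create() moves from per-branch cleanup to reverse-order unwind labels (the err1/err0 labels appear in the continuation just below). The shape as a generic self-contained sketch; struct foo and the foo_* helpers are hypothetical:

#include <linux/err.h>
#include <linux/slab.h>

/* Hypothetical resource helpers, declarations only. */
void *foo_id_create(void);
int foo_bind(void *id);
void foo_id_destroy(void *id);

struct foo {
        void *id;
};

static struct foo *foo_create(void)
{
        struct foo *f;
        void *id;
        int ret;

        f = kzalloc(sizeof(*f), GFP_KERNEL);
        if (!f)
                return ERR_PTR(-ENOMEM);

        id = foo_id_create();           /* resource #1 */
        if (IS_ERR(id)) {
                ret = PTR_ERR(id);
                goto err0;
        }

        ret = foo_bind(id);             /* resource #2 */
        if (ret)
                goto err1;

        f->id = id;
        return f;

err1:
        foo_id_destroy(id);             /* undo #1 */
err0:
        kfree(f);                       /* undo the allocation */
        return ERR_PTR(ret);
}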
return ERR_PTR(ret); + goto err1; } cma_xprt->sc_cm_id = listen_id; ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); if (ret) { - rdma_destroy_id(listen_id); - svc_xprt_put(&cma_xprt->sc_xprt); dprintk("svcrdma: rdma_listen failed = %d\n", ret); - return ERR_PTR(ret); + goto err1; } /* @@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); return &cma_xprt->sc_xprt; + + err1: + rdma_destroy_id(listen_id); + err0: + kfree(cma_xprt); + return ERR_PTR(ret); } /* @@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; - struct sockaddr *sa; int ret; int i; @@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; } - svc_xprt_get(&newxprt->sc_xprt); newxprt->sc_qp = newxprt->sc_cm_id->qp; /* Register all of physical memory */ @@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) /* Swap out the handler */ newxprt->sc_cm_id->event_handler = rdma_cma_handler; + /* + * Arm the CQs for the SQ and RQ before accepting so we can't + * miss the first message + */ + ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); + /* Accept Connection */ set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); memset(&conn_param, 0, sizeof conn_param); @@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_requests, newxprt->sc_ord); - /* Set the local and remote addresses in the transport */ - sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); - sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; - svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); - - ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); - ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); return &newxprt->sc_xprt; errout: dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); /* Take a reference in case the DTO handler runs */ svc_xprt_get(&newxprt->sc_xprt); - if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { + if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) ib_destroy_qp(newxprt->sc_qp); - svc_xprt_put(&newxprt->sc_xprt); - } rdma_destroy_id(newxprt->sc_cm_id); /* This call to put will destroy the transport */ svc_xprt_put(&newxprt->sc_xprt); return NULL; } -/* - * Post an RQ WQE to the RQ when the rqst is being released. This - * effectively returns an RQ credit to the client. The rq_xprt_ctxt - * will be null if the request is deferred due to an RDMA_READ or the - * transport had no data ready (EAGAIN). Note that an RPC deferred in - * svc_process will still return the credit, this is because the data - * is copied and no longer consume a WQE/WC. - */ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) { - int err; - struct svcxprt_rdma *rdma = - container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); - if (rqstp->rq_xprt_ctxt) { - BUG_ON(rqstp->rq_xprt_ctxt != rdma); - err = svc_rdma_post_recv(rdma); - if (err) - dprintk("svcrdma: failed to post an RQ WQE error=%d\n", - err); - } - rqstp->rq_xprt_ctxt = NULL; } /* - * When connected, an svc_xprt has at least three references: - * - * - A reference held by the QP. 
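Arming both CQs with ib_req_notify_cq() before accepting (hunk above) closes the window in which the client's first message could complete on an unarmed CQ and never raise an event. Reduced to the essential ordering; the rdma_accept() call itself sits just past the lines shown:

        /* 1. Arm the CQs so the first completion raises an event. */
        ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
        ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);

        /* 2. Only then allow traffic to start. */
        ret = rdma_accept(newxprt->sc_cm_id, &conn_param);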
We still hold that here because this - * code deletes the QP and puts the reference. + * When connected, an svc_xprt has at least two references: * * - A reference held by the cm_id between the ESTABLISHED and * DISCONNECTED events. If the remote peer disconnected first, this @@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp) * - A reference held by the svc_recv code that called this function * as part of close processing. * - * At a minimum two references should still be held. + * At a minimum one reference should still be held. */ static void svc_rdma_detach(struct svc_xprt *xprt) { @@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt) /* Disconnect and flush posted WQE */ rdma_disconnect(rdma->sc_cm_id); - - /* Destroy the QP if present (not a listener) */ - if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) { - ib_destroy_qp(rdma->sc_qp); - svc_xprt_put(xprt); - } - - /* Destroy the CM ID */ - rdma_destroy_id(rdma->sc_cm_id); } -static void svc_rdma_free(struct svc_xprt *xprt) +static void __svc_rdma_free(struct work_struct *work) { - struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; + struct svcxprt_rdma *rdma = + container_of(work, struct svcxprt_rdma, sc_work); dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); + /* We should only be called from kref_put */ - BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); + BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0); + + /* + * Destroy queued, but not processed read completions. Note + * that this cleanup has to be done before destroying the + * cm_id because the device ptr is needed to unmap the dma in + * svc_rdma_put_context. + */ + spin_lock_bh(&rdma->sc_read_complete_lock); + while (!list_empty(&rdma->sc_read_complete_q)) { + struct svc_rdma_op_ctxt *ctxt; + ctxt = list_entry(rdma->sc_read_complete_q.next, + struct svc_rdma_op_ctxt, + dto_q); + list_del_init(&ctxt->dto_q); + svc_rdma_put_context(ctxt, 1); + } + spin_unlock_bh(&rdma->sc_read_complete_lock); + + /* Destroy queued, but not processed recv completions */ + spin_lock_bh(&rdma->sc_rq_dto_lock); + while (!list_empty(&rdma->sc_rq_dto_q)) { + struct svc_rdma_op_ctxt *ctxt; + ctxt = list_entry(rdma->sc_rq_dto_q.next, + struct svc_rdma_op_ctxt, + dto_q); + list_del_init(&ctxt->dto_q); + svc_rdma_put_context(ctxt, 1); + } + spin_unlock_bh(&rdma->sc_rq_dto_lock); + + /* Warn if we leaked a resource or under-referenced */ + WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); + + /* Destroy the QP if present (not a listener) */ + if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) + ib_destroy_qp(rdma->sc_qp); + if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) ib_destroy_cq(rdma->sc_sq_cq); @@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt) if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) ib_dealloc_pd(rdma->sc_pd); - destroy_context_cache(rdma->sc_ctxt_head); + /* Destroy the CM ID */ + rdma_destroy_id(rdma->sc_cm_id); + + destroy_context_cache(rdma); kfree(rdma); } +static void svc_rdma_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + INIT_WORK(&rdma->sc_work, __svc_rdma_free); + schedule_work(&rdma->sc_work); +} + static int svc_rdma_has_wspace(struct svc_xprt *xprt) { struct svcxprt_rdma *rdma = @@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) int ret; if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) - return 0; + return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned
long)wr->wr_id)->wr_op != @@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { spin_unlock_bh(&xprt->sc_lock); atomic_inc(&rdma_stat_sq_starve); - /* See if we can reap some SQ WR */ + + /* See if we can opportunistically reap SQ WR to make room */ sq_cq_reap(xprt); /* Wait until SQ WR available if SQ still full */ @@ -1041,22 +1097,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) continue; } /* Bumped used SQ WR count and post */ + svc_xprt_get(&xprt->sc_xprt); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); if (!ret) atomic_inc(&xprt->sc_sq_count); - else + else { + svc_xprt_put(&xprt->sc_xprt); dprintk("svcrdma: failed to post SQ WR rc=%d, " "sc_sq_count=%d, sc_sq_depth=%d\n", ret, atomic_read(&xprt->sc_sq_count), xprt->sc_sq_depth); + } spin_unlock_bh(&xprt->sc_lock); break; } return ret; } -int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, - enum rpcrdma_errcode err) +void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, + enum rpcrdma_errcode err) { struct ib_send_wr err_wr; struct ib_sge sge; @@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, /* Post It */ ret = svc_rdma_send(xprt, &err_wr); if (ret) { - dprintk("svcrdma: Error posting send = %d\n", ret); + dprintk("svcrdma: Error %d posting send for protocol error\n", + ret); svc_rdma_put_context(ctxt, 1); } - - return ret; } diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 665e856..b4f0525 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -82,6 +82,6 @@ EXPORT_SYMBOL_GPL(register_net_sysctl_table); void unregister_net_sysctl_table(struct ctl_table_header *header) { - return unregister_sysctl_table(header); + unregister_sysctl_table(header); } EXPORT_SYMBOL_GPL(unregister_net_sysctl_table); diff --git a/net/tipc/core.h b/net/tipc/core.h index 325404f..5a0e487 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -279,15 +279,14 @@ static inline void k_term_timer(struct timer_list *timer) /* * TIPC message buffer code * - * TIPC message buffer headroom reserves space for a link-level header - * (in case the message is sent off-node), - * while ensuring TIPC header is word aligned for quicker access + * TIPC message buffer headroom reserves space for the worst-case + * link-level device header (in case the message is sent off-node). 
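A few hunks back, svc_rdma_free() became a stub that schedules __svc_rdma_free() via a work queue, so the final reference can be dropped from atomic context while the sleeping ib_* teardown runs later in process context. The deferral pattern in self-contained form, with hypothetical foo_* names:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo_xprt {
        struct work_struct work;
        /* ... resources needing sleeping teardown ... */
};

static void __foo_free(struct work_struct *work)
{
        struct foo_xprt *f = container_of(work, struct foo_xprt, work);

        /* Process context: sleeping teardown calls are safe here. */
        kfree(f);
}

static void foo_free(struct foo_xprt *f)
{
        /* Callable from atomic context; defer the real teardown. */
        INIT_WORK(&f->work, __foo_free);
        schedule_work(&f->work);
}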
* - * The largest header currently supported is 18 bytes, which is used when - * the standard 14 byte Ethernet header has 4 added bytes for VLAN info + * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields + * are word aligned for quicker access */ -#define BUF_HEADROOM 20u +#define BUF_HEADROOM LL_MAX_HEADER struct tipc_skb_cb { void *handle; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e18cd36..783317d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -169,6 +169,11 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return (unix_peer(osk) == NULL || unix_our_peer(sk, osk)); } +static inline int unix_recvq_full(struct sock const *sk) +{ + return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; +} + static struct sock *unix_peer_get(struct sock *s) { struct sock *peer; @@ -482,6 +487,8 @@ static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int); static int unix_getname(struct socket *, struct sockaddr *, int *, int); static unsigned int unix_poll(struct file *, struct socket *, poll_table *); +static unsigned int unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct kiocb *, struct socket *, @@ -527,7 +534,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -548,7 +555,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll = datagram_poll, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -983,8 +990,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo) sched = !sock_flag(other, SOCK_DEAD) && !(other->sk_shutdown & RCV_SHUTDOWN) && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog); + unix_recvq_full(other); unix_state_unlock(other); @@ -1058,8 +1064,7 @@ restart: if (other->sk_state != TCP_LISTEN) goto out_unlock; - if (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog) { + if (unix_recvq_full(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1428,9 +1433,7 @@ restart: goto out_unlock; } - if (unix_peer(other) != sk && - (skb_queue_len(&other->sk_receive_queue) > - other->sk_max_ack_backlog)) { + if (unix_peer(other) != sk && unix_recvq_full(other)) { if (!timeo) { err = -EAGAIN; goto out_unlock; @@ -1991,6 +1994,60 @@ static unsigned int unix_poll(struct file * file, struct socket *sock, poll_tabl return mask; } +static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk, *other; + unsigned int mask, writable; + + poll_wait(file, sk->sk_sleep, wait); + mask = 0; + + /* exceptional events? */ + if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + mask |= POLLERR; + if (sk->sk_shutdown & RCV_SHUTDOWN) + mask |= POLLRDHUP; + if (sk->sk_shutdown == SHUTDOWN_MASK) + mask |= POLLHUP; + + /* readable? 
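unix_recvq_full() folds three open-coded comparisons of the peer's receive-queue length against its backlog into one helper; unix_dgram_poll() below becomes another caller. A condensed fragment of the converted stream-connect path above:

        /* Sender: back off while the peer's receive queue is over backlog. */
        if (unix_peer(other) != sk && unix_recvq_full(other)) {
                if (!timeo) {
                        err = -EAGAIN;  /* non-blocking caller */
                        goto out_unlock;
                }
                timeo = unix_wait_for_peer(other, timeo);
                /* ... then restart and re-check ... */
        }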
*/ + if (!skb_queue_empty(&sk->sk_receive_queue) || + (sk->sk_shutdown & RCV_SHUTDOWN)) + mask |= POLLIN | POLLRDNORM; + + /* Connection-based need to check for termination and startup */ + if (sk->sk_type == SOCK_SEQPACKET) { + if (sk->sk_state == TCP_CLOSE) + mask |= POLLHUP; + /* connection hasn't started yet? */ + if (sk->sk_state == TCP_SYN_SENT) + return mask; + } + + /* writable? */ + writable = unix_writable(sk); + if (writable) { + other = unix_peer_get(sk); + if (other) { + if (unix_peer(other) != sk) { + poll_wait(file, &unix_sk(other)->peer_wait, + wait); + if (unix_recvq_full(other)) + writable = 0; + } + + sock_put(other); + } + } + + if (writable) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + else + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + return mask; +} #ifdef CONFIG_PROC_FS static struct sock *first_unix_socket(int *i) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2bdd4dd..fb75f26 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) @@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_station(struct sk_buff *skb, @@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_mpath(struct sk_buff *skb, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 185488d..855bff4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -80,6 +80,23 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { IEEE80211_CHAN_RADAR), }; +static const struct ieee80211_channel_range ieee80211_EU_channels[] = { + /* IEEE 802.11b/g, channels 1..13 */ + RANGE_PWR(2412, 2472, 20, 6, 0), + /* IEEE 802.11a, channel 36*/ + RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40*/ + RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44*/ + RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), + /* IEEE 802.11a, channels 100..140 */ + RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR), +}; + #define REGDOM(_code) \ { \ .code = __stringify(_code), \ @@ -90,6 +107,7 @@ static const struct ieee80211_channel_range ieee80211_JP_channels[] = { static const struct ieee80211_regdomain ieee80211_regdoms[] = { REGDOM(US), REGDOM(JP), + REGDOM(EU), }; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index ac765dd..23a2cc04 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -200,8 +200,8 @@ static struct 
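The four nl80211 fill helpers above now return -EMSGSIZE explicitly after genlmsg_cancel() instead of forwarding its return value, making the "attribute did not fit" case unambiguous to dump callers. A self-contained sketch of the resulting shape; the foo family, command, and attribute are hypothetical:

#include <net/genetlink.h>

/* Hypothetical family, command, and attribute, for illustration only. */
enum { FOO_ATTR_UNSPEC, FOO_ATTR_VALUE, __FOO_ATTR_MAX };
enum { FOO_CMD_UNSPEC, FOO_CMD_GET };

static struct genl_family foo_genl_family = {
        .id             = GENL_ID_GENERATE,
        .name           = "foo",
        .version        = 1,
        .maxattr        = __FOO_ATTR_MAX - 1,
};

static int foo_fill_info(struct sk_buff *msg, u32 pid, u32 seq, int flags,
                         u32 value)
{
        void *hdr = genlmsg_put(msg, pid, seq, &foo_genl_family, flags,
                                FOO_CMD_GET);

        if (!hdr)
                return -EMSGSIZE;

        NLA_PUT_U32(msg, FOO_ATTR_VALUE, value);        /* jumps on overflow */

        return genlmsg_end(msg, hdr);

nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;       /* report "did not fit" explicitly */
}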
xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(ripemd160)", - .compat = "ripemd160", + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 09cd9c0..3f964db 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -25,11 +25,11 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) struct dst_entry *dst = skb->dst; int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); + int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0) - return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); + if (nhead > 0 || ntail > 0) + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - /* Check tail too... */ return 0; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a1b0fbe..04c4150 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -50,19 +50,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) switch (type) { case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ break; default: @@ -288,9 +277,8 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - if (!x->sel.family) + if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; - } /* |
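The xfrm_state_check_space() hunk above implements the old "Check tail too" note: head and tail shortfalls are computed together and repaired with a single pskb_expand_head() call. A sketch following that logic against a 2.6.26-era skb (skb->dst); the max(..., 0) clamps are an added precaution not present in the hunk, since pskb_expand_head() expects non-negative deltas:

#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/dst.h>

static int foo_check_space(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev)
                - skb_headroom(skb);
        int ntail = dst->dev->needed_tailroom - skb_tailroom(skb);

        /* One reallocation covers a shortfall at either end. */
        if (nhead > 0 || ntail > 0)
                return pskb_expand_head(skb, max(nhead, 0), max(ntail, 0),
                                        GFP_ATOMIC);
        return 0;
}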