From 38516ab59fbc5b3bb278cf5e1fe2867c70cff32e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Apr 2010 17:04:50 -0400 Subject: tracing: Let tracepoints have data passed to tracepoint callbacks This patch adds data to be passed to tracepoint callbacks. The created functions from DECLARE_TRACE() now need a mandatory data parameter. For example: DECLARE_TRACE(mytracepoint, int value, value) Will create the register function: int register_trace_mytracepoint((void(*)(void *data, int value))probe, void *data); As the first argument, all callbacks (probes) must take a (void *data) parameter. So a callback for the above tracepoint will look like: void myprobe(void *data, int value) { } The callback may choose to ignore the data parameter. This change allows callbacks to register a private data pointer along with the function probe. void mycallback(void *data, int value); register_trace_mytracepoint(mycallback, mydata); Then the mycallback() will receive the "mydata" as the first parameter before the args. A more detailed example: DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); /* In the C file */ DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status)); [...] trace_mytracepoint(status); /* In a file registering this tracepoint */ int my_callback(void *data, int status) { struct my_struct my_data = data; [...] } [...] my_data = kmalloc(sizeof(*my_data), GFP_KERNEL); init_my_data(my_data); register_trace_mytracepoint(my_callback, my_data); The same callback can also be registered to the same tracepoint as long as the data registered is different. Note, the data must also be used to unregister the callback: unregister_trace_mytracepoint(my_callback, my_data); Because of the data parameter, tracepoints declared this way can not have no args. That is: DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS()); will cause an error. If no arguments are needed, a new macro can be used instead: DECLARE_TRACE_NOARGS(mytracepoint); Since there are no arguments, the proto and args fields are left out. This is part of a series to make the tracepoint footprint smaller: text data bss dec hex filename 4913961 1088356 861512 6863829 68bbd5 vmlinux.orig 4914025 1088868 861512 6864405 68be15 vmlinux.class 4918492 1084612 861512 6864616 68bee8 vmlinux.tracepoint Again, this patch also increases the size of the kernel, but lays the ground work for decreasing it. v5: Fixed net/core/drop_monitor.c to handle these updates. v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the #ifdef CONFIG_TRACE_POINTS, since the two are the same in both cases. The __DECLARE_TRACE() is what changes. Thanks to Frederic Weisbecker for pointing this out. v3: Made all register_* functions require data to be passed and all callbacks to take a void * parameter as its first argument. This makes the calling functions comply with C standards. Also added more comments to the modifications of DECLARE_TRACE(). v2: Made the DECLARE_TRACE() have the ability to pass arguments and added a new DECLARE_TRACE_NOARGS() for tracepoints that do not need any arguments. Acked-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Neil Horman Cc: David S. Miller Signed-off-by: Steven Rostedt --- net/core/drop_monitor.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index cf208d8..ad41529 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -172,12 +172,12 @@ out: return; } -static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) { trace_drop_common(skb, location); } -static void trace_napi_poll_hit(struct napi_struct *napi) +static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi) { struct dm_hw_stat_delta *new_stat; @@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state) switch (state) { case TRACE_ON: - rc |= register_trace_kfree_skb(trace_kfree_skb_hit); - rc |= register_trace_napi_poll(trace_napi_poll_hit); + rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); break; case TRACE_OFF: - rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); - rc |= unregister_trace_napi_poll(trace_napi_poll_hit); + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); + rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); tracepoint_synchronize_unregister(); -- cgit v1.1 From 608b4b9548dedf4185ca47edcaae4bff2ceb62de Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:45 -0700 Subject: netns: Teach network device kobjects which namespace they are in. The problem. Network devices show up in sysfs and with the network namespace active multiple devices with the same name can show up in the same directory, ouch! To avoid that problem and allow existing applications in network namespaces to see the same interface that is currently presented in sysfs, this patch enables the tagging directory support in sysfs. By using the network namespace pointers as tags to separate out the the sysfs directory entries we ensure that we don't have conflicts in the directories and applications only see a limited set of the network devices. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/net-sysfs.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c57c4b2..b388cda 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -14,7 +14,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -766,6 +768,38 @@ static void rx_queue_remove_kobjects(struct net_device *net) kset_unregister(net->queues_kset); } #endif /* CONFIG_RPS */ + +static const void *net_current_ns(void) +{ + return current->nsproxy->net_ns; +} + +static const void *net_initial_ns(void) +{ + return &init_net; +} + +static const void *net_netlink_ns(struct sock *sk) +{ + return sock_net(sk); +} + +static struct kobj_ns_type_operations net_ns_type_operations = { + .type = KOBJ_NS_TYPE_NET, + .current_ns = net_current_ns, + .netlink_ns = net_netlink_ns, + .initial_ns = net_initial_ns, +}; + +static void net_kobj_ns_exit(struct net *net) +{ + kobj_ns_exit(KOBJ_NS_TYPE_NET, net); +} + +static struct pernet_operations sysfs_net_ops = { + .exit = net_kobj_ns_exit, +}; + #endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG @@ -806,6 +840,13 @@ static void netdev_release(struct device *d) kfree((char *)dev - dev->padded); } +static const void *net_namespace(struct device *d) +{ + struct net_device *dev; + dev = container_of(d, struct net_device, dev); + return dev_net(dev); +} + static struct class net_class = { .name = "net", .dev_release = netdev_release, @@ -815,6 +856,8 @@ static struct class net_class = { #ifdef CONFIG_HOTPLUG .dev_uevent = netdev_uevent, #endif + .ns_type = &net_ns_type_operations, + .namespace = net_namespace, }; /* Delete sysfs entries but hold kobject reference until after all @@ -904,5 +947,9 @@ void netdev_initialize_kobject(struct net_device *net) int netdev_kobject_init(void) { + kobj_ns_type_register(&net_ns_type_operations); +#ifdef CONFIG_SYSFS + register_pernet_subsys(&sysfs_net_ops); +#endif return class_register(&net_class); } -- cgit v1.1 From d6523ddf2376f39eaa89a4d68a33052d20c138b9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 16 May 2010 21:59:45 -0700 Subject: net/sysfs: Fix the bitrot in network device kobject namespace support I had a couple of stupid bugs in: netns: Teach network device kobjects which namespace they are in. - I duplicated the Kconfig for the NET_NS - The build was broken when sysfs was not compiled in The sysfs breakage is because after I moved the operations for the sysfs to the kobject layer, to make things cleaner I forgot to move the ifdefs. Opps. I'm not quite certain how I got introduced a second NET_NS Kconfig, but it was probably a 3 way merge somewhere along the way that did not notice that the NET_NS Kconfig option had mvoed and thout that was a bug. It probably slipped in because it used to be the sysfs patches were the first patches in my network namespace patches. Some things just don't go like you would expect. Neither of these bugs actually affect anything in the common case but they should be fixed. Thanks to Serge for noticing they were present. Reported-by: Serge E. Hallyn Signed-off-by: Eric W. Biederman Acked-by: David S. Miller --- net/core/net-sysfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b388cda..6881e65 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -469,6 +469,7 @@ static struct attribute_group wireless_group = { .attrs = wireless_attrs, }; #endif +#endif /* CONFIG_SYSFS */ #ifdef CONFIG_RPS /* @@ -796,11 +797,10 @@ static void net_kobj_ns_exit(struct net *net) kobj_ns_exit(KOBJ_NS_TYPE_NET, net); } -static struct pernet_operations sysfs_net_ops = { +static struct pernet_operations kobj_net_ops = { .exit = net_kobj_ns_exit, }; -#endif /* CONFIG_SYSFS */ #ifdef CONFIG_HOTPLUG static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) @@ -948,8 +948,6 @@ void netdev_initialize_kobject(struct net_device *net) int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); -#ifdef CONFIG_SYSFS - register_pernet_subsys(&sysfs_net_ops); -#endif + register_pernet_subsys(&kobj_net_ops); return class_register(&net_class); } -- cgit v1.1 From a1b3f594dc5faab91d3a218c7019e9b5edd9fe1a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 4 May 2010 17:36:49 -0700 Subject: net: Expose all network devices in a namespaces in sysfs This reverts commit aaf8cdc34ddba08122f02217d9d684e2f9f5d575. Drivers like the ipw2100 call device_create_group when they are initialized and device_remove_group when they are shutdown. Moving them between namespaces deletes their sysfs groups early. In particular the following call chain results. netdev_unregister_kobject -> device_del -> kobject_del -> sysfs_remove_dir With sysfs_remove_dir recursively deleting all of it's subdirectories, and nothing adding them back. Ouch! Therefore we need to call something that ultimate calls sysfs_mv_dir as that sysfs function can move sysfs directories between namespaces without deleting their subdirectories or their contents. Allowing us to avoid placing extra boiler plate into every driver that does something interesting with sysfs. Currently the function that provides that capability is device_rename. That is the code works without nasty side effects as originally written. So remove the misguided fix for moving devices between namespaces. The bug in the kobject layer that inspired it has now been recognized and fixed. Signed-off-by: Eric W. Biederman Acked-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 28 +++++----------------------- net/core/net-sysfs.c | 16 +--------------- net/core/net-sysfs.h | 1 - 3 files changed, 6 insertions(+), 39 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 6c82065..d273e4e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1002,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname) return err; rollback: - /* For now only devices in the initial network namespace - * are in sysfs. - */ - if (net_eq(net, &init_net)) { - ret = device_rename(&dev->dev, dev->name); - if (ret) { - memcpy(dev->name, oldname, IFNAMSIZ); - return ret; - } + ret = device_rename(&dev->dev, dev->name); + if (ret) { + memcpy(dev->name, oldname, IFNAMSIZ); + return ret; } write_lock_bh(&dev_base_lock); @@ -4994,8 +4989,6 @@ int register_netdevice(struct net_device *dev) if (dev->features & NETIF_F_SG) dev->features |= NETIF_F_GSO; - netdev_initialize_kobject(dev); - ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -5547,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char if (dev->features & NETIF_F_NETNS_LOCAL) goto out; -#ifdef CONFIG_SYSFS - /* Don't allow real devices to be moved when sysfs - * is enabled. - */ - err = -EINVAL; - if (dev->dev.parent) - goto out; -#endif - /* Ensure the device has been registrered */ err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) @@ -5606,8 +5590,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_uc_flush(dev); dev_mc_flush(dev); - netdev_unregister_kobject(dev); - /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -5620,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char } /* Fixup kobjects */ - err = netdev_register_kobject(dev); + err = device_rename(&dev->dev, dev->name); WARN_ON(err); /* Add the device back in the hashes */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6881e65..99e7052 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -808,9 +808,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) struct net_device *dev = to_net_dev(d); int retval; - if (!net_eq(dev_net(dev), &init_net)) - return 0; - /* pass interface to uevent. */ retval = add_uevent_var(env, "INTERFACE=%s", dev->name); if (retval) @@ -869,9 +866,6 @@ void netdev_unregister_kobject(struct net_device * net) kobject_get(&dev->kobj); - if (!net_eq(dev_net(net), &init_net)) - return; - #ifdef CONFIG_RPS rx_queue_remove_kobjects(net); #endif @@ -886,6 +880,7 @@ int netdev_register_kobject(struct net_device *net) const struct attribute_group **groups = net->sysfs_groups; int error = 0; + device_initialize(dev); dev->class = &net_class; dev->platform_data = net; dev->groups = groups; @@ -908,9 +903,6 @@ int netdev_register_kobject(struct net_device *net) #endif #endif /* CONFIG_SYSFS */ - if (!net_eq(dev_net(net), &init_net)) - return 0; - error = device_add(dev); if (error) return error; @@ -939,12 +931,6 @@ void netdev_class_remove_file(struct class_attribute *class_attr) EXPORT_SYMBOL(netdev_class_create_file); EXPORT_SYMBOL(netdev_class_remove_file); -void netdev_initialize_kobject(struct net_device *net) -{ - struct device *device = &(net->dev); - device_initialize(device); -} - int netdev_kobject_init(void) { kobj_ns_type_register(&net_ns_type_operations); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 14e7524..805555e 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -4,5 +4,4 @@ int netdev_kobject_init(void); int netdev_register_kobject(struct net_device *); void netdev_unregister_kobject(struct net_device *); -void netdev_initialize_kobject(struct net_device *); #endif -- cgit v1.1 From 35f3d14dbbc58447c61e38a162ea10add6b31dc7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 May 2010 10:43:18 +0200 Subject: pipe: add support for shrinking and growing pipes This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for growing and shrinking the size of a pipe and adjusts pipe.c and splice.c (and relay and network splice) usage to work with these larger (or smaller) pipes. Signed-off-by: Jens Axboe --- net/core/skbuff.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 93c4e06..9319817 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1417,12 +1417,13 @@ new_page: /* * Fill page/offset/length into spd, if it can hold more pages. */ -static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, +static inline int spd_fill_page(struct splice_pipe_desc *spd, + struct pipe_inode_info *pipe, struct page *page, unsigned int *len, unsigned int offset, struct sk_buff *skb, int linear, struct sock *sk) { - if (unlikely(spd->nr_pages == PIPE_BUFFERS)) + if (unlikely(spd->nr_pages == pipe->buffers)) return 1; if (linear) { @@ -1458,7 +1459,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, struct splice_pipe_desc *spd, int linear, - struct sock *sk) + struct sock *sk, + struct pipe_inode_info *pipe) { if (!*len) return 1; @@ -1481,7 +1483,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) + if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1496,9 +1498,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * Map linear and fragment data from the skb to spd. It reports failure if the * pipe is full or if we already spliced the requested length. */ -static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, struct splice_pipe_desc *spd, - struct sock *sk) +static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, + unsigned int *offset, unsigned int *len, + struct splice_pipe_desc *spd, struct sock *sk) { int seg; @@ -1508,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1, sk)) + offset, len, skb, spd, 1, sk, pipe)) return 1; /* @@ -1518,7 +1520,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0, sk)) + offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1535,8 +1537,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct pipe_inode_info *pipe, unsigned int tlen, unsigned int flags) { - struct partial_page partial[PIPE_BUFFERS]; - struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct page *pages[PIPE_DEF_BUFFERS]; struct splice_pipe_desc spd = { .pages = pages, .partial = partial, @@ -1546,12 +1548,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, }; struct sk_buff *frag_iter; struct sock *sk = skb->sk; + int ret = 0; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1562,14 +1568,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, skb_walk_frags(skb, frag_iter) { if (!tlen) break; - if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk)) + if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk)) break; } done: if (spd.nr_pages) { - int ret; - /* * Drop the socket lock, otherwise we have reverse * locking dependencies between sk_lock and i_mutex @@ -1582,10 +1586,10 @@ done: release_sock(sk); ret = splice_to_pipe(pipe, &spd); lock_sock(sk); - return ret; } - return 0; + splice_shrink_spd(pipe, &spd); + return ret; } /** -- cgit v1.1