From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001 From: Shawn Du Date: Tue, 14 Apr 2009 13:58:56 +0800 Subject: blktrace: support per-partition tracing Though one can specify '-d /dev/sda1' when using blktrace, it still traces the whole sda. To support per-partition tracing, when we start tracing, we initialize bt->start_lba and bt->end_lba to the start and end sector of that partition. Note some actions are per device, thus we don't filter 0-sector events. The original patch and discussion can be found here: http://marc.info/?l=linux-btrace&m=122949374214540&w=2 Signed-off-by: Shawn Du Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Jens Axboe LKML-Reference: <49E42620.4050701@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/compat_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615d..f8c218c 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; -- cgit v1.1 From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 14:00:05 +0800 Subject: blktrace: add trace/ to /sys/block/sda Impact: allow ftrace-plugin blktrace to trace device-mapper devices To trace a single partition: # echo 1 > /sys/block/sda/sda1/enable To trace the whole sda instead: # echo 1 > /sys/block/sda/enable Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace can't be used to trace device-mapper devices. Now: # echo 1 > /sys/block/dm-0/trace/enable echo: write error: No such device or address # mount -t ext4 /dev/dm-0 /mnt # echo 1 > /sys/block/dm-0/trace/enable # echo blk > /debug/tracing/current_tracer Reported-by: Theodore Tso Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42665.6020506@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- block/blk-sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36be..8653d71 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; -- cgit v1.1 From 22a7c31a9659deaddafbbcec6562d44141e84474 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Mon, 4 May 2009 16:35:08 -0400 Subject: blktrace: from-sector redundant in trace_block_remap Remove redundant from-sector parameter: it's /always/ the bio's sector passed in. [ Impact: cleanup ] Signed-off-by: Alan D. Brunelle Reviewed-by: Li Zefan Reviewed-by: KOSAKI Motohiro Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo LKML-Reference: <49FF517C.7000503@hp.com> Signed-off-by: Ingo Molnar --- block/blk-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 07ab754..a5f747a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1275,7 +1275,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_bdev = bdev->bd_contains; trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, bio->bi_sector, + bdev->bd_dev, bio->bi_sector - p->start_sect); } } @@ -1444,8 +1444,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, - old_sector); + trace_block_remap(q, bio, old_dev, old_sector); trace_block_bio_queue(q, bio); -- cgit v1.1 From 55782138e47d9baf2f7d3a7af9e7cf42adf72c56 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 9 Jun 2009 13:43:05 +0800 Subject: tracing/events: convert block trace points to TRACE_EVENT() TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions ... Cons: - no dev_t info for the output of plug, unplug_timer and unplug_io events. no dev_t info for getrq and sleeprq events if bio == NULL. no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL. This is mainly because we can't get the deivce from a request queue. But this may change in the future. - A packet command is converted to a string in TP_assign, not TP_print. While blktrace do the convertion just before output. Since pc requests should be rather rare, this is not a big issue. - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT has a unique format, which means we have some unused data in a trace entry. The overhead is minimized by using __dynamic_array() instead of __array(). I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing: dd dd + ioctl blktrace dd + TRACE_EVENT (splice) 1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s 2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s 3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s So the overhead of tracing is very small, and no regression when using those trace events vs blktrace. And the binary output of TRACE_EVENT is much smaller than blktrace: # ls -l -h -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out Following are some comparisons between TRACE_EVENT and blktrace: plug: kjournald-480 [000] 303.084981: block_plug: [kjournald] kjournald-480 [000] 303.084981: 8,0 P N [kjournald] unplug_io: kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1 kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1 remap: kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384 kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384 bio_backmerge: kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald] kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald] getrq: kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald] kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald] bash-2066 [001] 1072.953770: 8,0 G N [bash] bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash] rq_complete: konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0] konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0] ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0] ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0] rq_insert: kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald] kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald] Changelog from v2 -> v3: - use the newly introduced __dynamic_array(). Changelog from v1 -> v2: - use __string() instead of __array() to minimize the memory required to store hex dump of rq->cmd(). - support large pc requests. - add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT. - some cleanups. Signed-off-by: Li Zefan LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- block/blk-core.c | 16 ++++------------ block/elevator.c | 8 ++------ 2 files changed, 6 insertions(+), 18 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 1306de9..9475bf9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,22 +28,14 @@ #include #include #include -#include + +#define CREATE_TRACE_POINTS +#include #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); diff --git a/block/elevator.c b/block/elevator.c index 7073a90..e220f0c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,17 +33,16 @@ #include #include #include -#include #include #include +#include + #include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. -- cgit v1.1