From 4d152e2c9a6a3e3556ce5da7782a9e2836edbe0f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Nov 2014 07:51:14 -0500 Subject: sunrpc: add a generic rq_flags field to svc_rqst and move rq_secure to it In a later patch, we're going to need some atomic bit flags. Since that field will need to be an unsigned long, we mitigate that space consumption by migrating some other bitflags to the new field. Start with the rq_secure flag. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 171ca4f..5eb5f79 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -412,6 +412,10 @@ TRACE_EVENT(xs_tcp_data_recv, __entry->copied, __entry->reclen, __entry->offset) ); +#define show_rqstp_flags(flags) \ + __print_flags(flags, "|", \ + { (1UL << RQ_SECURE), "RQ_SECURE"}) + TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), @@ -421,16 +425,19 @@ TRACE_EVENT(svc_recv, __field(struct sockaddr *, addr) __field(__be32, xid) __field(int, status) + __field(unsigned long, flags) ), TP_fast_assign( __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = status > 0 ? rqst->rq_xid : 0; __entry->status = status; + __entry->flags = rqst->rq_flags; ), - TP_printk("addr=%pIScp xid=0x%x status=%d", __entry->addr, - be32_to_cpu(__entry->xid), __entry->status) + TP_printk("addr=%pIScp xid=0x%x status=%d flags=%s", __entry->addr, + be32_to_cpu(__entry->xid), __entry->status, + show_rqstp_flags(__entry->flags)) ); DECLARE_EVENT_CLASS(svc_rqst_status, @@ -444,6 +451,7 @@ DECLARE_EVENT_CLASS(svc_rqst_status, __field(__be32, xid) __field(int, dropme) __field(int, status) + __field(unsigned long, flags) ), TP_fast_assign( @@ -451,11 +459,12 @@ DECLARE_EVENT_CLASS(svc_rqst_status, __entry->xid = rqst->rq_xid; __entry->dropme = (int)rqst->rq_dropme; __entry->status = status; + __entry->flags = rqst->rq_flags; ), - TP_printk("addr=%pIScp rq_xid=0x%x dropme=%d status=%d", + TP_printk("addr=%pIScp rq_xid=0x%x dropme=%d status=%d flags=%s", __entry->addr, be32_to_cpu(__entry->xid), __entry->dropme, - __entry->status) + __entry->status, show_rqstp_flags(__entry->flags)) ); DEFINE_EVENT(svc_rqst_status, svc_process, -- cgit v1.1 From 7501cc2bcf9a71cc1f19e38775c234815ee44578 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Nov 2014 07:51:15 -0500 Subject: sunrpc: move rq_local field to rq_flags Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5eb5f79..98259f1 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -414,7 +414,8 @@ TRACE_EVENT(xs_tcp_data_recv, #define show_rqstp_flags(flags) \ __print_flags(flags, "|", \ - { (1UL << RQ_SECURE), "RQ_SECURE"}) + { (1UL << RQ_SECURE), "RQ_SECURE"}, \ + { (1UL << RQ_LOCAL), "RQ_LOCAL"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), -- cgit v1.1 From 30660e04b0d4bbbd15fd21098681f45a9f4080b9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Nov 2014 07:51:16 -0500 Subject: sunrpc: move rq_usedeferral flag to rq_flags Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 98259f1..6d1facd 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -412,10 +412,11 @@ TRACE_EVENT(xs_tcp_data_recv, __entry->copied, __entry->reclen, __entry->offset) ); -#define show_rqstp_flags(flags) \ - __print_flags(flags, "|", \ - { (1UL << RQ_SECURE), "RQ_SECURE"}, \ - { (1UL << RQ_LOCAL), "RQ_LOCAL"}) +#define show_rqstp_flags(flags) \ + __print_flags(flags, "|", \ + { (1UL << RQ_SECURE), "RQ_SECURE"}, \ + { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ + { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), -- cgit v1.1 From 78b65eb3fda95c6d131c4bbb0536e21f0bd7a7d4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Nov 2014 07:51:17 -0500 Subject: sunrpc: move rq_dropme flag into rq_flags Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6d1facd..355671f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -416,7 +416,8 @@ TRACE_EVENT(xs_tcp_data_recv, __print_flags(flags, "|", \ { (1UL << RQ_SECURE), "RQ_SECURE"}, \ { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ - { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}) + { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ + { (1UL << RQ_DROPME), "RQ_DROPME"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), @@ -459,13 +460,12 @@ DECLARE_EVENT_CLASS(svc_rqst_status, TP_fast_assign( __entry->addr = (struct sockaddr *)&rqst->rq_addr; __entry->xid = rqst->rq_xid; - __entry->dropme = (int)rqst->rq_dropme; __entry->status = status; __entry->flags = rqst->rq_flags; ), - TP_printk("addr=%pIScp rq_xid=0x%x dropme=%d status=%d flags=%s", - __entry->addr, be32_to_cpu(__entry->xid), __entry->dropme, + TP_printk("addr=%pIScp rq_xid=0x%x status=%d flags=%s", + __entry->addr, be32_to_cpu(__entry->xid), __entry->status, show_rqstp_flags(__entry->flags)) ); -- cgit v1.1 From 779fb0f3af3089daa2e88cf8ef0ef0c5d2fecb40 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 19 Nov 2014 07:51:18 -0500 Subject: sunrpc: move rq_splice_ok flag into rq_flags Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 355671f..5848fc2 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -417,7 +417,8 @@ TRACE_EVENT(xs_tcp_data_recv, { (1UL << RQ_SECURE), "RQ_SECURE"}, \ { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ - { (1UL << RQ_DROPME), "RQ_DROPME"}) + { (1UL << RQ_DROPME), "RQ_DROPME"}, \ + { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), -- cgit v1.1 From 812443865c5fc255363d4a684a62c086af1addca Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Nov 2014 14:19:28 -0500 Subject: sunrpc: add a rcu_head to svc_rqst and use kfree_rcu to free it ...also make the manipulation of sp_all_threads list use RCU-friendly functions. Signed-off-by: Jeff Layton Tested-by: Chris Worley Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5848fc2..08a5fed 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -418,7 +418,8 @@ TRACE_EVENT(xs_tcp_data_recv, { (1UL << RQ_LOCAL), "RQ_LOCAL"}, \ { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ { (1UL << RQ_DROPME), "RQ_DROPME"}, \ - { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}) + { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}, \ + { (1UL << RQ_VICTIM), "RQ_VICTIM"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), -- cgit v1.1 From b1691bc03d4eddb959234409167bef9be9e62d74 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Nov 2014 14:19:30 -0500 Subject: sunrpc: convert to lockless lookup of queued server threads Testing has shown that the pool->sp_lock can be a bottleneck on a busy server. Every time data is received on a socket, the server must take that lock in order to dequeue a thread from the sp_threads list. Address this problem by eliminating the sp_threads list (which contains threads that are currently idle) and replacing it with a RQ_BUSY flag in svc_rqst. This allows us to walk the sp_all_threads list under the rcu_read_lock and find a suitable thread for the xprt by doing a test_and_set_bit. Note that we do still have a potential atomicity problem however with this approach. We don't want svc_xprt_do_enqueue to set the rqst->rq_xprt pointer unless a test_and_set_bit of RQ_BUSY returned zero (which indicates that the thread was idle). But, by the time we check that, the bit could be flipped by a waking thread. To address this, we acquire a new per-rqst spinlock (rq_lock) and take that before doing the test_and_set_bit. If that returns false, then we can set rq_xprt and drop the spinlock. Then, when the thread wakes up, it must set the bit under the same spinlock and can trust that if it was already set then the rq_xprt is also properly set. With this scheme, the case where we have an idle thread no longer needs to take the highly contended pool->sp_lock at all, and that removes the bottleneck. That still leaves one issue: What of the case where we walk the whole sp_all_threads list and don't find an idle thread? Because the search is lockess, it's possible for the queueing to race with a thread that is going to sleep. To address that, we queue the xprt and then search again. If we find an idle thread at that point, we can't attach the xprt to it directly since that might race with a different thread waking up and finding it. All we can do is wake the idle thread back up and let it attempt to find the now-queued xprt. Signed-off-by: Jeff Layton Tested-by: Chris Worley Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 08a5fed..ee4438a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -419,7 +419,8 @@ TRACE_EVENT(xs_tcp_data_recv, { (1UL << RQ_USEDEFERRAL), "RQ_USEDEFERRAL"}, \ { (1UL << RQ_DROPME), "RQ_DROPME"}, \ { (1UL << RQ_SPLICE_OK), "RQ_SPLICE_OK"}, \ - { (1UL << RQ_VICTIM), "RQ_VICTIM"}) + { (1UL << RQ_VICTIM), "RQ_VICTIM"}, \ + { (1UL << RQ_BUSY), "RQ_BUSY"}) TRACE_EVENT(svc_recv, TP_PROTO(struct svc_rqst *rqst, int status), -- cgit v1.1 From 83a712e0afefaf68555f816ea78ecd2862c6cf30 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Nov 2014 14:19:31 -0500 Subject: sunrpc: add some tracepoints around enqueue and dequeue of svc_xprt These were useful when I was tracking down a race condition between svc_xprt_do_enqueue and svc_get_next_xprt. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/trace/events/sunrpc.h | 94 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ee4438a..b9c1dc6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -480,6 +481,99 @@ DEFINE_EVENT(svc_rqst_status, svc_send, TP_PROTO(struct svc_rqst *rqst, int status), TP_ARGS(rqst, status)); +#define show_svc_xprt_flags(flags) \ + __print_flags(flags, "|", \ + { (1UL << XPT_BUSY), "XPT_BUSY"}, \ + { (1UL << XPT_CONN), "XPT_CONN"}, \ + { (1UL << XPT_CLOSE), "XPT_CLOSE"}, \ + { (1UL << XPT_DATA), "XPT_DATA"}, \ + { (1UL << XPT_TEMP), "XPT_TEMP"}, \ + { (1UL << XPT_DEAD), "XPT_DEAD"}, \ + { (1UL << XPT_CHNGBUF), "XPT_CHNGBUF"}, \ + { (1UL << XPT_DEFERRED), "XPT_DEFERRED"}, \ + { (1UL << XPT_OLD), "XPT_OLD"}, \ + { (1UL << XPT_LISTENER), "XPT_LISTENER"}, \ + { (1UL << XPT_CACHE_AUTH), "XPT_CACHE_AUTH"}, \ + { (1UL << XPT_LOCAL), "XPT_LOCAL"}) + +TRACE_EVENT(svc_xprt_do_enqueue, + TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), + + TP_ARGS(xprt, rqst), + + TP_STRUCT__entry( + __field(struct svc_xprt *, xprt) + __field(struct svc_rqst *, rqst) + ), + + TP_fast_assign( + __entry->xprt = xprt; + __entry->rqst = rqst; + ), + + TP_printk("xprt=0x%p addr=%pIScp pid=%d flags=%s", __entry->xprt, + (struct sockaddr *)&__entry->xprt->xpt_remote, + __entry->rqst ? __entry->rqst->rq_task->pid : 0, + show_svc_xprt_flags(__entry->xprt->xpt_flags)) +); + +TRACE_EVENT(svc_xprt_dequeue, + TP_PROTO(struct svc_xprt *xprt), + + TP_ARGS(xprt), + + TP_STRUCT__entry( + __field(struct svc_xprt *, xprt) + __field_struct(struct sockaddr_storage, ss) + __field(unsigned long, flags) + ), + + TP_fast_assign( + __entry->xprt = xprt, + xprt ? memcpy(&__entry->ss, &xprt->xpt_remote, sizeof(__entry->ss)) : memset(&__entry->ss, 0, sizeof(__entry->ss)); + __entry->flags = xprt ? xprt->xpt_flags : 0; + ), + + TP_printk("xprt=0x%p addr=%pIScp flags=%s", __entry->xprt, + (struct sockaddr *)&__entry->ss, + show_svc_xprt_flags(__entry->flags)) +); + +TRACE_EVENT(svc_wake_up, + TP_PROTO(int pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __field(int, pid) + ), + + TP_fast_assign( + __entry->pid = pid; + ), + + TP_printk("pid=%d", __entry->pid) +); + +TRACE_EVENT(svc_handle_xprt, + TP_PROTO(struct svc_xprt *xprt, int len), + + TP_ARGS(xprt, len), + + TP_STRUCT__entry( + __field(struct svc_xprt *, xprt) + __field(int, len) + ), + + TP_fast_assign( + __entry->xprt = xprt; + __entry->len = len; + ), + + TP_printk("xprt=0x%p addr=%pIScp len=%d flags=%s", __entry->xprt, + (struct sockaddr *)&__entry->xprt->xpt_remote, __entry->len, + show_svc_xprt_flags(__entry->xprt->xpt_flags)) +); #endif /* _TRACE_SUNRPC_H */ #include -- cgit v1.1