From 2d48d67fa8cd129ea85ea02d91b4a793286866f8 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 24 Jun 2013 10:28:03 +0300 Subject: net: poll/select low latency socket support select/poll busy-poll support. Split sysctl value into two separate ones, one for read and one for poll. updated Documentation/sysctl/net.txt Add a new poll flag POLL_LL. When this flag is set, sock_poll will call sk_poll_ll if possible. sock_poll sets this flag in its return value to indicate to select/poll when a socket that can busy poll is found. When poll/select have nothing to report, call the low-level sock_poll again until we are out of time or we find something. Once the system call finds something, it stops setting POLL_LL, so it can return the result to the user ASAP. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- fs/select.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 8c1c96c..79b876e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -384,9 +385,10 @@ get_max: #define POLLEX_SET (POLLPRI) static inline void wait_key_set(poll_table *wait, unsigned long in, - unsigned long out, unsigned long bit) + unsigned long out, unsigned long bit, + unsigned int ll_flag) { - wait->_key = POLLEX_SET; + wait->_key = POLLEX_SET | ll_flag; if (in & bit) wait->_key |= POLLIN_SET; if (out & bit) @@ -400,6 +402,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_table *wait; int retval, i, timed_out = 0; unsigned long slack = 0; + unsigned int ll_flag = POLL_LL; + u64 ll_time = ll_end_time(); rcu_read_lock(); retval = max_select_fd(n, fds); @@ -422,6 +426,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; + bool can_ll = false; inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -449,7 +454,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) f_op = f.file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { - wait_key_set(wait, in, out, bit); + wait_key_set(wait, in, out, + bit, ll_flag); mask = (*f_op->poll)(f.file, wait); } fdput(f); @@ -468,6 +474,11 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval++; wait->_qproc = NULL; } + if (mask & POLL_LL) + can_ll = true; + /* got something, stop busy polling */ + if (retval) + ll_flag = 0; } } if (res_in) @@ -486,6 +497,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) break; } + if (can_ll && can_poll_ll(ll_time)) + continue; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to @@ -717,7 +731,8 @@ struct poll_list { * pwait poll_table will be used by the fd-provided poll handler for waiting, * if pwait->_qproc is non-NULL. */ -static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) +static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, + bool *can_ll, unsigned int ll_flag) { unsigned int mask; int fd; @@ -731,7 +746,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = DEFAULT_POLLMASK; if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; + pwait->_key |= ll_flag; mask = f.file->f_op->poll(f.file, pwait); + if (mask & POLL_LL) + *can_ll = true; } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; @@ -750,6 +768,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, ktime_t expire, *to = NULL; int timed_out = 0, count = 0; unsigned long slack = 0; + unsigned int ll_flag = POLL_LL; + u64 ll_time = ll_end_time(); /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -762,6 +782,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (;;) { struct poll_list *walk; + bool can_ll = false; for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -776,9 +797,10 @@ static int do_poll(unsigned int nfds, struct poll_list *list, * this. They'll get immediately deregistered * when we break out and return. */ - if (do_pollfd(pfd, pt)) { + if (do_pollfd(pfd, pt, &can_ll, ll_flag)) { count++; pt->_qproc = NULL; + ll_flag = 0; } } } @@ -795,6 +817,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (count || timed_out) break; + if (can_ll && can_poll_ll(ll_time)) + continue; /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to -- cgit v1.1 From 91e2fd337839319c7745e2cb84cfbf8cf1426a1a Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Fri, 28 Jun 2013 15:59:35 +0300 Subject: net: avoid calling sched_clock when LLS is off Change Low Latency Sockets code for select and poll so that when LLS is disabled sched_clock() is never called. Also, avoid sending POLL_LL to sockets if disabled. Reported-by: Andi Kleen Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- fs/select.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 79b876e..3654075 100644 --- a/fs/select.c +++ b/fs/select.c @@ -402,7 +402,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_table *wait; int retval, i, timed_out = 0; unsigned long slack = 0; - unsigned int ll_flag = POLL_LL; + unsigned int ll_flag = ll_get_flag(); u64 ll_time = ll_end_time(); rcu_read_lock(); @@ -497,7 +497,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) break; } - if (can_ll && can_poll_ll(ll_time)) + /* only if on, have sockets with POLL_LL and not out of time */ + if (ll_flag && can_ll && can_poll_ll(ll_time)) continue; /* @@ -768,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, ktime_t expire, *to = NULL; int timed_out = 0, count = 0; unsigned long slack = 0; - unsigned int ll_flag = POLL_LL; + unsigned int ll_flag = ll_get_flag(); u64 ll_time = ll_end_time(); /* Optimise the no-wait case */ @@ -817,8 +818,10 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (count || timed_out) break; - if (can_ll && can_poll_ll(ll_time)) + /* only if on, have sockets with POLL_LL and not out of time */ + if (ll_flag && can_ll && can_poll_ll(ll_time)) continue; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to -- cgit v1.1 From 1bc2774d866444c5b316fd1dc2b782f20c762cf4 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Tue, 2 Jul 2013 23:22:47 +0300 Subject: net: convert lls to use time_in_range() Time in range will fail safely if we move to a different cpu with an extremely large clock skew. Add time_in_range64() and convert lls to use it. changelog: v2 - fixed double call to sched_clock in can_poll_ll - fixed checkpatchisms Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- fs/select.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 3654075..f28a585 100644 --- a/fs/select.c +++ b/fs/select.c @@ -403,7 +403,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) int retval, i, timed_out = 0; unsigned long slack = 0; unsigned int ll_flag = ll_get_flag(); - u64 ll_time = ll_end_time(); + u64 ll_start = ll_start_time(ll_flag); + u64 ll_time = ll_run_time(); rcu_read_lock(); retval = max_select_fd(n, fds); @@ -498,7 +499,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } /* only if on, have sockets with POLL_LL and not out of time */ - if (ll_flag && can_ll && can_poll_ll(ll_time)) + if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) continue; /* @@ -770,7 +771,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, int timed_out = 0, count = 0; unsigned long slack = 0; unsigned int ll_flag = ll_get_flag(); - u64 ll_time = ll_end_time(); + u64 ll_start = ll_start_time(ll_flag); + u64 ll_time = ll_run_time(); /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -819,7 +821,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, break; /* only if on, have sockets with POLL_LL and not out of time */ - if (ll_flag && can_ll && can_poll_ll(ll_time)) + if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) continue; /* -- cgit v1.1 From cbf55001b2ddb814329735641be5d29b08c82b08 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Mon, 8 Jul 2013 16:20:34 +0300 Subject: net: rename low latency sockets functions to busy poll Rename functions in include/net/ll_poll.h to busy wait. Clarify documentation about expected power use increase. Rename POLL_LL to POLL_BUSY_LOOP. Add need_resched() testing to poll/select busy loops. Note, that in select and poll can_busy_poll is dynamic and is updated continuously to reflect the existence of supported sockets with valid queue information. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- fs/select.c | 60 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index f28a585..25cac5f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -402,9 +402,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_table *wait; int retval, i, timed_out = 0; unsigned long slack = 0; - unsigned int ll_flag = ll_get_flag(); - u64 ll_start = ll_start_time(ll_flag); - u64 ll_time = ll_run_time(); + unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; + u64 busy_start = busy_loop_start_time(busy_flag); + u64 busy_end = busy_loop_end_time(); rcu_read_lock(); retval = max_select_fd(n, fds); @@ -427,7 +427,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - bool can_ll = false; + bool can_busy_loop = false; inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -456,7 +456,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { wait_key_set(wait, in, out, - bit, ll_flag); + bit, busy_flag); mask = (*f_op->poll)(f.file, wait); } fdput(f); @@ -475,11 +475,18 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval++; wait->_qproc = NULL; } - if (mask & POLL_LL) - can_ll = true; /* got something, stop busy polling */ - if (retval) - ll_flag = 0; + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } } if (res_in) @@ -498,8 +505,9 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) break; } - /* only if on, have sockets with POLL_LL and not out of time */ - if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (!need_resched() && can_busy_loop && + busy_loop_range(busy_start, busy_end)) continue; /* @@ -734,7 +742,8 @@ struct poll_list { * if pwait->_qproc is non-NULL. */ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, - bool *can_ll, unsigned int ll_flag) + bool *can_busy_poll, + unsigned int busy_flag) { unsigned int mask; int fd; @@ -748,10 +757,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, mask = DEFAULT_POLLMASK; if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; - pwait->_key |= ll_flag; + pwait->_key |= busy_flag; mask = f.file->f_op->poll(f.file, pwait); - if (mask & POLL_LL) - *can_ll = true; + if (mask & busy_flag) + *can_busy_poll = true; } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; @@ -770,9 +779,10 @@ static int do_poll(unsigned int nfds, struct poll_list *list, ktime_t expire, *to = NULL; int timed_out = 0, count = 0; unsigned long slack = 0; - unsigned int ll_flag = ll_get_flag(); - u64 ll_start = ll_start_time(ll_flag); - u64 ll_time = ll_run_time(); + unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; + u64 busy_start = busy_loop_start_time(busy_flag); + u64 busy_end = busy_loop_end_time(); + /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -785,7 +795,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (;;) { struct poll_list *walk; - bool can_ll = false; + bool can_busy_loop = false; for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -800,10 +810,13 @@ static int do_poll(unsigned int nfds, struct poll_list *list, * this. They'll get immediately deregistered * when we break out and return. */ - if (do_pollfd(pfd, pt, &can_ll, ll_flag)) { + if (do_pollfd(pfd, pt, &can_busy_loop, + busy_flag)) { count++; pt->_qproc = NULL; - ll_flag = 0; + /* found something, stop busy polling */ + busy_flag = 0; + can_busy_loop = false; } } } @@ -820,8 +833,9 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (count || timed_out) break; - /* only if on, have sockets with POLL_LL and not out of time */ - if (ll_flag && can_ll && can_poll_ll(ll_start, ll_time)) + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (!need_resched() && can_busy_loop && + busy_loop_range(busy_start, busy_end)) continue; /* -- cgit v1.1 From 76b1e9b9813e412bde7446525f6c299803713545 Mon Sep 17 00:00:00 2001 From: Eliezer Tamir Date: Tue, 9 Jul 2013 13:09:21 +0300 Subject: net/fs: change busy poll time accounting Suggested by Linus: Changed time accounting for busy-poll: - Make it microsecond based. - Use unsigned longs. - Revert back to use time_after instead of time_in_range. Reorder poll/select busy loop conditions: - Clear busy_flag after one time we can't busy-poll. - Only init busy_end if we actually are going to busy-poll. Added one more missing need_resched() test. Signed-off-by: Eliezer Tamir Signed-off-by: David S. Miller --- fs/select.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 25cac5f..50a804b 100644 --- a/fs/select.c +++ b/fs/select.c @@ -403,8 +403,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) int retval, i, timed_out = 0; unsigned long slack = 0; unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; - u64 busy_start = busy_loop_start_time(busy_flag); - u64 busy_end = busy_loop_end_time(); + unsigned long busy_end = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -506,9 +505,15 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) } /* only if found POLL_BUSY_LOOP sockets && not out of time */ - if (!need_resched() && can_busy_loop && - busy_loop_range(busy_start, busy_end)) - continue; + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; /* * If this is the first loop and we have a timeout @@ -780,9 +785,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, int timed_out = 0, count = 0; unsigned long slack = 0; unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; - u64 busy_start = busy_loop_start_time(busy_flag); - u64 busy_end = busy_loop_end_time(); - + unsigned long busy_end = 0; /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -834,9 +837,15 @@ static int do_poll(unsigned int nfds, struct poll_list *list, break; /* only if found POLL_BUSY_LOOP sockets && not out of time */ - if (!need_resched() && can_busy_loop && - busy_loop_range(busy_start, busy_end)) - continue; + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; /* * If this is the first loop and we have a timeout -- cgit v1.1