summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteven Rostedt (VMware) <rostedt@goodmis.org>2017-05-01 09:35:09 -0400
committerSteven Rostedt (VMware) <rostedt@goodmis.org>2017-05-01 10:26:40 -0400
commit73a757e63114dfd765f1c5d1ff7e994f123d0234 (patch)
tree757adb3d5a56f4a22c8e98e106068fc6666d1969
parentca2958f14c4706d5dced95f4f7dfe2bdd1b268de (diff)
downloadop-kernel-dev-73a757e63114dfd765f1c5d1ff7e994f123d0234.zip
op-kernel-dev-73a757e63114dfd765f1c5d1ff7e994f123d0234.tar.gz
ring-buffer: Return reader page back into existing ring buffer
When reading the ring buffer for consuming, it is optimized for splice, where a page is taken out of the ring buffer (zero copy) and sent to the reading consumer. When the read is finished with the page, it calls ring_buffer_free_read_page(), which simply frees the page. The next time the reader needs to get a page from the ring buffer, it must call ring_buffer_alloc_read_page() which allocates and initializes a reader page for the ring buffer to be swapped into the ring buffer for a new filled page for the reader. The problem is that there's no reason to actually free the page when it is passed back to the ring buffer. It can hold it off and reuse it for the next iteration. This completely removes the interaction with the page_alloc mechanism. Using the trace-cmd utility to record all events (causing trace-cmd to require reading lots of pages from the ring buffer, and calling ring_buffer_alloc/free_read_page() several times), and also assigning a stack trace trigger to the mm_page_alloc event, we can see how many times the ring_buffer_alloc_read_page() needed to allocate a page for the ring buffer. Before this change: # trace-cmd record -e all -e mem_page_alloc -R stacktrace sleep 1 # trace-cmd report |grep ring_buffer_alloc_read_page | wc -l 9968 After this change: # trace-cmd record -e all -e mem_page_alloc -R stacktrace sleep 1 # trace-cmd report |grep ring_buffer_alloc_read_page | wc -l 4 Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-rw-r--r--include/linux/ring_buffer.h2
-rw-r--r--kernel/trace/ring_buffer.c40
-rw-r--r--kernel/trace/ring_buffer_benchmark.c2
-rw-r--r--kernel/trace/trace.c17
4 files changed, 51 insertions, 10 deletions
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index b6d4568..ee9b461 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -185,7 +185,7 @@ size_t ring_buffer_page_len(void *page);
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
-void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
+void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data);
int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
size_t len, int cpu, int full);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 96fc3c0..01b4ee5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -438,6 +438,7 @@ struct ring_buffer_per_cpu {
raw_spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
struct lock_class_key lock_key;
+ struct buffer_data_page *free_page;
unsigned long nr_pages;
unsigned int current_context;
struct list_head *pages;
@@ -4377,9 +4378,25 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
*/
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
{
- struct buffer_data_page *bpage;
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct buffer_data_page *bpage = NULL;
+ unsigned long flags;
struct page *page;
+ local_irq_save(flags);
+ arch_spin_lock(&cpu_buffer->lock);
+
+ if (cpu_buffer->free_page) {
+ bpage = cpu_buffer->free_page;
+ cpu_buffer->free_page = NULL;
+ }
+
+ arch_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+
+ if (bpage)
+ goto out;
+
page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
if (!page)
@@ -4387,6 +4404,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
bpage = page_address(page);
+ out:
rb_init_page(bpage);
return bpage;
@@ -4396,13 +4414,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
/**
* ring_buffer_free_read_page - free an allocated read page
* @buffer: the buffer the page was allocate for
+ * @cpu: the cpu buffer the page came from
* @data: the page to free
*
* Free a page allocated from ring_buffer_alloc_read_page.
*/
-void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
+void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
{
- free_page((unsigned long)data);
+ struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct buffer_data_page *bpage = data;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ arch_spin_lock(&cpu_buffer->lock);
+
+ if (!cpu_buffer->free_page) {
+ cpu_buffer->free_page = bpage;
+ bpage = NULL;
+ }
+
+ arch_spin_unlock(&cpu_buffer->lock);
+ local_irq_restore(flags);
+
+ free_page((unsigned long)bpage);
}
EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index c190a4d..9fbcaf5 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -171,7 +171,7 @@ static enum event_status read_page(int cpu)
}
}
}
- ring_buffer_free_read_page(buffer, bpage);
+ ring_buffer_free_read_page(buffer, cpu, bpage);
if (ret < 0)
return EVENT_DROPPED;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 60c904f..5b645b0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6054,6 +6054,7 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
struct ftrace_buffer_info {
struct trace_iterator iter;
void *spare;
+ unsigned int spare_cpu;
unsigned int read;
};
@@ -6383,9 +6384,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
return -EBUSY;
#endif
- if (!info->spare)
+ if (!info->spare) {
info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
iter->cpu_file);
+ info->spare_cpu = iter->cpu_file;
+ }
if (!info->spare)
return -ENOMEM;
@@ -6445,7 +6448,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
__trace_array_put(iter->tr);
if (info->spare)
- ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
+ ring_buffer_free_read_page(iter->trace_buffer->buffer,
+ info->spare_cpu, info->spare);
kfree(info);
mutex_unlock(&trace_types_lock);
@@ -6456,6 +6460,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
struct buffer_ref {
struct ring_buffer *buffer;
void *page;
+ int cpu;
int ref;
};
@@ -6467,7 +6472,7 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
if (--ref->ref)
return;
- ring_buffer_free_read_page(ref->buffer, ref->page);
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
kfree(ref);
buf->private = 0;
}
@@ -6501,7 +6506,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
if (--ref->ref)
return;
- ring_buffer_free_read_page(ref->buffer, ref->page);
+ ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
kfree(ref);
spd->partial[i].private = 0;
}
@@ -6566,11 +6571,13 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
kfree(ref);
break;
}
+ ref->cpu = iter->cpu_file;
r = ring_buffer_read_page(ref->buffer, &ref->page,
len, iter->cpu_file, 1);
if (r < 0) {
- ring_buffer_free_read_page(ref->buffer, ref->page);
+ ring_buffer_free_read_page(ref->buffer, ref->cpu,
+ ref->page);
kfree(ref);
break;
}
OpenPOWER on IntegriCloud