summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/fs-writeback.c 13
-rw-r--r-- include/linux/backing-dev.h 5
-rw-r--r-- include/linux/writeback.h 3
-rw-r--r-- mm/backing-dev.c 12
-rw-r--r-- mm/page-writeback.c 87
5 files changed, 120 insertions, 0 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 2c947da..5826992 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -693,6 +693,16 @@ static inline bool over_bground_thresh(void)
}
/*
+ * Refresh wb->bdi's write bandwidth estimate. Called under wb->list_lock; if a
+ * bdi has multiple wb, only the flusher working on the first wb should do it.
+ */
+static void wb_update_bandwidth(struct bdi_writeback *wb,
+ unsigned long start_time) /* jiffies value taken when the caller's run began */
+{
+ __bdi_update_bandwidth(wb->bdi, start_time); /* forwarded unchanged to the bdi-level updater */
+}
+
+/*
* Explicit flushing or periodic writeback of "old" data.
*
* Define "old": the first time one of an inode's pages is dirtied, we mark the
@@ -710,6 +720,7 @@ static inline bool over_bground_thresh(void)
static long wb_writeback(struct bdi_writeback *wb,
struct wb_writeback_work *work)
{
+ unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
unsigned long oldest_jif;
struct inode *inode;
@@ -758,6 +769,8 @@ static long wb_writeback(struct bdi_writeback *wb,
progress = __writeback_inodes_wb(wb, work);
trace_writeback_written(wb->bdi, work);
+ wb_update_bandwidth(wb, wb_start);
+
/*
* Did we write something? Try for more
*
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 469d564..a008982 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -73,6 +73,11 @@ struct backing_dev_info {
struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS];
+ unsigned long bw_time_stamp; /* last time write bw is updated */
+ unsigned long written_stamp; /* pages written at bw_time_stamp */
+ unsigned long write_bandwidth; /* the estimated write bandwidth */
+ unsigned long avg_write_bandwidth; /* further smoothed write bw */
+
struct prop_local_percpu completions;
int dirty_exceeded;
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index df1b7f1..66862f2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -118,6 +118,9 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
unsigned long dirty);
+void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+ unsigned long start_time); /* start_time: jiffies value taken when the caller's run began */
+
void page_writeback_init(void);
void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
unsigned long nr_pages_dirtied);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 83f18a1..a76cdd1 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -638,6 +638,11 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}
+/*
+ * Initial write bandwidth estimate: 100 MB/s, expressed in pages per second
+ */
+#define INIT_BW (100 << (20 - PAGE_SHIFT))
+
int bdi_init(struct backing_dev_info *bdi)
{
int i, err;
@@ -660,6 +665,13 @@ int bdi_init(struct backing_dev_info *bdi)
}
bdi->dirty_exceeded = 0;
+
+ bdi->bw_time_stamp = jiffies;
+ bdi->written_stamp = 0;
+
+ bdi->write_bandwidth = INIT_BW;
+ bdi->avg_write_bandwidth = INIT_BW;
+
err = prop_local_init_percpu(&bdi->completions);
if (err) {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 8cd7137..446bdf7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -37,6 +37,11 @@
#include <trace/events/writeback.h>
/*
+ * Estimate write bandwidth at 200ms intervals (HZ/5 jiffies, never below 1).
+ */
+#define BANDWIDTH_INTERVAL max(HZ/5, 1)
+
+/*
* After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
* will look to see if it needs to force writeback or throttling.
*/
@@ -471,6 +476,85 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
return bdi_dirty;
}
+static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
+ unsigned long elapsed, /* jiffies since bdi->bw_time_stamp, as computed by the caller */
+ unsigned long written) /* BDI_WRITTEN counter value sampled by the caller */
+{ /* Fold the newest sample into bdi->write_bandwidth and bdi->avg_write_bandwidth. */
+ const unsigned long period = roundup_pow_of_two(3 * HZ); /* averaging window; a power of two so dividing by it is a shift */
+ unsigned long avg = bdi->avg_write_bandwidth;
+ unsigned long old = bdi->write_bandwidth;
+ u64 bw;
+
+ /*
+ * bw = (written - written_stamp) * HZ / elapsed
+ *
+ *                   bw * elapsed + write_bandwidth * (period - elapsed)
+ * write_bandwidth = ---------------------------------------------------
+ *                                          period
+ */
+ bw = written - bdi->written_stamp; /* pages written since the previous snapshot */
+ bw *= HZ; /* this product equals "bw * elapsed" in the formula above */
+ if (unlikely(elapsed > period)) { /* sample is longer than the window: use it alone, no blending */
+ do_div(bw, elapsed);
+ avg = bw;
+ goto out;
+ }
+ bw += (u64)bdi->write_bandwidth * (period - elapsed); /* weight the old estimate by the rest of the window */
+ bw >>= ilog2(period); /* divide by period */
+
+ /*
+ * one more level of smoothing, for filtering out sudden spikes
+ */
+ if (avg > old && old >= (unsigned long)bw) /* old lies between avg and the new bw, heading down */
+ avg -= (avg - old) >> 3; /* close 1/8 of the gap to old */
+
+ if (avg < old && old <= (unsigned long)bw) /* old lies between avg and the new bw, heading up */
+ avg += (old - avg) >> 3; /* close 1/8 of the gap to old */
+
+out:
+ bdi->write_bandwidth = bw;
+ bdi->avg_write_bandwidth = avg;
+}
+
+void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+ unsigned long start_time) /* jiffies value taken when the caller's run began */
+{ /* Rate-limited refresh of bdi's bandwidth estimate and of its written/time stamps. */
+ unsigned long now = jiffies;
+ unsigned long elapsed = now - bdi->bw_time_stamp; /* jiffies since the last snapshot */
+ unsigned long written;
+
+ /*
+ * rate-limit: update at most once every 200ms (BANDWIDTH_INTERVAL).
+ */
+ if (elapsed < BANDWIDTH_INTERVAL)
+ return;
+
+ written = percpu_counter_read(&bdi->bdi_stat[BDI_WRITTEN]);
+
+ /*
+ * Skip quiet periods when disk bandwidth is under-utilized.
+ * (at least 1s idle time between two flusher runs)
+ */
+ if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
+ goto snapshot; /* only re-stamp below; the bandwidth estimate is left untouched */
+
+ bdi_update_write_bandwidth(bdi, elapsed, written);
+
+snapshot:
+ bdi->written_stamp = written; /* baseline for the next call's "pages written" delta */
+ bdi->bw_time_stamp = now;
+}
+
+static void bdi_update_bandwidth(struct backing_dev_info *bdi,
+ unsigned long start_time) /* jiffies value taken when the caller's run began */
+{ /* Wrapper that takes bdi->wb.list_lock around __bdi_update_bandwidth(). */
+ if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
+ return; /* next update is not due yet: bail out without taking the lock */
+ spin_lock(&bdi->wb.list_lock);
+ __bdi_update_bandwidth(bdi, start_time);
+ spin_unlock(&bdi->wb.list_lock);
+}
+
/*
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
@@ -490,6 +574,7 @@ static void balance_dirty_pages(struct address_space *mapping,
unsigned long pause = 1;
bool dirty_exceeded = false;
struct backing_dev_info *bdi = mapping->backing_dev_info;
+ unsigned long start_time = jiffies;
for (;;) {
nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
@@ -544,6 +629,8 @@ static void balance_dirty_pages(struct address_space *mapping,
if (!bdi->dirty_exceeded)
bdi->dirty_exceeded = 1;
+ bdi_update_bandwidth(bdi, start_time);
+
/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
* Unstable writes are a feature of certain networked
* filesystems (i.e. NFS) in which data may have been
OpenPOWER on IntegriCloud