path: root/drivers/staging/lustre/lustre/llite/rw.c
authorJinshan Xiong <jinshan.xiong@intel.com>2016-03-30 19:48:33 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2016-03-30 21:38:13 -0700
commitfd7444fecaa0c4516d68fdbedf570b8bded60bc1 (patch)
treef4ca032ddc44e47fd8498270d7ac668b8d6c8d81 /drivers/staging/lustre/lustre/llite/rw.c
parent7addf402c1171e875109bb1567171c4c3f8f8229 (diff)
downloadop-kernel-dev-fd7444fecaa0c4516d68fdbedf570b8bded60bc1.zip
op-kernel-dev-fd7444fecaa0c4516d68fdbedf570b8bded60bc1.tar.gz
staging/lustre/clio: optimize read ahead code
The readahead code used to check that each page in the readahead window was covered by a lock underneath. Now cpo_page_is_under_lock() provides @max_index to help decide the maximum readahead window. @max_index can be modified by the OSC to extend the maximum lock region, at the LOV to align to a stripe boundary, and at the LLITE layer to make sure the readahead region at least covers the read region. After this change, the readahead code usually calls cpo_page_is_under_lock() only once for each stripe.
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-on: http://review.whamcloud.com/8523
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3321
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
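To illustrate the idea behind @max_index (this is not part of the patch): the sketch below is a minimal userspace C model in which check_page_under_lock() is a hypothetical stand-in for cl_page_is_under_lock() and the 256-page stripe size is an arbitrary assumption. It shows how caching the upper bound of the last matched lock in max_index lets the readahead loop skip the per-page lock lookup.

#include <stdio.h>

/* Hypothetical stand-in for cl_page_is_under_lock(): returns 0 if the page
 * at @index is covered by a lock and records the last covered index in
 * *max_index, so later pages inside the same extent need no further lookup.
 */
static int check_page_under_lock(unsigned long index, unsigned long *max_index)
{
	/* assume, for the sketch, that every lock covers a 256-page stripe */
	*max_index = index | 255;
	return 0;
}

int main(void)
{
	unsigned long max_index = 0;	/* 0 means "no cached lock extent yet" */
	unsigned long lookups = 0;

	for (unsigned long index = 0; index < 1024; index++) {
		if (max_index == 0 || index > max_index) {
			if (check_page_under_lock(index, &max_index) != 0)
				break;	/* page not covered, stop readahead */
			lookups++;
		}
		/* queue page @index for readahead here */
	}

	/* with 256-page stripes this reports 4 lookups instead of 1024 */
	printf("lock lookups: %lu\n", lookups);
	return 0;
}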
Diffstat (limited to 'drivers/staging/lustre/lustre/llite/rw.c')
-rw-r--r--  drivers/staging/lustre/lustre/llite/rw.c  103
1 file changed, 67 insertions(+), 36 deletions(-)
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index b1375f1..ad15058 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -166,7 +166,7 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which);
*/
static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
struct ra_io_arg *ria,
- unsigned long pages)
+ unsigned long pages, unsigned long min)
{
struct ll_ra_info *ra = &sbi->ll_ra_info;
long ret;
@@ -206,6 +206,11 @@ static unsigned long ll_ra_count_get(struct ll_sb_info *sbi,
}
out:
+ if (ret < min) {
+ /* override ra limit for maximum performance */
+ atomic_add(min - ret, &ra->ra_cur_pages);
+ ret = min;
+ }
return ret;
}
@@ -222,9 +227,9 @@ static void ll_ra_stats_inc_sbi(struct ll_sb_info *sbi, enum ra_stat which)
lprocfs_counter_incr(sbi->ll_ra_stats, which);
}
-void ll_ra_stats_inc(struct address_space *mapping, enum ra_stat which)
+void ll_ra_stats_inc(struct inode *inode, enum ra_stat which)
{
- struct ll_sb_info *sbi = ll_i2sbi(mapping->host);
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
ll_ra_stats_inc_sbi(sbi, which);
}
@@ -290,7 +295,7 @@ void ll_ra_read_ex(struct file *f, struct ll_ra_read *rar)
static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue, struct cl_page *page,
- struct cl_object *clob)
+ struct cl_object *clob, pgoff_t *max_index)
{
struct page *vmpage = page->cp_vmpage;
struct ccc_page *cp;
@@ -301,8 +306,11 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
lu_ref_add(&page->cp_reference, "ra", current);
cp = cl2ccc_page(cl_object_page_slice(clob, page));
if (!cp->cpg_defer_uptodate && !PageUptodate(vmpage)) {
- rc = cl_page_is_under_lock(env, io, page);
- if (rc == -EBUSY) {
+ CDEBUG(D_READA, "page index %lu, max_index: %lu\n",
+ ccc_index(cp), *max_index);
+ if (*max_index == 0 || ccc_index(cp) > *max_index)
+ rc = cl_page_is_under_lock(env, io, page, max_index);
+ if (rc == 0) {
cp->cpg_defer_uptodate = 1;
cp->cpg_ra_used = 0;
cl_page_list_add(queue, page);
@@ -332,24 +340,25 @@ static int cl_read_ahead_page(const struct lu_env *env, struct cl_io *io,
*/
static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
struct cl_page_list *queue,
- pgoff_t index, struct address_space *mapping)
+ pgoff_t index, pgoff_t *max_index)
{
+ struct cl_object *clob = io->ci_obj;
+ struct inode *inode = ccc_object_inode(clob);
struct page *vmpage;
- struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
struct cl_page *page;
enum ra_stat which = _NR_RA_STAT; /* keep gcc happy */
int rc = 0;
const char *msg = NULL;
- vmpage = grab_cache_page_nowait(mapping, index);
+ vmpage = grab_cache_page_nowait(inode->i_mapping, index);
if (vmpage) {
/* Check if vmpage was truncated or reclaimed */
- if (vmpage->mapping == mapping) {
+ if (vmpage->mapping == inode->i_mapping) {
page = cl_page_find(env, clob, vmpage->index,
vmpage, CPT_CACHEABLE);
if (!IS_ERR(page)) {
rc = cl_read_ahead_page(env, io, queue,
- page, clob);
+ page, clob, max_index);
if (rc == -ENOLCK) {
which = RA_STAT_FAILED_MATCH;
msg = "lock match failed";
@@ -370,7 +379,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io,
msg = "g_c_p_n failed";
}
if (msg) {
- ll_ra_stats_inc(mapping, which);
+ ll_ra_stats_inc(inode, which);
CDEBUG(D_READA, "%s\n", msg);
}
return rc;
@@ -482,11 +491,12 @@ static int ll_read_ahead_pages(const struct lu_env *env,
struct cl_io *io, struct cl_page_list *queue,
struct ra_io_arg *ria,
unsigned long *reserved_pages,
- struct address_space *mapping,
unsigned long *ra_end)
{
- int rc, count = 0, stride_ria;
- unsigned long page_idx;
+ int rc, count = 0;
+ bool stride_ria;
+ pgoff_t page_idx;
+ pgoff_t max_index = 0;
LASSERT(ria);
RIA_DEBUG(ria);
@@ -497,7 +507,7 @@ static int ll_read_ahead_pages(const struct lu_env *env,
if (ras_inside_ra_window(page_idx, ria)) {
/* If the page is inside the read-ahead window*/
rc = ll_read_ahead_page(env, io, queue,
- page_idx, mapping);
+ page_idx, &max_index);
if (rc == 1) {
(*reserved_pages)--;
count++;
@@ -532,25 +542,23 @@ static int ll_read_ahead_pages(const struct lu_env *env,
}
int ll_readahead(const struct lu_env *env, struct cl_io *io,
- struct ll_readahead_state *ras, struct address_space *mapping,
- struct cl_page_list *queue, int flags)
+ struct cl_page_list *queue, struct ll_readahead_state *ras,
+ bool hit)
{
struct vvp_io *vio = vvp_env_io(env);
struct vvp_thread_info *vti = vvp_env_info(env);
struct cl_attr *attr = ccc_env_thread_attr(env);
unsigned long start = 0, end = 0, reserved;
- unsigned long ra_end, len;
+ unsigned long ra_end, len, mlen = 0;
struct inode *inode;
struct ll_ra_read *bead;
struct ra_io_arg *ria = &vti->vti_ria;
- struct ll_inode_info *lli;
struct cl_object *clob;
int ret = 0;
__u64 kms;
- inode = mapping->host;
- lli = ll_i2info(inode);
- clob = lli->lli_clob;
+ clob = io->ci_obj;
+ inode = ccc_object_inode(clob);
memset(ria, 0, sizeof(*ria));
@@ -562,7 +570,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
return ret;
kms = attr->cat_kms;
if (kms == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_LEN);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_LEN);
return 0;
}
@@ -621,29 +629,48 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
spin_unlock(&ras->ras_lock);
if (end == 0) {
- ll_ra_stats_inc(mapping, RA_STAT_ZERO_WINDOW);
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
}
len = ria_page_count(ria);
- if (len == 0)
+ if (len == 0) {
+ ll_ra_stats_inc(inode, RA_STAT_ZERO_WINDOW);
return 0;
+ }
+
+ CDEBUG(D_READA, DFID ": ria: %lu/%lu, bead: %lu/%lu, hit: %d\n",
+ PFID(lu_object_fid(&clob->co_lu)),
+ ria->ria_start, ria->ria_end,
+ !bead ? 0 : bead->lrr_start,
+ !bead ? 0 : bead->lrr_count,
+ hit);
+
+ /* at least to extend the readahead window to cover current read */
+ if (!hit && bead &&
+ bead->lrr_start + bead->lrr_count > ria->ria_start) {
+ /* to the end of current read window. */
+ mlen = bead->lrr_start + bead->lrr_count - ria->ria_start;
+ /* trim to RPC boundary */
+ start = ria->ria_start & (PTLRPC_MAX_BRW_PAGES - 1);
+ mlen = min(mlen, PTLRPC_MAX_BRW_PAGES - start);
+ }
- reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len);
+ reserved = ll_ra_count_get(ll_i2sbi(inode), ria, len, mlen);
if (reserved < len)
- ll_ra_stats_inc(mapping, RA_STAT_MAX_IN_FLIGHT);
+ ll_ra_stats_inc(inode, RA_STAT_MAX_IN_FLIGHT);
- CDEBUG(D_READA, "reserved page %lu ra_cur %d ra_max %lu\n", reserved,
+ CDEBUG(D_READA, "reserved pages %lu/%lu/%lu, ra_cur %d, ra_max %lu\n",
+ reserved, len, mlen,
atomic_read(&ll_i2sbi(inode)->ll_ra_info.ra_cur_pages),
ll_i2sbi(inode)->ll_ra_info.ra_max_pages);
- ret = ll_read_ahead_pages(env, io, queue,
- ria, &reserved, mapping, &ra_end);
+ ret = ll_read_ahead_pages(env, io, queue, ria, &reserved, &ra_end);
if (reserved != 0)
ll_ra_count_put(ll_i2sbi(inode), reserved);
if (ra_end == end + 1 && ra_end == (kms >> PAGE_CACHE_SHIFT))
- ll_ra_stats_inc(mapping, RA_STAT_EOF);
+ ll_ra_stats_inc(inode, RA_STAT_EOF);
/* if we didn't get to the end of the region we reserved from
* the ras we need to go back and update the ras so that the
@@ -655,6 +682,7 @@ int ll_readahead(const struct lu_env *env, struct cl_io *io,
ra_end, end, ria->ria_end);
if (ra_end != end + 1) {
+ ll_ra_stats_inc(inode, RA_STAT_FAILED_REACH_END);
spin_lock(&ras->ras_lock);
if (ra_end < ras->ras_next_readahead &&
index_in_window(ra_end, ras->ras_window_start, 0,
@@ -925,15 +953,18 @@ void ras_update(struct ll_sb_info *sbi, struct inode *inode,
ras->ras_last_readpage = index;
ras_set_start(inode, ras, index);
- if (stride_io_mode(ras))
+ if (stride_io_mode(ras)) {
/* Since stride readahead is sensitive to the offset
* of read-ahead, so we use original offset here,
* instead of ras_window_start, which is RPC aligned
*/
ras->ras_next_readahead = max(index, ras->ras_next_readahead);
- else
- ras->ras_next_readahead = max(ras->ras_window_start,
- ras->ras_next_readahead);
+ } else {
+ if (ras->ras_next_readahead < ras->ras_window_start)
+ ras->ras_next_readahead = ras->ras_window_start;
+ if (!hit)
+ ras->ras_next_readahead = index + 1;
+ }
RAS_CDEBUG(ras);
/* Trigger RA in the mmap case where ras_consecutive_requests