From 2dd15654ac0abe587a245a09a7823bbbd588bfb7 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Tue, 11 Oct 2016 22:57:00 +0800
Subject: f2fs: fix to release discard entries during checkpoint

In f2fs_fill_super, if there is any IO error occurs during recovery,
cached discard entries will be leaked, in order to avoid this, make
write_checkpoint() handle memory release by itself, besides, move
clear_prefree_segments to write_checkpoint for readability.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6132b4c..bfa4414 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -738,7 +738,6 @@ static void f2fs_put_super(struct super_block *sb)
 	 * In addition, EIO will skip do checkpoint, we need this as well.
 	 */
 	release_ino_entry(sbi, true);
-	release_discard_addrs(sbi);
 
 	f2fs_leave_shrinker(sbi);
 	mutex_unlock(&sbi->umount_mutex);
-- 
cgit v1.1


From 7c45729a4d6d1c90879e6c5c2df325c2f6db7191 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 14 Oct 2016 11:51:23 -0700
Subject: f2fs: keep dirty inodes selectively for checkpoint

This is to avoid no free segment bug during checkpoint caused by a number of
dirty inodes.

The case was reported by Chao like this.
1. mount with lazytime option
2. fill 4k file until disk is full
3. sync filesystem
4. read all files in the image
5. umount

In this case, we actually don't need to flush dirty inode to inode page during
checkpoint.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bfa4414..f8e2f31 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -620,24 +620,25 @@ static int f2fs_drop_inode(struct inode *inode)
 	return generic_drop_inode(inode);
 }
 
-int f2fs_inode_dirtied(struct inode *inode)
+int f2fs_inode_dirtied(struct inode *inode, bool sync)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	int ret = 0;
 
 	spin_lock(&sbi->inode_lock[DIRTY_META]);
 	if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
-		spin_unlock(&sbi->inode_lock[DIRTY_META]);
-		return 1;
+		ret = 1;
+	} else {
+		set_inode_flag(inode, FI_DIRTY_INODE);
+		stat_inc_dirty_inode(sbi, DIRTY_META);
 	}
-
-	set_inode_flag(inode, FI_DIRTY_INODE);
-	list_add_tail(&F2FS_I(inode)->gdirty_list,
+	if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
+		list_add_tail(&F2FS_I(inode)->gdirty_list,
 				&sbi->inode_list[DIRTY_META]);
-	inc_page_count(sbi, F2FS_DIRTY_IMETA);
-	stat_inc_dirty_inode(sbi, DIRTY_META);
+		inc_page_count(sbi, F2FS_DIRTY_IMETA);
+	}
 	spin_unlock(&sbi->inode_lock[DIRTY_META]);
-
-	return 0;
+	return ret;
 }
 
 void f2fs_inode_synced(struct inode *inode)
@@ -649,10 +650,12 @@ void f2fs_inode_synced(struct inode *inode)
 		spin_unlock(&sbi->inode_lock[DIRTY_META]);
 		return;
 	}
-	list_del_init(&F2FS_I(inode)->gdirty_list);
+	if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
+		list_del_init(&F2FS_I(inode)->gdirty_list);
+		dec_page_count(sbi, F2FS_DIRTY_IMETA);
+	}
 	clear_inode_flag(inode, FI_DIRTY_INODE);
 	clear_inode_flag(inode, FI_AUTO_RECOVER);
-	dec_page_count(sbi, F2FS_DIRTY_IMETA);
 	stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
 	spin_unlock(&sbi->inode_lock[DIRTY_META]);
 }
@@ -676,7 +679,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
 	if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
 		clear_inode_flag(inode, FI_AUTO_RECOVER);
 
-	f2fs_inode_dirtied(inode);
+	f2fs_inode_dirtied(inode, false);
 }
 
 static void f2fs_i_callback(struct rcu_head *head)
-- 
cgit v1.1


From 35782b233f37e48ecc469d9c7232f3f6a7fad41a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 20 Oct 2016 19:09:57 -0700
Subject: f2fs: remove percpu_count due to performance regression

This patch removes percpu_count usage due to performance regression in iozone.

Fixes: 523be8a6b3 ("f2fs: use percpu_counter for page counters")
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f8e2f31..9749758 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -696,10 +696,6 @@ static void f2fs_destroy_inode(struct inode *inode)
 
 static void destroy_percpu_info(struct f2fs_sb_info *sbi)
 {
-	int i;
-
-	for (i = 0; i < NR_COUNT_TYPE; i++)
-		percpu_counter_destroy(&sbi->nr_pages[i]);
 	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
 }
@@ -1449,6 +1445,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 static void init_sb_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = sbi->raw_super;
+	int i;
 
 	sbi->log_sectors_per_block =
 		le32_to_cpu(raw_super->log_sectors_per_block);
@@ -1473,6 +1470,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
 	clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
+	for (i = 0; i < NR_COUNT_TYPE; i++)
+		atomic_set(&sbi->nr_pages[i], 0);
+
 	INIT_LIST_HEAD(&sbi->s_list);
 	mutex_init(&sbi->umount_mutex);
 	mutex_init(&sbi->wio_mutex[NODE]);
@@ -1488,13 +1488,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 
 static int init_percpu_info(struct f2fs_sb_info *sbi)
 {
-	int i, err;
-
-	for (i = 0; i < NR_COUNT_TYPE; i++) {
-		err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
-		if (err)
-			return err;
-	}
+	int err;
 
 	err = percpu_counter_init(&sbi->alloc_valid_block_count, 0, GFP_KERNEL);
 	if (err)
-- 
cgit v1.1


From 099228000eff6b25e0f76b276043cd65cd4eba5a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 31 Oct 2016 14:01:41 -0700
Subject: f2fs: avoid infinite loop in the EIO case on recover_orphan_inodes

This patch should fix an infinite loop case below.

F2FS-fs : inject IO error in f2fs_read_end_io+0xf3/0x120 [f2fs]
F2FS-fs (nvme0n1p1): recover_orphan_inode: orphan failed (ino=39ac1a), run fsck to fix.
...
[<ffffffffc0b11ede>] sync_meta_pages+0xae/0x270 [f2fs]
[<ffffffffc0b288dd>] ? flush_sit_entries+0x8d/0x960 [f2fs]
[<ffffffffc0b13801>] write_checkpoint+0x361/0xf20 [f2fs]
[<ffffffffb40e979d>] ? trace_hardirqs_on+0xd/0x10
[<ffffffffc0b0a199>] ? f2fs_sync_fs+0x79/0x190 [f2fs]
[<ffffffffc0b0a1a5>] f2fs_sync_fs+0x85/0x190 [f2fs]
[<ffffffffc0b2560e>] f2fs_balance_fs_bg+0x7e/0x1c0 [f2fs]
[<ffffffffc0b216c4>] f2fs_write_node_pages+0x34/0x320 [f2fs]
[<ffffffffb41dff21>] do_writepages+0x21/0x30
[<ffffffffb429edb1>] __writeback_single_inode+0x61/0x760
[<ffffffffb490a937>] ? _raw_spin_unlock+0x27/0x40
[<ffffffffb42a0805>] writeback_single_inode+0xd5/0x190
[<ffffffffb42a0959>] write_inode_now+0x99/0xc0
[<ffffffffb4289a16>] iput+0x1f6/0x2c0
[<ffffffffc0b0e3be>] f2fs_fill_super+0xe0e/0x1300 [f2fs]
[<ffffffffb426c394>] ? sget_userns+0x4f4/0x530
[<ffffffffb426c692>] mount_bdev+0x182/0x1b0
[<ffffffffc0b0d5b0>] ? f2fs_commit_super+0x100/0x100 [f2fs]
[<ffffffffc0b0a375>] f2fs_mount+0x15/0x20 [f2fs]
[<ffffffffb426d038>] mount_fs+0x38/0x170
[<ffffffffb428ec9b>] vfs_kern_mount+0x6b/0x160
[<ffffffffb4291d9e>] do_mount+0x1be/0xd60
[<ffffffffb4291a57>] ? copy_mount_options+0xb7/0x220
[<ffffffffb4292c54>] SyS_mount+0x94/0xd0
[<ffffffffb490b345>] entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9749758..72603f1 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1889,6 +1889,13 @@ free_node_inode:
 	mutex_lock(&sbi->umount_mutex);
 	release_ino_entry(sbi, true);
 	f2fs_leave_shrinker(sbi);
+	/*
+	 * Some dirty meta pages can be produced by recover_orphan_inodes()
+	 * failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
+	 * followed by write_checkpoint() through f2fs_write_node_pages(), which
+	 * falls into an infinite loop in sync_meta_pages().
+	 */
+	truncate_inode_pages_final(META_MAPPING(sbi));
 	iput(sbi->node_inode);
 	mutex_unlock(&sbi->umount_mutex);
 free_nm:
-- 
cgit v1.1


From 487df616dec33231c99294b906d720d256a2de16 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:44:59 +0900
Subject: f2fs: Add missing break in switch-case

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 72603f1..82af510 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -420,6 +420,7 @@ static int parse_options(struct super_block *sb, char *options)
 			break;
 		case Opt_nodiscard:
 			clear_opt(sbi, DISCARD);
+			break;
 		case Opt_noheap:
 			set_opt(sbi, NOHEAP);
 			break;
-- 
cgit v1.1


From 0bfd7a091c19132489a0f977b8dbf9f6b5ae0a1c Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:00 +0900
Subject: f2fs: Use generic zoned block device terminology

SMR stands for "Shingled Magnetic Recording" which makes sense
only for hard disk drives (spinning rust). The ZBC/ZAC standards
enable management of SMR disks, but solid state drives may also
support those standards. So rename the HMSMR feature to BLKZONED
to avoid a HDD centric terminology. For the same reason, rename
f2fs_sb_mounted_hmsmr to f2fs_sb_mounted_blkzoned.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 82af510..cf54fd8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -973,7 +973,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, EXTENT_CACHE);
 	sbi->sb->s_flags |= MS_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
-	if (f2fs_sb_mounted_hmsmr(sbi->sb)) {
+	if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
 		set_opt_mode(sbi, F2FS_MOUNT_LFS);
 		set_opt(sbi, DISCARD);
 	} else {
-- 
cgit v1.1


From d1b959c8770260b611b9a1f0c5e8b12b7cb5b9d2 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:01 +0900
Subject: f2fs: Check zoned block feature for host-managed zoned block devices

The F2FS_FEATURE_BLKZONED feature indicates that the drive was formatted
 with zone alignment optimization. This is optional for host-aware
devices, but mandatory for host-managed zoned block devices.
So check that the feature is set in this latter case.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index cf54fd8..ea3752f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1638,6 +1638,26 @@ try_onemore:
 	sb->s_fs_info = sbi;
 	sbi->raw_super = raw_super;
 
+	/*
+	 * The BLKZONED feature indicates that the drive was formatted with
+	 * zone alignment optimization. This is optional for host-aware
+	 * devices, but mandatory for host-managed zoned block devices.
+	 */
+#ifndef CONFIG_BLK_DEV_ZONED
+	if (f2fs_sb_mounted_blkzoned(sb)) {
+		f2fs_msg(sb, KERN_ERR,
+			 "Zoned block device support is not enabled\n");
+		goto free_sb_buf;
+	}
+#else
+	if (bdev_zoned_model(sb->s_bdev) == BLK_ZONED_HM &&
+	    !f2fs_sb_mounted_blkzoned(sb)) {
+		f2fs_msg(sb, KERN_ERR,
+			 "Zoned block device feature not enabled\n");
+		goto free_sb_buf;
+	}
+#endif
+
 	default_options(sbi);
 	/* parse mount options */
 	options = kstrdup((const char *)data, GFP_KERNEL);
-- 
cgit v1.1


From 0ab0299835738cd407569401da1fef4c97b4419c Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:02 +0900
Subject: f2fs: Suppress discard warning message for zoned block devices

For zoned block devices, discard is replaced by zone reset. So
do not warn if the device does not supports discard.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ea3752f..56b0d2a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -412,7 +412,7 @@ static int parse_options(struct super_block *sb, char *options)
 			q = bdev_get_queue(sb->s_bdev);
 			if (blk_queue_discard(q)) {
 				set_opt(sbi, DISCARD);
-			} else {
+			} else if (!f2fs_sb_mounted_blkzoned(sb)) {
 				f2fs_msg(sb, KERN_WARNING,
 					"mounting with \"discard\" option, but "
 					"the device does not support discard");
-- 
cgit v1.1


From 96ba2decb4241aa2c6b61cfc8489d648769eff99 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:03 +0900
Subject: f2fs: Always enable discard for zoned blocks devices

Zone write pointer reset acts as discard for zoned block
devices. So if the zoned block device feature is enabled,
always declare that discard is enabled, even if the device
does not actually support the command.
For the same reason, prevent the use the "nodicard" mount
option.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 56b0d2a..7bb0b36 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -419,6 +419,11 @@ static int parse_options(struct super_block *sb, char *options)
 			}
 			break;
 		case Opt_nodiscard:
+			if (f2fs_sb_mounted_blkzoned(sb)) {
+				f2fs_msg(sb, KERN_WARNING,
+					"discard is required for zoned block devices");
+				return -EINVAL;
+			}
 			clear_opt(sbi, DISCARD);
 			break;
 		case Opt_noheap:
-- 
cgit v1.1


From 3adc57e97792e4ac9f228bde802829e2e9840afe Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:04 +0900
Subject: f2fs: Do not allow adaptive mode for host-managed zoned block devices

The LFS mode is mandatory for host-managed zoned block devices as
update in place optimizations are not possible for segments in
sequential zones.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7bb0b36..3312284 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -518,6 +518,13 @@ static int parse_options(struct super_block *sb, char *options)
 				return -ENOMEM;
 			if (strlen(name) == 8 &&
 					!strncmp(name, "adaptive", 8)) {
+				if (f2fs_sb_mounted_blkzoned(sb)) {
+					f2fs_msg(sb, KERN_WARNING,
+						 "adaptive mode is not allowed with "
+						 "zoned block device feature");
+					kfree(name);
+					return -EINVAL;
+				}
 				set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
 			} else if (strlen(name) == 3 &&
 					!strncmp(name, "lfs", 3)) {
-- 
cgit v1.1


From 178053e2f1f9ccdb61ff6c2bd8644b53fc98e72e Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 28 Oct 2016 17:45:05 +0900
Subject: f2fs: Cache zoned block devices zone type

With the zoned block device feature enabled, section discard
need to do a zone reset for sections contained in sequential
zones, and a regular discard (if supported) for sections
stored in conventional zones. Avoid the need for a costly
report zones to obtain a section zone type when discarding it
by caching the types of the device zones in the super block
information. This cache is initialized at mount time for mounts
with the zoned block device feature enabled.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3312284..12a4f3f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1511,6 +1511,65 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 								GFP_KERNEL);
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int init_blkz_info(struct f2fs_sb_info *sbi)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	sector_t nr_sectors = bdev->bd_part->nr_sects;
+	sector_t sector = 0;
+	struct blk_zone *zones;
+	unsigned int i, nr_zones;
+	unsigned int n = 0;
+	int err = -EIO;
+
+	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
+		return 0;
+
+	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
+	sbi->nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
+		sbi->log_blocks_per_blkz;
+	if (nr_sectors & (bdev_zone_size(bdev) - 1))
+		sbi->nr_blkz++;
+
+	sbi->blkz_type = kmalloc(sbi->nr_blkz, GFP_KERNEL);
+	if (!sbi->blkz_type)
+		return -ENOMEM;
+
+#define F2FS_REPORT_NR_ZONES   4096
+
+	zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
+			GFP_KERNEL);
+	if (!zones)
+		return -ENOMEM;
+
+	/* Get block zones type */
+	while (zones && sector < nr_sectors) {
+
+		nr_zones = F2FS_REPORT_NR_ZONES;
+		err = blkdev_report_zones(bdev, sector,
+					  zones, &nr_zones,
+					  GFP_KERNEL);
+		if (err)
+			break;
+		if (!nr_zones) {
+			err = -EIO;
+			break;
+		}
+
+		for (i = 0; i < nr_zones; i++) {
+			sbi->blkz_type[n] = zones[i].type;
+			sector += zones[i].len;
+			n++;
+		}
+	}
+
+	kfree(zones);
+
+	return err;
+}
+#endif
+
 /*
  * Read f2fs raw super block.
  * Because we have two copies of super block, so read both of them
@@ -1757,6 +1816,15 @@ try_onemore:
 
 	init_ino_entry_info(sbi);
 
+#ifdef CONFIG_BLK_DEV_ZONED
+	err = init_blkz_info(sbi);
+	if (err) {
+		f2fs_msg(sb, KERN_ERR,
+			"Failed to initialize F2FS blkzone information");
+		goto free_blkz;
+	}
+#endif
+
 	/* setup f2fs internal modules */
 	err = build_segment_manager(sbi);
 	if (err) {
@@ -1935,6 +2003,10 @@ free_nm:
 	destroy_node_manager(sbi);
 free_sm:
 	destroy_segment_manager(sbi);
+#ifdef CONFIG_BLK_DEV_ZONED
+free_blkz:
+	kfree(sbi->blkz_type);
+#endif
 	kfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
-- 
cgit v1.1


From b4b9d34c855ef383402cd1acefb1e33a515ae5f5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 4 Nov 2016 14:59:15 -0700
Subject: f2fs: remove checkpoint in f2fs_freeze

The generic freeze_super() calls sync_filesystems() before f2fs_freeze().
So, basically we don't need to do checkpoint in f2fs_freeze(). But, in xfs/068,
it triggers circular locking problem below due to gc_mutex for checkpoint.

======================================================
[ INFO: possible circular locking dependency detected ]
4.9.0-rc1+ #132 Tainted: G           OE
-------------------------------------------------------

1. wait for __sb_start_write() by

 [<ffffffff9845f353>] dump_stack+0x85/0xc2
 [<ffffffff980e80bf>] print_circular_bug+0x1cf/0x230
 [<ffffffff980eb4d0>] __lock_acquire+0x19e0/0x1bc0
 [<ffffffff980ebdcb>] lock_acquire+0x11b/0x220
 [<ffffffffc08c7c3b>] ? f2fs_drop_inode+0x9b/0x160 [f2fs]
 [<ffffffff9826bdd0>] __sb_start_write+0x130/0x200
 [<ffffffffc08c7c3b>] ? f2fs_drop_inode+0x9b/0x160 [f2fs]
 [<ffffffffc08c7c3b>] f2fs_drop_inode+0x9b/0x160 [f2fs]
 [<ffffffff98289991>] iput+0x171/0x2c0
 [<ffffffffc08cfccf>] f2fs_sync_inode_meta+0x3f/0xf0 [f2fs]
 [<ffffffffc08cfe04>] block_operations+0x84/0x110 [f2fs]
 [<ffffffffc08cff78>] write_checkpoint+0xe8/0xf20 [f2fs]
 [<ffffffff980e979d>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffffc08c6de9>] ? f2fs_sync_fs+0x79/0x190 [f2fs]
 [<ffffffff9803e9d9>] ? sched_clock+0x9/0x10
 [<ffffffffc08c6de9>] ? f2fs_sync_fs+0x79/0x190 [f2fs]
 [<ffffffffc08c6df5>] f2fs_sync_fs+0x85/0x190 [f2fs]
 [<ffffffff982a4f90>] ? do_fsync+0x70/0x70
 [<ffffffff982a4f90>] ? do_fsync+0x70/0x70
 [<ffffffff982a4fb0>] sync_fs_one_sb+0x20/0x30
 [<ffffffff9826ca3e>] iterate_supers+0xae/0x100
 [<ffffffff982a50b5>] sys_sync+0x55/0x90
 [<ffffffff9890b345>] entry_SYSCALL_64_fastpath+0x23/0xc6

2. wait for sbi->gc_mutex by

 [<ffffffff980ebdcb>] lock_acquire+0x11b/0x220
 [<ffffffff989063d6>] mutex_lock_nested+0x76/0x3f0
 [<ffffffffc08c6de9>] f2fs_sync_fs+0x79/0x190 [f2fs]
 [<ffffffffc08c7a6c>] f2fs_freeze+0x1c/0x20 [f2fs]
 [<ffffffff9826b6ef>] freeze_super+0xcf/0x190
 [<ffffffff9827eebc>] do_vfs_ioctl+0x53c/0x6a0
 [<ffffffff9827f099>] SyS_ioctl+0x79/0x90
 [<ffffffff9890b345>] entry_SYSCALL_64_fastpath+0x23/0xc6

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 12a4f3f..eca9aea 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -800,13 +800,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 static int f2fs_freeze(struct super_block *sb)
 {
-	int err;
-
 	if (f2fs_readonly(sb))
 		return 0;
 
-	err = f2fs_sync_fs(sb, 1);
-	return err;
+	/* IO error happened before */
+	if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
+		return -EIO;
+
+	/* must be clean, since sync_filesystem() was already called */
+	if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
+		return -EINVAL;
+	return 0;
 }
 
 static int f2fs_unfreeze(struct super_block *sb)
@@ -2152,3 +2156,4 @@ module_exit(exit_f2fs_fs)
 MODULE_AUTHOR("Samsung Electronics's Praesto Team");
 MODULE_DESCRIPTION("Flash Friendly File System");
 MODULE_LICENSE("GPL");
+
-- 
cgit v1.1


From 3c62be17d4f562f43fe1d03b48194399caa35aa5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 6 Oct 2016 19:02:05 -0700
Subject: f2fs: support multiple devices

This patch implements multiple devices support for f2fs.
Given multiple devices by mkfs.f2fs, f2fs shows them entirely as one big
volume under one f2fs instance.

Internal block management is very simple, but we will modify block allocation
and background GC policy to boost IO speed by exploiting them accoording to
each device speed.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 138 ++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 109 insertions(+), 29 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index eca9aea..4ccbb86 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -713,6 +713,19 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
 }
 
+static void destroy_device_list(struct f2fs_sb_info *sbi)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		blkdev_put(FDEV(i).bdev, FMODE_EXCL);
+#ifdef CONFIG_BLK_DEV_ZONED
+		kfree(FDEV(i).blkz_type);
+#endif
+	}
+	kfree(sbi->devs);
+}
+
 static void f2fs_put_super(struct super_block *sb)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -773,6 +786,8 @@ static void f2fs_put_super(struct super_block *sb)
 		crypto_free_shash(sbi->s_chksum_driver);
 	kfree(sbi->raw_super);
 
+	destroy_device_list(sbi);
+
 	destroy_percpu_info(sbi);
 	kfree(sbi);
 }
@@ -1516,9 +1531,9 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
-static int init_blkz_info(struct f2fs_sb_info *sbi)
+static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
 {
-	struct block_device *bdev = sbi->sb->s_bdev;
+	struct block_device *bdev = FDEV(devi).bdev;
 	sector_t nr_sectors = bdev->bd_part->nr_sects;
 	sector_t sector = 0;
 	struct blk_zone *zones;
@@ -1529,15 +1544,21 @@ static int init_blkz_info(struct f2fs_sb_info *sbi)
 	if (!f2fs_sb_mounted_blkzoned(sbi->sb))
 		return 0;
 
+	if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
+				SECTOR_TO_BLOCK(bdev_zone_size(bdev)))
+		return -EINVAL;
 	sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_size(bdev));
+	if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
+				__ilog2_u32(sbi->blocks_per_blkz))
+		return -EINVAL;
 	sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
-	sbi->nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
-		sbi->log_blocks_per_blkz;
+	FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
+					sbi->log_blocks_per_blkz;
 	if (nr_sectors & (bdev_zone_size(bdev) - 1))
-		sbi->nr_blkz++;
+		FDEV(devi).nr_blkz++;
 
-	sbi->blkz_type = kmalloc(sbi->nr_blkz, GFP_KERNEL);
-	if (!sbi->blkz_type)
+	FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
+	if (!FDEV(devi).blkz_type)
 		return -ENOMEM;
 
 #define F2FS_REPORT_NR_ZONES   4096
@@ -1562,7 +1583,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi)
 		}
 
 		for (i = 0; i < nr_zones; i++) {
-			sbi->blkz_type[n] = zones[i].type;
+			FDEV(devi).blkz_type[n] = zones[i].type;
 			sector += zones[i].len;
 			n++;
 		}
@@ -1666,6 +1687,77 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 	return err;
 }
 
+static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
+{
+	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	int i;
+
+	for (i = 0; i < MAX_DEVICES; i++) {
+		if (!RDEV(i).path[0])
+			return 0;
+
+		if (i == 0) {
+			sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
+						MAX_DEVICES, GFP_KERNEL);
+			if (!sbi->devs)
+				return -ENOMEM;
+		}
+
+		memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+		FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
+		if (i == 0) {
+			FDEV(i).start_blk = 0;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1 +
+				le32_to_cpu(raw_super->segment0_blkaddr);
+		} else {
+			FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+			FDEV(i).end_blk = FDEV(i).start_blk +
+				(FDEV(i).total_segments <<
+				sbi->log_blocks_per_seg) - 1;
+		}
+
+		FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+					sbi->sb->s_mode, sbi->sb->s_type);
+		if (IS_ERR(FDEV(i).bdev))
+			return PTR_ERR(FDEV(i).bdev);
+
+		/* to release errored devices */
+		sbi->s_ndevs = i + 1;
+
+#ifdef CONFIG_BLK_DEV_ZONED
+		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
+				!f2fs_sb_mounted_blkzoned(sbi->sb)) {
+			f2fs_msg(sbi->sb, KERN_ERR,
+				"Zoned block device feature not enabled\n");
+			return -EINVAL;
+		}
+		if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+			if (init_blkz_info(sbi, i)) {
+				f2fs_msg(sbi->sb, KERN_ERR,
+					"Failed to initialize F2FS blkzone information");
+				return -EINVAL;
+			}
+			f2fs_msg(sbi->sb, KERN_INFO,
+				"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+				i, FDEV(i).path,
+				FDEV(i).total_segments,
+				FDEV(i).start_blk, FDEV(i).end_blk,
+				bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+				"Host-aware" : "Host-managed");
+			continue;
+		}
+#endif
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Mount Device [%2d]: %20s, %8u, %8x - %8x",
+				i, FDEV(i).path,
+				FDEV(i).total_segments,
+				FDEV(i).start_blk, FDEV(i).end_blk);
+	}
+	return 0;
+}
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct f2fs_sb_info *sbi;
@@ -1724,15 +1816,7 @@ try_onemore:
 			 "Zoned block device support is not enabled\n");
 		goto free_sb_buf;
 	}
-#else
-	if (bdev_zoned_model(sb->s_bdev) == BLK_ZONED_HM &&
-	    !f2fs_sb_mounted_blkzoned(sb)) {
-		f2fs_msg(sb, KERN_ERR,
-			 "Zoned block device feature not enabled\n");
-		goto free_sb_buf;
-	}
 #endif
-
 	default_options(sbi);
 	/* parse mount options */
 	options = kstrdup((const char *)data, GFP_KERNEL);
@@ -1802,6 +1886,13 @@ try_onemore:
 		goto free_meta_inode;
 	}
 
+	/* Initialize device list */
+	err = f2fs_scan_devices(sbi);
+	if (err) {
+		f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+		goto free_devices;
+	}
+
 	sbi->total_valid_node_count =
 				le32_to_cpu(sbi->ckpt->valid_node_count);
 	percpu_counter_set(&sbi->total_valid_inode_count,
@@ -1820,15 +1911,6 @@ try_onemore:
 
 	init_ino_entry_info(sbi);
 
-#ifdef CONFIG_BLK_DEV_ZONED
-	err = init_blkz_info(sbi);
-	if (err) {
-		f2fs_msg(sb, KERN_ERR,
-			"Failed to initialize F2FS blkzone information");
-		goto free_blkz;
-	}
-#endif
-
 	/* setup f2fs internal modules */
 	err = build_segment_manager(sbi);
 	if (err) {
@@ -2007,10 +2089,8 @@ free_nm:
 	destroy_node_manager(sbi);
 free_sm:
 	destroy_segment_manager(sbi);
-#ifdef CONFIG_BLK_DEV_ZONED
-free_blkz:
-	kfree(sbi->blkz_type);
-#endif
+free_devices:
+	destroy_device_list(sbi);
 	kfree(sbi->ckpt);
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
-- 
cgit v1.1


From b08b12d2ddc85b977a0531470cf6a7158289aaaf Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 18 Nov 2016 22:27:41 +0800
Subject: f2fs: fix incorrect free inode count in ->statfs

While calculating inode count that we can create at most in the left space,
we should consider space which data/node blocks occupied, since we create
data/node mixly in main area. So fix the wrong calculation in ->statfs.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 4ccbb86..2852a0b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -852,7 +852,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_bavail = user_block_count - valid_user_blocks(sbi);
 
 	buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
-	buf->f_ffree = buf->f_files - valid_inode_count(sbi);
+	buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
+							buf->f_bavail);
 
 	buf->f_namelen = F2FS_NAME_LEN;
 	buf->f_fsid.val[0] = (u32)id;
-- 
cgit v1.1


From 204706c7accfabb67b97eef9f9a28361b6201199 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 2 Dec 2016 15:11:32 -0800
Subject: Revert "f2fs: use percpu_counter for # of dirty pages in inode"

This reverts commit 1beba1b3a953107c3ff5448ab4e4297db4619c76.

The perpcu_counter doesn't provide atomicity in single core and consume more
DRAM. That incurs fs_mark test failure due to ENOMEM.

Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 2852a0b..7591d2d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -571,13 +571,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 
 	init_once((void *) fi);
 
-	if (percpu_counter_init(&fi->dirty_pages, 0, GFP_NOFS)) {
-		kmem_cache_free(f2fs_inode_cachep, fi);
-		return NULL;
-	}
-
 	/* Initialize f2fs-specific inode info */
 	fi->vfs_inode.i_version = 1;
+	atomic_set(&fi->dirty_pages, 0);
 	fi->i_current_depth = 1;
 	fi->i_advise = 0;
 	init_rwsem(&fi->i_sem);
@@ -703,7 +699,6 @@ static void f2fs_i_callback(struct rcu_head *head)
 
 static void f2fs_destroy_inode(struct inode *inode)
 {
-	percpu_counter_destroy(&F2FS_I(inode)->dirty_pages);
 	call_rcu(&inode->i_rcu, f2fs_i_callback);
 }
 
-- 
cgit v1.1


From 2040fce83fe17763b07c97c1f691da2bb85e4135 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 5 Dec 2016 13:56:04 -0800
Subject: f2fs: detect wrong layout

Previous mkfs.f2fs allows small partition inappropriately, so f2fs should detect
that as well.

Refer this in f2fs-tools.

mkfs.f2fs: detect small partition by overprovision ratio and # of segments

Reported-and-Tested-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 7591d2d..1a52647 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1452,6 +1452,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	unsigned int total, fsmeta;
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+	unsigned int ovp_segments, reserved_segments;
 
 	total = le32_to_cpu(raw_super->segment_count);
 	fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1463,6 +1464,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
 	if (unlikely(fsmeta >= total))
 		return 1;
 
+	ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
+	reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
+
+	if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+			ovp_segments == 0 || reserved_segments == 0)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"Wrong layout: check mkfs.f2fs version");
+		return 1;
+	}
+
 	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
 		return 1;
-- 
cgit v1.1


From 5eba8c5d1fb3af28b2073ba5228d4998196c1bcc Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 7 Dec 2016 16:23:32 -0800
Subject: f2fs: fix to access nullified flush_cmd_control pointer

f2fs_sync_file()             remount_ro
 - f2fs_readonly
                               - destroy_flush_cmd_control
 - f2fs_issue_flush
   - no fcc pointer!

So, this patch doesn't free fcc in this case, but just stop its kernel thread
which sends flush commands.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs/f2fs/super.c')

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1a52647..b62c10d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1102,8 +1102,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 * or if flush_merge is not passed in mount option.
 	 */
 	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
-		destroy_flush_cmd_control(sbi);
-	} else if (!SM_I(sbi)->cmd_control_info) {
+		clear_opt(sbi, FLUSH_MERGE);
+		destroy_flush_cmd_control(sbi, false);
+	} else {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			goto restore_gc;
-- 
cgit v1.1