From 97dc3fd2cbbf6dac239333083a8a005bf50c96e8 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Mon, 16 Feb 2015 16:19:22 +0800
Subject: f2fs: use ->writepage in sync_meta_pages

This patch calls ->writepage of the meta mapping in sync_meta_pages instead
of calling f2fs_write_meta_page directly. This way, the caller is not
affected by any changes (e.g. renaming) to the registered function.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 7f794b72b3b7..6faffce01869 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -276,7 +276,7 @@ continue_unlock:
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
-			if (f2fs_write_meta_page(page, &wbc)) {
+			if (mapping->a_ops->writepage(page, &wbc)) {
 				unlock_page(page);
 				break;
 			}
-- 
cgit v1.2.3


From 551414861fbd494d58d50f4750d1d1b7f42b6df1 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Thu, 26 Feb 2015 07:57:20 +0800
Subject: f2fs: introduce macro __cp_payload

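This patch introduces the __cp_payload helper (a static inline function)
to replace the open-coded le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload)
expressions.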

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 7 +++----
 fs/f2fs/f2fs.h       | 7 ++++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 6faffce01869..c7cafd8d522c 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -471,8 +471,7 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 
 	set_sbi_flag(sbi, SBI_POR_DOING);
 
-	start_blk = __start_cp_addr(sbi) + 1 +
-		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
 	orphan_blkaddr = __start_sum_addr(sbi) - 1;
 
 	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -615,7 +614,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	unsigned long blk_size = sbi->blocksize;
 	unsigned long long cp1_version = 0, cp2_version = 0;
 	unsigned long long cp_start_blk_no;
-	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	unsigned int cp_blks = 1 + __cp_payload(sbi);
 	block_t cp_blk_no;
 	int i;
 
@@ -884,7 +883,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	__u32 crc32 = 0;
 	void *kaddr;
 	int i;
-	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	int cp_payload_blks = __cp_payload(sbi);
 
 	/*
 	 * This avoids to conduct wrong roll-forward operations and uses
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 51d97f7b77f0..7ced71b69f33 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -982,12 +982,17 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
 	return 0;
 }
 
+static inline block_t __cp_payload(struct f2fs_sb_info *sbi)
+{
+	return le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+}
+
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	int offset;
 
-	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+	if (__cp_payload(sbi) > 0) {
 		if (flag == NAT_BITMAP)
 			return &ckpt->sit_nat_version_bitmap;
 		else
-- 
cgit v1.2.3


From 3c64298579a1343cbdf3d2f17adf774a58546aae Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Thu, 26 Feb 2015 07:57:21 +0800
Subject: f2fs: fix the number of orphan inode blocks

cp_pack_start_sum is calculated in do_checkpoint and is equal to
cpu_to_le32(1 + cp_payload_blks + orphan_blocks). The number of
orphan inode blocks is used by recover_orphan_inodes to read ahead
meta pages and recover inodes. However, the current code forgets to
subtract the number of cp payload blocks when calculating the number
of orphan inode blocks. This patch fixes it.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c7cafd8d522c..4d5e697d82f6 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -464,7 +464,7 @@ static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
 
 void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 {
-	block_t start_blk, orphan_blkaddr, i, j;
+	block_t start_blk, orphan_blocks, i, j;
 
 	if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
 		return;
@@ -472,11 +472,11 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 	set_sbi_flag(sbi, SBI_POR_DOING);
 
 	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
-	orphan_blkaddr = __start_sum_addr(sbi) - 1;
+	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
 
-	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
+	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
 
-	for (i = 0; i < orphan_blkaddr; i++) {
+	for (i = 0; i < orphan_blocks; i++) {
 		struct page *page = get_meta_page(sbi, start_blk + i);
 		struct f2fs_orphan_block *orphan_blk;
 
-- 
cgit v1.2.3


From 2bda542d59f970b61095bd8205c6c76062f286e3 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Fri, 27 Feb 2015 15:56:16 +0800
Subject: f2fs: fix block_ops trace point

Block operations are used to flush all dirty node and dentry blocks in
the page cache and to suspend ordinary writing activities. However, there
are conditions, such as a cp error or a read-only mount, under which
block operations are not invoked. The current trace point prints
"start block_ops" prematurely, even if block_ops never gets a chance to
execute. This patch fixes it by moving the block_ops trace point to just
before block_ops.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4d5e697d82f6..e9f30abbb6fe 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1047,8 +1047,6 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	unsigned long long ckpt_ver;
 
-	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
-
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
@@ -1058,6 +1056,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		goto out;
 	if (f2fs_readonly(sbi->sb))
 		goto out;
+
+	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
+
 	if (block_operations(sbi))
 		goto out;
 
-- 
cgit v1.2.3


From 14b42817763903070fa8cbf087369461400cc021 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@linux.intel.com>
Date: Fri, 27 Feb 2015 17:38:13 +0800
Subject: f2fs: fix max orphan inodes calculation

cp_payload was introduced for the sit bitmap to support large volumes, and
it is located just after the block of f2fs_checkpoint + nat bitmap, so the
first segment should include F2FS_CP_PACKS + NR_CURSEG_TYPE + cp_payload +
orphan blocks. However, the current max orphan inodes calculation does not
consider cp_payload. This patch fixes it by subtracting cp_payload from the
total number of blocks in the first segment when calculating max orphan
inodes.

Signed-off-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Reviewed-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index e9f30abbb6fe..81d6d2f0e8ed 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1104,13 +1104,15 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
 	}
 
 	/*
-	 * considering 512 blocks in a segment 8 blocks are needed for cp
-	 * and log segment summaries. Remaining blocks are used to keep
-	 * orphan entries with the limitation one reserved segment
-	 * for cp pack we can have max 1020*504 orphan entries
+	 * considering 512 blocks in a segment 8+cp_payload blocks are
+	 * needed for cp and log segment summaries. Remaining blocks are
+	 * used to keep orphan entries with the limitation one reserved
+	 * segment for cp pack we can have max 1020*(504-cp_payload)
+	 * orphan entries
 	 */
 	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
-			NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
+			NR_CURSEG_TYPE - __cp_payload(sbi)) *
+				F2FS_ORPHANS_PER_BLOCK;
 }
 
 int __init create_checkpoint_caches(void)
-- 
cgit v1.2.3


From 7ecebe5e07958a0b7e54a560dbc24144287c6b41 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 27 Feb 2015 13:13:14 +0100
Subject: f2fs: add cond_resched() to sync_dirty_dir_inodes()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In a preempt-off environment with a lot of FS activity (write/delete) I run
into a CPU stall:

| NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/u2:2:59]
| Modules linked in:
| CPU: 0 PID: 59 Comm: kworker/u2:2 Tainted: G        W      3.19.0-00010-g10c11c51ffed #153
| Workqueue: writeback bdi_writeback_workfn (flush-179:0)
| task: df230000 ti: df23e000 task.ti: df23e000
| PC is at __submit_merged_bio+0x6c/0x110
| LR is at f2fs_submit_merged_bio+0x74/0x80
…
| [<c00085c4>] (gic_handle_irq) from [<c0012e84>] (__irq_svc+0x44/0x5c)
| Exception stack(0xdf23fb48 to 0xdf23fb90)
| fb40:                   deef3484 ffff0001 ffff0001 00000027 deef3484 00000000
| fb60: deef3440 00000000 de426000 deef34ec deefc440 df23fbb4 df23fbb8 df23fb90
| fb80: c02191f0 c0218fa0 60000013 ffffffff
| [<c0012e84>] (__irq_svc) from [<c0218fa0>] (__submit_merged_bio+0x6c/0x110)
| [<c0218fa0>] (__submit_merged_bio) from [<c02191f0>] (f2fs_submit_merged_bio+0x74/0x80)
| [<c02191f0>] (f2fs_submit_merged_bio) from [<c021624c>] (sync_dirty_dir_inodes+0x70/0x78)
| [<c021624c>] (sync_dirty_dir_inodes) from [<c0216358>] (write_checkpoint+0x104/0xc10)
| [<c0216358>] (write_checkpoint) from [<c021231c>] (f2fs_sync_fs+0x80/0xbc)
| [<c021231c>] (f2fs_sync_fs) from [<c0221eb8>] (f2fs_balance_fs_bg+0x4c/0x68)
| [<c0221eb8>] (f2fs_balance_fs_bg) from [<c021e9b8>] (f2fs_write_node_pages+0x40/0x110)
| [<c021e9b8>] (f2fs_write_node_pages) from [<c00de620>] (do_writepages+0x34/0x48)
| [<c00de620>] (do_writepages) from [<c0145714>] (__writeback_single_inode+0x50/0x228)
| [<c0145714>] (__writeback_single_inode) from [<c0146184>] (writeback_sb_inodes+0x1a8/0x378)
| [<c0146184>] (writeback_sb_inodes) from [<c01463e4>] (__writeback_inodes_wb+0x90/0xc8)
| [<c01463e4>] (__writeback_inodes_wb) from [<c01465f8>] (wb_writeback+0x1dc/0x28c)
| [<c01465f8>] (wb_writeback) from [<c0146dd8>] (bdi_writeback_workfn+0x2ac/0x460)
| [<c0146dd8>] (bdi_writeback_workfn) from [<c003c3fc>] (process_one_work+0x11c/0x3a4)
| [<c003c3fc>] (process_one_work) from [<c003c844>] (worker_thread+0x17c/0x490)
| [<c003c844>] (worker_thread) from [<c0041398>] (kthread+0xec/0x100)
| [<c0041398>] (kthread) from [<c000ed10>] (ret_from_fork+0x14/0x24)

As it turns out, the code loops in sync_dirty_dir_inodes() and waits for
others to make progress but since it never leaves the CPU there is no
progress made. At the time of this stall, there is also a rm process
blocked:
| rm              R running      0  1989   1774 0x00000000
| [<c047c55c>] (__schedule) from [<c00486dc>] (__cond_resched+0x30/0x4c)
| [<c00486dc>] (__cond_resched) from [<c047c8c8>] (_cond_resched+0x4c/0x54)
| [<c047c8c8>] (_cond_resched) from [<c00e1aec>] (truncate_inode_pages_range+0x1f0/0x5e8)
| [<c00e1aec>] (truncate_inode_pages_range) from [<c00e1fd8>] (truncate_inode_pages+0x28/0x30)
| [<c00e1fd8>] (truncate_inode_pages) from [<c00e2148>] (truncate_inode_pages_final+0x60/0x64)
| [<c00e2148>] (truncate_inode_pages_final) from [<c020c92c>] (f2fs_evict_inode+0x4c/0x268)
| [<c020c92c>] (f2fs_evict_inode) from [<c0137214>] (evict+0x94/0x140)
| [<c0137214>] (evict) from [<c01377e8>] (iput+0xc8/0x134)
| [<c01377e8>] (iput) from [<c01333e4>] (d_delete+0x154/0x180)
| [<c01333e4>] (d_delete) from [<c0129870>] (vfs_rmdir+0x114/0x12c)
| [<c0129870>] (vfs_rmdir) from [<c012d644>] (do_rmdir+0x158/0x168)
| [<c012d644>] (do_rmdir) from [<c012dd90>] (SyS_unlinkat+0x30/0x3c)
| [<c012dd90>] (SyS_unlinkat) from [<c000ec40>] (ret_fast_syscall+0x0/0x4c)

As explained by Jaegeuk Kim:
|This inode is the directory (c.f., do_rmdir) causing a infinite loop on
|sync_dirty_dir_inodes.
|The sync_dirty_dir_inodes tries to flush dirty dentry pages, but if the
|inode is under eviction, it submits bios and do it again until eviction
|is finished.

This patch adds a cond_resched() (as suggested by Jaegeuk) after a BIO
is submitted so other threads can make progress.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[Jaegeuk Kim: change fs/f2fs to f2fs in subject as naming convention]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 81d6d2f0e8ed..53bc32804841 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -795,6 +795,7 @@ retry:
 		 * wribacking dentry pages in the freeing inode.
 		 */
 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		cond_resched();
 	}
 	goto retry;
 }
-- 
cgit v1.2.3


From e0150392ddfaaf5ccf338893f6db177a2c64a7ee Mon Sep 17 00:00:00 2001
From: Changman Lee <cm224.lee@samsung.com>
Date: Mon, 9 Mar 2015 08:07:04 +0900
Subject: f2fs: cleanup statement about max orphan inodes calc

The meaning of the calculation can be read easily from each macro, so
remove the now-redundant comment.

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 53bc32804841..384bfc4c36c3 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1104,13 +1104,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
 		im->ino_num = 0;
 	}
 
-	/*
-	 * considering 512 blocks in a segment 8+cp_payload blocks are
-	 * needed for cp and log segment summaries. Remaining blocks are
-	 * used to keep orphan entries with the limitation one reserved
-	 * segment for cp pack we can have max 1020*(504-cp_payload)
-	 * orphan entries
-	 */
 	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
 			NR_CURSEG_TYPE - __cp_payload(sbi)) *
 				F2FS_ORPHANS_PER_BLOCK;
-- 
cgit v1.2.3


From 10027551ccf5459cc771c31ac8bc8e5cc8db45f8 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 9 Apr 2015 17:03:53 -0700
Subject: f2fs: pass checkpoint reason on roll-forward recovery

This patch adds CP_RECOVERY to retain recovery information for the
checkpoint. It also makes sure the checkpoint is written in this case.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c        | 6 +++++-
 fs/f2fs/f2fs.h              | 1 +
 fs/f2fs/recovery.c          | 2 +-
 include/trace/events/f2fs.h | 1 +
 4 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs/f2fs/checkpoint.c')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 384bfc4c36c3..a5e17a2a0781 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1051,7 +1051,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
-			cpc->reason != CP_DISCARD && cpc->reason != CP_UMOUNT)
+		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC))
 		goto out;
 	if (unlikely(f2fs_cp_error(sbi)))
 		goto out;
@@ -1086,6 +1086,10 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);
+
+	if (cpc->reason == CP_RECOVERY)
+		f2fs_msg(sbi->sb, KERN_NOTICE,
+			"checkpoint: version = %llx", ckpt_ver);
 out:
 	mutex_unlock(&sbi->cp_mutex);
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 053361ace0ec..c06a25e5cec3 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -103,6 +103,7 @@ enum {
 	CP_UMOUNT,
 	CP_FASTBOOT,
 	CP_SYNC,
+	CP_RECOVERY,
 	CP_DISCARD,
 };
 
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4b742c96c223..8d8ea99f2156 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -564,7 +564,7 @@ out:
 		mutex_unlock(&sbi->cp_mutex);
 	} else if (need_writecp) {
 		struct cp_control cpc = {
-			.reason = CP_SYNC,
+			.reason = CP_RECOVERY,
 		};
 		mutex_unlock(&sbi->cp_mutex);
 		write_checkpoint(sbi, &cpc);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 75724bd28356..8804f22a08d1 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -78,6 +78,7 @@
 		{ CP_UMOUNT,	"Umount" },				\
 		{ CP_FASTBOOT,	"Fastboot" },				\
 		{ CP_SYNC,	"Sync" },				\
+		{ CP_RECOVERY,	"Recovery" },				\
 		{ CP_DISCARD,	"Discard" })
 
 struct victim_sel_policy;
-- 
cgit v1.2.3