From 577e349514452fa3fcd99fd06e587b02d3d1cf28 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Jan 2013 19:56:11 +0900 Subject: f2fs: prevent checkpoint once any IO failure is detected This patch enhances the checkpoint routine to cope with IO errors. Basically f2fs detects IO errors from end_io_write, and the errors are able to be occurred during one of data, node, and meta page writes. In the previous code, when an IO error is occurred during writes, f2fs sets a flag, CP_ERROR_FLAG, in the raw ckeckpoint buffer which will be written to disk. Afterwards, write_checkpoint() will check the flag and remount f2fs as a read-only (ro) mode. However, even once f2fs is remounted as a ro mode, dirty checkpoint pages are freely able to be written to disk by flusher or kswapd in background. In such a case, after cold reboot, f2fs would restore the checkpoint data having CP_ERROR_FLAG, resulting in disabling write_checkpoint and remounting f2fs as a ro mode again. Therefore, let's prevent any checkpoint page (meta) writes once an IO error is occurred, and remount f2fs as a ro mode right away at that moment. Reported-by: Oliver Winker Signed-off-by: Jaegeuk Kim Reviewed-by: Namjae Jeon --- fs/f2fs/segment.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4b0099066582..7aa270f3538a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -600,6 +600,7 @@ static void f2fs_end_io_write(struct bio *bio, int err) if (page->mapping) set_bit(AS_EIO, &page->mapping->flags); set_ckpt_flags(p->sbi->ckpt, CP_ERROR_FLAG); + p->sbi->sb->s_flags |= MS_RDONLY; } end_page_writeback(page); dec_page_count(p->sbi, F2FS_WRITEBACK); @@ -815,15 +816,10 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); } -int write_meta_page(struct f2fs_sb_info *sbi, struct page *page, - struct writeback_control *wbc) +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) { - if (wbc->for_reclaim) - return AOP_WRITEPAGE_ACTIVATE; - set_page_writeback(page); submit_write_page(sbi, page, page->index, META); - return 0; } void write_node_page(struct f2fs_sb_info *sbi, struct page *page, -- cgit v1.2.3 From 437275272f9e635673f065300e5d95226a25cb06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 4 Feb 2013 15:11:17 +0900 Subject: f2fs: clarify and enhance the f2fs_gc flow This patch makes clearer the ambiguous f2fs_gc flow as follows. 1. Remove intermediate checkpoint condition during f2fs_gc (i.e., should_do_checkpoint() and GC_BLOCKED) 2. Remove unnecessary return values of f2fs_gc because of #1. (i.e., GC_NODE, GC_OK, etc) 3. Simplify write_checkpoint() because of #2. 4. Clarify the main f2fs_gc flow. o monitor how many freed sections during one iteration of do_garbage_collect(). o do GC more without checkpoints if we can't get enough free sections. o do checkpoint once we've got enough free sections through forground GCs. 5. Adopt thread-logging (Slack-Space-Recycle) scheme more aggressively on data log types. See. get_ssr_segement() Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 10 ++--- fs/f2fs/f2fs.h | 3 +- fs/f2fs/gc.c | 107 ++++++++++++++++++++------------------------------- fs/f2fs/gc.h | 16 -------- fs/f2fs/node.c | 2 +- fs/f2fs/recovery.c | 2 +- fs/f2fs/segment.c | 21 +++++++++- fs/f2fs/segment.h | 12 ++---- fs/f2fs/super.c | 4 +- 9 files changed, 72 insertions(+), 105 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2887c196b0a2..2b6fc131e2ce 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -539,7 +539,7 @@ retry: /* * Freeze all the FS-operations for checkpoint. */ -void block_operations(struct f2fs_sb_info *sbi) +static void block_operations(struct f2fs_sb_info *sbi) { int t; struct writeback_control wbc = { @@ -722,15 +722,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* * We guarantee that this checkpoint procedure should not fail. */ -void write_checkpoint(struct f2fs_sb_info *sbi, bool blocked, bool is_umount) +void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long long ckpt_ver; - if (!blocked) { - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - } + mutex_lock(&sbi->cp_mutex); + block_operations(sbi); f2fs_submit_bio(sbi, DATA, true); f2fs_submit_bio(sbi, NODE, true); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 87840bccaf21..e7e7a29767d6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -969,8 +969,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void remove_dirty_dir_inode(struct inode *); void sync_dirty_dir_inodes(struct f2fs_sb_info *); -void block_operations(struct f2fs_sb_info *); -void write_checkpoint(struct f2fs_sb_info *, bool, bool); +void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 16fdec355201..52d3a391b922 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -78,7 +78,8 @@ static int gc_thread_func(void *data) sbi->bg_gc++; - if (f2fs_gc(sbi) == GC_NONE) + /* if return value is not zero, no victim was selected */ + if (f2fs_gc(sbi)) wait_ms = GC_THREAD_NOGC_SLEEP_TIME; else if (wait_ms == GC_THREAD_NOGC_SLEEP_TIME) wait_ms = GC_THREAD_MAX_SLEEP_TIME; @@ -360,7 +361,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); ret = f2fs_test_bit(offset, sentry->cur_valid_map); mutex_unlock(&sit_i->sentry_lock); - return ret ? GC_OK : GC_NEXT; + return ret; } /* @@ -368,7 +369,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi, * On validity, copy that node with cold status, otherwise (invalid node) * ignore that. */ -static int gc_node_segment(struct f2fs_sb_info *sbi, +static void gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { bool initial = true; @@ -380,21 +381,12 @@ next_step: for (off = 0; off < sbi->blocks_per_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); struct page *node_page; - int err; - /* - * It makes sure that free segments are able to write - * all the dirty node pages before CP after this CP. - * So let's check the space of dirty node pages. - */ - if (should_do_checkpoint(sbi)) { - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - return GC_BLOCKED; - } + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; - err = check_valid_map(sbi, segno, off); - if (err == GC_NEXT) + if (check_valid_map(sbi, segno, off) == 0) continue; if (initial) { @@ -424,7 +416,6 @@ next_step: }; sync_node_pages(sbi, 0, &wbc); } - return GC_DONE; } /* @@ -467,13 +458,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, node_page = get_node_page(sbi, nid); if (IS_ERR(node_page)) - return GC_NEXT; + return 0; get_node_info(sbi, nid, dni); if (sum->version != dni->version) { f2fs_put_page(node_page, 1); - return GC_NEXT; + return 0; } *nofs = ofs_of_node(node_page); @@ -481,8 +472,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, f2fs_put_page(node_page, 1); if (source_blkaddr != blkaddr) - return GC_NEXT; - return GC_OK; + return 0; + return 1; } static void move_data_page(struct inode *inode, struct page *page, int gc_type) @@ -523,13 +514,13 @@ out: * If the parent node is not valid or the data block address is different, * the victim data block is ignored. */ -static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, +static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct list_head *ilist, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; block_t start_addr; - int err, off; + int off; int phase = 0; start_addr = START_BLOCK(sbi, segno); @@ -543,20 +534,11 @@ next_step: unsigned int ofs_in_node, nofs; block_t start_bidx; - /* - * It makes sure that free segments are able to write - * all the dirty node pages before CP after this CP. - * So let's check the space of dirty node pages. - */ - if (should_do_checkpoint(sbi)) { - mutex_lock(&sbi->cp_mutex); - block_operations(sbi); - err = GC_BLOCKED; - goto stop; - } + /* stop BG_GC if there is not enough free sections. */ + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + return; - err = check_valid_map(sbi, segno, off); - if (err == GC_NEXT) + if (check_valid_map(sbi, segno, off) == 0) continue; if (phase == 0) { @@ -565,8 +547,7 @@ next_step: } /* Get an inode by ino with checking validity */ - err = check_dnode(sbi, entry, &dni, start_addr + off, &nofs); - if (err == GC_NEXT) + if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0) continue; if (phase == 1) { @@ -606,11 +587,9 @@ next_iput: } if (++phase < 4) goto next_step; - err = GC_DONE; -stop: + if (gc_type == FG_GC) f2fs_submit_bio(sbi, DATA, true); - return err; } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, @@ -624,17 +603,16 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, return ret; } -static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, +static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, struct list_head *ilist, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; - int ret = GC_DONE; /* read segment summary of victim */ sum_page = get_sum_page(sbi, segno); if (IS_ERR(sum_page)) - return GC_ERROR; + return; /* * CP needs to lock sum_page. In this time, we don't need @@ -646,17 +624,16 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, switch (GET_SUM_TYPE((&sum->footer))) { case SUM_TYPE_NODE: - ret = gc_node_segment(sbi, sum->entries, segno, gc_type); + gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - ret = gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); break; } stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer))); stat_inc_call_count(sbi->stat_info); f2fs_put_page(sum_page, 0); - return ret; } int f2fs_gc(struct f2fs_sb_info *sbi) @@ -664,40 +641,38 @@ int f2fs_gc(struct f2fs_sb_info *sbi) struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; - int gc_status = GC_NONE; + int nfree = 0; + int ret = -1; INIT_LIST_HEAD(&ilist); gc_more: if (!(sbi->sb->s_flags & MS_ACTIVE)) goto stop; - if (gc_type == BG_GC && has_not_enough_free_secs(sbi)) + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) gc_type = FG_GC; if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) goto stop; + ret = 0; - for (i = 0; i < sbi->segs_per_sec; i++) { - /* - * do_garbage_collect will give us three gc_status: - * GC_ERROR, GC_DONE, and GC_BLOCKED. - * If GC is finished uncleanly, we have to return - * the victim to dirty segment list. - */ - gc_status = do_garbage_collect(sbi, segno + i, &ilist, gc_type); - if (gc_status != GC_DONE) - break; - } - if (has_not_enough_free_secs(sbi)) { - write_checkpoint(sbi, (gc_status == GC_BLOCKED), false); - if (has_not_enough_free_secs(sbi)) - goto gc_more; - } + for (i = 0; i < sbi->segs_per_sec; i++) + do_garbage_collect(sbi, segno + i, &ilist, gc_type); + + if (gc_type == FG_GC && + get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + nfree++; + + if (has_not_enough_free_secs(sbi, nfree)) + goto gc_more; + + if (gc_type == FG_GC) + write_checkpoint(sbi, false); stop: mutex_unlock(&sbi->gc_mutex); put_gc_inode(&ilist); - return gc_status; + return ret; } void build_gc_manager(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index c407a75a7daa..30b2db003acd 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -22,15 +22,6 @@ /* Search max. number of dirty segments to select a victim segment */ #define MAX_VICTIM_SEARCH 20 -enum { - GC_NONE = 0, - GC_ERROR, - GC_OK, - GC_NEXT, - GC_BLOCKED, - GC_DONE, -}; - struct f2fs_gc_kthread { struct task_struct *f2fs_gc_task; wait_queue_head_t gc_wait_queue_head; @@ -103,10 +94,3 @@ static inline int is_idle(struct f2fs_sb_info *sbi) struct request_list *rl = &q->root_rl; return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]); } - -static inline bool should_do_checkpoint(struct f2fs_sb_info *sbi) -{ - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - return free_sections(sbi) <= (node_secs + 2 * dent_secs + 2); -} diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 33fa6d506d94..43ce16422b75 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1135,7 +1135,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, /* First check balancing cached NAT entries */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - write_checkpoint(sbi, false, false); + write_checkpoint(sbi, false); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e2a3e1a8eae9..01e1a03b54c8 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -373,5 +373,5 @@ void recover_fsync_data(struct f2fs_sb_info *sbi) out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); - write_checkpoint(sbi, false, false); + write_checkpoint(sbi, false); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7aa270f3538a..777f17e496e6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -29,7 +29,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi) * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. */ - if (has_not_enough_free_secs(sbi)) { + if (has_not_enough_free_secs(sbi, 0)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi); } @@ -308,7 +308,7 @@ static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, * If there is not enough reserved sections, * we should not reuse prefree segments. */ - if (has_not_enough_free_secs(sbi)) + if (has_not_enough_free_secs(sbi, 0)) return NULL_SEGNO; /* @@ -536,6 +536,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) } } +static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + return v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); + + /* For data segments, let's do SSR more intensively */ + for (; type >= CURSEG_HOT_DATA; type--) + if (v_ops->get_victim(sbi, &(curseg)->next_segno, + BG_GC, type, SSR)) + return 1; + return 0; +} + /* * flush out current segment and replace it with new segment * This function should be returned with success, otherwise BUG diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 458bf5c726f7..552dadbb2327 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -450,21 +450,15 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) return (free_sections(sbi) < overprovision_sections(sbi)); } -static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) -{ - struct curseg_info *curseg = CURSEG_I(sbi, type); - return DIRTY_I(sbi)->v_ops->get_victim(sbi, - &(curseg)->next_segno, BG_GC, type, SSR); -} - -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + if (sbi->por_doing) return false; - return (free_sections(sbi) <= (node_secs + 2 * dent_secs + + return ((free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + reserved_sections(sbi))); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ddb665f54d17..8c117649a035 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -112,7 +112,7 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_stats(sbi); stop_gc_thread(sbi); - write_checkpoint(sbi, false, true); + write_checkpoint(sbi, true); iput(sbi->node_inode); iput(sbi->meta_inode); @@ -136,7 +136,7 @@ int f2fs_sync_fs(struct super_block *sb, int sync) return 0; if (sync) - write_checkpoint(sbi, false, false); + write_checkpoint(sbi, false); else f2fs_balance_fs(sbi); -- cgit v1.2.3