From 4a6de50d5408cdc699588119e2338e580adc2b73 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Mar 2016 11:25:31 -0700 Subject: f2fs: use PGP_LOCK to check its truncation Previously, after trylock_page is succeeded, it doesn't check its mapping. In order to fix that, we can just give PGP_LOCK to pagecache_get_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1a33de9d84b1..ade221c9756b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1202,13 +1202,10 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0); + page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); if (!page) goto iput_out; - if (!trylock_page(page)) - goto release_out; - if (!PageUptodate(page)) goto page_out; @@ -1223,9 +1220,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) else set_page_dirty(page); page_out: - unlock_page(page); -release_out: - f2fs_put_page(page, 0); + f2fs_put_page(page, 1); iput_out: iput(inode); } -- cgit v1.2.3 From ff37355886ac2082cee594aa949c08e2cfb33aa0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Mar 2016 16:13:45 -0700 Subject: f2fs: add BUG_ON to avoid unnecessary flow This patch adds BUG_ON instead of retrying loop. In the case of node pages, we already got this inode page, but unlocked it. By the fact that we don't truncate any node pages in operations, the page's mapping should be unchangeable. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ade221c9756b..095fc2c96e16 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -832,7 +832,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); -restart: + page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); @@ -896,10 +896,7 @@ skip_partial: if (offset[1] == 0 && ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_put_page(page, 1); - goto restart; - } + BUG_ON(page->mapping != NODE_MAPPING(sbi)); f2fs_wait_on_page_writeback(page, NODE, true); ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); -- cgit v1.2.3 From eca76e783cf5970db36edfda7e66487d897ea222 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Apr 2016 16:14:38 -0700 Subject: f2fs: avoid needless lock for node pages when fsyncing a file When fsync is called, sync_node_pages finds a proper direct node pages to flush. But, it locks unrelated direct node pages together unnecessarily. Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 095fc2c96e16..cccee5006cdd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1272,10 +1272,14 @@ next_step: * we should not skip writing node pages. */ lock_node: - if (ino && ino_of_node(page) == ino) - lock_page(page); - else if (!trylock_page(page)) + if (ino) { + if (ino_of_node(page) == ino) + lock_page(page); + else + continue; + } else if (!trylock_page(page)) { continue; + } if (unlikely(page->mapping != NODE_MAPPING(sbi))) { continue_unlock: -- cgit v1.2.3 From 5268137564920843e581304d9bfb06fb9502cf24 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Apr 2016 16:24:44 -0700 Subject: f2fs: split sync_node_pages with fsync_node_pages This patch splits the existing sync_node_pages into (f)sync_node_pages. The fsync_node_pages is used for f2fs_sync_file only. Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/f2fs.h | 3 +- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 2 +- fs/f2fs/node.c | 108 +++++++++++++++++++++++++++++++++++++-------------- 5 files changed, 84 insertions(+), 33 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b92782f40643..bf040b51989d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -892,7 +892,7 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, 0, &wbc); + err = sync_node_pages(sbi, &wbc); if (err) { f2fs_unlock_all(sbi); goto out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f1551395244..269abe5959e7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1784,7 +1784,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); void sync_inode_page(struct dnode_of_data *); -int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7de90e60abd1..3d53ee058aae 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -256,7 +256,7 @@ go_write: goto out; } sync_nodes: - sync_node_pages(sbi, ino, &wbc); + fsync_node_pages(sbi, ino, &wbc); /* if cp_error was enabled, we should avoid infinite loop */ if (unlikely(f2fs_cp_error(sbi))) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b0051a97824c..e82046523186 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -841,7 +841,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, .nr_to_write = LONG_MAX, .for_reclaim = 0, }; - sync_node_pages(sbi, 0, &wbc); + sync_node_pages(sbi, &wbc); } else { f2fs_submit_merged_bio(sbi, DATA, WRITE); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cccee5006cdd..675b7304c02a 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1222,12 +1222,84 @@ iput_out: iput(inode); } -int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, +int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, struct writeback_control *wbc) { pgoff_t index, end; struct pagevec pvec; - int step = ino ? 2 : 0; + int nwritten = 0; + + pagevec_init(&pvec, 0); + index = 0; + end = ULONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (unlikely(f2fs_cp_error(sbi))) { + pagevec_release(&pvec); + return -EIO; + } + + if (!IS_DNODE(page) || !is_cold_node(page)) + continue; + if (ino_of_node(page) != ino) + continue; + + lock_page(page); + + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + f2fs_wait_on_page_writeback(page, NODE, true); + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, + need_dentry_mark(sbi, ino)); + nwritten++; + + if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + unlock_page(page); + + if (--wbc->nr_to_write == 0) + break; + } + pagevec_release(&pvec); + cond_resched(); + + if (wbc->nr_to_write == 0) + break; + } + return nwritten; +} + +int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) +{ + pgoff_t index, end; + struct pagevec pvec; + int step = 0; int nwritten = 0; pagevec_init(&pvec, 0); @@ -1266,28 +1338,15 @@ next_step: if (step == 2 && (!IS_DNODE(page) || !is_cold_node(page))) continue; - - /* - * If an fsync mode, - * we should not skip writing node pages. - */ lock_node: - if (ino) { - if (ino_of_node(page) == ino) - lock_page(page); - else - continue; - } else if (!trylock_page(page)) { + if (!trylock_page(page)) continue; - } if (unlikely(page->mapping != NODE_MAPPING(sbi))) { continue_unlock: unlock_page(page); continue; } - if (ino && ino_of_node(page) != ino) - goto continue_unlock; if (!PageDirty(page)) { /* someone wrote it for us */ @@ -1295,7 +1354,7 @@ continue_unlock: } /* flush inline_data */ - if (!ino && is_inline_node(page)) { + if (is_inline_node(page)) { clear_inline_node(page); unlock_page(page); flush_inline_data(sbi, ino_of_node(page)); @@ -1308,17 +1367,8 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - /* called by fsync() */ - if (ino && IS_DNODE(page)) { - set_fsync_mark(page, 1); - if (IS_INODE(page)) - set_dentry_mark(page, - need_dentry_mark(sbi, ino)); - nwritten++; - } else { - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); - } + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) unlock_page(page); @@ -1466,7 +1516,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; - sync_node_pages(sbi, 0, wbc); + sync_node_pages(sbi, wbc); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; -- cgit v1.2.3 From c267ec1526da2d3b12c80a89ebd8eb9b6a01d636 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 Apr 2016 09:25:04 -0700 Subject: f2fs: report unwritten status in fsync_node_pages The fsync_node_pages should return pass or failure so that user could know fsync is completed or not. Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 +++- fs/f2fs/node.c | 13 ++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3d53ee058aae..60fd64c59cce 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -256,7 +256,9 @@ go_write: goto out; } sync_nodes: - fsync_node_pages(sbi, ino, &wbc); + ret = fsync_node_pages(sbi, ino, &wbc); + if (ret) + goto out; /* if cp_error was enabled, we should avoid infinite loop */ if (unlikely(f2fs_cp_error(sbi))) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 675b7304c02a..8a1e21144ecb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1227,7 +1227,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, { pgoff_t index, end; struct pagevec pvec; - int nwritten = 0; + int ret = 0; pagevec_init(&pvec, 0); index = 0; @@ -1278,21 +1278,20 @@ continue_unlock: if (IS_INODE(page)) set_dentry_mark(page, need_dentry_mark(sbi, ino)); - nwritten++; - if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); + if (ret) { unlock_page(page); - - if (--wbc->nr_to_write == 0) break; + } } pagevec_release(&pvec); cond_resched(); - if (wbc->nr_to_write == 0) + if (ret) break; } - return nwritten; + return ret ? -EIO: 0; } int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) -- cgit v1.2.3 From 608514deba38c8611ad330d6a3c8e2b9a1f68e4b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 15 Apr 2016 09:43:17 -0700 Subject: f2fs: set fsync mark only for the last dnode In order to give atomic writes, we should consider power failure during sync_node_pages in fsync. So, this patch marks fsync flag only in the last dnode block. Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +- fs/f2fs/file.c | 14 +++++-- fs/f2fs/node.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++---- fs/f2fs/recovery.c | 9 ++--- 4 files changed, 113 insertions(+), 20 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 269abe5959e7..ca828b0e7d6d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -159,7 +159,6 @@ struct fsync_inode_entry { struct inode *inode; /* vfs inode pointer */ block_t blkaddr; /* block address locating the last fsync */ block_t last_dentry; /* block address locating the last dentry */ - block_t last_inode; /* block address locating the last inode */ }; #define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) @@ -1784,7 +1783,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); void sync_inode_page(struct dnode_of_data *); -int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, + bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60fd64c59cce..dc47d5c7b882 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -182,7 +182,8 @@ static void try_to_fix_pino(struct inode *inode) } } -int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, + int datasync, bool atomic) { struct inode *inode = file->f_mapping->host; struct f2fs_inode_info *fi = F2FS_I(inode); @@ -256,7 +257,7 @@ go_write: goto out; } sync_nodes: - ret = fsync_node_pages(sbi, ino, &wbc); + ret = fsync_node_pages(sbi, ino, &wbc, atomic); if (ret) goto out; @@ -290,6 +291,11 @@ out: return ret; } +int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) +{ + return f2fs_do_sync_file(file, start, end, datasync, false); +} + static pgoff_t __get_first_dirty_index(struct address_space *mapping, pgoff_t pgofs, int whence) { @@ -1407,7 +1413,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) } } - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); err_out: mnt_drop_write_file(filp); return ret; @@ -1465,7 +1471,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0); + ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } mnt_drop_write_file(filp); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8a1e21144ecb..de070a524fd2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1222,13 +1222,81 @@ iput_out: iput(inode); } +static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) +{ + pgoff_t index, end; + struct pagevec pvec; + struct page *last_page = NULL; + + pagevec_init(&pvec, 0); + index = 0; + end = ULONG_MAX; + + while (index <= end) { + int i, nr_pages; + nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_put_page(last_page, 0); + pagevec_release(&pvec); + return ERR_PTR(-EIO); + } + + if (!IS_DNODE(page) || !is_cold_node(page)) + continue; + if (ino_of_node(page) != ino) + continue; + + lock_page(page); + + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { +continue_unlock: + unlock_page(page); + continue; + } + if (ino_of_node(page) != ino) + goto continue_unlock; + + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; + } + + if (last_page) + f2fs_put_page(last_page, 0); + + get_page(page); + last_page = page; + unlock_page(page); + } + pagevec_release(&pvec); + cond_resched(); + } + return last_page; +} + int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, - struct writeback_control *wbc) + struct writeback_control *wbc, bool atomic) { pgoff_t index, end; struct pagevec pvec; int ret = 0; + struct page *last_page = NULL; + bool marked = false; + if (atomic) { + last_page = last_fsync_dnode(sbi, ino); + if (IS_ERR_OR_NULL(last_page)) + return PTR_ERR_OR_ZERO(last_page); + } +retry: pagevec_init(&pvec, 0); index = 0; end = ULONG_MAX; @@ -1245,6 +1313,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, struct page *page = pvec.pages[i]; if (unlikely(f2fs_cp_error(sbi))) { + f2fs_put_page(last_page, 0); pagevec_release(&pvec); return -EIO; } @@ -1264,33 +1333,54 @@ continue_unlock: if (ino_of_node(page) != ino) goto continue_unlock; - if (!PageDirty(page)) { + if (!PageDirty(page) && page != last_page) { /* someone wrote it for us */ goto continue_unlock; } f2fs_wait_on_page_writeback(page, NODE, true); BUG_ON(PageWriteback(page)); - if (!clear_page_dirty_for_io(page)) - goto continue_unlock; - set_fsync_mark(page, 1); - if (IS_INODE(page)) - set_dentry_mark(page, + if (!atomic || page == last_page) { + set_fsync_mark(page, 1); + if (IS_INODE(page)) + set_dentry_mark(page, need_dentry_mark(sbi, ino)); + /* may be written by other thread */ + if (!PageDirty(page)) + set_page_dirty(page); + } + + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; ret = NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); if (ret) { unlock_page(page); + f2fs_put_page(last_page, 0); + break; + } + if (page == last_page) { + f2fs_put_page(page, 0); + marked = true; break; } } pagevec_release(&pvec); cond_resched(); - if (ret) + if (ret || marked) break; } + if (!ret && atomic && !marked) { + f2fs_msg(sbi->sb, KERN_DEBUG, + "Retry to write fsync mark: ino=%u, idx=%lx", + ino, last_page->index); + lock_page(last_page); + set_page_dirty(last_page); + unlock_page(last_page); + goto retry; + } return ret ? -EIO: 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2c87c12b6f1c..a646d3ba3b25 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -257,11 +257,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) } entry->blkaddr = blkaddr; - if (IS_INODE(page)) { - entry->last_inode = blkaddr; - if (is_dent_dnode(page)) - entry->last_dentry = blkaddr; - } + if (IS_INODE(page) && is_dent_dnode(page)) + entry->last_dentry = blkaddr; next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); @@ -521,7 +518,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head) * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (entry->last_inode == blkaddr) + if (IS_INODE(page)) recover_inode(entry->inode, page); if (entry->last_dentry == blkaddr) { err = recover_dentry(entry->inode, page); -- cgit v1.2.3 From da011cc0da8cf4a60ddf4d2ae8b42902a3d71e5f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 27 Apr 2016 21:40:15 +0800 Subject: f2fs: move node pages only in victim section during GC For foreground GC, we cache node blocks in victim section and set them dirty, then we call sync_node_pages to flush these node pages, but meanwhile, those node pages which does not locate in victim section will be flushed together, so more bandwidth and continuous free space would be occupied. So for this condition, it's better to leave those unrelated node page in cache for further write hit, and let CP or VM to flush them afterward. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/gc.c | 25 ++++--------------------- fs/f2fs/node.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ca828b0e7d6d..6a482411e6d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1783,6 +1783,7 @@ void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); void sync_inode_page(struct dnode_of_data *); +void move_node_page(struct page *, int); int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e82046523186..19e9fafc5c70 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -465,15 +465,7 @@ next_step: continue; } - /* set page dirty and write it */ - if (gc_type == FG_GC) { - f2fs_wait_on_page_writeback(node_page, NODE, true); - set_page_dirty(node_page); - } else { - if (!PageWriteback(node_page)) - set_page_dirty(node_page); - } - f2fs_put_page(node_page, 1); + move_node_page(node_page, gc_type); stat_inc_node_blk_count(sbi, 1, gc_type); } @@ -834,18 +826,9 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_put_page(sum_page, 0); } - if (gc_type == FG_GC) { - if (type == SUM_TYPE_NODE) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = LONG_MAX, - .for_reclaim = 0, - }; - sync_node_pages(sbi, &wbc); - } else { - f2fs_submit_merged_bio(sbi, DATA, WRITE); - } - } + if (gc_type == FG_GC) + f2fs_submit_merged_bio(sbi, + (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); blk_finish_plug(&plug); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index de070a524fd2..f80cfb6beac6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1222,6 +1222,37 @@ iput_out: iput(inode); } +void move_node_page(struct page *node_page, int gc_type) +{ + if (gc_type == FG_GC) { + struct f2fs_sb_info *sbi = F2FS_P_SB(node_page); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 1, + .for_reclaim = 0, + }; + + set_page_dirty(node_page); + f2fs_wait_on_page_writeback(node_page, NODE, true); + + f2fs_bug_on(sbi, PageWriteback(node_page)); + if (!clear_page_dirty_for_io(node_page)) + goto out_page; + + if (NODE_MAPPING(sbi)->a_ops->writepage(node_page, &wbc)) + unlock_page(node_page); + goto release_page; + } else { + /* set page dirty and write it */ + if (!PageWriteback(node_page)) + set_page_dirty(node_page); + } +out_page: + unlock_page(node_page); +release_page: + f2fs_put_page(node_page, 0); +} + static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { pgoff_t index, end; -- cgit v1.2.3 From 300e129c15f0ed2f94482900a4cb65b28eb09d94 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 29 Apr 2016 16:11:53 -0700 Subject: f2fs: use f2fs_grab_cache_page instead of grab_cache_page This patch converts grab_cache_page to f2fs_grab_cache_page. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- fs/f2fs/inline.c | 4 ++-- fs/f2fs/node.c | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bf040b51989d..9596d61ca6a8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -34,7 +34,7 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; repeat: - page = grab_cache_page(mapping, index); + page = f2fs_grab_cache_page(mapping, index, false); if (!page) { cond_resched(); goto repeat; @@ -64,7 +64,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, if (unlikely(!is_meta)) fio.rw &= ~REQ_META; repeat: - page = grab_cache_page(mapping, index); + page = f2fs_grab_cache_page(mapping, index, false); if (!page) { cond_resched(); goto repeat; @@ -186,7 +186,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, BUG(); } - page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr); + page = f2fs_grab_cache_page(META_MAPPING(sbi), + fio.new_blkaddr, false); if (!page) continue; if (PageUptodate(page)) { diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 33830b251426..8a51955b68aa 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -161,7 +161,7 @@ int f2fs_convert_inline_inode(struct inode *inode) if (!f2fs_has_inline_data(inode)) return 0; - page = grab_cache_page(inode->i_mapping, 0); + page = f2fs_grab_cache_page(inode->i_mapping, 0, false); if (!page) return -ENOMEM; @@ -358,7 +358,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct f2fs_dentry_block *dentry_blk; int err; - page = grab_cache_page(dir->i_mapping, 0); + page = f2fs_grab_cache_page(dir->i_mapping, 0, false); if (!page) { f2fs_put_page(ipage, 1); return -ENOMEM; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f80cfb6beac6..af010f5c4ee1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -995,7 +995,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); if (!page) return ERR_PTR(-ENOMEM); @@ -1087,7 +1087,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (apage) return; - apage = grab_cache_page(NODE_MAPPING(sbi), nid); + apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); if (!apage) return; @@ -1128,7 +1128,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, return ERR_PTR(-ENOENT); f2fs_bug_on(sbi, check_nid_range(sbi, nid)); repeat: - page = grab_cache_page(NODE_MAPPING(sbi), nid); + page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); if (!page) return ERR_PTR(-ENOMEM); @@ -2012,7 +2012,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; - ipage = grab_cache_page(NODE_MAPPING(sbi), ino); + ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); if (!ipage) return -ENOMEM; -- cgit v1.2.3 From cb78942b821380913e6810375c9ce72858e64c4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 29 Apr 2016 16:29:22 -0700 Subject: f2fs: inject ENOSPC failures This patch injects ENOSPC failures. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++++ fs/f2fs/dir.c | 4 ++++ fs/f2fs/f2fs.h | 10 ++++++++++ fs/f2fs/node.c | 4 ++++ fs/f2fs/super.c | 4 ++++ 5 files changed, 29 insertions(+) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9596d61ca6a8..79da86d6cf5c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -474,6 +474,13 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) int err = 0; spin_lock(&im->ino_lock); + +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_ORPHAN)) { + spin_unlock(&im->ino_lock); + return -ENOSPC; + } +#endif if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 6bd059591405..50f42be4ff1a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -537,6 +537,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, } start: +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_DIR_DEPTH)) + return -ENOSPC; +#endif if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) return -ENOSPC; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d550a95d30f1..8ba2f923d95a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -41,6 +41,10 @@ enum { FAULT_KMALLOC, FAULT_PAGE_ALLOC, + FAULT_ALLOC_NID, + FAULT_ORPHAN, + FAULT_BLOCK, + FAULT_DIR_DEPTH, FAULT_MAX, }; @@ -1087,6 +1091,12 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, block_t valid_block_count; spin_lock(&sbi->stat_lock); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_BLOCK)) { + spin_unlock(&sbi->stat_lock); + return false; + } +#endif valid_block_count = sbi->total_valid_block_count + (block_t)count; if (unlikely(valid_block_count > sbi->user_block_count)) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index af010f5c4ee1..78b98db48805 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1838,6 +1838,10 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_ALLOC_NID)) + return false; +#endif if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) return false; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 986d0da84e01..87654f48c55d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -46,6 +46,10 @@ atomic_t f2fs_ops; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_ALLOC_NID] = "alloc nid", + [FAULT_ORPHAN] = "orphan", + [FAULT_BLOCK] = "no more block", + [FAULT_DIR_DEPTH] = "too big dir depth", }; #endif -- cgit v1.2.3 From fb58ae22067e0595d974e3d856522c1ed6d2d7bf Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 4 May 2016 09:58:10 -0700 Subject: f2fs: remove an obsolete variable This patch removes an obsolete variable used in add_free_nid. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 78b98db48805..264abe08bafb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1690,7 +1690,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; - bool allocated = false; if (!available_free_memory(sbi, FREE_NIDS)) return -1; @@ -1704,8 +1703,6 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) - allocated = true; - if (allocated) return 0; } -- cgit v1.2.3 From 79344efb93a26378a91193bed133cee42162cd81 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 May 2016 16:19:43 -0700 Subject: f2fs: read node blocks ahead when truncating blocks This patch enables reading node blocks in advance when truncating large data blocks. > time rm $MNT/testfile (500GB) after drop_cachees Before : 9.422 s After : 4.821 s Reported-by: Stephen Bates Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- fs/f2fs/node.c | 52 ++++++++++++++++++++++++++++------------------------ 2 files changed, 29 insertions(+), 25 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fb8cc27d4cc6..314612997721 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -584,7 +584,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } set_new_dnode(&dn, inode, ipage, NULL, 0); - err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); + err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) goto free_next; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 264abe08bafb..cda7c7ccd4c8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -407,6 +407,29 @@ cache: up_write(&nm_i->nat_tree_lock); } +/* + * readahead MAX_RA_NODE number of node pages. + */ +static void ra_node_pages(struct page *parent, int start, int n) +{ + struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + struct blk_plug plug; + int i, end; + nid_t nid; + + blk_start_plug(&plug); + + /* Then, try readahead for siblings of the desired node */ + end = start + n; + end = min(end, NIDS_PER_BLOCK); + for (i = start; i < end; i++) { + nid = get_nid(parent, i, false); + ra_node_page(sbi, nid); + } + + blk_finish_plug(&plug); +} + pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs) { const long direct_index = ADDRS_PER_INODE(dn->inode); @@ -707,6 +730,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, return PTR_ERR(page); } + ra_node_pages(page, ofs, NIDS_PER_BLOCK); + rn = F2FS_NODE(page); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { @@ -784,6 +809,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, nid[i + 1] = get_nid(pages[i], offset[i + 1], false); } + ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); + /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { child_nid = get_nid(pages[idx], i, false); @@ -1095,29 +1122,6 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) f2fs_put_page(apage, err ? 1 : 0); } -/* - * readahead MAX_RA_NODE number of node pages. - */ -static void ra_node_pages(struct page *parent, int start) -{ - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); - struct blk_plug plug; - int i, end; - nid_t nid; - - blk_start_plug(&plug); - - /* Then, try readahead for siblings of the desired node */ - end = start + MAX_RA_NODE; - end = min(end, NIDS_PER_BLOCK); - for (i = start; i < end; i++) { - nid = get_nid(parent, i, false); - ra_node_page(sbi, nid); - } - - blk_finish_plug(&plug); -} - static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, struct page *parent, int start) { @@ -1141,7 +1145,7 @@ repeat: } if (parent) - ra_node_pages(parent, start + 1); + ra_node_pages(parent, start + 1, MAX_RA_NODE); lock_page(page); -- cgit v1.2.3 From 0f3311a8c266b9f4fae4e5cdfcd9a86970e2b9bd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 21 May 2016 00:11:09 +0800 Subject: f2fs: fix to update dirty page count correctly Once we failed to merge inline data into inode page during flushing inline inode, we will skip invoking inode_dec_dirty_pages, which makes dirty page count incorrect, result in panic in ->evict_inode, Fix it. ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/inode.c:336! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 3 PID: 10004 Comm: umount Tainted: G O 4.6.0-rc5+ #17 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: f0c33000 ti: c5212000 task.ti: c5212000 EIP: 0060:[] EFLAGS: 00010202 CPU: 3 EIP is at f2fs_evict_inode+0x85/0x490 [f2fs] EAX: 00000001 EBX: c4529ea0 ECX: 00000001 EDX: 00000000 ESI: c0131000 EDI: f89dd0a0 EBP: c5213e9c ESP: c5213e78 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: b75878c0 CR3: 1a36a700 CR4: 000406f0 Stack: c4529ea0 c4529ef4 c5213e8c c176d45c c4529ef4 00000000 c4529ea0 c4529fac f89dd0a0 c5213eb0 c1204a68 c5213ed8 c452a2b4 c6680930 c5213ec0 c1204b64 c6680d44 c6680620 c5213eec c120588d ee84b000 ee84b5c0 c5214000 ee84b5e0 Call Trace: [] ? _raw_spin_unlock+0x2c/0x50 [] evict+0xa8/0x170 [] dispose_list+0x34/0x50 [] evict_inodes+0x10d/0x130 [] generic_shutdown_super+0x41/0xe0 [] ? unregister_shrinker+0x40/0x50 [] ? unregister_shrinker+0x40/0x50 [] kill_block_super+0x22/0x70 [] kill_f2fs_super+0x1e/0x20 [f2fs] [] deactivate_locked_super+0x3d/0x70 [] deactivate_super+0x43/0x60 [] cleanup_mnt+0x39/0x80 [] __cleanup_mnt+0x10/0x20 [] task_work_run+0x71/0x90 [] exit_to_usermode_loop+0x72/0x9e [] do_fast_syscall_32+0x19c/0x1c0 [] sysenter_past_esp+0x45/0x74 EIP: [] f2fs_evict_inode+0x85/0x490 [f2fs] SS:ESP 0068:c5213e78 ---[ end trace d30536330b7fdc58 ]--- Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cda7c7ccd4c8..1f21aae80c40 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1197,6 +1197,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct page *page; + int ret; /* should flush inline_data before evict_inode */ inode = ilookup(sbi->sb, ino); @@ -1216,9 +1217,9 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!clear_page_dirty_for_io(page)) goto page_out; - if (!f2fs_write_inline_data(inode, page)) - inode_dec_dirty_pages(inode); - else + ret = f2fs_write_inline_data(inode, page); + inode_dec_dirty_pages(inode); + if (ret) set_page_dirty(page); page_out: f2fs_put_page(page, 1); -- cgit v1.2.3