From 640cc18982b1c2049ac3a7223444248aea7d51b4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Jul 2017 10:59:55 -0700 Subject: f2fs: give a try to do atomic write in -ENOMEM case It'd be better to retry writing atomic pages when we get -ENOMEM. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f964b68718c1..f5d139f897dc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,17 +309,21 @@ static int __commit_inmem_pages(struct inode *inode, inode_dec_dirty_pages(inode); remove_dirty_inode(inode); } - +retry: fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; fio.need_lock = LOCK_DONE; err = do_write_data_page(&fio); if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } unlock_page(page); break; } - /* record old blkaddr for revoking */ cur->old_addr = fio.old_blkaddr; last_idx = page->index; -- cgit v1.2.3 From dc6febb6bcec7ff1b4a4d306411013b5f648f27e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 22 Jul 2017 08:52:23 +0800 Subject: f2fs: make background threads of f2fs being aware of freezing When ->freeze_fs is called from lvm for doing snapshot, it needs to make sure there will be no more changes in filesystem's data, however, previously, background threads like GC thread wasn't aware of freezing, so in environment with active background threads, data of snapshot becomes unstable. This patch fixes this issue by adding sb_{start,end}_intwrite in below background threads: - GC thread - flush thread - discard thread Note that, don't use sb_start_intwrite() in gc_thread_func() due to: generic/241 reports below bug: ====================================================== WARNING: possible circular locking dependency detected 4.13.0-rc1+ #32 Tainted: G O ------------------------------------------------------ f2fs_gc-250:0/22186 is trying to acquire lock: (&sbi->gc_mutex){+.+...}, at: [] f2fs_sync_fs+0x7b/0x1b0 [f2fs] but task is already holding lock: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (sb_internal#2){++++.-}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __sb_start_write+0x11d/0x1f0 f2fs_evict_inode+0x2d6/0x4e0 [f2fs] evict+0xa8/0x170 iput+0x1fb/0x2c0 f2fs_sync_inode_meta+0x3f/0xf0 [f2fs] write_checkpoint+0x1b1/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] f2fs_do_sync_file.isra.24+0x137/0xa30 [f2fs] f2fs_sync_file+0x34/0x40 [f2fs] vfs_fsync_range+0x4a/0xa0 do_fsync+0x3c/0x60 SyS_fdatasync+0x15/0x20 do_fast_syscall_32+0xa1/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #1 (&sbi->cp_mutex){+.+...}: __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 write_checkpoint+0x2f/0x750 [f2fs] f2fs_sync_fs+0x85/0x1b0 [f2fs] sync_filesystem+0x67/0x80 generic_shutdown_super+0x27/0x100 kill_block_super+0x22/0x50 kill_f2fs_super+0x3a/0x40 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x69/0x80 exit_to_usermode_loop+0x57/0x92 do_fast_syscall_32+0x18c/0x1b0 entry_SYSENTER_32+0x4c/0x7b -> #0 (&sbi->gc_mutex){+.+...}: validate_chain.isra.36+0xc50/0xdb0 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 __mutex_lock+0x4f/0x830 mutex_lock_nested+0x25/0x30 f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] kthread+0xe9/0x120 ret_from_fork+0x19/0x24 other info that might help us debug this: Chain exists of: &sbi->gc_mutex --> &sbi->cp_mutex --> sb_internal#2 Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sb_internal#2); lock(&sbi->cp_mutex); lock(sb_internal#2); lock(&sbi->gc_mutex); *** DEADLOCK *** 1 lock held by f2fs_gc-250:0/22186: #0: (sb_internal#2){++++.-}, at: [] gc_thread_func+0x159/0x4a0 [f2fs] stack backtrace: CPU: 2 PID: 22186 Comm: f2fs_gc-250:0 Tainted: G O 4.13.0-rc1+ #32 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 Call Trace: dump_stack+0x5f/0x92 print_circular_bug+0x1b3/0x1bd validate_chain.isra.36+0xc50/0xdb0 ? __this_cpu_preempt_check+0xf/0x20 __lock_acquire+0x405/0x7b0 lock_acquire+0xae/0x220 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] __mutex_lock+0x4f/0x830 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] mutex_lock_nested+0x25/0x30 ? f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_sync_fs+0x7b/0x1b0 [f2fs] f2fs_balance_fs_bg+0xb9/0x200 [f2fs] gc_thread_func+0x302/0x4a0 [f2fs] ? preempt_schedule_common+0x2f/0x4d ? f2fs_gc+0x540/0x540 [f2fs] kthread+0xe9/0x120 ? f2fs_gc+0x540/0x540 [f2fs] ? kthread_create_on_node+0x30/0x30 ret_from_fork+0x19/0x24 The deadlock occurs in below condition: GC Thread Thread B - sb_start_intwrite - f2fs_sync_file - f2fs_sync_fs - mutex_lock(&sbi->gc_mutex) - write_checkpoint - block_operations - f2fs_sync_inode_meta - iput - sb_start_intwrite - mutex_lock(&sbi->gc_mutex) Fix this by altering sb_start_intwrite to sb_start_write_trylock. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 9 +++++++-- fs/f2fs/segment.c | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index fa3d2e2df8e7..41c649c9c55c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -55,6 +55,9 @@ static int gc_thread_func(void *data) } #endif + if (!sb_start_write_trylock(sbi->sb)) + continue; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -69,12 +72,12 @@ static int gc_thread_func(void *data) * So, I'd like to wait some time to collect dirty segments. */ if (!mutex_trylock(&sbi->gc_mutex)) - continue; + goto next; if (!is_idle(sbi)) { increase_sleep_time(gc_th, &wait_ms); mutex_unlock(&sbi->gc_mutex); - continue; + goto next; } if (has_enough_invalid_blocks(sbi)) @@ -93,6 +96,8 @@ static int gc_thread_func(void *data) /* balancing f2fs's metadata periodically */ f2fs_balance_fs_bg(sbi); +next: + sb_end_write(sbi->sb); } while (!kthread_should_stop()); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f5d139f897dc..2a5672a20a62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -485,6 +485,8 @@ repeat: if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + if (!llist_empty(&fcc->issue_list)) { struct flush_cmd *cmd, *next; int ret; @@ -503,6 +505,8 @@ repeat: fcc->dispatch_list = NULL; } + sb_end_intwrite(sbi->sb); + wait_event_interruptible(*q, kthread_should_stop() || !llist_empty(&fcc->issue_list)); goto repeat; @@ -1130,9 +1134,13 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; + sb_start_intwrite(sbi->sb); + __issue_discard_cmd(sbi, true); __wait_discard_cmd(sbi, true); + sb_end_intwrite(sbi->sb); + congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; -- cgit v1.2.3 From 704956ecf5bcdc14d14650f39f2b545b34c96265 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 31 Jul 2017 20:19:09 +0800 Subject: f2fs: support inode checksum This patch adds to support inode checksum in f2fs. Signed-off-by: Chao Yu [Jaegeuk Kim: fix verification flow] Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 32 ++++++++++++++++++++++ fs/f2fs/inode.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/node.c | 7 +++++ fs/f2fs/segment.c | 5 +++- fs/f2fs/super.c | 5 ++++ include/linux/f2fs_fs.h | 1 + 6 files changed, 119 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1c31e4e7bde6..7c1244ba92a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -116,6 +116,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_ATOMIC_WRITE 0x0004 #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 +#define F2FS_FEATURE_INODE_CHKSUM 0x0020 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -1085,6 +1086,9 @@ struct f2fs_sb_info { /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; + /* Precomputed FS UUID checksum for seeding other checksums */ + __u32 s_chksum_seed; + /* For fault injection */ #ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info fault_info; @@ -1176,6 +1180,27 @@ static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, return f2fs_crc32(sbi, buf, buf_size) == blk_crc; } +static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, + const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[4]; + } desc; + int err; + + BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); + + desc.shash.tfm = sbi->s_chksum_driver; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) { return container_of(inode, struct f2fs_inode_info, vfs_inode); @@ -2285,6 +2310,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg); * inode.c */ void f2fs_set_inode_flags(struct inode *inode); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); @@ -2869,6 +2896,11 @@ static inline int f2fs_sb_has_project_quota(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_PRJQUOTA); } +static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f15e663a1a15..b4c401d456e7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -108,6 +108,76 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) return; } +static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + int extra_isize = le32_to_cpu(ri->i_extra_isize); + + if (!f2fs_sb_has_inode_chksum(sbi->sb)) + return false; + + if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR)) + return false; + + if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum)) + return false; + + return true; +} + +static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_node *node = F2FS_NODE(page); + struct f2fs_inode *ri = &node->i; + __le32 ino = node->footer.ino; + __le32 gen = ri->i_generation; + __u32 chksum, chksum_seed; + __u32 dummy_cs = 0; + unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum); + unsigned int cs_size = sizeof(dummy_cs); + + chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino, + sizeof(ino)); + chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen)); + + chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset); + chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size); + offset += cs_size; + chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset, + F2FS_BLKSIZE - offset); + return chksum; +} + +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri; + __u32 provided, calculated; + + if (!f2fs_enable_inode_chksum(sbi, page)) + return true; + + ri = &F2FS_NODE(page)->i; + provided = le32_to_cpu(ri->i_inode_checksum); + calculated = f2fs_inode_chksum(sbi, page); + + if (provided != calculated) + f2fs_msg(sbi->sb, KERN_WARNING, + "checksum invalid, ino = %x, %x vs. %x", + ino_of_node(page), provided, calculated); + + return provided == calculated; +} + +void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page) +{ + struct f2fs_inode *ri = &F2FS_NODE(page)->i; + + if (!f2fs_enable_inode_chksum(sbi, page)) + return; + + ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page)); +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 14b79a396a3f..0176035d8ced 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1171,6 +1171,11 @@ repeat: err = -EIO; goto out_err; } + + if (!f2fs_inode_chksum_verify(sbi, page)) { + err = -EBADMSG; + goto out_err; + } page_hit: if(unlikely(nid != nid_of_node(page))) { f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " @@ -2275,6 +2280,8 @@ retry: F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) dst->i_projid = src->i_projid; + + f2fs_inode_chksum_set(sbi, ipage); } new_ni = old_ni; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2a5672a20a62..8911f50ddef6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2214,9 +2214,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&sit_i->sentry_lock); - if (page && IS_NODESEG(type)) + if (page && IS_NODESEG(type)) { fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg)); + f2fs_inode_chksum_set(sbi, page); + } + if (add_list) { struct f2fs_bio_info *io; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4c28576c5b30..fc757c8861b7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1993,6 +1993,11 @@ try_onemore: sb->s_fs_info = sbi; sbi->raw_super = raw_super; + /* precompute checksum seed for metadata */ + if (f2fs_sb_has_inode_chksum(sb)) + sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, + sizeof(raw_super->uuid)); + /* * The BLKZONED feature indicates that the drive was formatted with * zone alignment optimization. This is optional for host-aware diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 50f176683676..2a0c453d7235 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -240,6 +240,7 @@ struct f2fs_inode { __le16 i_extra_isize; /* extra inode attribute size */ __le16 i_padding; /* padding */ __le32 i_projid; /* project id */ + __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ }; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ -- cgit v1.2.3 From 6415fedc572219e0c8e6b4d3c17c46ed36997ae7 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 21:20:13 +0800 Subject: f2fs: update cur_valid_map_mir together with cur_valid_map When cur_valid_map passes the f2fs_test_and_set(,clear)_bit test, cur_valid_map_mir update is skipped unlikely, so fix it. The fix now changes the mirror check together with cur_valid_map all the time. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu [Jaegeuk Kim: Fix unused variable and add unlikely for corner condition.] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8911f50ddef6..7781203a4077 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1503,6 +1503,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) struct seg_entry *se; unsigned int segno, offset; long int new_vblocks; + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif segno = GET_SEGNO(sbi, blkaddr); @@ -1519,17 +1523,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; @@ -1540,17 +1550,23 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) se->ckpt_valid_blocks++; } } else { - if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) { + exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_test_and_clear_bit(offset, - se->cur_valid_map_mir)) - f2fs_bug_on(sbi, 1); - else - WARN_ON(1); -#else + mir_exist = f2fs_test_and_clear_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " + "when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); + } #endif + if (unlikely(!exist)) { + f2fs_msg(sbi->sb, KERN_ERR, + "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); } + if (f2fs_discard_en(sbi) && f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; -- cgit v1.2.3 From 35ee82ca133a58011e648a407d4ab13275c975d4 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Wed, 2 Aug 2017 22:16:54 +0800 Subject: f2fs: do not change the valid_block value if cur_valid_map was wrongly set or cleared Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7781203a4077..1675a2dcd599 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1538,6 +1538,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly set, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; } if (f2fs_discard_en(sbi) && @@ -1565,6 +1567,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) f2fs_msg(sbi->sb, KERN_ERR, "Bitmap was wrongly cleared, blk:%u", blkaddr); f2fs_bug_on(sbi, 1); + se->valid_blocks++; + del = 0; } if (f2fs_discard_en(sbi) && -- cgit v1.2.3 From b0af6d491a6b5f5622fa91ac75f34f3640f862c4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Aug 2017 23:21:48 +0800 Subject: f2fs: add app/fs io stat This patch enables inner app/fs io stats and introduces below virtual fs nodes for exposing stats info: /sys/fs/f2fs//iostat_enable /proc/fs/f2fs//iostat_info Signed-off-by: Chao Yu [Jaegeuk Kim: fix wrong stat assignment] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 34 +++++++++++++++++++++--------- fs/f2fs/data.c | 35 +++++++++++++++++++++++-------- fs/f2fs/f2fs.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++--- fs/f2fs/file.c | 7 ++++++- fs/f2fs/gc.c | 3 +++ fs/f2fs/inline.c | 1 + fs/f2fs/node.c | 15 +++++++------ fs/f2fs/segment.c | 21 +++++++++++++++++-- fs/f2fs/super.c | 4 ++++ fs/f2fs/sysfs.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 200 insertions(+), 31 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c84a2520796..da5b49183e09 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -230,8 +230,9 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); } -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) +static int __f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc, + enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); @@ -244,7 +245,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; - write_meta_page(sbi, page); + write_meta_page(sbi, page, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) @@ -263,6 +264,12 @@ redirty_out: return AOP_WRITEPAGE_ACTIVATE; } +static int f2fs_write_meta_page(struct page *page, + struct writeback_control *wbc) +{ + return __f2fs_write_meta_page(page, wbc, FS_META_IO); +} + static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { @@ -283,7 +290,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); - written = sync_meta_pages(sbi, META, wbc->nr_to_write); + written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -295,7 +302,7 @@ skip_write: } long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write) + long nr_to_write, enum iostat_type io_type) { struct address_space *mapping = META_MAPPING(sbi); pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX; @@ -346,7 +353,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - if (mapping->a_ops->writepage(page, &wbc)) { + if (__f2fs_write_meta_page(page, &wbc, io_type)) { unlock_page(page); break; } @@ -904,7 +911,14 @@ retry: if (inode) { unsigned long cur_ino = inode->i_ino; + if (is_dir) + F2FS_I(inode)->cp_task = current; + filemap_fdatawrite(inode->i_mapping); + + if (is_dir) + F2FS_I(inode)->cp_task = NULL; + iput(inode); /* We need to give cpu to another writers. */ if (ino == cur_ino) { @@ -1017,7 +1031,7 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); - err = sync_node_pages(sbi, &wbc, false); + err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); if (err) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1115,7 +1129,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) return -EIO; } @@ -1194,7 +1208,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Flush all the NAT BITS pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); if (unlikely(f2fs_cp_error(sbi))) return -EIO; } @@ -1249,7 +1263,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) percpu_counter_set(&sbi->alloc_valid_block_count, 0); /* Here, we only have one bio having CP pack */ - sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO); /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aefc2a5745d3..c43262dc36de 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1475,7 +1475,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, - struct writeback_control *wbc) + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1496,6 +1497,7 @@ static int __write_data_page(struct page *page, bool *submitted, .encrypted_page = NULL, .submitted = false, .need_lock = LOCK_RETRY, + .io_type = io_type, }; trace_f2fs_writepage(page, DATA); @@ -1602,7 +1604,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc); + return __write_data_page(page, NULL, wbc, FS_DATA_IO); } /* @@ -1611,7 +1613,8 @@ static int f2fs_write_data_page(struct page *page, * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc) + struct writeback_control *wbc, + enum iostat_type io_type) { int ret = 0; int done = 0; @@ -1701,7 +1704,7 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc); + ret = __write_data_page(page, &submitted, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -1756,8 +1759,9 @@ continue_unlock: return ret; } -static int f2fs_write_data_pages(struct address_space *mapping, - struct writeback_control *wbc) +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type) { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1794,7 +1798,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, goto skip_write; blk_start_plug(&plug); - ret = f2fs_write_cache_pages(mapping, wbc); + ret = f2fs_write_cache_pages(mapping, wbc, io_type); blk_finish_plug(&plug); if (wbc->sync_mode == WB_SYNC_ALL) @@ -1813,6 +1817,16 @@ skip_write: return 0; } +static int f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + + return __f2fs_write_data_pages(mapping, wbc, + F2FS_I(inode)->cp_task == current ? + FS_CP_DATA_IO : FS_DATA_IO); +} + static void f2fs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; @@ -2079,10 +2093,13 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) up_read(&F2FS_I(inode)->dio_rwsem[rw]); if (rw == WRITE) { - if (err > 0) + if (err > 0) { + f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, + err); set_inode_flag(inode, FI_UPDATE_WRITE); - else if (err < 0) + } else if (err < 0) { f2fs_write_failed(mapping, offset + count); + } } trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7c1244ba92a8..0ccccbfdeeab 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -557,6 +557,7 @@ struct f2fs_inode_info { f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ struct task_struct *task; /* lookup and create consistency */ + struct task_struct *cp_task; /* separate cp/wb IO stats*/ nid_t i_xattr_nid; /* node id that contains xattrs */ loff_t last_disk_size; /* lastly written file size */ @@ -863,6 +864,23 @@ enum need_lock_type { LOCK_RETRY, }; +enum iostat_type { + APP_DIRECT_IO, /* app direct IOs */ + APP_BUFFERED_IO, /* app buffered IOs */ + APP_WRITE_IO, /* app write IOs */ + APP_MAPPED_IO, /* app mapped IOs */ + FS_DATA_IO, /* data IOs from kworker/fsync/reclaimer */ + FS_NODE_IO, /* node IOs from kworker/fsync/reclaimer */ + FS_META_IO, /* meta IOs from kworker/reclaimer */ + FS_GC_DATA_IO, /* data IOs from forground gc */ + FS_GC_NODE_IO, /* node IOs from forground gc */ + FS_CP_DATA_IO, /* data IOs from checkpoint */ + FS_CP_NODE_IO, /* node IOs from checkpoint */ + FS_CP_META_IO, /* meta IOs from checkpoint */ + FS_DISCARD, /* discard */ + NR_IO_TYPE, +}; + struct f2fs_io_info { struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ @@ -877,6 +895,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ + enum iostat_type io_type; /* io type */ }; #define is_read_io(rw) ((rw) == READ) @@ -1068,6 +1087,11 @@ struct f2fs_sb_info { #endif spinlock_t stat_lock; /* lock for stat operations */ + /* For app/fs IO statistics */ + spinlock_t iostat_lock; + unsigned long long write_iostat[NR_IO_TYPE]; + bool iostat_enable; + /* For sysfs suppport */ struct kobject s_kobj; struct completion s_kobj_unregister; @@ -2291,6 +2315,31 @@ static inline int get_extra_isize(struct inode *inode) sizeof((f2fs_inode)->field)) \ <= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \ +static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi) +{ + int i; + + spin_lock(&sbi->iostat_lock); + for (i = 0; i < NR_IO_TYPE; i++) + sbi->write_iostat[i] = 0; + spin_unlock(&sbi->iostat_lock); +} + +static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi, + enum iostat_type type, unsigned long long io_bytes) +{ + if (!sbi->iostat_enable) + return; + spin_lock(&sbi->iostat_lock); + sbi->write_iostat[type] += io_bytes; + + if (type == APP_WRITE_IO || type == APP_DIRECT_IO) + sbi->write_iostat[APP_BUFFERED_IO] = + sbi->write_iostat[APP_WRITE_IO] - + sbi->write_iostat[APP_DIRECT_IO]; + spin_unlock(&sbi->iostat_lock); +} + /* * file.c */ @@ -2418,7 +2467,7 @@ void move_node_page(struct page *node_page, int gc_type); int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic); int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, - bool do_balance); + bool do_balance, enum iostat_type io_type); void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount); bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); @@ -2461,7 +2510,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type); void write_node_page(unsigned int nid, struct f2fs_io_info *fio); void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); int rewrite_data_page(struct f2fs_io_info *fio); @@ -2502,7 +2552,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, - long nr_to_write); + long nr_to_write, enum iostat_type io_type); void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); void release_ino_entry(struct f2fs_sb_info *sbi, bool all); @@ -2555,6 +2605,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); void f2fs_set_page_dirty_nobuffers(struct page *page); +int __f2fs_write_data_pages(struct address_space *mapping, + struct writeback_control *wbc, + enum iostat_type io_type); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 407518f42e45..e2b33b87701f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -98,6 +98,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) if (!PageUptodate(page)) SetPageUptodate(page); + f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE); + trace_f2fs_vm_page_mkwrite(page, DATA); mapped: /* fill the page */ @@ -1815,7 +1817,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) f2fs_stop_checkpoint(sbi, false); break; case F2FS_GOING_DOWN_METAFLUSH: - sync_meta_pages(sbi, META, LONG_MAX); + sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO); f2fs_stop_checkpoint(sbi, false); break; default: @@ -2694,6 +2696,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = __generic_file_write_iter(iocb, from); blk_finish_plug(&plug); clear_inode_flag(inode, FI_NO_PREALLOC); + + if (ret > 0) + f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } inode_unlock(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f57cadae1a30..620dca443b29 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -689,6 +689,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx, fio.new_blkaddr = newaddr; f2fs_submit_page_write(&fio); + f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE); + f2fs_update_data_blkaddr(&dn, newaddr); set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) @@ -736,6 +738,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .page = page, .encrypted_page = NULL, .need_lock = LOCK_REQ, + .io_type = FS_GC_DATA_IO, }; bool is_dirty = PageDirty(page); int err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f38be791fdf9..e63ab0d1f614 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -117,6 +117,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .op_flags = REQ_SYNC | REQ_PRIO, .page = page, .encrypted_page = NULL, + .io_type = FS_DATA_IO, }; int dirty, err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0176035d8ced..9168c304fd58 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1332,7 +1332,8 @@ continue_unlock: } static int __write_node_page(struct page *page, bool atomic, bool *submitted, - struct writeback_control *wbc, bool do_balance) + struct writeback_control *wbc, bool do_balance, + enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); nid_t nid; @@ -1345,6 +1346,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .io_type = io_type, }; trace_f2fs_writepage(page, NODE); @@ -1413,7 +1415,7 @@ redirty_out: static int f2fs_write_node_page(struct page *page, struct writeback_control *wbc) { - return __write_node_page(page, false, NULL, wbc, false); + return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO); } int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, @@ -1501,7 +1503,8 @@ continue_unlock: ret = __write_node_page(page, atomic && page == last_page, - &submitted, wbc, true); + &submitted, wbc, true, + FS_NODE_IO); if (ret) { unlock_page(page); f2fs_put_page(last_page, 0); @@ -1539,7 +1542,7 @@ out: } int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc, - bool do_balance) + bool do_balance, enum iostat_type io_type) { pgoff_t index, end; struct pagevec pvec; @@ -1618,7 +1621,7 @@ continue_unlock: set_dentry_mark(page, 0); ret = __write_node_page(page, false, &submitted, - wbc, do_balance); + wbc, do_balance, io_type); if (ret) unlock_page(page); else if (submitted) @@ -1707,7 +1710,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; blk_start_plug(&plug); - sync_node_pages(sbi, wbc, true); + sync_node_pages(sbi, wbc, true, FS_NODE_IO); blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1675a2dcd599..c6e4a1174f8a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -292,6 +292,7 @@ static int __commit_inmem_pages(struct inode *inode, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, + .io_type = FS_DATA_IO, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -823,6 +824,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, submit_bio(bio); list_move_tail(&dc->list, &dcc->wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); + + f2fs_update_iostat(sbi, FS_DISCARD, 1); } } else { __remove_discard_cmd(sbi, dc); @@ -2271,7 +2274,8 @@ reallocate: } } -void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) +void write_meta_page(struct f2fs_sb_info *sbi, struct page *page, + enum iostat_type io_type) { struct f2fs_io_info fio = { .sbi = sbi, @@ -2290,6 +2294,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) set_page_writeback(page); f2fs_submit_page_write(&fio); + + f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE); } void write_node_page(unsigned int nid, struct f2fs_io_info *fio) @@ -2298,6 +2304,8 @@ void write_node_page(unsigned int nid, struct f2fs_io_info *fio) set_summary(&sum, nid, 0, 0); do_write_page(&sum, fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) @@ -2311,13 +2319,22 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version); do_write_page(&sum, fio); f2fs_update_data_blkaddr(dn, fio->new_blkaddr); + + f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE); } int rewrite_data_page(struct f2fs_io_info *fio) { + int err; + fio->new_blkaddr = fio->old_blkaddr; stat_inc_inplace_blocks(fio->sbi); - return f2fs_submit_page_bio(fio); + + err = f2fs_submit_page_bio(fio); + + f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); + + return err; } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b84367856f8e..ca340d84f8c2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2056,6 +2056,10 @@ try_onemore: set_sbi_flag(sbi, SBI_POR_DOING); spin_lock_init(&sbi->stat_lock); + /* init iostat info */ + spin_lock_init(&sbi->iostat_lock); + sbi->iostat_enable = false; + for (i = 0; i < NR_PAGE_TYPE; i++) { int n = (i == META) ? 1: NR_TEMP_TYPE; int j; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 1e31d0c5b6ab..3d6bbdb743b0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -153,6 +153,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, return count; } *ui = t; + + if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) + f2fs_reset_iostat(sbi); + return count; } @@ -250,6 +254,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable); #ifdef CONFIG_F2FS_FAULT_INJECTION F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); @@ -288,6 +293,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), + ATTR_LIST(iostat_enable), #ifdef CONFIG_F2FS_FAULT_INJECTION ATTR_LIST(inject_rate), ATTR_LIST(inject_type), @@ -391,6 +397,48 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) return 0; } +static int iostat_info_seq_show(struct seq_file *seq, void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + time64_t now = ktime_get_real_seconds(); + + if (!sbi->iostat_enable) + return 0; + + seq_printf(seq, "time: %-16llu\n", now); + + /* print app IOs */ + seq_printf(seq, "app buffered: %-16llu\n", + sbi->write_iostat[APP_BUFFERED_IO]); + seq_printf(seq, "app direct: %-16llu\n", + sbi->write_iostat[APP_DIRECT_IO]); + seq_printf(seq, "app mapped: %-16llu\n", + sbi->write_iostat[APP_MAPPED_IO]); + + /* print fs IOs */ + seq_printf(seq, "fs data: %-16llu\n", + sbi->write_iostat[FS_DATA_IO]); + seq_printf(seq, "fs node: %-16llu\n", + sbi->write_iostat[FS_NODE_IO]); + seq_printf(seq, "fs meta: %-16llu\n", + sbi->write_iostat[FS_META_IO]); + seq_printf(seq, "fs gc data: %-16llu\n", + sbi->write_iostat[FS_GC_DATA_IO]); + seq_printf(seq, "fs gc node: %-16llu\n", + sbi->write_iostat[FS_GC_NODE_IO]); + seq_printf(seq, "fs cp data: %-16llu\n", + sbi->write_iostat[FS_CP_DATA_IO]); + seq_printf(seq, "fs cp node: %-16llu\n", + sbi->write_iostat[FS_CP_NODE_IO]); + seq_printf(seq, "fs cp meta: %-16llu\n", + sbi->write_iostat[FS_CP_META_IO]); + seq_printf(seq, "fs discard: %-16llu\n", + sbi->write_iostat[FS_DISCARD]); + + return 0; +} + #define F2FS_PROC_FILE_DEF(_name) \ static int _name##_open_fs(struct inode *inode, struct file *file) \ { \ @@ -406,6 +454,7 @@ static const struct file_operations f2fs_seq_##_name##_fops = { \ F2FS_PROC_FILE_DEF(segment_info); F2FS_PROC_FILE_DEF(segment_bits); +F2FS_PROC_FILE_DEF(iostat_info); int __init f2fs_init_sysfs(void) { @@ -454,6 +503,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) &f2fs_seq_segment_info_fops, sb); proc_create_data("segment_bits", S_IRUGO, sbi->s_proc, &f2fs_seq_segment_bits_fops, sb); + proc_create_data("iostat_info", S_IRUGO, sbi->s_proc, + &f2fs_seq_iostat_info_fops, sb); } return 0; } @@ -461,6 +512,7 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) { if (sbi->s_proc) { + remove_proc_entry("iostat_info", sbi->s_proc); remove_proc_entry("segment_info", sbi->s_proc); remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); -- cgit v1.2.3 From 008396e1b026b3873091b555b808155da7d9d18f Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 4 Aug 2017 17:07:15 +0800 Subject: f2fs: fix the size value in __check_sit_bitmap The current size value is not correct and will miss bitmap check. Signed-off-by: Yunlong Song Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c6e4a1174f8a..95267630c8c4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -787,11 +787,14 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); offset = GET_BLKOFF_FROM_SEG0(sbi, blk); - size = min((unsigned long)(end - blk), max_blocks); + if (end < START_BLOCK(sbi, segno + 1)) + size = GET_BLKOFF_FROM_SEG0(sbi, end); + else + size = max_blocks; map = (unsigned long *)(sentry->cur_valid_map); offset = __find_rev_next_bit(map, size, offset); f2fs_bug_on(sbi, offset != size); - blk += size; + blk = START_BLOCK(sbi, segno + 1); } #endif } -- cgit v1.2.3 From 7f2b4e8ea5b49846f7d20f1694dbca81f88b8d50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 8 Aug 2017 19:09:08 +0800 Subject: f2fs: retry to revoke atomic commit in -ENOMEM case During atomic committing, if we encounter -ENOMEM in revoke path, it's better to give a chance to retry revoking. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 95267630c8c4..05144b3a7f62 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -213,9 +213,15 @@ static int __revoke_inmem_pages(struct inode *inode, struct node_info ni; trace_f2fs_commit_inmem_page(page, INMEM_REVOKE); - +retry: set_new_dnode(&dn, inode, NULL, NULL, 0); - if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) { + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto retry; + } err = -EAGAIN; goto next; } -- cgit v1.2.3 From 969d1b180d987c2be02de890d0fff0f66a0e80de Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Aug 2017 23:09:56 +0800 Subject: f2fs: introduce discard_granularity sysfs entry Commit d618ebaf0aa8 ("f2fs: enable small discard by default") enables f2fs to issue 4K size discard in real-time discard mode. However, issuing smaller discard may cost more lifetime but releasing less free space in flash device. Since f2fs has ability of separating hot/cold data and garbage collection, we can expect that small-sized invalid region would expand soon with OPU, deletion or garbage collection on valid datas, so it's better to delay or skip issuing smaller size discards, it could help to reduce overmuch consumption of IO bandwidth and lifetime of flash storage. This patch makes f2fs selectng 64K size as its default minimal granularity, and issue discard with the size which is not smaller than minimal granularity. Also it exposes discard granularity as sysfs entry for configuration in different scenario. Jaegeuk Kim: We must issue all the accumulated discard commands when fstrim is called. So, I've added pend_list_tag[] to indicate whether we should issue the commands or not. If tag sets P_ACTIVE or P_TRIM, we have to issue them. P_TRIM is set once at a time, given fstrim trigger. In addition, issue_discard_thread is calling too much due to the number of discard commands remaining in the pending list. I added a timer to control it likewise gc_thread. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 9 ++++ fs/f2fs/f2fs.h | 12 +++++ fs/f2fs/segment.c | 91 ++++++++++++++++++++++++++++----- fs/f2fs/sysfs.c | 23 +++++++++ 4 files changed, 121 insertions(+), 14 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 621da3fc56c5..11b7f4ebea7c 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -57,6 +57,15 @@ Contact: "Jaegeuk Kim" Description: Controls the issue rate of small discard commands. +What: /sys/fs/f2fs//discard_granularity +Date: July 2017 +Contact: "Chao Yu" +Description: + Controls discard granularity of inner discard thread, inner thread + will not issue discards with size that is smaller than granularity. + The unit size is one block, now only support configuring in range + of [1, 512]. + What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e252e5bf9791..4b993961d81d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -148,6 +148,8 @@ enum { (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) #define DISCARD_ISSUE_RATE 8 +#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */ +#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */ #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ @@ -196,11 +198,18 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* default discard granularity of inner discard thread, unit: block count */ +#define DEFAULT_DISCARD_GRANULARITY 16 + /* max discard pend list number */ #define MAX_PLIST_NUM 512 #define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ (MAX_PLIST_NUM - 1) : (blk_num - 1)) +#define P_ACTIVE 0x01 +#define P_TRIM 0x02 +#define plist_issue(tag) (((tag) & P_ACTIVE) || ((tag) & P_TRIM)) + enum { D_PREP, D_SUBMIT, @@ -236,11 +245,14 @@ struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ + unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ + unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 05144b3a7f62..1387925a0d83 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1016,32 +1016,65 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } -static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int i, iter = 0; + int iter = 0, issued = 0; + int i; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { pend_list = &dcc->pend_list[i]; list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); - if (!issue_cond || is_idle(sbi)) + /* Hurry up to finish fstrim */ + if (dcc->pend_list_tag[i] & P_TRIM) { + __submit_discard_cmd(sbi, dc); + issued++; + continue; + } + + if (!issue_cond || is_idle(sbi)) { + issued++; __submit_discard_cmd(sbi, dc); + } if (issue_cond && iter++ > DISCARD_ISSUE_RATE) goto out; } + if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) + dcc->pend_list_tag[i] &= (~P_TRIM); } out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + + return issued; +} + +static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); } static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, @@ -1126,34 +1159,56 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); + __drop_discard_cmd(sbi); __wait_discard_cmd(sbi, false); } +static void mark_discard_range_all(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) + dcc->pend_list_tag[i] |= P_TRIM; + mutex_unlock(&dcc->cmd_lock); +} + static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; + unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + int issued; set_freezable(); do { - wait_event_interruptible(*q, kthread_should_stop() || - freezing(current) || - atomic_read(&dcc->discard_cmd_cnt)); + wait_event_interruptible_timeout(*q, + kthread_should_stop() || freezing(current) || + dcc->discard_wake, + msecs_to_jiffies(wait_ms)); if (try_to_freeze()) continue; if (kthread_should_stop()) return 0; + if (dcc->discard_wake) + dcc->discard_wake = 0; + sb_start_intwrite(sbi->sb); - __issue_discard_cmd(sbi, true); - __wait_discard_cmd(sbi, true); + issued = __issue_discard_cmd(sbi, true); + if (issued) { + __wait_discard_cmd(sbi, true); + wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + } else { + wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; + } sb_end_intwrite(sbi->sb); - congestion_wait(BLK_RW_SYNC, HZ/50); } while (!kthread_should_stop()); return 0; } @@ -1344,7 +1399,8 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *head = &dcc->entry_list; struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; @@ -1426,11 +1482,12 @@ skip: goto find_next; list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= total_len; + dcc->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) @@ -1448,9 +1505,13 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; + dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; INIT_LIST_HEAD(&dcc->entry_list); - for (i = 0; i < MAX_PLIST_NUM; i++) + for (i = 0; i < MAX_PLIST_NUM; i++) { INIT_LIST_HEAD(&dcc->pend_list[i]); + if (i >= dcc->discard_granularity - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + } INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); @@ -2127,6 +2188,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } + /* It's time to issue all the filed discards */ + mark_discard_range_all(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c40e5d24df9f..4bcaa9059026 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -152,6 +152,27 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, spin_unlock(&sbi->stat_lock); return count; } + + if (!strcmp(a->attr.name, "discard_granularity")) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + int i; + + if (t == 0 || t > MAX_PLIST_NUM) + return -EINVAL; + if (t == *ui) + return count; + + mutex_lock(&dcc->cmd_lock); + for (i = 0; i < MAX_PLIST_NUM; i++) { + if (i >= t - 1) + dcc->pend_list_tag[i] |= P_ACTIVE; + else + dcc->pend_list_tag[i] &= (~P_ACTIVE); + } + mutex_unlock(&dcc->cmd_lock); + return count; + } + *ui = t; if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) @@ -248,6 +269,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent, gc_urgent); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); @@ -290,6 +312,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), ATTR_LIST(max_small_discards), + ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), -- cgit v1.2.3 From 74d46992e0d9dee7f1f376de0d56d31614c8a17a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 23 Aug 2017 19:10:32 +0200 Subject: block: replace bi_bdev with a gendisk pointer and partitions index This way we don't need a block_device structure to submit I/O. The block_device has different life time rules from the gendisk and request_queue and is usually only available when the block device node is open. Other callers need to explicitly create one (e.g. the lightnvm passthrough code, or the new nvme multipathing code). For the actual I/O path all that we need is the gendisk, which exists once per block device. But given that the block layer also does partition remapping we additionally need a partition index, which is used for said remapping in generic_make_request. Note that all the block drivers generally want request_queue or sometimes the gendisk, so this removes a layer of indirection all over the stack. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- arch/powerpc/sysdev/axonram.c | 2 +- block/bio-integrity.c | 18 +++---- block/bio.c | 10 ++-- block/blk-core.c | 100 ++++++++++++++++++------------------ block/blk-flush.c | 2 +- block/blk-lib.c | 8 +-- block/blk-merge.c | 2 +- block/blk-zoned.c | 4 +- drivers/block/brd.c | 5 +- drivers/block/drbd/drbd_actlog.c | 2 +- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_receiver.c | 4 +- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/drbd/drbd_worker.c | 2 +- drivers/block/floppy.c | 2 +- drivers/block/pktcdvd.c | 11 ++-- drivers/block/xen-blkback/blkback.c | 4 +- drivers/md/bcache/debug.c | 2 +- drivers/md/bcache/io.c | 2 +- drivers/md/bcache/journal.c | 6 +-- drivers/md/bcache/request.c | 16 +++--- drivers/md/bcache/super.c | 6 +-- drivers/md/bcache/writeback.c | 5 +- drivers/md/dm-bio-record.h | 9 ++-- drivers/md/dm-bufio.c | 2 +- drivers/md/dm-cache-target.c | 4 +- drivers/md/dm-crypt.c | 4 +- drivers/md/dm-delay.c | 4 +- drivers/md/dm-era-target.c | 2 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-integrity.c | 11 ++-- drivers/md/dm-io.c | 2 +- drivers/md/dm-linear.c | 2 +- drivers/md/dm-log-writes.c | 8 +-- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-raid1.c | 12 ++--- drivers/md/dm-snap.c | 16 +++--- drivers/md/dm-stripe.c | 10 ++-- drivers/md/dm-switch.c | 2 +- drivers/md/dm-thin.c | 6 +-- drivers/md/dm-verity-target.c | 2 +- drivers/md/dm-zoned-metadata.c | 6 +-- drivers/md/dm-zoned-target.c | 4 +- drivers/md/dm.c | 10 ++-- drivers/md/faulty.c | 4 +- drivers/md/linear.c | 6 +-- drivers/md/md.c | 10 ++-- drivers/md/md.h | 9 +++- drivers/md/multipath.c | 8 +-- drivers/md/raid0.c | 7 ++- drivers/md/raid1.c | 34 ++++++------ drivers/md/raid10.c | 50 +++++++++--------- drivers/md/raid5-cache.c | 6 +-- drivers/md/raid5-ppl.c | 6 +-- drivers/md/raid5.c | 12 ++--- drivers/nvdimm/nd.h | 4 +- drivers/nvme/host/core.c | 11 +--- drivers/nvme/host/lightnvm.c | 15 +----- drivers/nvme/target/io-cmd.c | 6 +-- drivers/s390/block/dcssblk.c | 4 +- drivers/s390/block/xpram.c | 2 +- drivers/target/target_core_iblock.c | 4 +- fs/block_dev.c | 4 +- fs/btrfs/check-integrity.c | 12 ++--- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent_io.c | 6 +-- fs/btrfs/raid56.c | 8 +-- fs/btrfs/scrub.c | 12 ++--- fs/btrfs/volumes.c | 2 +- fs/buffer.c | 4 +- fs/crypto/bio.c | 2 +- fs/direct-io.c | 8 +-- fs/exofs/ore.c | 2 +- fs/ext4/page-io.c | 4 +- fs/ext4/readpage.c | 2 +- fs/f2fs/data.c | 5 +- fs/f2fs/segment.c | 2 +- fs/gfs2/lops.c | 2 +- fs/gfs2/meta_io.c | 2 +- fs/gfs2/ops_fstype.c | 2 +- fs/hfsplus/wrapper.c | 2 +- fs/iomap.c | 4 +- fs/jfs/jfs_logmgr.c | 4 +- fs/jfs/jfs_metapage.c | 4 +- fs/mpage.c | 2 +- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nilfs2/segbuf.c | 2 +- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/xfs/xfs_aops.c | 2 +- fs/xfs/xfs_buf.c | 2 +- include/linux/bio.h | 18 +++++++ include/linux/blk_types.h | 3 +- include/trace/events/bcache.h | 6 +-- include/trace/events/block.h | 16 +++--- include/trace/events/f2fs.h | 2 +- kernel/power/swap.c | 5 +- kernel/trace/blktrace.c | 2 +- mm/page_io.c | 17 +++--- 99 files changed, 358 insertions(+), 357 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index 2799706106c6..1e15deacccaf 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -110,7 +110,7 @@ axon_ram_irq_handler(int irq, void *dev) static blk_qc_t axon_ram_make_request(struct request_queue *queue, struct bio *bio) { - struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data; + struct axon_ram_bank *bank = bio->bi_disk->private_data; unsigned long phys_mem, phys_end; void *user_mem; struct bio_vec vec; diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5fa9a740fd99..fc71e6172869 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -146,7 +146,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, iv = bip->bip_vec + bip->bip_vcnt; if (bip->bip_vcnt && - bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), + bvec_gap_to_prev(bio->bi_disk->queue, &bip->bip_vec[bip->bip_vcnt - 1], offset)) return 0; @@ -190,7 +190,7 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, static blk_status_t bio_integrity_process(struct bio *bio, struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) { - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); struct blk_integrity_iter iter; struct bvec_iter bviter; struct bio_vec bv; @@ -199,7 +199,7 @@ static blk_status_t bio_integrity_process(struct bio *bio, void *prot_buf = page_address(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; - iter.disk_name = bio->bi_bdev->bd_disk->disk_name; + iter.disk_name = bio->bi_disk->disk_name; iter.interval = 1 << bi->interval_exp; iter.seed = proc_iter->bi_sector; iter.prot_buf = prot_buf; @@ -236,8 +236,8 @@ static blk_status_t bio_integrity_process(struct bio *bio, bool bio_integrity_prep(struct bio *bio) { struct bio_integrity_payload *bip; - struct blk_integrity *bi; - struct request_queue *q; + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); + struct request_queue *q = bio->bi_disk->queue; void *buf; unsigned long start, end; unsigned int len, nr_pages; @@ -245,11 +245,9 @@ bool bio_integrity_prep(struct bio *bio) unsigned int intervals; blk_status_t status; - bi = bdev_get_integrity(bio->bi_bdev); if (!bi) return true; - q = bdev_get_queue(bio->bi_bdev); if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE) return true; @@ -354,7 +352,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); struct bvec_iter iter = bio->bi_iter; /* @@ -411,7 +409,7 @@ bool __bio_integrity_endio(struct bio *bio) void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) { struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); bip->bip_iter.bi_sector += bytes_done >> 9; @@ -428,7 +426,7 @@ EXPORT_SYMBOL(bio_integrity_advance); void bio_integrity_trim(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); + struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); } diff --git a/block/bio.c b/block/bio.c index ecd1a9c7a301..6745759028da 100644 --- a/block/bio.c +++ b/block/bio.c @@ -593,10 +593,10 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) BUG_ON(bio->bi_pool && BVEC_POOL_IDX(bio)); /* - * most users will be overriding ->bi_bdev with a new target, + * most users will be overriding ->bi_disk with a new target, * so we don't set nor calculate new physical/hw segment counts here */ - bio->bi_bdev = bio_src->bi_bdev; + bio->bi_disk = bio_src->bi_disk; bio_set_flag(bio, BIO_CLONED); bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; @@ -681,7 +681,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); if (!bio) return NULL; - bio->bi_bdev = bio_src->bi_bdev; + bio->bi_disk = bio_src->bi_disk; bio->bi_opf = bio_src->bi_opf; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; @@ -1830,8 +1830,8 @@ again: goto again; } - if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) { - trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio, + if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) { + trace_block_bio_complete(bio->bi_disk->queue, bio, blk_status_to_errno(bio->bi_status)); bio_clear_flag(bio, BIO_TRACE_COMPLETION); } diff --git a/block/blk-core.c b/block/blk-core.c index d579501f24ba..fc1af9097dff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1910,40 +1910,15 @@ out_unlock: return BLK_QC_T_NONE; } -/* - * If bio->bi_dev is a partition, remap the location - */ -static inline void blk_partition_remap(struct bio *bio) -{ - struct block_device *bdev = bio->bi_bdev; - - /* - * Zone reset does not include bi_size so bio_sectors() is always 0. - * Include a test for the reset op code and perform the remap if needed. - */ - if (bdev != bdev->bd_contains && - (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)) { - struct hd_struct *p = bdev->bd_part; - - bio->bi_iter.bi_sector += p->start_sect; - bio->bi_bdev = bdev->bd_contains; - - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, - bio->bi_iter.bi_sector - p->start_sect); - } -} - static void handle_bad_sector(struct bio *bio) { char b[BDEVNAME_SIZE]; printk(KERN_INFO "attempt to access beyond end of device\n"); printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n", - bdevname(bio->bi_bdev, b), - bio->bi_opf, + bio_devname(bio, b), bio->bi_opf, (unsigned long long)bio_end_sector(bio), - (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9)); + (long long)get_capacity(bio->bi_disk)); } #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -1981,6 +1956,38 @@ static inline bool should_fail_request(struct hd_struct *part, #endif /* CONFIG_FAIL_MAKE_REQUEST */ +/* + * Remap block n of partition p to block n+start(p) of the disk. + */ +static inline int blk_partition_remap(struct bio *bio) +{ + struct hd_struct *p; + int ret = 0; + + /* + * Zone reset does not include bi_size so bio_sectors() is always 0. + * Include a test for the reset op code and perform the remap if needed. + */ + if (!bio->bi_partno || + (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)) + return 0; + + rcu_read_lock(); + p = __disk_get_part(bio->bi_disk, bio->bi_partno); + if (likely(p && !should_fail_request(p, bio->bi_iter.bi_size))) { + bio->bi_iter.bi_sector += p->start_sect; + bio->bi_partno = 0; + trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), + bio->bi_iter.bi_sector - p->start_sect); + } else { + printk("%s: fail for partition %d\n", __func__, bio->bi_partno); + ret = -EIO; + } + rcu_read_unlock(); + + return ret; +} + /* * Check whether this bio extends beyond the end of the device. */ @@ -1992,7 +1999,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; /* Test device or partition size, when known. */ - maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; + maxsector = get_capacity(bio->bi_disk); if (maxsector) { sector_t sector = bio->bi_iter.bi_sector; @@ -2017,20 +2024,18 @@ generic_make_request_checks(struct bio *bio) int nr_sectors = bio_sectors(bio); blk_status_t status = BLK_STS_IOERR; char b[BDEVNAME_SIZE]; - struct hd_struct *part; might_sleep(); if (bio_check_eod(bio, nr_sectors)) goto end_io; - q = bdev_get_queue(bio->bi_bdev); + q = bio->bi_disk->queue; if (unlikely(!q)) { printk(KERN_ERR "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", - bdevname(bio->bi_bdev, b), - (long long) bio->bi_iter.bi_sector); + bio_devname(bio, b), (long long)bio->bi_iter.bi_sector); goto end_io; } @@ -2042,17 +2047,11 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_rq_based(q)) goto not_supported; - part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_iter.bi_size) || - should_fail_request(&part_to_disk(part)->part0, - bio->bi_iter.bi_size)) + if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) goto end_io; - /* - * If this device has partitions, remap block n - * of partition p to block n+start(p) of the disk. - */ - blk_partition_remap(bio); + if (blk_partition_remap(bio)) + goto end_io; if (bio_check_eod(bio, nr_sectors)) goto end_io; @@ -2081,16 +2080,16 @@ generic_make_request_checks(struct bio *bio) goto not_supported; break; case REQ_OP_WRITE_SAME: - if (!bdev_write_same(bio->bi_bdev)) + if (!q->limits.max_write_same_sectors) goto not_supported; break; case REQ_OP_ZONE_REPORT: case REQ_OP_ZONE_RESET: - if (!bdev_is_zoned(bio->bi_bdev)) + if (!blk_queue_is_zoned(q)) goto not_supported; break; case REQ_OP_WRITE_ZEROES: - if (!bdev_write_zeroes_sectors(bio->bi_bdev)) + if (!q->limits.max_write_zeroes_sectors) goto not_supported; break; default: @@ -2197,7 +2196,7 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&bio_list_on_stack[0]); current->bio_list = bio_list_on_stack; do { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); + struct request_queue *q = bio->bi_disk->queue; if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) { struct bio_list lower, same; @@ -2215,7 +2214,7 @@ blk_qc_t generic_make_request(struct bio *bio) bio_list_init(&lower); bio_list_init(&same); while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL) - if (q == bdev_get_queue(bio->bi_bdev)) + if (q == bio->bi_disk->queue) bio_list_add(&same, bio); else bio_list_add(&lower, bio); @@ -2258,7 +2257,7 @@ blk_qc_t submit_bio(struct bio *bio) unsigned int count; if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) - count = bdev_logical_block_size(bio->bi_bdev) >> 9; + count = queue_logical_block_size(bio->bi_disk->queue); else count = bio_sectors(bio); @@ -2275,8 +2274,7 @@ blk_qc_t submit_bio(struct bio *bio) current->comm, task_pid_nr(current), op_is_write(bio_op(bio)) ? "WRITE" : "READ", (unsigned long long)bio->bi_iter.bi_sector, - bdevname(bio->bi_bdev, b), - count); + bio_devname(bio, b), count); } } @@ -3049,8 +3047,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; - if (bio->bi_bdev) - rq->rq_disk = bio->bi_bdev->bd_disk; + if (bio->bi_disk) + rq->rq_disk = bio->bi_disk; } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE diff --git a/block/blk-flush.c b/block/blk-flush.c index ed5fe322abba..83b7d5b41c79 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -525,7 +525,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; ret = submit_bio_wait(bio); diff --git a/block/blk-lib.c b/block/blk-lib.c index 3fe0aec90597..e01adb5145b3 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -77,7 +77,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, op, 0); bio->bi_iter.bi_size = req_sects << 9; @@ -168,7 +168,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector, while (nr_sects) { bio = next_bio(bio, 1, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_vcnt = 1; bio->bi_io_vec->bv_page = page; bio->bi_io_vec->bv_offset = 0; @@ -241,7 +241,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev, while (nr_sects) { bio = next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_opf = REQ_OP_WRITE_ZEROES; if (flags & BLKDEV_ZERO_NOUNMAP) bio->bi_opf |= REQ_NOUNMAP; @@ -323,7 +323,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects), gfp_mask); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); while (nr_sects != 0) { diff --git a/block/blk-merge.c b/block/blk-merge.c index 05f116bfb99d..aa524cad5bea 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -786,7 +786,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) return false; /* must be same device and not a special request */ - if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) + if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq)) return false; /* only merge integrity protected bio into ditto rq */ diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 3bd15d8095b1..ff57fb51b338 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -116,7 +116,7 @@ int blkdev_report_zones(struct block_device *bdev, if (!bio) return -ENOMEM; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blk_zone_start(q, sector); bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); @@ -234,7 +234,7 @@ int blkdev_reset_zones(struct block_device *bdev, bio = bio_alloc(gfp_mask, 0); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); ret = submit_bio_wait(bio); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 104b71c0490d..006e1cb7e6f0 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -294,14 +294,13 @@ out: static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio) { - struct block_device *bdev = bio->bi_bdev; - struct brd_device *brd = bdev->bd_disk->private_data; + struct brd_device *brd = bio->bi_disk->private_data; struct bio_vec bvec; sector_t sector; struct bvec_iter iter; sector = bio->bi_iter.bi_sector; - if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) + if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) goto io_error; bio_for_each_segment(bvec, bio, iter) { diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index e02c45cd3c5a..5f0eaee8c8a7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -151,7 +151,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, op_flags |= REQ_SYNC; bio = bio_alloc_drbd(GFP_NOIO); - bio->bi_bdev = bdev->md_bdev; + bio_set_dev(bio, bdev->md_bdev); bio->bi_iter.bi_sector = sector; err = -EIO; if (bio_add_page(bio, device->md_io.page, size, 0) != size) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 809fd245c3dc..bd97908c766f 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -1019,7 +1019,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho bm_store_page_idx(page, page_nr); } else page = b->bm_pages[page_nr]; - bio->bi_bdev = device->ldev->md_bdev; + bio_set_dev(bio, device->ldev->md_bdev); bio->bi_iter.bi_sector = on_disk_sector; /* bio_add_page of a single page to an empty bio will always succeed, * according to api. Do we want to assert that? */ diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index d17b6e6393c7..819f9d0bc875 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1628,8 +1628,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device, int fault_type, struct bio *bio) { __release(local); - if (!bio->bi_bdev) { - drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); + if (!bio->bi_disk) { + drbd_err(device, "drbd_generic_make_request: bio->bi_disk == NULL\n"); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); return; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index c7e95e6380fb..ece6e5d7dc3f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1265,7 +1265,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont octx->device = device; octx->ctx = ctx; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); bio->bi_private = octx; bio->bi_end_io = one_flush_endio; bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; @@ -1548,7 +1548,7 @@ next_bio: } /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); bio_set_op_attrs(bio, op, op_flags); bio->bi_private = peer_req; bio->bi_end_io = drbd_peer_request_endio; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 8d6b5d137b5e..447c975f5481 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1179,7 +1179,7 @@ drbd_submit_req_private_bio(struct drbd_request *req) else type = DRBD_FAULT_DT_RD; - bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(bio, device->ldev->backing_bdev); /* State may have changed since we grabbed our reference on the * ->ldev member. Double check, and short-circuit to endio. diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1d8726a8df34..c268d886c4f0 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -1513,7 +1513,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel) drbd_al_begin_io(device, &req->i); drbd_req_make_private_bio(req, req->master_bio); - req->private_bio->bi_bdev = device->ldev->backing_bdev; + bio_set_dev(req->private_bio, device->ldev->backing_bdev); generic_make_request(req->private_bio); return 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9c00f29e40c1..60c086a53609 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4134,7 +4134,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; bio_init(&bio, &bio_vec, 1); - bio.bi_bdev = bdev; + bio_set_dev(&bio, bdev); bio_add_page(&bio, page, size, 0); bio.bi_iter.bi_sector = 0; diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 6b8b097abbb9..67974796c350 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1028,7 +1028,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) bio = pkt->r_bios[f]; bio_reset(bio); bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); - bio->bi_bdev = pd->bdev; + bio_set_dev(bio, pd->bdev); bio->bi_end_io = pkt_end_io_read; bio->bi_private = pkt; @@ -1122,7 +1122,7 @@ static int pkt_start_recovery(struct packet_data *pkt) pkt->sector = new_sector; bio_reset(pkt->bio); - pkt->bio->bi_bdev = pd->bdev; + bio_set_set(pkt->bio, pd->bdev); bio_set_op_attrs(pkt->bio, REQ_OP_WRITE, 0); pkt->bio->bi_iter.bi_sector = new_sector; pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE; @@ -1267,7 +1267,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) bio_reset(pkt->w_bio); pkt->w_bio->bi_iter.bi_sector = pkt->sector; - pkt->w_bio->bi_bdev = pd->bdev; + bio_set_dev(pkt->w_bio, pd->bdev); pkt->w_bio->bi_end_io = pkt_end_io_packet_write; pkt->w_bio->bi_private = pkt; @@ -2314,7 +2314,7 @@ static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) psd->pd = pd; psd->bio = bio; - cloned_bio->bi_bdev = pd->bdev; + bio_set_dev(cloned_bio, pd->bdev); cloned_bio->bi_private = psd; cloned_bio->bi_end_io = pkt_end_io_read_cloned; pd->stats.secs_r += bio_sectors(bio); @@ -2415,8 +2415,7 @@ static blk_qc_t pkt_make_request(struct request_queue *q, struct bio *bio) pd = q->queuedata; if (!pd) { - pr_err("%s incorrect request queue\n", - bdevname(bio->bi_bdev, b)); + pr_err("%s incorrect request queue\n", bio_devname(bio, b)); goto end_io; } diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5f3a813e7ae0..987d665e82de 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1363,7 +1363,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, goto fail_put_bio; biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; + bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; bio->bi_iter.bi_sector = preq.sector_number; @@ -1382,7 +1382,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, goto fail_put_bio; biolist[nbio++] = bio; - bio->bi_bdev = preq.bdev; + bio_set_dev(bio, preq.bdev); bio->bi_private = pending_req; bio->bi_end_io = end_block_io_op; bio_set_op_attrs(bio, operation, operation_flags); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 35a5a7210e51..61076eda2e6d 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -49,7 +49,7 @@ void bch_btree_verify(struct btree *b) v->keys.ops = b->keys.ops; bio = bch_bbio_alloc(b->c); - bio->bi_bdev = PTR_CACHE(b->c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(b->c, &b->key, 0)->bdev); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); bio->bi_iter.bi_size = KEY_SIZE(&v->key) << 9; bio->bi_opf = REQ_OP_READ | REQ_META; diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 6a9b85095e7b..7e871bdc0097 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -34,7 +34,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) struct bbio *b = container_of(bio, struct bbio, bio); bio->bi_iter.bi_sector = PTR_OFFSET(&b->key, 0); - bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; + bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); b->submit_time_us = local_clock_us(); closure_bio_submit(bio, bio->bi_private); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0352d05e495c..7e1d1c3ba33a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -53,7 +53,7 @@ reread: left = ca->sb.bucket_size - offset; bio_reset(bio); bio->bi_iter.bi_sector = bucket + offset; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; @@ -452,7 +452,7 @@ static void do_journal_discard(struct cache *ca) bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; @@ -623,7 +623,7 @@ static void journal_write_unlocked(struct closure *cl) bio_reset(bio); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 72eb97176403..0e1463d0c334 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -607,7 +607,7 @@ static void request_endio(struct bio *bio) static void bio_complete(struct search *s) { if (s->orig_bio) { - struct request_queue *q = bdev_get_queue(s->orig_bio->bi_bdev); + struct request_queue *q = s->orig_bio->bi_disk->queue; generic_end_io_acct(q, bio_data_dir(s->orig_bio), &s->d->disk->part0, s->start_time); @@ -735,7 +735,7 @@ static void cached_dev_read_done(struct closure *cl) if (s->iop.bio) { bio_reset(s->iop.bio); s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector; - s->iop.bio->bi_bdev = s->cache_miss->bi_bdev; + bio_copy_dev(s->iop.bio, s->cache_miss); s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9; bch_bio_map(s->iop.bio, NULL); @@ -794,7 +794,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, !(bio->bi_opf & REQ_META) && s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA) reada = min_t(sector_t, dc->readahead >> 9, - bdev_sectors(bio->bi_bdev) - bio_end_sector(bio)); + get_capacity(bio->bi_disk) - bio_end_sector(bio)); s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada); @@ -820,7 +820,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, goto out_submit; cache_bio->bi_iter.bi_sector = miss->bi_iter.bi_sector; - cache_bio->bi_bdev = miss->bi_bdev; + bio_copy_dev(cache_bio, miss); cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9; cache_bio->bi_end_io = request_endio; @@ -919,7 +919,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0, dc->disk.bio_split); - flush->bi_bdev = bio->bi_bdev; + bio_copy_dev(flush, bio); flush->bi_end_io = request_endio; flush->bi_private = cl; flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; @@ -956,13 +956,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio) { struct search *s; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_iter.bi_sector += dc->sb.data_offset; if (cached_dev_get(dc)) { @@ -1072,7 +1072,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, { struct search *s; struct closure *cl; - struct bcache_device *d = bio->bi_bdev->bd_disk->private_data; + struct bcache_device *d = bio->bi_disk->private_data; int rw = bio_data_dir(bio); generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 8352fad765f6..974d832e54a6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -257,7 +257,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_init(cl, parent); bio_reset(bio); - bio->bi_bdev = dc->bdev; + bio_set_dev(bio, dc->bdev); bio->bi_end_io = write_bdev_super_endio; bio->bi_private = dc; @@ -303,7 +303,7 @@ void bcache_write_super(struct cache_set *c) SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb)); bio_reset(bio); - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_end_io = write_super_endio; bio->bi_private = ca; @@ -508,7 +508,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, closure_init_stack(cl); bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size; - bio->bi_bdev = ca->bdev; + bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = prio_endio; diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 42c66e76f05e..c49022a8dc9d 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -181,7 +181,7 @@ static void write_dirty(struct closure *cl) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0); io->bio.bi_iter.bi_sector = KEY_START(&w->key); - io->bio.bi_bdev = io->dc->bdev; + bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; closure_bio_submit(&io->bio, cl); @@ -250,8 +250,7 @@ static void read_dirty(struct cached_dev *dc) dirty_init(w); bio_set_op_attrs(&io->bio, REQ_OP_READ, 0); io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0); - io->bio.bi_bdev = PTR_CACHE(dc->disk.c, - &w->key, 0)->bdev; + bio_set_dev(&io->bio, PTR_CACHE(dc->disk.c, &w->key, 0)->bdev); io->bio.bi_end_io = read_dirty_endio; if (bio_alloc_pages(&io->bio, GFP_KERNEL)) diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index dd3646111561..c82578af56a5 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -18,21 +18,24 @@ */ struct dm_bio_details { - struct block_device *bi_bdev; + struct gendisk *bi_disk; + u8 bi_partno; unsigned long bi_flags; struct bvec_iter bi_iter; }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) { - bd->bi_bdev = bio->bi_bdev; + bd->bi_disk = bio->bi_disk; + bd->bi_partno = bio->bi_partno; bd->bi_flags = bio->bi_flags; bd->bi_iter = bio->bi_iter; } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) { - bio->bi_bdev = bd->bi_bdev; + bio->bi_disk = bd->bi_disk; + bio->bi_partno = bd->bi_partno; bio->bi_flags = bd->bi_flags; bio->bi_iter = bd->bi_iter; } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 44f4a8ac95bd..9601225e0ae9 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -616,7 +616,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector, bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS); b->bio.bi_iter.bi_sector = sector; - b->bio.bi_bdev = b->c->bdev; + bio_set_dev(&b->bio, b->c->bdev); b->bio.bi_end_io = inline_endio; /* * Use of .bi_private isn't a problem here because diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index c5ea03fc7ee1..dcac25c2be7a 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -833,7 +833,7 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) *--------------------------------------------------------------*/ static void remap_to_origin(struct cache *cache, struct bio *bio) { - bio->bi_bdev = cache->origin_dev->bdev; + bio_set_dev(bio, cache->origin_dev->bdev); } static void remap_to_cache(struct cache *cache, struct bio *bio, @@ -842,7 +842,7 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, sector_t bi_sector = bio->bi_iter.bi_sector; sector_t block = from_cblock(cblock); - bio->bi_bdev = cache->cache_dev->bdev; + bio_set_dev(bio, cache->cache_dev->bdev); if (!block_size_is_power_of_two(cache)) bio->bi_iter.bi_sector = (block * cache->sectors_per_block) + diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 73c2e270cda6..ca99147208a9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1544,7 +1544,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) clone->bi_private = io; clone->bi_end_io = crypt_endio; - clone->bi_bdev = cc->dev->bdev; + bio_set_dev(clone, cc->dev->bdev); clone->bi_opf = io->base_bio->bi_opf; } @@ -2793,7 +2793,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) */ if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) { - bio->bi_bdev = cc->dev->bdev; + bio_set_dev(bio, cc->dev->bdev); if (bio_sectors(bio)) bio->bi_iter.bi_sector = cc->start + dm_target_offset(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index ae3158795d26..2209a9700acd 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -282,7 +282,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio) struct delay_c *dc = ti->private; if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) { - bio->bi_bdev = dc->dev_write->bdev; + bio_set_dev(bio, dc->dev_write->bdev); if (bio_sectors(bio)) bio->bi_iter.bi_sector = dc->start_write + dm_target_offset(ti, bio->bi_iter.bi_sector); @@ -290,7 +290,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, dc->write_delay, bio); } - bio->bi_bdev = dc->dev_read->bdev; + bio_set_dev(bio, dc->dev_read->bdev); bio->bi_iter.bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index e7ba89f98d8d..ba84b8d62cd0 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1192,7 +1192,7 @@ static dm_block_t get_block(struct era *era, struct bio *bio) static void remap_to_origin(struct era *era, struct bio *bio) { - bio->bi_bdev = era->origin_dev->bdev; + bio_set_dev(bio, era->origin_dev->bdev); } /*---------------------------------------------------------------- diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index e2c7234931bc..7146c2d9762d 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -274,7 +274,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; - bio->bi_bdev = fc->dev->bdev; + bio_set_dev(bio, fc->dev->bdev); if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3acce09bba35..27c0f223f8ea 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -250,7 +250,8 @@ struct dm_integrity_io { struct completion *completion; - struct block_device *orig_bi_bdev; + struct gendisk *orig_bi_disk; + u8 orig_bi_partno; bio_end_io_t *orig_bi_end_io; struct bio_integrity_payload *orig_bi_integrity; struct bvec_iter orig_bi_iter; @@ -1164,7 +1165,8 @@ static void integrity_end_io(struct bio *bio) struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io)); bio->bi_iter = dio->orig_bi_iter; - bio->bi_bdev = dio->orig_bi_bdev; + bio->bi_disk = dio->orig_bi_disk; + bio->bi_partno = dio->orig_bi_partno; if (dio->orig_bi_integrity) { bio->bi_integrity = dio->orig_bi_integrity; bio->bi_opf |= REQ_INTEGRITY; @@ -1681,8 +1683,9 @@ sleep: dio->orig_bi_iter = bio->bi_iter; - dio->orig_bi_bdev = bio->bi_bdev; - bio->bi_bdev = ic->dev->bdev; + dio->orig_bi_disk = bio->bi_disk; + dio->orig_bi_partno = bio->bi_partno; + bio_set_dev(bio, ic->dev->bdev); dio->orig_bi_integrity = bio_integrity(bio); bio->bi_integrity = NULL; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 25039607f3cb..b4357ed4d541 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -347,7 +347,7 @@ static void do_region(int op, int op_flags, unsigned region, bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); - bio->bi_bdev = where->bdev; + bio_set_dev(bio, where->bdev); bio->bi_end_io = endio; bio_set_op_attrs(bio, op, op_flags); store_io_and_region_in_bio(bio, io, region); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 41971a090e34..405eca206d67 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -88,7 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) { struct linear_c *lc = ti->private; - bio->bi_bdev = lc->dev->bdev; + bio_set_dev(bio, lc->dev->bdev); if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector); diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index a1da0eb58a93..534a254eb977 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -198,7 +198,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -263,7 +263,7 @@ static int log_one_block(struct log_writes_c *lc, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -285,7 +285,7 @@ static int log_one_block(struct log_writes_c *lc, } bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; - bio->bi_bdev = lc->logdev->bdev; + bio_set_dev(bio, lc->logdev->bdev); bio->bi_end_io = log_end_io; bio->bi_private = lc; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -539,7 +539,7 @@ static void normal_map_bio(struct dm_target *ti, struct bio *bio) { struct log_writes_c *lc = ti->private; - bio->bi_bdev = lc->dev->bdev; + bio_set_dev(bio, lc->dev->bdev); } static int log_writes_map(struct dm_target *ti, struct bio *bio) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 0e8ab5bb3575..573046bd5c46 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -566,7 +566,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m mpio->nr_bytes = nr_bytes; bio->bi_status = 0; - bio->bi_bdev = pgpath->path.dev->bdev; + bio_set_dev(bio, pgpath->path.dev->bdev); bio->bi_opf |= REQ_FAILFAST_TRANSPORT; if (pgpath->pg->ps.type->start_io) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index a4fbd911d566..c0b82136b2d1 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,7 +145,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; - /* if details->bi_bdev == NULL, details were not saved */ + /* if details->bi_disk == NULL, details were not saved */ struct dm_bio_details details; region_t write_region; }; @@ -464,7 +464,7 @@ static sector_t map_sector(struct mirror *m, struct bio *bio) static void map_bio(struct mirror *m, struct bio *bio) { - bio->bi_bdev = m->dev->bdev; + bio_set_dev(bio, m->dev->bdev); bio->bi_iter.bi_sector = map_sector(m, bio); } @@ -1199,7 +1199,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) struct dm_raid1_bio_record *bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ @@ -1266,7 +1266,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(*error)) { - if (!bio_record->details.bi_bdev) { + if (!bio_record->details.bi_disk) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1291,7 +1291,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, bd = &bio_record->details; dm_bio_restore(bd, bio); - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; bio->bi_status = 0; queue_bio(ms, bio, rw); @@ -1301,7 +1301,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, } out: - bio_record->details.bi_bdev = NULL; + bio_record->details.bi_disk = NULL; return DM_ENDIO_DONE; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 1ba41048b438..1113b42e1eda 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1663,7 +1663,7 @@ __find_pending_exception(struct dm_snapshot *s, static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, struct bio *bio, chunk_t chunk) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); bio->bi_iter.bi_sector = chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk)) + @@ -1681,7 +1681,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_opf & REQ_PREFLUSH) { - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } @@ -1769,7 +1769,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) goto out; } } else { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); track_chunk(s, bio, chunk); } @@ -1802,9 +1802,9 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) if (bio->bi_opf & REQ_PREFLUSH) { if (!dm_bio_get_target_bio_nr(bio)) - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); else - bio->bi_bdev = s->cow->bdev; + bio_set_dev(bio, s->cow->bdev); return DM_MAPIO_REMAPPED; } @@ -1824,7 +1824,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) chunk >= s->first_merging_chunk && chunk < (s->first_merging_chunk + s->num_merging_chunks)) { - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); bio_list_add(&s->bios_queued_during_merge, bio); r = DM_MAPIO_SUBMITTED; goto out_unlock; @@ -1838,7 +1838,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) } redirect_to_origin: - bio->bi_bdev = s->origin->bdev; + bio_set_dev(bio, s->origin->bdev); if (bio_data_dir(bio) == WRITE) { up_write(&s->lock); @@ -2285,7 +2285,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) struct dm_origin *o = ti->private; unsigned available_sectors; - bio->bi_bdev = o->dev->bdev; + bio_set_dev(bio, o->dev->bdev); if (unlikely(bio->bi_opf & REQ_PREFLUSH)) return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a0375530b07f..ab50d7c4377f 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -270,7 +270,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, stripe_map_range_sector(sc, bio_end_sector(bio), target_stripe, &end); if (begin < end) { - bio->bi_bdev = sc->stripe[target_stripe].dev->bdev; + bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev); bio->bi_iter.bi_sector = begin + sc->stripe[target_stripe].physical_start; bio->bi_iter.bi_size = to_bytes(end - begin); @@ -291,7 +291,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) if (bio->bi_opf & REQ_PREFLUSH) { target_bio_nr = dm_bio_get_target_bio_nr(bio); BUG_ON(target_bio_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; + bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev); return DM_MAPIO_REMAPPED; } if (unlikely(bio_op(bio) == REQ_OP_DISCARD) || @@ -306,7 +306,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) &stripe, &bio->bi_iter.bi_sector); bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start; - bio->bi_bdev = sc->stripe[stripe].dev->bdev; + bio_set_dev(bio, sc->stripe[stripe].dev->bdev); return DM_MAPIO_REMAPPED; } @@ -430,9 +430,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, return DM_ENDIO_DONE; memset(major_minor, 0, sizeof(major_minor)); - sprintf(major_minor, "%d:%d", - MAJOR(disk_devt(bio->bi_bdev->bd_disk)), - MINOR(disk_devt(bio->bi_bdev->bd_disk))); + sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c index 871c18fe000d..2dcea4c56f37 100644 --- a/drivers/md/dm-switch.c +++ b/drivers/md/dm-switch.c @@ -322,7 +322,7 @@ static int switch_map(struct dm_target *ti, struct bio *bio) sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector); unsigned path_nr = switch_get_path_nr(sctx, offset); - bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev; + bio_set_dev(bio, sctx->path_list[path_nr].dmdev->bdev); bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset; return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 9dec2f8cc739..69d88aee3055 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -679,7 +679,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) struct pool *pool = tc->pool; sector_t bi_sector = bio->bi_iter.bi_sector; - bio->bi_bdev = tc->pool_dev->bdev; + bio_set_dev(bio, tc->pool_dev->bdev); if (block_size_is_power_of_two(pool)) bio->bi_iter.bi_sector = (block << pool->sectors_per_block_shift) | @@ -691,7 +691,7 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) static void remap_to_origin(struct thin_c *tc, struct bio *bio) { - bio->bi_bdev = tc->origin_dev->bdev; + bio_set_dev(bio, tc->origin_dev->bdev); } static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) @@ -3313,7 +3313,7 @@ static int pool_map(struct dm_target *ti, struct bio *bio) * As this is a singleton target, ti->begin is always zero. */ spin_lock_irqsave(&pool->lock, flags); - bio->bi_bdev = pt->data_dev->bdev; + bio_set_dev(bio, pt->data_dev->bdev); r = DM_MAPIO_REMAPPED; spin_unlock_irqrestore(&pool->lock, flags); diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index b46705ebf01f..1c5b6185c79d 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -637,7 +637,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) struct dm_verity *v = ti->private; struct dm_verity_io *io; - bio->bi_bdev = v->data_dev->bdev; + bio_set_dev(bio, v->data_dev->bdev); bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector); if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) & diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index a4fa2ada6883..70485de37b66 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -409,7 +409,7 @@ static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd, } bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO); @@ -564,7 +564,7 @@ static void dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, set_bit(DMZ_META_WRITING, &mblk->state); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); @@ -586,7 +586,7 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, return -ENOMEM; bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio->bi_bdev = zmd->dev->bdev; + bio_set_dev(bio, zmd->dev->bdev); bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0); ret = submit_bio_wait(bio); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index b08bbbd4d902..b87c1741da4b 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -238,7 +238,7 @@ static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone, struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx)); /* Setup and submit the BIO */ - bio->bi_bdev = dmz->dev->bdev; + bio_set_dev(bio, dmz->dev->bdev); bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block); atomic_inc(&bioctx->ref); generic_make_request(bio); @@ -586,7 +586,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) (unsigned long long)dmz_chunk_block(dmz->dev, dmz_bio_block(bio)), (unsigned int)dmz_bio_blocks(bio)); - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE) return DM_MAPIO_REMAPPED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8612a2d1ccd9..b28b9ce8f4ff 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -851,10 +851,10 @@ static void clone_endio(struct bio *bio) if (unlikely(error == BLK_STS_TARGET)) { if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + !bio->bi_disk->queue->limits.max_write_same_sectors) disable_write_same(md); if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) disable_write_zeroes(md); } @@ -1215,8 +1215,8 @@ static void __map_bio(struct dm_target_io *tio) break; case DM_MAPIO_REMAPPED: /* the bio has been remapped so dispatch it */ - trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector); + trace_block_bio_remap(clone->bi_disk->queue, clone, + bio_dev(tio->io->bio), sector); generic_make_request(clone); break; case DM_MAPIO_KILL: @@ -1796,7 +1796,7 @@ static struct mapped_device *alloc_dev(int minor) goto bad; bio_init(&md->flush_bio, NULL, 0); - md->flush_bio.bi_bdev = md->bdev; + bio_set_dev(&md->flush_bio, md->bdev); md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; dm_stats_init(&md->stats); diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 06a64d5d8c6c..38264b38420f 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -216,12 +216,12 @@ static bool faulty_make_request(struct mddev *mddev, struct bio *bio) if (failit) { struct bio *b = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set); - b->bi_bdev = conf->rdev->bdev; + bio_set_dev(b, conf->rdev->bdev); b->bi_private = bio; b->bi_end_io = faulty_fail; bio = b; } else - bio->bi_bdev = conf->rdev->bdev; + bio_set_dev(bio, conf->rdev->bdev); generic_make_request(bio); return true; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 5f1eb9189542..c464fb48039a 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -275,17 +275,17 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) bio = split; } - bio->bi_bdev = tmp_dev->rdev->bdev; + bio_set_dev(bio, tmp_dev->rdev->bdev); bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - start_sector + data_offset; if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { + !blk_queue_discard(bio->bi_disk->queue))) { /* Just ignore it */ bio_endio(bio); } else { if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), + trace_block_bio_remap(bio->bi_disk->queue, bio, disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index c99634612fc4..0afdc1bfd7cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -422,7 +422,7 @@ static void submit_flushes(struct work_struct *ws) bi = bio_alloc_mddev(GFP_NOIO, 0, mddev); bi->bi_end_io = md_end_flush; bi->bi_private = rdev; - bi->bi_bdev = rdev->bdev; + bio_set_dev(bi, rdev->bdev); bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; atomic_inc(&mddev->flush_pending); submit_bio(bi); @@ -772,7 +772,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, atomic_inc(&rdev->nr_pending); - bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev; + bio_set_dev(bio, rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev); bio->bi_iter.bi_sector = sector; bio_add_page(bio, page, size, 0); bio->bi_private = rdev; @@ -803,8 +803,10 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, struct bio *bio = md_bio_alloc_sync(rdev->mddev); int ret; - bio->bi_bdev = (metadata_op && rdev->meta_bdev) ? - rdev->meta_bdev : rdev->bdev; + if (metadata_op && rdev->meta_bdev) + bio_set_dev(bio, rdev->meta_bdev); + else + bio_set_dev(bio, rdev->bdev); bio_set_op_attrs(bio, op, op_flags); if (metadata_op) bio->bi_iter.bi_sector = sector + rdev->sb_start; diff --git a/drivers/md/md.h b/drivers/md/md.h index 09db03455801..c0d436fb88f0 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -509,6 +509,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io); } +static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors) +{ + atomic_add(nr_sectors, &bio->bi_disk->sync_io); +} + struct md_personality { char *name; @@ -721,14 +726,14 @@ static inline void mddev_clear_unsupported_flags(struct mddev *mddev, static inline void mddev_check_writesame(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_SAME && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors) + !bio->bi_disk->queue->limits.max_write_same_sectors) mddev->queue->limits.max_write_same_sectors = 0; } static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio) { if (bio_op(bio) == REQ_OP_WRITE_ZEROES && - !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) + !bio->bi_disk->queue->limits.max_write_zeroes_sectors) mddev->queue->limits.max_write_zeroes_sectors = 0; } #endif /* _MD_MD_H */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 23a162ba6c56..b68e0666b9b0 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -134,7 +134,7 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) __bio_clone_fast(&mp_bh->bio, bio); mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; - mp_bh->bio.bi_bdev = multipath->rdev->bdev; + bio_set_dev(&mp_bh->bio, multipath->rdev->bdev); mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; @@ -345,17 +345,17 @@ static void multipathd(struct md_thread *thread) if ((mp_bh->path = multipath_map (conf))<0) { pr_err("multipath: %s: unrecoverable IO read error for block %llu\n", - bdevname(bio->bi_bdev,b), + bio_devname(bio, b), (unsigned long long)bio->bi_iter.bi_sector); multipath_end_bh_io(mp_bh, BLK_STS_IOERR); } else { pr_err("multipath: %s: redirecting sector %llu to another IO path\n", - bdevname(bio->bi_bdev,b), + bio_devname(bio, b), (unsigned long long)bio->bi_iter.bi_sector); *bio = *(mp_bh->master_bio); bio->bi_iter.bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; - bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; + bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev); bio->bi_opf |= REQ_FAILFAST_TRANSPORT; bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 94d9ae9b0fd0..05a4521b832f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -588,14 +588,13 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) zone = find_zone(mddev->private, §or); tmp_dev = map_sector(mddev, zone, sector, §or); - bio->bi_bdev = tmp_dev->bdev; + bio_set_dev(bio, tmp_dev->bdev); bio->bi_iter.bi_sector = sector + zone->dev_start + tmp_dev->data_offset; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), - bio, disk_devt(mddev->gendisk), - bio_sector); + trace_block_bio_remap(bio->bi_disk->queue, bio, + disk_devt(mddev->gendisk), bio_sector); mddev_check_writesame(mddev, bio); mddev_check_write_zeroes(mddev, bio); generic_make_request(bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f50958ded9f0..baf5e358d22a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -786,13 +786,13 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void *)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1273,7 +1273,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r1_bio->sector + mirror->rdev->data_offset; - read_bio->bi_bdev = mirror->rdev->bdev; + bio_set_dev(read_bio, mirror->rdev->bdev); read_bio->bi_end_io = raid1_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &mirror->rdev->flags) && @@ -1282,9 +1282,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r1_bio; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), - read_bio, disk_devt(mddev->gendisk), - r1_bio->sector); + trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, + disk_devt(mddev->gendisk), r1_bio->sector); generic_make_request(read_bio); } @@ -1496,7 +1495,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, mbio->bi_iter.bi_sector = (r1_bio->sector + conf->mirrors[i].rdev->data_offset); - mbio->bi_bdev = conf->mirrors[i].rdev->bdev; + bio_set_dev(mbio, conf->mirrors[i].rdev->bdev); mbio->bi_end_io = raid1_end_write_request; mbio->bi_opf = bio_op(bio) | (bio->bi_opf & (REQ_SYNC | REQ_FUA)); if (test_bit(FailFast, &conf->mirrors[i].rdev->flags) && @@ -1508,11 +1507,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, atomic_inc(&r1_bio->remaining); if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), + trace_block_bio_remap(mbio->bi_disk->queue, mbio, disk_devt(mddev->gendisk), r1_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void*)conf->mirrors[i].rdev; + mbio->bi_disk = (void *)conf->mirrors[i].rdev; cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug)); if (cb) @@ -1990,8 +1989,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) * Don't fail devices as that won't really help. */ pr_crit_ratelimited("md/raid1:%s: %s: unrecoverable I/O read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev, b), + mdname(mddev), bio_devname(bio, b), (unsigned long long)r1_bio->sector); for (d = 0; d < conf->raid_disks * 2; d++) { rdev = conf->mirrors[d].rdev; @@ -2082,7 +2080,7 @@ static void process_checks(struct r1bio *r1_bio) b->bi_status = status; b->bi_iter.bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; - b->bi_bdev = conf->mirrors[i].rdev->bdev; + bio_set_dev(b, conf->mirrors[i].rdev->bdev); b->bi_end_io = end_sync_read; rp->raid_bio = r1_bio; b->bi_private = rp; @@ -2350,7 +2348,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i) bio_trim(wbio, sector - r1_bio->sector, sectors); wbio->bi_iter.bi_sector += rdev->data_offset; - wbio->bi_bdev = rdev->bdev; + bio_set_dev(wbio, rdev->bdev); if (submit_bio_wait(wbio) < 0) /* failure! */ @@ -2440,7 +2438,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) struct mddev *mddev = conf->mddev; struct bio *bio; struct md_rdev *rdev; - dev_t bio_dev; sector_t bio_sector; clear_bit(R1BIO_ReadError, &r1_bio->state); @@ -2454,7 +2451,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) */ bio = r1_bio->bios[r1_bio->read_disk]; - bio_dev = bio->bi_bdev->bd_dev; bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector; bio_put(bio); r1_bio->bios[r1_bio->read_disk] = NULL; @@ -2727,7 +2723,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io) { atomic_inc(&rdev->nr_pending); bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; } @@ -2853,7 +2849,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, bio = r1_bio->bios[i]; if (bio->bi_end_io == end_sync_read) { read_targets--; - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; generic_make_request(bio); @@ -2862,7 +2858,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, } else { atomic_set(&r1_bio->remaining, 1); bio = r1_bio->bios[r1_bio->read_disk]; - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); if (read_targets == 1) bio->bi_opf &= ~MD_FAILFAST; generic_make_request(bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f55d4cc085f6..d1f948e371e0 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -901,13 +901,13 @@ static void flush_pending_writes(struct r10conf *conf) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void*)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1085,13 +1085,13 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; - struct md_rdev *rdev = (void*)bio->bi_bdev; + struct md_rdev *rdev = (void*)bio->bi_disk; bio->bi_next = NULL; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); if (test_bit(Faulty, &rdev->flags)) { bio_io_error(bio); } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) + !blk_queue_discard(bio->bi_disk->queue))) /* Just ignore it */ bio_endio(bio); else @@ -1200,7 +1200,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + choose_data_offset(r10_bio, rdev); - read_bio->bi_bdev = rdev->bdev; + bio_set_dev(read_bio, rdev->bdev); read_bio->bi_end_io = raid10_end_read_request; bio_set_op_attrs(read_bio, op, do_sync); if (test_bit(FailFast, &rdev->flags) && @@ -1209,7 +1209,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, read_bio->bi_private = r10_bio; if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev), + trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, disk_devt(mddev->gendisk), r10_bio->sector); generic_make_request(read_bio); @@ -1249,7 +1249,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + choose_data_offset(r10_bio, rdev)); - mbio->bi_bdev = rdev->bdev; + bio_set_dev(mbio, rdev->bdev); mbio->bi_end_io = raid10_end_write_request; bio_set_op_attrs(mbio, op, do_sync | do_fua); if (!replacement && test_bit(FailFast, @@ -1259,11 +1259,11 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, mbio->bi_private = r10_bio; if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev), + trace_block_bio_remap(mbio->bi_disk->queue, mbio, disk_devt(conf->mddev->gendisk), r10_bio->sector); /* flush_pending_writes() needs access to the rdev so...*/ - mbio->bi_bdev = (void *)rdev; + mbio->bi_disk = (void *)rdev; atomic_inc(&r10_bio->remaining); @@ -2094,7 +2094,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) if (test_bit(FailFast, &conf->mirrors[d].rdev->flags)) tbio->bi_opf |= MD_FAILFAST; tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset; - tbio->bi_bdev = conf->mirrors[d].rdev->bdev; + bio_set_dev(tbio, conf->mirrors[d].rdev->bdev); generic_make_request(tbio); } @@ -2552,7 +2552,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); wbio->bi_iter.bi_sector = wsector + choose_data_offset(r10_bio, rdev); - wbio->bi_bdev = rdev->bdev; + bio_set_dev(wbio, rdev->bdev); bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) @@ -2575,7 +2575,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) struct bio *bio; struct r10conf *conf = mddev->private; struct md_rdev *rdev = r10_bio->devs[slot].rdev; - dev_t bio_dev; sector_t bio_last_sector; /* we got a read error. Maybe the drive is bad. Maybe just @@ -2587,7 +2586,6 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) * frozen. */ bio = r10_bio->devs[slot].bio; - bio_dev = bio->bi_bdev->bd_dev; bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors; bio_put(bio); r10_bio->devs[slot].bio = NULL; @@ -2950,7 +2948,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, /* Again, very different code for resync and recovery. * Both must result in an r10bio with a list of bios that - * have bi_end_io, bi_sector, bi_bdev set, + * have bi_end_io, bi_sector, bi_disk set, * and bi_private set to the r10bio. * For recovery, we may actually create several r10bios * with 2 bios in each, that correspond to the bios in the main one. @@ -3095,7 +3093,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, from_addr = r10_bio->devs[j].addr; bio->bi_iter.bi_sector = from_addr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); atomic_inc(&rdev->nr_pending); /* and we write to 'i' (if not in_sync) */ @@ -3117,7 +3115,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + mrdev->data_offset; - bio->bi_bdev = mrdev->bdev; + bio_set_dev(bio, mrdev->bdev); atomic_inc(&r10_bio->remaining); } else r10_bio->devs[1].bio->bi_end_io = NULL; @@ -3143,7 +3141,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio->bi_iter.bi_sector = to_addr + mreplace->data_offset; - bio->bi_bdev = mreplace->bdev; + bio_set_dev(bio, mreplace->bdev); atomic_inc(&r10_bio->remaining); break; } @@ -3289,7 +3287,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); count++; rdev = rcu_dereference(conf->mirrors[d].replacement); @@ -3311,7 +3309,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, if (test_bit(FailFast, &rdev->flags)) bio->bi_opf |= MD_FAILFAST; bio->bi_iter.bi_sector = sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; + bio_set_dev(bio, rdev->bdev); count++; rcu_read_unlock(); } @@ -3367,7 +3365,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr, r10_bio->sectors = nr_sectors; if (bio->bi_end_io == end_sync_read) { - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct_bio(bio, nr_sectors); bio->bi_status = 0; generic_make_request(bio); } @@ -4383,7 +4381,7 @@ read_more: read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev); - read_bio->bi_bdev = rdev->bdev; + bio_set_dev(read_bio, rdev->bdev); read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset); read_bio->bi_private = r10_bio; @@ -4417,7 +4415,7 @@ read_more: if (!rdev2 || test_bit(Faulty, &rdev2->flags)) continue; - b->bi_bdev = rdev2->bdev; + bio_set_dev(b, rdev2->bdev); b->bi_iter.bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset; b->bi_end_io = end_reshape_write; @@ -4449,7 +4447,7 @@ read_more: r10_bio->sectors = nr_sectors; /* Now submit the read */ - md_sync_acct(read_bio->bi_bdev, r10_bio->sectors); + md_sync_acct_bio(read_bio, r10_bio->sectors); atomic_inc(&r10_bio->remaining); read_bio->bi_next = NULL; generic_make_request(read_bio); @@ -4511,7 +4509,7 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio) } atomic_inc(&rdev->nr_pending); rcu_read_unlock(); - md_sync_acct(b->bi_bdev, r10_bio->sectors); + md_sync_acct_bio(b, r10_bio->sectors); atomic_inc(&r10_bio->remaining); b->bi_next = NULL; generic_make_request(b); diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index bfa1e907c472..f253a9c583c1 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -728,7 +728,7 @@ static struct bio *r5l_bio_alloc(struct r5l_log *log) struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, log->bs); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - bio->bi_bdev = log->rdev->bdev; + bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->rdev->data_offset + log->log_start; return bio; @@ -1291,7 +1291,7 @@ void r5l_flush_stripe_to_raid(struct r5l_log *log) if (!do_flush) return; bio_reset(&log->flush_bio); - log->flush_bio.bi_bdev = log->rdev->bdev; + bio_set_dev(&log->flush_bio, log->rdev->bdev); log->flush_bio.bi_end_io = r5l_log_flush_endio; log->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; submit_bio(&log->flush_bio); @@ -1669,7 +1669,7 @@ static int r5l_recovery_fetch_ra_pool(struct r5l_log *log, sector_t offset) { bio_reset(ctx->ra_bio); - ctx->ra_bio->bi_bdev = log->rdev->bdev; + bio_set_dev(ctx->ra_bio, log->rdev->bdev); bio_set_op_attrs(ctx->ra_bio, REQ_OP_READ, 0); ctx->ra_bio->bi_iter.bi_sector = log->rdev->data_offset + offset; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 44ad5baf3206..1e237c40d6fa 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -415,7 +415,7 @@ static void ppl_submit_iounit_bio(struct ppl_io_unit *io, struct bio *bio) pr_debug("%s: seq: %llu size: %u sector: %llu dev: %s\n", __func__, io->seq, bio->bi_iter.bi_size, (unsigned long long)bio->bi_iter.bi_sector, - bdevname(bio->bi_bdev, b)); + bio_devname(bio, b)); submit_bio(bio); } @@ -453,7 +453,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio->bi_end_io = ppl_log_endio; bio->bi_opf = REQ_OP_WRITE | REQ_FUA; - bio->bi_bdev = log->rdev->bdev; + bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->rdev->ppl.sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); @@ -468,7 +468,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, ppl_conf->bs); bio->bi_opf = prev->bi_opf; - bio->bi_bdev = prev->bi_bdev; + bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d687aeb1b538..3ae8bbceb6c4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1096,7 +1096,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bi->bi_bdev = rdev->bdev; + bio_set_dev(bi, rdev->bdev); bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) ? raid5_end_write_request @@ -1145,7 +1145,7 @@ again: set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(bi->bi_bdev), + trace_block_bio_remap(bi->bi_disk->queue, bi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); if (should_defer && op_is_write(op)) @@ -1160,7 +1160,7 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - rbi->bi_bdev = rrdev->bdev; + bio_set_dev(rbi, rrdev->bdev); bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); rbi->bi_end_io = raid5_end_write_request; @@ -1193,7 +1193,7 @@ again: if (op == REQ_OP_DISCARD) rbi->bi_vcnt = 0; if (conf->mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), + trace_block_bio_remap(rbi->bi_disk->queue, rbi, disk_devt(conf->mddev->gendisk), sh->dev[i].sector); if (should_defer && op_is_write(op)) @@ -5233,7 +5233,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; - align_bi->bi_bdev = rdev->bdev; + bio_set_dev(align_bi, rdev->bdev); bio_clear_flag(align_bi, BIO_SEG_VALID); if (is_badblock(rdev, align_bi->bi_iter.bi_sector, @@ -5255,7 +5255,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio) spin_unlock_irq(&conf->device_lock); if (mddev->gendisk) - trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev), + trace_block_bio_remap(align_bi->bi_disk->queue, align_bi, disk_devt(mddev->gendisk), raid_bio->bi_iter.bi_sector); generic_make_request(align_bi); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 73062da3177f..a87f793f2945 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -390,7 +390,7 @@ int nd_region_activate(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) { - struct gendisk *disk = bio->bi_bdev->bd_disk; + struct gendisk *disk = bio->bi_disk; if (!blk_queue_io_stat(disk->queue)) return false; @@ -402,7 +402,7 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) } static inline void nd_iostat_end(struct bio *bio, unsigned long start) { - struct gendisk *disk = bio->bi_bdev->bd_disk; + struct gendisk *disk = bio->bi_disk; generic_end_io_acct(disk->queue, bio_data_dir(bio), &disk->part0, start); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c49f1f8b2e57..f03452db7938 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -613,11 +613,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (!disk) goto submit; - bio->bi_bdev = bdget_disk(disk, 0); - if (!bio->bi_bdev) { - ret = -ENODEV; - goto out_unmap; - } + bio->bi_disk = disk; if (meta_buffer && meta_len) { struct bio_integrity_payload *bip; @@ -668,11 +664,8 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, out_free_meta: kfree(meta); out_unmap: - if (bio) { - if (disk && bio->bi_bdev) - bdput(bio->bi_bdev); + if (bio) blk_rq_unmap_user(bio); - } out: blk_mq_free_request(req); return ret; diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index be8541335e31..c1a28569e843 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -643,17 +643,9 @@ static int nvme_nvm_submit_user_cmd(struct request_queue *q, vcmd->ph_rw.metadata = cpu_to_le64(metadata_dma); } - if (!disk) - goto submit; - - bio->bi_bdev = bdget_disk(disk, 0); - if (!bio->bi_bdev) { - ret = -ENODEV; - goto err_meta; - } + bio->bi_disk = disk; } -submit: blk_execute_rq(q, NULL, rq, 0); if (nvme_req(rq)->flags & NVME_REQ_CANCELLED) @@ -673,11 +665,8 @@ err_meta: if (meta_buf && meta_len) dma_pool_free(dev->dma_pool, metadata, metadata_dma); err_map: - if (bio) { - if (disk && bio->bi_bdev) - bdput(bio->bi_bdev); + if (bio) blk_rq_unmap_user(bio); - } err_ppa: if (ppa_buf && ppa_len) dma_pool_free(dev->dma_pool, ppa_list, ppa_dma); diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c index 3b4d47a6abdb..0d4c23dc4532 100644 --- a/drivers/nvme/target/io-cmd.c +++ b/drivers/nvme/target/io-cmd.c @@ -68,7 +68,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) nvmet_inline_bio_init(req); bio = &req->inline_bio; - bio->bi_bdev = req->ns->bdev; + bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; @@ -80,7 +80,7 @@ static void nvmet_execute_rw(struct nvmet_req *req) struct bio *prev = bio; bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); - bio->bi_bdev = req->ns->bdev; + bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio_set_op_attrs(bio, op, op_flags); @@ -104,7 +104,7 @@ static void nvmet_execute_flush(struct nvmet_req *req) nvmet_inline_bio_init(req); bio = &req->inline_bio; - bio->bi_bdev = req->ns->bdev; + bio_set_dev(bio, req->ns->bdev); bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 68bae4f6bd88..7abb240847c0 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -856,14 +856,14 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) blk_queue_split(q, &bio); bytes_done = 0; - dev_info = bio->bi_bdev->bd_disk->private_data; + dev_info = bio->bi_disk->private_data; if (dev_info == NULL) goto fail; if ((bio->bi_iter.bi_sector & 7) != 0 || (bio->bi_iter.bi_size & 4095) != 0) /* Request is not page-aligned. */ goto fail; - if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) { + if (bio_end_sector(bio) > get_capacity(bio->bi_disk)) { /* Request beyond end of DCSS segment. */ goto fail; } diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index a48f0d40c1d2..571a0709e1e5 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -183,7 +183,7 @@ static unsigned long xpram_highest_page_index(void) */ static blk_qc_t xpram_make_request(struct request_queue *q, struct bio *bio) { - xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data; + xpram_device_t *xdev = bio->bi_disk->private_data; struct bio_vec bvec; struct bvec_iter iter; unsigned int index; diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index ee7c7fa55dad..07c814c42648 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -338,7 +338,7 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op, return NULL; } - bio->bi_bdev = ib_dev->ibd_bd; + bio_set_dev(bio, ib_dev->ibd_bd); bio->bi_private = cmd; bio->bi_end_io = &iblock_bio_done; bio->bi_iter.bi_sector = lba; @@ -395,7 +395,7 @@ iblock_execute_sync_cache(struct se_cmd *cmd) bio = bio_alloc(GFP_KERNEL, 0); bio->bi_end_io = iblock_end_io_flush; - bio->bi_bdev = ib_dev->ibd_bd; + bio_set_dev(bio, ib_dev->ibd_bd); bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; if (!immed) bio->bi_private = cmd; diff --git a/fs/block_dev.c b/fs/block_dev.c index d29d1c70f833..bb715b2fcfb8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -223,7 +223,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, } bio_init(&bio, vecs, nr_pages); - bio.bi_bdev = bdev; + bio_set_dev(&bio, bdev); bio.bi_iter.bi_sector = pos >> 9; bio.bi_write_hint = iocb->ki_hint; bio.bi_private = current; @@ -362,7 +362,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) blk_start_plug(&plug); for (;;) { - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; bio->bi_write_hint = iocb->ki_hint; bio->bi_private = dio; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 9d3854839038..fb07e3c22b9a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -1635,7 +1635,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, unsigned int j; bio = btrfs_io_bio_alloc(num_pages - i); - bio->bi_bdev = block_ctx->dev->bdev; + bio_set_dev(bio, block_ctx->dev->bdev); bio->bi_iter.bi_sector = dev_bytenr >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); @@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio) mutex_lock(&btrfsic_mutex); /* since btrfsic_submit_bio() is also called before * btrfsic_mount(), this might return NULL */ - dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev); + dev_state = btrfsic_dev_state_lookup(bio_dev(bio)); if (NULL != dev_state && (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) { unsigned int i = 0; @@ -2819,10 +2819,10 @@ static void __btrfsic_submit_bio(struct bio *bio) bio_is_patched = 0; if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", + pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n", bio_op(bio), bio->bi_opf, segs, (unsigned long long)bio->bi_iter.bi_sector, - dev_bytenr, bio->bi_bdev); + dev_bytenr, bio->bi_disk); mapped_datav = kmalloc_array(segs, sizeof(*mapped_datav), GFP_NOFS); @@ -2851,8 +2851,8 @@ static void __btrfsic_submit_bio(struct bio *bio) } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { if (dev_state->state->print_mask & BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) - pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n", - bio_op(bio), bio->bi_opf, bio->bi_bdev); + pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n", + bio_op(bio), bio->bi_opf, bio->bi_disk); if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { if ((dev_state->state->print_mask & (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 080e2ebb8aa0..0640c27e63e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3499,7 +3499,7 @@ static void write_dev_flush(struct btrfs_device *device) bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; - bio->bi_bdev = device->bdev; + bio_set_dev(bio, device->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0aff9b278c19..42b12a85ab49 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2033,7 +2033,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, bio_put(bio); return -EIO; } - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; bio_add_page(bio, page, length, pg_offset); @@ -2335,7 +2335,7 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio, bio = btrfs_io_bio_alloc(1); bio->bi_end_io = endio_func; bio->bi_iter.bi_sector = failrec->logical >> 9; - bio->bi_bdev = fs_info->fs_devices->latest_bdev; + bio_set_dev(bio, fs_info->fs_devices->latest_bdev); bio->bi_iter.bi_size = 0; bio->bi_private = data; @@ -2675,7 +2675,7 @@ struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte) struct bio *bio; bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, btrfs_bioset); - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 208638384cd2..d268cb633735 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1090,7 +1090,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, */ if (last_end == disk_start && stripe->dev->bdev && !last->bi_status && - last->bi_bdev == stripe->dev->bdev) { + last->bi_disk == stripe->dev->bdev->bd_disk && + last->bi_partno == stripe->dev->bdev->bd_partno) { ret = bio_add_page(last, page, PAGE_SIZE, 0); if (ret == PAGE_SIZE) return 0; @@ -1100,7 +1101,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, /* put a new bio on the list */ bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); bio->bi_iter.bi_size = 0; - bio->bi_bdev = stripe->dev->bdev; + bio_set_dev(bio, stripe->dev->bdev); bio->bi_iter.bi_sector = disk_start >> 9; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -1347,7 +1348,8 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio, stripe_start = stripe->physical; if (physical >= stripe_start && physical < stripe_start + rbio->stripe_len && - bio->bi_bdev == stripe->dev->bdev) { + bio->bi_disk == stripe->dev->bdev->bd_disk && + bio->bi_partno == stripe->dev->bdev->bd_partno) { return i; } } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6f1e4c984b94..b0b71e8e4c36 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1738,7 +1738,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, WARN_ON(!page->page); bio = btrfs_io_bio_alloc(1); - bio->bi_bdev = page->dev->bdev; + bio_set_dev(bio, page->dev->bdev); bio_add_page(bio, page->page, PAGE_SIZE, 0); if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) { @@ -1826,7 +1826,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, } bio = btrfs_io_bio_alloc(1); - bio->bi_bdev = page_bad->dev->bdev; + bio_set_dev(bio, page_bad->dev->bdev); bio->bi_iter.bi_sector = page_bad->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); @@ -1921,7 +1921,7 @@ again: bio->bi_private = sbio; bio->bi_end_io = scrub_wr_bio_end_io; - bio->bi_bdev = sbio->dev->bdev; + bio_set_dev(bio, sbio->dev->bdev); bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_WRITE, 0); sbio->status = 0; @@ -1964,7 +1964,7 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) sbio = sctx->wr_curr_bio; sctx->wr_curr_bio = NULL; - WARN_ON(!sbio->bio->bi_bdev); + WARN_ON(!sbio->bio->bi_disk); scrub_pending_bio_inc(sctx); /* process all writes in a single worker thread. Then the block layer * orders the requests before sending them to the driver which @@ -2321,7 +2321,7 @@ again: bio->bi_private = sbio; bio->bi_end_io = scrub_bio_end_io; - bio->bi_bdev = sbio->dev->bdev; + bio_set_dev(bio, sbio->dev->bdev); bio->bi_iter.bi_sector = sbio->physical >> 9; bio_set_op_attrs(bio, REQ_OP_READ, 0); sbio->status = 0; @@ -4627,7 +4627,7 @@ static int write_page_nocow(struct scrub_ctx *sctx, bio = btrfs_io_bio_alloc(1); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = physical_for_dev_replace >> 9; - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; ret = bio_add_page(bio, page, PAGE_SIZE, 0); if (ret != PAGE_SIZE) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e8b9a269fdde..f9f0f474a64f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6188,7 +6188,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, rcu_read_unlock(); } #endif - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); btrfs_bio_counter_inc_noblocked(fs_info); diff --git a/fs/buffer.c b/fs/buffer.c index 5715dac7821f..50e51a67dc78 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3057,7 +3057,7 @@ void guard_bio_eod(int op, struct bio *bio) struct bio_vec *bvec = &bio->bi_io_vec[bio->bi_vcnt - 1]; unsigned truncated_bytes; - maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; + maxsector = get_capacity(bio->bi_disk); if (!maxsector) return; @@ -3116,7 +3116,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, } bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; + bio_set_dev(bio, bh->b_bdev); bio->bi_write_hint = write_hint; bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 6181e9526860..483784d5eb73 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -115,7 +115,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, err = -ENOMEM; goto errout; } - bio->bi_bdev = inode->i_sb->s_bdev; + bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (inode->i_sb->s_blocksize_bits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); diff --git a/fs/direct-io.c b/fs/direct-io.c index 08cf27811e5a..5fa2211e49ae 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -111,7 +111,7 @@ struct dio { int op; int op_flags; blk_qc_t bio_cookie; - struct block_device *bio_bdev; + struct gendisk *bio_disk; struct inode *inode; loff_t i_size; /* i_size when submitted */ dio_iodone_t *end_io; /* IO completion function */ @@ -377,7 +377,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, */ bio = bio_alloc(GFP_KERNEL, nr_vecs); - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_sector; bio_set_op_attrs(bio, dio->op, dio->op_flags); if (dio->is_async) @@ -412,7 +412,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) bio_set_pages_dirty(bio); - dio->bio_bdev = bio->bi_bdev; + dio->bio_disk = bio->bi_disk; if (sdio->submit_io) { sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio); @@ -458,7 +458,7 @@ static struct bio *dio_await_one(struct dio *dio) dio->waiter = current; spin_unlock_irqrestore(&dio->bio_lock, flags); if (!(dio->iocb->ki_flags & IOCB_HIPRI) || - !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) + !blk_mq_poll(dio->bio_disk->queue, dio->bio_cookie)) io_schedule(); /* wake up sets us TASK_RUNNING */ spin_lock_irqsave(&dio->bio_lock, flags); diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 8bb72807e70d..3c6a9c156b7a 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -869,7 +869,7 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) goto out; } - bio->bi_bdev = NULL; + bio->bi_disk = NULL; bio->bi_next = NULL; per_dev->offset = master_dev->offset; per_dev->length = master_dev->length; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index c2fce4478cca..55ad7dd149d0 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -300,7 +300,7 @@ static void ext4_end_bio(struct bio *bio) char b[BDEVNAME_SIZE]; if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n", - bdevname(bio->bi_bdev, b), + bio_devname(bio, b), (long long) bio->bi_iter.bi_sector, (unsigned) bio_sectors(bio), bio->bi_status)) { @@ -375,7 +375,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io, return -ENOMEM; wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; + bio_set_dev(bio, bh->b_bdev); bio->bi_end_io = ext4_end_bio; bio->bi_private = ext4_get_io_end(io->io_end); io->io_bio = bio; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 40a5497b0f60..04c90643af7a 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -254,7 +254,7 @@ int ext4_mpage_readpages(struct address_space *mapping, fscrypt_release_ctx(ctx); goto set_error_page; } - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 87c1f4150c64..a791aac4c5af 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -142,7 +142,7 @@ struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, } } if (bio) { - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); } return bdev; @@ -161,7 +161,8 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static bool __same_bdev(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio) { - return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev; + struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL); + return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno; } /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f964b68718c1..6f8fc4a6e701 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -447,7 +447,7 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, int ret; bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); ret = submit_bio_wait(bio); bio_put(bio); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3010f9edd177..720c19ada0f9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -265,7 +265,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno) bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); - bio->bi_bdev = sb->s_bdev; + bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = gfs2_end_log_write; bio->bi_private = sdp; diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index fabe1614f879..39433a173baa 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -221,7 +221,7 @@ static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], bio = bio_alloc(GFP_NOIO, num); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; + bio_set_dev(bio, bh->b_bdev); while (num > 0) { bh = *bhs; if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e76058d34b74..8155e16076e1 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -242,7 +242,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio = bio_alloc(GFP_NOFS, 1); bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); - bio->bi_bdev = sb->s_bdev; + bio_set_dev(bio, sb->s_bdev); bio_add_page(bio, page, PAGE_SIZE, 0); bio->bi_end_io = end_bio_io_page; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index e254fa0f0697..10032b919a85 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -65,7 +65,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, bio = bio_alloc(GFP_NOIO, 1); bio->bi_iter.bi_sector = sector; - bio->bi_bdev = sb->s_bdev; + bio_set_dev(bio, sb->s_bdev); bio_set_op_attrs(bio, op, op_flags); if (op != WRITE && data) diff --git a/fs/iomap.c b/fs/iomap.c index 039266128b7f..77be8850997b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -805,7 +805,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, struct bio *bio; bio = bio_alloc(GFP_KERNEL, 1); - bio->bi_bdev = iomap->bdev; + bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); bio->bi_private = dio; @@ -884,7 +884,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, return 0; bio = bio_alloc(GFP_KERNEL, nr_pages); - bio->bi_bdev = iomap->bdev; + bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); bio->bi_write_hint = dio->iocb->ki_hint; diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index a21f0e9eecd4..0e5d412c0b01 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1995,7 +1995,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio = bio_alloc(GFP_NOFS, 1); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio->bi_bdev = log->bdev; + bio_set_dev(bio, log->bdev); bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); @@ -2139,7 +2139,7 @@ static void lbmStartIO(struct lbuf * bp) bio = bio_alloc(GFP_NOFS, 1); bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); - bio->bi_bdev = log->bdev; + bio_set_dev(bio, log->bdev); bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset); BUG_ON(bio->bi_iter.bi_size != LOGPSIZE); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 65120a471729..1c4b9ad4d7ab 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -430,7 +430,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = inode->i_sb->s_bdev; + bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_write_end_io; bio->bi_private = page; @@ -510,7 +510,7 @@ static int metapage_readpage(struct file *fp, struct page *page) submit_bio(bio); bio = bio_alloc(GFP_NOFS, 1); - bio->bi_bdev = inode->i_sb->s_bdev; + bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_read_end_io; diff --git a/fs/mpage.c b/fs/mpage.c index 2e4c41ccb5c9..37bb77c1302c 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -83,7 +83,7 @@ mpage_alloc(struct block_device *bdev, } if (bio) { - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_sector; } return bio; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index d8863a804b15..995d707537da 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -130,7 +130,7 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, if (bio) { bio->bi_iter.bi_sector = disk_sector; - bio->bi_bdev = bdev; + bio_set_dev(bio, bdev); bio->bi_end_io = end_io; bio->bi_private = par; } diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index e73c86d9855c..6c5009cc4e6f 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -400,7 +400,7 @@ static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, bio = bio_alloc(GFP_NOIO, nr_vecs); } if (likely(bio)) { - bio->bi_bdev = nilfs->ns_bdev; + bio_set_dev(bio, nilfs->ns_bdev); bio->bi_iter.bi_sector = start << (nilfs->ns_blocksize_bits - 9); } diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ffe003982d95..6aea15746a56 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -554,7 +554,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, /* Must put everything in 512 byte sectors for the bio... */ bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9); - bio->bi_bdev = reg->hr_bdev; + bio_set_dev(bio, reg->hr_bdev); bio->bi_private = wc; bio->bi_end_io = o2hb_bio_end_io; bio_set_op_attrs(bio, op, op_flags); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 6bf120bb1a17..c8ca03a5a08f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -517,7 +517,7 @@ xfs_init_bio_from_bh( struct buffer_head *bh) { bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; + bio_set_dev(bio, bh->b_bdev); } static struct xfs_ioend * diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 72f038492ba8..b1c9711e79a4 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1281,7 +1281,7 @@ next_chunk: nr_pages = min(total_nr_pages, BIO_MAX_PAGES); bio = bio_alloc(GFP_NOIO, nr_pages); - bio->bi_bdev = bp->b_target->bt_bdev; + bio_set_dev(bio, bp->b_target->bt_bdev); bio->bi_iter.bi_sector = sector; bio->bi_end_io = xfs_buf_bio_end_io; bio->bi_private = bp; diff --git a/include/linux/bio.h b/include/linux/bio.h index 9276788a9b24..a8fe7935332f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -494,6 +494,24 @@ extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *); extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int); extern unsigned int bvec_nr_vecs(unsigned short idx); +#define bio_set_dev(bio, bdev) \ +do { \ + (bio)->bi_disk = (bdev)->bd_disk; \ + (bio)->bi_partno = (bdev)->bd_partno; \ +} while (0) + +#define bio_copy_dev(dst, src) \ +do { \ + (dst)->bi_disk = (src)->bi_disk; \ + (dst)->bi_partno = (src)->bi_partno; \ +} while (0) + +#define bio_dev(bio) \ + disk_devt((bio)->bi_disk) + +#define bio_devname(bio, buf) \ + __bdevname(bio_dev(bio), (buf)) + #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); int bio_associate_current(struct bio *bio); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d2eb87c84d82..a2d2aa709cef 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -48,7 +48,8 @@ struct blk_issue_stat { */ struct bio { struct bio *bi_next; /* request queue link */ - struct block_device *bi_bdev; + struct gendisk *bi_disk; + u8 bi_partno; blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index df3e9ae5ad8d..daf749138ff8 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(bcache_request, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->orig_major = d->disk->major; __entry->orig_minor = d->disk->first_minor; __entry->sector = bio->bi_iter.bi_sector; @@ -98,7 +98,7 @@ DECLARE_EVENT_CLASS(bcache_bio, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -133,7 +133,7 @@ TRACE_EVENT(bcache_read, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio->bi_iter.bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d0dbe60d8a6d..f815aaaef755 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -236,8 +236,7 @@ TRACE_EVENT(block_bio_bounce, ), TP_fast_assign( - __entry->dev = bio->bi_bdev ? - bio->bi_bdev->bd_dev : 0; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -274,7 +273,7 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->error = error; @@ -302,7 +301,7 @@ DECLARE_EVENT_CLASS(block_bio_merge, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -369,7 +368,7 @@ TRACE_EVENT(block_bio_queue, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -397,7 +396,8 @@ DECLARE_EVENT_CLASS(block_get_rq, ), TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; + __entry->dev = bio ? bio_dev(bio) : 0; + __entry->dev = bio_dev(bio); __entry->sector = bio ? bio->bi_iter.bi_sector : 0; __entry->nr_sector = bio ? bio_sectors(bio) : 0; blk_fill_rwbs(__entry->rwbs, @@ -532,7 +532,7 @@ TRACE_EVENT(block_split, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->new_sector = new_sector; blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); @@ -573,7 +573,7 @@ TRACE_EVENT(block_bio_remap, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio_dev(bio); __entry->sector = bio->bi_iter.bi_sector; __entry->nr_sector = bio_sectors(bio); __entry->old_dev = dev; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 6f77a2755abb..bc4dd7837e4c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -829,7 +829,7 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_fast_assign( __entry->dev = sb->s_dev; - __entry->target = bio->bi_bdev->bd_dev; + __entry->target = bio_dev(bio); __entry->op = bio_op(bio); __entry->op_flags = bio->bi_opf; __entry->type = type; diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 57d22571f306..d7cdc426ee38 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -242,8 +242,7 @@ static void hib_end_io(struct bio *bio) if (bio->bi_status) { printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", - imajor(bio->bi_bdev->bd_inode), - iminor(bio->bi_bdev->bd_inode), + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), (unsigned long long)bio->bi_iter.bi_sector); } @@ -270,7 +269,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, bio = bio_alloc(__GFP_RECLAIM | __GFP_HIGH, 1); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); - bio->bi_bdev = hib_resume_bdev; + bio_set_dev(bio, hib_resume_bdev); bio_set_op_attrs(bio, op, op_flags); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7724de18d2fe..2a685b45b73b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -963,7 +963,7 @@ static void blk_add_trace_bio_remap(void *ignore, return; r.device_from = cpu_to_be32(dev); - r.device_to = cpu_to_be32(bio->bi_bdev->bd_dev); + r.device_to = cpu_to_be32(bio_dev(bio)); r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, diff --git a/mm/page_io.c b/mm/page_io.c index b6c4ac388209..9cf1bc751d79 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -31,7 +31,10 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, bio = bio_alloc(gfp_flags, 1); if (bio) { - bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev); + struct block_device *bdev; + + bio->bi_iter.bi_sector = map_swap_page(page, &bdev); + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9; bio->bi_end_io = end_io; @@ -57,8 +60,7 @@ void end_swap_bio_write(struct bio *bio) */ set_page_dirty(page); pr_alert("Write-error on swap-device (%u:%u:%llu)\n", - imajor(bio->bi_bdev->bd_inode), - iminor(bio->bi_bdev->bd_inode), + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), (unsigned long long)bio->bi_iter.bi_sector); ClearPageReclaim(page); } @@ -123,8 +125,7 @@ static void end_swap_bio_read(struct bio *bio) SetPageError(page); ClearPageUptodate(page); pr_alert("Read-error on swap-device (%u:%u:%llu)\n", - imajor(bio->bi_bdev->bd_inode), - iminor(bio->bi_bdev->bd_inode), + MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)), (unsigned long long)bio->bi_iter.bi_sector); goto out; } @@ -338,7 +339,7 @@ int swap_readpage(struct page *page, bool do_poll) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); blk_qc_t qc; - struct block_device *bdev; + struct gendisk *disk; VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); @@ -377,7 +378,7 @@ int swap_readpage(struct page *page, bool do_poll) ret = -ENOMEM; goto out; } - bdev = bio->bi_bdev; + disk = bio->bi_disk; bio->bi_private = current; bio_set_op_attrs(bio, REQ_OP_READ, 0); count_vm_event(PSWPIN); @@ -388,7 +389,7 @@ int swap_readpage(struct page *page, bool do_poll) if (!READ_ONCE(bio->bi_private)) break; - if (!blk_mq_poll(bdev_get_queue(bdev), qc)) + if (!blk_mq_poll(disk->queue, qc)) break; } __set_current_state(TASK_RUNNING); -- cgit v1.2.3 From 5f656541ff6e4f58b4ab5b4ae59badb97a9ff749 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Aug 2017 21:27:19 -0700 Subject: f2fs: issue discard commands if gc_urgent is set It's time to issue all the discard commands, if user sets the idle time. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 +++++- fs/f2fs/sysfs.c | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1387925a0d83..e3922f902c8c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -21,6 +21,7 @@ #include "f2fs.h" #include "segment.h" #include "node.h" +#include "gc.h" #include "trace.h" #include @@ -1194,8 +1195,11 @@ static int issue_discard_thread(void *data) if (kthread_should_stop()) return 0; - if (dcc->discard_wake) + if (dcc->discard_wake) { dcc->discard_wake = 0; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + mark_discard_range_all(sbi); + } sb_start_intwrite(sbi->sb); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 4bcaa9059026..b9ad9041559f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -178,8 +178,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); + + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); } return count; -- cgit v1.2.3 From 6f890df0a7efe3181aceb5d8bcd4af7deb2abce5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 21 Aug 2017 22:53:45 +0800 Subject: f2fs: fix out-of-order execution in f2fs_issue_flush In f2fs_issue_flush, due to out-of-order execution of CPU, wake_up can be called before we insert issue_list, result in long latency of wait_for_completion. Fix this by adding smp_mb() to force the order of related codes. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e3922f902c8c..be1c49b9402b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -549,7 +549,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); - if (!fcc->dispatch_list) + /* update issue_list before we wake up issue_flush thread */ + smp_mb(); + + if (waitqueue_active(&fcc->flush_wait_queue)) wake_up(&fcc->flush_wait_queue); if (fcc->f2fs_issue_flush) { -- cgit v1.2.3 From 84a23fbe96b4e307eb749046a74515329119b08d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 18 Aug 2017 23:37:36 +0800 Subject: f2fs: clear FI_HOT_DATA correctly This patch fixes to clear FI_HOT_DATA correctly in below path: - error handling in f2fs_ioc_start_atomic_write - after commit atomic write in f2fs_ioc_commit_atomic_write - after drop atomic write in drop_inmem_pages Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ fs/f2fs/segment.c | 1 + 2 files changed, 3 insertions(+) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3eebd49c3348..8f0d32a57b52 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1640,6 +1640,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); goto out; } @@ -1678,6 +1679,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); if (!ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } } else { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index be1c49b9402b..8306beace7cb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -255,6 +255,7 @@ void drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_HOT_DATA); stat_dec_atomic_write(inode); } -- cgit v1.2.3 From 01983c715ad0e78842a885f361ad927a3a985994 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 22 Aug 2017 21:15:43 -0700 Subject: f2fs: wake up discard_thread iff there is a candidate This patch fixes to avoid needless wake ups. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +-- fs/f2fs/segment.h | 25 +++++++++++++++++++++++++ fs/f2fs/sysfs.c | 6 +----- 3 files changed, 27 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8306beace7cb..8375257b6b26 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1494,8 +1494,7 @@ skip: kmem_cache_free(discard_entry_slab, entry); } - dcc->discard_wake = 1; - wake_up_interruptible_all(&dcc->discard_wait_queue); + wake_up_discard_thread(sbi, false); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b9b4f85ffeb6..613b2fa7b1c1 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -798,3 +798,28 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, wbc->nr_to_write = desired; return desired - nr_to_write; } + +static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + bool wakeup = false; + int i; + + if (force) + goto wake_up; + + mutex_lock(&dcc->cmd_lock); + for (i = MAX_PLIST_NUM - 1; + i >= 0 && plist_issue(dcc->pend_list_tag[i]); i--) { + if (!list_empty(&dcc->pend_list[i])) { + wakeup = true; + break; + } + } + mutex_unlock(&dcc->cmd_lock); + if (!wakeup) + return; +wake_up: + dcc->discard_wake = 1; + wake_up_interruptible_all(&dcc->discard_wait_queue); +} diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index b9ad9041559f..962735dc9c63 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -178,13 +178,9 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0) f2fs_reset_iostat(sbi); if (!strcmp(a->attr.name, "gc_urgent") && t == 1 && sbi->gc_thread) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - sbi->gc_thread->gc_wake = 1; wake_up_interruptible_all(&sbi->gc_thread->gc_wait_queue_head); - - dcc->discard_wake = 1; - wake_up_interruptible_all(&dcc->discard_wait_queue); + wake_up_discard_thread(sbi, true); } return count; -- cgit v1.2.3 From 025d63a486aad611b20fa39184fc86e4b76d260e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 30 Aug 2017 18:04:48 +0800 Subject: f2fs: remove unneeded parameter of change_curseg allocate_segment_by_default is the only caller of change_curseg passing @reuse with 'false', but commit 763bfe1bc575 ("f2fs: remove reusing any prefree segments") removes the calling, after that, @reuse in change_curseg always be true, so, let's clean up the unneeded parameter. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8375257b6b26..d6c3f456ea51 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2012,7 +2012,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ -static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) +static void change_curseg(struct f2fs_sb_info *sbi, int type) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -2033,12 +2033,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) curseg->alloc_type = SSR; __next_free_blkoff(sbi, curseg, 0); - if (reuse) { - sum_page = get_sum_page(sbi, new_segno); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); - memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); - } + sum_page = get_sum_page(sbi, new_segno); + sum_node = (struct f2fs_summary_block *)page_address(sum_page); + memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); + f2fs_put_page(sum_page, 1); } static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) @@ -2102,7 +2100,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) - change_curseg(sbi, type, true); + change_curseg(sbi, type); else new_curseg(sbi, type, false); @@ -2455,7 +2453,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, /* change the current segment */ if (segno != curseg->segno) { curseg->next_segno = segno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); @@ -2474,7 +2472,7 @@ void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (recover_curseg) { if (old_cursegno != curseg->segno) { curseg->next_segno = old_cursegno; - change_curseg(sbi, type, true); + change_curseg(sbi, type); } curseg->next_blkoff = old_blkoff; } -- cgit v1.2.3 From edd748e6c8e824a8281f8a8450f12c4a95ec61ee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:05 +0800 Subject: f2fs: avoid race in between atomic_read & atomic_inc Previously, we will miss merging flush command during fsync due to below race condition: Thread A Thread B Thread C - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - f2fs_issue_flush - atomic_read(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - atomic_inc(&issing_flush) - submit_flush_wait - submit_flush_wait - submit_flush_wait It needs to use atomic_inc_return instead to avoid such race. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d6c3f456ea51..1215ca1bd4e2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -536,8 +536,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return ret; } - if (!atomic_read(&fcc->issing_flush)) { - atomic_inc(&fcc->issing_flush); + if (atomic_inc_return(&fcc->issing_flush) == 1) { ret = submit_flush_wait(sbi); atomic_dec(&fcc->issing_flush); @@ -547,7 +546,6 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) init_completion(&cmd.wait); - atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); /* update issue_list before we wake up issue_flush thread */ -- cgit v1.2.3 From d3238691ed5f7be29f7b8b7cf7c68bbb2924361d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Aug 2017 18:56:06 +0800 Subject: f2fs: fix to wake up all sleeping flusher In scenario of remount_ro vs flush, after flush_thread exits in ->remount_fs, flusher will only clean up golbal issue_list, but without waking up flushers waiting on that list, result in hang related user threads. In order to fix this issue, this patch enables the flusher to take charge of issue_flush thread: executes merged flush command, and wake up all sleeping flushers. Fixes: 5eba8c5d1fb3 ("f2fs: fix to access nullified flush_cmd_control pointer") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1215ca1bd4e2..265c3bc44f2d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -558,8 +558,27 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) wait_for_completion(&cmd.wait); atomic_dec(&fcc->issing_flush); } else { - llist_del_all(&fcc->issue_list); - atomic_set(&fcc->issing_flush, 0); + struct llist_node *list; + + list = llist_del_all(&fcc->issue_list); + if (!list) { + wait_for_completion(&cmd.wait); + atomic_dec(&fcc->issing_flush); + } else { + struct flush_cmd *tmp, *next; + + ret = submit_flush_wait(sbi); + + llist_for_each_entry_safe(tmp, next, list, llnode) { + if (tmp == &cmd) { + cmd.ret = ret; + atomic_dec(&fcc->issing_flush); + continue; + } + tmp->ret = ret; + complete(&tmp->wait); + } + } } return cmd.ret; -- cgit v1.2.3 From d4c759ee5faa51e0b0ee55d8a229ba5b80c4917e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 5 Sep 2017 17:04:35 -0700 Subject: f2fs: use generic terms used for encrypted block management This patch renames functions regarding to buffer management via META_MAPPING used for encrypted blocks especially. We can actually use them in generic way. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++--- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/file.c | 2 +- fs/f2fs/gc.c | 13 +++++++++---- fs/f2fs/segment.c | 3 +-- 5 files changed, 15 insertions(+), 12 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e6c683e7a10e..ee6801fdbdec 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1163,7 +1163,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, return ERR_CAST(ctx); /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + f2fs_wait_on_block_writeback(sbi, blkaddr); } bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); @@ -1349,7 +1349,7 @@ static int encrypt_one_page(struct f2fs_io_info *fio) return 0; /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr); + f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr); retry_encrypt: fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, @@ -1975,7 +1975,7 @@ repeat: /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_file(inode)) - f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + f2fs_wait_on_block_writeback(sbi, blkaddr); if (len == PAGE_SIZE || PageUptodate(page)) return 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 417c28f01fd5..91fb749686f2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2550,8 +2550,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, struct f2fs_io_info *fio, bool add_list); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered); -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr); +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr); void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk); void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int lookup_journal_in_cursum(struct f2fs_journal *journal, int type, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f8cedaba7ce4..224379a9848c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -107,7 +107,7 @@ mapped: /* wait for GCed encrypted page writeback */ if (f2fs_encrypted_file(inode)) - f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); + f2fs_wait_on_block_writeback(sbi, dn.data_blkaddr); out_sem: up_read(&F2FS_I(inode)->i_mmap_sem); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6a12f33d0cdd..bfe6a8ccc3a0 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -599,8 +599,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, return true; } -static void move_encrypted_block(struct inode *inode, block_t bidx, - unsigned int segno, int off) +/* + * Move data block via META_MAPPING while keeping locked data page. + * This can be used to move blocks, aka LBAs, directly on disk. + */ +static void move_data_block(struct inode *inode, block_t bidx, + unsigned int segno, int off) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -873,9 +877,10 @@ next_step: start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_encrypted_file(inode)) - move_encrypted_block(inode, start_bidx, segno, off); + move_data_block(inode, start_bidx, segno, off); else - move_data_page(inode, start_bidx, gc_type, segno, off); + move_data_page(inode, start_bidx, gc_type, + segno, off); if (locked) { up_write(&fi->dio_rwsem[WRITE]); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 265c3bc44f2d..9e708e525ba8 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2528,8 +2528,7 @@ void f2fs_wait_on_page_writeback(struct page *page, } } -void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, - block_t blkaddr) +void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr) { struct page *cpage; -- cgit v1.2.3 From 1eb1ef4a8e9f6a4d9c2c7a645aba21d2f8719728 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 12:03:23 -0700 Subject: f2fs: better to wait for fstrim completion In android, we'd better wait for fstrim completion instead of issuing the discard commands asynchronous. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9e708e525ba8..273cc645e502 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -1061,6 +1062,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) if (dcc->pend_list_tag[i] & P_TRIM) { __submit_discard_cmd(sbi, dc); issued++; + + if (fatal_signal_pending(current)) + break; continue; } @@ -1177,7 +1181,7 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super */ +/* This comes from f2fs_put_super and f2fs_trim_fs */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); @@ -2212,6 +2216,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) } /* It's time to issue all the filed discards */ mark_discard_range_all(sbi); + f2fs_wait_discard_bios(sbi); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; -- cgit v1.2.3 From b3a97a2a9a7b2d50bcf13d32857cd6f5695c6b65 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 9 Sep 2017 11:11:04 -0700 Subject: f2fs: speed up gc_urgent mode with SSR This patch activates SSR in gc_urgent mode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 15 +++++++++++++++ fs/f2fs/segment.h | 13 ------------- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c0803b1873b3..9a7c90386947 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2511,6 +2511,7 @@ void destroy_node_manager_caches(void); /* * segment.c */ +bool need_SSR(struct f2fs_sb_info *sbi); void register_inmem_page(struct inode *inode, struct page *page); void drop_inmem_pages(struct inode *inode); void drop_inmem_page(struct inode *inode, struct page *page); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 273cc645e502..7fd742f747ce 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -169,6 +169,21 @@ found: return result - size + __reverse_ffz(tmp); } +bool need_SSR(struct f2fs_sb_info *sbi) +{ + int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); + int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + + if (test_opt(sbi, LFS)) + return false; + if (sbi->gc_thread && sbi->gc_thread->gc_urgent) + return true; + + return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + + 2 * reserved_sections(sbi)); +} + void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 613b2fa7b1c1..e0a6cc23ace3 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -497,19 +497,6 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } -static inline bool need_SSR(struct f2fs_sb_info *sbi) -{ - int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); - int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); - int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); - - if (test_opt(sbi, LFS)) - return false; - - return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - 2 * reserved_sections(sbi)); -} - static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed, int needed) { -- cgit v1.2.3 From e6c6de18f010d9a7d592f4044d2c30213cb3a7bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 12 Sep 2017 21:35:12 +0800 Subject: f2fs: hurry up to issue discard after io interruption Once we encounter I/O interruption during issuing discards, we will delay long time before next round, but if system status is I/O idle during the time, it may loses opportunity to issue discards. So this patch changes to hurry up to issue discard after io interruption. Besides, this patch also fixes to issue discards accurately with assigned rate. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7fd742f747ce..dedf0209d820 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1062,6 +1062,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) struct blk_plug plug; int iter = 0, issued = 0; int i; + bool io_interrupted = false; mutex_lock(&dcc->cmd_lock); f2fs_bug_on(sbi, @@ -1083,11 +1084,20 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) continue; } - if (!issue_cond || is_idle(sbi)) { + if (!issue_cond) { + __submit_discard_cmd(sbi, dc); issued++; + continue; + } + + if (is_idle(sbi)) { __submit_discard_cmd(sbi, dc); + issued++; + } else { + io_interrupted = true; } - if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + + if (++iter >= DISCARD_ISSUE_RATE) goto out; } if (list_empty(pend_list) && dcc->pend_list_tag[i] & P_TRIM) @@ -1097,6 +1107,9 @@ out: blk_finish_plug(&plug); mutex_unlock(&dcc->cmd_lock); + if (!issued && io_interrupted) + issued = -1; + return issued; } -- cgit v1.2.3