diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-09-04 09:39:50 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2009-09-04 09:39:50 +1000 |
commit | 4d95593b2938e80a337d21b9de7117718c7c7fbe (patch) | |
tree | 4d04f5d811242482bbc16226bebe544f61761d72 /fs | |
parent | fbcb7e2b937e64d2acc3f263abbd592428490617 (diff) | |
parent | 0dbfe983be789f9b03dc3a6e52ca55f9d832d52c (diff) |
Merge commit 'ext4/next'
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/Kconfig | 11 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 83 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 3 | ||||
-rw-r--r-- | fs/ext4/extents.c | 112 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 45 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 190 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 22 | ||||
-rw-r--r-- | fs/ext4/migrate.c | 2 | ||||
-rw-r--r-- | fs/ext4/move_extent.c | 1 | ||||
-rw-r--r-- | fs/ext4/namei.c | 20 | ||||
-rw-r--r-- | fs/ext4/super.c | 107 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 4 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 6 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 7 |
16 files changed, 377 insertions, 240 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 418b6f3b0ae8..d5c0ea2e8f2d 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,7 +37,7 @@ config EXT4DEV_COMPAT To enable backwards compatibility so that systems that are still expecting to mount ext4 filesystems using ext4dev, - chose Y here. This feature will go away by 2.6.31, so + choose Y here. This feature will go away by 2.6.31, so please arrange to get your userspace programs fixed! config EXT4_FS_XATTR @@ -77,3 +77,12 @@ config EXT4_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. + +config EXT4_DEBUG + bool "EXT4 debugging support" + depends on EXT4_FS + help + Enables run-time debugging support for the ext4 filesystem. + + If you select Y here, then you will be able to turn on debugging + with a command such as "echo 1 > /sys/kernel/debug/ext4/mballoc-debug" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e2126d70dff5..1d0418980f8d 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -478,7 +478,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, * new bitmap information */ set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); - ext4_mb_update_group_info(grp, blocks_freed); + grp->bb_free += blocks_freed; up_write(&grp->alloc_sem); /* We dirtied the bitmap block */ diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9714db393efe..815a804dc873 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -67,27 +67,29 @@ typedef unsigned int ext4_group_t; /* prefer goal again. length */ -#define EXT4_MB_HINT_MERGE 1 +#define EXT4_MB_HINT_MERGE 0x0001 /* blocks already reserved */ -#define EXT4_MB_HINT_RESERVED 2 +#define EXT4_MB_HINT_RESERVED 0x0002 /* metadata is being allocated */ -#define EXT4_MB_HINT_METADATA 4 +#define EXT4_MB_HINT_METADATA 0x0004 /* first blocks in the file */ -#define EXT4_MB_HINT_FIRST 8 +#define EXT4_MB_HINT_FIRST 0x0008 /* search for the best chunk */ -#define EXT4_MB_HINT_BEST 16 +#define EXT4_MB_HINT_BEST 0x0010 /* data is being allocated */ -#define EXT4_MB_HINT_DATA 32 +#define EXT4_MB_HINT_DATA 0x0020 /* don't preallocate (for tails) */ -#define EXT4_MB_HINT_NOPREALLOC 64 +#define EXT4_MB_HINT_NOPREALLOC 0x0040 /* allocate for locality group */ -#define EXT4_MB_HINT_GROUP_ALLOC 128 +#define EXT4_MB_HINT_GROUP_ALLOC 0x0080 /* allocate goal blocks or none */ -#define EXT4_MB_HINT_GOAL_ONLY 256 +#define EXT4_MB_HINT_GOAL_ONLY 0x0100 /* goal is meaningful */ -#define EXT4_MB_HINT_TRY_GOAL 512 +#define EXT4_MB_HINT_TRY_GOAL 0x0200 /* blocks already pre-reserved by delayed allocation */ -#define EXT4_MB_DELALLOC_RESERVED 1024 +#define EXT4_MB_DELALLOC_RESERVED 0x0400 +/* We are doing stream allocation */ +#define EXT4_MB_STREAM_ALLOC 0x0800 struct ext4_allocation_request { @@ -112,6 +114,21 @@ struct ext4_allocation_request { }; /* + * For delayed allocation tracking + */ +struct mpage_da_data { + struct inode *inode; + sector_t b_blocknr; /* start block number of extent */ + size_t b_size; /* size of extent */ + unsigned long b_state; /* state of the extent */ + unsigned long first_page, next_page; /* extent of pages */ + struct writeback_control *wbc; + int io_done; + int pages_written; + int retval; +}; + +/* * Special inodes numbers */ #define EXT4_BAD_INO 1 /* Bad blocks inode */ @@ -950,6 +967,7 @@ struct ext4_sb_info { atomic_t s_mb_lost_chunks; atomic_t s_mb_preallocated; atomic_t s_mb_discarded; + atomic_t s_lock_busy; /* locality groups */ struct ext4_locality_group *s_locality_groups; @@ -1340,8 +1358,6 @@ extern void ext4_mb_free_blocks(handle_t *, struct inode *, ext4_fsblk_t, unsigned long, int, unsigned long *); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); -extern void ext4_mb_update_group_info(struct ext4_group_info *grp, - ext4_grpblk_t add); extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); extern void ext4_mb_put_buddy_cache_lock(struct super_block *, ext4_group_t, int); @@ -1367,6 +1383,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); +extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); extern int ext4_alloc_da_blocks(struct inode *inode); @@ -1575,15 +1592,18 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) struct ext4_group_info { unsigned long bb_state; struct rb_root bb_free_root; - unsigned short bb_first_free; - unsigned short bb_free; - unsigned short bb_fragments; + ext4_grpblk_t bb_first_free; /* first free block */ + ext4_grpblk_t bb_free; /* total free blocks */ + ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ struct list_head bb_prealloc_list; #ifdef DOUBLE_CHECK void *bb_bitmap; #endif struct rw_semaphore alloc_sem; - unsigned short bb_counters[]; + ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block + * regions, index is order. + * bb_counters[3] = 5 means + * 5 free 8-block regions. */ }; #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 @@ -1591,15 +1611,42 @@ struct ext4_group_info { #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) +#define EXT4_MAX_CONTENTION 8 +#define EXT4_CONTENTION_THRESHOLD 2 + static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, ext4_group_t group) { return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); } +/* + * Returns true if the filesystem is busy enough that attempts to + * access the block group locks has run into contention. + */ +static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi) +{ + return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD); +} + static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) { - spin_lock(ext4_group_lock_ptr(sb, group)); + spinlock_t *lock = ext4_group_lock_ptr(sb, group); + if (spin_trylock(lock)) + /* + * We're able to grab the lock right away, so drop the + * lock contention counter. + */ + atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0); + else { + /* + * The lock is busy, so bump the contention counter, + * and then wait on the spin lock. + */ + atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1, + EXT4_MAX_CONTENTION); + spin_lock(lock); + } } static inline void ext4_unlock_group(struct super_block *sb, diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 20a84105a10b..9538633879d7 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -43,8 +43,7 @@ #define CHECK_BINSEARCH__ /* - * If EXT_DEBUG is defined you can use the 'extdebug' mount option - * to get lots of info about what's going on. + * Turn on EXT_DEBUG to get lots of info about extents operations. */ #define EXT_DEBUG__ #ifdef EXT_DEBUG diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 73ebfb44ad75..f7bdd5590cd1 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -93,7 +93,9 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } -static int ext4_ext_journal_restart(handle_t *handle, int needed) +static int ext4_ext_truncate_extend_restart(handle_t *handle, + struct inode *inode, + int needed) { int err; @@ -104,7 +106,14 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) err = ext4_journal_extend(handle, needed); if (err <= 0) return err; - return ext4_journal_restart(handle, needed); + err = ext4_truncate_restart_trans(handle, inode, needed); + /* + * We have dropped i_data_sem so someone might have cached again + * an extent we are going to truncate. + */ + ext4_ext_invalidate_cache(inode); + + return err; } /* @@ -220,57 +229,65 @@ ext4_ext_new_meta_block(handle_t *handle, struct inode *inode, return newblock; } -static int ext4_ext_space_block(struct inode *inode) +static inline int ext4_ext_space_block(struct inode *inode, int check) { int size; size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent); + if (!check) { #ifdef AGGRESSIVE_TEST - if (size > 6) - size = 6; + if (size > 6) + size = 6; #endif + } return size; } -static int ext4_ext_space_block_idx(struct inode *inode) +static inline int ext4_ext_space_block_idx(struct inode *inode, int check) { int size; size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent_idx); + if (!check) { #ifdef AGGRESSIVE_TEST - if (size > 5) - size = 5; + if (size > 5) + size = 5; #endif + } return size; } -static int ext4_ext_space_root(struct inode *inode) +static inline int ext4_ext_space_root(struct inode *inode, int check) { int size; size = sizeof(EXT4_I(inode)->i_data); size -= sizeof(struct ext4_extent_header); size /= sizeof(struct ext4_extent); + if (!check) { #ifdef AGGRESSIVE_TEST - if (size > 3) - size = 3; + if (size > 3) + size = 3; #endif + } return size; } -static int ext4_ext_space_root_idx(struct inode *inode) +static inline int ext4_ext_space_root_idx(struct inode *inode, int check) { int size; size = sizeof(EXT4_I(inode)->i_data); size -= sizeof(struct ext4_extent_header); size /= sizeof(struct ext4_extent_idx); + if (!check) { #ifdef AGGRESSIVE_TEST - if (size > 4) - size = 4; + if (size > 4) + size = 4; #endif + } return size; } @@ -284,9 +301,9 @@ int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) int lcap, icap, rcap, leafs, idxs, num; int newextents = blocks; - rcap = ext4_ext_space_root_idx(inode); - lcap = ext4_ext_space_block(inode); - icap = ext4_ext_space_block_idx(inode); + rcap = ext4_ext_space_root_idx(inode, 0); + lcap = ext4_ext_space_block(inode, 0); + icap = ext4_ext_space_block_idx(inode, 0); /* number of new leaf blocks needed */ num = leafs = (newextents + lcap - 1) / lcap; @@ -311,14 +328,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) if (depth == ext_depth(inode)) { if (depth == 0) - max = ext4_ext_space_root(inode); + max = ext4_ext_space_root(inode, 1); else - max = ext4_ext_space_root_idx(inode); + max = ext4_ext_space_root_idx(inode, 1); } else { if (depth == 0) - max = ext4_ext_space_block(inode); + max = ext4_ext_space_block(inode, 1); else - max = ext4_ext_space_block_idx(inode); + max = ext4_ext_space_block_idx(inode, 1); } return max; @@ -437,8 +454,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), idx_pblock(path->p_idx)); } else if (path->p_ext) { - ext_debug(" %d:%d:%llu ", + ext_debug(" %d:[%d]%d:%llu ", le32_to_cpu(path->p_ext->ee_block), + ext4_ext_is_uninitialized(path->p_ext), ext4_ext_get_actual_len(path->p_ext), ext_pblock(path->p_ext)); } else @@ -460,8 +478,11 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) eh = path[depth].p_hdr; ex = EXT_FIRST_EXTENT(eh); + ext_debug("Displaying leaf extents for inode %lu\n", inode->i_ino); + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), + ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block), + ext4_ext_is_uninitialized(ex), ext4_ext_get_actual_len(ex), ext_pblock(ex)); } ext_debug("\n"); @@ -580,9 +601,10 @@ ext4_ext_binsearch(struct inode *inode, } path->p_ext = l - 1; - ext_debug(" -> %d:%llu:%d ", + ext_debug(" -> %d:%llu:[%d]%d ", le32_to_cpu(path->p_ext->ee_block), ext_pblock(path->p_ext), + ext4_ext_is_uninitialized(path->p_ext), ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH @@ -612,7 +634,7 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode) eh->eh_depth = 0; eh->eh_entries = 0; eh->eh_magic = EXT4_EXT_MAGIC; - eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); + eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0)); ext4_mark_inode_dirty(handle, inode); ext4_ext_invalidate_cache(inode); return 0; @@ -837,7 +859,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh = ext_block_hdr(bh); neh->eh_entries = 0; - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; ex = EXT_FIRST_EXTENT(neh); @@ -850,9 +872,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, path[depth].p_ext++; while (path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%llu:%d in new leaf %llu\n", + ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n", le32_to_cpu(path[depth].p_ext->ee_block), ext_pblock(path[depth].p_ext), + ext4_ext_is_uninitialized(path[depth].p_ext), ext4_ext_get_actual_len(path[depth].p_ext), newblock); /*memmove(ex++, path[depth].p_ext++, @@ -912,7 +935,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh = ext_block_hdr(bh); neh->eh_entries = cpu_to_le16(1); neh->eh_magic = EXT4_EXT_MAGIC; - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); neh->eh_depth = cpu_to_le16(depth - i); fidx = EXT_FIRST_INDEX(neh); fidx->ei_block = border; @@ -1037,9 +1060,9 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, /* old root could have indexes or leaves * so calculate e_max right way */ if (ext_depth(inode)) - neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0)); else - neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0)); neh->eh_magic = EXT4_EXT_MAGIC; set_buffer_uptodate(bh); unlock_buffer(bh); @@ -1054,7 +1077,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, goto out; curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; - curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); + curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0)); curp->p_hdr->eh_entries = cpu_to_le16(1); curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); @@ -1580,9 +1603,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* try to insert block into found extent and return */ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append %d block to %d:%d (from %llu)\n", + ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n", + ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), + ext4_ext_is_uninitialized(ex), ext4_ext_get_actual_len(ex), ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) @@ -1651,9 +1676,10 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %d:%llu:%d\n", + ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), + ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) @@ -1663,10 +1689,11 @@ has_space: len = EXT_MAX_EXTENT(eh) - nearex; len = (len - 1) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; - ext_debug("insert %d:%llu:%d after: nearest 0x%p, " + ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), + ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); @@ -1676,10 +1703,11 @@ has_space: BUG_ON(newext->ee_block == nearex->ee_block); len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; - ext_debug("insert %d:%llu:%d before: nearest 0x%p, " + ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), + ext4_ext_is_uninitialized(newext), ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 1, nearex, len); @@ -2094,7 +2122,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, else uninitialized = 0; - ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); + ext_debug("remove ext %u:[%d]%d\n", ex_ee_block, + uninitialized, ex_ee_len); path[depth].p_ext = ex; a = ex_ee_block > start ? ex_ee_block : start; @@ -2138,7 +2167,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, } credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); - err = ext4_ext_journal_restart(handle, credits); + err = ext4_ext_truncate_extend_restart(handle, inode, credits); if (err) goto out; @@ -2327,7 +2356,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start) if (err == 0) { ext_inode_hdr(inode)->eh_depth = 0; ext_inode_hdr(inode)->eh_max = - cpu_to_le16(ext4_ext_space_root(inode)); + cpu_to_le16(ext4_ext_space_root(inode, 0)); err = ext4_ext_dirty(handle, inode, path); } } @@ -2743,6 +2772,7 @@ insert: } else if (err) goto fix_extent_len; out: + ext4_ext_show_leaf(inode, path); return err ? err : allocated; fix_extent_len: @@ -2786,7 +2816,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; __clear_bit(BH_New, &bh_result->b_state); - ext_debug("blocks %u/%u requested for inode %u\n", + ext_debug("blocks %u/%u requested for inode %lu\n", iblock, max_blocks, inode->i_ino); /* check in cache */ @@ -2849,7 +2879,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); - ext_debug("%u fit into %lu:%d -> %llu\n", iblock, + ext_debug("%u fit into %u:%d -> %llu\n", iblock, ee_block, ee_len, newblock); /* Do not put uninitialized extent in the cache */ @@ -2950,7 +2980,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext4_mb_new_blocks(handle, &ar, &err); if (!newblock) goto out2; - ext_debug("allocate new block: goal %llu, found %llu/%lu\n", + ext_debug("allocate new block: goal %llu, found %llu/%u\n", ar.goal, newblock, allocated); /* try to insert new extent into found leaf and return */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 29e6dc7299b8..f3624ead4f6c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1189,7 +1189,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n", - i, ext4_free_inodes_count(sb, gdp), x); + (unsigned long) i, ext4_free_inodes_count(sb, gdp), x); bitmap_count += x; } brelse(bitmap_bh); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f9c642b22efa..17802a96af9f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -192,11 +192,24 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) * so before we call here everything must be consistently dirtied against * this transaction. */ -static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) + int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, + int nblocks) { + int ret; + + /* + * Drop i_data_sem to avoid deadlock with ext4_get_blocks At this + * moment, get_block can be called only for blocks inside i_size since + * page cache has been already dropped and writes are blocked by + * i_mutex. So we can safely drop the i_data_sem here. + */ BUG_ON(EXT4_JOURNAL(inode) == NULL); jbd_debug(2, "restarting handle %p\n", handle); - return ext4_journal_restart(handle, blocks_for_truncate(inode)); + up_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); + down_write(&EXT4_I(inode)->i_data_sem); + + return ret; } /* @@ -341,9 +354,7 @@ static int ext4_block_to_path(struct inode *inode, int n = 0; int final = 0; - if (i_block < 0) { - ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { + if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; } else if ((i_block -= direct_blocks) < indirect_blocks) { @@ -762,8 +773,9 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, BUFFER_TRACE(bh, "call get_create_access"); err = ext4_journal_get_create_access(handle, bh); if (err) { + /* Don't brelse(bh) here; it's done in + * ext4_journal_forget() below */ unlock_buffer(bh); - brelse(bh); goto failed; } @@ -1863,18 +1875,6 @@ static void ext4_da_page_release_reservation(struct page *page, * Delayed allocation stuff */ -struct mpage_da_data { - struct inode *inode; - sector_t b_blocknr; /* start block number of extent */ - size_t b_size; /* size of extent */ - unsigned long b_state; /* state of the extent */ - unsigned long first_page, next_page; /* extent of pages */ - struct writeback_control *wbc; - int io_done; - int pages_written; - int retval; -}; - /* * mpage_da_submit_io - walks through extent of pages and try to write * them with writepage() call back @@ -2737,6 +2737,7 @@ static int ext4_da_writepages(struct address_space *mapping, long pages_skipped; int range_cyclic, cycled = 1, io_done = 0; int needed_blocks, ret = 0, nr_to_writebump = 0; + loff_t range_start = wbc->range_start; struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); trace_ext4_da_writepages(inode, wbc); @@ -2850,6 +2851,7 @@ retry: mpd.io_done = 1; ret = MPAGE_DA_EXTENT_TAIL; } + trace_ext4_da_write_pages(inode, &mpd); wbc->nr_to_write -= mpd.pages_written; ext4_journal_stop(handle); @@ -2905,6 +2907,7 @@ out_writepages: if (!no_nrwrite_index_update) wbc->no_nrwrite_index_update = 0; wbc->nr_to_write -= nr_to_writebump; + wbc->range_start = range_start; trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); return ret; } @@ -3659,7 +3662,8 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode, ext4_handle_dirty_metadata(handle, inode, bh); } ext4_mark_inode_dirty(handle, inode); - ext4_journal_test_restart(handle, inode); + ext4_truncate_restart_trans(handle, inode, + blocks_for_truncate(inode)); if (bh) { BUFFER_TRACE(bh, "retaking write access"); ext4_journal_get_write_access(handle, bh); @@ -3870,7 +3874,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, return; if (try_to_extend_transaction(handle, inode)) { ext4_mark_inode_dirty(handle, inode); - ext4_journal_test_restart(handle, inode); + ext4_truncate_restart_trans(handle, inode, + blocks_for_truncate(inode)); } ext4_free_blocks(handle, inode, nr, 1, 1); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cd258463e2a9..3086b3c65adc 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -22,6 +22,7 @@ */ #include "mballoc.h" +#include <linux/debugfs.h> #include <trace/events/ext4.h> /* @@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, /* FIXME!! need more doc */ static void ext4_mb_mark_free_simple(struct super_block *sb, - void *buddy, unsigned first, int len, + void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, struct ext4_group_info *grp) { struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned short min; - unsigned short max; - unsigned short chunk; + ext4_grpblk_t min; + ext4_grpblk_t max; + ext4_grpblk_t chunk; unsigned short border; BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); @@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); - unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); - unsigned short i = 0; - unsigned short first; - unsigned short len; + ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb); + ext4_grpblk_t i = 0; + ext4_grpblk_t first; + ext4_grpblk_t len; unsigned free = 0; unsigned fragments = 0; unsigned long long period = get_cycles(); @@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) char *data; char *bitmap; - mb_debug("init page %lu\n", page->index); + mb_debug(1, "init page %lu\n", page->index); inode = page->mapping->host; sb = inode->i_sb; @@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) set_bitmap_uptodate(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); - mb_debug("read bitmap for group %u\n", first_group + i); + mb_debug(1, "read bitmap for group %u\n", first_group + i); } /* wait for I/O completion */ @@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if ((first_block + i) & 1) { /* this is block of buddy */ BUG_ON(incore == NULL); - mb_debug("put buddy for group %u in page %lu/%x\n", + mb_debug(1, "put buddy for group %u in page %lu/%x\n", group, page->index, i * blocksize); grinfo = ext4_get_group_info(sb, group); grinfo->bb_fragments = 0; memset(grinfo->bb_counters, 0, - sizeof(unsigned short)*(sb->s_blocksize_bits+2)); + sizeof(*grinfo->bb_counters) * + (sb->s_blocksize_bits+2)); /* * incore got set to the group block bitmap below */ @@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) } else { /* this is block of bitmap */ BUG_ON(incore != NULL); - mb_debug("put bitmap for group %u in page %lu/%x\n", + mb_debug(1, "put bitmap for group %u in page %lu/%x\n", group, page->index, i * blocksize); /* see comments in ext4_mb_put_pa() */ @@ -922,7 +924,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; - mb_debug("load group %u\n", group); + mb_debug(1, "load group %u\n", group); blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; grp = ext4_get_group_info(sb, group); @@ -1360,7 +1362,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac, ac->alloc_semp = e4b->alloc_semp; e4b->alloc_semp = NULL; /* store last allocated for subsequent stream allocation */ - if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { + if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { spin_lock(&sbi->s_md_lock); sbi->s_mb_last_group = ac->ac_f_ex.fe_group; sbi->s_mb_last_start = ac->ac_f_ex.fe_start; @@ -1851,7 +1853,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) struct inode *inode = sbi->s_buddy_cache; struct page *page = NULL, *bitmap_page = NULL; - mb_debug("init group %lu\n", group); + mb_debug(1, "init group %u\n", group); blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; this_grp = ext4_get_group_info(sb, group); /* @@ -1938,7 +1940,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi; struct super_block *sb; struct ext4_buddy e4b; - loff_t size, isize; sb = ac->ac_sb; sbi = EXT4_SB(sb); @@ -1974,20 +1975,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) } bsbits = ac->ac_sb->s_blocksize_bits; - /* if stream allocation is enabled, use global goal */ - size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; - isize = i_size_read(ac->ac_inode) >> bsbits; - if (size < isize) - size = isize; - if (size < sbi->s_mb_stream_request && - (ac->ac_flags & EXT4_MB_HINT_DATA)) { + /* if stream allocation is enabled, use global goal */ + if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { /* TBD: may be hot point */ spin_lock(&sbi->s_md_lock); ac->ac_g_ex.fe_group = sbi->s_mb_last_group; ac->ac_g_ex.fe_start = sbi->s_mb_last_start; spin_unlock(&sbi->s_md_lock); } + /* Let's just scan groups to find more-less suitable blocks */ cr = ac->ac_2order ? 0 : 1; /* @@ -2156,7 +2153,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " - "%-5s %-2s %-5s %-5s %-5s %-6s\n", + "%-5s %-2s %-6s %-5s %-5s %-6s\n", "pid", "inode", "original", "goal", "result", "found", "grps", "cr", "flags", "merge", "tail", "broken"); return 0; @@ -2164,7 +2161,7 @@ static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) if (hs->op == EXT4_MB_HISTORY_ALLOC) { fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " - "%-5u %-5s %-5u %-6u\n"; + "0x%04x %-5s %-5u %-6u\n"; sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, hs->result.fe_start, hs->result.fe_len, hs->result.fe_logical); @@ -2328,7 +2325,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_buddy e4b; struct sg { struct ext4_group_info info; - unsigned short counters[16]; + ext4_grpblk_t counters[16]; } sg; group--; @@ -2532,7 +2529,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); init_rwsem(&meta_group_info[i]->alloc_sem); - meta_group_info[i]->bb_free_root.rb_node = NULL;; + meta_group_info[i]->bb_free_root.rb_node = NULL; #ifdef DOUBLE_CHECK { @@ -2558,26 +2555,15 @@ exit_meta_group_info: return -ENOMEM; } /* ext4_mb_add_groupinfo */ -/* - * Update an existing group. - * This function is used for online resize - */ -void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) -{ - grp->bb_free += add; -} - static int ext4_mb_init_backend(struct super_block *sb) { ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; - int metalen; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; int num_meta_group_infos; int num_meta_group_infos_max; int array_size; - struct ext4_group_info **meta_group_info; struct ext4_group_desc *desc; /* This is the number of blocks used by GDT */ @@ -2622,22 +2608,6 @@ static int ext4_mb_init_backend(struct super_block *sb) goto err_freesgi; } EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; - - metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb); - for (i = 0; i < num_meta_group_infos; i++) { - if ((i + 1) == num_meta_group_infos) - metalen = sizeof(*meta_group_info) * - (ngroups - - (i << EXT4_DESC_PER_BLOCK_BITS(sb))); - meta_group_info = kmalloc(metalen, GFP_KERNEL); - if (meta_group_info == NULL) { - printk(KERN_ERR "EXT4-fs: can't allocate mem for a " - "buddy group\n"); - goto err_freemeta; - } - sbi->s_group_info[i] = meta_group_info; - } - for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { @@ -2655,7 +2625,6 @@ err_freebuddy: while (i-- > 0) kfree(ext4_get_group_info(sb, i)); i = num_meta_group_infos; -err_freemeta: while (i-- > 0) kfree(sbi->s_group_info[i]); iput(sbi->s_buddy_cache); @@ -2672,14 +2641,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) unsigned max; int ret; - i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); + i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_offsets == NULL) { return -ENOMEM; } - i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); + i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_maxs == NULL) { kfree(sbi->s_mb_offsets); @@ -2758,7 +2727,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) kmem_cache_free(ext4_pspace_cachep, pa); } if (count) - mb_debug("mballoc: %u PAs left\n", count); + mb_debug(1, "mballoc: %u PAs left\n", count); } @@ -2839,7 +2808,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) list_for_each_safe(l, ltmp, &txn->t_private_list) { entry = list_entry(l, struct ext4_free_data, list); - mb_debug("gonna free %u blocks in group %u (0x%p):", + mb_debug(1, "gonna free %u blocks in group %u (0x%p):", entry->count, entry->group, entry); err = ext4_mb_load_buddy(sb, entry->group, &e4b); @@ -2874,9 +2843,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) ext4_mb_release_desc(&e4b); } - mb_debug("freed %u blocks in %u structures\n", count, count2); + mb_debug(1, "freed %u blocks in %u structures\n", count, count2); +} + +#ifdef CONFIG_EXT4_DEBUG +u8 mb_enable_debug __read_mostly; + +static struct dentry *debugfs_dir; +static struct dentry *debugfs_debug; + +static void __init ext4_create_debugfs_entry(void) +{ + debugfs_dir = debugfs_create_dir("ext4", NULL); + if (debugfs_dir) + debugfs_debug = debugfs_create_u8("mballoc-debug", + S_IRUGO | S_IWUSR, + debugfs_dir, + &mb_enable_debug); +} + +static void ext4_remove_debugfs_entry(void) +{ + debugfs_remove(debugfs_debug); + debugfs_remove(debugfs_dir); +} + +#else + +static void __init ext4_create_debugfs_entry(void) +{ } +static void ext4_remove_debugfs_entry(void) +{ +} + +#endif + int __init init_ext4_mballoc(void) { ext4_pspace_cachep = @@ -2904,6 +2907,7 @@ int __init init_ext4_mballoc(void) kmem_cache_destroy(ext4_ac_cachep); return -ENOMEM; } + ext4_create_debugfs_entry(); return 0; } @@ -2917,6 +2921,7 @@ void exit_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_free_ext_cachep); + ext4_remove_debugfs_entry(); } @@ -3061,7 +3066,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac) ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; else ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; - mb_debug("#%u: goal %u blocks for locality group\n", + mb_debug(1, "#%u: goal %u blocks for locality group\n", current->pid, ac->ac_g_ex.fe_len); } @@ -3180,23 +3185,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || ac->ac_o_ex.fe_logical < pa->pa_lstart)); - /* skip PA normalized request doesn't overlap with */ - if (pa->pa_lstart >= end) { - spin_unlock(&pa->pa_lock); - continue; - } - if (pa_end <= start) { + /* skip PAs this normalized request doesn't overlap with */ + if (pa->pa_lstart >= end || pa_end <= start) { spin_unlock(&pa->pa_lock); continue; } BUG_ON(pa->pa_lstart <= start && pa_end >= end); + /* adjust start or end to be adjacent to this pa */ if (pa_end <= ac->ac_o_ex.fe_logical) { BUG_ON(pa_end < start); start = pa_end; - } - - if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { + } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) { BUG_ON(pa->pa_lstart > end); end = pa->pa_lstart; } @@ -3251,7 +3251,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; } - mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, + mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size, (unsigned) orig_size, (unsigned) start); } @@ -3300,7 +3300,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, BUG_ON(pa->pa_free < len); pa->pa_free -= len; - mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); + mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa); } /* @@ -3324,7 +3324,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, * in on-disk bitmap -- see ext4_mb_release_context() * Other CPUs are prevented from allocating from this pa by lg_mutex */ - mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); + mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); } /* @@ -3503,7 +3503,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, preallocated += len; count++; } - mb_debug("prellocated %u for group %u\n", preallocated, group); + mb_debug(1, "prellocated %u for group %u\n", preallocated, group); } static void ext4_mb_pa_callback(struct rcu_head *head) @@ -3638,7 +3638,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) pa->pa_deleted = 0; pa->pa_type = MB_INODE_PA; - mb_debug("new inode pa %p: %llu/%u for %u\n", pa, + mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); trace_ext4_mb_new_inode_pa(ac, pa); @@ -3698,7 +3698,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_deleted = 0; pa->pa_type = MB_GROUP_PA; - mb_debug("new group pa %p: %llu/%u for %u\n", pa, + mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa, pa->pa_pstart, pa->pa_len, pa->pa_lstart); trace_ext4_mb_new_group_pa(ac, pa); @@ -3777,7 +3777,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, next = mb_find_next_bit(bitmap_bh->b_data, end, bit); start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + le32_to_cpu(sbi->s_es->s_first_data_block); - mb_debug(" free preallocated %u/%u in group %u\n", + mb_debug(1, " free preallocated %u/%u in group %u\n", (unsigned) start, (unsigned) next - bit, (unsigned) group); free += next - bit; @@ -3868,7 +3868,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, int busy = 0; int free = 0; - mb_debug("discard preallocation for group %u\n", group); + mb_debug(1, "discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) return 0; @@ -3992,7 +3992,7 @@ void ext4_discard_preallocations(struct inode *inode) return; } - mb_debug("discard preallocation for inode %lu\n", inode->i_ino); + mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino); trace_ext4_discard_preallocations(inode); INIT_LIST_HEAD(&list); @@ -4097,7 +4097,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, { BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); } -#ifdef MB_DEBUG +#ifdef CONFIG_EXT4_DEBUG static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; @@ -4139,14 +4139,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) ext4_get_group_no_and_offset(sb, pa->pa_pstart, NULL, &start); spin_unlock(&pa->pa_lock); - printk(KERN_ERR "PA:%lu:%d:%u \n", i, - start, pa->pa_len); + printk(KERN_ERR "PA:%u:%d:%u \n", i, + start, pa->pa_len); } ext4_unlock_group(sb, i); if (grp->bb_free == 0) continue; - printk(KERN_ERR "%lu: %d/%d \n", + printk(KERN_ERR "%u: %d/%d \n", i, grp->bb_free, grp->bb_fragments); } printk(KERN_ERR "\n"); @@ -4174,16 +4174,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) return; + if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return; + size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; - isize = i_size_read(ac->ac_inode) >> bsbits; + isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) + >> bsbits; size = max(size, isize); - /* don't use group allocation for large files */ - if (size >= sbi->s_mb_stream_request) + if ((size == isize) && + !ext4_fs_is_busy(sbi) && + (atomic_read(&ac->ac_inode->i_writecount) == 0)) { + ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; return; + } - if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + /* don't use group allocation for large files */ + if (size >= sbi->s_mb_stream_request) { + ac->ac_flags |= EXT4_MB_STREAM_ALLOC; return; + } BUG_ON(ac->ac_lg != NULL); /* @@ -4246,7 +4256,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, * locality group. this is a policy, actually */ ext4_mb_group_or_file(ac); - mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " + mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " "left: %u/%u, right %u/%u to %swritable\n", (unsigned) ar->len, (unsigned) ar->logical, (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, @@ -4268,7 +4278,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, struct ext4_prealloc_space *pa, *tmp; struct ext4_allocation_context *ac; - mb_debug("discard locality group preallocation\n"); + mb_debug(1, "discard locality group preallocation\n"); INIT_LIST_HEAD(&discard_list); ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c96bb19f58f9..878ace38c9f1 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -37,11 +37,19 @@ /* */ -#define MB_DEBUG__ -#ifdef MB_DEBUG -#define mb_debug(fmt, a...) printk(fmt, ##a) +#ifdef CONFIG_EXT4_DEBUG +extern u8 mb_enable_debug; + +#define mb_debug(n, fmt, a...) \ + do { \ + if ((n) <= mb_enable_debug) { \ + printk (KERN_DEBUG "(%s, %d): %s: ", \ + __FILE__, __LINE__, __func__); \ + printk (fmt, ## a); \ + } \ + } while (0) #else -#define mb_debug(fmt, a...) +#define mb_debug(n, fmt, a...) #endif /* @@ -128,8 +136,8 @@ struct ext4_prealloc_space { unsigned pa_deleted; ext4_fsblk_t pa_pstart; /* phys. block */ ext4_lblk_t pa_lstart; /* log. block */ - unsigned short pa_len; /* len of preallocated chunk */ - unsigned short pa_free; /* how many blocks are free */ + ext4_grpblk_t pa_len; /* len of preallocated chunk */ + ext4_grpblk_t pa_free; /* how many blocks are free */ unsigned short pa_type; /* pa type. inode or group */ spinlock_t *pa_obj_lock; struct inode *pa_inode; /* hack, for history only */ @@ -144,7 +152,7 @@ struct ext4_free_extent { ext4_lblk_t fe_logical; ext4_grpblk_t fe_start; ext4_group_t fe_group; - int fe_len; + ext4_grpblk_t fe_len; }; /* diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 313a50b39741..05361ad5b80a 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -618,7 +618,7 @@ err_out: tmp_inode->i_nlink = 0; ext4_journal_stop(handle); - + unlock_new_inode(tmp_inode); iput(tmp_inode); return retval; diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index bbf2dd9404dc..5821e0bee917 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -871,6 +871,7 @@ out: if (PageLocked(page)) unlock_page(page); page_cache_release(page); + ext4_journal_stop(handle); } out2: ext4_journal_stop(handle); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index de04013d16ff..f92cc06671f5 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1518,8 +1518,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return retval; if (blocks == 1 && !dx_fallback && - EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) - return make_indexed_dir(handle, dentry, inode, bh); + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { + retval = make_indexed_dir(handle, dentry, inode, bh); + if (retval == -ENOSPC) + brelse(bh); + return retval; + } brelse(bh); } bh = ext4_append(handle, dir, &block, &retval); @@ -1528,7 +1532,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); - return add_dirent_to_buf(handle, dentry, inode, de, bh); + retval = add_dirent_to_buf(handle, dentry, inode, de, bh); + if (retval == -ENOSPC) + brelse(bh); + return retval; } /* @@ -1657,7 +1664,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (!de) goto cleanup; err = add_dirent_to_buf(handle, dentry, inode, de, bh); - bh = NULL; + if (err != -ENOSPC) + bh = NULL; goto cleanup; journal_error: @@ -2310,7 +2318,7 @@ static int ext4_link(struct dentry *old_dentry, struct inode *inode = old_dentry->d_inode; int err, retries = 0; - if (EXT4_DIR_LINK_MAX(inode)) + if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; /* @@ -2413,7 +2421,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, goto end_rename; retval = -EMLINK; if (!new_inode && new_dir != old_dir && - new_dir->i_nlink >= EXT4_LINK_MAX) + EXT4_DIR_LINK_MAX(new_dir)) goto end_rename; } if (!new_bh) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f4f079e6b9a..4037fe0b5a5c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -344,7 +344,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno, errstr = "Out of memory"; break; case -EROFS: - if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT) + if (!sb || (EXT4_SB(sb)->s_journal && + EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)) errstr = "Journal has aborted"; else errstr = "Readonly filesystem"; @@ -2253,6 +2254,49 @@ static struct kobj_type ext4_ktype = { .release = ext4_sb_release, }; +/* + * Check whether this filesystem can be mounted based on + * the features present and the RDONLY/RDWR mount requested. + * Returns 1 if this filesystem can be mounted as requested, + * 0 if it cannot be. + */ +static int ext4_feature_set_ok(struct super_block *sb, int readonly) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, + "Couldn't mount because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & + ~EXT4_FEATURE_INCOMPAT_SUPP)); + return 0; + } + + if (readonly) + return 1; + + /* Check that feature set is OK for a read-write mount */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP)) { + ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of " + "unsupported optional features (%x)", + (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & + ~EXT4_FEATURE_RO_COMPAT_SUPP)); + return 0; + } + /* + * Large file size enabled file system can only be mounted + * read-write on 32-bit systems if kernel is built with CONFIG_LBDAF + */ + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { + if (sizeof(blkcnt_t) < sizeof(u64)) { + ext4_msg(sb, KERN_ERR, "Filesystem with huge files " + "cannot be mounted RDWR without " + "CONFIG_LBDAF"); + return 0; + } + } + return 1; +} + static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) @@ -2274,7 +2318,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned int db_count; unsigned int i; int needs_recovery, has_huge_files; - int features; __u64 blocks_count; int err; unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; @@ -2401,39 +2444,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. */ - features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); - if (features) { - ext4_msg(sb, KERN_ERR, - "Couldn't mount because of " - "unsupported optional features (%x)", - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & - ~EXT4_FEATURE_INCOMPAT_SUPP)); + if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) goto failed_mount; - } - features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); - if (!(sb->s_flags & MS_RDONLY) && features) { - ext4_msg(sb, KERN_ERR, - "Couldn't mount RDWR because of " - "unsupported optional features (%x)", - (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); - goto failed_mount; - } - has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, - EXT4_FEATURE_RO_COMPAT_HUGE_FILE); - if (has_huge_files) { - /* - * Large file size enabled file system can only be - * mount if kernel is build with CONFIG_LBDAF - */ - if (sizeof(root->i_blocks) < sizeof(u64) && - !(sb->s_flags & MS_RDONLY)) { - ext4_msg(sb, KERN_ERR, "Filesystem with huge " - "files cannot be mounted read-write " - "without CONFIG_LBDAF"); - goto failed_mount; - } - } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || @@ -2469,6 +2482,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } + has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_HUGE_FILE); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); @@ -2549,12 +2564,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } - if (ext4_blocks_count(es) > - (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + /* + * Test whether we have more sectors than will fit in sector_t, + * and whether the max offset is addressable by the page cache. + */ + if ((ext4_blocks_count(es) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || + (ext4_blocks_count(es) > + (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { ext4_msg(sb, KERN_ERR, "filesystem" - " too large to mount safely"); + " too large to mount safely on this system"); if (sizeof(sector_t) < 8) ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); + ret = -EFBIG; goto failed_mount; } @@ -3477,18 +3499,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); } else { - int ret; - if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, - ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - ext4_msg(sb, KERN_WARNING, "couldn't " - "remount RDWR because of unsupported " - "optional features (%x)", - (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & - ~EXT4_FEATURE_RO_COMPAT_SUPP)); + /* Make sure we can mount this feature set readwrite */ + if (!ext4_feature_set_ok(sb, 0)) { err = -EROFS; goto restore_opts; } - /* * Make sure the group descriptor checksums * are sane. If they aren't, refuse to remount r/w. diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7b4088b2364d..a7fe81df5695 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -132,9 +132,7 @@ static int journal_submit_commit_record(journal_t *journal, set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - if (journal->j_flags & JBD2_BARRIER && - !JBD2_HAS_INCOMPAT_FEATURE(journal, - JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { + if (journal->j_flags & JBD2_BARRIER) { set_buffer_ordered(bh); barrier_done = 1; } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index e378cb383979..a8a358bc0f21 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1187,6 +1187,12 @@ static int journal_reset(journal_t *journal) first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); + if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { + printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n", + first, last); + journal_fail_superblock(journal); + return -EINVAL; + } journal->j_first = first; journal->j_last = last; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6213ac728f30..a0512700542f 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) INIT_LIST_HEAD(&transaction->t_private_list); /* Set up the commit timer for the new transaction. */ - journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); + journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); add_timer(&journal->j_commit_timer); J_ASSERT(journal->j_running_transaction == NULL); @@ -238,6 +238,8 @@ repeat_locked: __jbd2_log_space_left(journal)); spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); + + lock_map_acquire(&handle->h_lockdep_map); out: if (unlikely(new_transaction)) /* It's usually NULL */ kfree(new_transaction); @@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) handle = ERR_PTR(err); goto out; } - - lock_map_acquire(&handle->h_lockdep_map); out: return handle; } @@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) __jbd2_log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); + lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; ret = start_this_handle(journal, handle); return ret; |