From a2df2a63407803a833f82e1fa6693826c8c9d584 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:41 -0400 Subject: fallocate support in ext4 This patch implements ->fallocate() inode operation in ext4. With this patch users of ext4 file systems will be able to use fallocate() system call for persistent preallocation. Current implementation only supports preallocation for regular files (directories not supported as of date) with extent maps. This patch does not support block-mapped files currently. Only FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are being supported as of now. Signed-off-by: Amit Arora --- fs/ext4/extents.c | 249 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 208 insertions(+), 41 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b9ce24129070..ba25832a756c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -282,7 +283,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) } else if (path->p_ext) { ext_debug(" %d:%d:%llu ", le32_to_cpu(path->p_ext->ee_block), - le16_to_cpu(path->p_ext->ee_len), + ext4_ext_get_actual_len(path->p_ext), ext_pblock(path->p_ext)); } else ext_debug(" []"); @@ -305,7 +306,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); + ext4_ext_get_actual_len(ex), ext_pblock(ex)); } ext_debug("\n"); } @@ -425,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) ext_debug(" -> %d:%llu:%d ", le32_to_cpu(path->p_ext->ee_block), ext_pblock(path->p_ext), - le16_to_cpu(path->p_ext->ee_len)); + ext4_ext_get_actual_len(path->p_ext)); #ifdef CHECK_BINSEARCH { @@ -686,7 +687,7 @@ static int ext4_ext_split(handle_t 
*handle, struct inode *inode, ext_debug("move %d:%llu:%d in new leaf %llu\n", le32_to_cpu(path[depth].p_ext->ee_block), ext_pblock(path[depth].p_ext), - le16_to_cpu(path[depth].p_ext->ee_len), + ext4_ext_get_actual_len(path[depth].p_ext), newblock); /*memmove(ex++, path[depth].p_ext++, sizeof(struct ext4_extent)); @@ -1106,7 +1107,19 @@ static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) != + unsigned short ext1_ee_len, ext2_ee_len; + + /* + * Make sure that either both extents are uninitialized, or + * both are _not_. + */ + if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) + return 0; + + ext1_ee_len = ext4_ext_get_actual_len(ex1); + ext2_ee_len = ext4_ext_get_actual_len(ex2); + + if (le32_to_cpu(ex1->ee_block) + ext1_ee_len != le32_to_cpu(ex2->ee_block)) return 0; @@ -1115,14 +1128,14 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. 
*/ - if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) + if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN) return 0; #ifdef AGGRESSIVE_TEST if (le16_to_cpu(ex1->ee_len) >= 4) return 0; #endif - if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) + if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2)) return 1; return 0; } @@ -1144,7 +1157,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, unsigned int ret = 0; b1 = le32_to_cpu(newext->ee_block); - len1 = le16_to_cpu(newext->ee_len); + len1 = ext4_ext_get_actual_len(newext); depth = ext_depth(inode); if (!path[depth].p_ext) goto out; @@ -1191,8 +1204,9 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_extent *nearex; /* nearest extent */ struct ext4_ext_path *npath = NULL; int depth, len, err, next; + unsigned uninitialized = 0; - BUG_ON(newext->ee_len == 0); + BUG_ON(ext4_ext_get_actual_len(newext) == 0); depth = ext_depth(inode); ex = path[depth].p_ext; BUG_ON(path[depth].p_hdr == NULL); @@ -1200,14 +1214,24 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* try to insert block into found extent and return */ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { ext_debug("append %d block to %d:%d (from %llu)\n", - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), ext_pblock(ex)); + ext4_ext_get_actual_len(ex), ext_pblock(ex)); err = ext4_ext_get_access(handle, inode, path + depth); if (err) return err; - ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) - + le16_to_cpu(newext->ee_len)); + + /* + * ext4_can_extents_be_merged should have checked that either + * both extents are uninitialized, or both aren't. Thus we + * need to check only one of them here. 
+ */ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(newext)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); eh = path[depth].p_hdr; nearex = ex; goto merge; @@ -1263,7 +1287,7 @@ has_space: ext_debug("first extent in the leaf: %d:%llu:%d\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len)); + ext4_ext_get_actual_len(newext)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) > le32_to_cpu(nearex->ee_block)) { @@ -1276,7 +1300,7 @@ has_space: "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); } @@ -1289,7 +1313,7 @@ has_space: "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), - le16_to_cpu(newext->ee_len), + ext4_ext_get_actual_len(newext), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 1, nearex, len); path[depth].p_ext = nearex; @@ -1308,8 +1332,13 @@ merge: if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) break; /* merge with next extent! 
*/ - nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) - + le16_to_cpu(nearex[1].ee_len)); + if (ext4_ext_is_uninitialized(nearex)) + uninitialized = 1; + nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) + + ext4_ext_get_actual_len(nearex + 1)); + if (uninitialized) + ext4_ext_mark_uninitialized(nearex); + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - nearex - 1) * sizeof(struct ext4_extent); @@ -1379,8 +1408,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, end = le32_to_cpu(ex->ee_block); if (block + num < end) end = block + num; - } else if (block >= - le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { + } else if (block >= le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)) { /* need to allocate space after found extent */ start = block; end = block + num; @@ -1392,7 +1421,8 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, * by found extent */ start = block; - end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); + end = le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex); if (block + num < end) end = block + num; exists = 1; @@ -1408,7 +1438,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, cbex.ec_type = EXT4_EXT_CACHE_GAP; } else { cbex.ec_block = le32_to_cpu(ex->ee_block); - cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_len = ext4_ext_get_actual_len(ex); cbex.ec_start = ext_pblock(ex); cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } @@ -1481,15 +1511,15 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, ext_debug("cache gap(before): %lu [%lu:%lu]", (unsigned long) block, (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len)); + (unsigned long) ext4_ext_get_actual_len(ex)); } else if (block >= le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len)) { + + ext4_ext_get_actual_len(ex)) { lblock = le32_to_cpu(ex->ee_block) - + le16_to_cpu(ex->ee_len); + + ext4_ext_get_actual_len(ex); len 
= ext4_ext_next_allocated_block(path); ext_debug("cache gap(after): [%lu:%lu] %lu", (unsigned long) le32_to_cpu(ex->ee_block), - (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) ext4_ext_get_actual_len(ex), (unsigned long) block); BUG_ON(len == lblock); len = len - lblock; @@ -1619,12 +1649,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, unsigned long from, unsigned long to) { struct buffer_head *bh; + unsigned short ee_len = ext4_ext_get_actual_len(ex); int i; #ifdef EXTENTS_STATS { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned short ee_len = le16_to_cpu(ex->ee_len); spin_lock(&sbi->s_ext_stats_lock); sbi->s_ext_blocks += ee_len; sbi->s_ext_extents++; @@ -1638,12 +1668,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, } #endif if (from >= le32_to_cpu(ex->ee_block) - && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { /* tail removal */ unsigned long num; ext4_fsblk_t start; - num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; - start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; + num = le32_to_cpu(ex->ee_block) + ee_len - from; + start = ext_pblock(ex) + ee_len - num; ext_debug("free last %lu blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); @@ -1651,12 +1681,12 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, } ext4_free_blocks(handle, inode, start, num); } else if (from == le32_to_cpu(ex->ee_block) - && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + from, to, le32_to_cpu(ex->ee_block), ee_len); } else { printk("strange request: removal(2) %lu-%lu from %u:%u\n", - from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); 
+ from, to, le32_to_cpu(ex->ee_block), ee_len); } return 0; } @@ -1671,6 +1701,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned a, b, block, num; unsigned long ex_ee_block; unsigned short ex_ee_len; + unsigned uninitialized = 0; struct ext4_extent *ex; ext_debug("truncate since %lu in leaf\n", start); @@ -1685,7 +1716,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex_ee_len = ext4_ext_get_actual_len(ex); while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { @@ -1753,6 +1786,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_block = cpu_to_le32(block); ex->ee_len = cpu_to_le16(num); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); err = ext4_ext_dirty(handle, inode, path + depth); if (err) @@ -1762,7 +1797,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); - ex_ee_len = le16_to_cpu(ex->ee_len); + ex_ee_len = ext4_ext_get_actual_len(ex); } if (correct_index && eh->eh_entries) @@ -2038,7 +2073,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (ex) { unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); - unsigned short ee_len = le16_to_cpu(ex->ee_len); + unsigned short ee_len; /* * Allow future support for preallocated extents to be added @@ -2046,8 +2081,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * Uninitialized extents are treated as holes, except that * we avoid (fail) allocating new blocks during a write. 
*/ - if (ee_len > EXT_MAX_LEN) + if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) goto out2; + ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; @@ -2055,8 +2091,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ee_len - (iblock - ee_block); ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); - ext4_ext_put_in_cache(inode, ee_block, ee_len, - ee_start, EXT4_EXT_CACHE_EXTENT); + /* Do not put uninitialized extent in the cache */ + if (!ext4_ext_is_uninitialized(ex)) + ext4_ext_put_in_cache(inode, ee_block, + ee_len, ee_start, + EXT4_EXT_CACHE_EXTENT); goto out; } } @@ -2098,6 +2137,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(allocated); + if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ + ext4_ext_mark_uninitialized(&newex); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) { /* free data blocks we just allocated */ @@ -2113,8 +2154,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext_pblock(&newex); __set_bit(BH_New, &bh_result->b_state); - ext4_ext_put_in_cache(inode, iblock, allocated, newblock, - EXT4_EXT_CACHE_EXTENT); + /* Cache only when it is _not_ an uninitialized extent */ + if (create != EXT4_CREATE_UNINITIALIZED_EXT) + ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT4_EXT_CACHE_EXTENT); out: if (allocated > max_blocks) allocated = max_blocks; @@ -2217,3 +2260,127 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) return needed; } + +/* + * preallocate space for a file. This implements ext4's fallocate inode + * operation, which gets called from sys_fallocate system call. 
+ * For block-mapped files, posix_fallocate should fall back to the method + * of writing zeroes to the required new blocks (the same behavior which is + * expected for file systems which do not support fallocate() system call). + */ +long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) +{ + handle_t *handle; + ext4_fsblk_t block, max_blocks; + ext4_fsblk_t nblocks = 0; + int ret = 0; + int ret2 = 0; + int retries = 0; + struct buffer_head map_bh; + unsigned int credits, blkbits = inode->i_blkbits; + + /* + * currently supporting (pre)allocate mode for extent-based + * files _only_ + */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return -EOPNOTSUPP; + + /* preallocation to directories is currently not supported */ + if (S_ISDIR(inode->i_mode)) + return -ENODEV; + + block = offset >> blkbits; + max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) + - block; + + /* + * credits to insert 1 extent into extent tree + buffers to be able to + * modify 1 super block, 1 block bitmap and 1 group descriptor. + */ + credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; +retry: + while (ret >= 0 && ret < max_blocks) { + block = block + ret; + max_blocks = max_blocks - ret; + handle = ext4_journal_start(inode, credits); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + break; + } + + ret = ext4_ext_get_blocks(handle, inode, block, + max_blocks, &map_bh, + EXT4_CREATE_UNINITIALIZED_EXT, 0); + WARN_ON(!ret); + if (!ret) { + ext4_error(inode->i_sb, "ext4_fallocate", + "ext4_ext_get_blocks returned 0! 
inode#%lu" + ", block=%llu, max_blocks=%llu", + inode->i_ino, block, max_blocks); + ret = -EIO; + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + break; + } + if (ret > 0) { + /* check wrap through sign-bit/zero here */ + if ((block + ret) < 0 || (block + ret) < block) { + ret = -EIO; + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + break; + } + if (buffer_new(&map_bh) && ((block + ret) > + (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits) + >> blkbits))) + nblocks = nblocks + ret; + } + + /* Update ctime if new blocks get allocated */ + if (nblocks) { + struct timespec now; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_ctime, &now)) + inode->i_ctime = now; + } + + ext4_mark_inode_dirty(handle, inode); + ret2 = ext4_journal_stop(handle); + if (ret2) + break; + } + + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + + /* + * Time to update the file size. + * Update only when preallocation was requested beyond the file size. + */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + (offset + len) > i_size_read(inode)) { + if (ret > 0) { + /* + * if no error, we assume preallocation succeeded + * completely + */ + mutex_lock(&inode->i_mutex); + i_size_write(inode, offset + len); + EXT4_I(inode)->i_disksize = i_size_read(inode); + mutex_unlock(&inode->i_mutex); + } else if (ret < 0 && nblocks) { + /* Handle partial allocation scenario */ + loff_t newsize; + + mutex_lock(&inode->i_mutex); + newsize = (nblocks << blkbits) + i_size_read(inode); + i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits)); + EXT4_I(inode)->i_disksize = i_size_read(inode); + mutex_unlock(&inode->i_mutex); + } + } + + return ret > 0 ? 
ret2 : ret; +} -- cgit v1.2.3 From 56055d3ae4cc7fa6d2b10885f20269de8a989ed7 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:38 -0400 Subject: write support for preallocated blocks This patch adds write support to the uninitialized extents that get created when a preallocation is done using fallocate(). It takes care of splitting the extents into multiple (up to three) extents and merging the new split extents with neighbouring ones, if possible. Signed-off-by: Amit Arora --- fs/ext4/extents.c | 254 +++++++++++++++++++++++++++++++++++----- include/linux/ext4_fs_extents.h | 3 + 2 files changed, 225 insertions(+), 32 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ba25832a756c..ded3d469f978 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1140,6 +1140,53 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, return 0; } +/* + * This function tries to merge the "ex" extent to the next extent in the tree. + * It always tries to merge towards right. If you want to merge towards + * left, pass "ex - 1" as argument instead of "ex". + * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns + * 1 if they got merged. + */ +int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex) +{ + struct ext4_extent_header *eh; + unsigned int depth, len; + int merge_done = 0; + int uninitialized = 0; + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + eh = path[depth].p_hdr; + + while (ex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) + break; + /* merge with next extent! 
*/ + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) + + ext4_ext_get_actual_len(ex + 1)); + if (uninitialized) + ext4_ext_mark_uninitialized(ex); + + if (ex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - ex - 1) + * sizeof(struct ext4_extent); + memmove(ex + 1, ex + 2, len); + } + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1); + merge_done = 1; + WARN_ON(eh->eh_entries == 0); + if (!eh->eh_entries) + ext4_error(inode->i_sb, "ext4_ext_try_to_merge", + "inode#%lu, eh->eh_entries = 0!", inode->i_ino); + } + + return merge_done; +} + /* * check if a portion of the "newext" extent overlaps with an * existing extent. @@ -1328,25 +1375,7 @@ has_space: merge: /* try to merge extents to the right */ - while (nearex < EXT_LAST_EXTENT(eh)) { - if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) - break; - /* merge with next extent! */ - if (ext4_ext_is_uninitialized(nearex)) - uninitialized = 1; - nearex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(nearex) - + ext4_ext_get_actual_len(nearex + 1)); - if (uninitialized) - ext4_ext_mark_uninitialized(nearex); - - if (nearex + 1 < EXT_LAST_EXTENT(eh)) { - len = (EXT_LAST_EXTENT(eh) - nearex - 1) - * sizeof(struct ext4_extent); - memmove(nearex + 1, nearex + 2, len); - } - eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); - BUG_ON(eh->eh_entries == 0); - } + ext4_ext_try_to_merge(inode, path, nearex); /* try to merge extents to the left */ @@ -2012,15 +2041,158 @@ void ext4_ext_release(struct super_block *sb) #endif } +/* + * This function is called by ext4_ext_get_blocks() if someone tries to write + * to an uninitialized extent. It may result in splitting the uninitialized + * extent into multiple extents (up to three - one initialized and two + * uninitialized). 
+ * There are three possibilities: + * a> There is no split required: Entire extent should be initialized + * b> Splits in two extents: Write is happening at either end of the extent + * c> Splits in three extents: Someone is writing in the middle of the extent + */ +int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + ext4_fsblk_t iblock, + unsigned long max_blocks) +{ + struct ext4_extent *ex, newex; + struct ext4_extent *ex1 = NULL; + struct ext4_extent *ex2 = NULL; + struct ext4_extent *ex3 = NULL; + struct ext4_extent_header *eh; + unsigned int allocated, ee_block, ee_len, depth; + ext4_fsblk_t newblock; + int err = 0; + int ret = 0; + + depth = ext_depth(inode); + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + ee_block = le32_to_cpu(ex->ee_block); + ee_len = ext4_ext_get_actual_len(ex); + allocated = ee_len - (iblock - ee_block); + newblock = iblock - ee_block + ext_pblock(ex); + ex2 = ex; + + /* ex1: ee_block to iblock - 1 : uninitialized */ + if (iblock > ee_block) { + ex1 = ex; + ex1->ee_len = cpu_to_le16(iblock - ee_block); + ext4_ext_mark_uninitialized(ex1); + ex2 = &newex; + } + /* + * for sanity, update the length of the ex2 extent before + * we insert ex3, if ex1 is NULL. This is to avoid temporary + * overlap of blocks. + */ + if (!ex1 && allocated > max_blocks) + ex2->ee_len = cpu_to_le16(max_blocks); + /* ex3: to ee_block + ee_len : uninitialised */ + if (allocated > max_blocks) { + unsigned int newdepth; + ex3 = &newex; + ex3->ee_block = cpu_to_le32(iblock + max_blocks); + ext4_ext_store_pblock(ex3, newblock + max_blocks); + ex3->ee_len = cpu_to_le16(allocated - max_blocks); + ext4_ext_mark_uninitialized(ex3); + err = ext4_ext_insert_extent(handle, inode, path, ex3); + if (err) + goto out; + /* + * The depth, and hence eh & ex might change + * as part of the insert above. 
+ */ + newdepth = ext_depth(inode); + if (newdepth != depth) { + depth = newdepth; + path = ext4_ext_find_extent(inode, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out; + } + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + if (ex2 != &newex) + ex2 = ex; + } + allocated = max_blocks; + } + /* + * If there was a change of depth as part of the + * insertion of ex3 above, we need to update the length + * of the ex1 extent again here + */ + if (ex1 && ex1 != ex) { + ex1 = ex; + ex1->ee_len = cpu_to_le16(iblock - ee_block); + ext4_ext_mark_uninitialized(ex1); + ex2 = &newex; + } + /* ex2: iblock to iblock + maxblocks-1 : initialised */ + ex2->ee_block = cpu_to_le32(iblock); + ex2->ee_start = cpu_to_le32(newblock); + ext4_ext_store_pblock(ex2, newblock); + ex2->ee_len = cpu_to_le16(allocated); + if (ex2 != ex) + goto insert; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + /* + * New (initialized) extent starts from the first block + * in the current extent. i.e., ex2 == ex + * We have to see if it can be merged with the extent + * on the left. + */ + if (ex2 > EXT_FIRST_EXTENT(eh)) { + /* + * To merge left, pass "ex2 - 1" to try_to_merge(), + * since it merges towards right _only_. + */ + ret = ext4_ext_try_to_merge(inode, path, ex2 - 1); + if (ret) { + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto out; + depth = ext_depth(inode); + ex2--; + } + } + /* + * Try to Merge towards right. This might be required + * only when the whole extent is being written to. + * i.e. ex2 == ex and ex3 == NULL. + */ + if (!ex3) { + ret = ext4_ext_try_to_merge(inode, path, ex2); + if (ret) { + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto out; + } + } + /* Mark modified extent as dirty */ + err = ext4_ext_dirty(handle, inode, path + depth); + goto out; +insert: + err = ext4_ext_insert_extent(handle, inode, path, &newex); +out: + return err ? 
err : allocated; +} + int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize) { struct ext4_ext_path *path = NULL; + struct ext4_extent_header *eh; struct ext4_extent newex, *ex; ext4_fsblk_t goal, newblock; - int err = 0, depth; + int err = 0, depth, ret; unsigned long allocated = 0; __clear_bit(BH_New, &bh_result->b_state); @@ -2033,8 +2205,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (goal) { if (goal == EXT4_EXT_CACHE_GAP) { if (!create) { - /* block isn't allocated yet and - * user doesn't want to allocate it */ + /* + * block isn't allocated yet and + * user doesn't want to allocate it + */ goto out2; } /* we should allocate requested block */ @@ -2068,6 +2242,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * this is why assert can't be put in ext4_ext_find_extent() */ BUG_ON(path[depth].p_ext == NULL && depth != 0); + eh = path[depth].p_hdr; ex = path[depth].p_ext; if (ex) { @@ -2076,13 +2251,9 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, unsigned short ee_len; /* - * Allow future support for preallocated extents to be added - * as an RO_COMPAT feature: * Uninitialized extents are treated as holes, except that - * we avoid (fail) allocating new blocks during a write. + * we split out initialized portions during a write. 
*/ - if (le16_to_cpu(ex->ee_len) > EXT_MAX_LEN) - goto out2; ee_len = ext4_ext_get_actual_len(ex); /* if found extent covers block, simply return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { @@ -2091,12 +2262,27 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, allocated = ee_len - (iblock - ee_block); ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); + /* Do not put uninitialized extent in the cache */ - if (!ext4_ext_is_uninitialized(ex)) + if (!ext4_ext_is_uninitialized(ex)) { ext4_ext_put_in_cache(inode, ee_block, ee_len, ee_start, EXT4_EXT_CACHE_EXTENT); - goto out; + goto out; + } + if (create == EXT4_CREATE_UNINITIALIZED_EXT) + goto out; + if (!create) + goto out2; + + ret = ext4_ext_convert_to_initialized(handle, inode, + path, iblock, + max_blocks); + if (ret <= 0) + goto out2; + else + allocated = ret; + goto outnew; } } @@ -2105,8 +2291,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * we couldn't try to create block if create flag is zero */ if (!create) { - /* put just found gap into cache to speed up - * subsequent requests */ + /* + * put just found gap into cache to speed up + * subsequent requests + */ ext4_ext_put_gap_in_cache(inode, path, iblock); goto out2; } @@ -2152,6 +2340,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); +outnew: __set_bit(BH_New, &bh_result->b_state); /* Cache only when it is _not_ an uninitialized extent */ @@ -2221,7 +2410,8 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) err = ext4_ext_remove_space(inode, last_block); /* In a multi-transaction truncate, we only make the final - * transaction synchronous. */ + * transaction synchronous. 
+ */ if (IS_SYNC(inode)) handle->h_sync = 1; diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index e3d5afc6f23e..edf49ec89eac 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -205,6 +205,9 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_try_to_merge(struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); -- cgit v1.2.3 From 749269facaf87f6e516c3af12763e03181b9c139 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Wed, 18 Jul 2007 09:02:56 -0400 Subject: Change on-disk format to support 2^15 uninitialized extents This change was suggested by Andreas Dilger. This patch changes the EXT_MAX_LEN value and extent code which marks/checks uninitialized extents. With this change it will be possible to have initialized extents with 2^15 blocks (earlier the max blocks we could have was 2^15 - 1). This way we can have better extent-to-block alignment. Now, maximum number of blocks we can have in an initialized extent is 2^15 and in an uninitialized extent is 2^15 - 1. 
Signed-off-by: Amit Arora --- fs/ext4/extents.c | 28 +++++++++++++++++++++++++--- include/linux/ext4_fs_extents.h | 31 +++++++++++++++++++++++++++---- 2 files changed, 52 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index ded3d469f978..77146b826a13 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1107,7 +1107,7 @@ static int ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - unsigned short ext1_ee_len, ext2_ee_len; + unsigned short ext1_ee_len, ext2_ee_len, max_len; /* * Make sure that either both extents are uninitialized, or @@ -1116,6 +1116,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) return 0; + if (ext4_ext_is_uninitialized(ex1)) + max_len = EXT_UNINIT_MAX_LEN; + else + max_len = EXT_INIT_MAX_LEN; + ext1_ee_len = ext4_ext_get_actual_len(ex1); ext2_ee_len = ext4_ext_get_actual_len(ex2); @@ -1128,7 +1133,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, * as an RO_COMPAT feature, refuse to merge to extents if * this can result in the top bit of ee_len being set. */ - if (ext1_ee_len + ext2_ee_len > EXT_MAX_LEN) + if (ext1_ee_len + ext2_ee_len > max_len) return 0; #ifdef AGGRESSIVE_TEST if (le16_to_cpu(ex1->ee_len) >= 4) @@ -1815,7 +1820,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex->ee_block = cpu_to_le32(block); ex->ee_len = cpu_to_le16(num); - if (uninitialized) + /* + * Do not mark uninitialized if all the blocks in the + * extent have been removed. 
+ */ + if (uninitialized && num) ext4_ext_mark_uninitialized(ex); err = ext4_ext_dirty(handle, inode, path + depth); @@ -2308,6 +2317,19 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* allocate new block */ goal = ext4_ext_find_goal(inode, path, iblock); + /* + * See if request is beyond maximum number of blocks we can have in + * a single extent. For an initialized extent this limit is + * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is + * EXT_UNINIT_MAX_LEN. + */ + if (max_blocks > EXT_INIT_MAX_LEN && + create != EXT4_CREATE_UNINITIALIZED_EXT) + max_blocks = EXT_INIT_MAX_LEN; + else if (max_blocks > EXT_UNINIT_MAX_LEN && + create == EXT4_CREATE_UNINITIALIZED_EXT) + max_blocks = EXT_UNINIT_MAX_LEN; + /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ newex.ee_block = cpu_to_le32(iblock); newex.ee_len = cpu_to_le16(max_blocks); diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index edf49ec89eac..81406f3655d4 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -141,7 +141,25 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, #define EXT_MAX_BLOCK 0xffffffff -#define EXT_MAX_LEN ((1UL << 15) - 1) +/* + * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an + * initialized extent. This is 2^15 and not (2^16 - 1), since we use the + * MSB of ee_len field in the extent datastructure to signify if this + * particular extent is an initialized extent or an uninitialized (i.e. + * preallocated). + * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an + * uninitialized extent. + * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an + * uninitialized one. In other words, if MSB of ee_len is set, it is an + * uninitialized extent with only one special scenario when ee_len = 0x8000. 
+ * In this case we can not have an uninitialized extent of zero length and + * thus we make it as a special case of initialized extent with 0x8000 length. + * This way we get better extent-to-group alignment for initialized extents. + * Hence, the maximum number of blocks we can have in an *initialized* + * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767). + */ +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ @@ -190,17 +208,22 @@ ext4_ext_invalidate_cache(struct inode *inode) static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext) { - ext->ee_len |= cpu_to_le16(0x8000); + /* We can not have an uninitialized extent of zero length! */ + BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); + ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x8000); + /* Extent with ee_len of 0x8000 is treated as an initialized extent */ + return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { - return (int)(le16_to_cpu((ext)->ee_len) & 0x7FFF); + return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? + le16_to_cpu(ext->ee_len) : + (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } extern int ext4_extent_tree_init(handle_t *, struct inode *); -- cgit v1.2.3 From c29c0ae7f282828da3695167ed870131798348d9 Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Wed, 18 Jul 2007 09:19:09 -0400 Subject: ext4: Make extents code sanely handle on-disk corruption Add more run-time checking of extent header fields and remove BUG_ON checks so we don't panic the kernel just because the on-disk filesystem is corrupted. 
Signed-off-by: Alex Tomas Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 144 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 86 insertions(+), 58 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 77146b826a13..96264b2ef0a3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -92,36 +92,6 @@ static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } -static int ext4_ext_check_header(const char *function, struct inode *inode, - struct ext4_extent_header *eh) -{ - const char *error_msg = NULL; - - if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { - error_msg = "invalid magic"; - goto corrupted; - } - if (unlikely(eh->eh_max == 0)) { - error_msg = "invalid eh_max"; - goto corrupted; - } - if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { - error_msg = "invalid eh_entries"; - goto corrupted; - } - return 0; - -corrupted: - ext4_error(inode->i_sb, function, - "bad header in inode #%lu: %s - magic %x, " - "entries %u, max %u, depth %u", - inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), - le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), - le16_to_cpu(eh->eh_depth)); - - return -EIO; -} - static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) { int err; @@ -270,6 +240,70 @@ static int ext4_ext_space_root_idx(struct inode *inode) return size; } +static int +ext4_ext_max_entries(struct inode *inode, int depth) +{ + int max; + + if (depth == ext_depth(inode)) { + if (depth == 0) + max = ext4_ext_space_root(inode); + else + max = ext4_ext_space_root_idx(inode); + } else { + if (depth == 0) + max = ext4_ext_space_block(inode); + else + max = ext4_ext_space_block_idx(inode); + } + + return max; +} + +static int __ext4_ext_check_header(const char *function, struct inode *inode, + struct ext4_extent_header *eh, + int depth) +{ + 
const char *error_msg; + int max = 0; + + if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { + error_msg = "invalid magic"; + goto corrupted; + } + if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) { + error_msg = "unexpected eh_depth"; + goto corrupted; + } + if (unlikely(eh->eh_max == 0)) { + error_msg = "invalid eh_max"; + goto corrupted; + } + max = ext4_ext_max_entries(inode, depth); + if (unlikely(le16_to_cpu(eh->eh_max) > max)) { + error_msg = "too large eh_max"; + goto corrupted; + } + if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + return 0; + +corrupted: + ext4_error(inode->i_sb, function, + "bad header in inode #%lu: %s - magic %x, " + "entries %u, max %u(%u), depth %u(%u)", + inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), + max, le16_to_cpu(eh->eh_depth), depth); + + return -EIO; +} + +#define ext4_ext_check_header(inode, eh, depth) \ + __ext4_ext_check_header(__FUNCTION__, inode, eh, depth) + #ifdef EXT_DEBUG static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) { @@ -330,6 +364,7 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path) /* * ext4_ext_binsearch_idx: * binary search for the closest index of the given block + * the header must be checked before calling this */ static void ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) @@ -337,9 +372,6 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_idx *r, *l, *m; - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); ext_debug("binsearch for %d(idx): ", block); @@ -389,6 +421,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc /* * ext4_ext_binsearch: * binary search for 
closest extent of the given block + * the header must be checked before calling this */ static void ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) @@ -396,9 +429,6 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent *r, *l, *m; - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - if (eh->eh_entries == 0) { /* * this leaf is empty: @@ -469,11 +499,10 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) short int depth, i, ppos = 0, alloc = 0; eh = ext_inode_hdr(inode); - BUG_ON(eh == NULL); - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + depth = ext_depth(inode); + if (ext4_ext_check_header(inode, eh, depth)) return ERR_PTR(-EIO); - i = depth = ext_depth(inode); /* account possible depth increase */ if (!path) { @@ -485,10 +514,12 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) } path[0].p_hdr = eh; + i = depth; /* walk through the tree */ while (i) { ext_debug("depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext4_ext_binsearch_idx(inode, path + ppos, block); path[ppos].p_block = idx_pblock(path[ppos].p_idx); path[ppos].p_depth = i; @@ -505,7 +536,7 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) path[ppos].p_hdr = eh; i--; - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + if (ext4_ext_check_header(inode, eh, i)) goto err; } @@ -514,9 +545,6 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) path[ppos].p_ext = NULL; path[ppos].p_idx = NULL; - if (ext4_ext_check_header(__FUNCTION__, inode, eh)) - goto err; - /* find extent */ ext4_ext_binsearch(inode, path + ppos, block); @@ -1738,13 +1766,12 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, unsigned uninitialized = 0; struct ext4_extent 
*ex; + /* the header must be checked already in ext4_ext_remove_space() */ ext_debug("truncate since %lu in leaf\n", start); if (!path[depth].p_hdr) path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); eh = path[depth].p_hdr; BUG_ON(eh == NULL); - BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); - BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); /* find where to start removing */ ex = EXT_LAST_EXTENT(eh); @@ -1898,7 +1925,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) return -ENOMEM; } path[0].p_hdr = ext_inode_hdr(inode); - if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { + if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) { err = -EIO; goto out; } @@ -1919,17 +1946,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) if (!path[i].p_hdr) { ext_debug("initialize header\n"); path[i].p_hdr = ext_block_hdr(path[i].p_bh); - if (ext4_ext_check_header(__FUNCTION__, inode, - path[i].p_hdr)) { - err = -EIO; - goto out; - } } - BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) - > le16_to_cpu(path[i].p_hdr->eh_max)); - BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); - if (!path[i].p_idx) { /* this level hasn't been touched yet */ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); @@ -1946,17 +1964,27 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) i, EXT_FIRST_INDEX(path[i].p_hdr), path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { + struct buffer_head *bh; /* go to the next level */ ext_debug("move to level %d (block %llu)\n", i + 1, idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - path[i+1].p_bh = - sb_bread(sb, idx_pblock(path[i].p_idx)); - if (!path[i+1].p_bh) { + bh = sb_bread(sb, idx_pblock(path[i].p_idx)); + if (!bh) { /* should we reset i_size? 
*/ err = -EIO; break; } + if (WARN_ON(i + 1 > depth)) { + err = -EIO; + break; + } + if (ext4_ext_check_header(inode, ext_block_hdr(bh), + depth - i - 1)) { + err = -EIO; + break; + } + path[i + 1].p_bh = bh; /* save actual number of indexes since this * number is changed at the next iteration */ -- cgit v1.2.3 From 26d535ed24f74ce949d7b49e40574c45cd845cdd Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 18 Jul 2007 08:33:37 -0400 Subject: Fix compilation with EXT_DEBUG, also fix leXX_to_cpu conversions. Signed-off-by: Dmitry Monakhov Acked-by: Alex Tomas Signed-off-by: Dave Kleikamp Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 96264b2ef0a3..11ce15d91acc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -383,13 +383,14 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, - m, m->ei_block, r, r->ei_block); + ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block), + m, le32_to_cpu(m->ei_block), + r, le32_to_cpu(r->ei_block)); } path->p_idx = l - 1; ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), - idx_block(path->p_idx)); + idx_pblock(path->p_idx)); #ifdef CHECK_BINSEARCH { @@ -448,8 +449,9 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) r = m - 1; else l = m + 1; - ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, - m, m->ee_block, r, r->ee_block); + ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block), + m, le32_to_cpu(m->ee_block), + r, le32_to_cpu(r->ee_block)); } path->p_ext = l - 1; @@ -582,7 +584,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { len = (len - 1) * sizeof(struct ext4_extent_idx); len = len < 0 ? 
0 : len; - ext_debug("insert new index %d after: %d. " + ext_debug("insert new index %d after: %llu. " "move %d from 0x%p to 0x%p\n", logical, ptr, len, (curp->p_idx + 1), (curp->p_idx + 2)); @@ -593,7 +595,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, /* insert before */ len = len * sizeof(struct ext4_extent_idx); len = len < 0 ? 0 : len; - ext_debug("insert new index %d before: %d. " + ext_debug("insert new index %d before: %llu. " "move %d from 0x%p to 0x%p\n", logical, ptr, len, curp->p_idx, (curp->p_idx + 1)); @@ -793,7 +795,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr)); while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%d in new index %llu\n", i, + ext_debug("%d: move %d:%llu in new index %llu\n", i, le32_to_cpu(path[i].p_idx->ei_block), idx_pblock(path[i].p_idx), newblock); -- cgit v1.2.3 From e9f410b1c035b6e63f0b4c3d6cfe4298d6a04492 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 18 Jul 2007 09:09:15 -0400 Subject: ext4: extent macros cleanup Use the EXT_LAST_INDEX macro; that's what it's there for. Clean up ext4_ext_grow_indepth() so the correct EXT_FIRST_INDEX or EXT_FIRST_EXTENT macro is used as necessary. The two macros are equivalent, so the C compiler will collapse the if statement out, but it makes the code much more readable.
Signed-off-by: Dmitry Monakhov Acked-by: Alex Tomas Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 11ce15d91acc..750c46f7d893 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -376,7 +376,7 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc ext_debug("binsearch for %d(idx): ", block); l = EXT_FIRST_INDEX(eh) + 1; - r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; + r = EXT_LAST_INDEX(eh); while (l <= r) { m = l + (r - l) / 2; if (block < le32_to_cpu(m->ei_block)) @@ -441,7 +441,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) ext_debug("binsearch for %d: ", block); l = EXT_FIRST_EXTENT(eh) + 1; - r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; + r = EXT_LAST_EXTENT(eh); while (l <= r) { m = l + (r - l) / 2; @@ -924,8 +924,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); curp->p_hdr->eh_entries = cpu_to_le16(1); curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); - /* FIXME: it works, but actually path[0] can be index */ - curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; + + if (path[0].p_hdr->eh_depth) + curp->p_idx->ei_block = + EXT_FIRST_INDEX(path[0].p_hdr)->ei_block; + else + curp->p_idx->ei_block = + EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; ext4_idx_store_pblock(curp->p_idx, newblock); neh = ext_inode_hdr(inode); -- cgit v1.2.3