From a2df2a63407803a833f82e1fa6693826c8c9d584 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Tue, 17 Jul 2007 21:42:41 -0400 Subject: fallocate support in ext4 This patch implements ->fallocate() inode operation in ext4. With this patch users of ext4 file systems will be able to use fallocate() system call for persistent preallocation. Current implementation only supports preallocation for regular files (directories not supported as of date) with extent maps. This patch does not support block-mapped files currently. Only FALLOC_ALLOCATE and FALLOC_RESV_SPACE modes are being supported as of now. Signed-off-by: Amit Arora --- include/linux/ext4_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/ext4_fs.h') diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index de1f9f78625a..87c2d7a05b01 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -102,6 +102,7 @@ EXT4_GOOD_OLD_FIRST_INO : \ (s)->s_first_ino) #endif +#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits))) /* * Macro-instructions used to manage fragments @@ -225,6 +226,11 @@ struct ext4_new_group_data { __u32 free_blocks_count; }; +/* + * Following is used by preallocation code to tell get_blocks() that we + * want uninitialzed extents. + */ +#define EXT4_CREATE_UNINITIALIZED_EXT 2 /* * ioctl commands @@ -983,6 +989,8 @@ extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, extern void ext4_ext_truncate(struct inode *, struct page *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); +extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, + loff_t len); static inline int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, -- cgit v1.2.3 From ff9ddf7e847c4dc533f119efb6c77a6e57ab6397 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Jul 2007 09:24:20 -0400 Subject: ext4: copy i_flags to inode flags on write Propagate flags such as S_APPEND, S_IMMUTABLE, etc. from i_flags into ext4-specific i_flags. Quota code changes these flags on quota files (to make it harder for sysadmin to screw himself) and these changes were not correctly propagated into the filesystem. (This is a forward port patch from ext3) Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 20 ++++++++++++++++++++ fs/ext4/ioctl.c | 1 + include/linux/ext4_fs.h | 1 + 3 files changed, 22 insertions(+) (limited to 'include/linux/ext4_fs.h') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8416fa28c422..49035c5a2c43 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2583,6 +2583,25 @@ void ext4_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ +void ext4_get_inode_flags(struct ext4_inode_info *ei) +{ + unsigned int flags = ei->vfs_inode.i_flags; + + ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT4_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT4_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT4_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT4_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT4_DIRSYNC_FL; +} + void ext4_read_inode(struct inode * inode) { struct ext4_iloc iloc; @@ -2744,6 +2763,7 @@ static int ext4_do_update_inode(handle_t *handle, if (ei->i_state & EXT4_STATE_NEW) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7b4aa4543c83..9737432f079d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -28,6 +28,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, switch (cmd) { case EXT4_IOC_GETFLAGS: + ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 87c2d7a05b01..33b2b1a2d790 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -868,6 +868,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern void ext4_truncate (struct inode *); extern void ext4_set_inode_flags(struct inode *); +extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_block_truncate_page(handle_t *handle, struct page *page, -- cgit v1.2.3 From e23291b9120c11aafb2ee76fb71a062eb3c1056c Mon Sep 17 00:00:00 2001 From: "Jose R. Santos" Date: Wed, 18 Jul 2007 08:57:06 -0400 Subject: jbd2: Fix CONFIG_JBD_DEBUG ifdef to be CONFIG_JBD2_DEBUG When the JBD code was forked to create the new JBD2 code base, the references to CONFIG_JBD_DEBUG where never changed to CONFIG_JBD2_DEBUG. This patch fixes that. Signed-off-by: Jose R. Santos Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 4 ++-- fs/ext4/ioctl.c | 4 ++-- fs/jbd2/journal.c | 14 +++++++------- fs/jbd2/recovery.c | 2 +- include/linux/ext4_fs.h | 4 ++-- include/linux/ext4_fs_sb.h | 2 +- include/linux/jbd2.h | 4 ++-- 7 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux/ext4_fs.h') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 9de54ae48dee..e53b4af52f11 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -517,7 +517,7 @@ do_more: /* * An HJ special. This is expensive... */ -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG jbd_unlock_bh_state(bitmap_bh); { struct buffer_head *debug_bh; @@ -1597,7 +1597,7 @@ allocated: performed_allocation = 1; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG { struct buffer_head *debug_bh; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9737432f079d..5b00775d5096 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -141,7 +141,7 @@ flags_err: ext4_journal_stop(handle); return err; } -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG case EXT4_IOC_WAIT_FOR_READONLY: /* * This is racy - by the time we're woken up and running, @@ -283,7 +283,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EXT4_IOC32_SETVERSION_OLD: cmd = EXT4_IOC_SETVERSION_OLD; break; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG case EXT4_IOC32_WAIT_FOR_READONLY: cmd = EXT4_IOC_WAIT_FOR_READONLY; break; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 78d63b818f0b..8f530cc66d34 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -528,7 +528,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG spin_lock(&journal->j_state_lock); if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_EMERG @@ -1709,7 +1709,7 @@ void jbd2_slab_free(void *ptr, size_t size) * Journal_head storage management */ static struct kmem_cache *jbd2_journal_head_cache; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif @@ -1747,7 +1747,7 @@ static struct journal_head *journal_alloc_journal_head(void) struct journal_head *ret; static unsigned long last_warning; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG atomic_inc(&nr_journal_heads); #endif ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); @@ -1768,7 +1768,7 @@ static struct journal_head *journal_alloc_journal_head(void) static void journal_free_journal_head(struct journal_head *jh) { -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG atomic_dec(&nr_journal_heads); memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif @@ -1953,12 +1953,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) /* * /proc tunables */ -#if defined(CONFIG_JBD_DEBUG) +#if defined(CONFIG_JBD2_DEBUG) int jbd2_journal_enable_debug; EXPORT_SYMBOL(jbd2_journal_enable_debug); #endif -#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_JBD2_DEBUG) && defined(CONFIG_PROC_FS) static struct proc_dir_entry *proc_jbd_debug; @@ -2073,7 +2073,7 @@ static int __init journal_init(void) static void __exit journal_exit(void) { -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG int n = atomic_read(&nr_journal_heads); if (n) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 395c92a04ac9..e7730a045b93 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -295,7 +295,7 @@ int jbd2_journal_skip_recovery(journal_t *journal) printk(KERN_ERR "JBD: error %d scanning journal\n", err); ++journal->j_transaction_sequence; } else { -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); #endif jbd_debug(0, diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 33b2b1a2d790..45ec7258b2b1 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -243,7 +243,7 @@ struct ext4_new_group_data { #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) @@ -259,7 +259,7 @@ struct ext4_new_group_data { #define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) #define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) #define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG #define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) #endif #define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 2347557a327a..0f7dc15924bf 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -73,7 +73,7 @@ struct ext4_sb_info { struct list_head s_orphan; unsigned long s_commit_interval; struct block_device *journal_bdev; -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 0e0fedd2039a..a37aca31de46 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -50,11 +50,11 @@ */ #define JBD_DEFAULT_MAX_COMMIT_AGE 5 -#ifdef CONFIG_JBD_DEBUG +#ifdef CONFIG_JBD2_DEBUG /* * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal * consistency checks. By default we don't do this unless - * CONFIG_JBD_DEBUG is on. + * CONFIG_JBD2_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING extern int jbd2_journal_enable_debug; -- cgit v1.2.3 From ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80 Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:15:20 -0400 Subject: ext4: Add nanosecond timestamps This patch adds nanosecond timestamps for ext4. This involves adding *time_extra fields to the ext4_inode to extend the timestamps to 64-bits. Creation time is also added by this patch. These extended fields will fit into an inode if the filesystem was formatted with large inodes (-I 256 or larger) and there are currently no EAs consuming all of the available space. For new inodes we always reserve enough space for the kernel's known extended fields, but for inodes created with an old kernel this might not have been the case. So this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature flag(ro-compat so that older kernels can't create inodes with a smaller extra_isize). which indicates if the fields fitting inside s_min_extra_isize are available or not. If the expansion of inodes if unsuccessful then this feature will be disabled. This feature is only enabled if requested by the sysadmin. None of the extended inode fields is critical for correct filesystem operation. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Eric Sandeen Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 8 ++--- fs/ext4/inode.c | 22 +++++++----- fs/ext4/ioctl.c | 4 +-- fs/ext4/namei.c | 16 ++++----- fs/ext4/super.c | 28 +++++++++++++++ fs/ext4/xattr.c | 2 +- include/linux/ext4_fs.h | 86 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext4_fs_i.h | 5 +++ include/linux/ext4_fs_sb.h | 1 + 9 files changed, 147 insertions(+), 25 deletions(-) (limited to 'include/linux/ext4_fs.h') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c88b439ba5cd..427f83066a0d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -563,7 +563,8 @@ got: inode->i_ino = ino; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + ext4_current_time(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; @@ -595,9 +596,8 @@ got: spin_unlock(&sbi->s_next_gen_lock); ei->i_state = EXT4_STATE_NEW; - ei->i_extra_isize = - (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ? - sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; + + ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; if(DQUOT_ALLOC_INODE(inode)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49035c5a2c43..b83f91edebd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, /* We are done with atomic stuff, now do the rest of housekeeping */ - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* had we spliced it onto indirect block? */ @@ -2375,7 +2375,7 @@ do_indirects: ext4_discard_reservation(inode); mutex_unlock(&ei->truncate_mutex); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* @@ -2629,10 +2629,6 @@ void ext4_read_inode(struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_state = 0; ei->i_dir_start_lookup = 0; @@ -2710,6 +2706,11 @@ void ext4_read_inode(struct inode * inode) } else ei->i_extra_isize = 0; + EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); + if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -2791,9 +2792,12 @@ static int ext4_do_update_inode(handle_t *handle, } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5b00775d5096..c04c7ccba9e3 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -97,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, ei->i_flags = flags; ext4_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: @@ -134,7 +134,7 @@ flags_err: return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2de339dd7554..40106b7ea4b8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + dir->i_mtime = dir->i_ctime = ext4_current_time(dir); ext4_update_dx_flag(dir); dir->i_version++; ext4_mark_inode_dirty(handle, dir); @@ -2056,7 +2056,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); drop_nlink(dir); ext4_update_dx_flag(dir); @@ -2106,13 +2106,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); retval = 0; @@ -2203,7 +2203,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inc_nlink(inode); atomic_inc(&inode->i_count); @@ -2305,7 +2305,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = CURRENT_TIME_SEC; + old_inode->i_ctime = ext4_current_time(old_inode); ext4_mark_inode_dirty(handle, old_inode); /* @@ -2338,9 +2338,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, if (new_inode) { drop_nlink(new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; + new_inode->i_ctime = ext4_current_time(new_inode); } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); ext4_update_dx_flag(old_dir); if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index af0835187e76..b47259f6f39c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1651,6 +1651,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_inode_size); goto failed_mount; } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) + sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << le32_to_cpu(es->s_log_frag_size); @@ -1874,6 +1876,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + printk(KERN_INFO "EXT4-fs: required extra inode space not" + "available.\n"); + } + /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e832e96095b3..fe16a569d06b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1013,7 +1013,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, } if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 45ec7258b2b1..df5e38faa15f 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -288,7 +288,7 @@ struct ext4_inode { __le16 i_uid; /* Low 16 bits of Owner Uid */ __le32 i_size; /* Size in bytes */ __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ + __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ @@ -337,10 +337,85 @@ struct ext4_inode { } osd2; /* OS dependent 2 */ __le16 i_extra_isize; __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ }; #define i_size_high i_dir_acl +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_frag osd2.linux2.l_i_frag @@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) return container_of(inode, struct ext4_inode_info, vfs_inode); } +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 9de494406995..1a511e9905aa 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -153,6 +153,11 @@ struct ext4_inode_info { unsigned long i_ext_generation; struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 0f7dc15924bf..1b2ffee12be9 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -81,6 +81,7 @@ struct ext4_sb_info { char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ #ifdef EXTENTS_STATS /* ext4 extents stats */ -- cgit v1.2.3 From 6dd4ee7cab7e3a17c571aebd444f4344c8c4946e Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:19:57 -0400 Subject: ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields We need to make sure that existing ext3 filesystems can also avail the new fields that have been added to the ext4 inode. We use s_want_extra_isize and s_min_extra_isize to decide by how much we should expand the inode. If EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature is set then we expand the inode by max(s_want_extra_isize, s_min_extra_isize , sizeof(ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE) bytes. Actually it is still an open question about whether users should be able to set s_*_extra_isize smaller than the known fields or not. This patch also adds the functionality to expand inodes to include the newly added fields. We start by trying to expand by s_want_extra_isize bytes and if its fails we try to expand by s_min_extra_isize bytes. This is done by changing the i_extra_isize if enough space is available in the inode and no EAs are present. If EAs are present and there is enough space in the inode then the EAs in the inode are shifted to make space. If enough space is not available in the inode due to the EAs then 1 or more EAs are shifted to the external EA block. In the worst case when even the external EA block does not have enough space we inform the user that some EA would need to be deleted or s_min_extra_isize would have to be reduced. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 63 ++++++++++- fs/ext4/xattr.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/xattr.h | 17 +++ include/linux/ext4_fs.h | 1 + 4 files changed, 347 insertions(+), 8 deletions(-) (limited to 'include/linux/ext4_fs.h') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b83f91edebd1..f6d8528c4f55 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3105,6 +3105,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } +/* + * Expand an inode by new_extra_isize bytes. + * Returns 0 on success or negative error number on failure. + */ +int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, + struct ext4_iloc iloc, handle_t *handle) +{ + struct ext4_inode *raw_inode; + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + return 0; + + raw_inode = ext4_raw_inode(&iloc); + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* No extended attributes present */ + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, + new_extra_isize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + return 0; + } + + /* try to expand with EAs present */ + return ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); +} + /* * What we do here is to mark the in-core inode as clean with respect to inode * dirtiness (it may still be data-dirty). @@ -3129,10 +3162,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; - int err; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; + int err, ret; might_sleep(); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && + !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { + /* + * We need extra buffer credits since we may write into EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if ((jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { + ret = ext4_expand_extra_isize(inode, + sbi->s_want_extra_isize, + iloc, handle); + if (ret) { + EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + if (mnt_count != sbi->s_es->s_mnt_count) { + ext4_warning(inode->i_sb, __FUNCTION__, + "Unable to expand inode %lu. Delete" + " some EAs or run e2fsck.", + inode->i_ino); + mnt_count = sbi->s_es->s_mnt_count; + } + } + } + } if (!err) err = ext4_mark_iloc_dirty(handle, inode, &iloc); return err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fe16a569d06b..b10d68fffb55 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -66,13 +66,6 @@ #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -#define IHDR(inode, raw_inode) \ - ((struct ext4_xattr_ibody_header *) \ - ((void *)raw_inode + \ - EXT4_GOOD_OLD_INODE_SIZE + \ - EXT4_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) - #ifdef EXT4_XATTR_DEBUG # define ea_idebug(inode, f...) do { \ printk(KERN_DEBUG "inode %s:%lu: ", \ @@ -508,6 +501,24 @@ out: return; } +/* + * Find the available free space for EAs. This also returns the total number of + * bytes used by EA entries. + */ +static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, + size_t *min_offs, void *base, int *total) +{ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + *total += EXT4_XATTR_LEN(last->e_name_len); + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < *min_offs) + *min_offs = offs; + } + } + return (*min_offs - ((void *)last - base) - sizeof(__u32)); +} + struct ext4_xattr_info { int name_index; const char *name; @@ -1014,6 +1025,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); + if (!value) + EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1066,6 +1079,253 @@ retry: return error; } +/* + * Shift the EA entries in the inode to create space for the increased + * i_extra_isize. + */ +static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, + int value_offs_shift, void *to, + void *from, size_t n, int blocksize) +{ + struct ext4_xattr_entry *last = entry; + int new_offs; + + /* Adjust the value offsets of the entries */ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + if (!last->e_value_block && last->e_value_size) { + new_offs = le16_to_cpu(last->e_value_offs) + + value_offs_shift; + BUG_ON(new_offs + le32_to_cpu(last->e_value_size) + > blocksize); + last->e_value_offs = cpu_to_le16(new_offs); + } + } + /* Shift the entries by n bytes */ + memmove(to, from, n); +} + +/* + * Expand an inode by new_extra_isize bytes when EAs are present. + * Returns 0 on success or negative error number on failure. + */ +int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry, *last, *first; + struct buffer_head *bh = NULL; + struct ext4_xattr_ibody_find *is = NULL; + struct ext4_xattr_block_find *bs = NULL; + char *buffer = NULL, *b_entry_name = NULL; + size_t min_offs, free; + int total_ino, total_blk; + void *base, *start, *end; + int extra_isize = 0, error = 0, tried_min_extra_isize = 0; + int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize; + + down_write(&EXT4_I(inode)->xattr_sem); +retry: + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + } + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* + * Check if enough free space is available in the inode to shift the + * entries ahead by new_extra_isize. + */ + + base = start = entry; + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + min_offs = end - base; + last = entry; + total_ino = sizeof(struct ext4_xattr_ibody_header); + + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); + if (free >= new_extra_isize) { + entry = IFIRST(header); + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, + (void *)header, total_ino, + inode->i_sb->s_blocksize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + error = 0; + goto cleanup; + } + + /* + * Enough free space isn't available in the inode, check if + * EA block can hold new_extra_isize bytes. + */ + if (EXT4_I(inode)->i_file_acl) { + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + if (ext4_xattr_check_block(bh)) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + base = BHDR(bh); + first = BFIRST(bh); + end = bh->b_data + bh->b_size; + min_offs = end - base; + free = ext4_xattr_free_space(first, &min_offs, base, + &total_blk); + if (free < new_extra_isize) { + if (!tried_min_extra_isize && s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + brelse(bh); + goto retry; + } + error = -1; + goto cleanup; + } + } else { + free = inode->i_sb->s_blocksize; + } + + while (new_extra_isize > 0) { + size_t offs, size, entry_size; + struct ext4_xattr_entry *small_entry = NULL; + struct ext4_xattr_info i = { + .value = NULL, + .value_len = 0, + }; + unsigned int total_size; /* EA entry size + value size */ + unsigned int shift_bytes; /* No. of bytes to shift EAs by? */ + unsigned int min_total_size = ~0U; + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); + bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); + if (!is || !bs) { + error = -ENOMEM; + goto cleanup; + } + + is->s.not_found = -ENODATA; + bs->s.not_found = -ENODATA; + is->iloc.bh = NULL; + bs->bh = NULL; + + last = IFIRST(header); + /* Find the entry best suited to be pushed into EA block */ + entry = NULL; + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + total_size = + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); + if (total_size <= free && total_size < min_total_size) { + if (total_size < new_extra_isize) { + small_entry = last; + } else { + entry = last; + min_total_size = total_size; + } + } + } + + if (entry == NULL) { + if (small_entry) { + entry = small_entry; + } else { + if (!tried_min_extra_isize && + s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + goto retry; + } + error = -1; + goto cleanup; + } + } + offs = le16_to_cpu(entry->e_value_offs); + size = le32_to_cpu(entry->e_value_size); + entry_size = EXT4_XATTR_LEN(entry->e_name_len); + i.name_index = entry->e_name_index, + buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); + if (!buffer || !b_entry_name) { + error = -ENOMEM; + goto cleanup; + } + /* Save the entry name and the entry value */ + memcpy(buffer, (void *)IFIRST(header) + offs, + EXT4_XATTR_SIZE(size)); + memcpy(b_entry_name, entry->e_name, entry->e_name_len); + b_entry_name[entry->e_name_len] = '\0'; + i.name = b_entry_name; + + error = ext4_get_inode_loc(inode, &is->iloc); + if (error) + goto cleanup; + + error = ext4_xattr_ibody_find(inode, &i, is); + if (error) + goto cleanup; + + /* Remove the chosen entry from the inode */ + error = ext4_xattr_ibody_set(handle, inode, &i, is); + + entry = IFIRST(header); + if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) + shift_bytes = new_extra_isize; + else + shift_bytes = entry_size + size; + /* Adjust the offsets and shift the remaining entries ahead */ + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - + shift_bytes, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, + (void *)header, total_ino - entry_size, + inode->i_sb->s_blocksize); + + extra_isize += shift_bytes; + new_extra_isize -= shift_bytes; + EXT4_I(inode)->i_extra_isize = extra_isize; + + i.name = b_entry_name; + i.value = buffer; + i.value_len = cpu_to_le32(size); + error = ext4_xattr_block_find(inode, &i, bs); + if (error) + goto cleanup; + + /* Add entry which was removed from the inode into the block */ + error = ext4_xattr_block_set(handle, inode, &i, bs); + if (error) + goto cleanup; + kfree(b_entry_name); + kfree(buffer); + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + } + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + +cleanup: + kfree(b_entry_name); + kfree(buffer); + if (is) + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return error; +} + + + /* * ext4_xattr_delete_inode() * diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 79432b35398f..d7f5d6a12651 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -56,6 +56,13 @@ struct ext4_xattr_entry { #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) +#define IHDR(inode, raw_inode) \ + ((struct ext4_xattr_ibody_header *) \ + ((void *)raw_inode + \ + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) + # ifdef CONFIG_EXT4DEV_FS_XATTR extern struct xattr_handler ext4_xattr_user_handler; @@ -74,6 +81,9 @@ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, extern void ext4_xattr_delete_inode(handle_t *, struct inode *); extern void ext4_xattr_put_super(struct super_block *); +extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle); + extern int init_ext4_xattr(void); extern void exit_ext4_xattr(void); @@ -129,6 +139,13 @@ exit_ext4_xattr(void) { } +static inline int +ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + return -EOPNOTSUPP; +} + #define ext4_xattr_handlers NULL # endif /* CONFIG_EXT4DEV_FS_XATTR */ diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index df5e38faa15f..52dcc24dd986 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -202,6 +202,7 @@ struct ext4_group_desc #define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { -- cgit v1.2.3 From f8628a14a27eb4512a1ede43de1d9db4d9f92bc3 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 18 Jul 2007 08:38:01 -0400 Subject: ext4: Remove 65000 subdirectory limit This patch adds support to ext4 for allowing more than 65000 subdirectories. Currently the maximum number of subdirectories is capped at 32000. If we exceed 65000 subdirectories in an htree directory it sets the inode link count to 1 and no longer counts subdirectories. The directory link count is not actually used when determining if a directory is empty, as that only counts subdirectories and not regular files that might be in there. A EXT4_FEATURE_RO_COMPAT_DIR_NLINK flag has been added and it is set if the subdir count for any directory crosses 65000. A later fsck will clear EXT4_FEATURE_RO_COMPAT_DIR_NLINK if there are no longer any directory with >65000 subdirs. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: "Theodore Ts'o" --- fs/ext4/namei.c | 60 +++++++++++++++++++++++++++++++++++++------------ include/linux/ext4_fs.h | 4 +++- 2 files changed, 49 insertions(+), 15 deletions(-) (limited to 'include/linux/ext4_fs.h') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 40106b7ea4b8..da224974af78 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1629,6 +1629,35 @@ static int ext4_delete_entry (handle_t *handle, return -ENOENT; } +/* + * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2, + * since this indicates that nlinks count was previously 1. + */ +static void ext4_inc_count(handle_t *handle, struct inode *inode) +{ + inc_nlink(inode); + if (is_dx(inode) && inode->i_nlink > 1) { + /* limit is 16-bit i_links_count */ + if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) { + inode->i_nlink = 1; + EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_DIR_NLINK); + } + } +} + +/* + * If a directory had nlink == 1, then we should let it be 1. This indicates + * directory has >EXT4_LINK_MAX subdirs. + */ +static void ext4_dec_count(handle_t *handle, struct inode *inode) +{ + drop_nlink(inode); + if (S_ISDIR(inode->i_mode) && inode->i_nlink == 0) + inc_nlink(inode); +} + + static int ext4_add_nondir(handle_t *handle, struct dentry *dentry, struct inode *inode) { @@ -1725,7 +1754,7 @@ static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) struct ext4_dir_entry_2 * de; int err, retries = 0; - if (dir->i_nlink >= EXT4_LINK_MAX) + if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; retry: @@ -1748,7 +1777,7 @@ retry: inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; dir_block = ext4_bread (handle, inode, 0, 1, &err); if (!dir_block) { - drop_nlink(inode); /* is this nlink == 0? */ + ext4_dec_count(handle, inode); /* is this nlink == 0? */ ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -1780,7 +1809,7 @@ retry: iput (inode); goto out_stop; } - inc_nlink(dir); + ext4_inc_count(handle, dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); @@ -2045,9 +2074,9 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; - if (inode->i_nlink != 2) + if (!EXT4_DIR_LINK_EMPTY(inode)) ext4_warning (inode->i_sb, "ext4_rmdir", - "empty directory has nlink!=2 (%d)", + "empty directory has too many links (%d)", inode->i_nlink); inode->i_version++; clear_nlink(inode); @@ -2058,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); - drop_nlink(dir); + ext4_dec_count(handle, dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); @@ -2109,7 +2138,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + ext4_dec_count(handle, inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = ext4_current_time(inode); @@ -2159,7 +2188,7 @@ retry: err = __page_symlink(inode, symname, l, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { - drop_nlink(inode); + ext4_dec_count(handle, inode); ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; @@ -2185,8 +2214,9 @@ static int ext4_link (struct dentry * old_dentry, struct inode *inode = old_dentry->d_inode; int err, retries = 0; - if (inode->i_nlink >= EXT4_LINK_MAX) + if (EXT4_DIR_LINK_MAX(inode)) return -EMLINK; + /* * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing * otherwise has the potential to corrupt the orphan inode list. @@ -2204,7 +2234,7 @@ retry: handle->h_sync = 1; inode->i_ctime = ext4_current_time(inode); - inc_nlink(inode); + ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); err = ext4_add_nondir(handle, dentry, inode); @@ -2337,7 +2367,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, } if (new_inode) { - drop_nlink(new_inode); + ext4_dec_count(handle, new_inode); new_inode->i_ctime = ext4_current_time(new_inode); } old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); @@ -2348,11 +2378,13 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, dir_bh); - drop_nlink(old_dir); + ext4_dec_count(handle, old_dir); if (new_inode) { - drop_nlink(new_inode); + /* checked empty_dir above, can't have another parent, + * ext3_dec_count() won't work for many-linked dirs */ + new_inode->i_nlink = 0; } else { - inc_nlink(new_dir); + ext4_inc_count(handle, new_dir); ext4_update_dx_flag(new_dir); ext4_mark_inode_dirty(handle, new_dir); } diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 52dcc24dd986..cdee7aaa57aa 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -71,7 +71,7 @@ /* * Maximal count of links to a file */ -#define EXT4_LINK_MAX 32000 +#define EXT4_LINK_MAX 65000 /* * Macro-instructions used to manage several block sizes @@ -692,6 +692,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -710,6 +711,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) -- cgit v1.2.3