From ff9ddf7e847c4dc533f119efb6c77a6e57ab6397 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 18 Jul 2007 09:24:20 -0400 Subject: ext4: copy i_flags to inode flags on write Propagate flags such as S_APPEND, S_IMMUTABLE, etc. from i_flags into ext4-specific i_flags. Quota code changes these flags on quota files (to make it harder for sysadmin to screw himself) and these changes were not correctly propagated into the filesystem. (This is a forward port patch from ext3) Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8416fa28c422..49035c5a2c43 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2583,6 +2583,25 @@ void ext4_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } +/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ +void ext4_get_inode_flags(struct ext4_inode_info *ei) +{ + unsigned int flags = ei->vfs_inode.i_flags; + + ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL| + EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT4_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT4_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT4_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT4_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT4_DIRSYNC_FL; +} + void ext4_read_inode(struct inode * inode) { struct ext4_iloc iloc; @@ -2744,6 +2763,7 @@ static int ext4_do_update_inode(handle_t *handle, if (ei->i_state & EXT4_STATE_NEW) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); -- cgit v1.2.3 From ef7f38359ea8b3e9c7f2cae9a4d4935f55ca9e80 Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:15:20 -0400 Subject: ext4: Add nanosecond timestamps This patch adds nanosecond timestamps for ext4. This involves adding *time_extra fields to the ext4_inode to extend the timestamps to 64-bits. Creation time is also added by this patch. These extended fields will fit into an inode if the filesystem was formatted with large inodes (-I 256 or larger) and there are currently no EAs consuming all of the available space. For new inodes we always reserve enough space for the kernel's known extended fields, but for inodes created with an old kernel this might not have been the case. So this patch also adds the EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature flag(ro-compat so that older kernels can't create inodes with a smaller extra_isize). which indicates if the fields fitting inside s_min_extra_isize are available or not. If the expansion of inodes if unsuccessful then this feature will be disabled. This feature is only enabled if requested by the sysadmin. None of the extended inode fields is critical for correct filesystem operation. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Eric Sandeen Signed-off-by: Dave Kleikamp Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 8 ++--- fs/ext4/inode.c | 22 +++++++----- fs/ext4/ioctl.c | 4 +-- fs/ext4/namei.c | 16 ++++----- fs/ext4/super.c | 28 +++++++++++++++ fs/ext4/xattr.c | 2 +- include/linux/ext4_fs.h | 86 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/ext4_fs_i.h | 5 +++ include/linux/ext4_fs_sb.h | 1 + 9 files changed, 147 insertions(+), 25 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index c88b439ba5cd..427f83066a0d 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -563,7 +563,8 @@ got: inode->i_ino = ino; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = + ext4_current_time(inode); memset(ei->i_data, 0, sizeof(ei->i_data)); ei->i_dir_start_lookup = 0; @@ -595,9 +596,8 @@ got: spin_unlock(&sbi->s_next_gen_lock); ei->i_state = EXT4_STATE_NEW; - ei->i_extra_isize = - (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ? - sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; + + ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; if(DQUOT_ALLOC_INODE(inode)) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 49035c5a2c43..b83f91edebd1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -726,7 +726,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, /* We are done with atomic stuff, now do the rest of housekeeping */ - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* had we spliced it onto indirect block? */ @@ -2375,7 +2375,7 @@ do_indirects: ext4_discard_reservation(inode); mutex_unlock(&ei->truncate_mutex); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); /* @@ -2629,10 +2629,6 @@ void ext4_read_inode(struct inode * inode) } inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; ei->i_state = 0; ei->i_dir_start_lookup = 0; @@ -2710,6 +2706,11 @@ void ext4_read_inode(struct inode * inode) } else ei->i_extra_isize = 0; + EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); + if (S_ISREG(inode->i_mode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; @@ -2791,9 +2792,12 @@ static int ext4_do_update_inode(handle_t *handle, } raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + + EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); + EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 5b00775d5096..c04c7ccba9e3 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -97,7 +97,7 @@ int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, ei->i_flags = flags; ext4_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: @@ -134,7 +134,7 @@ flags_err: return PTR_ERR(handle); err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inode->i_generation = generation; err = ext4_mark_iloc_dirty(handle, inode, &iloc); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2de339dd7554..40106b7ea4b8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1295,7 +1295,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, * happen is that the times are slightly out of date * and/or different from the directory change time. */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + dir->i_mtime = dir->i_ctime = ext4_current_time(dir); ext4_update_dx_flag(dir); dir->i_version++; ext4_mark_inode_dirty(handle, dir); @@ -2056,7 +2056,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) * recovery. */ inode->i_size = 0; ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); drop_nlink(dir); ext4_update_dx_flag(dir); @@ -2106,13 +2106,13 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + dir->i_ctime = dir->i_mtime = ext4_current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; + inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); retval = 0; @@ -2203,7 +2203,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); inc_nlink(inode); atomic_inc(&inode->i_count); @@ -2305,7 +2305,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = CURRENT_TIME_SEC; + old_inode->i_ctime = ext4_current_time(old_inode); ext4_mark_inode_dirty(handle, old_inode); /* @@ -2338,9 +2338,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, if (new_inode) { drop_nlink(new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; + new_inode->i_ctime = ext4_current_time(new_inode); } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); ext4_update_dx_flag(old_dir); if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index af0835187e76..b47259f6f39c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1651,6 +1651,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_inode_size); goto failed_mount; } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) + sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << le32_to_cpu(es->s_log_frag_size); @@ -1874,6 +1876,32 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + + /* determine the minimum size of new large inodes, if present */ + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) { + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_want_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_want_extra_isize); + if (sbi->s_want_extra_isize < + le16_to_cpu(es->s_min_extra_isize)) + sbi->s_want_extra_isize = + le16_to_cpu(es->s_min_extra_isize); + } + } + /* Check if enough inode space is available */ + if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > + sbi->s_inode_size) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + printk(KERN_INFO "EXT4-fs: required extra inode space not" + "available.\n"); + } + /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e832e96095b3..fe16a569d06b 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1013,7 +1013,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, } if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; + inode->i_ctime = ext4_current_time(inode); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 45ec7258b2b1..df5e38faa15f 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -288,7 +288,7 @@ struct ext4_inode { __le16 i_uid; /* Low 16 bits of Owner Uid */ __le32 i_size; /* Size in bytes */ __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ + __le32 i_ctime; /* Inode Change time */ __le32 i_mtime; /* Modification time */ __le32 i_dtime; /* Deletion Time */ __le16 i_gid; /* Low 16 bits of Group Id */ @@ -337,10 +337,85 @@ struct ext4_inode { } osd2; /* OS dependent 2 */ __le16 i_extra_isize; __le16 i_pad1; + __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */ + __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */ + __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ + __le32 i_crtime; /* File Creation time */ + __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ }; #define i_size_high i_dir_acl +#define EXT4_EPOCH_BITS 2 +#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) +#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS) + +/* + * Extended fields will fit into an inode if the filesystem was formatted + * with large inodes (-I 256 or larger) and there are not currently any EAs + * consuming all of the available space. For new inodes we always reserve + * enough space for the kernel's known extended fields, but for inodes + * created with an old kernel this might not have been the case. None of + * the extended inode fields is critical for correct filesystem operation. + * This macro checks if a certain field fits in the inode. Note that + * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize + */ +#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \ + ((offsetof(typeof(*ext4_inode), field) + \ + sizeof((ext4_inode)->field)) \ + <= (EXT4_GOOD_OLD_INODE_SIZE + \ + (einode)->i_extra_isize)) \ + +static inline __le32 ext4_encode_extra_time(struct timespec *time) +{ + return cpu_to_le32((sizeof(time->tv_sec) > 4 ? + time->tv_sec >> 32 : 0) | + ((time->tv_nsec << 2) & EXT4_NSEC_MASK)); +} + +static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) +{ + if (sizeof(time->tv_sec) > 4) + time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) + << 32; + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2; +} + +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ +do { \ + (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(inode)->xtime); \ +} while (0) + +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + (raw_inode)->xtime ## _extra = \ + ext4_encode_extra_time(&(einode)->xtime); \ +} while (0) + +#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \ +do { \ + (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \ + ext4_decode_extra_time(&(inode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \ +do { \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \ + (einode)->xtime.tv_sec = \ + (signed)le32_to_cpu((raw_inode)->xtime); \ + if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \ + ext4_decode_extra_time(&(einode)->xtime, \ + raw_inode->xtime ## _extra); \ +} while (0) + #if defined(__KERNEL__) || defined(__linux__) #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_frag osd2.linux2.l_i_frag @@ -539,6 +614,13 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) return container_of(inode, struct ext4_inode_info, vfs_inode); } +static inline struct timespec ext4_current_time(struct inode *inode) +{ + return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? + current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; +} + + static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -609,6 +691,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 @@ -626,6 +709,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 9de494406995..1a511e9905aa 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -153,6 +153,11 @@ struct ext4_inode_info { unsigned long i_ext_generation; struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index 0f7dc15924bf..1b2ffee12be9 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -81,6 +81,7 @@ struct ext4_sb_info { char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ #ifdef EXTENTS_STATS /* ext4 extents stats */ -- cgit v1.2.3 From 6dd4ee7cab7e3a17c571aebd444f4344c8c4946e Mon Sep 17 00:00:00 2001 From: Kalpak Shah Date: Wed, 18 Jul 2007 09:19:57 -0400 Subject: ext4: Expand extra_inodes space per the s_{want,min}_extra_isize fields We need to make sure that existing ext3 filesystems can also avail the new fields that have been added to the ext4 inode. We use s_want_extra_isize and s_min_extra_isize to decide by how much we should expand the inode. If EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE feature is set then we expand the inode by max(s_want_extra_isize, s_min_extra_isize , sizeof(ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE) bytes. Actually it is still an open question about whether users should be able to set s_*_extra_isize smaller than the known fields or not. This patch also adds the functionality to expand inodes to include the newly added fields. We start by trying to expand by s_want_extra_isize bytes and if its fails we try to expand by s_min_extra_isize bytes. This is done by changing the i_extra_isize if enough space is available in the inode and no EAs are present. If EAs are present and there is enough space in the inode then the EAs in the inode are shifted to make space. If enough space is not available in the inode due to the EAs then 1 or more EAs are shifted to the external EA block. In the worst case when even the external EA block does not have enough space we inform the user that some EA would need to be deleted or s_min_extra_isize would have to be reduced. Signed-off-by: Andreas Dilger Signed-off-by: Kalpak Shah Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 63 ++++++++++- fs/ext4/xattr.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++-- fs/ext4/xattr.h | 17 +++ include/linux/ext4_fs.h | 1 + 4 files changed, 347 insertions(+), 8 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b83f91edebd1..f6d8528c4f55 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3105,6 +3105,39 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, return err; } +/* + * Expand an inode by new_extra_isize bytes. + * Returns 0 on success or negative error number on failure. + */ +int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, + struct ext4_iloc iloc, handle_t *handle) +{ + struct ext4_inode *raw_inode; + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) + return 0; + + raw_inode = ext4_raw_inode(&iloc); + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* No extended attributes present */ + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { + memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, + new_extra_isize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + return 0; + } + + /* try to expand with EAs present */ + return ext4_expand_extra_isize_ea(inode, new_extra_isize, + raw_inode, handle); +} + /* * What we do here is to mark the in-core inode as clean with respect to inode * dirtiness (it may still be data-dirty). @@ -3129,10 +3162,38 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { struct ext4_iloc iloc; - int err; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; + int err, ret; might_sleep(); err = ext4_reserve_inode_write(handle, inode, &iloc); + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && + !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { + /* + * We need extra buffer credits since we may write into EA block + * with this same handle. If journal_extend fails, then it will + * only result in a minor loss of functionality for that inode. + * If this is felt to be critical, then e2fsck should be run to + * force a large enough s_min_extra_isize. + */ + if ((jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { + ret = ext4_expand_extra_isize(inode, + sbi->s_want_extra_isize, + iloc, handle); + if (ret) { + EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + if (mnt_count != sbi->s_es->s_mnt_count) { + ext4_warning(inode->i_sb, __FUNCTION__, + "Unable to expand inode %lu. Delete" + " some EAs or run e2fsck.", + inode->i_ino); + mnt_count = sbi->s_es->s_mnt_count; + } + } + } + } if (!err) err = ext4_mark_iloc_dirty(handle, inode, &iloc); return err; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fe16a569d06b..b10d68fffb55 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -66,13 +66,6 @@ #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -#define IHDR(inode, raw_inode) \ - ((struct ext4_xattr_ibody_header *) \ - ((void *)raw_inode + \ - EXT4_GOOD_OLD_INODE_SIZE + \ - EXT4_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) - #ifdef EXT4_XATTR_DEBUG # define ea_idebug(inode, f...) do { \ printk(KERN_DEBUG "inode %s:%lu: ", \ @@ -508,6 +501,24 @@ out: return; } +/* + * Find the available free space for EAs. This also returns the total number of + * bytes used by EA entries. + */ +static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, + size_t *min_offs, void *base, int *total) +{ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + *total += EXT4_XATTR_LEN(last->e_name_len); + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < *min_offs) + *min_offs = offs; + } + } + return (*min_offs - ((void *)last - base) - sizeof(__u32)); +} + struct ext4_xattr_info { int name_index; const char *name; @@ -1014,6 +1025,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (!error) { ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = ext4_current_time(inode); + if (!value) + EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* * The bh is consumed by ext4_mark_iloc_dirty, even with @@ -1066,6 +1079,253 @@ retry: return error; } +/* + * Shift the EA entries in the inode to create space for the increased + * i_extra_isize. + */ +static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry, + int value_offs_shift, void *to, + void *from, size_t n, int blocksize) +{ + struct ext4_xattr_entry *last = entry; + int new_offs; + + /* Adjust the value offsets of the entries */ + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + if (!last->e_value_block && last->e_value_size) { + new_offs = le16_to_cpu(last->e_value_offs) + + value_offs_shift; + BUG_ON(new_offs + le32_to_cpu(last->e_value_size) + > blocksize); + last->e_value_offs = cpu_to_le16(new_offs); + } + } + /* Shift the entries by n bytes */ + memmove(to, from, n); +} + +/* + * Expand an inode by new_extra_isize bytes when EAs are present. + * Returns 0 on success or negative error number on failure. + */ +int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry, *last, *first; + struct buffer_head *bh = NULL; + struct ext4_xattr_ibody_find *is = NULL; + struct ext4_xattr_block_find *bs = NULL; + char *buffer = NULL, *b_entry_name = NULL; + size_t min_offs, free; + int total_ino, total_blk; + void *base, *start, *end; + int extra_isize = 0, error = 0, tried_min_extra_isize = 0; + int s_min_extra_isize = EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize; + + down_write(&EXT4_I(inode)->xattr_sem); +retry: + if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) { + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + } + + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + + /* + * Check if enough free space is available in the inode to shift the + * entries ahead by new_extra_isize. + */ + + base = start = entry; + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + min_offs = end - base; + last = entry; + total_ino = sizeof(struct ext4_xattr_ibody_header); + + free = ext4_xattr_free_space(last, &min_offs, base, &total_ino); + if (free >= new_extra_isize) { + entry = IFIRST(header); + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize + - new_extra_isize, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, + (void *)header, total_ino, + inode->i_sb->s_blocksize); + EXT4_I(inode)->i_extra_isize = new_extra_isize; + error = 0; + goto cleanup; + } + + /* + * Enough free space isn't available in the inode, check if + * EA block can hold new_extra_isize bytes. + */ + if (EXT4_I(inode)->i_file_acl) { + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + if (ext4_xattr_check_block(bh)) { + ext4_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block %llu", inode->i_ino, + EXT4_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + base = BHDR(bh); + first = BFIRST(bh); + end = bh->b_data + bh->b_size; + min_offs = end - base; + free = ext4_xattr_free_space(first, &min_offs, base, + &total_blk); + if (free < new_extra_isize) { + if (!tried_min_extra_isize && s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + brelse(bh); + goto retry; + } + error = -1; + goto cleanup; + } + } else { + free = inode->i_sb->s_blocksize; + } + + while (new_extra_isize > 0) { + size_t offs, size, entry_size; + struct ext4_xattr_entry *small_entry = NULL; + struct ext4_xattr_info i = { + .value = NULL, + .value_len = 0, + }; + unsigned int total_size; /* EA entry size + value size */ + unsigned int shift_bytes; /* No. of bytes to shift EAs by? */ + unsigned int min_total_size = ~0U; + + is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); + bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); + if (!is || !bs) { + error = -ENOMEM; + goto cleanup; + } + + is->s.not_found = -ENODATA; + bs->s.not_found = -ENODATA; + is->iloc.bh = NULL; + bs->bh = NULL; + + last = IFIRST(header); + /* Find the entry best suited to be pushed into EA block */ + entry = NULL; + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + total_size = + EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) + + EXT4_XATTR_LEN(last->e_name_len); + if (total_size <= free && total_size < min_total_size) { + if (total_size < new_extra_isize) { + small_entry = last; + } else { + entry = last; + min_total_size = total_size; + } + } + } + + if (entry == NULL) { + if (small_entry) { + entry = small_entry; + } else { + if (!tried_min_extra_isize && + s_min_extra_isize) { + tried_min_extra_isize++; + new_extra_isize = s_min_extra_isize; + goto retry; + } + error = -1; + goto cleanup; + } + } + offs = le16_to_cpu(entry->e_value_offs); + size = le32_to_cpu(entry->e_value_size); + entry_size = EXT4_XATTR_LEN(entry->e_name_len); + i.name_index = entry->e_name_index, + buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS); + b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); + if (!buffer || !b_entry_name) { + error = -ENOMEM; + goto cleanup; + } + /* Save the entry name and the entry value */ + memcpy(buffer, (void *)IFIRST(header) + offs, + EXT4_XATTR_SIZE(size)); + memcpy(b_entry_name, entry->e_name, entry->e_name_len); + b_entry_name[entry->e_name_len] = '\0'; + i.name = b_entry_name; + + error = ext4_get_inode_loc(inode, &is->iloc); + if (error) + goto cleanup; + + error = ext4_xattr_ibody_find(inode, &i, is); + if (error) + goto cleanup; + + /* Remove the chosen entry from the inode */ + error = ext4_xattr_ibody_set(handle, inode, &i, is); + + entry = IFIRST(header); + if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize) + shift_bytes = new_extra_isize; + else + shift_bytes = entry_size + size; + /* Adjust the offsets and shift the remaining entries ahead */ + ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize - + shift_bytes, (void *)raw_inode + + EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes, + (void *)header, total_ino - entry_size, + inode->i_sb->s_blocksize); + + extra_isize += shift_bytes; + new_extra_isize -= shift_bytes; + EXT4_I(inode)->i_extra_isize = extra_isize; + + i.name = b_entry_name; + i.value = buffer; + i.value_len = cpu_to_le32(size); + error = ext4_xattr_block_find(inode, &i, bs); + if (error) + goto cleanup; + + /* Add entry which was removed from the inode into the block */ + error = ext4_xattr_block_set(handle, inode, &i, bs); + if (error) + goto cleanup; + kfree(b_entry_name); + kfree(buffer); + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + } + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return 0; + +cleanup: + kfree(b_entry_name); + kfree(buffer); + if (is) + brelse(is->iloc.bh); + kfree(is); + kfree(bs); + brelse(bh); + up_write(&EXT4_I(inode)->xattr_sem); + return error; +} + + + /* * ext4_xattr_delete_inode() * diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 79432b35398f..d7f5d6a12651 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -56,6 +56,13 @@ struct ext4_xattr_entry { #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) +#define IHDR(inode, raw_inode) \ + ((struct ext4_xattr_ibody_header *) \ + ((void *)raw_inode + \ + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) + # ifdef CONFIG_EXT4DEV_FS_XATTR extern struct xattr_handler ext4_xattr_user_handler; @@ -74,6 +81,9 @@ extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, extern void ext4_xattr_delete_inode(handle_t *, struct inode *); extern void ext4_xattr_put_super(struct super_block *); +extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle); + extern int init_ext4_xattr(void); extern void exit_ext4_xattr(void); @@ -129,6 +139,13 @@ exit_ext4_xattr(void) { } +static inline int +ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle) +{ + return -EOPNOTSUPP; +} + #define ext4_xattr_handlers NULL # endif /* CONFIG_EXT4DEV_FS_XATTR */ diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index df5e38faa15f..52dcc24dd986 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -202,6 +202,7 @@ struct ext4_group_desc #define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { -- cgit v1.2.3 From fc0e15a667121e02686cc52679f6272959fb60cc Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 18 Jul 2007 09:20:44 -0400 Subject: Use zero_user_page() in ext4 where possible Signed-off-by: Eric Sandeen Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f6d8528c4f55..125aca714685 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1766,7 +1766,6 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, struct inode *inode = mapping->host; struct buffer_head *bh; int err = 0; - void *kaddr; blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); @@ -1778,10 +1777,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode) && PageUptodate(page)) { - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); set_page_dirty(page); goto unlock; } @@ -1834,10 +1830,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, length); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + zero_user_page(page, offset, length, KM_USER0); BUFFER_TRACE(bh, "zeroed end of block"); -- cgit v1.2.3 From d699594dc151c664a2b307e680a3cd9b7145fd83 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 18 Jul 2007 08:33:51 -0400 Subject: ext4: remove extra IS_RDONLY() check ext4_change_inode_journal_flag() is only called from one location: ext4_ioctl(EXT3_IOC_SETFLAGS). That ioctl case already has a IS_RDONLY() call in it so this one is superfluous. Signed-off-by: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Dave Kleikamp Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/inode.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 125aca714685..de26c25d6a18 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3275,7 +3275,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) */ journal = EXT4_JOURNAL(inode); - if (is_journal_aborted(journal) || IS_RDONLY(inode)) + if (is_journal_aborted(journal)) return -EROFS; jbd2_journal_lock_updates(journal); -- cgit v1.2.3