diff options
61 files changed, 7656 insertions, 2552 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 4340cc825796..c2a0871280a0 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -28,13 +28,9 @@ Manish Singh <manish.singh@oracle.com> Caveats ======= Features which OCFS2 does not support yet: - - extended attributes - quotas - - cluster aware flock - - cluster aware lockf - Directory change notification (F_NOTIFY) - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease) - - POSIX ACLs Mount options ============= @@ -82,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at bits of significance. user_xattr (*) Enables Extended User Attributes. nouser_xattr Disables Extended User Attributes. +acl Enables POSIX Access Control Lists support. +noacl (*) Disables POSIX Access Control Lists support. diff --git a/fs/Kconfig b/fs/Kconfig index 522469a7eca3..107f1cda2adc 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -189,6 +189,8 @@ config OCFS2_FS select CONFIGFS_FS select JBD2 select CRC32 + select QUOTA + select QUOTA_TREE help OCFS2 is a general purpose extent based shared disk cluster file system with many similarities to ext3. It supports 64 bit inode @@ -258,15 +260,14 @@ config OCFS2_DEBUG_FS this option for debugging only as it is likely to decrease performance of the filesystem. -config OCFS2_COMPAT_JBD - bool "Use JBD for compatibility" +config OCFS2_FS_POSIX_ACL + bool "OCFS2 POSIX Access Control Lists" depends on OCFS2_FS + select FS_POSIX_ACL default n - select JBD help - The ocfs2 filesystem now uses JBD2 for its journalling. JBD2 - is backwards compatible with JBD. It is safe to say N here. - However, if you really want to use the original JBD, say Y here. + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. endif # BLOCK @@ -340,6 +341,10 @@ config PRINT_QUOTA_WARNING Note that this behavior is currently deprecated and may go away in future. 
Please use notification via netlink socket instead. +# Generic support for tree structured quota files. Selected when needed. +config QUOTA_TREE + tristate + config QFMT_V1 tristate "Old quota format support" depends on QUOTA @@ -351,6 +356,7 @@ config QFMT_V1 config QFMT_V2 tristate "Quota format v2 support" depends on QUOTA + select QUOTA_TREE help This quota format allows using quotas with 32-bit UIDs/GIDs. If you need this functionality say Y here. diff --git a/fs/Makefile b/fs/Makefile index d9f8afe6f0c4..cdf655640594 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o obj-$(CONFIG_QFMT_V2) += quota_v2.o +obj-$(CONFIG_QUOTA_TREE) += quota_tree.o obj-$(CONFIG_QUOTACTL) += quota.o obj-$(CONFIG_DNOTIFY) += dnotify.o diff --git a/fs/dquot.c b/fs/dquot.c index c237ccc8581c..61bfff64e5af 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash; struct dqstats dqstats; -static void dqput(struct dquot *dquot); - static inline unsigned int hashfn(const struct super_block *sb, unsigned int id, int type) { @@ -415,6 +413,17 @@ out_dqlock: return ret; } +void dquot_destroy(struct dquot *dquot) +{ + kmem_cache_free(dquot_cachep, dquot); +} +EXPORT_SYMBOL(dquot_destroy); + +static inline void do_destroy_dquot(struct dquot *dquot) +{ + dquot->dq_sb->dq_op->destroy_dquot(dquot); +} + /* Invalidate all dquots on the list. Note that this function is called after * quota is disabled and pointers from inodes removed so there cannot be new * quota users. 
There can still be some users of quotas due to inodes being @@ -463,9 +472,44 @@ restart: remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); - kmem_cache_free(dquot_cachep, dquot); + do_destroy_dquot(dquot); + } + spin_unlock(&dq_list_lock); +} + +/* Call callback for every active dquot on given filesystem */ +int dquot_scan_active(struct super_block *sb, + int (*fn)(struct dquot *dquot, unsigned long priv), + unsigned long priv) +{ + struct dquot *dquot, *old_dquot = NULL; + int ret = 0; + + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + spin_lock(&dq_list_lock); + list_for_each_entry(dquot, &inuse_list, dq_inuse) { + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + continue; + if (dquot->dq_sb != sb) + continue; + /* Now we have active dquot so we can just increase use count */ + atomic_inc(&dquot->dq_count); + dqstats.lookups++; + spin_unlock(&dq_list_lock); + dqput(old_dquot); + old_dquot = dquot; + ret = fn(dquot, priv); + if (ret < 0) + goto out; + spin_lock(&dq_list_lock); + /* We are safe to continue now because our dquot could not + * be moved out of the inuse list while we hold the reference */ } spin_unlock(&dq_list_lock); +out: + dqput(old_dquot); + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + return ret; } int vfs_quota_sync(struct super_block *sb, int type) @@ -479,7 +523,7 @@ int vfs_quota_sync(struct super_block *sb, int type) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); dirty = &dqopt->info[cnt].dqi_dirty_list; @@ -504,8 +548,8 @@ int vfs_quota_sync(struct super_block *sb, int type) } for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) - && info_dirty(&dqopt->info[cnt])) + if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt) + && info_dirty(&dqopt->info[cnt])) sb->dq_op->write_info(sb, cnt); spin_lock(&dq_list_lock); 
dqstats.syncs++; @@ -527,7 +571,7 @@ static void prune_dqcache(int count) remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); - kmem_cache_free(dquot_cachep, dquot); + do_destroy_dquot(dquot); count--; head = free_dquots.prev; } @@ -558,7 +602,7 @@ static struct shrinker dqcache_shrinker = { * NOTE: If you change this function please check whether dqput_blocks() works right... * MUST be called with either dqptr_sem or dqonoff_mutex held */ -static void dqput(struct dquot *dquot) +void dqput(struct dquot *dquot) { int ret; @@ -584,7 +628,7 @@ we_slept: /* We have more than one user... nothing to do */ atomic_dec(&dquot->dq_count); /* Releasing dquot during quotaoff phase? */ - if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) && + if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) && atomic_read(&dquot->dq_count) == 1) wake_up(&dquot->dq_wait_unused); spin_unlock(&dq_list_lock); @@ -625,11 +669,17 @@ we_slept: spin_unlock(&dq_list_lock); } +struct dquot *dquot_alloc(struct super_block *sb, int type) +{ + return kmem_cache_zalloc(dquot_cachep, GFP_NOFS); +} +EXPORT_SYMBOL(dquot_alloc); + static struct dquot *get_empty_dquot(struct super_block *sb, int type) { struct dquot *dquot; - dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS); + dquot = sb->dq_op->alloc_dquot(sb, type); if(!dquot) return NODQUOT; @@ -647,15 +697,33 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) } /* + * Check whether dquot is in memory. 
+ * MUST be called with either dqptr_sem or dqonoff_mutex held + */ +int dquot_is_cached(struct super_block *sb, unsigned int id, int type) +{ + unsigned int hashent = hashfn(sb, id, type); + int ret = 0; + + if (!sb_has_quota_active(sb, type)) + return 0; + spin_lock(&dq_list_lock); + if (find_dquot(hashent, sb, id, type) != NODQUOT) + ret = 1; + spin_unlock(&dq_list_lock); + return ret; +} + +/* * Get reference to dquot * MUST be called with either dqptr_sem or dqonoff_mutex held */ -static struct dquot *dqget(struct super_block *sb, unsigned int id, int type) +struct dquot *dqget(struct super_block *sb, unsigned int id, int type) { unsigned int hashent = hashfn(sb, id, type); struct dquot *dquot, *empty = NODQUOT; - if (!sb_has_quota_enabled(sb, type)) + if (!sb_has_quota_active(sb, type)) return NODQUOT; we_slept: spin_lock(&dq_list_lock); @@ -682,7 +750,7 @@ we_slept: dqstats.lookups++; spin_unlock(&dq_list_lock); if (empty) - kmem_cache_free(dquot_cachep, empty); + do_destroy_dquot(empty); } /* Wait for dq_lock - after this we know that either dquot_release() is already * finished or it will be canceled due to dq_count > 1 test */ @@ -820,7 +888,7 @@ static void drop_dquot_ref(struct super_block *sb, int type) } } -static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) +static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number) { dquot->dq_dqb.dqb_curinodes += number; } @@ -830,9 +898,10 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) dquot->dq_dqb.dqb_curspace += number; } -static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number) +static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number) { - if (dquot->dq_dqb.dqb_curinodes > number) + if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || + dquot->dq_dqb.dqb_curinodes >= number) dquot->dq_dqb.dqb_curinodes -= number; else dquot->dq_dqb.dqb_curinodes = 0; @@ -843,11 +912,12 @@ static inline void 
dquot_decr_inodes(struct dquot *dquot, unsigned long number) static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) { - if (dquot->dq_dqb.dqb_curspace > number) + if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || + dquot->dq_dqb.dqb_curspace >= number) dquot->dq_dqb.dqb_curspace -= number; else dquot->dq_dqb.dqb_curspace = 0; - if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1023,10 +1093,11 @@ static inline char ignore_hardlimit(struct dquot *dquot) } /* needs dq_data_lock */ -static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) +static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) { *warntype = QUOTA_NL_NOWARN; - if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags)) + if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || + test_bit(DQ_FAKE_B, &dquot->dq_flags)) return QUOTA_OK; if (dquot->dq_dqb.dqb_ihardlimit && @@ -1058,11 +1129,12 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype) static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) { *warntype = QUOTA_NL_NOWARN; - if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags)) + if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) || + test_bit(DQ_FAKE_B, &dquot->dq_flags)) return QUOTA_OK; if (dquot->dq_dqb.dqb_bhardlimit && - toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit && + dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit && !ignore_hardlimit(dquot)) { if (!prealloc) *warntype = QUOTA_NL_BHARDWARN; @@ -1070,7 +1142,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war } if (dquot->dq_dqb.dqb_bsoftlimit && - toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit && + dquot->dq_dqb.dqb_curspace + space > 
dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime && !ignore_hardlimit(dquot)) { if (!prealloc) @@ -1079,7 +1151,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war } if (dquot->dq_dqb.dqb_bsoftlimit && - toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit && + dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_btime == 0) { if (!prealloc) { *warntype = QUOTA_NL_BSOFTWARN; @@ -1096,10 +1168,11 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war return QUOTA_OK; } -static int info_idq_free(struct dquot *dquot, ulong inodes) +static int info_idq_free(struct dquot *dquot, qsize_t inodes) { if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit) + dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit || + !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type)) return QUOTA_NL_NOWARN; if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit) @@ -1113,15 +1186,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes) static int info_bdq_free(struct dquot *dquot, qsize_t space) { if (test_bit(DQ_FAKE_B, &dquot->dq_flags) || - toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit) + dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_NOWARN; - if (toqb(dquot->dq_dqb.dqb_curspace - space) <= - dquot->dq_dqb.dqb_bsoftlimit) + if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit) return QUOTA_NL_BSOFTBELOW; - if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit && - toqb(dquot->dq_dqb.dqb_curspace - space) < - dquot->dq_dqb.dqb_bhardlimit) + if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit && + dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit) return QUOTA_NL_BHARDBELOW; return QUOTA_NL_NOWARN; } @@ -1166,17 +1237,23 @@ 
out_err: * Release all quotas referenced by inode * Transaction must be started at an entry */ -int dquot_drop(struct inode *inode) +int dquot_drop_locked(struct inode *inode) { int cnt; - down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] != NODQUOT) { dqput(inode->i_dquot[cnt]); inode->i_dquot[cnt] = NODQUOT; } } + return 0; +} + +int dquot_drop(struct inode *inode) +{ + down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + dquot_drop_locked(inode); up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); return 0; } @@ -1264,7 +1341,7 @@ warn_put_all: /* * This operation can block, but only after everything is updated */ -int dquot_alloc_inode(const struct inode *inode, unsigned long number) +int dquot_alloc_inode(const struct inode *inode, qsize_t number) { int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; @@ -1349,7 +1426,7 @@ out_sub: /* * This operation can block, but only after everything is updated */ -int dquot_free_inode(const struct inode *inode, unsigned long number) +int dquot_free_inode(const struct inode *inode, qsize_t number) { unsigned int cnt; char warntype[MAXQUOTAS]; @@ -1495,7 +1572,7 @@ warn_put_all: /* Wrapper for transferring ownership of an inode */ int vfs_dq_transfer(struct inode *inode, struct iattr *iattr) { - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) { + if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) { vfs_dq_init(inode); if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA) return 1; @@ -1533,54 +1610,27 @@ struct dquot_operations dquot_operations = { .acquire_dquot = dquot_acquire, .release_dquot = dquot_release, .mark_dirty = dquot_mark_dquot_dirty, - .write_info = dquot_commit_info + .write_info = dquot_commit_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, }; -static inline void set_enable_flags(struct quota_info *dqopt, int type) -{ - switch (type) { - case USRQUOTA: - dqopt->flags |= DQUOT_USR_ENABLED; - dqopt->flags &= 
~DQUOT_USR_SUSPENDED; - break; - case GRPQUOTA: - dqopt->flags |= DQUOT_GRP_ENABLED; - dqopt->flags &= ~DQUOT_GRP_SUSPENDED; - break; - } -} - -static inline void reset_enable_flags(struct quota_info *dqopt, int type, - int remount) -{ - switch (type) { - case USRQUOTA: - dqopt->flags &= ~DQUOT_USR_ENABLED; - if (remount) - dqopt->flags |= DQUOT_USR_SUSPENDED; - else - dqopt->flags &= ~DQUOT_USR_SUSPENDED; - break; - case GRPQUOTA: - dqopt->flags &= ~DQUOT_GRP_ENABLED; - if (remount) - dqopt->flags |= DQUOT_GRP_SUSPENDED; - else - dqopt->flags &= ~DQUOT_GRP_SUSPENDED; - break; - } -} - - /* * Turn quota off on a device. type == -1 ==> quotaoff for all types (umount) */ -int vfs_quota_off(struct super_block *sb, int type, int remount) +int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags) { int cnt, ret = 0; struct quota_info *dqopt = sb_dqopt(sb); struct inode *toputinode[MAXQUOTAS]; + /* Cannot turn off usage accounting without turning off limits, or + * suspend quotas and simultaneously turn quotas off. */ + if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED)) + || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED | + DQUOT_USAGE_ENABLED))) + return -EINVAL; + /* We need to serialize quota_off() for device */ mutex_lock(&dqopt->dqonoff_mutex); @@ -1589,7 +1639,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) * sometimes we are called when fill_super() failed and calling * sync_fs() in such cases does no good. */ - if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) { + if (!sb_any_quota_loaded(sb)) { mutex_unlock(&dqopt->dqonoff_mutex); return 0; } @@ -1597,17 +1647,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) toputinode[cnt] = NULL; if (type != -1 && cnt != type) continue; - /* If we keep inodes of quota files after remount and quotaoff - * is called, drop kept inodes. 
*/ - if (!remount && sb_has_quota_suspended(sb, cnt)) { - iput(dqopt->files[cnt]); - dqopt->files[cnt] = NULL; - reset_enable_flags(dqopt, cnt, 0); + if (!sb_has_quota_loaded(sb, cnt)) continue; + + if (flags & DQUOT_SUSPENDED) { + dqopt->flags |= + dquot_state_flag(DQUOT_SUSPENDED, cnt); + } else { + dqopt->flags &= ~dquot_state_flag(flags, cnt); + /* Turning off suspended quotas? */ + if (!sb_has_quota_loaded(sb, cnt) && + sb_has_quota_suspended(sb, cnt)) { + dqopt->flags &= ~dquot_state_flag( + DQUOT_SUSPENDED, cnt); + iput(dqopt->files[cnt]); + dqopt->files[cnt] = NULL; + continue; + } } - if (!sb_has_quota_enabled(sb, cnt)) + + /* We still have to keep quota loaded? */ + if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED)) continue; - reset_enable_flags(dqopt, cnt, remount); /* Note: these are blocking operations */ drop_dquot_ref(sb, cnt); @@ -1623,7 +1684,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) put_quota_format(dqopt->info[cnt].dqi_format); toputinode[cnt] = dqopt->files[cnt]; - if (!remount) + if (!sb_has_quota_loaded(sb, cnt)) dqopt->files[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; @@ -1631,6 +1692,11 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) dqopt->ops[cnt] = NULL; } mutex_unlock(&dqopt->dqonoff_mutex); + + /* Skip syncing and setting flags if quota files are hidden */ + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) + goto put_inodes; + /* Sync the superblock so that buffers with quota data are written to * disk (and so userspace sees correct data afterwards). 
*/ if (sb->s_op->sync_fs) @@ -1646,7 +1712,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) mutex_lock(&dqopt->dqonoff_mutex); /* If quota was reenabled in the meantime, we have * nothing to do */ - if (!sb_has_quota_enabled(sb, cnt)) { + if (!sb_has_quota_loaded(sb, cnt)) { mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA); toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | S_NOATIME | S_NOQUOTA); @@ -1655,26 +1721,43 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) mark_inode_dirty(toputinode[cnt]); } mutex_unlock(&dqopt->dqonoff_mutex); + } + if (sb->s_bdev) + invalidate_bdev(sb->s_bdev); +put_inodes: + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (toputinode[cnt]) { /* On remount RO, we keep the inode pointer so that we - * can reenable quota on the subsequent remount RW. - * But we have better not keep inode pointer when there - * is pending delete on the quota file... */ - if (!remount) + * can reenable quota on the subsequent remount RW. We + * have to check 'flags' variable and not use sb_has_ + * function because another quotaon / quotaoff could + * change global state before we got here. We refuse + * to suspend quotas when there is pending delete on + * the quota file... */ + if (!(flags & DQUOT_SUSPENDED)) iput(toputinode[cnt]); else if (!toputinode[cnt]->i_nlink) ret = -EBUSY; } - if (sb->s_bdev) - invalidate_bdev(sb->s_bdev); return ret; } +int vfs_quota_off(struct super_block *sb, int type, int remount) +{ + return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED : + (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED)); +} + /* * Turn quotas on on a device */ -/* Helper function when we already have the inode */ -static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) +/* + * Helper function to turn quotas on when we already have the inode of + * quota file and no quota information is loaded. 
+ */ +static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, + unsigned int flags) { struct quota_format_type *fmt = find_quota_format(format_id); struct super_block *sb = inode->i_sb; @@ -1696,27 +1779,37 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) error = -EINVAL; goto out_fmt; } + /* Usage always has to be set... */ + if (!(flags & DQUOT_USAGE_ENABLED)) { + error = -EINVAL; + goto out_fmt; + } - /* As we bypass the pagecache we must now flush the inode so that - * we see all the changes from userspace... */ - write_inode_now(inode, 1); - /* And now flush the block cache so that kernel sees the changes */ - invalidate_bdev(sb->s_bdev); + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + /* As we bypass the pagecache we must now flush the inode so + * that we see all the changes from userspace... */ + write_inode_now(inode, 1); + /* And now flush the block cache so that kernel sees the + * changes */ + invalidate_bdev(sb->s_bdev); + } mutex_lock(&inode->i_mutex); mutex_lock(&dqopt->dqonoff_mutex); - if (sb_has_quota_enabled(sb, type) || - sb_has_quota_suspended(sb, type)) { + if (sb_has_quota_loaded(sb, type)) { error = -EBUSY; goto out_lock; } - /* We don't want quota and atime on quota files (deadlocks possible) - * Also nobody should write to the file - we use special IO operations - * which ignore the immutable bit. */ - down_write(&dqopt->dqptr_sem); - oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA); - inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; - up_write(&dqopt->dqptr_sem); - sb->dq_op->drop(inode); + + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + /* We don't want quota and atime on quota files (deadlocks + * possible) Also nobody should write to the file - we use + * special IO operations which ignore the immutable bit. 
*/ + down_write(&dqopt->dqptr_sem); + oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA); + inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; + up_write(&dqopt->dqptr_sem); + sb->dq_op->drop(inode); + } error = -EIO; dqopt->files[type] = igrab(inode); @@ -1737,7 +1830,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id) } mutex_unlock(&dqopt->dqio_mutex); mutex_unlock(&inode->i_mutex); - set_enable_flags(dqopt, type); + dqopt->flags |= dquot_state_flag(flags, type); add_dquot_ref(sb, type); mutex_unlock(&dqopt->dqonoff_mutex); @@ -1770,20 +1863,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type) struct quota_info *dqopt = sb_dqopt(sb); struct inode *inode; int ret; + unsigned int flags; mutex_lock(&dqopt->dqonoff_mutex); if (!sb_has_quota_suspended(sb, type)) { mutex_unlock(&dqopt->dqonoff_mutex); return 0; } - BUG_ON(sb_has_quota_enabled(sb, type)); - inode = dqopt->files[type]; dqopt->files[type] = NULL; - reset_enable_flags(dqopt, type, 0); + flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED, type); + dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type); mutex_unlock(&dqopt->dqonoff_mutex); - ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id); + flags = dquot_generic_flag(flags, type); + ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id, + flags); iput(inode); return ret; @@ -1799,12 +1895,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id, if (path->mnt->mnt_sb != sb) error = -EXDEV; else - error = vfs_quota_on_inode(path->dentry->d_inode, type, - format_id); + error = vfs_load_quota_inode(path->dentry->d_inode, type, + format_id, DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED); return error; } -/* Actual function called from quotactl() */ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name, int remount) { @@ -1823,6 +1919,50 @@ int vfs_quota_on(struct super_block *sb, 
int type, int format_id, char *name, } /* + * More powerful function for turning on quotas allowing setting + * of individual quota flags + */ +int vfs_quota_enable(struct inode *inode, int type, int format_id, + unsigned int flags) +{ + int ret = 0; + struct super_block *sb = inode->i_sb; + struct quota_info *dqopt = sb_dqopt(sb); + + /* Just unsuspend quotas? */ + if (flags & DQUOT_SUSPENDED) + return vfs_quota_on_remount(sb, type); + if (!flags) + return 0; + /* Just updating flags needed? */ + if (sb_has_quota_loaded(sb, type)) { + mutex_lock(&dqopt->dqonoff_mutex); + /* Now do a reliable test... */ + if (!sb_has_quota_loaded(sb, type)) { + mutex_unlock(&dqopt->dqonoff_mutex); + goto load_quota; + } + if (flags & DQUOT_USAGE_ENABLED && + sb_has_quota_usage_enabled(sb, type)) { + ret = -EBUSY; + goto out_lock; + } + if (flags & DQUOT_LIMITS_ENABLED && + sb_has_quota_limits_enabled(sb, type)) { + ret = -EBUSY; + goto out_lock; + } + sb_dqopt(sb)->flags |= dquot_state_flag(flags, type); +out_lock: + mutex_unlock(&dqopt->dqonoff_mutex); + return ret; + } + +load_quota: + return vfs_load_quota_inode(inode, type, format_id, flags); +} + +/* * This function is used when filesystem needs to initialize quotas * during mount time. 
*/ @@ -1843,7 +1983,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name, error = security_quota_on(dentry); if (!error) - error = vfs_quota_on_inode(dentry->d_inode, type, format_id); + error = vfs_load_quota_inode(dentry->d_inode, type, format_id, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); out: dput(dentry); @@ -1866,14 +2007,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb) return ret; } +static inline qsize_t qbtos(qsize_t blocks) +{ + return blocks << QIF_DQBLKSIZE_BITS; +} + +static inline qsize_t stoqb(qsize_t space) +{ + return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { struct mem_dqblk *dm = &dquot->dq_dqb; spin_lock(&dq_data_lock); - di->dqb_bhardlimit = dm->dqb_bhardlimit; - di->dqb_bsoftlimit = dm->dqb_bsoftlimit; + di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit); + di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit); di->dqb_curspace = dm->dqb_curspace; di->dqb_ihardlimit = dm->dqb_ihardlimit; di->dqb_isoftlimit = dm->dqb_isoftlimit; @@ -1918,28 +2069,36 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di) if (di->dqb_valid & QIF_SPACE) { dm->dqb_curspace = di->dqb_curspace; check_blim = 1; + __set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_BLIMITS) { - dm->dqb_bsoftlimit = di->dqb_bsoftlimit; - dm->dqb_bhardlimit = di->dqb_bhardlimit; + dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit); + dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit); check_blim = 1; + __set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_INODES) { dm->dqb_curinodes = di->dqb_curinodes; check_ilim = 1; + __set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); } if (di->dqb_valid & QIF_ILIMITS) { dm->dqb_isoftlimit = di->dqb_isoftlimit; dm->dqb_ihardlimit = di->dqb_ihardlimit; check_ilim = 1; + __set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, 
&dquot->dq_flags); } - if (di->dqb_valid & QIF_BTIME) + if (di->dqb_valid & QIF_BTIME) { dm->dqb_btime = di->dqb_btime; - if (di->dqb_valid & QIF_ITIME) + __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); + } + if (di->dqb_valid & QIF_ITIME) { dm->dqb_itime = di->dqb_itime; + __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); + } if (check_blim) { - if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) { + if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) { dm->dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } @@ -1970,12 +2129,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d int rc; mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!(dquot = dqget(sb, id, type))) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - return -ESRCH; + dquot = dqget(sb, id, type); + if (!dquot) { + rc = -ESRCH; + goto out; } rc = do_set_dqblk(dquot, di); dqput(dquot); +out: mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return rc; } @@ -1986,7 +2147,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) struct mem_dqinfo *mi; mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_enabled(sb, type)) { + if (!sb_has_quota_active(sb, type)) { mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); return -ESRCH; } @@ -2005,11 +2166,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) { struct mem_dqinfo *mi; + int err = 0; mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); - if (!sb_has_quota_enabled(sb, type)) { - mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - return -ESRCH; + if (!sb_has_quota_active(sb, type)) { + err = -ESRCH; + goto out; } mi = sb_dqopt(sb)->info + type; spin_lock(&dq_data_lock); @@ -2023,8 +2185,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) mark_info_dirty(sb, type); /* Force write to disk */ sb->dq_op->write_info(sb, type); +out: 
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); - return 0; + return err; } struct quotactl_ops vfs_quotactl_ops = { @@ -2186,10 +2349,13 @@ EXPORT_SYMBOL(register_quota_format); EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_data_lock); +EXPORT_SYMBOL(vfs_quota_enable); EXPORT_SYMBOL(vfs_quota_on); EXPORT_SYMBOL(vfs_quota_on_path); EXPORT_SYMBOL(vfs_quota_on_mount); +EXPORT_SYMBOL(vfs_quota_disable); EXPORT_SYMBOL(vfs_quota_off); +EXPORT_SYMBOL(dquot_scan_active); EXPORT_SYMBOL(vfs_quota_sync); EXPORT_SYMBOL(vfs_get_dqinfo); EXPORT_SYMBOL(vfs_set_dqinfo); @@ -2202,7 +2368,11 @@ EXPORT_SYMBOL(dquot_release); EXPORT_SYMBOL(dquot_mark_dquot_dirty); EXPORT_SYMBOL(dquot_initialize); EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(dquot_drop_locked); EXPORT_SYMBOL(vfs_dq_drop); +EXPORT_SYMBOL(dqget); +EXPORT_SYMBOL(dqput); +EXPORT_SYMBOL(dquot_is_cached); EXPORT_SYMBOL(dquot_alloc_space); EXPORT_SYMBOL(dquot_alloc_inode); EXPORT_SYMBOL(dquot_free_space); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index c725b1a63a89..f4fb51811c59 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -721,7 +721,9 @@ static struct dquot_operations ext3_quota_operations = { .acquire_dquot = ext3_acquire_dquot, .release_dquot = ext3_release_dquot, .mark_dirty = ext3_mark_dquot_dirty, - .write_info = ext3_write_info + .write_info = ext3_write_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, }; static struct quotactl_ops ext3_qctl_operations = { @@ -1043,8 +1045,7 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT3-fs: Cannot change journaled " @@ -1083,8 +1084,7 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) 
&& sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT3-fs: Cannot change " "journaled quota options when " @@ -1103,8 +1103,7 @@ clear_qf_name: case Opt_jqfmt_vfsv0: qfmt = QFMT_VFS_V0; set_qf_format: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != qfmt) { printk(KERN_ERR "EXT3-fs: Cannot change " "journaled quota options when " @@ -1123,8 +1122,7 @@ set_qf_format: set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) { + if (sb_any_quota_loaded(sb)) { printk(KERN_ERR "EXT3-fs: Cannot change quota " "options when quota turned on.\n"); return 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 59585d1966de..2570fa91864c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -811,7 +811,9 @@ static struct dquot_operations ext4_quota_operations = { .acquire_dquot = ext4_acquire_dquot, .release_dquot = ext4_release_dquot, .mark_dirty = ext4_mark_dquot_dirty, - .write_info = ext4_write_info + .write_info = ext4_write_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, }; static struct quotactl_ops ext4_qctl_operations = { @@ -1150,8 +1152,7 @@ static int parse_options(char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change journaled " @@ -1190,8 +1191,7 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " "journaled quota options when " @@ -1210,8 +1210,7 @@ clear_qf_name: case Opt_jqfmt_vfsv0: qfmt = QFMT_VFS_V0; set_qf_format: - if ((sb_any_quota_enabled(sb) || - sb_any_quota_suspended(sb)) && + if (sb_any_quota_loaded(sb) && 
sbi->s_jquota_fmt != qfmt) { printk(KERN_ERR "EXT4-fs: Cannot change " "journaled quota options when " @@ -1230,7 +1229,7 @@ set_qf_format: set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: - if (sb_any_quota_enabled(sb)) { + if (sb_any_quota_loaded(sb)) { printk(KERN_ERR "EXT4-fs: Cannot change quota " "options when quota turned on.\n"); return 0; diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 589dcdfdfe3c..7e4b361b755c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -35,8 +35,14 @@ ocfs2-objs := \ sysfile.o \ uptodate.o \ ver.o \ + quota_local.o \ + quota_global.o \ xattr.o +ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y) +ocfs2-objs += acl.o +endif + ocfs2_stackglue-objs := stackglue.o ocfs2_stack_o2cb-objs := stack_o2cb.o ocfs2_stack_user-objs := stack_user.o diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c new file mode 100644 index 000000000000..12dfb44c22e5 --- /dev/null +++ b/fs/ocfs2/acl.c @@ -0,0 +1,479 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * acl.c + * + * Copyright (C) 2004, 2008 Oracle. All rights reserved. + * + * CREDITS: + * Lots of code in this file is copy from linux/fs/ext3/acl.c. + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> + +#define MLOG_MASK_PREFIX ML_INODE +#include <cluster/masklog.h> + +#include "ocfs2.h" +#include "alloc.h" +#include "dlmglue.h" +#include "file.h" +#include "ocfs2_fs.h" + +#include "xattr.h" +#include "acl.h" + +/* + * Convert from xattr value to acl struct. + */ +static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) +{ + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(struct posix_acl_entry)) + return ERR_PTR(-EINVAL); + + count = size / sizeof(struct posix_acl_entry); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + + acl = posix_acl_alloc(count, GFP_NOFS); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n = 0; n < count; n++) { + struct ocfs2_acl_entry *entry = + (struct ocfs2_acl_entry *)value; + + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + acl->a_entries[n].e_id = le32_to_cpu(entry->e_id); + value += sizeof(struct posix_acl_entry); + + } + return acl; +} + +/* + * Convert acl struct to xattr value. 
+ */ +static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size) +{ + struct ocfs2_acl_entry *entry = NULL; + char *ocfs2_acl; + size_t n; + + *size = acl->a_count * sizeof(struct posix_acl_entry); + + ocfs2_acl = kmalloc(*size, GFP_NOFS); + if (!ocfs2_acl) + return ERR_PTR(-ENOMEM); + + entry = (struct ocfs2_acl_entry *)ocfs2_acl; + for (n = 0; n < acl->a_count; n++, entry++) { + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + } + return ocfs2_acl; +} + +static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode, + int type, + struct buffer_head *di_bh) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return NULL; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + return ERR_PTR(-EINVAL); + } + + retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_NOFS); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, + "", value, retval); + } + + if (retval > 0) + acl = ocfs2_acl_from_xattr(value, retval); + else if (retval == -ENODATA || retval == 0) + acl = NULL; + else + acl = ERR_PTR(retval); + + kfree(value); + + return acl; +} + + +/* + * Get posix acl. 
+ */ +static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct buffer_head *di_bh = NULL; + struct posix_acl *acl; + int ret; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret < 0) { + mlog_errno(ret); + acl = ERR_PTR(ret); + return acl; + } + + acl = ocfs2_get_acl_nolock(inode, type, di_bh); + + ocfs2_inode_unlock(inode, 0); + + brelse(di_bh); + + return acl; +} + +/* + * Set the access or default ACL of an inode. + */ +static int ocfs2_set_acl(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + int type, + struct posix_acl *acl, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + int name_index; + void *value = NULL; + size_t size = 0; + int ret; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + ret = posix_acl_equiv_mode(acl, &mode); + if (ret < 0) + return ret; + else { + inode->i_mode = mode; + if (ret == 0) + acl = NULL; + } + } + break; + case ACL_TYPE_DEFAULT: + name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? 
-EACCES : 0; + break; + default: + return -EINVAL; + } + + if (acl) { + value = ocfs2_acl_to_xattr(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + if (handle) + ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index, + "", value, size, 0, + meta_ac, data_ac); + else + ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0); + + kfree(value); + + return ret; +} + +int ocfs2_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int ret = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return ret; + } + + return -EAGAIN; +} + +int ocfs2_acl_chmod(struct inode *inode) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl, *clone; + int ret; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + ret = posix_acl_chmod_masq(clone, inode->i_mode); + if (!ret) + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, + clone, NULL, NULL); + posix_acl_release(clone); + return ret; +} + +/* + * Initialize the ACLs of a new inode. If parent directory has default ACL, + * then clone to new inode. Called from ocfs2_mknod. 
+ */ +int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl = NULL; + int ret = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT, + dir_bh); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + ret = ocfs2_set_acl(handle, inode, di_bh, + ACL_TYPE_DEFAULT, acl, + meta_ac, data_ac); + if (ret) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_NOFS); + ret = -ENOMEM; + if (!clone) + goto cleanup; + + mode = inode->i_mode; + ret = posix_acl_create_masq(clone, &mode); + if (ret >= 0) { + inode->i_mode = mode; + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + clone, meta_ac, data_ac); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return ret; +} + +static size_t ocfs2_xattr_list_acl_access(struct inode *inode, + char *list, + size_t list_len, + const char *name, + size_t name_len) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); + return size; +} + +static size_t ocfs2_xattr_list_acl_default(struct inode *inode, + char *list, + size_t list_len, + const char *name, + size_t name_len) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return 0; + + if (list && size <= list_len) + 
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); + return size; +} + +static int ocfs2_xattr_get_acl(struct inode *inode, + int type, + void *buffer, + size_t size) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl; + int ret; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ocfs2_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + ret = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return ret; +} + +static int ocfs2_xattr_get_acl_access(struct inode *inode, + const char *name, + void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int ocfs2_xattr_get_acl_default(struct inode *inode, + const char *name, + void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int ocfs2_xattr_set_acl(struct inode *inode, + int type, + const void *value, + size_t size) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct posix_acl *acl; + int ret = 0; + + if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) + return -EOPNOTSUPP; + + if (!is_owner_or_cap(inode)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + ret = posix_acl_valid(acl); + if (ret) + goto cleanup; + } + } else + acl = NULL; + + ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL); + +cleanup: + posix_acl_release(acl); + return ret; +} + +static int ocfs2_xattr_set_acl_access(struct inode *inode, + const char *name, + const void *value, + size_t size, + int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int ocfs2_xattr_set_acl_default(struct inode *inode, + const char *name, + const 
void *value, + size_t size, + int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ocfs2_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = ocfs2_xattr_list_acl_access, + .get = ocfs2_xattr_get_acl_access, + .set = ocfs2_xattr_set_acl_access, +}; + +struct xattr_handler ocfs2_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = ocfs2_xattr_list_acl_default, + .get = ocfs2_xattr_get_acl_default, + .set = ocfs2_xattr_set_acl_default, +}; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h new file mode 100644 index 000000000000..8f6389ed4da5 --- /dev/null +++ b/fs/ocfs2/acl.h @@ -0,0 +1,58 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * acl.h + * + * Copyright (C) 2004, 2008 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef OCFS2_ACL_H +#define OCFS2_ACL_H + +#include <linux/posix_acl_xattr.h> + +struct ocfs2_acl_entry { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +}; + +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + +extern int ocfs2_check_acl(struct inode *, int); +extern int ocfs2_acl_chmod(struct inode *); +extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, + struct buffer_head *, struct buffer_head *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); + +#else /* CONFIG_OCFS2_FS_POSIX_ACL*/ + +#define ocfs2_check_acl NULL +static inline int ocfs2_acl_chmod(struct inode *inode) +{ + return 0; +} +static inline int ocfs2_init_acl(handle_t *handle, + struct inode *inode, + struct inode *dir, + struct buffer_head *di_bh, + struct buffer_head *dir_bh, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + return 0; +} + +#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/ + +#endif /* OCFS2_ACL_H */ diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0cc2deb9394c..84a7bd4db5da 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/swap.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_DISK_ALLOC #include <cluster/masklog.h> @@ -187,20 +188,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode, static int ocfs2_dinode_sanity_check(struct inode *inode, struct ocfs2_extent_tree *et) { - int ret = 0; - struct ocfs2_dinode *di; + struct ocfs2_dinode *di = et->et_object; BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); - di = et->et_object; - if (!OCFS2_IS_VALID_DINODE(di)) { - ret = -EIO; - ocfs2_error(inode->i_sb, - "Inode %llu has invalid path root", - (unsigned long long)OCFS2_I(inode)->ip_blkno); - } - - return ret; + return 0; } static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) @@ -686,6 +679,61 @@ struct ocfs2_merge_ctxt { int c_split_covers_rec; }; +static int 
ocfs2_validate_extent_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_extent_block *eb = + (struct ocfs2_extent_block *)bh->b_data; + + mlog(0, "Validating extent block %llu\n", + (unsigned long long)bh->b_blocknr); + + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { + ocfs2_error(sb, + "Extent block #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + eb->h_signature); + return -EINVAL; + } + + if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) { + ocfs2_error(sb, + "Extent block #%llu has an invalid h_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(eb->h_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Extent block #%llu has an invalid " + "h_fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(eb->h_fs_generation)); + return -EINVAL; + } + + return 0; +} + +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, eb_blkno, &tmp, + ocfs2_validate_extent_block); + + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + + /* * How many free extents have we got before we need more meta data? 
*/ @@ -705,8 +753,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb, last_eb_blk = ocfs2_et_get_last_eb_blk(et); if (last_eb_blk) { - retval = ocfs2_read_block(inode, last_eb_blk, - &eb_bh); + retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (retval < 0) { mlog_errno(retval); goto bail; @@ -908,11 +955,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, for(i = 0; i < new_blocks; i++) { bh = new_eb_bhs[i]; eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! */ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; status = ocfs2_journal_access(handle, inode, bh, @@ -1052,11 +1096,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, } eb = (struct ocfs2_extent_block *) new_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } + /* ocfs2_create_new_meta_bhs() should create it right! 
*/ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); eb_el = &eb->h_list; root_el = et->et_root_el; @@ -1176,18 +1217,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, brelse(bh); bh = NULL; - status = ocfs2_read_block(inode, blkno, &bh); + status = ocfs2_read_extent_block(inode, blkno, &bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - status = -EIO; - goto bail; - } el = &eb->h_list; if (le16_to_cpu(el->l_next_free_rec) < @@ -1540,7 +1576,7 @@ static int __ocfs2_find_path(struct inode *inode, brelse(bh); bh = NULL; - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_extent_block(inode, blkno, &bh); if (ret) { mlog_errno(ret); goto out; @@ -1548,11 +1584,6 @@ static int __ocfs2_find_path(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EIO; - goto out; - } if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { @@ -4097,8 +4128,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, le16_to_cpu(new_el->l_count)) { bh = path_leaf_bh(left_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of " + "%d. 
It should have " + "matched the l_count of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec), + le16_to_cpu(new_el->l_count)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[ @@ -4147,8 +4185,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { bh = path_leaf_bh(right_path); eb = (struct ocfs2_extent_block *)bh->b_data; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, - eb); + ocfs2_error(inode->i_sb, + "Extent block #%llu has an " + "invalid l_next_free_rec of %d", + (unsigned long long)le64_to_cpu(eb->h_blkno), + le16_to_cpu(new_el->l_next_free_rec)); + status = -EINVAL; goto out; } rec = &new_el->l_recs[1]; @@ -4294,7 +4336,9 @@ static int ocfs2_figure_insert_type(struct inode *inode, * ocfs2_figure_insert_type() and ocfs2_add_branch() * may want it later. */ - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &bh); if (ret) { mlog_exit(ret); goto out; @@ -4760,20 +4804,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode, if (path->p_tree_depth) { struct ocfs2_extent_block *eb; - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if (ret) { mlog_exit(ret); goto out; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } - rightmost_el = &eb->h_list; } else rightmost_el = path_root_el(path); @@ -4918,8 +4957,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, depth = path->p_tree_depth; if (depth > 0) { - ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), - &last_eb_bh); + ret = ocfs2_read_extent_block(inode, + ocfs2_et_get_last_eb_blk(et), + &last_eb_bh); if 
(ret < 0) { mlog_errno(ret); goto out; @@ -5255,6 +5295,78 @@ out: return ret; } +int ocfs2_remove_btree_range(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + int ret; + u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + handle_t *handle; + struct ocfs2_alloc_context *meta_ac = NULL; + + ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, et->et_root_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, + dealloc); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ocfs2_et_update_clusters(inode, et, -len); + + ret = ocfs2_journal_dirty(handle, et->et_root_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + if (ret) + mlog_errno(ret); + +out_commit: + ocfs2_commit_trans(osb, handle); +out: + mutex_unlock(&tl_inode->i_mutex); + + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + + return ret; +} + int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) { struct buffer_head *tl_bh = osb->osb_tl_bh; @@ -5308,13 +5420,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb, start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - 
OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; tl_count = le16_to_cpu(tl->tl_count); mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || tl_count == 0, @@ -5464,13 +5576,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) BUG_ON(mutex_trylock(&tl_inode->i_mutex)); di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, di); - status = -EIO; - goto out; - } + /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated + * by the underlying call to ocfs2_read_inode_block(), so any + * corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; num_to_flush = le16_to_cpu(tl->tl_used); mlog(0, "Flush %u records from truncate log #%llu\n", num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno); @@ -5586,7 +5698,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { iput(inode); mlog_errno(status); @@ -5625,13 +5737,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, } di = (struct ocfs2_dinode *) tl_bh->b_data; - tl = &di->id2.i_dealloc; - if (!OCFS2_IS_VALID_DINODE(di)) { - OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di); - status = -EIO; - goto bail; - } + /* tl_bh is loaded from ocfs2_get_truncate_log_info(). 
It's + * validated by the underlying call to ocfs2_read_inode_block(), + * so any corruption is a code bug */ + BUG_ON(!OCFS2_IS_VALID_DINODE(di)); + + tl = &di->id2.i_dealloc; if (le16_to_cpu(tl->tl_used)) { mlog(0, "We'll have %u logs to recover\n", le16_to_cpu(tl->tl_used)); @@ -5800,7 +5912,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) */ /* - * Describes a single block free from a suballocator + * Describe a single bit freed from a suballocator. For the block + * suballocators, it represents one block. For the global cluster + * allocator, it represents some clusters and free_bit indicates + * clusters number. */ struct ocfs2_cached_block_free { struct ocfs2_cached_block_free *free_next; @@ -5815,10 +5930,10 @@ struct ocfs2_per_slot_free_list { struct ocfs2_cached_block_free *f_first; }; -static int ocfs2_free_cached_items(struct ocfs2_super *osb, - int sysfile_type, - int slot, - struct ocfs2_cached_block_free *head) +static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, + int sysfile_type, + int slot, + struct ocfs2_cached_block_free *head) { int ret; u64 bg_blkno; @@ -5893,6 +6008,82 @@ out: return ret; } +int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + u64 blkno, unsigned int bit) +{ + int ret = 0; + struct ocfs2_cached_block_free *item; + + item = kmalloc(sizeof(*item), GFP_NOFS); + if (item == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + return ret; + } + + mlog(0, "Insert clusters: (bit %u, blk %llu)\n", + bit, (unsigned long long)blkno); + + item->free_blk = blkno; + item->free_bit = bit; + item->free_next = ctxt->c_global_allocator; + + ctxt->c_global_allocator = item; + return ret; +} + +static int ocfs2_free_cached_clusters(struct ocfs2_super *osb, + struct ocfs2_cached_block_free *head) +{ + struct ocfs2_cached_block_free *tmp; + struct inode *tl_inode = osb->osb_tl_inode; + handle_t *handle; + int ret = 0; + + mutex_lock(&tl_inode->i_mutex); + + while (head) { + if 
(ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + break; + } + } + + handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + break; + } + + ret = ocfs2_truncate_log_append(osb, handle, head->free_blk, + head->free_bit); + + ocfs2_commit_trans(osb, handle); + tmp = head; + head = head->free_next; + kfree(tmp); + + if (ret < 0) { + mlog_errno(ret); + break; + } + } + + mutex_unlock(&tl_inode->i_mutex); + + while (head) { + /* Premature exit may have left some dangling items. */ + tmp = head; + head = head->free_next; + kfree(tmp); + } + + return ret; +} + int ocfs2_run_deallocs(struct ocfs2_super *osb, struct ocfs2_cached_dealloc_ctxt *ctxt) { @@ -5908,8 +6099,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb, if (fl->f_first) { mlog(0, "Free items: (type %u, slot %d)\n", fl->f_inode_type, fl->f_slot); - ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, - fl->f_slot, fl->f_first); + ret2 = ocfs2_free_cached_blocks(osb, + fl->f_inode_type, + fl->f_slot, + fl->f_first); if (ret2) mlog_errno(ret2); if (!ret) @@ -5920,6 +6113,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb, kfree(fl); } + if (ctxt->c_global_allocator) { + ret2 = ocfs2_free_cached_clusters(osb, + ctxt->c_global_allocator); + if (ret2) + mlog_errno(ret2); + if (!ret) + ret = ret2; + + ctxt->c_global_allocator = NULL; + } + return ret; } @@ -6075,11 +6279,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode, eb = (struct ocfs2_extent_block *) bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - ret = -EROFS; - goto out; - } + + /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block(). + * Any corruption is a code bug. 
*/ + BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); *new_last_eb = bh; get_bh(*new_last_eb); @@ -6350,6 +6553,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, goto bail; } + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_del)); spin_lock(&OCFS2_I(inode)->ip_lock); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) - clusters_to_del; @@ -6436,11 +6641,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, mlog_errno(ret); else if (ocfs2_should_order_data(inode)) { ret = ocfs2_jbd2_file_inode(handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - ret = walk_page_buffers(handle, page_buffers(page), - from, to, &partial, - ocfs2_journal_dirty_data); -#endif if (ret < 0) mlog_errno(ret); } @@ -6663,6 +6863,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct page **pages = NULL; loff_t end = osb->s_clustersize; struct ocfs2_extent_tree et; + int did_quota = 0; has_data = i_size_read(inode) ? 1 : 0; @@ -6682,7 +6883,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } } - handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS); + handle = ocfs2_start_trans(osb, + ocfs2_inline_to_extents_credits(osb->sb)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); mlog_errno(ret); @@ -6701,6 +6903,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, unsigned int page_end; u64 phys; + if (vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1))) { + ret = -EDQUOT; + goto out_commit; + } + did_quota = 1; + ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &num); if (ret) { @@ -6774,6 +6983,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_commit: + if (ret < 0 && did_quota) + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + ocfs2_commit_trans(osb, handle); out_unlock: @@ -6984,20 +7197,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 
if (fe->id2.i_list.l_tree_depth) { - status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), - &last_eb_bh); + status = ocfs2_read_extent_block(inode, + le64_to_cpu(fe->i_last_eb_blk), + &last_eb_bh); if (status < 0) { mlog_errno(status); goto bail; } eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - - brelse(last_eb_bh); - status = -EIO; - goto bail; - } } (*tc)->tc_last_eb_bh = last_eb_bh; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 70257c84cfbe..59d37d1b7d4c 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, struct buffer_head *bh, struct ocfs2_xattr_value_root *xv); +/* + * Read an extent block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The extent block will be validated + * with ocfs2_validate_extent_block(). + */ +int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, + struct buffer_head **bh); + struct ocfs2_alloc_context; int ocfs2_insert_extent(struct ocfs2_super *osb, handle_t *handle, @@ -110,6 +118,11 @@ int ocfs2_remove_extent(struct inode *inode, u32 cpos, u32 len, handle_t *handle, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc); +int ocfs2_remove_btree_range(struct inode *inode, + struct ocfs2_extent_tree *et, + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc); + int ocfs2_num_free_extents(struct ocfs2_super *osb, struct inode *inode, struct ocfs2_extent_tree *et); @@ -167,10 +180,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); */ struct ocfs2_cached_dealloc_ctxt { struct ocfs2_per_slot_free_list *c_first_suballocator; + struct ocfs2_cached_block_free *c_global_allocator; }; static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) { c->c_first_suballocator = NULL; + c->c_global_allocator = NULL; +} 
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + u64 blkno, unsigned int bit); +static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) +{ + return c->c_global_allocator != NULL; } int ocfs2_run_deallocs(struct ocfs2_super *osb, struct ocfs2_cached_dealloc_ctxt *ctxt); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index c22543b33420..6b647ec87bb3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -27,6 +27,7 @@ #include <linux/swap.h> #include <linux/pipe_fs_i.h> #include <linux/mpage.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_FILE_IO #include <cluster/masklog.h> @@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, goto bail; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto bail; } fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - goto bail; - } - if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb, le32_to_cpu(fe->i_clusters))) { mlog(ML_ERROR, "block offset is outside the allocated size: " @@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, if (ocfs2_should_order_data(inode)) { ret = ocfs2_jbd2_file_inode(handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - ret = walk_page_buffers(handle, - page_buffers(page), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif if (ret < 0) mlog_errno(ret); } @@ -1072,15 +1060,8 @@ static 
void ocfs2_write_failure(struct inode *inode, tmppage = wc->w_pages[i]; if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) { + if (ocfs2_should_order_data(inode)) ocfs2_jbd2_file_inode(wc->w_handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - walk_page_buffers(wc->w_handle, - page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif - } block_commit_write(tmppage, from, to); } @@ -1750,6 +1731,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, wc->w_handle = handle; + if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) { + ret = -EDQUOT; + goto out_commit; + } /* * We don't want this to fail in ocfs2_write_end(), so do it * here. @@ -1758,7 +1744,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } /* @@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, mmap_page); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, len); if (ret) { mlog_errno(ret); - goto out_commit; + goto out_quota; } if (data_ac) @@ -1790,6 +1776,10 @@ success: *pagep = wc->w_target_page; *fsdata = wc; return 0; +out_quota: + if (clusters_to_alloc) + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc)); out_commit: ocfs2_commit_trans(osb, handle); @@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (page_has_buffers(tmppage)) { - if (ocfs2_should_order_data(inode)) { + if (ocfs2_should_order_data(inode)) ocfs2_jbd2_file_inode(wc->w_handle, inode); -#ifdef CONFIG_OCFS2_COMPAT_JBD - walk_page_buffers(wc->w_handle, - page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); -#endif - } block_commit_write(tmppage, from, to); } } diff --git a/fs/ocfs2/buffer_head_io.c 
b/fs/ocfs2/buffer_head_io.c index 7e947c672469..15c8e6deee2e 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -39,6 +39,18 @@ #include "buffer_head_io.h" +/* + * Bits on bh->b_state used by ocfs2. + * + * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart. + */ +enum ocfs2_state_bits { + BH_NeedsValidate = BH_JBDPrivateStart, +}; + +/* Expand the magic b_state functions */ +BUFFER_FNS(NeedsValidate, needs_validate); + int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode) { @@ -112,7 +124,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, bh = bhs[i]; if (buffer_jbd(bh)) { - mlog(ML_ERROR, + mlog(ML_BH_IO, "trying to sync read a jbd " "managed bh (blocknr = %llu), skipping\n", (unsigned long long)bh->b_blocknr); @@ -147,15 +159,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, for (i = nr; i > 0; i--) { bh = bhs[i - 1]; - if (buffer_jbd(bh)) { - mlog(ML_ERROR, - "the journal got the buffer while it was " - "locked for io! (blocknr = %llu)\n", - (unsigned long long)bh->b_blocknr); - BUG(); - } + /* No need to wait on the buffer if it's managed by JBD. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); - wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* Status won't be cleared from here on out, * so we can safely record this and loop back @@ -171,7 +178,9 @@ bail: } int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, - struct buffer_head *bhs[], int flags) + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; int i, ignore_cache = 0; @@ -251,8 +260,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, ignore_cache = 1; } - /* XXX: Can we ever get this and *not* have the cached - * flag set? 
*/ if (buffer_jbd(bh)) { if (ignore_cache) mlog(ML_BH_IO, "trying to sync read a jbd " @@ -305,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ + if (validate) + set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; @@ -335,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, bhs[i] = NULL; continue; } + + if (buffer_needs_validate(bh)) { + /* We never set NeedsValidate if the + * buffer was held by the journal, so + * that better not have changed */ + BUG_ON(buffer_jbd(bh)); + clear_buffer_needs_validate(bh); + status = validate(inode->i_sb, bh); + if (status) { + put_bh(bh); + bhs[i] = NULL; + continue; + } + } } /* Always set the buffer in the cache, even if it was diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index 75e1dcb1ade7..c75d682dadd8 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h @@ -31,21 +31,24 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh, int uptodate); -static inline int ocfs2_read_block(struct inode *inode, - u64 off, - struct buffer_head **bh); - int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, struct inode *inode); -int ocfs2_read_blocks(struct inode *inode, - u64 block, - int nr, - struct buffer_head *bhs[], - int flags); int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]); +/* + * If not NULL, validate() will be called on a buffer that is freshly + * read from disk. It will not be called if the buffer was in cache. + * Note that if validate() is being used for this buffer, it needs to + * be set even for a READAHEAD call, as it marks the buffer for later + * validation. 
+ */ +int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); + int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh); @@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, #define OCFS2_BH_READAHEAD 8 static inline int ocfs2_read_block(struct inode *inode, u64 off, - struct buffer_head **bh) + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; @@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off, goto bail; } - status = ocfs2_read_blocks(inode, off, 1, bh, 0); + status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); bail: return status; diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 57670c680471..7e72a81bc2d4 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -113,6 +113,7 @@ #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ +#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ /* bits that are infrequently given and frequently matched in the high word */ #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 026e6eb85187..3708fe482e3e 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -40,6 +40,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_NAMEI #include <cluster/masklog.h> @@ -82,49 +83,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct ocfs2_alloc_context *meta_ac, struct buffer_head **new_bh); -static struct buffer_head *ocfs2_bread(struct inode *inode, - 
int block, int *err, int reada) -{ - struct buffer_head *bh = NULL; - int tmperr; - u64 p_blkno; - int readflags = 0; - - if (reada) - readflags |= OCFS2_BH_READAHEAD; - - if (((u64)block << inode->i_sb->s_blocksize_bits) >= - i_size_read(inode)) { - BUG_ON(!reada); - return NULL; - } - - down_read(&OCFS2_I(inode)->ip_alloc_sem); - tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, - NULL); - up_read(&OCFS2_I(inode)->ip_alloc_sem); - if (tmperr < 0) { - mlog_errno(tmperr); - goto fail; - } - - tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); - if (tmperr < 0) - goto fail; - - tmperr = 0; - - *err = 0; - return bh; - -fail: - brelse(bh); - bh = NULL; - - *err = -EIO; - return NULL; -} - /* * bh passed here can be an inode block or a dir data block, depending * on the inode inline data flag. @@ -231,7 +189,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -250,6 +208,43 @@ out: return NULL; } +static int ocfs2_validate_dir_block(struct super_block *sb, + struct buffer_head *bh) +{ + /* + * Nothing yet. We don't validate dirents here, that's handled + * in-place when the code walks them. + */ + mlog(0, "Validating dirblock %llu\n", + (unsigned long long)bh->b_blocknr); + + return 0; +} + +/* + * This function forces all errors to -EIO for consistency with its + * predecessor, ocfs2_bread(). We haven't audited what returning the + * real error codes would do to callers. We log the real codes with + * mlog_errno() before we squash them. 
+ */ +static int ocfs2_read_dir_block(struct inode *inode, u64 v_block, + struct buffer_head **bh, int flags) +{ + int rc = 0; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags, + ocfs2_validate_dir_block); + if (rc) + mlog_errno(rc); + + /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */ + if (!rc && !*bh) + *bh = tmp; + + return rc ? -EIO : 0; +} + static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen, struct inode *dir, struct ocfs2_dir_entry **res_dir) @@ -296,15 +291,17 @@ restart: } num++; - bh = ocfs2_bread(dir, b++, &err, 1); + bh = NULL; + err = ocfs2_read_dir_block(dir, b++, &bh, + OCFS2_BH_READAHEAD); bh_use[ra_max] = bh; } } if ((bh = bh_use[ra_ptr++]) == NULL) goto next; - if (ocfs2_read_block(dir, block, &bh)) { + if (ocfs2_read_dir_block(dir, block, &bh, 0)) { /* read error, skip block & hope for the best. - * ocfs2_read_block() has released the bh. */ + * ocfs2_read_dir_block() has released the bh. 
*/ ocfs2_error(dir->i_sb, "reading directory %llu, " "offset %lu\n", (unsigned long long)OCFS2_I(dir)->ip_blkno, @@ -458,7 +455,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, struct ocfs2_dinode *di; struct ocfs2_inline_data *data; - ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(dir, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -636,7 +633,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, struct ocfs2_inline_data *data; struct ocfs2_dir_entry *de; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -724,7 +721,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; - int err; struct super_block * sb = inode->i_sb; unsigned int ra_sectors = 16; @@ -735,12 +731,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, while (!error && !stored && *f_pos < i_size_read(inode)) { blk = (*f_pos) >> sb->s_blocksize_bits; - bh = ocfs2_bread(inode, blk, &err, 0); - if (!bh) { - mlog(ML_ERROR, - "directory #%llu contains a hole at offset %lld\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - *f_pos); + if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { + /* Skip the corrupt dirblock and keep trying */ *f_pos += sb->s_blocksize - offset; continue; } @@ -754,8 +746,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { - tmp = ocfs2_bread(inode, ++blk, &err, 1); - brelse(tmp); + tmp = NULL; + if (!ocfs2_read_dir_block(inode, ++blk, &tmp, + OCFS2_BH_READAHEAD)) + brelse(tmp); } last_ra_blk = blk; ra_sectors = 8; @@ -828,6 +822,7 @@ revalidate: } offset = 0; brelse(bh); + bh = NULL; } stored = 0; @@ -1216,9 
+1211,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, unsigned int blocks_wanted, struct buffer_head **first_block_bh) { - int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS; u32 alloc, bit_off, len; struct super_block *sb = dir->i_sb; + int ret, credits = ocfs2_inline_to_extents_credits(sb); u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(dir); @@ -1227,6 +1222,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; handle_t *handle; struct ocfs2_extent_tree et; + int did_quota = 0; ocfs2_init_dinode_extent_tree(&et, dir, di_bh); @@ -1264,6 +1260,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, goto out_sem; } + if (vfs_dq_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, alloc))) { + ret = -EDQUOT; + goto out_commit; + } + did_quota = 1; /* * Try to claim as many clusters as the bitmap can give though * if we only get one now, that's enough to continue. 
The rest @@ -1386,6 +1388,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, dirdata_bh = NULL; out_commit: + if (ret < 0 && did_quota) + vfs_dq_free_space_nodirty(dir, + ocfs2_clusters_to_bytes(osb->sb, 2)); ocfs2_commit_trans(osb, handle); out_sem: @@ -1410,7 +1415,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb, struct buffer_head **new_bh) { int status; - int extend; + int extend, did_quota = 0; u64 p_blkno, v_blkno; spin_lock(&OCFS2_I(dir)->ip_lock); @@ -1420,6 +1425,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb, if (extend) { u32 offset = OCFS2_I(dir)->ip_clusters; + if (vfs_dq_alloc_space_nodirty(dir, + ocfs2_clusters_to_bytes(sb, 1))) { + status = -EDQUOT; + goto bail; + } + did_quota = 1; + status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, 1, 0, parent_fe_bh, handle, data_ac, meta_ac, NULL); @@ -1445,6 +1457,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb, } status = 0; bail: + if (did_quota && status < 0) + vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1)); mlog_exit(status); return status; } @@ -1680,8 +1694,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, struct super_block *sb = dir->i_sb; int status; - bh = ocfs2_bread(dir, 0, &status, 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, 0, &bh, 0); + if (status) { mlog_errno(status); goto bail; } @@ -1702,11 +1716,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, status = -ENOSPC; goto bail; } - bh = ocfs2_bread(dir, - offset >> sb->s_blocksize_bits, - &status, - 0); - if (!bh) { + status = ocfs2_read_dir_block(dir, + offset >> sb->s_blocksize_bits, + &bh, 0); + if (status) { mlog_errno(status); goto bail; } diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 3516d8a4166b..6f7a77d54020 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void) 0, 
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), dlmfs_init_once); - if (!dlmfs_inode_cache) + if (!dlmfs_inode_cache) { + status = -ENOMEM; goto bail; + } cleanup_inode = 1; user_dlm_worker = create_singlethread_workqueue("user_dlm"); diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index 39ec27738499..0c3cc03c61fa 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h @@ -33,7 +33,7 @@ #include <linux/workqueue.h> /* user_lock_res->l_flags flags. */ -#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized +#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define USER_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ec684426034b..b1c75911d8ad 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -32,6 +32,7 @@ #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/time.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_DLM_GLUE #include <cluster/masklog.h> @@ -51,6 +52,7 @@ #include "slot_map.h" #include "super.h" #include "uptodate.h" +#include "quota.h" #include "buffer_head_io.h" @@ -68,6 +70,7 @@ struct ocfs2_mask_waiter { static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); +static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres); /* * Return value from ->downconvert_worker functions. 
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres); +static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); #define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) @@ -258,6 +262,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = { .flags = 0, }; +static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { + .set_lvb = ocfs2_set_qinfo_lvb, + .get_osb = ocfs2_get_qinfo_osb, + .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, +}; + static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) { return lockres->l_type == OCFS2_LOCK_TYPE_META || @@ -279,6 +289,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res return (struct ocfs2_dentry_lock *)lockres->l_priv; } +static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres) +{ + BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO); + + return (struct ocfs2_mem_dqinfo *)lockres->l_priv; +} + static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) { if (lockres->l_ops->get_osb) @@ -507,6 +524,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) return OCFS2_SB(inode->i_sb); } +static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_mem_dqinfo *info = lockres->l_priv; + + return OCFS2_SB(info->dqi_gi.dqi_sb); +} + static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) { struct ocfs2_file_private *fp = lockres->l_priv; @@ -609,6 +633,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, lockres->l_flags |= OCFS2_LOCK_NOCACHE; } +void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, + struct ocfs2_mem_dqinfo *info) +{ + ocfs2_lock_res_init_once(lockres); + 
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type, + 0, lockres->l_name); + ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres, + OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops, + info); +} + void ocfs2_lock_res_free(struct ocfs2_lock_res *res) { mlog_entry_void(); @@ -2024,7 +2059,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, } else { /* Boo, we have to go to disk. */ /* read bh, cast, ocfs2_refresh_inode */ - status = ocfs2_read_block(inode, oi->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); goto bail_refresh; @@ -2032,18 +2067,14 @@ static int ocfs2_inode_lock_update(struct inode *inode, fe = (struct ocfs2_dinode *) (*bh)->b_data; /* This is a good chance to make sure we're not - * locking an invalid object. + * locking an invalid object. ocfs2_read_inode_block() + * already checked that the inode block is sane. * * We bug on a stale inode here because we checked * above whether it was wiped from disk. The wiping * node provides a guarantee that we receive that * message and can mark the inode before dropping any * locks associated with it. 
*/ - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail_refresh; - } mlog_bug_on_msg(inode->i_generation != le32_to_cpu(fe->i_generation), "Invalid dinode %llu disk generation: %u " @@ -2085,7 +2116,7 @@ static int ocfs2_assign_bh(struct inode *inode, return 0; } - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); + status = ocfs2_read_inode_block(inode, ret_bh); if (status < 0) mlog_errno(status); @@ -2841,9 +2872,8 @@ static void ocfs2_unlock_ast(void *opaque, int error) lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; - spin_unlock_irqrestore(&lockres->l_lock, flags); - wake_up(&lockres->l_event); + spin_unlock_irqrestore(&lockres->l_lock, flags); mlog_exit_void(); } @@ -3450,6 +3480,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres, return UNBLOCK_CONTINUE_POST; } +static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) +{ + struct ocfs2_qinfo_lvb *lvb; + struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres); + struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, + oinfo->dqi_gi.dqi_type); + + mlog_entry_void(); + + lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_QINFO_LVB_VERSION; + lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace); + lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace); + lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms); + lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks); + lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk); + lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry); + + mlog_exit_void(); +} + +void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); + int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; + + mlog_entry_void(); + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) + ocfs2_cluster_unlock(osb, lockres, level); + mlog_exit_void(); +} + +static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) +{ + struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb, + oinfo->dqi_gi.dqi_type); + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + struct buffer_head *bh = NULL; + struct ocfs2_global_disk_dqinfo *gdinfo; + int status = 0; + + if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { + info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); + info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); + oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); + oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks); + oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk); + oinfo->dqi_gi.dqi_free_entry = + be32_to_cpu(lvb->lvb_free_entry); + } else { + status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh); + if (status) { + mlog_errno(status); + goto bail; + } + gdinfo = (struct ocfs2_global_disk_dqinfo *) + (bh->b_data + OCFS2_GLOBAL_INFO_OFF); + info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace); + info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace); + oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms); + oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks); + oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk); + oinfo->dqi_gi.dqi_free_entry = + le32_to_cpu(gdinfo->dqi_free_entry); + brelse(bh); + ocfs2_track_lock_refresh(lockres); + } + +bail: + return status; +} + +/* Lock quota info, this function expects at least shared lock on the quota file + * so that we can safely refresh quota info from disk. */ +int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock; + struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb); + int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; + int status = 0; + + mlog_entry_void(); + + /* On RO devices, locking really isn't needed... */ + if (ocfs2_is_hard_readonly(osb)) { + if (ex) + status = -EROFS; + goto bail; + } + if (ocfs2_mount_local(osb)) + goto bail; + + status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + if (status < 0) { + mlog_errno(status); + goto bail; + } + if (!ocfs2_should_refresh_lock_res(lockres)) + goto bail; + /* OK, we have the lock but we need to refresh the quota info */ + status = ocfs2_refresh_qinfo(oinfo); + if (status) + ocfs2_qinfo_unlock(oinfo, ex); + ocfs2_complete_lock_res_refresh(lockres, status); +bail: + mlog_exit(status); + return status; +} + /* * This is the filesystem locking protocol. It provides the lock handling * hooks for the underlying DLM. It has a maximum version number. diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 2bb01f09c1b1..3f8d9986b8e0 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -49,6 +49,19 @@ struct ocfs2_meta_lvb { __be32 lvb_reserved2; }; +#define OCFS2_QINFO_LVB_VERSION 1 + +struct ocfs2_qinfo_lvb { + __u8 lvb_version; + __u8 lvb_reserved[3]; + __be32 lvb_bgrace; + __be32 lvb_igrace; + __be32 lvb_syncms; + __be32 lvb_blocks; + __be32 lvb_free_blk; + __be32 lvb_free_entry; +}; + /* ocfs2_inode_lock_full() 'arg_flags' flags */ /* don't wait on recovery. 
*/ #define OCFS2_META_LOCK_RECOVERY (0x01) @@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl, struct ocfs2_file_private; void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, struct ocfs2_file_private *fp); +struct ocfs2_mem_dqinfo; +void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, + struct ocfs2_mem_dqinfo *info); void ocfs2_lock_res_free(struct ocfs2_lock_res *res); int ocfs2_create_new_inode_locks(struct inode *inode); int ocfs2_drop_inode_locks(struct inode *inode); @@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex); void ocfs2_dentry_unlock(struct dentry *dentry, int ex); int ocfs2_file_lock(struct file *file, int ex, int trylock); void ocfs2_file_unlock(struct file *file); +int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); +void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); + void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2baedac58234..f2bb1a04d253 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, struct ocfs2_extent_block *eb; struct ocfs2_extent_list *el; - ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); + ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); if (ret) { mlog_errno(ret); goto out; @@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode, eb = (struct ocfs2_extent_block *) eb_bh->b_data; el = &eb->h_list; - if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); - goto out; - } - if (el->l_tree_depth) { ocfs2_error(inode->i_sb, "Inode %lu has non zero tree depth in " @@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) goto no_more_extents; - ret = ocfs2_read_block(inode, 
- le64_to_cpu(eb->h_next_leaf_blk), - &next_eb_bh); + ret = ocfs2_read_extent_block(inode, + le64_to_cpu(eb->h_next_leaf_blk), + &next_eb_bh); if (ret) { mlog_errno(ret); goto out; } - next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; - - if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) { - ret = -EROFS; - OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb); - goto out; - } + next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data; el = &next_eb->h_list; - i = ocfs2_search_for_hole_index(el, v_cluster); } @@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, if (ret == 0) goto out; - ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -819,3 +806,74 @@ out: return ret; } + +int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) +{ + int rc = 0; + u64 p_block, p_count; + int i, count, done = 0; + + mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, " + "flags = %x, validate = %p)\n", + inode, (unsigned long long)v_block, nr, bhs, flags, + validate); + + if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >= + i_size_read(inode)) { + BUG_ON(!(flags & OCFS2_BH_READAHEAD)); + goto out; + } + + while (done < nr) { + down_read(&OCFS2_I(inode)->ip_alloc_sem); + rc = ocfs2_extent_map_get_blocks(inode, v_block + done, + &p_block, &p_count, NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (rc) { + mlog_errno(rc); + break; + } + + if (!p_block) { + rc = -EIO; + mlog(ML_ERROR, + "Inode #%llu contains a hole at offset %llu\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)(v_block + done) << + inode->i_sb->s_blocksize_bits); + break; + } + + count = nr - done; + if (p_count < count) + count = p_count; + + /* + * If the caller passed us bhs, they should have come + * from a previous 
readahead call to this function. Thus, + * they should have the right b_blocknr. + */ + for (i = 0; i < count; i++) { + if (!bhs[done + i]) + continue; + BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); + } + + rc = ocfs2_read_blocks(inode, p_block, count, bhs + done, + flags, validate); + if (rc) { + mlog_errno(rc); + break; + } + done += count; + } + +out: + mlog_exit(rc); + return rc; +} + + diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index 1c4aa8b06f34..b7dd9731b462 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, u32 *num_clusters, struct ocfs2_extent_list *el); +int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, + struct buffer_head *bhs[], int flags, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)); +static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, + struct buffer_head **bh, + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) +{ + int status = 0; + + if (bh == NULL) { + printk("ocfs2: bh == NULL\n"); + status = -EINVAL; + goto bail; + } + + status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate); + +bail: + return status; +} + + #endif /* _EXTENT_MAP_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e2570a3bc2b2..9374d374a264 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -35,6 +35,7 @@ #include <linux/mount.h> #include <linux/writeback.h> #include <linux/falloc.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_INODE #include <cluster/masklog.h> @@ -56,6 +57,8 @@ #include "suballoc.h" #include "super.h" #include "xattr.h" +#include "acl.h" +#include "quota.h" #include "buffer_head_io.h" @@ -303,9 +306,9 @@ bail: return status; } -static int ocfs2_simple_size_update(struct inode *inode, - struct buffer_head *di_bh, - u64 new_i_size) +int ocfs2_simple_size_update(struct inode *inode, + struct buffer_head *di_bh, + 
u64 new_i_size) { int ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); @@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode, (unsigned long long)OCFS2_I(inode)->ip_blkno, (unsigned long long)new_i_size); + /* We trust di_bh because it comes from ocfs2_inode_lock(), which + * already validated it */ fe = (struct ocfs2_dinode *) di_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto bail; - } mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), "Inode %llu, inode i_size = %lld != di " @@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; + int did_quota = 0; mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); @@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); + status = ocfs2_read_inode_block(inode, &bh); if (status < 0) { mlog_errno(status); goto leave; } - fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); - status = -EIO; - goto leave; - } restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); @@ -585,6 +580,13 @@ restart_all: } restarted_transaction: + if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, + clusters_to_add))) { + status = -EDQUOT; + goto leave; + } + did_quota = 1; + /* reserve a write to the file entry early on - that we if we * run out of credits in the allocation path, we can still * update i_size. 
*/ @@ -622,6 +624,10 @@ restarted_transaction: spin_lock(&OCFS2_I(inode)->ip_lock); clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); spin_unlock(&OCFS2_I(inode)->ip_lock); + /* Release unused quota reservation */ + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); + did_quota = 0; if (why != RESTART_NONE && clusters_to_add) { if (why == RESTART_META) { @@ -654,6 +660,9 @@ restarted_transaction: OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); leave: + if (status < 0 && did_quota) + vfs_dq_free_space(inode, + ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); if (handle) { ocfs2_commit_trans(osb, handle); handle = NULL; @@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; + int locked[MAXQUOTAS] = {0, 0}; + int credits, qtype; + struct ocfs2_mem_dqinfo *oinfo; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto bail_unlock; + if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + credits = OCFS2_INODE_UPDATE_CREDITS; + if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid + && OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto bail_unlock; + credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + + ocfs2_calc_qdel_credits(sb, USRQUOTA); + locked[USRQUOTA] = 1; + } + if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid + && OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + oinfo = 
sb_dqinfo(sb, GRPQUOTA)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto bail_unlock; + credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + + ocfs2_calc_qdel_credits(sb, GRPQUOTA); + locked[GRPQUOTA] = 1; + } + handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } + status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0; + if (status < 0) + goto bail_commit; + } else { + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail_unlock; + } } /* @@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: + for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + if (!locked[qtype]) + continue; + oinfo = sb_dqinfo(sb, qtype)->dqi_priv; + ocfs2_unlock_global_qf(oinfo, 1); + } ocfs2_inode_unlock(inode, 1); bail_unlock_rw: if (size_change) @@ -989,6 +1043,12 @@ bail_unlock_rw: bail: brelse(bh); + if (!status && attr->ia_valid & ATTR_MODE) { + status = ocfs2_acl_chmod(inode); + if (status < 0) + mlog_errno(status); + } + mlog_exit(status); return status; } @@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask) goto out; } - ret = generic_permission(inode, mask, NULL); + ret = generic_permission(inode, mask, ocfs2_check_acl); ocfs2_inode_unlock(inode, 0); out: @@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode) { int ret; struct buffer_head *bh = NULL; - struct ocfs2_inode_info *oi = OCFS2_I(inode); - ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); + ret = ocfs2_read_inode_block(inode, &bh); if (ret < 0) { mlog_errno(ret); goto out; @@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, struct buffer_head *di_bh = NULL; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = ocfs2_read_block(inode, 
OCFS2_I(inode)->ip_blkno, - &di_bh); + ret = ocfs2_read_inode_block(inode, &di_bh); if (ret) { mlog_errno(ret); goto out; @@ -1226,83 +1284,6 @@ out: return ret; } -static int __ocfs2_remove_inode_range(struct inode *inode, - struct buffer_head *di_bh, - u32 cpos, u32 phys_cpos, u32 len, - struct ocfs2_cached_dealloc_ctxt *dealloc) -{ - int ret; - u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct inode *tl_inode = osb->osb_tl_inode; - handle_t *handle; - struct ocfs2_alloc_context *meta_ac = NULL; - struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; - struct ocfs2_extent_tree et; - - ocfs2_init_dinode_extent_tree(&et, inode, di_bh); - - ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); - if (ret) { - mlog_errno(ret); - return ret; - } - - mutex_lock(&tl_inode->i_mutex); - - if (ocfs2_truncate_log_needs_flush(osb)) { - ret = __ocfs2_flush_truncate_log(osb); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - - ret = ocfs2_journal_access(handle, inode, di_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - - ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, - dealloc); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - OCFS2_I(inode)->ip_clusters -= len; - di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); - - ret = ocfs2_journal_dirty(handle, di_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); - if (ret) - mlog_errno(ret); - -out_commit: - ocfs2_commit_trans(osb, handle); -out: - mutex_unlock(&tl_inode->i_mutex); - - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - - return ret; -} - /* * Truncate a byte range, avoiding pages within partial clusters. 
This * preserves those pages for the zeroing code to write to. @@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_cached_dealloc_ctxt dealloc; struct address_space *mapping = inode->i_mapping; + struct ocfs2_extent_tree et; + ocfs2_init_dinode_extent_tree(&et, inode, di_bh); ocfs2_init_dealloc_ctxt(&dealloc); if (byte_len == 0) @@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode, /* Only do work for non-holes */ if (phys_cpos != 0) { - ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, - phys_cpos, alloc_size, - &dealloc); + ret = ocfs2_remove_btree_range(inode, &et, cpos, + phys_cpos, alloc_size, + &dealloc); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index e92382cbca5f..172f9fbc9fc7 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb, struct ocfs2_alloc_context *data_ac, struct ocfs2_alloc_context *meta_ac, enum ocfs2_alloc_restarted *reason_ret); +int ocfs2_simple_size_update(struct inode *inode, + struct buffer_head *di_bh, + u64 new_i_size); int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7aa00d511874..288512c9dbc2 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/pagemap.h> +#include <linux/quotaops.h> #include <asm/byteorder.h> @@ -214,12 +215,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) return 0; } -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino) +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino) { struct super_block *sb; struct ocfs2_super *osb; - int status = -EINVAL; int use_plocks = 1; 
mlog_entry("(0x%p, size:%llu)\n", inode, @@ -232,25 +232,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) use_plocks = 0; - /* this means that read_inode cannot create a superblock inode - * today. change if needed. */ - if (!OCFS2_IS_VALID_DINODE(fe) || - !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { - mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, " - "signature = %.*s, flags = 0x%x\n", - inode->i_ino, - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature, le32_to_cpu(fe->i_flags)); - goto bail; - } + /* + * These have all been checked by ocfs2_read_inode_block() or set + * by ocfs2_mknod_locked(), so a failure is a code bug. + */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode + cannot create a superblock + inode today. change if + that is needed. */ + BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))); + BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation); - if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) { - mlog(ML_ERROR, "file entry generation does not match " - "superblock! 
osb->fs_generation=%x, " - "fe->i_fs_generation=%x\n", - osb->fs_generation, le32_to_cpu(fe->i_fs_generation)); - goto bail; - } OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); @@ -284,14 +276,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_nlink = le16_to_cpu(fe->i_links_count); - if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) + if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE; + inode->i_flags |= S_NOQUOTA; + } if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino); } else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) { OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP; + } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) { + inode->i_flags |= S_NOQUOTA; } else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) { mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino); /* we can't actually hit this as read_inode can't @@ -354,10 +350,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, ocfs2_set_inode_flags(inode); - status = 0; -bail: - mlog_exit(status); - return status; + mlog_exit_void(); } static int ocfs2_read_locked_inode(struct inode *inode, @@ -460,11 +453,14 @@ static int ocfs2_read_locked_inode(struct inode *inode, } } - if (can_lock) - status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); - else + if (can_lock) { + status = ocfs2_read_inode_block_full(inode, &bh, + OCFS2_BH_IGNORE_CACHE); + } else { status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); + if (!status) + status = ocfs2_validate_inode_block(osb->sb, bh); + } if (status < 0) { mlog_errno(status); goto bail; @@ -472,12 +468,6 @@ static int ocfs2_read_locked_inode(struct inode *inode, status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; - if 
(!OCFS2_IS_VALID_DINODE(fe)) { - mlog(0, "Invalid dinode #%llu: signature = %.*s\n", - (unsigned long long)args->fi_blkno, 7, - fe->i_signature); - goto bail; - } /* * This is a code bug. Right now the caller needs to @@ -491,10 +481,9 @@ static int ocfs2_read_locked_inode(struct inode *inode, if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) - inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); + inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); - if (ocfs2_populate_inode(inode, fe, 0) < 0) - goto bail; + ocfs2_populate_inode(inode, fe, 0); BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); @@ -615,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode, goto bail; } - handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); + handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS + + ocfs2_quota_trans_credits(inode->i_sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -647,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode, } ocfs2_remove_from_cache(inode, di_bh); + vfs_dq_free_inode(inode); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); @@ -929,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode) mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - if (is_bad_inode(inode)) { + /* When we fail in read_inode() we mark inode as bad. The second test + * catches the case when inode allocation fails before allocating + * a block for inode. 
*/ + if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) { mlog(0, "Skipping delete of bad inode\n"); goto bail; } @@ -1264,3 +1258,71 @@ void ocfs2_refresh_inode(struct inode *inode, spin_unlock(&OCFS2_I(inode)->ip_lock); } + +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh) +{ + int rc = -EINVAL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; + + mlog(0, "Validating dinode %llu\n", + (unsigned long long)bh->b_blocknr); + + BUG_ON(!buffer_uptodate(bh)); + + if (!OCFS2_IS_VALID_DINODE(di)) { + ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n", + (unsigned long long)bh->b_blocknr, 7, + di->i_signature); + goto bail; + } + + if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) { + ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(di->i_blkno)); + goto bail; + } + + if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { + ocfs2_error(sb, + "Invalid dinode #%llu: OCFS2_VALID_FL not set\n", + (unsigned long long)bh->b_blocknr); + goto bail; + } + + if (le32_to_cpu(di->i_fs_generation) != + OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Invalid dinode #%llu: fs_generation is %u\n", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(di->i_fs_generation)); + goto bail; + } + + rc = 0; + +bail: + return rc; +} + +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, + flags, ocfs2_validate_inode_block); + + /* If ocfs2_read_blocks() got us a new bh, pass it up. 
*/ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh) +{ + return ocfs2_read_inode_block_full(inode, bh, 0); +} diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 2f37af9bcc4a..eb3c302b38d3 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, int sysfile_type); int ocfs2_inode_init_private(struct inode *inode); int ocfs2_inode_revalidate(struct dentry *dentry); -int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, - int create_ino); +void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, + int create_ino); void ocfs2_read_inode(struct inode *inode); void ocfs2_read_inode2(struct inode *inode, void *opaque); ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf, @@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle, struct buffer_head *bh); int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb); int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb); +struct buffer_head *ocfs2_bread(struct inode *inode, + int block, int *err, int reada); void ocfs2_set_inode_flags(struct inode *inode); void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi); @@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode) return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits); } +/* Validate that a bh contains a valid inode */ +int ocfs2_validate_inode_block(struct super_block *sb, + struct buffer_head *bh); +/* + * Read an inode block into *bh. If *bh is NULL, a bh will be allocated. + * This is a cached read. The inode will be validated with + * ocfs2_validate_inode_block(). 
+ */ +int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh); +/* The same, but can be passed OCFS2_BH_* flags */ +int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, + int flags); #endif /* OCFS2_INODE_H */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 99fe9d584f3c..302f1144a708 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -45,6 +45,7 @@ #include "slot_map.h" #include "super.h" #include "sysfile.h" +#include "quota.h" #include "buffer_head_io.h" @@ -52,10 +53,10 @@ DEFINE_SPINLOCK(trans_inc_lock); static int ocfs2_force_read_journal(struct inode *inode); static int ocfs2_recover_node(struct ocfs2_super *osb, - int node_num); + int node_num, int slot_num); static int __ocfs2_recovery_thread(void *arg); static int ocfs2_commit_cache(struct ocfs2_super *osb); -static int ocfs2_wait_on_mount(struct ocfs2_super *osb); +static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, int dirty, int replayed); static int ocfs2_trylock_journal(struct ocfs2_super *osb, @@ -64,6 +65,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, int slot); static int ocfs2_commit_thread(void *arg); +static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) +{ + return __ocfs2_wait_on_mount(osb, 0); +} + +static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) +{ + return __ocfs2_wait_on_mount(osb, 1); +} + + /* * The recovery_list is a simple linked list of node numbers to recover. @@ -256,11 +268,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); BUG_ON(max_buffs <= 0); - /* JBD might support this, but our journalling code doesn't yet. */ - if (journal_current_handle()) { - mlog(ML_ERROR, "Recursive transaction attempted!\n"); - BUG(); - } + /* Nested transaction? Just return the handle... 
*/ + if (journal_current_handle()) + return jbd2_journal_start(journal, max_buffs); down_read(&osb->journal->j_trans_barrier); @@ -285,16 +295,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle) { - int ret; + int ret, nested; struct ocfs2_journal *journal = osb->journal; BUG_ON(!handle); + nested = handle->h_ref > 1; ret = jbd2_journal_stop(handle); if (ret < 0) mlog_errno(ret); - up_read(&journal->j_trans_barrier); + if (!nested) + up_read(&journal->j_trans_barrier); return ret; } @@ -434,20 +446,6 @@ int ocfs2_journal_dirty(handle_t *handle, return status; } -#ifdef CONFIG_OCFS2_COMPAT_JBD -int ocfs2_journal_dirty_data(handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_dirty_data(handle, bh); - if (err) - mlog_errno(err); - /* TODO: When we can handle it, abort the handle and go RO on - * error here. */ - - return err; -} -#endif - #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) void ocfs2_set_journal_params(struct ocfs2_super *osb) @@ -587,17 +585,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, mlog_entry_void(); fe = (struct ocfs2_dinode *)bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - /* This is called from startup/shutdown which will - * handle the errors in a specific manner, so no need - * to call ocfs2_error() here. */ - mlog(ML_ERROR, "Journal dinode %llu has invalid " - "signature: %.*s", - (unsigned long long)le64_to_cpu(fe->i_blkno), 7, - fe->i_signature); - status = -EIO; - goto out; - } + + /* The journal bh on the osb always comes from ocfs2_journal_init() + * and was validated there inside ocfs2_inode_lock_full(). It's a + * code bug if we mess it up. 
*/ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); flags = le32_to_cpu(fe->id1.journal1.ij_flags); if (dirty) @@ -613,7 +605,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); -out: mlog_exit(status); return status; } @@ -878,6 +869,7 @@ struct ocfs2_la_recovery_item { int lri_slot; struct ocfs2_dinode *lri_la_dinode; struct ocfs2_dinode *lri_tl_dinode; + struct ocfs2_quota_recovery *lri_qrec; }; /* Does the second half of the recovery process. By this point, the @@ -898,6 +890,7 @@ void ocfs2_complete_recovery(struct work_struct *work) struct ocfs2_super *osb = journal->j_osb; struct ocfs2_dinode *la_dinode, *tl_dinode; struct ocfs2_la_recovery_item *item, *n; + struct ocfs2_quota_recovery *qrec; LIST_HEAD(tmp_la_list); mlog_entry_void(); @@ -913,6 +906,8 @@ void ocfs2_complete_recovery(struct work_struct *work) mlog(0, "Complete recovery for slot %d\n", item->lri_slot); + ocfs2_wait_on_quotas(osb); + la_dinode = item->lri_la_dinode; if (la_dinode) { mlog(0, "Clean up local alloc %llu\n", @@ -943,6 +938,16 @@ void ocfs2_complete_recovery(struct work_struct *work) if (ret < 0) mlog_errno(ret); + qrec = item->lri_qrec; + if (qrec) { + mlog(0, "Recovering quota files"); + ret = ocfs2_finish_quota_recovery(osb, qrec, + item->lri_slot); + if (ret < 0) + mlog_errno(ret); + /* Recovery info is already freed now */ + } + kfree(item); } @@ -956,7 +961,8 @@ void ocfs2_complete_recovery(struct work_struct *work) static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, int slot_num, struct ocfs2_dinode *la_dinode, - struct ocfs2_dinode *tl_dinode) + struct ocfs2_dinode *tl_dinode, + struct ocfs2_quota_recovery *qrec) { struct ocfs2_la_recovery_item *item; @@ -971,6 +977,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, if (tl_dinode) kfree(tl_dinode); + if (qrec) + ocfs2_free_quota_recovery(qrec); + mlog_errno(-ENOMEM); return; } @@ -979,6 +988,7 @@ static void 
ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, item->lri_la_dinode = la_dinode; item->lri_slot = slot_num; item->lri_tl_dinode = tl_dinode; + item->lri_qrec = qrec; spin_lock(&journal->j_lock); list_add_tail(&item->lri_list, &journal->j_la_cleanups); @@ -998,6 +1008,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) ocfs2_queue_recovery_completion(journal, osb->slot_num, osb->local_alloc_copy, + NULL, NULL); ocfs2_schedule_truncate_log_flush(osb, 0); @@ -1006,11 +1017,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) } } +void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) +{ + if (osb->quota_rec) { + ocfs2_queue_recovery_completion(osb->journal, + osb->slot_num, + NULL, + NULL, + osb->quota_rec); + osb->quota_rec = NULL; + } +} + static int __ocfs2_recovery_thread(void *arg) { - int status, node_num; + int status, node_num, slot_num; struct ocfs2_super *osb = arg; struct ocfs2_recovery_map *rm = osb->recovery_map; + int *rm_quota = NULL; + int rm_quota_used = 0, i; + struct ocfs2_quota_recovery *qrec; mlog_entry_void(); @@ -1019,6 +1045,11 @@ static int __ocfs2_recovery_thread(void *arg) goto bail; } + rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS); + if (!rm_quota) { + status = -ENOMEM; + goto bail; + } restart: status = ocfs2_super_lock(osb, 1); if (status < 0) { @@ -1032,8 +1063,28 @@ restart: * clear it until ocfs2_recover_node() has succeeded. */ node_num = rm->rm_entries[0]; spin_unlock(&osb->osb_lock); - - status = ocfs2_recover_node(osb, node_num); + mlog(0, "checking node %d\n", node_num); + slot_num = ocfs2_node_num_to_slot(osb, node_num); + if (slot_num == -ENOENT) { + status = 0; + mlog(0, "no slot for this node, so no recovery" + "required.\n"); + goto skip_recovery; + } + mlog(0, "node %d was using slot %d\n", node_num, slot_num); + + /* It is a bit subtle with quota recovery. 
We cannot do it + * immediately because we have to obtain cluster locks from + * quota files and we also don't want to just skip it because + * then quota usage would be out of sync until some node takes + * the slot. So we remember which nodes need quota recovery + * and when everything else is done, we recover quotas. */ + for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); + if (i == rm_quota_used) + rm_quota[rm_quota_used++] = slot_num; + + status = ocfs2_recover_node(osb, node_num, slot_num); +skip_recovery: if (!status) { ocfs2_recovery_map_clear(osb, node_num); } else { @@ -1055,13 +1106,27 @@ restart: if (status < 0) mlog_errno(status); + /* Now it is right time to recover quotas... We have to do this under + * superblock lock so that noone can start using the slot (and crash) + * before we recover it */ + for (i = 0; i < rm_quota_used; i++) { + qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); + if (IS_ERR(qrec)) { + status = PTR_ERR(qrec); + mlog_errno(status); + continue; + } + ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], + NULL, NULL, qrec); + } + ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead * node(s) may have disallowd a previos inode delete. Re-processing * is therefore required. */ ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, - NULL); + NULL, NULL); bail: mutex_lock(&osb->recovery_lock); @@ -1076,6 +1141,9 @@ bail: mutex_unlock(&osb->recovery_lock); + if (rm_quota) + kfree(rm_quota); + mlog_exit(status); /* no one is callint kthread_stop() for us so the kthread() api * requires that we call do_exit(). 
And it isn't exported, but @@ -1135,8 +1203,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, } SET_INODE_JOURNAL(inode); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, - OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; @@ -1304,31 +1371,19 @@ done: * far less concerning. */ static int ocfs2_recover_node(struct ocfs2_super *osb, - int node_num) + int node_num, int slot_num) { int status = 0; - int slot_num; struct ocfs2_dinode *la_copy = NULL; struct ocfs2_dinode *tl_copy = NULL; - mlog_entry("(node_num=%d, osb->node_num = %d)\n", - node_num, osb->node_num); - - mlog(0, "checking node %d\n", node_num); + mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", + node_num, slot_num, osb->node_num); /* Should not ever be called to recover ourselves -- in that * case we should've called ocfs2_journal_load instead. */ BUG_ON(osb->node_num == node_num); - slot_num = ocfs2_node_num_to_slot(osb, node_num); - if (slot_num == -ENOENT) { - status = 0; - mlog(0, "no slot for this node, so no recovery required.\n"); - goto done; - } - - mlog(0, "node %d was using slot %d\n", node_num, slot_num); - status = ocfs2_replay_journal(osb, node_num, slot_num); if (status < 0) { if (status == -EBUSY) { @@ -1364,7 +1419,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, /* This will kfree the memory pointed to by la_copy and tl_copy */ ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, - tl_copy); + tl_copy, NULL); status = 0; done: @@ -1659,13 +1714,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, return ret; } -static int ocfs2_wait_on_mount(struct ocfs2_super *osb) +static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota) { /* This check is good because ocfs2 will wait on our recovery * thread before changing it to something other than MOUNTED * or DISABLED. 
*/ wait_event(osb->osb_mount_event, - atomic_read(&osb->vol_state) == VOLUME_MOUNTED || + (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) || + atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS || atomic_read(&osb->vol_state) == VOLUME_DISABLED); /* If there's an error on mount, then we may never get to the diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index d4d14e9a3cea..37013bf9ce28 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -27,12 +27,7 @@ #define OCFS2_JOURNAL_H #include <linux/fs.h> -#ifndef CONFIG_OCFS2_COMPAT_JBD -# include <linux/jbd2.h> -#else -# include <linux/jbd.h> -# include "ocfs2_jbd_compat.h" -#endif +#include <linux/jbd2.h> enum ocfs2_journal_state { OCFS2_JOURNAL_FREE = 0, @@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num); int ocfs2_mark_dead_nodes(struct ocfs2_super *osb); void ocfs2_complete_mount_recovery(struct ocfs2_super *osb); +void ocfs2_complete_quota_recovery(struct ocfs2_super *osb); static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb) { @@ -273,10 +269,6 @@ int ocfs2_journal_access(handle_t *handle, */ int ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh); -#ifdef CONFIG_OCFS2_COMPAT_JBD -int ocfs2_journal_dirty_data(handle_t *handle, - struct buffer_head *bh); -#endif /* * Credit Macros: @@ -293,6 +285,37 @@ int ocfs2_journal_dirty_data(handle_t *handle, /* extended attribute block update */ #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 +/* global quotafile inode update, data block */ +#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) + +/* + * The two writes below can accidentally see global info dirty due + * to set_info() quotactl so make them prepared for the writes. 
+ */ +/* quota data block, global info */ +/* Write to local quota file */ +#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) + +/* global quota data block, local quota data block, global quota inode, + * global quota info */ +#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) + +static inline int ocfs2_quota_trans_credits(struct super_block *sb) +{ + int credits = 0; + + if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) + credits += OCFS2_QWRITE_CREDITS; + if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) + credits += OCFS2_QWRITE_CREDITS; + return credits; +} + +/* Number of credits needed for removing quota structure from file */ +int ocfs2_calc_qdel_credits(struct super_block *sb, int type); +/* Number of credits needed for initialization of new quota structure */ +int ocfs2_calc_qinit_credits(struct super_block *sb, int type); + /* group extend. inode update and last group update. */ #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) @@ -303,8 +326,11 @@ int ocfs2_journal_dirty_data(handle_t *handle, * prev. group desc. if we relink. */ #define OCFS2_SUBALLOC_ALLOC (3) -#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \ - + OCFS2_INODE_UPDATE_CREDITS) +static inline int ocfs2_inline_to_extents_credits(struct super_block *sb) +{ + return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* dinode + group descriptor update. We don't relink on free yet. 
*/ #define OCFS2_SUBALLOC_FREE (2) @@ -313,16 +339,23 @@ int ocfs2_journal_dirty_data(handle_t *handle, #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ + OCFS2_TRUNCATE_LOG_UPDATE) -#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) +static inline int ocfs2_remove_extent_credits(struct super_block *sb) +{ + return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + * bitmap block for the new bit) */ #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) /* parent fe, parent block, new file entry, inode alloc fe, inode alloc - * group descriptor + mkdir/symlink blocks */ -#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \ - + OCFS2_DIR_LINK_ADDITIONAL_CREDITS) + * group descriptor + mkdir/symlink blocks + quota update */ +static inline int ocfs2_mknod_credits(struct super_block *sb) +{ + return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS + + ocfs2_quota_trans_credits(sb); +} /* local alloc metadata change + main bitmap updates */ #define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \ @@ -332,13 +365,21 @@ int ocfs2_journal_dirty_data(handle_t *handle, * for the dinode, one for the new block. 
*/ #define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2) -/* file update (nlink, etc) + directory mtime/ctime + dir entry block */ -#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1) +/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota + * update on dir */ +static inline int ocfs2_link_credits(struct super_block *sb) +{ + return 2*OCFS2_INODE_UPDATE_CREDITS + 1 + + ocfs2_quota_trans_credits(sb); +} /* inode + dir inode (if we unlink a dir), + dir entry block + orphan * dir inode link */ -#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \ - + OCFS2_LINK_CREDITS) +static inline int ocfs2_unlink_credits(struct super_block *sb) +{ + /* The quota update from ocfs2_link_credits is unused here... */ + return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb); +} /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + * inode alloc group descriptor */ @@ -347,8 +388,10 @@ int ocfs2_journal_dirty_data(handle_t *handle, /* dinode update, old dir dinode update, new dir dinode update, old * dir dir entry, new dir dir entry, dir entry update for renaming * directory + target unlink */ -#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ - + OCFS2_UNLINK_CREDITS) +static inline int ocfs2_rename_credits(struct super_block *sb) +{ + return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb); +} /* global bitmap dinode, group desc., relinked group, * suballocator dinode, group desc., relinked group, @@ -386,18 +429,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, * credit for the dinode there. 
*/ extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); - return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; + return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + + ocfs2_quota_trans_credits(sb); } static inline int ocfs2_calc_symlink_credits(struct super_block *sb) { - int blocks = OCFS2_MKNOD_CREDITS; + int blocks = ocfs2_mknod_credits(sb); /* links can be longer than one block so we may update many * within our single allocated extent. */ blocks += ocfs2_clusters_to_blocks(sb, 1); - return blocks; + return blocks + ocfs2_quota_trans_credits(sb); } static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb, @@ -434,6 +478,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, /* update to the truncate log. */ credits += OCFS2_TRUNCATE_LOG_UPDATE; + credits += ocfs2_quota_trans_credits(sb); + return credits; } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 687b28713c32..19cfb1b9ce09 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) goto bail; } - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; @@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, mutex_lock(&inode->i_mutex); - status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, - &alloc_bh, OCFS2_BH_IGNORE_CACHE); + status = ocfs2_read_inode_block_full(inode, &alloc_bh, + OCFS2_BH_IGNORE_CACHE); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2545e7402efe..6173807ba23b 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -40,6 +40,7 @@ #include <linux/types.h> #include <linux/slab.h> #include <linux/highmem.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_NAMEI 
#include <cluster/masklog.h> @@ -61,17 +62,18 @@ #include "sysfile.h" #include "uptodate.h" #include "xattr.h" +#include "acl.h" #include "buffer_head_io.h" static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct inode *dir, - struct dentry *dentry, int mode, + struct inode *inode, + struct dentry *dentry, dev_t dev, struct buffer_head **new_fe_bh, struct buffer_head *parent_fe_bh, handle_t *handle, - struct inode **ret_inode, struct ocfs2_alloc_context *inode_ac); static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, @@ -186,6 +188,35 @@ bail: return ret; } +static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) +{ + struct inode *inode; + + inode = new_inode(dir->i_sb); + if (!inode) { + mlog(ML_ERROR, "new_inode failed!\n"); + return NULL; + } + + /* populate as many fields early on as possible - many of + * these are used by the support functions here and in + * callers. */ + if (S_ISDIR(mode)) + inode->i_nlink = 2; + else + inode->i_nlink = 1; + inode->i_uid = current_fsuid(); + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; + vfs_dq_init(inode); + return inode; +} + static int ocfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, @@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir, struct inode *inode = NULL; struct ocfs2_alloc_context *inode_ac = NULL; struct ocfs2_alloc_context *data_ac = NULL; + struct ocfs2_alloc_context *xattr_ac = NULL; + int want_clusters = 0; + int xattr_credits = 0; + struct ocfs2_security_xattr_info si = { + .enable = 1, + }; + int did_quota_inode = 0; mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, (unsigned long)dev, dentry->d_name.len, @@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - /* Reserve a cluster if creating an extent based directory. 
*/ - if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) { - status = ocfs2_reserve_clusters(osb, 1, &data_ac); - if (status < 0) { - if (status != -ENOSPC) - mlog_errno(status); + inode = ocfs2_get_init_inode(dir, mode); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto leave; + } + + /* get security xattr */ + status = ocfs2_init_security_get(inode, dir, &si); + if (status) { + if (status == -EOPNOTSUPP) + si.enable = 0; + else { + mlog_errno(status); goto leave; } } - handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS); + /* calculate meta data/clusters for setting security and acl xattr */ + status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode, + &si, &want_clusters, + &xattr_credits, &xattr_ac); + if (status < 0) { + mlog_errno(status); + goto leave; + } + + /* Reserve a cluster if creating an extent based directory. */ + if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) + want_clusters += 1; + + status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto leave; + } + + handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) + + xattr_credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } + /* We don't use standard VFS wrapper because we don't want vfs_dq_init + * to be called. */ + if (sb_any_quota_active(osb->sb) && + osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { + status = -EDQUOT; + goto leave; + } + did_quota_inode = 1; + /* do the real work now. 
*/ - status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev, + status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, &new_fe_bh, parent_fe_bh, handle, - &inode, inode_ac); + inode_ac); if (status < 0) { mlog_errno(status); goto leave; @@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir, inc_nlink(dir); } + status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto leave; + } + + if (si.enable) { + status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto leave; + } + } + status = ocfs2_add_entry(handle, dentry, inode, OCFS2_I(inode)->ip_blkno, parent_fe_bh, de_bh); @@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir, d_instantiate(dentry, inode); status = 0; leave: + if (status < 0 && did_quota_inode) + vfs_dq_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -331,9 +425,13 @@ leave: brelse(new_fe_bh); brelse(de_bh); brelse(parent_fe_bh); + kfree(si.name); + kfree(si.value); - if ((status < 0) && inode) + if ((status < 0) && inode) { + clear_nlink(inode); iput(inode); + } if (inode_ac) ocfs2_free_alloc_context(inode_ac); @@ -341,6 +439,9 @@ leave: if (data_ac) ocfs2_free_alloc_context(data_ac); + if (xattr_ac) + ocfs2_free_alloc_context(xattr_ac); + mlog_exit(status); return status; @@ -348,12 +449,12 @@ leave: static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct inode *dir, - struct dentry *dentry, int mode, + struct inode *inode, + struct dentry *dentry, dev_t dev, struct buffer_head **new_fe_bh, struct buffer_head *parent_fe_bh, handle_t *handle, - struct inode **ret_inode, struct ocfs2_alloc_context *inode_ac) { int status = 0; @@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, struct ocfs2_extent_list *fel; u64 fe_blkno = 0; u16 suballoc_bit; - struct inode *inode = NULL; - mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, 
dentry, mode, - (unsigned long)dev, dentry->d_name.len, + mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, + inode->i_mode, (unsigned long)dev, dentry->d_name.len, dentry->d_name.name); *new_fe_bh = NULL; - *ret_inode = NULL; status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit, &fe_blkno); @@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, goto leave; } - inode = new_inode(dir->i_sb); - if (!inode) { - status = -ENOMEM; - mlog(ML_ERROR, "new_inode failed!\n"); - goto leave; - } - /* populate as many fields early on as possible - many of * these are used by the support functions here and in * callers. */ inode->i_ino = ino_from_blkno(osb->sb, fe_blkno); OCFS2_I(inode)->ip_blkno = fe_blkno; - if (S_ISDIR(mode)) - inode->i_nlink = 2; - else - inode->i_nlink = 1; - inode->i_mode = mode; spin_lock(&osb->osb_lock); inode->i_generation = osb->s_next_generation++; spin_unlock(&osb->osb_lock); @@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, fe->i_blkno = cpu_to_le64(fe_blkno); fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); - fe->i_uid = cpu_to_le32(current_fsuid()); - if (dir->i_mode & S_ISGID) { - fe->i_gid = cpu_to_le32(dir->i_gid); - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - fe->i_gid = cpu_to_le32(current_fsgid()); - fe->i_mode = cpu_to_le16(mode); - if (S_ISCHR(mode) || S_ISBLK(mode)) + fe->i_uid = cpu_to_le32(inode->i_uid); + fe->i_gid = cpu_to_le32(inode->i_gid); + fe->i_mode = cpu_to_le16(inode->i_mode); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev)); - fe->i_links_count = cpu_to_le16(inode->i_nlink); fe->i_last_eb_blk = 0; @@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, /* * If supported, directories start with inline data. 
*/ - if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) { + if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) { u16 feat = le16_to_cpu(fe->i_dyn_features); fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL); @@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, goto leave; } - if (ocfs2_populate_inode(inode, fe, 1) < 0) { - mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, " - "i_blkno=%llu, i_ino=%lu\n", - (unsigned long long)(*new_fe_bh)->b_blocknr, - (unsigned long long)le64_to_cpu(fe->i_blkno), - inode->i_ino); - BUG(); - } - + ocfs2_populate_inode(inode, fe, 1); ocfs2_inode_set_new(osb, inode); if (!ocfs2_mount_local(osb)) { status = ocfs2_create_new_inode_locks(inode); @@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, status = 0; /* error in ocfs2_create_new_inode_locks is not * critical */ - *ret_inode = inode; leave: if (status < 0) { if (*new_fe_bh) { brelse(*new_fe_bh); *new_fe_bh = NULL; } - if (inode) { - clear_nlink(inode); - iput(inode); - } } mlog_exit(status); @@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_unlock_inode; } - handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb)); if (IS_ERR(handle)) { err = PTR_ERR(handle); handle = NULL; @@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir, } } - handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir, } } - handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir, handle_t *handle = NULL; struct ocfs2_alloc_context *inode_ac = NULL; 
struct ocfs2_alloc_context *data_ac = NULL; + struct ocfs2_alloc_context *xattr_ac = NULL; + int want_clusters = 0; + int xattr_credits = 0; + struct ocfs2_security_xattr_info si = { + .enable = 1, + }; + int did_quota = 0, did_quota_inode = 0; mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, dentry, symname, dentry->d_name.len, dentry->d_name.name); @@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - /* don't reserve bitmap space for fast symlinks. */ - if (l > ocfs2_fast_symlink_chars(sb)) { - status = ocfs2_reserve_clusters(osb, 1, &data_ac); + inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + /* get security xattr */ + status = ocfs2_init_security_get(inode, dir, &si); + if (status) { + if (status == -EOPNOTSUPP) + si.enable = 0; + else { + mlog_errno(status); + goto bail; + } + } + + /* calculate meta data/clusters for setting security xattr */ + if (si.enable) { + status = ocfs2_calc_security_init(dir, &si, &want_clusters, + &xattr_credits, &xattr_ac); if (status < 0) { - if (status != -ENOSPC) - mlog_errno(status); + mlog_errno(status); goto bail; } } - handle = ocfs2_start_trans(osb, credits); + /* don't reserve bitmap space for fast symlinks. */ + if (l > ocfs2_fast_symlink_chars(sb)) + want_clusters += 1; + + status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac); + if (status < 0) { + if (status != -ENOSPC) + mlog_errno(status); + goto bail; + } + + handle = ocfs2_start_trans(osb, credits + xattr_credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); handle = NULL; @@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir, goto bail; } - status = ocfs2_mknod_locked(osb, dir, dentry, - S_IFLNK | S_IRWXUGO, 0, - &new_fe_bh, parent_fe_bh, handle, - &inode, inode_ac); + /* We don't use standard VFS wrapper because we don't want vfs_dq_init + * to be called. 
*/ + if (sb_any_quota_active(osb->sb) && + osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) { + status = -EDQUOT; + goto bail; + } + did_quota_inode = 1; + + status = ocfs2_mknod_locked(osb, dir, inode, dentry, + 0, &new_fe_bh, parent_fe_bh, handle, + inode_ac); if (status < 0) { mlog_errno(status); goto bail; @@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir, u32 offset = 0; inode->i_op = &ocfs2_symlink_inode_operations; + if (vfs_dq_alloc_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1))) { + status = -EDQUOT; + goto bail; + } + did_quota = 1; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, @@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir, } } + if (si.enable) { + status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si, + xattr_ac, data_ac); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + status = ocfs2_add_entry(handle, dentry, inode, le64_to_cpu(fe->i_blkno), parent_fe_bh, de_bh); @@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir, dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); bail: + if (status < 0 && did_quota) + vfs_dq_free_space_nodirty(inode, + ocfs2_clusters_to_bytes(osb->sb, 1)); + if (status < 0 && did_quota_inode) + vfs_dq_free_inode(inode); if (handle) ocfs2_commit_trans(osb, handle); @@ -1640,12 +1772,18 @@ bail: brelse(new_fe_bh); brelse(parent_fe_bh); brelse(de_bh); + kfree(si.name); + kfree(si.value); if (inode_ac) ocfs2_free_alloc_context(inode_ac); if (data_ac) ocfs2_free_alloc_context(data_ac); - if ((status < 0) && inode) + if (xattr_ac) + ocfs2_free_alloc_context(xattr_ac); + if ((status < 0) && inode) { + clear_nlink(inode); iput(inode); + } mlog_exit(status); @@ -1754,9 +1892,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); - status = ocfs2_read_block(orphan_dir_inode, - OCFS2_I(orphan_dir_inode)->ip_blkno, - &orphan_dir_bh); + status = 
ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh); if (status < 0) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index fef7ece32376..5c777988042f 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -85,7 +85,7 @@ enum ocfs2_unlock_action { }; /* ocfs2_lock_res->l_flags flags. */ -#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized +#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ @@ -161,6 +161,7 @@ enum ocfs2_vol_state { VOLUME_INIT = 0, VOLUME_MOUNTED, + VOLUME_MOUNTED_QUOTAS, VOLUME_DISMOUNTED, VOLUME_DISABLED }; @@ -195,6 +196,9 @@ enum ocfs2_mount_options OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ + OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */ + OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */ + OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -205,6 +209,7 @@ enum ocfs2_mount_options struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; +struct ocfs2_quota_recovery; struct ocfs2_super { struct task_struct *commit_task; @@ -286,10 +291,11 @@ struct ocfs2_super char *local_alloc_debug_buf; #endif - /* Next two fields are for local node slot recovery during + /* Next three fields are for local node slot recovery during * mount. 
*/ int dirty; struct ocfs2_dinode *local_alloc_copy; + struct ocfs2_quota_recovery *quota_rec; struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ @@ -443,35 +449,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) #define OCFS2_IS_VALID_DINODE(ptr) \ (!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \ - typeof(__di) ____di = (__di); \ - ocfs2_error((__sb), \ - "Dinode # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \ - (____di)->i_signature); \ -} while (0) - #define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \ (!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \ - typeof(__eb) ____eb = (__eb); \ - ocfs2_error((__sb), \ - "Extent Block # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \ - (____eb)->h_signature); \ -} while (0) - #define OCFS2_IS_VALID_GROUP_DESC(ptr) \ (!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE)) -#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \ - typeof(__gd) ____gd = (__gd); \ - ocfs2_error((__sb), \ - "Group Descriptor # %llu has bad signature %.*s", \ - (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \ - (____gd)->bg_signature); \ -} while (0) #define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \ (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE)) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 5f180cf7abbd..4ae3984366ee 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -93,7 +93,9 @@ | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ | OCFS2_FEATURE_INCOMPAT_XATTR) -#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN +#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ + | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ + | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) /* * 
Heartbeat-only devices are missing journals and other files. The @@ -157,6 +159,12 @@ */ #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 +/* + * Maintain quota information for this filesystem + */ +#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 +#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 + /* The byte offset of the first backup block will be 1G. * The following will be 4G, 16G, 64G, 256G and 1T. */ @@ -186,6 +194,7 @@ #define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */ #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ +#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ /* * Flags on ocfs2_dinode.i_dyn_features @@ -323,13 +332,17 @@ enum { #define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE HEARTBEAT_SYSTEM_INODE, GLOBAL_BITMAP_SYSTEM_INODE, -#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE + USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE, +#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE, EXTENT_ALLOC_SYSTEM_INODE, INODE_ALLOC_SYSTEM_INODE, JOURNAL_SYSTEM_INODE, LOCAL_ALLOC_SYSTEM_INODE, TRUNCATE_LOG_SYSTEM_INODE, + LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE, NUM_SYSTEM_INODES }; @@ -343,6 +356,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { [SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 }, [HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 }, [GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 }, + [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 }, + [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 }, /* Slot-specific system inodes (one copy per slot) */ [ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 }, @@ -350,7 +365,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { 
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 }, [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, - [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 } + [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }, + [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, + [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, }; /* Parameter passed from mount.ocfs2 to module */ @@ -862,6 +879,109 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) return xe->xe_type & OCFS2_XATTR_TYPE_MASK; } +/* + * On disk structures for global quota file + */ + +/* Magic numbers and known versions for global quota files */ +#define OCFS2_GLOBAL_QMAGICS {\ + 0x0cf52470, /* USRQUOTA */ \ + 0x0cf52471 /* GRPQUOTA */ \ +} + +#define OCFS2_GLOBAL_QVERSIONS {\ + 0, \ + 0, \ +} + + +/* Each block of each quota file has a certain fixed number of bytes reserved + * for OCFS2 internal use at its end. OCFS2 can use it for things like + * checksums, etc. 
*/ +#define OCFS2_QBLK_RESERVED_SPACE 8 + +/* Generic header of all quota files */ +struct ocfs2_disk_dqheader { + __le32 dqh_magic; /* Magic number identifying file */ + __le32 dqh_version; /* Quota format version */ +}; + +#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) + +/* Information header of global quota file (immediately follows the generic + * header) */ +struct ocfs2_global_disk_dqinfo { +/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */ + __le32 dqi_igrace; /* Grace time for inode softlimit excess */ + __le32 dqi_syncms; /* Time after which we sync local changes to + * global quota file */ + __le32 dqi_blocks; /* Number of blocks in quota file */ +/*10*/ __le32 dqi_free_blk; /* First free block in quota file */ + __le32 dqi_free_entry; /* First block with free dquot entry in quota + * file */ +}; + +/* Structure with global user / group information. We reserve some space + * for future use. */ +struct ocfs2_global_disk_dqblk { +/*00*/ __le32 dqb_id; /* ID the structure belongs to */ + __le32 dqb_use_count; /* Number of nodes having reference to this structure */ + __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */ +/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */ + __le64 dqb_curinodes; /* current # allocated inodes */ +/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */ + __le64 dqb_bsoftlimit; /* preferred limit on disk space */ +/*30*/ __le64 dqb_curspace; /* current space occupied */ + __le64 dqb_btime; /* time limit for excessive disk use */ +/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */ + __le64 dqb_pad1; +/*50*/ __le64 dqb_pad2; +}; + +/* + * On-disk structures for local quota file + */ + +/* Magic numbers and known versions for local quota files */ +#define OCFS2_LOCAL_QMAGICS {\ + 0x0cf524c0, /* USRQUOTA */ \ + 0x0cf524c1 /* GRPQUOTA */ \ +} + +#define OCFS2_LOCAL_QVERSIONS {\ + 0, \ + 0, \ +} + +/* Quota flags in dqinfo header */ +#define OLQF_CLEAN 
0x0001 /* Quota file is empty (this should be after\ + * quota has been cleanly turned off) */ + +#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) + +/* Information header of local quota file (immediately follows the generic + * header) */ +struct ocfs2_local_disk_dqinfo { + __le32 dqi_flags; /* Flags for quota file */ + __le32 dqi_chunks; /* Number of chunks of quota structures + * with a bitmap */ + __le32 dqi_blocks; /* Number of blocks allocated for quota file */ +}; + +/* Header of one chunk of a quota file */ +struct ocfs2_local_disk_chunk { + __le32 dqc_free; /* Number of free entries in the bitmap */ + u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding + * chunk of quota file */ +}; + +/* One entry in local quota file */ +struct ocfs2_local_disk_dqblk { +/*00*/ __le64 dqb_id; /* id this quota applies to */ + __le64 dqb_spacemod; /* Change in the amount of used space */ +/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */ +}; + #ifdef __KERNEL__ static inline int ocfs2_fast_symlink_chars(struct super_block *sb) { diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h deleted file mode 100644 index b91c78f8f558..000000000000 --- a/fs/ocfs2/ocfs2_jbd_compat.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * ocfs2_jbd_compat.h - * - * Compatibility defines for JBD. - * - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- */ - -#ifndef OCFS2_JBD_COMPAT_H -#define OCFS2_JBD_COMPAT_H - -#ifndef CONFIG_OCFS2_COMPAT_JBD -# error Should not have been included -#endif - -struct jbd2_inode { - unsigned int dummy; -}; - -#define JBD2_BARRIER JFS_BARRIER -#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE - -#define jbd2_journal_ack_err journal_ack_err -#define jbd2_journal_clear_err journal_clear_err -#define jbd2_journal_destroy journal_destroy -#define jbd2_journal_dirty_metadata journal_dirty_metadata -#define jbd2_journal_errno journal_errno -#define jbd2_journal_extend journal_extend -#define jbd2_journal_flush journal_flush -#define jbd2_journal_force_commit journal_force_commit -#define jbd2_journal_get_write_access journal_get_write_access -#define jbd2_journal_get_undo_access journal_get_undo_access -#define jbd2_journal_init_inode journal_init_inode -#define jbd2_journal_invalidatepage journal_invalidatepage -#define jbd2_journal_load journal_load -#define jbd2_journal_lock_updates journal_lock_updates -#define jbd2_journal_restart journal_restart -#define jbd2_journal_start journal_start -#define jbd2_journal_start_commit journal_start_commit -#define jbd2_journal_stop journal_stop -#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers -#define jbd2_journal_unlock_updates journal_unlock_updates -#define jbd2_journal_wipe journal_wipe -#define jbd2_log_wait_commit log_wait_commit - -static inline int jbd2_journal_file_inode(handle_t *handle, - struct jbd2_inode *inode) -{ - return 0; -} - -static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, - loff_t new_size) -{ - return 0; -} - -static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, - struct inode *inode) -{ - return; -} - -static inline void jbd2_journal_release_jbd_inode(journal_t *journal, - struct jbd2_inode *jinode) -{ - return; -} - - -#endif /* OCFS2_JBD_COMPAT_H */ diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 
82c200f7a8f1..eb6f50c9ceca 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -46,6 +46,7 @@ enum ocfs2_lock_type { OCFS2_LOCK_TYPE_DENTRY, OCFS2_LOCK_TYPE_OPEN, OCFS2_LOCK_TYPE_FLOCK, + OCFS2_LOCK_TYPE_QINFO, OCFS2_NUM_LOCK_TYPES }; @@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type) case OCFS2_LOCK_TYPE_FLOCK: c = 'F'; break; + case OCFS2_LOCK_TYPE_QINFO: + c = 'Q'; + break; default: c = '\0'; } @@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", [OCFS2_LOCK_TYPE_OPEN] = "Open", [OCFS2_LOCK_TYPE_FLOCK] = "Flock", + [OCFS2_LOCK_TYPE_QINFO] = "Quota", }; static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h new file mode 100644 index 000000000000..abf694155d21 --- /dev/null +++ b/fs/ocfs2/quota.h @@ -0,0 +1,117 @@ +/* + * quota.h for OCFS2 + * + * On disk quota structures for local and global quota file, in-memory + * structures. 
+ * + */ + +#ifndef _OCFS2_QUOTA_H +#define _OCFS2_QUOTA_H + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/quota.h> +#include <linux/list.h> +#include <linux/dqblk_qtree.h> +#include <linux/timer.h> + +#include "ocfs2.h" + +/* Common stuff */ +/* id number of quota format */ +#define QFMT_OCFS2 3 + +/* + * In-memory structures + */ +struct ocfs2_dquot { + struct dquot dq_dquot; /* Generic VFS dquot */ + loff_t dq_local_off; /* Offset in the local quota file */ + struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */ + unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ + s64 dq_origspace; /* Last globally synced space usage */ + s64 dq_originodes; /* Last globally synced inode usage */ +}; + +/* Description of one chunk to recover in memory */ +struct ocfs2_recovery_chunk { + struct list_head rc_list; /* List of chunks */ + int rc_chunk; /* Chunk number */ + unsigned long *rc_bitmap; /* Bitmap of entries to recover */ +}; + +struct ocfs2_quota_recovery { + struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */ +}; + +/* In-memory structure with quota header information */ +struct ocfs2_mem_dqinfo { + unsigned int dqi_type; /* Quota type this structure describes */ + unsigned int dqi_chunks; /* Number of chunks in local quota file */ + unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ + unsigned int dqi_syncms; /* How often should we sync with other nodes */ + unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */ + struct list_head dqi_chunk; /* List of chunks */ + struct inode *dqi_gqinode; /* Global quota file inode */ + struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ + struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */ + int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */ + struct buffer_head *dqi_lqi_bh; /* Buffer head with local 
quota file inode */ + struct buffer_head *dqi_ibh; /* Buffer with information header */ + struct qtree_mem_dqinfo dqi_gi; /* Info about global file */ + struct timer_list dqi_sync_timer; /* Timer for syncing dquots */ + struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery + * information, in case we + * enable quotas on file + * needing it */ +}; + +static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot) +{ + return container_of(dquot, struct ocfs2_dquot, dq_dquot); +} + +struct ocfs2_quota_chunk { + struct list_head qc_chunk; /* List of quotafile chunks */ + int qc_num; /* Number of quota chunk */ + struct buffer_head *qc_headerbh; /* Buffer head with chunk header */ +}; + +extern struct kmem_cache *ocfs2_dquot_cachep; +extern struct kmem_cache *ocfs2_qf_chunk_cachep; + +extern struct qtree_fmt_operations ocfs2_global_ops; + +struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( + struct ocfs2_super *osb, int slot_num); +int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, + struct ocfs2_quota_recovery *rec, + int slot_num); +void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec); +ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off); +ssize_t ocfs2_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off); +int ocfs2_global_read_info(struct super_block *sb, int type); +int ocfs2_global_write_info(struct super_block *sb, int type); +int ocfs2_global_read_dquot(struct dquot *dquot); +int __ocfs2_sync_dquot(struct dquot *dquot, int freeing); +static inline int ocfs2_sync_dquot(struct dquot *dquot) +{ + return __ocfs2_sync_dquot(dquot, 0); +} +static inline int ocfs2_global_release_dquot(struct dquot *dquot) +{ + return __ocfs2_sync_dquot(dquot, 1); +} + +int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); +void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex); +int ocfs2_read_quota_block(struct inode *inode, u64 v_block, + 
struct buffer_head **bh); + +extern struct dquot_operations ocfs2_quota_operations; +extern struct quota_format_type ocfs2_quota_format; + +#endif /* _OCFS2_QUOTA_H */ diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c new file mode 100644 index 000000000000..4b38a2ef7aae --- /dev/null +++ b/fs/ocfs2/quota_global.c @@ -0,0 +1,990 @@ +/* + * Implementation of operations over global quota file + */ +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/quota.h> +#include <linux/quotaops.h> +#include <linux/dqblk_qtree.h> +#include <linux/jiffies.h> +#include <linux/timer.h> +#include <linux/writeback.h> + +#define MLOG_MASK_PREFIX ML_QUOTA +#include <cluster/masklog.h> + +#include "ocfs2_fs.h" +#include "ocfs2.h" +#include "alloc.h" +#include "inode.h" +#include "journal.h" +#include "file.h" +#include "sysfile.h" +#include "dlmglue.h" +#include "uptodate.h" +#include "quota.h" + +static void qsync_timer_fn(unsigned long oinfo_ptr); + +static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct mem_dqblk *m = &dquot->dq_dqb; + + /* Update from disk only entries not set by the admin */ + if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) { + m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit); + m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit); + } + if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) + m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes); + if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) { + m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit); + m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit); + } + if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) + m->dqb_curspace = le64_to_cpu(d->dqb_curspace); + if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags)) + m->dqb_btime = le64_to_cpu(d->dqb_btime); + if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags)) + m->dqb_itime = le64_to_cpu(d->dqb_itime); + 
OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count); +} + +static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct mem_dqblk *m = &dquot->dq_dqb; + + d->dqb_id = cpu_to_le32(dquot->dq_id); + d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); + d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); + d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); + d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); + d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); + d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); + d->dqb_curspace = cpu_to_le64(m->dqb_curspace); + d->dqb_btime = cpu_to_le64(m->dqb_btime); + d->dqb_itime = cpu_to_le64(m->dqb_itime); +} + +static int ocfs2_global_is_id(void *dp, struct dquot *dquot) +{ + struct ocfs2_global_disk_dqblk *d = dp; + struct ocfs2_mem_dqinfo *oinfo = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + + if (qtree_entry_unused(&oinfo->dqi_gi, dp)) + return 0; + return le32_to_cpu(d->dqb_id) == dquot->dq_id; +} + +struct qtree_fmt_operations ocfs2_global_ops = { + .mem2disk_dqblk = ocfs2_global_mem2diskdqb, + .disk2mem_dqblk = ocfs2_global_disk2memdqb, + .is_id = ocfs2_global_is_id, +}; + +int ocfs2_read_quota_block(struct inode *inode, u64 v_block, + struct buffer_head **bh) +{ + int rc = 0; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL); + if (rc) + mlog_errno(rc); + + /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. 
*/ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + +static int ocfs2_get_quota_block(struct inode *inode, int block, + struct buffer_head **bh) +{ + u64 pblock, pcount; + int err; + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL); + up_read(&OCFS2_I(inode)->ip_alloc_sem); + if (err) { + mlog_errno(err); + return err; + } + *bh = sb_getblk(inode->i_sb, pblock); + if (!*bh) { + err = -EIO; + mlog_errno(err); + } + return err;; +} + +/* Read data from global quotafile - avoid pagecache and such because we cannot + * afford acquiring the locks... We use quota cluster lock to serialize + * operations. Caller is responsible for acquiring it. */ +ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + struct inode *gqinode = oinfo->dqi_gqinode; + loff_t i_size = i_size_read(gqinode); + int offset = off & (sb->s_blocksize - 1); + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + struct buffer_head *bh; + size_t toread, tocopy; + + if (off > i_size) + return 0; + if (off + len > i_size) + len = i_size - off; + toread = len; + while (toread > 0) { + tocopy = min((size_t)(sb->s_blocksize - offset), toread); + bh = NULL; + err = ocfs2_read_quota_block(gqinode, blk, &bh); + if (err) { + mlog_errno(err); + return err; + } + memcpy(data, bh->b_data + offset, tocopy); + brelse(bh); + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile (we know the transaction is already started and has + * enough credits) */ +ssize_t ocfs2_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct inode *gqinode = oinfo->dqi_gqinode; + int offset = off & (sb->s_blocksize - 1); + sector_t blk = off >> 
sb->s_blocksize_bits; + int err = 0, new = 0, ja_type; + struct buffer_head *bh = NULL; + handle_t *handle = journal_current_handle(); + + if (!handle) { + mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " + "because transaction was not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } + if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) { + WARN_ON(1); + len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; + } + + mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); + if (gqinode->i_size < off + len) { + down_write(&OCFS2_I(gqinode)->ip_alloc_sem); + err = ocfs2_extend_no_holes(gqinode, off + len, off); + up_write(&OCFS2_I(gqinode)->ip_alloc_sem); + if (err < 0) + goto out; + err = ocfs2_simple_size_update(gqinode, + oinfo->dqi_gqi_bh, + off + len); + if (err < 0) + goto out; + new = 1; + } + /* Not rewriting whole block? */ + if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && + !new) { + err = ocfs2_read_quota_block(gqinode, blk, &bh); + ja_type = OCFS2_JOURNAL_ACCESS_WRITE; + } else { + err = ocfs2_get_quota_block(gqinode, blk, &bh); + ja_type = OCFS2_JOURNAL_ACCESS_CREATE; + } + if (err) { + mlog_errno(err); + return err; + } + lock_buffer(bh); + if (new) + memset(bh->b_data, 0, sb->s_blocksize); + memcpy(bh->b_data + offset, data, len); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + unlock_buffer(bh); + ocfs2_set_buffer_uptodate(gqinode, bh); + err = ocfs2_journal_access(handle, gqinode, bh, ja_type); + if (err < 0) { + brelse(bh); + goto out; + } + err = ocfs2_journal_dirty(handle, bh); + brelse(bh); + if (err < 0) + goto out; +out: + if (err) { + mutex_unlock(&gqinode->i_mutex); + mlog_errno(err); + return err; + } + gqinode->i_version++; + ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); + mutex_unlock(&gqinode->i_mutex); + return len; +} + +int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + int status; + struct buffer_head *bh = 
NULL; + + status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex); + if (status < 0) + return status; + spin_lock(&dq_data_lock); + if (!oinfo->dqi_gqi_count++) + oinfo->dqi_gqi_bh = bh; + else + WARN_ON(bh != oinfo->dqi_gqi_bh); + spin_unlock(&dq_data_lock); + return 0; +} + +void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) +{ + ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); + brelse(oinfo->dqi_gqi_bh); + spin_lock(&dq_data_lock); + if (!--oinfo->dqi_gqi_count) + oinfo->dqi_gqi_bh = NULL; + spin_unlock(&dq_data_lock); +} + +/* Read information header from global quota file */ +int ocfs2_global_read_info(struct super_block *sb, int type) +{ + struct inode *gqinode = NULL; + unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE }; + struct ocfs2_global_disk_dqinfo dinfo; + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + int status; + + mlog_entry_void(); + + /* Read global header */ + gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + OCFS2_INVALID_SLOT); + if (!gqinode) { + mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", + type); + status = -EINVAL; + goto out_err; + } + oinfo->dqi_gi.dqi_sb = sb; + oinfo->dqi_gi.dqi_type = type; + ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); + oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); + oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; + oinfo->dqi_gqi_bh = NULL; + oinfo->dqi_gqi_count = 0; + oinfo->dqi_gqinode = gqinode; + status = ocfs2_lock_global_qf(oinfo, 0); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + status = sb->s_op->quota_read(sb, type, (char *)&dinfo, + sizeof(struct ocfs2_global_disk_dqinfo), + OCFS2_GLOBAL_INFO_OFF); + ocfs2_unlock_global_qf(oinfo, 0); + if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { + mlog(ML_ERROR, "Cannot read global quota info (%d).\n", + status); + if (status >= 0) + status = -EIO; + mlog_errno(status); + goto 
out_err; + } + info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); + info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); + oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); + oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms); + oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); + oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits; + oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize - + OCFS2_QBLK_RESERVED_SPACE; + oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); + setup_timer(&oinfo->dqi_sync_timer, qsync_timer_fn, + (unsigned long)oinfo); + mod_timer(&oinfo->dqi_sync_timer, + round_jiffies(jiffies + oinfo->dqi_syncjiff)); +out_err: + mlog_exit(status); + return status; +} + +/* Write information to global quota file. Expects exlusive lock on quota + * file inode and quota info */ +static int __ocfs2_global_write_info(struct super_block *sb, int type) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_global_disk_dqinfo dinfo; + ssize_t size; + + spin_lock(&dq_data_lock); + info->dqi_flags &= ~DQF_INFO_DIRTY; + dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); + dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); + spin_unlock(&dq_data_lock); + dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms); + dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks); + dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk); + dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry); + size = sb->s_op->quota_write(sb, type, (char *)&dinfo, + sizeof(struct ocfs2_global_disk_dqinfo), + OCFS2_GLOBAL_INFO_OFF); + if (size != sizeof(struct ocfs2_global_disk_dqinfo)) { + mlog(ML_ERROR, "Cannot write global quota info structure\n"); + if (size >= 0) + size = -EIO; + return size; + } + return 0; +} + +int ocfs2_global_write_info(struct super_block *sb, int 
type) +{ + int err; + struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; + + err = ocfs2_qinfo_lock(info, 1); + if (err < 0) + return err; + err = __ocfs2_global_write_info(sb, type); + ocfs2_qinfo_unlock(info, 1); + return err; +} + +/* Read in information from global quota file and acquire a reference to it. + * dquot_acquire() has already started the transaction and locked quota file */ +int ocfs2_global_read_dquot(struct dquot *dquot) +{ + int err, err2, ex = 0; + struct ocfs2_mem_dqinfo *info = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + + err = ocfs2_qinfo_lock(info, 0); + if (err < 0) + goto out; + err = qtree_read_dquot(&info->dqi_gi, dquot); + if (err < 0) + goto out_qlock; + OCFS2_DQUOT(dquot)->dq_use_count++; + OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; + OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; + if (!dquot->dq_off) { /* No real quota entry? */ + /* Upgrade to exclusive lock for allocation */ + err = ocfs2_qinfo_lock(info, 1); + if (err < 0) + goto out_qlock; + ex = 1; + } + err = qtree_write_dquot(&info->dqi_gi, dquot); + if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { + err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); + if (!err) + err = err2; + } +out_qlock: + if (ex) + ocfs2_qinfo_unlock(info, 1); + ocfs2_qinfo_unlock(info, 0); +out: + if (err < 0) + mlog_errno(err); + return err; +} + +/* Sync local information about quota modifications with global quota file. 
/*
 * Sync local information about quota modifications with global quota file.
 * Caller must have started the transaction and obtained exclusive lock for
 * global quota file inode.
 *
 * The current global entry is read from disk, the usage change accumulated
 * locally since the last sync (current value minus dq_origspace /
 * dq_originodes) is added on top of it, grace times are reconciled and the
 * result is written back.
 *
 * @freeing: non-zero when the caller is dropping its reference. The quota
 *	     info lock is then taken exclusive, the use count is decremented
 *	     and, once it reaches zero, the entry is released from the global
 *	     file.
 *
 * Returns a negative error code on failure; otherwise the result of the last
 * qtree/info write performed.
 */
int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
{
	int err, err2;
	struct super_block *sb = dquot->dq_sb;
	int type = dquot->dq_type;
	struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
	struct ocfs2_global_disk_dqblk dqblk;
	s64 spacechange, inodechange;
	time_t olditime, oldbtime;

	err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
				   sizeof(struct ocfs2_global_disk_dqblk),
				   dquot->dq_off);
	if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
		/* A short but non-negative read is turned into -EIO. */
		if (err >= 0) {
			mlog(ML_ERROR, "Short read from global quota file "
				       "(%u read)\n", err);
			err = -EIO;
		}
		goto out;
	}

	/* Update space and inode usage. Get also other information from
	 * global quota file so that we don't overwrite any changes there.
	 * The local deltas and old grace times are sampled before the
	 * in-memory copy is replaced by the on-disk values. */
	spin_lock(&dq_data_lock);
	spacechange = dquot->dq_dqb.dqb_curspace -
					OCFS2_DQUOT(dquot)->dq_origspace;
	inodechange = dquot->dq_dqb.dqb_curinodes -
					OCFS2_DQUOT(dquot)->dq_originodes;
	olditime = dquot->dq_dqb.dqb_itime;
	oldbtime = dquot->dq_dqb.dqb_btime;
	ocfs2_global_disk2memdqb(dquot, &dqblk);
	mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
	     dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
	     dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
	/* Usage explicitly set by the admin is not adjusted by our delta. */
	if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
		dquot->dq_dqb.dqb_curspace += spacechange;
	if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
		dquot->dq_dqb.dqb_curinodes += inodechange;
	/* Set properly space grace time: while over the soft limit keep the
	 * earlier of the global and the previous local grace time; when not
	 * over the limit clear it. */
	if (dquot->dq_dqb.dqb_bsoftlimit &&
	    dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
		if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
		    oldbtime > 0) {
			if (dquot->dq_dqb.dqb_btime > 0)
				dquot->dq_dqb.dqb_btime =
					min(dquot->dq_dqb.dqb_btime, oldbtime);
			else
				dquot->dq_dqb.dqb_btime = oldbtime;
		}
	} else {
		dquot->dq_dqb.dqb_btime = 0;
		clear_bit(DQ_BLKS_B, &dquot->dq_flags);
	}
	/* Set properly inode grace time, by the same rule as for space. */
	if (dquot->dq_dqb.dqb_isoftlimit &&
	    dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
		if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
		    olditime > 0) {
			if (dquot->dq_dqb.dqb_itime > 0)
				dquot->dq_dqb.dqb_itime =
					min(dquot->dq_dqb.dqb_itime, olditime);
			else
				dquot->dq_dqb.dqb_itime = olditime;
		}
	} else {
		dquot->dq_dqb.dqb_itime = 0;
		clear_bit(DQ_INODES_B, &dquot->dq_flags);
	}
	/* All information is properly updated, clear the flags */
	__clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
	__clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
	__clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
	__clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
	__clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
	__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
	/* The merged values become the baseline for the next delta. */
	OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
	OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
	spin_unlock(&dq_data_lock);
	/* Exclusive info lock only when an entry may be released. */
	err = ocfs2_qinfo_lock(info, freeing);
	if (err < 0) {
		mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
			       " (type=%d, id=%u)\n", dquot->dq_type,
			       (unsigned)dquot->dq_id);
		goto out;
	}
	if (freeing)
		OCFS2_DQUOT(dquot)->dq_use_count--;
	err = qtree_write_dquot(&info->dqi_gi, dquot);
	if (err < 0)
		goto out_qlock;
	if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
		err = qtree_release_dquot(&info->dqi_gi, dquot);
		/* Write the info header if it got dirtied; keep the first
		 * error. */
		if (info_dirty(sb_dqinfo(sb, type))) {
			err2 = __ocfs2_global_write_info(sb, type);
			if (!err)
				err = err2;
		}
	}
out_qlock:
	ocfs2_qinfo_unlock(info, freeing);
out:
	if (err < 0)
		mlog_errno(err);
	return err;
}
*/ + dquot_mark_dquot_dirty(dquot); + status = dquot_commit(dquot); + if (status < 0) + mlog_errno(status); + ocfs2_commit_trans(osb, handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +static void ocfs2_do_qsync(unsigned long oinfo_ptr) +{ + struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr; + struct super_block *sb = oinfo->dqi_gqinode->i_sb; + + dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); +} + +static void qsync_timer_fn(unsigned long oinfo_ptr) +{ + struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr; + + pdflush_operation(ocfs2_do_qsync, oinfo_ptr); + mod_timer(&oinfo->dqi_sync_timer, + round_jiffies(jiffies + oinfo->dqi_syncjiff)); +} + +/* + * Wrappers for generic quota functions + */ + +static int ocfs2_write_dquot(struct dquot *dquot) +{ + handle_t *handle; + struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); + int status = 0; + + mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); + + handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + status = dquot_commit(dquot); + ocfs2_commit_trans(osb, handle); +out: + mlog_exit(status); + return status; +} + +int ocfs2_calc_qdel_credits(struct super_block *sb, int type) +{ + struct ocfs2_mem_dqinfo *oinfo; + int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; + + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) + return 0; + + oinfo = sb_dqinfo(sb, type)->dqi_priv; + /* We modify tree, leaf block, global info, local chunk header, + * global and local inode */ + return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 + + 2 * OCFS2_INODE_UPDATE_CREDITS; +} + +static int ocfs2_release_dquot(struct dquot *dquot) +{ + handle_t *handle; + struct ocfs2_mem_dqinfo *oinfo = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + struct ocfs2_super *osb = 
OCFS2_SB(dquot->dq_sb); + int status = 0; + + mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); + + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + handle = ocfs2_start_trans(osb, + ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type)); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + status = dquot_release(dquot); + ocfs2_commit_trans(osb, handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +int ocfs2_calc_qinit_credits(struct super_block *sb, int type) +{ + struct ocfs2_mem_dqinfo *oinfo; + int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; + struct ocfs2_dinode *lfe, *gfe; + + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) + return 0; + + oinfo = sb_dqinfo(sb, type)->dqi_priv; + gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data; + lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data; + /* We can extend local file + global file. In local file we + * can modify info, chunk header block and dquot block. 
In + * global file we can modify info, tree and leaf block */ + return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) + + ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) + + 3 + oinfo->dqi_gi.dqi_qtree_depth + 2; +} + +static int ocfs2_acquire_dquot(struct dquot *dquot) +{ + handle_t *handle; + struct ocfs2_mem_dqinfo *oinfo = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); + int status = 0; + + mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); + /* We need an exclusive lock, because we're going to update use count + * and instantiate possibly new dquot structure */ + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + handle = ocfs2_start_trans(osb, + ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type)); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + status = dquot_acquire(dquot); + ocfs2_commit_trans(osb, handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +static int ocfs2_mark_dquot_dirty(struct dquot *dquot) +{ + unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) | + (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) | + (1 << (DQ_LASTSET_B + QIF_INODES_B)) | + (1 << (DQ_LASTSET_B + QIF_SPACE_B)) | + (1 << (DQ_LASTSET_B + QIF_BTIME_B)) | + (1 << (DQ_LASTSET_B + QIF_ITIME_B)); + int sync = 0; + int status; + struct super_block *sb = dquot->dq_sb; + int type = dquot->dq_type; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + handle_t *handle; + struct ocfs2_super *osb = OCFS2_SB(sb); + + mlog_entry("id=%u, type=%d", dquot->dq_id, type); + dquot_mark_dquot_dirty(dquot); + + /* In case user set some limits, sync dquot immediately to global + * quota file so that information propagates quicker */ + spin_lock(&dq_data_lock); + if (dquot->dq_flags & mask) + sync = 1; + spin_unlock(&dq_data_lock); + if (!sync) { + status = ocfs2_write_dquot(dquot); + goto 
out; + } + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + status = ocfs2_sync_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + /* Now write updated local dquot structure */ + status = dquot_commit(dquot); +out_trans: + ocfs2_commit_trans(osb, handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +/* This should happen only after set_dqinfo(). */ +static int ocfs2_write_info(struct super_block *sb, int type) +{ + handle_t *handle; + int status = 0; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + + mlog_entry_void(); + + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + status = dquot_commit_info(sb, type); + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_ilock: + ocfs2_unlock_global_qf(oinfo, 1); +out: + mlog_exit(status); + return status; +} + +/* This is difficult. We have to lock quota inode and start transaction + * in this function but we don't want to take the penalty of exlusive + * quota file lock when we are just going to use cached structures. So + * we just take read lock check whether we have dquot cached and if so, + * we don't have to take the write lock... 
*/ +static int ocfs2_dquot_initialize(struct inode *inode, int type) +{ + handle_t *handle = NULL; + int status = 0; + struct super_block *sb = inode->i_sb; + struct ocfs2_mem_dqinfo *oinfo; + int exclusive = 0; + int cnt; + qid_t id; + + mlog_entry_void(); + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (type != -1 && cnt != type) + continue; + if (!sb_has_quota_active(sb, cnt)) + continue; + oinfo = sb_dqinfo(sb, cnt)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 0); + if (status < 0) + goto out; + /* This is just a performance optimization not a reliable test. + * Since we hold an inode lock, noone can actually release + * the structure until we are finished with initialization. */ + if (inode->i_dquot[cnt] != NODQUOT) { + ocfs2_unlock_global_qf(oinfo, 0); + continue; + } + /* When we have inode lock, we know that no dquot_release() can + * run and thus we can safely check whether we need to + * read+modify global file to get quota information or whether + * our node already has it. */ + if (cnt == USRQUOTA) + id = inode->i_uid; + else if (cnt == GRPQUOTA) + id = inode->i_gid; + else + BUG(); + /* Obtain exclusion from quota off... 
*/ + down_write(&sb_dqopt(sb)->dqptr_sem); + exclusive = !dquot_is_cached(sb, id, cnt); + up_write(&sb_dqopt(sb)->dqptr_sem); + if (exclusive) { + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) { + exclusive = 0; + mlog_errno(status); + goto out_ilock; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), + ocfs2_calc_qinit_credits(sb, cnt)); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_ilock; + } + } + dquot_initialize(inode, cnt); + if (exclusive) { + ocfs2_commit_trans(OCFS2_SB(sb), handle); + ocfs2_unlock_global_qf(oinfo, 1); + } + ocfs2_unlock_global_qf(oinfo, 0); + } + mlog_exit(0); + return 0; +out_ilock: + if (exclusive) + ocfs2_unlock_global_qf(oinfo, 1); + ocfs2_unlock_global_qf(oinfo, 0); +out: + mlog_exit(status); + return status; +} + +static int ocfs2_dquot_drop_slow(struct inode *inode) +{ + int status = 0; + int cnt; + int got_lock[MAXQUOTAS] = {0, 0}; + handle_t *handle; + struct super_block *sb = inode->i_sb; + struct ocfs2_mem_dqinfo *oinfo; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (!sb_has_quota_active(sb, cnt)) + continue; + oinfo = sb_dqinfo(sb, cnt)->dqi_priv; + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + got_lock[cnt] = 1; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), + ocfs2_calc_qinit_credits(sb, USRQUOTA) + + ocfs2_calc_qinit_credits(sb, GRPQUOTA)); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + dquot_drop(inode); + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out: + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (got_lock[cnt]) { + oinfo = sb_dqinfo(sb, cnt)->dqi_priv; + ocfs2_unlock_global_qf(oinfo, 1); + } + return status; +} + +/* See the comment before ocfs2_dquot_initialize. 
/*
 * dquot_operations.drop wrapper. See the comment before
 * ocfs2_dquot_initialize.
 *
 * Fast path: with the global quota file of every active type locked shared
 * and dqptr_sem held for writing, the inode's dquots are dropped through
 * dquot_drop_locked(). If the check below decides the cheap path cannot be
 * used, all shared locks are released and the work is redone by
 * ocfs2_dquot_drop_slow(), which takes exclusive locks and a transaction.
 *
 * Returns 0 or a negative error code (a lock failure, or whatever the slow
 * path returns).
 */
static int ocfs2_dquot_drop(struct inode *inode)
{
	int status = 0;
	struct super_block *sb = inode->i_sb;
	struct ocfs2_mem_dqinfo *oinfo;
	int exclusive = 0;
	int cnt;
	int got_lock[MAXQUOTAS] = {0, 0};	/* which shared locks we hold */

	mlog_entry_void();
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (!sb_has_quota_active(sb, cnt))
			continue;
		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
		status = ocfs2_lock_global_qf(oinfo, 0);
		if (status < 0)
			goto out;
		got_lock[cnt] = 1;
	}
	/* Lock against anyone releasing references so that when we check
	 * we know we are not going to be last ones to release dquot */
	down_write(&sb_dqopt(sb)->dqptr_sem);
	/* Urgh, this is a terrible hack :( */
	/* NOTE(review): the comment above describes the fast path as safe
	 * when we are not the last holder, yet the slow path is selected when
	 * dq_count > 1, i.e. when other holders exist. This looks inverted
	 * relative to the stated intent - confirm against dqput(). */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
		if (inode->i_dquot[cnt] != NODQUOT &&
		    atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
			exclusive = 1;
			break;
		}
	}
	if (!exclusive)
		dquot_drop_locked(inode);
	up_write(&sb_dqopt(sb)->dqptr_sem);
out:
	/* Release only the shared locks that were actually obtained. */
	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
		if (got_lock[cnt]) {
			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
			ocfs2_unlock_global_qf(oinfo, 0);
		}
	/* In case we bailed out because we had to do expensive locking
	 * do it now... */
	if (exclusive)
		status = ocfs2_dquot_drop_slow(inode);
	mlog_exit(status);
	return status;
}
((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - + OCFS2_QBLK_RESERVED_SPACE) << 3) / + ol_quota_entries_per_block(sb); +} + +/* Number of entries in a chunk bitmap */ +static unsigned int ol_chunk_entries(struct super_block *sb) +{ + return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb); +} + +/* Offset of the chunk in quota file */ +static unsigned int ol_quota_chunk_block(struct super_block *sb, int c) +{ + /* 1 block for local quota file info, 1 block per chunk for chunk info */ + return 1 + (ol_chunk_blocks(sb) + 1) * c; +} + +static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off) +{ + int epb = ol_quota_entries_per_block(sb); + + return ol_quota_chunk_block(sb, c) + 1 + off / epb; +} + +static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off) +{ + int epb = ol_quota_entries_per_block(sb); + + return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk); +} + +/* Offset of the dquot structure in the quota file */ +static loff_t ol_dqblk_off(struct super_block *sb, int c, int off) +{ + return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) + + ol_dqblk_block_off(sb, c, off); +} + +/* Compute block number from given offset */ +static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off) +{ + return off >> sb->s_blocksize_bits; +} + +static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off) +{ + return off & ((1 << sb->s_blocksize_bits) - 1); +} + +/* Compute offset in the chunk of a structure with the given offset */ +static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off) +{ + int epb = ol_quota_entries_per_block(sb); + + return ((off >> sb->s_blocksize_bits) - + ol_quota_chunk_block(sb, c) - 1) * epb + + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) / + sizeof(struct ocfs2_local_disk_dqblk); +} + +/* Write bufferhead into the fs */ +static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh, + void 
(*modify)(struct buffer_head *, void *), void *private) +{ + struct super_block *sb = inode->i_sb; + handle_t *handle; + int status; + + handle = ocfs2_start_trans(OCFS2_SB(sb), 1); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + return status; + } + status = ocfs2_journal_access(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + ocfs2_commit_trans(OCFS2_SB(sb), handle); + return status; + } + lock_buffer(bh); + modify(bh, private); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) { + mlog_errno(status); + ocfs2_commit_trans(OCFS2_SB(sb), handle); + return status; + } + status = ocfs2_commit_trans(OCFS2_SB(sb), handle); + if (status < 0) { + mlog_errno(status); + return status; + } + return 0; +} + +/* Check whether we understand format of quota files */ +static int ocfs2_local_check_quota_file(struct super_block *sb, int type) +{ + unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS; + unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS; + unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS; + unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS; + unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, + GROUP_QUOTA_SYSTEM_INODE }; + struct buffer_head *bh = NULL; + struct inode *linode = sb_dqopt(sb)->files[type]; + struct inode *ginode = NULL; + struct ocfs2_disk_dqheader *dqhead; + int status, ret = 0; + + /* First check whether we understand local quota file */ + status = ocfs2_read_quota_block(linode, 0, &bh); + if (status) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file header (type=%d)\n", + type); + goto out_err; + } + dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data); + if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) { + mlog(ML_ERROR, "quota file magic does not match (%u != %u)," + " type=%d\n", le32_to_cpu(dqhead->dqh_magic), + lmagics[type], type); + goto out_err; + } + if 
(le32_to_cpu(dqhead->dqh_version) != lversions[type]) { + mlog(ML_ERROR, "quota file version does not match (%u != %u)," + " type=%d\n", le32_to_cpu(dqhead->dqh_version), + lversions[type], type); + goto out_err; + } + brelse(bh); + bh = NULL; + + /* Next check whether we understand global quota file */ + ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], + OCFS2_INVALID_SLOT); + if (!ginode) { + mlog(ML_ERROR, "cannot get global quota file inode " + "(type=%d)\n", type); + goto out_err; + } + /* Since the header is read only, we don't care about locking */ + status = ocfs2_read_quota_block(ginode, 0, &bh); + if (status) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read global quota file header " + "(type=%d)\n", type); + goto out_err; + } + dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data); + if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) { + mlog(ML_ERROR, "global quota file magic does not match " + "(%u != %u), type=%d\n", + le32_to_cpu(dqhead->dqh_magic), gmagics[type], type); + goto out_err; + } + if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) { + mlog(ML_ERROR, "global quota file version does not match " + "(%u != %u), type=%d\n", + le32_to_cpu(dqhead->dqh_version), gversions[type], + type); + goto out_err; + } + + ret = 1; +out_err: + brelse(bh); + iput(ginode); + return ret; +} + +/* Release given list of quota file chunks */ +static void ocfs2_release_local_quota_bitmaps(struct list_head *head) +{ + struct ocfs2_quota_chunk *pos, *next; + + list_for_each_entry_safe(pos, next, head, qc_chunk) { + list_del(&pos->qc_chunk); + brelse(pos->qc_headerbh); + kmem_cache_free(ocfs2_qf_chunk_cachep, pos); + } +} + +/* Load quota bitmaps into memory */ +static int ocfs2_load_local_quota_bitmaps(struct inode *inode, + struct ocfs2_local_disk_dqinfo *ldinfo, + struct list_head *head) +{ + struct ocfs2_quota_chunk *newchunk; + int i, status; + + INIT_LIST_HEAD(head); + for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) { + newchunk 
= kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS); + if (!newchunk) { + ocfs2_release_local_quota_bitmaps(head); + return -ENOMEM; + } + newchunk->qc_num = i; + newchunk->qc_headerbh = NULL; + status = ocfs2_read_quota_block(inode, + ol_quota_chunk_block(inode->i_sb, i), + &newchunk->qc_headerbh); + if (status) { + mlog_errno(status); + kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk); + ocfs2_release_local_quota_bitmaps(head); + return status; + } + list_add_tail(&newchunk->qc_chunk, head); + } + return 0; +} + +static void olq_update_info(struct buffer_head *bh, void *private) +{ + struct mem_dqinfo *info = private; + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_local_disk_dqinfo *ldinfo; + + ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + spin_lock(&dq_data_lock); + ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); + ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); + spin_unlock(&dq_data_lock); +} + +static int ocfs2_add_recovery_chunk(struct super_block *sb, + struct ocfs2_local_disk_chunk *dchunk, + int chunk, + struct list_head *head) +{ + struct ocfs2_recovery_chunk *rc; + + rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS); + if (!rc) + return -ENOMEM; + rc->rc_chunk = chunk; + rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS); + if (!rc->rc_bitmap) { + kfree(rc); + return -ENOMEM; + } + memcpy(rc->rc_bitmap, dchunk->dqc_bitmap, + (ol_chunk_entries(sb) + 7) >> 3); + list_add_tail(&rc->rc_list, head); + return 0; +} + +static void free_recovery_list(struct list_head *head) +{ + struct ocfs2_recovery_chunk *next; + struct ocfs2_recovery_chunk *rchunk; + + list_for_each_entry_safe(rchunk, next, head, rc_list) { + list_del(&rchunk->rc_list); + kfree(rchunk->rc_bitmap); + kfree(rchunk); + } +} + +void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + 
free_recovery_list(&(rec->r_list[type])); + kfree(rec); +} + +/* Load entries in our quota file we have to recover*/ +static int ocfs2_recovery_load_quota(struct inode *lqinode, + struct ocfs2_local_disk_dqinfo *ldinfo, + int type, + struct list_head *head) +{ + struct super_block *sb = lqinode->i_sb; + struct buffer_head *hbh; + struct ocfs2_local_disk_chunk *dchunk; + int i, chunks = le32_to_cpu(ldinfo->dqi_chunks); + int status = 0; + + for (i = 0; i < chunks; i++) { + hbh = NULL; + status = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, i), + &hbh); + if (status) { + mlog_errno(status); + break; + } + dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; + if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb)) + status = ocfs2_add_recovery_chunk(sb, dchunk, i, head); + brelse(hbh); + if (status < 0) + break; + } + if (status < 0) + free_recovery_list(head); + return status; +} + +static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void) +{ + int type; + struct ocfs2_quota_recovery *rec; + + rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS); + if (!rec) + return NULL; + for (type = 0; type < MAXQUOTAS; type++) + INIT_LIST_HEAD(&(rec->r_list[type])); + return rec; +} + +/* Load information we need for quota recovery into memory */ +struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery( + struct ocfs2_super *osb, + int slot_num) +{ + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + struct super_block *sb = osb->sb; + struct ocfs2_local_disk_dqinfo *ldinfo; + struct inode *lqinode; + struct buffer_head *bh; + int type; + int status = 0; + struct ocfs2_quota_recovery *rec; + + mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num); + rec = ocfs2_alloc_quota_recovery(); + if (!rec) + return ERR_PTR(-ENOMEM); + /* First init... 
*/ + + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + /* At this point, journal of the slot is already replayed so + * we can trust metadata and data of the quota file */ + lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num); + if (!lqinode) { + status = -ENOENT; + goto out; + } + status = ocfs2_inode_lock_full(lqinode, NULL, 1, + OCFS2_META_LOCK_RECOVERY); + if (status < 0) { + mlog_errno(status); + goto out_put; + } + /* Now read local header */ + bh = NULL; + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file info header " + "(slot=%d type=%d)\n", slot_num, type); + goto out_lock; + } + ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + status = ocfs2_recovery_load_quota(lqinode, ldinfo, type, + &rec->r_list[type]); + brelse(bh); +out_lock: + ocfs2_inode_unlock(lqinode, 1); +out_put: + iput(lqinode); + if (status < 0) + break; + } +out: + if (status < 0) { + ocfs2_free_quota_recovery(rec); + rec = ERR_PTR(status); + } + return rec; +} + +/* Sync changes in local quota file into global quota file and + * reinitialize local quota file. + * The function expects local quota file to be already locked and + * dqonoff_mutex locked. 
*/ +static int ocfs2_recover_local_quota_file(struct inode *lqinode, + int type, + struct ocfs2_quota_recovery *rec) +{ + struct super_block *sb = lqinode->i_sb; + struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; + struct ocfs2_local_disk_chunk *dchunk; + struct ocfs2_local_disk_dqblk *dqblk; + struct dquot *dquot; + handle_t *handle; + struct buffer_head *hbh = NULL, *qbh = NULL; + int status = 0; + int bit, chunk; + struct ocfs2_recovery_chunk *rchunk, *next; + qsize_t spacechange, inodechange; + + mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type); + + status = ocfs2_lock_global_qf(oinfo, 1); + if (status < 0) + goto out; + + list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) { + chunk = rchunk->rc_chunk; + hbh = NULL; + status = ocfs2_read_quota_block(lqinode, + ol_quota_chunk_block(sb, chunk), + &hbh); + if (status) { + mlog_errno(status); + break; + } + dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data; + for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) { + qbh = NULL; + status = ocfs2_read_quota_block(lqinode, + ol_dqblk_block(sb, chunk, bit), + &qbh); + if (status) { + mlog_errno(status); + break; + } + dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data + + ol_dqblk_block_off(sb, chunk, bit)); + dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type); + if (!dquot) { + status = -EIO; + mlog(ML_ERROR, "Failed to get quota structure " + "for id %u, type %d. Cannot finish quota " + "file recovery.\n", + (unsigned)le64_to_cpu(dqblk->dqb_id), + type); + goto out_put_bh; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), + OCFS2_QSYNC_CREDITS); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_put_dquot; + } + mutex_lock(&sb_dqopt(sb)->dqio_mutex); + spin_lock(&dq_data_lock); + /* Add usage from quota entry into quota changes + * of our node. 
Auxiliary variables are important + * due to signedness */ + spacechange = le64_to_cpu(dqblk->dqb_spacemod); + inodechange = le64_to_cpu(dqblk->dqb_inodemod); + dquot->dq_dqb.dqb_curspace += spacechange; + dquot->dq_dqb.dqb_curinodes += inodechange; + spin_unlock(&dq_data_lock); + /* We want to drop reference held by the crashed + * node. Since we have our own reference we know + * global structure actually won't be freed. */ + status = ocfs2_global_release_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_commit; + } + /* Release local quota file entry */ + status = ocfs2_journal_access(handle, lqinode, + qbh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_commit; + } + lock_buffer(qbh); + WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap)); + ocfs2_clear_bit(bit, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, 1); + unlock_buffer(qbh); + status = ocfs2_journal_dirty(handle, qbh); + if (status < 0) + mlog_errno(status); +out_commit: + mutex_unlock(&sb_dqopt(sb)->dqio_mutex); + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out_put_dquot: + dqput(dquot); +out_put_bh: + brelse(qbh); + if (status < 0) + break; + } + brelse(hbh); + list_del(&rchunk->rc_list); + kfree(rchunk->rc_bitmap); + kfree(rchunk); + if (status < 0) + break; + } + ocfs2_unlock_global_qf(oinfo, 1); +out: + if (status < 0) + free_recovery_list(&(rec->r_list[type])); + mlog_exit(status); + return status; +} + +/* Recover local quota files for given node different from us */ +int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, + struct ocfs2_quota_recovery *rec, + int slot_num) +{ + unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + struct super_block *sb = osb->sb; + struct ocfs2_local_disk_dqinfo *ldinfo; + struct buffer_head *bh; + handle_t *handle; + int type; + int status = 0; + struct inode *lqinode; + unsigned int flags; + + mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", 
slot_num); + mutex_lock(&sb_dqopt(sb)->dqonoff_mutex); + for (type = 0; type < MAXQUOTAS; type++) { + if (list_empty(&(rec->r_list[type]))) + continue; + mlog(0, "Recovering quota in slot %d\n", slot_num); + lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num); + if (!lqinode) { + status = -ENOENT; + goto out; + } + status = ocfs2_inode_lock_full(lqinode, NULL, 1, + OCFS2_META_LOCK_NOQUEUE); + /* Someone else is holding the lock? Then he must be + * doing the recovery. Just skip the file... */ + if (status == -EAGAIN) { + mlog(ML_NOTICE, "skipping quota recovery for slot %d " + "because quota file is locked.\n", slot_num); + status = 0; + goto out_put; + } else if (status < 0) { + mlog_errno(status); + goto out_put; + } + /* Now read local header */ + bh = NULL; + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file info header " + "(slot=%d type=%d)\n", slot_num, type); + goto out_lock; + } + ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + /* Is recovery still needed? */ + flags = le32_to_cpu(ldinfo->dqi_flags); + if (!(flags & OLQF_CLEAN)) + status = ocfs2_recover_local_quota_file(lqinode, + type, + rec); + /* We don't want to mark file as clean when it is actually + * active */ + if (slot_num == osb->slot_num) + goto out_bh; + /* Mark quota file as clean if we are recovering quota file of + * some other node. 
*/ + handle = ocfs2_start_trans(osb, 1); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out_bh; + } + status = ocfs2_journal_access(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(bh); + ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) + mlog_errno(status); +out_trans: + ocfs2_commit_trans(osb, handle); +out_bh: + brelse(bh); +out_lock: + ocfs2_inode_unlock(lqinode, 1); +out_put: + iput(lqinode); + if (status < 0) + break; + } +out: + mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); + kfree(rec); + return status; +} + +/* Read information header from quota file */ +static int ocfs2_local_read_info(struct super_block *sb, int type) +{ + struct ocfs2_local_disk_dqinfo *ldinfo; + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + int status; + struct buffer_head *bh = NULL; + struct ocfs2_quota_recovery *rec; + int locked = 0; + + info->dqi_maxblimit = 0x7fffffffffffffffLL; + info->dqi_maxilimit = 0x7fffffffffffffffLL; + oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS); + if (!oinfo) { + mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota" + " info."); + goto out_err; + } + info->dqi_priv = oinfo; + oinfo->dqi_type = type; + INIT_LIST_HEAD(&oinfo->dqi_chunk); + oinfo->dqi_rec = NULL; + oinfo->dqi_lqi_bh = NULL; + oinfo->dqi_ibh = NULL; + + status = ocfs2_global_read_info(sb, type); + if (status < 0) + goto out_err; + + status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + locked = 1; + + /* Now read local header */ + status = ocfs2_read_quota_block(lqinode, 0, &bh); + if (status) { + mlog_errno(status); + mlog(ML_ERROR, "failed to read quota file info header " + "(type=%d)\n", type); + goto out_err; + } + 
ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + + OCFS2_LOCAL_INFO_OFF); + info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags); + oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks); + oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks); + oinfo->dqi_ibh = bh; + + /* We crashed when using local quota file? */ + if (!(info->dqi_flags & OLQF_CLEAN)) { + rec = OCFS2_SB(sb)->quota_rec; + if (!rec) { + rec = ocfs2_alloc_quota_recovery(); + if (!rec) { + status = -ENOMEM; + mlog_errno(status); + goto out_err; + } + OCFS2_SB(sb)->quota_rec = rec; + } + + status = ocfs2_recovery_load_quota(lqinode, ldinfo, type, + &rec->r_list[type]); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + } + + status = ocfs2_load_local_quota_bitmaps(lqinode, + ldinfo, + &oinfo->dqi_chunk); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + + /* Now mark quota file as used */ + info->dqi_flags &= ~OLQF_CLEAN; + status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + + return 0; +out_err: + if (oinfo) { + iput(oinfo->dqi_gqinode); + ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); + ocfs2_lock_res_free(&oinfo->dqi_gqlock); + brelse(oinfo->dqi_lqi_bh); + if (locked) + ocfs2_inode_unlock(lqinode, 1); + ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); + kfree(oinfo); + } + brelse(bh); + return -1; +} + +/* Write local info to quota file */ +static int ocfs2_local_write_info(struct super_block *sb, int type) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv) + ->dqi_ibh; + int status; + + status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info, + info); + if (status < 0) { + mlog_errno(status); + return -1; + } + + return 0; +} + +/* Release info from memory */ +static int ocfs2_local_free_info(struct super_block *sb, int type) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct 
ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_quota_chunk *chunk; + struct ocfs2_local_disk_chunk *dchunk; + int mark_clean = 1, len; + int status; + + /* At this point we know there are no more dquots and thus + * even if there's some sync in the pdflush queue, it won't + * find any dquots and return without doing anything */ + del_timer_sync(&oinfo->dqi_sync_timer); + iput(oinfo->dqi_gqinode); + ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock); + ocfs2_lock_res_free(&oinfo->dqi_gqlock); + list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) { + dchunk = (struct ocfs2_local_disk_chunk *) + (chunk->qc_headerbh->b_data); + if (chunk->qc_num < oinfo->dqi_chunks - 1) { + len = ol_chunk_entries(sb); + } else { + len = (oinfo->dqi_blocks - + ol_quota_chunk_block(sb, chunk->qc_num) - 1) + * ol_quota_entries_per_block(sb); + } + /* Not all entries free? Bug! */ + if (le32_to_cpu(dchunk->dqc_free) != len) { + mlog(ML_ERROR, "releasing quota file with used " + "entries (type=%d)\n", type); + mark_clean = 0; + } + } + ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); + + /* dqonoff_mutex protects us against racing with recovery thread... 
*/ + if (oinfo->dqi_rec) { + ocfs2_free_quota_recovery(oinfo->dqi_rec); + mark_clean = 0; + } + + if (!mark_clean) + goto out; + + /* Mark local file as clean */ + info->dqi_flags |= OLQF_CLEAN; + status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], + oinfo->dqi_ibh, + olq_update_info, + info); + if (status < 0) { + mlog_errno(status); + goto out; + } + +out: + ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1); + brelse(oinfo->dqi_ibh); + brelse(oinfo->dqi_lqi_bh); + kfree(oinfo); + return 0; +} + +static void olq_set_dquot(struct buffer_head *bh, void *private) +{ + struct ocfs2_dquot *od = private; + struct ocfs2_local_disk_dqblk *dqblk; + struct super_block *sb = od->dq_dquot.dq_sb; + + dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data + + ol_dqblk_block_offset(sb, od->dq_local_off)); + + dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); + spin_lock(&dq_data_lock); + dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - + od->dq_origspace); + dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - + od->dq_originodes); + spin_unlock(&dq_data_lock); + mlog(0, "Writing local dquot %u space %lld inodes %lld\n", + od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), + (long long)le64_to_cpu(dqblk->dqb_inodemod)); +} + +/* Write dquot to local quota file */ +static int ocfs2_local_write_dquot(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); + struct buffer_head *bh = NULL; + int status; + + status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type], + ol_dqblk_file_block(sb, od->dq_local_off), + &bh); + if (status) { + mlog_errno(status); + goto out; + } + status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh, + olq_set_dquot, od); + if (status < 0) { + mlog_errno(status); + goto out; + } +out: + brelse(bh); + return status; +} + +/* Find free entry in local quota file */ +static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block 
*sb, + int type, + int *offset) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_quota_chunk *chunk; + struct ocfs2_local_disk_chunk *dchunk; + int found = 0, len; + + list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) { + dchunk = (struct ocfs2_local_disk_chunk *) + chunk->qc_headerbh->b_data; + if (le32_to_cpu(dchunk->dqc_free) > 0) { + found = 1; + break; + } + } + if (!found) + return NULL; + + if (chunk->qc_num < oinfo->dqi_chunks - 1) { + len = ol_chunk_entries(sb); + } else { + len = (oinfo->dqi_blocks - + ol_quota_chunk_block(sb, chunk->qc_num) - 1) + * ol_quota_entries_per_block(sb); + } + + found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0); + /* We failed? */ + if (found == len) { + mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u" + " entries free (type=%d)\n", chunk->qc_num, + le32_to_cpu(dchunk->dqc_free), type); + return ERR_PTR(-EIO); + } + *offset = found; + return chunk; +} + +/* Add new chunk to the local quota file */ +static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( + struct super_block *sb, + int type, + int *offset) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_quota_chunk *chunk = NULL; + struct ocfs2_local_disk_chunk *dchunk; + int status; + handle_t *handle; + struct buffer_head *bh = NULL; + u64 p_blkno; + + /* We are protected by dqio_sem so no locking needed */ + status = ocfs2_extend_no_holes(lqinode, + lqinode->i_size + 2 * sb->s_blocksize, + lqinode->i_size); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, + lqinode->i_size + 2 * sb->s_blocksize); + if (status < 0) { + mlog_errno(status); + goto out; + } + + chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS); + if (!chunk) { + status = -ENOMEM; + mlog_errno(status); + goto 
out; + } + + down_read(&OCFS2_I(lqinode)->ip_alloc_sem); + status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, + &p_blkno, NULL, NULL); + up_read(&OCFS2_I(lqinode)->ip_alloc_sem); + if (status < 0) { + mlog_errno(status); + goto out; + } + bh = sb_getblk(sb, p_blkno); + if (!bh) { + status = -ENOMEM; + mlog_errno(status); + goto out; + } + dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; + + handle = ocfs2_start_trans(OCFS2_SB(sb), 2); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access(handle, lqinode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + lock_buffer(bh); + dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb)); + memset(dchunk->dqc_bitmap, 0, + sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - + OCFS2_QBLK_RESERVED_SPACE); + set_buffer_uptodate(bh); + unlock_buffer(bh); + status = ocfs2_journal_dirty(handle, bh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + oinfo->dqi_blocks += 2; + oinfo->dqi_chunks++; + status = ocfs2_local_write_info(sb, type); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + status = ocfs2_commit_trans(OCFS2_SB(sb), handle); + if (status < 0) { + mlog_errno(status); + goto out; + } + + list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk); + chunk->qc_num = list_entry(chunk->qc_chunk.prev, + struct ocfs2_quota_chunk, + qc_chunk)->qc_num + 1; + chunk->qc_headerbh = bh; + *offset = 0; + return chunk; +out_trans: + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out: + brelse(bh); + kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); + return ERR_PTR(status); +} + +/* Find free entry in local quota file */ +static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( + struct super_block *sb, + int type, + int *offset) +{ + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; + struct ocfs2_quota_chunk 
*chunk; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_local_disk_chunk *dchunk; + int epb = ol_quota_entries_per_block(sb); + unsigned int chunk_blocks; + int status; + handle_t *handle; + + if (list_empty(&oinfo->dqi_chunk)) + return ocfs2_local_quota_add_chunk(sb, type, offset); + /* Is the last chunk full? */ + chunk = list_entry(oinfo->dqi_chunk.prev, + struct ocfs2_quota_chunk, qc_chunk); + chunk_blocks = oinfo->dqi_blocks - + ol_quota_chunk_block(sb, chunk->qc_num) - 1; + if (ol_chunk_blocks(sb) == chunk_blocks) + return ocfs2_local_quota_add_chunk(sb, type, offset); + + /* We are protected by dqio_sem so no locking needed */ + status = ocfs2_extend_no_holes(lqinode, + lqinode->i_size + sb->s_blocksize, + lqinode->i_size); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, + lqinode->i_size + sb->s_blocksize); + if (status < 0) { + mlog_errno(status); + goto out; + } + handle = ocfs2_start_trans(OCFS2_SB(sb), 2); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto out; + } + status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data; + lock_buffer(chunk->qc_headerbh); + le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb)); + unlock_buffer(chunk->qc_headerbh); + status = ocfs2_journal_dirty(handle, chunk->qc_headerbh); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + oinfo->dqi_blocks++; + status = ocfs2_local_write_info(sb, type); + if (status < 0) { + mlog_errno(status); + goto out_trans; + } + + status = ocfs2_commit_trans(OCFS2_SB(sb), handle); + if (status < 0) { + mlog_errno(status); + goto out; + } + *offset = chunk_blocks * epb; + return chunk; +out_trans: + ocfs2_commit_trans(OCFS2_SB(sb), handle); +out: + return ERR_PTR(status); 
+} + +static void olq_alloc_dquot(struct buffer_head *bh, void *private) +{ + int *offset = private; + struct ocfs2_local_disk_chunk *dchunk; + + dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; + ocfs2_set_bit(*offset, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, -1); +} + +/* Create dquot in the local file for given id */ +static int ocfs2_create_local_dquot(struct dquot *dquot) +{ + struct super_block *sb = dquot->dq_sb; + int type = dquot->dq_type; + struct inode *lqinode = sb_dqopt(sb)->files[type]; + struct ocfs2_quota_chunk *chunk; + struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); + int offset; + int status; + + chunk = ocfs2_find_free_entry(sb, type, &offset); + if (!chunk) { + chunk = ocfs2_extend_local_quota_file(sb, type, &offset); + if (IS_ERR(chunk)) + return PTR_ERR(chunk); + } else if (IS_ERR(chunk)) { + return PTR_ERR(chunk); + } + od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset); + od->dq_chunk = chunk; + + /* Initialize dquot structure on disk */ + status = ocfs2_local_write_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out; + } + + /* Mark structure as allocated */ + status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot, + &offset); + if (status < 0) { + mlog_errno(status); + goto out; + } +out: + return status; +} + +/* Create entry in local file for dquot, load data from the global file */ +static int ocfs2_local_read_dquot(struct dquot *dquot) +{ + int status; + + mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type); + + status = ocfs2_global_read_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + + /* Now create entry in the local quota file */ + status = ocfs2_create_local_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out_err; + } + mlog_exit(0); + return 0; +out_err: + mlog_exit(status); + return status; +} + +/* Release dquot structure from local quota file. 
ocfs2_release_dquot() has + * already started a transaction and obtained exclusive lock for global + * quota file. */ +static int ocfs2_local_release_dquot(struct dquot *dquot) +{ + int status; + int type = dquot->dq_type; + struct ocfs2_dquot *od = OCFS2_DQUOT(dquot); + struct super_block *sb = dquot->dq_sb; + struct ocfs2_local_disk_chunk *dchunk; + int offset; + handle_t *handle = journal_current_handle(); + + BUG_ON(!handle); + /* First write all local changes to global file */ + status = ocfs2_global_release_dquot(dquot); + if (status < 0) { + mlog_errno(status); + goto out; + } + + status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type], + od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto out; + } + offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num, + od->dq_local_off); + dchunk = (struct ocfs2_local_disk_chunk *) + (od->dq_chunk->qc_headerbh->b_data); + /* Mark structure as freed */ + lock_buffer(od->dq_chunk->qc_headerbh); + ocfs2_clear_bit(offset, dchunk->dqc_bitmap); + le32_add_cpu(&dchunk->dqc_free, 1); + unlock_buffer(od->dq_chunk->qc_headerbh); + status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); + if (status < 0) { + mlog_errno(status); + goto out; + } + status = 0; +out: + /* Clear the read bit so that next time someone uses this + * dquot he reads fresh info from disk and allocates local + * dquot structure */ + clear_bit(DQ_READ_B, &dquot->dq_flags); + return status; +} + +static struct quota_format_ops ocfs2_format_ops = { + .check_quota_file = ocfs2_local_check_quota_file, + .read_file_info = ocfs2_local_read_info, + .write_file_info = ocfs2_global_write_info, + .free_file_info = ocfs2_local_free_info, + .read_dqblk = ocfs2_local_read_dquot, + .commit_dqblk = ocfs2_local_write_dquot, + .release_dqblk = ocfs2_local_release_dquot, +}; + +struct quota_format_type ocfs2_quota_format = { + .qf_fmt_id = QFMT_OCFS2, + .qf_ops = &ocfs2_format_ops, + .qf_owner = THIS_MODULE 
+}; diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index ffd48db229a7..867de3ebfcaf 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) fe = (struct ocfs2_dinode *)main_bm_bh->b_data; + /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(), + * so any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != ocfs2_group_bitmap_size(osb->sb) * 8) { mlog(ML_ERROR, "The disk is too old and small. " @@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) goto out_unlock; } - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe); - ret = -EIO; - goto out_unlock; - } - first_new_cluster = le32_to_cpu(fe->i_clusters); lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, first_new_cluster - 1); - ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); goto out_unlock; } - group = (struct ocfs2_group_desc *)group_bh->b_data; - ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group); - if (ret) { - mlog_errno(ret); - goto out_unlock; - } - cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc); if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters > le16_to_cpu(fe->id2.i_chain.cl_cpg)) { @@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode, struct buffer_head *group_bh) { int ret; - struct ocfs2_group_desc *gd; + struct ocfs2_group_desc *gd = + (struct ocfs2_group_desc *)group_bh->b_data; u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc); - unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * - le16_to_cpu(di->id2.i_chain.cl_bpc); - - gd = (struct ocfs2_group_desc *)group_bh->b_data; + ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh); + if (ret) + goto out; - ret = -EIO; - if 
(!OCFS2_IS_VALID_GROUP_DESC(gd)) - mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno)); - else if (di->i_blkno != gd->bg_parent_dinode) - mlog(ML_ERROR, "Group descriptor # %llu has bad parent " - "pointer (%llu, expected %llu)\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - else if (le16_to_cpu(gd->bg_bits) > max_bits) - mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "claims that %u are free\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) - mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u\n", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - else if (le16_to_cpu(gd->bg_chain) != input->chain) + ret = -EINVAL; + if (le16_to_cpu(gd->bg_chain) != input->chain) mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u " "while input has %u set.\n", (unsigned long long)le64_to_cpu(gd->bg_blkno), @@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode, else ret = 0; +out: return ret; } diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bdda2d8f8508..40661e7824e9 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) * this is not true, the read of -1 (UINT64_MAX) will fail. 
*/ ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); @@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, - OCFS2_BH_IGNORE_CACHE); + OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index faec2d879357..9b76d41a8ac6 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { + if (!lksb->lksb_fsdlm.sb_lvbptr) + lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + + sizeof(struct dlm_lksb); return (void *)(lksb->lksb_fsdlm.sb_lvbptr); } diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index c5ff18b46b57..226fe21f2608 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -145,62 +145,151 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); } -/* somewhat more expensive than our other checks, so use sparingly. */ -int ocfs2_check_group_descriptor(struct super_block *sb, - struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd) +#define do_error(fmt, ...) 
\ + do{ \ + if (clean_error) \ + mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \ + else \ + ocfs2_error(sb, fmt, ##__VA_ARGS__); \ + } while (0) + +static int ocfs2_validate_gd_self(struct super_block *sb, + struct buffer_head *bh, + int clean_error) { - unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); - return -EIO; + do_error("Group descriptor #%llu has bad signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + gd->bg_signature); + return -EINVAL; + } + + if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) { + do_error("Group descriptor #%llu has an invalid bg_blkno " + "of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(gd->bg_blkno)); + return -EINVAL; } + if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) { + do_error("Group descriptor #%llu has an invalid " + "fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(gd->bg_generation)); + return -EINVAL; + } + + if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { + do_error("Group descriptor #%llu has bit count %u but " + "claims that %u are free", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + le16_to_cpu(gd->bg_free_bits_count)); + return -EINVAL; + } + + if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { + do_error("Group descriptor #%llu has bit count %u but " + "max bitmap bits of %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits), + 8 * le16_to_cpu(gd->bg_size)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_validate_gd_parent(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh, + int clean_error) +{ + unsigned int max_bits; + struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data; + if (di->i_blkno != gd->bg_parent_dinode) { - ocfs2_error(sb, "Group descriptor # %llu has bad parent " - "pointer (%llu, 
expected %llu)", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), - (unsigned long long)le64_to_cpu(di->i_blkno)); - return -EIO; + do_error("Group descriptor #%llu has bad parent " + "pointer (%llu, expected %llu)", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), + (unsigned long long)le64_to_cpu(di->i_blkno)); + return -EINVAL; } max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); if (le16_to_cpu(gd->bg_bits) > max_bits) { - ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits)); - return -EIO; + do_error("Group descriptor #%llu has bit count of %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_bits)); + return -EINVAL; } if (le16_to_cpu(gd->bg_chain) >= le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { - ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_chain)); - return -EIO; + do_error("Group descriptor #%llu has bad chain %u", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(gd->bg_chain)); + return -EINVAL; } - if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "claims that %u are free", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - le16_to_cpu(gd->bg_free_bits_count)); - return -EIO; - } + return 0; +} - if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { - ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " - "max bitmap bits of %u", - (unsigned long long)le64_to_cpu(gd->bg_blkno), - le16_to_cpu(gd->bg_bits), - 8 * le16_to_cpu(gd->bg_size)); - return -EIO; +#undef do_error + +/* + * This version only prints errors. It does not fail the filesystem, and + * exists only for resize. 
+ */ +int ocfs2_check_group_descriptor(struct super_block *sb, + struct ocfs2_dinode *di, + struct buffer_head *bh) +{ + int rc; + + rc = ocfs2_validate_gd_self(sb, bh, 1); + if (!rc) + rc = ocfs2_validate_gd_parent(sb, di, bh, 1); + + return rc; +} + +static int ocfs2_validate_group_descriptor(struct super_block *sb, + struct buffer_head *bh) +{ + mlog(0, "Validating group descriptor %llu\n", + (unsigned long long)bh->b_blocknr); + + return ocfs2_validate_gd_self(sb, bh, 0); +} + +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, gd_blkno, &tmp, + ocfs2_validate_group_descriptor); + if (rc) + goto out; + + rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0); + if (rc) { + brelse(tmp); + goto out; } - return 0; + /* If ocfs2_read_block() got us a new bh, pass it up. */ + if (!*bh) + *bh = tmp; + +out: + return rc; } static int ocfs2_block_group_fill(handle_t *handle, @@ -441,11 +530,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, ac->ac_alloc_slot = slot; fe = (struct ocfs2_dinode *) bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read inside + * ocfs2_inode_lock(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", (unsigned long long)le64_to_cpu(fe->i_blkno)); @@ -790,10 +879,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, int offset, start, found, status = 0; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); - return -EIO; - } + /* Callers got this descriptor from + * ocfs2_read_group_descriptor(). 
Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; @@ -858,11 +946,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* All callers get the descriptor via + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, @@ -931,21 +1017,10 @@ static int ocfs2_relink_block_group(handle_t *handle, struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto out; - } - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto out; - } - if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); - status = -EIO; - goto out; - } + /* The caller got these descriptors from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. 
*/ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg)); mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", (unsigned long long)le64_to_cpu(fe->i_blkno), chain, @@ -1008,7 +1083,7 @@ out_rollback: bg->bg_next_group = cpu_to_le64(bg_ptr); prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); } -out: + mlog_exit(status); return status; } @@ -1170,21 +1245,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, u16 found; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *gd; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; struct inode *alloc_inode = ac->ac_inode; - ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); + ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, + &group_bh); if (ret < 0) { mlog_errno(ret); return ret; } gd = (struct ocfs2_group_desc *) group_bh->b_data; - if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); - ret = -EIO; - goto out; - } - ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, ac->ac_max_block, bit_off, &found); if (ret < 0) { @@ -1241,19 +1312,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bits_wanted, chain, (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); - status = ocfs2_read_block(alloc_inode, - le64_to_cpu(cl->cl_recs[chain].c_blkno), - &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + le64_to_cpu(cl->cl_recs[chain].c_blkno), + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } status = -ENOSPC; /* for now, the chain search is a bit simplistic. 
We just use @@ -1271,18 +1337,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, next_group = le64_to_cpu(bg->bg_next_group); prev_group_bh = group_bh; group_bh = NULL; - status = ocfs2_read_block(alloc_inode, - next_group, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, + next_group, &group_bh); if (status < 0) { mlog_errno(status); goto bail; } bg = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); - if (status) { - mlog_errno(status); - goto bail; - } } if (status < 0) { if (status != -ENOSPC) @@ -1392,11 +1453,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, BUG_ON(!ac->ac_bh); fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); - status = -EIO; - goto bail; - } + + /* The bh was validated by the inode read during + * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); + if (le32_to_cpu(fe->id1.bitmap1.i_used) >= le32_to_cpu(fe->id1.bitmap1.i_total)) { ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " @@ -1725,11 +1786,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { - OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); - status = -EIO; - goto bail; - } + /* The caller got this descriptor from + * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg)); mlog(0, "off = %u, num = %u\n", bit_off, num_bits); @@ -1782,29 +1841,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle, mlog_entry_void(); - if (!OCFS2_IS_VALID_DINODE(fe)) { - OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); - status = -EIO; - goto bail; - } + /* The alloc_bh comes from ocfs2_free_dinode() or + * ocfs2_free_clusters(). The callers have all locked the + * allocator and gotten alloc_bh from the lock call. 
This + * validates the dinode buffer. Any corruption that has happended + * is a code bug. */ + BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, (unsigned long long)bg_blkno, start_bit); - status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); + status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno, + &group_bh); if (status < 0) { mlog_errno(status); goto bail; } - group = (struct ocfs2_group_desc *) group_bh->b_data; - status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); - if (status) { - mlog_errno(status); - goto bail; - } + BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); status = ocfs2_block_group_clear_bits(handle, alloc_inode, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 4df159d8f450..e3c13c77f9e8 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); * and return that block offset. */ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); -/* somewhat more expensive than our other checks, so use sparingly. */ +/* + * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it + * finds a problem. A caller that wants to check a group descriptor + * without going readonly should read the block with ocfs2_read_block[s]() + * and then checking it with this function. This is only resize, really. + * Everyone else should be using ocfs2_read_group_descriptor(). + */ int ocfs2_check_group_descriptor(struct super_block *sb, struct ocfs2_dinode *di, - struct ocfs2_group_desc *gd); + struct buffer_head *bh); +/* + * Read a group descriptor block into *bh. If *bh is NULL, a bh will be + * allocated. This is a cached read. The descriptor will be validated with + * ocfs2_validate_group_descriptor(). 
+ */ +int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di, + u64 gd_blkno, struct buffer_head **bh); + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, u32 clusters_to_add, u32 extents_to_split, struct ocfs2_alloc_context **data_ac, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 304b63ac78cf..bc431386443e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -41,6 +41,7 @@ #include <linux/debugfs.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/quotaops.h> #define MLOG_MASK_PREFIX ML_SUPER #include <cluster/masklog.h> @@ -65,10 +66,13 @@ #include "uptodate.h" #include "ver.h" #include "xattr.h" +#include "quota.h" #include "buffer_head_io.h" static struct kmem_cache *ocfs2_inode_cachep = NULL; +struct kmem_cache *ocfs2_dquot_cachep; +struct kmem_cache *ocfs2_qf_chunk_cachep; /* OCFS2 needs to schedule several differnt types of work which * require cluster locking, disk I/O, recovery waits, etc. Since these @@ -124,6 +128,9 @@ static int ocfs2_get_sector(struct super_block *sb, static void ocfs2_write_super(struct super_block *sb); static struct inode *ocfs2_alloc_inode(struct super_block *sb); static void ocfs2_destroy_inode(struct inode *inode); +static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend); +static int ocfs2_enable_quotas(struct ocfs2_super *osb); +static void ocfs2_disable_quotas(struct ocfs2_super *osb); static const struct super_operations ocfs2_sops = { .statfs = ocfs2_statfs, @@ -137,6 +144,8 @@ static const struct super_operations ocfs2_sops = { .put_super = ocfs2_put_super, .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, + .quota_read = ocfs2_quota_read, + .quota_write = ocfs2_quota_write, }; enum { @@ -158,6 +167,10 @@ enum { Opt_user_xattr, Opt_nouser_xattr, Opt_inode64, + Opt_acl, + Opt_noacl, + Opt_usrquota, + Opt_grpquota, Opt_err, }; @@ -180,6 +193,10 @@ static const match_table_t tokens = { {Opt_user_xattr, "user_xattr"}, 
{Opt_nouser_xattr, "nouser_xattr"}, {Opt_inode64, "inode64"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL} }; @@ -221,6 +238,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) return 0; } +static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino) +{ + if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA) + && (ino == USER_QUOTA_SYSTEM_INODE + || ino == LOCAL_USER_QUOTA_SYSTEM_INODE)) + return 0; + if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) + && (ino == GROUP_QUOTA_SYSTEM_INODE + || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE)) + return 0; + return 1; +} + static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) { struct inode *new = NULL; @@ -247,6 +277,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb) for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE; i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) { + if (!ocfs2_need_system_inode(osb, i)) + continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); @@ -277,6 +309,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb) for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1; i < NUM_SYSTEM_INODES; i++) { + if (!ocfs2_need_system_inode(osb, i)) + continue; new = ocfs2_get_system_file_inode(osb, i, osb->slot_num); if (!new) { ocfs2_release_system_inodes(osb); @@ -426,6 +460,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) /* We're going to/from readonly mode. */ if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { + /* Disable quota accounting before remounting RO */ + if (*flags & MS_RDONLY) { + ret = ocfs2_susp_quotas(osb, 0); + if (ret < 0) + goto out; + } /* Lock here so the check of HARD_RO and the potential * setting of SOFT_RO is atomic. 
*/ spin_lock(&osb->osb_lock); @@ -461,11 +501,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) } unlock_osb: spin_unlock(&osb->osb_lock); + /* Enable quota accounting after remounting RW */ + if (!ret && !(*flags & MS_RDONLY)) { + if (sb_any_quota_suspended(sb)) + ret = ocfs2_susp_quotas(osb, 1); + else + ret = ocfs2_enable_quotas(osb); + if (ret < 0) { + /* Return back changes... */ + spin_lock(&osb->osb_lock); + sb->s_flags |= MS_RDONLY; + osb->osb_flags |= OCFS2_OSB_SOFT_RO; + spin_unlock(&osb->osb_lock); + goto out; + } + } } if (!ret) { /* Only save off the new mount options in case of a successful * remount. */ + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; @@ -619,6 +676,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, return 0; } +static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend) +{ + int type; + struct super_block *sb = osb->sb; + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + int status = 0; + + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + if (unsuspend) + status = vfs_quota_enable( + sb_dqopt(sb)->files[type], + type, QFMT_OCFS2, + DQUOT_SUSPENDED); + else + status = vfs_quota_disable(sb, type, + DQUOT_SUSPENDED); + if (status < 0) + break; + } + if (status < 0) + mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on " + "remount (error = %d).\n", status); + return status; +} + +static int ocfs2_enable_quotas(struct ocfs2_super *osb) +{ + struct inode *inode[MAXQUOTAS] = { NULL, NULL }; + struct super_block *sb = osb->sb; + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + unsigned int 
ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, + LOCAL_GROUP_QUOTA_SYSTEM_INODE }; + int status; + int type; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE; + for (type = 0; type < MAXQUOTAS; type++) { + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + continue; + inode[type] = ocfs2_get_system_file_inode(osb, ino[type], + osb->slot_num); + if (!inode[type]) { + status = -ENOENT; + goto out_quota_off; + } + status = vfs_quota_enable(inode[type], type, QFMT_OCFS2, + DQUOT_USAGE_ENABLED); + if (status < 0) + goto out_quota_off; + } + + for (type = 0; type < MAXQUOTAS; type++) + iput(inode[type]); + return 0; +out_quota_off: + ocfs2_disable_quotas(osb); + for (type = 0; type < MAXQUOTAS; type++) + iput(inode[type]); + mlog_errno(status); + return status; +} + +static void ocfs2_disable_quotas(struct ocfs2_super *osb) +{ + int type; + struct inode *inode; + struct super_block *sb = osb->sb; + + /* We mostly ignore errors in this function because there's not much + * we can do when we see them */ + for (type = 0; type < MAXQUOTAS; type++) { + if (!sb_has_quota_loaded(sb, type)) + continue; + inode = igrab(sb->s_dquot.files[type]); + /* Turn off quotas. 
This will remove all dquot structures from + * memory and so they will be automatically synced to global + * quota files */ + vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED | + DQUOT_LIMITS_ENABLED); + if (!inode) + continue; + iput(inode); + } +} + +/* Handle quota on quotactl */ +static int ocfs2_quota_on(struct super_block *sb, int type, int format_id, + char *path, int remount) +{ + unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}; + + if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type])) + return -EINVAL; + + if (remount) + return 0; /* Just ignore it has been handled in + * ocfs2_remount() */ + return vfs_quota_enable(sb_dqopt(sb)->files[type], type, + format_id, DQUOT_LIMITS_ENABLED); +} + +/* Handle quota off quotactl */ +static int ocfs2_quota_off(struct super_block *sb, int type, int remount) +{ + if (remount) + return 0; /* Ignore now and handle later in + * ocfs2_remount() */ + return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED); +} + +static struct quotactl_ops ocfs2_quotactl_ops = { + .quota_on = ocfs2_quota_on, + .quota_off = ocfs2_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk, +}; + static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; @@ -651,12 +833,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } brelse(bh); bh = NULL; + + if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)) + parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + osb->s_mount_opt = parsed_options.mount_opt; osb->s_atime_quantum = parsed_options.atime_quantum; osb->preferred_slot = parsed_options.slot; osb->osb_commit_interval = parsed_options.commit_interval; osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); osb->local_alloc_bits = osb->local_alloc_default_bits; + if 
(osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + status = -EINVAL; + mlog(ML_ERROR, "User quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + goto read_super_error; + } + if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA && + !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + status = -EINVAL; + mlog(ML_ERROR, "Group quotas were requested, but this " + "filesystem does not have the feature enabled.\n"); + goto read_super_error; + } status = ocfs2_verify_userspace_stack(osb, &parsed_options); if (status) @@ -664,6 +866,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = OCFS2_SUPER_MAGIC; + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + /* Hard readonly mode only if: bdev_read_only, MS_RDONLY, * heartbeat=none */ if (bdev_read_only(sb->s_bdev)) { @@ -758,6 +963,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); + /* Now we can initialize quotas because we can afford to wait + * for cluster locks recovery now. 
That also means that truncation + * log recovery can happen but that waits for proper quota setup */ + if (!(sb->s_flags & MS_RDONLY)) { + status = ocfs2_enable_quotas(osb); + if (status < 0) { + /* We have to err-out specially here because + * s_root is already set */ + mlog_errno(status); + atomic_set(&osb->vol_state, VOLUME_DISABLED); + wake_up(&osb->osb_mount_event); + mlog_exit(status); + return status; + } + } + + ocfs2_complete_quota_recovery(osb); + + /* Now we wake up again for processes waiting for quotas */ + atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); + wake_up(&osb->osb_mount_event); + mlog_exit(status); return status; @@ -945,6 +1172,41 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_inode64: mopt->mount_opt |= OCFS2_MOUNT_INODE64; break; + case Opt_usrquota: + /* We check only on remount, otherwise features + * aren't yet initialized. */ + if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { + mlog(ML_ERROR, "User quota requested but " + "filesystem feature is not set\n"); + status = 0; + goto bail; + } + mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; + break; + case Opt_grpquota: + if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { + mlog(ML_ERROR, "Group quota requested but " + "filesystem feature is not set\n"); + status = 0; + goto bail; + } + mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; + break; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + case Opt_acl: + mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + break; + case Opt_noacl: + mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + break; +#else + case Opt_acl: + case Opt_noacl: + printk(KERN_INFO "ocfs2 (no)acl options not supported\n"); + break; +#endif default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1008,6 +1270,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (osb->osb_cluster_stack[0]) seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, osb->osb_cluster_stack); + if 
(opts & OCFS2_MOUNT_USRQUOTA) + seq_printf(s, ",usrquota"); + if (opts & OCFS2_MOUNT_GRPQUOTA) + seq_printf(s, ",grpquota"); if (opts & OCFS2_MOUNT_NOUSERXATTR) seq_printf(s, ",nouser_xattr"); @@ -1017,6 +1283,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) if (opts & OCFS2_MOUNT_INODE64) seq_printf(s, ",inode64"); +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + if (opts & OCFS2_MOUNT_POSIX_ACL) + seq_printf(s, ",acl"); + else + seq_printf(s, ",noacl"); +#endif + return 0; } @@ -1054,6 +1327,7 @@ static int __init ocfs2_init(void) ocfs2_set_locking_protocol(); + status = register_quota_format(&ocfs2_quota_format); leave: if (status < 0) { ocfs2_free_mem_caches(); @@ -1077,6 +1351,8 @@ static void __exit ocfs2_exit(void) destroy_workqueue(ocfs2_wq); } + unregister_quota_format(&ocfs2_quota_format); + debugfs_remove(ocfs2_debugfs_root); ocfs2_free_mem_caches(); @@ -1192,8 +1468,27 @@ static int ocfs2_initialize_mem_caches(void) (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), ocfs2_inode_init_once); - if (!ocfs2_inode_cachep) + ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", + sizeof(struct ocfs2_dquot), + 0, + (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + NULL); + ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache", + sizeof(struct ocfs2_quota_chunk), + 0, + (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + NULL); + if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || + !ocfs2_qf_chunk_cachep) { + if (ocfs2_inode_cachep) + kmem_cache_destroy(ocfs2_inode_cachep); + if (ocfs2_dquot_cachep) + kmem_cache_destroy(ocfs2_dquot_cachep); + if (ocfs2_qf_chunk_cachep) + kmem_cache_destroy(ocfs2_qf_chunk_cachep); return -ENOMEM; + } return 0; } @@ -1202,8 +1497,15 @@ static void ocfs2_free_mem_caches(void) { if (ocfs2_inode_cachep) kmem_cache_destroy(ocfs2_inode_cachep); - ocfs2_inode_cachep = NULL; + + if (ocfs2_dquot_cachep) + kmem_cache_destroy(ocfs2_dquot_cachep); + ocfs2_dquot_cachep = NULL; + + if 
(ocfs2_qf_chunk_cachep) + kmem_cache_destroy(ocfs2_qf_chunk_cachep); + ocfs2_qf_chunk_cachep = NULL; } static int ocfs2_get_sector(struct super_block *sb, @@ -1303,6 +1605,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) osb = OCFS2_SB(sb); BUG_ON(!osb); + ocfs2_disable_quotas(osb); + ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); @@ -1413,6 +1717,8 @@ static int ocfs2_initialize_super(struct super_block *sb, sb->s_fs_info = osb; sb->s_op = &ocfs2_sops; sb->s_export_op = &ocfs2_export_ops; + sb->s_qcop = &ocfs2_quotactl_ops; + sb->dq_op = &ocfs2_quota_operations; sb->s_xattr = ocfs2_xattr_handlers; sb->s_time_gran = 1; sb->s_flags |= MS_NOATIME; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index cbd03dfdc7b9..ed0a0cfd68d2 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, mlog_entry_void(); - status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); + status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); link = ERR_PTR(status); diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 054e2efb0b7e..7e0d62ac441b 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -35,6 +35,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> +#include <linux/security.h> #define MLOG_MASK_PREFIX ML_XATTR #include <cluster/masklog.h> @@ -61,12 +62,32 @@ struct ocfs2_xattr_def_value_root { }; struct ocfs2_xattr_bucket { - struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; - struct ocfs2_xattr_header *xh; + /* The inode these xattrs are associated with */ + struct inode *bu_inode; + + /* The actual buffers that make up the bucket */ + struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; + + /* How many blocks make up one bucket for this filesystem */ + int bu_blocks; +}; + +struct ocfs2_xattr_set_ctxt { + handle_t *handle; + struct ocfs2_alloc_context *meta_ac; + 
struct ocfs2_alloc_context *data_ac; + struct ocfs2_cached_dealloc_ctxt dealloc; }; #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) #define OCFS2_XATTR_INLINE_SIZE 80 +#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \ + - sizeof(struct ocfs2_xattr_header) \ + - sizeof(__u32)) +#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \ + - sizeof(struct ocfs2_xattr_block) \ + - sizeof(struct ocfs2_xattr_header) \ + - sizeof(__u32)) static struct ocfs2_xattr_def_value_root def_xv = { .xv.xr_list.l_count = cpu_to_le16(1), @@ -74,13 +95,25 @@ static struct ocfs2_xattr_def_value_root def_xv = { struct xattr_handler *ocfs2_xattr_handlers[] = { &ocfs2_xattr_user_handler, +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + &ocfs2_xattr_acl_access_handler, + &ocfs2_xattr_acl_default_handler, +#endif &ocfs2_xattr_trusted_handler, + &ocfs2_xattr_security_handler, NULL }; static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, +#ifdef CONFIG_OCFS2_FS_POSIX_ACL + [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] + = &ocfs2_xattr_acl_access_handler, + [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] + = &ocfs2_xattr_acl_default_handler, +#endif [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, + [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler, }; struct ocfs2_xattr_info { @@ -98,7 +131,7 @@ struct ocfs2_xattr_search { */ struct buffer_head *xattr_bh; struct ocfs2_xattr_header *header; - struct ocfs2_xattr_bucket bucket; + struct ocfs2_xattr_bucket *bucket; void *base; void *end; struct ocfs2_xattr_entry *here; @@ -127,11 +160,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode, size_t buffer_size); static int ocfs2_xattr_create_index_block(struct inode *inode, - struct ocfs2_xattr_search *xs); + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt); static int ocfs2_xattr_set_entry_index_block(struct inode *inode, struct ocfs2_xattr_info *xi, - 
struct ocfs2_xattr_search *xs); + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt); static int ocfs2_delete_xattr_index_block(struct inode *inode, struct buffer_head *xb_bh); @@ -154,6 +189,187 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) return len / sizeof(struct ocfs2_xattr_entry); } +#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr) +#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data) +#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0)) + +static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode) +{ + struct ocfs2_xattr_bucket *bucket; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET); + + bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS); + if (bucket) { + bucket->bu_inode = inode; + bucket->bu_blocks = blks; + } + + return bucket; +} + +static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket) +{ + int i; + + for (i = 0; i < bucket->bu_blocks; i++) { + brelse(bucket->bu_bhs[i]); + bucket->bu_bhs[i] = NULL; + } +} + +static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) +{ + if (bucket) { + ocfs2_xattr_bucket_relse(bucket); + bucket->bu_inode = NULL; + kfree(bucket); + } +} + +/* + * A bucket that has never been written to disk doesn't need to be + * read. We just need the buffer_heads. Don't call this for + * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes + * them fully. 
+ */ +static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, + u64 xb_blkno) +{ + int i, rc = 0; + + for (i = 0; i < bucket->bu_blocks; i++) { + bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb, + xb_blkno + i); + if (!bucket->bu_bhs[i]) { + rc = -EIO; + mlog_errno(rc); + break; + } + + if (!ocfs2_buffer_uptodate(bucket->bu_inode, + bucket->bu_bhs[i])) + ocfs2_set_new_buffer_uptodate(bucket->bu_inode, + bucket->bu_bhs[i]); + } + + if (rc) + ocfs2_xattr_bucket_relse(bucket); + return rc; +} + +/* Read the xattr bucket at xb_blkno */ +static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket, + u64 xb_blkno) +{ + int rc; + + rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, + bucket->bu_blocks, bucket->bu_bhs, 0, + NULL); + if (rc) + ocfs2_xattr_bucket_relse(bucket); + return rc; +} + +static int ocfs2_xattr_bucket_journal_access(handle_t *handle, + struct ocfs2_xattr_bucket *bucket, + int type) +{ + int i, rc = 0; + + for (i = 0; i < bucket->bu_blocks; i++) { + rc = ocfs2_journal_access(handle, bucket->bu_inode, + bucket->bu_bhs[i], type); + if (rc) { + mlog_errno(rc); + break; + } + } + + return rc; +} + +static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle, + struct ocfs2_xattr_bucket *bucket) +{ + int i; + + for (i = 0; i < bucket->bu_blocks; i++) + ocfs2_journal_dirty(handle, bucket->bu_bhs[i]); +} + +static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest, + struct ocfs2_xattr_bucket *src) +{ + int i; + int blocksize = src->bu_inode->i_sb->s_blocksize; + + BUG_ON(dest->bu_blocks != src->bu_blocks); + BUG_ON(dest->bu_inode != src->bu_inode); + + for (i = 0; i < src->bu_blocks; i++) { + memcpy(bucket_block(dest, i), bucket_block(src, i), + blocksize); + } +} + +static int ocfs2_validate_xattr_block(struct super_block *sb, + struct buffer_head *bh) +{ + struct ocfs2_xattr_block *xb = + (struct ocfs2_xattr_block *)bh->b_data; + + mlog(0, "Validating xattr block %llu\n", + (unsigned long long)bh->b_blocknr); + 
+ if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { + ocfs2_error(sb, + "Extended attribute block #%llu has bad " + "signature %.*s", + (unsigned long long)bh->b_blocknr, 7, + xb->xb_signature); + return -EINVAL; + } + + if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) { + ocfs2_error(sb, + "Extended attribute block #%llu has an " + "invalid xb_blkno of %llu", + (unsigned long long)bh->b_blocknr, + (unsigned long long)le64_to_cpu(xb->xb_blkno)); + return -EINVAL; + } + + if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) { + ocfs2_error(sb, + "Extended attribute block #%llu has an invalid " + "xb_fs_generation of #%u", + (unsigned long long)bh->b_blocknr, + le32_to_cpu(xb->xb_fs_generation)); + return -EINVAL; + } + + return 0; +} + +static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno, + struct buffer_head **bh) +{ + int rc; + struct buffer_head *tmp = *bh; + + rc = ocfs2_read_block(inode, xb_blkno, &tmp, + ocfs2_validate_xattr_block); + + /* If ocfs2_read_block() got us a new bh, pass it up. 
*/ + if (!rc && !*bh) + *bh = tmp; + + return rc; +} + static inline const char *ocfs2_xattr_prefix(int name_index) { struct xattr_handler *handler = NULL; @@ -200,17 +416,135 @@ static void ocfs2_xattr_hash_entry(struct inode *inode, return; } +static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len) +{ + int size = 0; + + if (value_len <= OCFS2_XATTR_INLINE_SIZE) + size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len); + else + size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; + size += sizeof(struct ocfs2_xattr_entry); + + return size; +} + +int ocfs2_calc_security_init(struct inode *dir, + struct ocfs2_security_xattr_info *si, + int *want_clusters, + int *xattr_credits, + struct ocfs2_alloc_context **xattr_ac) +{ + int ret = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + int s_size = ocfs2_xattr_entry_real_size(strlen(si->name), + si->value_len); + + /* + * The max space of security xattr taken inline is + * 256(name) + 80(value) + 16(entry) = 352 bytes, + * So reserve one metadata block for it is ok. 
+ */ + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || + s_size > OCFS2_XATTR_FREE_IN_IBODY) { + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } + + /* reserve clusters for xattr value which will be set in B tree*/ + if (si->value_len > OCFS2_XATTR_INLINE_SIZE) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + return ret; +} + +int ocfs2_calc_xattr_init(struct inode *dir, + struct buffer_head *dir_bh, + int mode, + struct ocfs2_security_xattr_info *si, + int *want_clusters, + int *xattr_credits, + struct ocfs2_alloc_context **xattr_ac) +{ + int ret = 0; + struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); + int s_size = 0; + int a_size = 0; + int acl_len = 0; + + if (si->enable) + s_size = ocfs2_xattr_entry_real_size(strlen(si->name), + si->value_len); + + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) { + acl_len = ocfs2_xattr_get_nolock(dir, dir_bh, + OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, + "", NULL, 0); + if (acl_len > 0) { + a_size = ocfs2_xattr_entry_real_size(0, acl_len); + if (S_ISDIR(mode)) + a_size <<= 1; + } else if (acl_len != 0 && acl_len != -ENODATA) { + mlog_errno(ret); + return ret; + } + } + + if (!(s_size + a_size)) + return ret; + + /* + * The max space of security xattr taken inline is + * 256(name) + 80(value) + 16(entry) = 352 bytes, + * The max space of acl xattr taken inline is + * 80(value) + 16(entry) * 2(if directory) = 192 bytes, + * when blocksize = 512, may reserve one more cluser for + * xattr bucket, otherwise reserve one metadata block + * for them is ok. 
+ */ + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE || + (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) { + ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } + + if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE && + (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) { + *want_clusters += 1; + *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb); + } + + /* reserve clusters for xattr value which will be set in B tree*/ + if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + si->value_len); + if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL && + acl_len > OCFS2_XATTR_INLINE_SIZE) { + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len); + if (S_ISDIR(mode)) + *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, + acl_len); + } + + return ret; +} + static int ocfs2_xattr_extend_allocation(struct inode *inode, u32 clusters_to_add, struct buffer_head *xattr_bh, - struct ocfs2_xattr_value_root *xv) + struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_set_ctxt *ctxt) { int status = 0; - int restart_func = 0; - int credits = 0; - handle_t *handle = NULL; - struct ocfs2_alloc_context *data_ac = NULL; - struct ocfs2_alloc_context *meta_ac = NULL; + handle_t *handle = ctxt->handle; enum ocfs2_alloc_restarted why; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); @@ -220,26 +554,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode, ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); -restart_all: - - status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, - &data_ac, &meta_ac); - if (status) { - mlog_errno(status); - goto leave; - } - - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, - clusters_to_add); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - 
status = PTR_ERR(handle); - handle = NULL; - mlog_errno(status); - goto leave; - } - -restarted_transaction: status = ocfs2_journal_access(handle, inode, xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { @@ -255,12 +569,11 @@ restarted_transaction: 0, &et, handle, - data_ac, - meta_ac, + ctxt->data_ac, + ctxt->meta_ac, &why); - if ((status < 0) && (status != -EAGAIN)) { - if (status != -ENOSPC) - mlog_errno(status); + if (status < 0) { + mlog_errno(status); goto leave; } @@ -272,47 +585,13 @@ restarted_transaction: clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; - if (why != RESTART_NONE && clusters_to_add) { - if (why == RESTART_META) { - mlog(0, "restarting function.\n"); - restart_func = 1; - } else { - BUG_ON(why != RESTART_TRANS); - - mlog(0, "restarting transaction.\n"); - /* TODO: This can be more intelligent. */ - credits = ocfs2_calc_extend_credits(osb->sb, - et.et_root_el, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); - if (status < 0) { - /* handle still has to be committed at - * this point. */ - status = -ENOMEM; - mlog_errno(status); - goto leave; - } - goto restarted_transaction; - } - } + /* + * We should have already allocated enough space before the transaction, + * so no need to restart. 
+ */ + BUG_ON(why != RESTART_NONE || clusters_to_add); leave: - if (handle) { - ocfs2_commit_trans(osb, handle); - handle = NULL; - } - if (data_ac) { - ocfs2_free_alloc_context(data_ac); - data_ac = NULL; - } - if (meta_ac) { - ocfs2_free_alloc_context(meta_ac); - meta_ac = NULL; - } - if ((!status) && restart_func) { - restart_func = 0; - goto restart_all; - } return status; } @@ -321,53 +600,27 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, struct buffer_head *root_bh, struct ocfs2_xattr_value_root *xv, u32 cpos, u32 phys_cpos, u32 len, - struct ocfs2_cached_dealloc_ctxt *dealloc) + struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct inode *tl_inode = osb->osb_tl_inode; - handle_t *handle; - struct ocfs2_alloc_context *meta_ac = NULL; + handle_t *handle = ctxt->handle; struct ocfs2_extent_tree et; ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); - ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); - if (ret) { - mlog_errno(ret); - return ret; - } - - mutex_lock(&tl_inode->i_mutex); - - if (ocfs2_truncate_log_needs_flush(osb)) { - ret = __ocfs2_flush_truncate_log(osb); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } - ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, - dealloc); + ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, + &ctxt->dealloc); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } le32_add_cpu(&xv->xr_clusters, -len); @@ -375,21 +628,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode, ret = ocfs2_journal_dirty(handle, root_bh); if 
(ret) { mlog_errno(ret); - goto out_commit; + goto out; } - ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); if (ret) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); out: - mutex_unlock(&tl_inode->i_mutex); - - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - return ret; } @@ -397,15 +643,12 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, u32 old_clusters, u32 new_clusters, struct buffer_head *root_bh, - struct ocfs2_xattr_value_root *xv) + struct ocfs2_xattr_value_root *xv, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret = 0; u32 trunc_len, cpos, phys_cpos, alloc_size; u64 block; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct ocfs2_cached_dealloc_ctxt dealloc; - - ocfs2_init_dealloc_ctxt(&dealloc); if (old_clusters <= new_clusters) return 0; @@ -425,7 +668,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, phys_cpos, alloc_size, - &dealloc); + ctxt); if (ret) { mlog_errno(ret); goto out; @@ -439,16 +682,14 @@ static int ocfs2_xattr_shrink_size(struct inode *inode, } out: - ocfs2_schedule_truncate_log_flush(osb, 1); - ocfs2_run_deallocs(osb, &dealloc); - return ret; } static int ocfs2_xattr_value_truncate(struct inode *inode, struct buffer_head *root_bh, struct ocfs2_xattr_value_root *xv, - int len) + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret; u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); @@ -460,11 +701,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode, if (new_clusters > old_clusters) ret = ocfs2_xattr_extend_allocation(inode, new_clusters - old_clusters, - root_bh, xv); + root_bh, xv, ctxt); else ret = ocfs2_xattr_shrink_size(inode, old_clusters, new_clusters, - root_bh, xv); + root_bh, xv, ctxt); return ret; } @@ -554,18 +795,14 @@ static int ocfs2_xattr_block_list(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret 
= ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = -EIO; - goto cleanup; - } - if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; ret = ocfs2_xattr_list_entries(inode, header, @@ -575,7 +812,7 @@ static int ocfs2_xattr_block_list(struct inode *inode, ret = ocfs2_xattr_tree_list_index_block(inode, xt, buffer, buffer_size); } -cleanup: + brelse(blk_bh); return ret; @@ -685,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode, blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); /* Copy ocfs2_xattr_value */ for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -769,7 +1006,12 @@ static int ocfs2_xattr_block_get(struct inode *inode, size_t size; int ret = -ENODATA, name_offset, name_len, block_off, i; - memset(&xs->bucket, 0, sizeof(xs->bucket)); + xs->bucket = ocfs2_xattr_bucket_new(inode); + if (!xs->bucket) { + ret = -ENOMEM; + mlog_errno(ret); + goto cleanup; + } ret = ocfs2_xattr_block_find(inode, name_index, name, xs); if (ret) { @@ -795,11 +1037,11 @@ static int ocfs2_xattr_block_get(struct inode *inode, if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { ret = ocfs2_xattr_bucket_get_name_value(inode, - xs->bucket.xh, + bucket_xh(xs->bucket), i, &block_off, &name_offset); - xs->base = xs->bucket.bhs[block_off]->b_data; + xs->base = bucket_block(xs->bucket, block_off); } if (ocfs2_xattr_is_local(xs->here)) { memcpy(buffer, (void *)xs->base + @@ -817,21 +1059,15 @@ static int ocfs2_xattr_block_get(struct inode *inode, } ret = size; cleanup: - for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) - 
brelse(xs->bucket.bhs[i]); - memset(&xs->bucket, 0, sizeof(xs->bucket)); + ocfs2_xattr_bucket_free(xs->bucket); brelse(xs->xattr_bh); xs->xattr_bh = NULL; return ret; } -/* ocfs2_xattr_get() - * - * Copy an extended attribute into the buffer provided. - * Buffer is NULL to compute the size of buffer required. - */ -static int ocfs2_xattr_get(struct inode *inode, +int ocfs2_xattr_get_nolock(struct inode *inode, + struct buffer_head *di_bh, int name_index, const char *name, void *buffer, @@ -839,7 +1075,6 @@ static int ocfs2_xattr_get(struct inode *inode, { int ret; struct ocfs2_dinode *di = NULL; - struct buffer_head *di_bh = NULL; struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_xattr_search xis = { .not_found = -ENODATA, @@ -854,11 +1089,6 @@ static int ocfs2_xattr_get(struct inode *inode, if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) ret = -ENODATA; - ret = ocfs2_inode_lock(inode, &di_bh, 0); - if (ret < 0) { - mlog_errno(ret); - return ret; - } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; @@ -869,6 +1099,32 @@ static int ocfs2_xattr_get(struct inode *inode, ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); up_read(&oi->ip_xattr_sem); + + return ret; +} + +/* ocfs2_xattr_get() + * + * Copy an extended attribute into the buffer provided. + * Buffer is NULL to compute the size of buffer required. 
+ */ +static int ocfs2_xattr_get(struct inode *inode, + int name_index, + const char *name, + void *buffer, + size_t buffer_size) +{ + int ret; + struct buffer_head *di_bh = NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, + name, buffer, buffer_size); + ocfs2_inode_unlock(inode, 0); brelse(di_bh); @@ -877,6 +1133,7 @@ static int ocfs2_xattr_get(struct inode *inode, } static int __ocfs2_xattr_set_value_outside(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_value_root *xv, const void *value, int value_len) @@ -888,14 +1145,17 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); u64 blkno; struct buffer_head *bh = NULL; - handle_t *handle; BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); + /* + * In __ocfs2_xattr_set_value_outside has already been dirtied, + * so we don't need to worry about whether ocfs2_extend_trans + * will create a new transactio for us or not. 
+ */ credits = clusters * bpc; - handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); + ret = ocfs2_extend_trans(handle, credits); + if (ret) { mlog_errno(ret); goto out; } @@ -905,16 +1165,16 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, &num_clusters, &xv->xr_list); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); for (i = 0; i < num_clusters * bpc; i++, blkno++) { - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_block(inode, blkno, &bh, NULL); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } ret = ocfs2_journal_access(handle, @@ -923,7 +1183,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } cp_len = value_len > blocksize ? blocksize : value_len; @@ -937,7 +1197,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, ret = ocfs2_journal_dirty(handle, bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } brelse(bh); bh = NULL; @@ -951,8 +1211,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode, } cpos += num_clusters; } -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: brelse(bh); @@ -960,28 +1218,21 @@ out: } static int ocfs2_xattr_cleanup(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, size_t offs) { - handle_t *handle = NULL; int ret = 0; size_t name_len = strlen(xi->name); void *val = xs->base + offs; size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto 
out_commit; + goto out; } /* Decrease xattr count */ le16_add_cpu(&xs->header->xh_count, -1); @@ -992,32 +1243,23 @@ static int ocfs2_xattr_cleanup(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } static int ocfs2_xattr_update_entry(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, size_t offs) { - handle_t *handle = NULL; - int ret = 0; + int ret; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } xs->here->xe_name_offset = cpu_to_le16(offs); @@ -1031,8 +1273,6 @@ static int ocfs2_xattr_update_entry(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: return ret; } @@ -1045,6 +1285,7 @@ out: static int ocfs2_xattr_set_value_outside(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt, size_t offs) { size_t name_len = strlen(xi->name); @@ -1064,18 +1305,18 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode, xv->xr_list.l_next_free_rec = 0; ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - xi->value_len); + xi->value_len, ctxt); if (ret < 0) { mlog_errno(ret); return ret; } - ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, - xi->value_len); + ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs); if (ret < 0) { mlog_errno(ret); return ret; } - ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); + ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv, + xi->value, xi->value_len); if 
(ret < 0) mlog_errno(ret); @@ -1195,6 +1436,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode, static int ocfs2_xattr_set_entry(struct inode *inode, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt, int flag) { struct ocfs2_xattr_entry *last; @@ -1202,7 +1444,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); size_t size_l = 0; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; int free, i, ret; struct ocfs2_xattr_info xi_l = { .name_index = xi->name_index, @@ -1265,7 +1507,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, if (ocfs2_xattr_is_local(xs->here) && size == size_l) { /* Replace existing local xattr with tree root */ ret = ocfs2_xattr_set_value_outside(inode, xi, xs, - offs); + ctxt, offs); if (ret < 0) mlog_errno(ret); goto out; @@ -1284,25 +1526,28 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, - xi->value_len); + xi->value_len, + ctxt); if (ret < 0) { mlog_errno(ret); goto out; } - ret = __ocfs2_xattr_set_value_outside(inode, - xv, - xi->value, - xi->value_len); + ret = ocfs2_xattr_update_entry(inode, + handle, + xi, + xs, + offs); if (ret < 0) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_update_entry(inode, - xi, - xs, - offs); + ret = __ocfs2_xattr_set_value_outside(inode, + handle, + xv, + xi->value, + xi->value_len); if (ret < 0) mlog_errno(ret); goto out; @@ -1312,44 +1557,29 @@ static int ocfs2_xattr_set_entry(struct inode *inode, * just trucate old value to zero. 
*/ ret = ocfs2_xattr_value_truncate(inode, - xs->xattr_bh, - xv, - 0); + xs->xattr_bh, + xv, + 0, + ctxt); if (ret < 0) mlog_errno(ret); } } } - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), - OCFS2_INODE_UPDATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } - ret = ocfs2_journal_access(handle, inode, xs->inode_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } if (!(flag & OCFS2_INLINE_XATTR_FL)) { - /* set extended attribute in external block. */ - ret = ocfs2_extend_trans(handle, - OCFS2_INODE_UPDATE_CREDITS + - OCFS2_XATTR_BLOCK_UPDATE_CREDITS); - if (ret) { - mlog_errno(ret); - goto out_commit; - } ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } } @@ -1363,7 +1593,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode, ret = ocfs2_journal_dirty(handle, xs->xattr_bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } } @@ -1400,16 +1630,13 @@ static int ocfs2_xattr_set_entry(struct inode *inode, if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); - if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { /* * Set value outside in B tree. * This is the second step for value size > INLINE_SIZE. */ size_t offs = le16_to_cpu(xs->here->xe_name_offset); - ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); + ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs); if (ret < 0) { int ret2; @@ -1418,14 +1645,14 @@ out_commit: * If set value outside failed, we have to clean * the junk tree root we have already set in local. 
*/ - ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); + ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle, + xi, xs, offs); if (ret2 < 0) mlog_errno(ret2); } } out: return ret; - } static int ocfs2_remove_value_outside(struct inode*inode, @@ -1433,6 +1660,18 @@ static int ocfs2_remove_value_outside(struct inode*inode, struct ocfs2_xattr_header *header) { int ret = 0, i; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; + + ocfs2_init_dealloc_ctxt(&ctxt.dealloc); + + ctxt.handle = ocfs2_start_trans(osb, + ocfs2_remove_extent_credits(osb->sb)); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto out; + } for (i = 0; i < le16_to_cpu(header->xh_count); i++) { struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; @@ -1445,14 +1684,19 @@ static int ocfs2_remove_value_outside(struct inode*inode, le16_to_cpu(entry->xe_name_offset); xv = (struct ocfs2_xattr_value_root *) (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); - ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); + ret = ocfs2_xattr_value_truncate(inode, bh, xv, + 0, &ctxt); if (ret < 0) { mlog_errno(ret); - return ret; + break; } } } + ocfs2_commit_trans(osb, ctxt.handle); + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); +out: return ret; } @@ -1502,24 +1746,19 @@ static int ocfs2_xattr_free_block(struct inode *inode, u64 blk, bg_blkno; u16 bit; - ret = ocfs2_read_block(inode, block, &blk_bh); + ret = ocfs2_read_xattr_block(inode, block, &blk_bh); if (ret < 0) { mlog_errno(ret); goto out; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = -EIO; - goto out; - } - ret = ocfs2_xattr_block_remove(inode, blk_bh); if (ret < 0) { mlog_errno(ret); goto out; } + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; blk = le64_to_cpu(xb->xb_blkno); bit = le16_to_cpu(xb->xb_suballoc_bit); bg_blkno = ocfs2_which_suballoc_group(blk, bit); @@ -1714,7 +1953,8 @@ 
static int ocfs2_xattr_ibody_find(struct inode *inode, */ static int ocfs2_xattr_ibody_set(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_inode_info *oi = OCFS2_I(inode); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; @@ -1731,7 +1971,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode, } } - ret = ocfs2_xattr_set_entry(inode, xi, xs, + ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); out: up_write(&oi->ip_alloc_sem); @@ -1758,19 +1998,15 @@ static int ocfs2_xattr_block_find(struct inode *inode, if (!di->i_xattr_loc) return ret; - ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); + ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc), + &blk_bh); if (ret < 0) { mlog_errno(ret); return ret; } - xb = (struct ocfs2_xattr_block *)blk_bh->b_data; - if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) { - ret = -EIO; - goto cleanup; - } - xs->xattr_bh = blk_bh; + xb = (struct ocfs2_xattr_block *)blk_bh->b_data; if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { xs->header = &xb->xb_attrs.xb_header; @@ -1804,13 +2040,13 @@ cleanup: */ static int ocfs2_xattr_block_set(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct buffer_head *new_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; - struct ocfs2_alloc_context *meta_ac = NULL; - handle_t *handle = NULL; + handle_t *handle = ctxt->handle; struct ocfs2_xattr_block *xblk = NULL; u16 suballoc_bit_start; u32 num_got; @@ -1818,35 +2054,19 @@ static int ocfs2_xattr_block_set(struct inode *inode, int ret; if (!xs->xattr_bh) { - /* - * Alloc one external block for extended attribute - * outside of inode. 
- */ - ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - handle = ocfs2_start_trans(osb, - OCFS2_XATTR_BLOCK_CREATE_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xs->inode_bh, OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } - ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, + ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1, &suballoc_bit_start, &num_got, &first_blkno); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } new_bh = sb_getblk(inode->i_sb, first_blkno); @@ -1856,7 +2076,7 @@ static int ocfs2_xattr_block_set(struct inode *inode, OCFS2_JOURNAL_ACCESS_CREATE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } /* Initialize ocfs2_xattr_block */ @@ -1874,44 +2094,512 @@ static int ocfs2_xattr_block_set(struct inode *inode, xs->end = (void *)xblk + inode->i_sb->s_blocksize; xs->here = xs->header->xh_entries; - ret = ocfs2_journal_dirty(handle, new_bh); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto end; } di->i_xattr_loc = cpu_to_le64(first_blkno); - ret = ocfs2_journal_dirty(handle, xs->inode_bh); - if (ret < 0) - mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); -out: - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - if (ret < 0) - return ret; + ocfs2_journal_dirty(handle, xs->inode_bh); } else xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { /* Set extended attribute into external block */ - ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); + ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt, + OCFS2_HAS_XATTR_FL); if (!ret || ret != -ENOSPC) goto end; - ret = ocfs2_xattr_create_index_block(inode, xs); + ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); if (ret) goto end; } - ret = 
ocfs2_xattr_set_entry_index_block(inode, xi, xs); + ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt); end: return ret; } +/* Check whether the new xattr can be inserted into the inode. */ +static int ocfs2_xattr_can_be_in_inode(struct inode *inode, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xs) +{ + u64 value_size; + struct ocfs2_xattr_entry *last; + int free, i; + size_t min_offs = xs->end - xs->base; + + if (!xs->header) + return 0; + + last = xs->header->xh_entries; + + for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { + size_t offs = le16_to_cpu(last->xe_name_offset); + if (offs < min_offs) + min_offs = offs; + last += 1; + } + + free = min_offs - ((void *)last - xs->base) - sizeof(__u32); + if (free < 0) + return 0; + + BUG_ON(!xs->not_found); + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) + value_size = OCFS2_XATTR_ROOT_SIZE; + else + value_size = OCFS2_XATTR_SIZE(xi->value_len); + + if (free >= sizeof(struct ocfs2_xattr_entry) + + OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size) + return 1; + + return 0; +} + +static int ocfs2_calc_xattr_set_need(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + int *clusters_need, + int *meta_need, + int *credits_need) +{ + int ret = 0, old_in_xb = 0; + int clusters_add = 0, meta_add = 0, credits = 0; + struct buffer_head *bh = NULL; + struct ocfs2_xattr_block *xb = NULL; + struct ocfs2_xattr_entry *xe = NULL; + struct ocfs2_xattr_value_root *xv = NULL; + char *base = NULL; + int name_offset, name_len = 0; + u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, + xi->value_len); + u64 value_size; + + if (xis->not_found && xbs->not_found) { + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { + clusters_add += new_clusters; + credits += ocfs2_calc_extend_credits(inode->i_sb, + &def_xv.xv.xr_list, + new_clusters); + } + + goto 
meta_guess; + } + + if (!xis->not_found) { + xe = xis->here; + name_offset = le16_to_cpu(xe->xe_name_offset); + name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); + base = xis->base; + credits += OCFS2_INODE_UPDATE_CREDITS; + } else { + int i, block_off = 0; + xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; + xe = xbs->here; + name_offset = le16_to_cpu(xe->xe_name_offset); + name_len = OCFS2_XATTR_SIZE(xe->xe_name_len); + i = xbs->here - xbs->header->xh_entries; + old_in_xb = 1; + + if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { + ret = ocfs2_xattr_bucket_get_name_value(inode, + bucket_xh(xbs->bucket), + i, &block_off, + &name_offset); + base = bucket_block(xbs->bucket, block_off); + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + } else { + base = xbs->base; + credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS; + } + } + + /* + * delete a xattr doesn't need metadata and cluster allocation. + * so just calculate the credits and return. + * + * The credits for removing the value tree will be extended + * by ocfs2_remove_extent itself. + */ + if (!xi->value) { + if (!ocfs2_xattr_is_local(xe)) + credits += ocfs2_remove_extent_credits(inode->i_sb); + + goto out; + } + + /* do cluster allocation guess first. */ + value_size = le64_to_cpu(xe->xe_value_size); + + if (old_in_xb) { + /* + * In xattr set, we always try to set the xe in inode first, + * so if it can be inserted into inode successfully, the old + * one will be removed from the xattr block, and this xattr + * will be inserted into inode as a new xattr in inode. + */ + if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) { + clusters_add += new_clusters; + credits += ocfs2_remove_extent_credits(inode->i_sb) + + OCFS2_INODE_UPDATE_CREDITS; + if (!ocfs2_xattr_is_local(xe)) + credits += ocfs2_calc_extend_credits( + inode->i_sb, + &def_xv.xv.xr_list, + new_clusters); + goto out; + } + } + + if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { + /* the new values will be stored outside. 
*/ + u32 old_clusters = 0; + + if (!ocfs2_xattr_is_local(xe)) { + old_clusters = ocfs2_clusters_for_bytes(inode->i_sb, + value_size); + xv = (struct ocfs2_xattr_value_root *) + (base + name_offset + name_len); + value_size = OCFS2_XATTR_ROOT_SIZE; + } else + xv = &def_xv.xv; + + if (old_clusters >= new_clusters) { + credits += ocfs2_remove_extent_credits(inode->i_sb); + goto out; + } else { + meta_add += ocfs2_extend_meta_needed(&xv->xr_list); + clusters_add += new_clusters - old_clusters; + credits += ocfs2_calc_extend_credits(inode->i_sb, + &xv->xr_list, + new_clusters - + old_clusters); + if (value_size >= OCFS2_XATTR_ROOT_SIZE) + goto out; + } + } else { + /* + * Now the new value will be stored inside. So if the new + * value is smaller than the size of value root or the old + * value, we don't need any allocation, otherwise we have + * to guess metadata allocation. + */ + if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) || + (!ocfs2_xattr_is_local(xe) && + OCFS2_XATTR_ROOT_SIZE >= xi->value_len)) + goto out; + } + +meta_guess: + /* calculate metadata allocation. */ + if (di->i_xattr_loc) { + if (!xbs->xattr_bh) { + ret = ocfs2_read_xattr_block(inode, + le64_to_cpu(di->i_xattr_loc), + &bh); + if (ret) { + mlog_errno(ret); + goto out; + } + + xb = (struct ocfs2_xattr_block *)bh->b_data; + } else + xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data; + + if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { + struct ocfs2_extent_list *el = + &xb->xb_attrs.xb_root.xt_list; + meta_add += ocfs2_extend_meta_needed(el); + credits += ocfs2_calc_extend_credits(inode->i_sb, + el, 1); + } + + /* + * This cluster will be used either for new bucket or for + * new xattr block. + * If the cluster size is the same as the bucket size, one + * more is needed since we may need to extend the bucket + * also. 
+ */ + clusters_add += 1; + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + if (OCFS2_XATTR_BUCKET_SIZE == + OCFS2_SB(inode->i_sb)->s_clustersize) { + credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb); + clusters_add += 1; + } + } else { + meta_add += 1; + credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; + } +out: + if (clusters_need) + *clusters_need = clusters_add; + if (meta_need) + *meta_need = meta_add; + if (credits_need) + *credits_need = credits; + brelse(bh); + return ret; +} + +static int ocfs2_init_xattr_set_ctxt(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + struct ocfs2_xattr_set_ctxt *ctxt, + int *credits) +{ + int clusters_add, meta_add, ret; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt)); + + ocfs2_init_dealloc_ctxt(&ctxt->dealloc); + + ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs, + &clusters_add, &meta_add, credits); + if (ret) { + mlog_errno(ret); + return ret; + } + + mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " + "credits = %d\n", xi->name, meta_add, clusters_add, *credits); + + if (meta_add) { + ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, + &ctxt->meta_ac); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + if (clusters_add) { + ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac); + if (ret) + mlog_errno(ret); + } +out: + if (ret) { + if (ctxt->meta_ac) { + ocfs2_free_alloc_context(ctxt->meta_ac); + ctxt->meta_ac = NULL; + } + + /* + * We cannot have an error and a non null ctxt->data_ac. 
+ */ + } + + return ret; +} + +static int __ocfs2_xattr_set_handle(struct inode *inode, + struct ocfs2_dinode *di, + struct ocfs2_xattr_info *xi, + struct ocfs2_xattr_search *xis, + struct ocfs2_xattr_search *xbs, + struct ocfs2_xattr_set_ctxt *ctxt) +{ + int ret = 0, credits, old_found; + + if (!xi->value) { + /* Remove existing extended attribute */ + if (!xis->not_found) + ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); + else if (!xbs->not_found) + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + } else { + /* We always try to set extended attribute into inode first*/ + ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt); + if (!ret && !xbs->not_found) { + /* + * If succeed and that extended attribute existing in + * external block, then we will remove it. + */ + xi->value = NULL; + xi->value_len = 0; + + old_found = xis->not_found; + xis->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + xis->not_found = old_found; + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + } else if (ret == -ENOSPC) { + if (di->i_xattr_loc && !xbs->xattr_bh) { + ret = ocfs2_xattr_block_find(inode, + xi->name_index, + xi->name, xbs); + if (ret) + goto out; + + old_found = xis->not_found; + xis->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + xis->not_found = old_found; + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + } + /* + * If no space in inode, we will set extended attribute + * into external block. 
+ */ + ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); + if (ret) + goto out; + if (!xis->not_found) { + /* + * If succeed and that extended attribute + * existing in inode, we will remove it. + */ + xi->value = NULL; + xi->value_len = 0; + xbs->not_found = -ENODATA; + ret = ocfs2_calc_xattr_set_need(inode, + di, + xi, + xis, + xbs, + NULL, + NULL, + &credits); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_extend_trans(ctxt->handle, credits + + ctxt->handle->h_buffer_credits); + if (ret) { + mlog_errno(ret); + goto out; + } + ret = ocfs2_xattr_ibody_set(inode, xi, + xis, ctxt); + } + } + } + +out: + return ret; +} + +/* + * This function only called duing creating inode + * for init security/acl xattrs of the new inode. + * The xattrs could be put into ibody or extent block, + * xattr bucket would not be use in this case. + * transanction credits also be reserved in here. + */ +int ocfs2_xattr_set_handle(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + int name_index, + const char *name, + const void *value, + size_t value_len, + int flags, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_alloc_context *data_ac) +{ + struct ocfs2_dinode *di; + int ret; + + struct ocfs2_xattr_info xi = { + .name_index = name_index, + .name = name, + .value = value, + .value_len = value_len, + }; + + struct ocfs2_xattr_search xis = { + .not_found = -ENODATA, + }; + + struct ocfs2_xattr_search xbs = { + .not_found = -ENODATA, + }; + + struct ocfs2_xattr_set_ctxt ctxt = { + .handle = handle, + .meta_ac = meta_ac, + .data_ac = data_ac, + }; + + if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) + return -EOPNOTSUPP; + + xis.inode_bh = xbs.inode_bh = di_bh; + di = (struct ocfs2_dinode *)di_bh->b_data; + + down_write(&OCFS2_I(inode)->ip_xattr_sem); + + ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); + if (ret) + goto cleanup; + if (xis.not_found) { + ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); + if (ret) + goto 
cleanup; + } + + ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); + +cleanup: + up_write(&OCFS2_I(inode)->ip_xattr_sem); + brelse(xbs.xattr_bh); + + return ret; +} + /* * ocfs2_xattr_set() * @@ -1928,8 +2616,10 @@ int ocfs2_xattr_set(struct inode *inode, { struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - int ret; - u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int ret, credits; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; struct ocfs2_xattr_info xi = { .name_index = name_index, @@ -1949,10 +2639,20 @@ int ocfs2_xattr_set(struct inode *inode, if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; + /* + * Only xbs will be used on indexed trees. xis doesn't need a + * bucket. + */ + xbs.bucket = ocfs2_xattr_bucket_new(inode); + if (!xbs.bucket) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } + ret = ocfs2_inode_lock(inode, &di_bh, 1); if (ret < 0) { mlog_errno(ret); - return ret; + goto cleanup_nolock; } xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; @@ -1984,55 +2684,51 @@ int ocfs2_xattr_set(struct inode *inode, goto cleanup; } - if (!value) { - /* Remove existing extended attribute */ - if (!xis.not_found) - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); - else if (!xbs.not_found) - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); - } else { - /* We always try to set extended attribute into inode first*/ - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); - if (!ret && !xbs.not_found) { - /* - * If succeed and that extended attribute existing in - * external block, then we will remove it. 
- */ - xi.value = NULL; - xi.value_len = 0; - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); - } else if (ret == -ENOSPC) { - if (di->i_xattr_loc && !xbs.xattr_bh) { - ret = ocfs2_xattr_block_find(inode, name_index, - name, &xbs); - if (ret) - goto cleanup; - } - /* - * If no space in inode, we will set extended attribute - * into external block. - */ - ret = ocfs2_xattr_block_set(inode, &xi, &xbs); - if (ret) - goto cleanup; - if (!xis.not_found) { - /* - * If succeed and that extended attribute - * existing in inode, we will remove it. - */ - xi.value = NULL; - xi.value_len = 0; - ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); - } + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mutex_unlock(&tl_inode->i_mutex); + mlog_errno(ret); + goto cleanup; } } + mutex_unlock(&tl_inode->i_mutex); + + ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, + &xbs, &ctxt, &credits); + if (ret) { + mlog_errno(ret); + goto cleanup; + } + + ctxt.handle = ocfs2_start_trans(osb, credits); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto cleanup; + } + + ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); + + ocfs2_commit_trans(osb, ctxt.handle); + + if (ctxt.data_ac) + ocfs2_free_alloc_context(ctxt.data_ac); + if (ctxt.meta_ac) + ocfs2_free_alloc_context(ctxt.meta_ac); + if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); cleanup: up_write(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 1); +cleanup_nolock: brelse(di_bh); brelse(xbs.xattr_bh); - for (i = 0; i < blk_per_bucket; i++) - brelse(xbs.bucket.bhs[i]); + ocfs2_xattr_bucket_free(xbs.bucket); return ret; } @@ -2107,7 +2803,7 @@ typedef int (xattr_bucket_func)(struct inode *inode, void *para); static int ocfs2_find_xe_in_bucket(struct inode *inode, - struct buffer_head *header_bh, + struct 
ocfs2_xattr_bucket *bucket, int name_index, const char *name, u32 name_hash, @@ -2115,11 +2811,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, int *found) { int i, ret = 0, cmp = 1, block_off, new_offset; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)header_bh->b_data; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); size_t name_len = strlen(name); struct ocfs2_xattr_entry *xe = NULL; - struct buffer_head *name_bh = NULL; char *xe_name; /* @@ -2150,19 +2844,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode, break; } - ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, - &name_bh); - if (ret) { - mlog_errno(ret); - break; - } - xe_name = name_bh->b_data + new_offset; - - cmp = memcmp(name, xe_name, name_len); - brelse(name_bh); - name_bh = NULL; - if (cmp == 0) { + xe_name = bucket_block(bucket, block_off) + new_offset; + if (!memcmp(name, xe_name, name_len)) { *xe_index = i; *found = 1; ret = 0; @@ -2192,39 +2876,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, struct ocfs2_xattr_search *xs) { int ret, found = 0; - struct buffer_head *bh = NULL; - struct buffer_head *lower_bh = NULL; struct ocfs2_xattr_header *xh = NULL; struct ocfs2_xattr_entry *xe = NULL; u16 index = 0; u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); int low_bucket = 0, bucket, high_bucket; + struct ocfs2_xattr_bucket *search; u32 last_hash; - u64 blkno; + u64 blkno, lower_blkno = 0; + + search = ocfs2_xattr_bucket_new(inode); + if (!search) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } - ret = ocfs2_read_block(inode, p_blkno, &bh); + ret = ocfs2_read_xattr_bucket(search, p_blkno); if (ret) { mlog_errno(ret); goto out; } - xh = (struct ocfs2_xattr_header *)bh->b_data; + xh = bucket_xh(search); high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; - while (low_bucket <= high_bucket) { - brelse(bh); - bh = NULL; - bucket = (low_bucket + high_bucket) / 2; + ocfs2_xattr_bucket_relse(search); + bucket = 
(low_bucket + high_bucket) / 2; blkno = p_blkno + bucket * blk_per_bucket; - - ret = ocfs2_read_block(inode, blkno, &bh); + ret = ocfs2_read_xattr_bucket(search, blkno); if (ret) { mlog_errno(ret); goto out; } - xh = (struct ocfs2_xattr_header *)bh->b_data; + xh = bucket_xh(search); xe = &xh->xh_entries[0]; if (name_hash < le32_to_cpu(xe->xe_name_hash)) { high_bucket = bucket - 1; @@ -2241,10 +2928,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, last_hash = le32_to_cpu(xe->xe_name_hash); - /* record lower_bh which may be the insert place. */ - brelse(lower_bh); - lower_bh = bh; - bh = NULL; + /* record lower_blkno which may be the insert place. */ + lower_blkno = blkno; if (name_hash > le32_to_cpu(xe->xe_name_hash)) { low_bucket = bucket + 1; @@ -2252,7 +2937,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, } /* the searched xattr should reside in this bucket if exists. */ - ret = ocfs2_find_xe_in_bucket(inode, lower_bh, + ret = ocfs2_find_xe_in_bucket(inode, search, name_index, name, name_hash, &index, &found); if (ret) { @@ -2267,46 +2952,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode, * When the xattr's hash value is in the gap of 2 buckets, we will * always set it to the previous bucket. */ - if (!lower_bh) { - /* - * We can't find any bucket whose first name_hash is less - * than the find name_hash. 
- */ - BUG_ON(bh->b_blocknr != p_blkno); - lower_bh = bh; - bh = NULL; + if (!lower_blkno) + lower_blkno = p_blkno; + + /* This should be in cache - we just read it during the search */ + ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno); + if (ret) { + mlog_errno(ret); + goto out; } - xs->bucket.bhs[0] = lower_bh; - xs->bucket.xh = (struct ocfs2_xattr_header *) - xs->bucket.bhs[0]->b_data; - lower_bh = NULL; - xs->header = xs->bucket.xh; - xs->base = xs->bucket.bhs[0]->b_data; + xs->header = bucket_xh(xs->bucket); + xs->base = bucket_block(xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; if (found) { - /* - * If we have found the xattr enty, read all the blocks in - * this bucket. - */ - ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); - if (ret) { - mlog_errno(ret); - goto out; - } - xs->here = &xs->header->xh_entries[index]; mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); + (unsigned long long)bucket_blkno(xs->bucket), index); } else ret = -ENODATA; out: - brelse(bh); - brelse(lower_bh); + ocfs2_xattr_bucket_free(search); return ret; } @@ -2357,53 +3025,50 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode, xattr_bucket_func *func, void *para) { - int i, j, ret = 0; - int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + int i, ret = 0; u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); u32 num_buckets = clusters * bpc; - struct ocfs2_xattr_bucket bucket; + struct ocfs2_xattr_bucket *bucket; - memset(&bucket, 0, sizeof(bucket)); + bucket = ocfs2_xattr_bucket_new(inode); + if (!bucket) { + mlog_errno(-ENOMEM); + return -ENOMEM; + } mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", clusters, (unsigned long long)blkno); - for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { - ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, - bucket.bhs, 0); 
+ for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) { + ret = ocfs2_read_xattr_bucket(bucket, blkno); if (ret) { mlog_errno(ret); - goto out; + break; } - bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; /* * The real bucket num in this series of blocks is stored * in the 1st bucket. */ if (i == 0) - num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); + num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets); mlog(0, "iterating xattr bucket %llu, first hash %u\n", (unsigned long long)blkno, - le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); + le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash)); if (func) { - ret = func(inode, &bucket, para); - if (ret) { - mlog_errno(ret); - break; - } + ret = func(inode, bucket, para); + if (ret) + mlog_errno(ret); + /* Fall through to bucket_relse() */ } - for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bhs[j]); - memset(&bucket, 0, sizeof(bucket)); + ocfs2_xattr_bucket_relse(bucket); + if (ret) + break; } -out: - for (j = 0; j < blk_per_bucket; j++) - brelse(bucket.bhs[j]); - + ocfs2_xattr_bucket_free(bucket); return ret; } @@ -2441,21 +3106,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode, int i, block_off, new_offset; const char *prefix, *name; - for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { - struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; + for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) { + struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i]; type = ocfs2_xattr_get_type(entry); prefix = ocfs2_xattr_prefix(type); if (prefix) { ret = ocfs2_xattr_bucket_get_name_value(inode, - bucket->xh, + bucket_xh(bucket), i, &block_off, &new_offset); if (ret) break; - name = (const char *)bucket->bhs[block_off]->b_data + + name = (const char *)bucket_block(bucket, block_off) + new_offset; ret = ocfs2_xattr_list_entry(xl->buffer, xl->buffer_size, @@ -2540,32 +3205,34 @@ static void swap_xe(void *a, void *b, int size) /* 
* When the ocfs2_xattr_block is filled up, new bucket will be created * and all the xattr entries will be moved to the new bucket. + * The header goes at the start of the bucket, and the names+values are + * filled from the end. This is why *target starts as the last buffer. * Note: we need to sort the entries since they are not saved in order * in the ocfs2_xattr_block. */ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, struct buffer_head *xb_bh, - struct buffer_head *xh_bh, - struct buffer_head *data_bh) + struct ocfs2_xattr_bucket *bucket) { int i, blocksize = inode->i_sb->s_blocksize; + int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u16 offset, size, off_change; struct ocfs2_xattr_entry *xe; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; - struct ocfs2_xattr_header *xh = - (struct ocfs2_xattr_header *)xh_bh->b_data; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u16 count = le16_to_cpu(xb_xh->xh_count); - char *target = xh_bh->b_data, *src = xb_bh->b_data; + char *src = xb_bh->b_data; + char *target = bucket_block(bucket, blks - 1); mlog(0, "cp xattr from block %llu to bucket %llu\n", (unsigned long long)xb_bh->b_blocknr, - (unsigned long long)xh_bh->b_blocknr); + (unsigned long long)bucket_blkno(bucket)); + + for (i = 0; i < blks; i++) + memset(bucket_block(bucket, i), 0, blocksize); - memset(xh_bh->b_data, 0, blocksize); - if (data_bh) - memset(data_bh->b_data, 0, blocksize); /* * Since the xe_name_offset is based on ocfs2_xattr_header, * there is a offset change corresponding to the change of @@ -2577,8 +3244,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, size = blocksize - offset; /* copy all the names and values. */ - if (data_bh) - target = data_bh->b_data; memcpy(target + offset, src + offset, size); /* Init new header now. 
*/ @@ -2588,7 +3253,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); /* copy all the entries. */ - target = xh_bh->b_data; + target = bucket_block(bucket, 0); offset = offsetof(struct ocfs2_xattr_header, xh_entries); size = count * sizeof(struct ocfs2_xattr_entry); memcpy(target + offset, (char *)xb_xh + offset, size); @@ -2614,73 +3279,53 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, * While if the entry is in index b-tree, "bucket" indicates the * real place of the xattr. */ -static int ocfs2_xattr_update_xattr_search(struct inode *inode, - struct ocfs2_xattr_search *xs, - struct buffer_head *old_bh, - struct buffer_head *new_bh) +static void ocfs2_xattr_update_xattr_search(struct inode *inode, + struct ocfs2_xattr_search *xs, + struct buffer_head *old_bh) { - int ret = 0; char *buf = old_bh->b_data; struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; - int i, blocksize = inode->i_sb->s_blocksize; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - - xs->bucket.bhs[0] = new_bh; - get_bh(new_bh); - xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data; - xs->header = xs->bucket.xh; + int i; - xs->base = new_bh->b_data; + xs->header = bucket_xh(xs->bucket); + xs->base = bucket_block(xs->bucket, 0); xs->end = xs->base + inode->i_sb->s_blocksize; - if (!xs->not_found) { - if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { - ret = ocfs2_read_blocks(inode, - xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); - if (ret) { - mlog_errno(ret); - return ret; - } + if (xs->not_found) + return; - i = xs->here - old_xh->xh_entries; - xs->here = &xs->header->xh_entries[i]; - } + /* + * If a bucket is more than one block, the name+value moved when + * we went to a bucket. 
+ */ + if (xs->bucket->bu_blocks > 1) { + i = xs->here - old_xh->xh_entries; + xs->here = &xs->header->xh_entries[i]; } - - return ret; } static int ocfs2_xattr_create_index_block(struct inode *inode, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { - int ret, credits = OCFS2_SUBALLOC_ALLOC; + int ret; u32 bit_off, len; u64 blkno; - handle_t *handle; + handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_inode_info *oi = OCFS2_I(inode); - struct ocfs2_alloc_context *data_ac; - struct buffer_head *xh_bh = NULL, *data_bh = NULL; struct buffer_head *xb_bh = xs->xattr_bh; struct ocfs2_xattr_block *xb = (struct ocfs2_xattr_block *)xb_bh->b_data; struct ocfs2_xattr_tree_root *xr; u16 xb_flags = le16_to_cpu(xb->xb_flags); - u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb); mlog(0, "create xattr index block for %llu\n", (unsigned long long)xb_bh->b_blocknr); BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); - - ret = ocfs2_reserve_clusters(osb, 1, &data_ac); - if (ret) { - mlog_errno(ret); - goto out; - } + BUG_ON(!xs->bucket); /* * XXX: @@ -2689,29 +3334,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, */ down_write(&oi->ip_alloc_sem); - /* - * 3 more credits, one for xattr block update, one for the 1st block - * of the new xattr bucket and one for the value/data. 
- */ - credits += 3; - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out_sem; - } - ret = ocfs2_journal_access(handle, inode, xb_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } - ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); + ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, + 1, 1, &bit_off, &len); if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } /* @@ -2724,51 +3358,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, mlog(0, "allocate 1 cluster from %llu to xattr block\n", (unsigned long long)blkno); - xh_bh = sb_getblk(inode->i_sb, blkno); - if (!xh_bh) { - ret = -EIO; + ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); + if (ret) { mlog_errno(ret); - goto out_commit; + goto out; } - ocfs2_set_new_buffer_uptodate(inode, xh_bh); - - ret = ocfs2_journal_access(handle, inode, xh_bh, - OCFS2_JOURNAL_ACCESS_CREATE); + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, + OCFS2_JOURNAL_ACCESS_CREATE); if (ret) { mlog_errno(ret); - goto out_commit; - } - - if (bpb > 1) { - data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1); - if (!data_bh) { - ret = -EIO; - mlog_errno(ret); - goto out_commit; - } - - ocfs2_set_new_buffer_uptodate(inode, data_bh); - - ret = ocfs2_journal_access(handle, inode, data_bh, - OCFS2_JOURNAL_ACCESS_CREATE); - if (ret) { - mlog_errno(ret); - goto out_commit; - } + goto out; } - ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); + ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); - ocfs2_journal_dirty(handle, xh_bh); - if (data_bh) - ocfs2_journal_dirty(handle, data_bh); - - ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } + ocfs2_xattr_update_xattr_search(inode, xs, xb_bh); /* Change from ocfs2_xattr_header to 
ocfs2_xattr_tree_root */ memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - @@ -2787,24 +3393,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); - ret = ocfs2_journal_dirty(handle, xb_bh); - if (ret) { - mlog_errno(ret); - goto out_commit; - } - -out_commit: - ocfs2_commit_trans(osb, handle); - -out_sem: - up_write(&oi->ip_alloc_sem); + ocfs2_journal_dirty(handle, xb_bh); out: - if (data_ac) - ocfs2_free_alloc_context(data_ac); - - brelse(xh_bh); - brelse(data_bh); + up_write(&oi->ip_alloc_sem); return ret; } @@ -2829,29 +3421,18 @@ static int cmp_xe_offset(const void *a, const void *b) * so that we can spare some space for insertion. */ static int ocfs2_defrag_xattr_bucket(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_bucket *bucket) { int ret, i; size_t end, offset, len, value_len; struct ocfs2_xattr_header *xh; char *entries, *buf, *bucket_buf = NULL; - u64 blkno = bucket->bhs[0]->b_blocknr; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); + u64 blkno = bucket_blkno(bucket); u16 xh_free_start; size_t blocksize = inode->i_sb->s_blocksize; - handle_t *handle; - struct buffer_head **bhs; struct ocfs2_xattr_entry *xe; - bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!bhs) - return -ENOMEM; - - ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0); - if (ret) - goto out; - /* * In order to make the operation more efficient and generic, * we copy all the blocks into a contiguous memory and do the @@ -2865,26 +3446,16 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, } buf = bucket_buf; - for (i = 0; i < blk_per_bucket; i++, buf += blocksize) - memcpy(buf, bhs[i]->b_data, blocksize); + for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) + memcpy(buf, bucket_block(bucket, i), blocksize); - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); - if (IS_ERR(handle)) { - ret = 
PTR_ERR(handle); - handle = NULL; + ret = ocfs2_xattr_bucket_journal_access(handle, bucket, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { mlog_errno(ret); goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, bhs[i], - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto commit; - } - } - xh = (struct ocfs2_xattr_header *)bucket_buf; entries = (char *)xh->xh_entries; xh_free_start = le16_to_cpu(xh->xh_free_start); @@ -2940,7 +3511,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, "bucket %llu\n", (unsigned long long)blkno); if (xh_free_start == end) - goto commit; + goto out; memset(bucket_buf + xh_free_start, 0, end - xh_free_start); xh->xh_free_start = cpu_to_le16(end); @@ -2951,21 +3522,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, cmp_xe, swap_xe); buf = bucket_buf; - for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { - memcpy(bhs[i]->b_data, buf, blocksize); - ocfs2_journal_dirty(handle, bhs[i]); - } + for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) + memcpy(bucket_block(bucket, i), buf, blocksize); + ocfs2_xattr_bucket_journal_dirty(handle, bucket); -commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: - - if (bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(bhs[i]); - } - kfree(bhs); - kfree(bucket_buf); return ret; } @@ -3015,7 +3576,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, * 1 more for the update of the 1st bucket of the previous * extent record. 
*/ - credits = bpc / 2 + 1; + credits = bpc / 2 + 1 + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -3048,7 +3609,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, prev_blkno, &old_bh); + ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL); if (ret < 0) { mlog_errno(ret); brelse(new_bh); @@ -3092,31 +3653,6 @@ out: return ret; } -static int ocfs2_read_xattr_bucket(struct inode *inode, - u64 blkno, - struct buffer_head **bhs, - int new) -{ - int ret = 0; - u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - - if (!new) - return ocfs2_read_blocks(inode, blkno, - blk_per_bucket, bhs, 0); - - for (i = 0; i < blk_per_bucket; i++) { - bhs[i] = sb_getblk(inode->i_sb, blkno + i); - if (bhs[i] == NULL) { - ret = -EIO; - mlog_errno(ret); - break; - } - ocfs2_set_new_buffer_uptodate(inode, bhs[i]); - } - - return ret; -} - /* * Find the suitable pos when we divide a bucket into 2. 
* We have to make sure the xattrs with the same hash value exist @@ -3178,8 +3714,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, { int ret, i; int count, start, len, name_value_len = 0, xe_len, name_offset = 0; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - struct buffer_head **s_bhs, **t_bhs = NULL; + struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; int blocksize = inode->i_sb->s_blocksize; @@ -3187,47 +3722,47 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, mlog(0, "move some of xattrs from bucket %llu to %llu\n", (unsigned long long)blk, (unsigned long long)new_blk); - s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); - if (!s_bhs) - return -ENOMEM; - - ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0); - if (ret) { + s_bucket = ocfs2_xattr_bucket_new(inode); + t_bucket = ocfs2_xattr_bucket_new(inode); + if (!s_bucket || !t_bucket) { + ret = -ENOMEM; mlog_errno(ret); goto out; } - ret = ocfs2_journal_access(handle, inode, s_bhs[0], - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_read_xattr_bucket(s_bucket, blk); if (ret) { mlog_errno(ret); goto out; } - t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); - if (!t_bhs) { - ret = -ENOMEM; + ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); goto out; } - ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); + /* + * Even if !new_bucket_head, we're overwriting t_bucket. Thus, + * there's no need to read it. + */ + ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); if (ret) { mlog_errno(ret); goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bhs[i], - new_bucket_head ? 
- OCFS2_JOURNAL_ACCESS_CREATE : - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } + ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, + new_bucket_head ? + OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; } - xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + xh = bucket_xh(s_bucket); count = le16_to_cpu(xh->xh_count); start = ocfs2_xattr_find_divide_pos(xh); @@ -3239,10 +3774,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, * The hash value is set as one larger than * that of the last entry in the previous bucket. */ - for (i = 0; i < blk_per_bucket; i++) - memset(t_bhs[i]->b_data, 0, blocksize); + for (i = 0; i < t_bucket->bu_blocks; i++) + memset(bucket_block(t_bucket, i), 0, blocksize); - xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh = bucket_xh(t_bucket); xh->xh_free_start = cpu_to_le16(blocksize); xh->xh_entries[0].xe_name_hash = xe->xe_name_hash; le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1); @@ -3251,11 +3786,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, } /* copy the whole bucket to the new first. */ - for (i = 0; i < blk_per_bucket; i++) - memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); + ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); /* update the new bucket. */ - xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; + xh = bucket_xh(t_bucket); /* * Calculate the total name/value len and xh_free_start for @@ -3319,11 +3853,7 @@ set_num_buckets: else xh->xh_num_buckets = 0; - for (i = 0; i < blk_per_bucket; i++) { - ocfs2_journal_dirty(handle, t_bhs[i]); - if (ret) - mlog_errno(ret); - } + ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); /* store the first_hash of the new bucket. 
*/ if (first_hash) @@ -3337,29 +3867,18 @@ set_num_buckets: if (start == count) goto out; - xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; + xh = bucket_xh(s_bucket); memset(&xh->xh_entries[start], 0, sizeof(struct ocfs2_xattr_entry) * (count - start)); xh->xh_count = cpu_to_le16(start); xh->xh_free_start = cpu_to_le16(name_offset); xh->xh_name_value_len = cpu_to_le16(name_value_len); - ocfs2_journal_dirty(handle, s_bhs[0]); - if (ret) - mlog_errno(ret); + ocfs2_xattr_bucket_journal_dirty(handle, s_bucket); out: - if (s_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(s_bhs[i]); - } - kfree(s_bhs); - - if (t_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(t_bhs[i]); - } - kfree(t_bhs); + ocfs2_xattr_bucket_free(s_bucket); + ocfs2_xattr_bucket_free(t_bucket); return ret; } @@ -3376,10 +3895,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, u64 t_blkno, int t_is_new) { - int ret, i; - int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int blocksize = inode->i_sb->s_blocksize; - struct buffer_head **s_bhs, **t_bhs = NULL; + int ret; + struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL; BUG_ON(s_blkno == t_blkno); @@ -3387,52 +3904,39 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, (unsigned long long)s_blkno, (unsigned long long)t_blkno, t_is_new); - s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!s_bhs) - return -ENOMEM; - - ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); - if (ret) - goto out; - - t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, - GFP_NOFS); - if (!t_bhs) { + s_bucket = ocfs2_xattr_bucket_new(inode); + t_bucket = ocfs2_xattr_bucket_new(inode); + if (!s_bucket || !t_bucket) { ret = -ENOMEM; + mlog_errno(ret); goto out; } + + ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno); + if (ret) + goto out; - ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); + /* + * Even if !t_is_new, we're overwriting t_bucket. 
Thus, + * there's no need to read it. + */ + ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); if (ret) goto out; - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, t_bhs[i], - t_is_new ? - OCFS2_JOURNAL_ACCESS_CREATE : - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) - goto out; - } + ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket, + t_is_new ? + OCFS2_JOURNAL_ACCESS_CREATE : + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) + goto out; - for (i = 0; i < blk_per_bucket; i++) { - memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); - ocfs2_journal_dirty(handle, t_bhs[i]); - } + ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket); + ocfs2_xattr_bucket_journal_dirty(handle, t_bucket); out: - if (s_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(s_bhs[i]); - } - kfree(s_bhs); - - if (t_bhs) { - for (i = 0; i < blk_per_bucket; i++) - brelse(t_bhs[i]); - } - kfree(t_bhs); + ocfs2_xattr_bucket_free(t_bucket); + ocfs2_xattr_bucket_free(s_bucket); return ret; } @@ -3464,7 +3968,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, * We need to update the new cluster and 1 more for the update of * the 1st bucket of the previous extent rec. */ - credits = bpc + 1; + credits = bpc + 1 + handle->h_buffer_credits; ret = ocfs2_extend_trans(handle, credits); if (ret) { mlog_errno(ret); @@ -3497,7 +4001,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode, ocfs2_journal_dirty(handle, first_bh); /* update the new bucket header. 
*/ - ret = ocfs2_read_block(inode, to_blk_start, &bh); + ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL); if (ret < 0) { mlog_errno(ret); goto out; @@ -3534,7 +4038,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode, u32 *first_hash) { u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - int ret, credits = 2 * blk_per_bucket; + int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits; BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); @@ -3644,16 +4148,15 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, u32 *num_clusters, u32 prev_cpos, u64 prev_blkno, - int *extend) + int *extend, + struct ocfs2_xattr_set_ctxt *ctxt) { - int ret, credits; + int ret; u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); u32 prev_clusters = *num_clusters; u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; u64 block; - handle_t *handle = NULL; - struct ocfs2_alloc_context *data_ac = NULL; - struct ocfs2_alloc_context *meta_ac = NULL; + handle_t *handle = ctxt->handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_extent_tree et; @@ -3664,23 +4167,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); - ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, - &data_ac, &meta_ac); - if (ret) { - mlog_errno(ret); - goto leave; - } - - credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, - clusters_to_add); - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; - mlog_errno(ret); - goto leave; - } - ret = ocfs2_journal_access(handle, inode, root_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { @@ -3688,7 +4174,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, goto leave; } - ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, + ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, clusters_to_add, &bit_off, &num_bits); if (ret < 0) { if (ret 
!= -ENOSPC) @@ -3734,41 +4220,20 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode, } } - if (handle->h_buffer_credits < credits) { - /* - * The journal has been restarted before, and don't - * have enough space for the insertion, so extend it - * here. - */ - ret = ocfs2_extend_trans(handle, credits); - if (ret) { - mlog_errno(ret); - goto leave; - } - } mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", num_bits, (unsigned long long)block, v_start); ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, - num_bits, 0, meta_ac); + num_bits, 0, ctxt->meta_ac); if (ret < 0) { mlog_errno(ret); goto leave; } ret = ocfs2_journal_dirty(handle, root_bh); - if (ret < 0) { + if (ret < 0) mlog_errno(ret); - goto leave; - } leave: - if (handle) - ocfs2_commit_trans(osb, handle); - if (data_ac) - ocfs2_free_alloc_context(data_ac); - if (meta_ac) - ocfs2_free_alloc_context(meta_ac); - return ret; } @@ -3777,6 +4242,7 @@ leave: * We meet with start_bh. Only move half of the xattrs to the bucket after it. */ static int ocfs2_extend_xattr_bucket(struct inode *inode, + handle_t *handle, struct buffer_head *first_bh, struct buffer_head *start_bh, u32 num_clusters) @@ -3786,7 +4252,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); u64 start_blk = start_bh->b_blocknr, end_blk; u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); - handle_t *handle; struct ocfs2_xattr_header *first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); @@ -3801,13 +4266,12 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, /* * We will touch all the buckets after the start_bh(include it). - * Add one more bucket and modify the first_bh. + * Then we add one more bucket. 
*/ - credits = end_blk - start_blk + 2 * blk_per_bucket + 1; - handle = ocfs2_start_trans(osb, credits); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; + credits = end_blk - start_blk + 3 * blk_per_bucket + 1 + + handle->h_buffer_credits; + ret = ocfs2_extend_trans(handle, credits); + if (ret) { mlog_errno(ret); goto out; } @@ -3816,14 +4280,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto commit; + goto out; } while (end_blk != start_blk) { ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, end_blk + blk_per_bucket, 0); if (ret) - goto commit; + goto out; end_blk -= blk_per_bucket; } @@ -3834,8 +4298,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode, le16_add_cpu(&first_xh->xh_num_buckets, 1); ocfs2_journal_dirty(handle, first_bh); -commit: - ocfs2_commit_trans(osb, handle); out: return ret; } @@ -3851,7 +4313,8 @@ out: */ static int ocfs2_add_new_xattr_bucket(struct inode *inode, struct buffer_head *xb_bh, - struct buffer_head *header_bh) + struct buffer_head *header_bh, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_xattr_header *first_xh = NULL; struct buffer_head *first_bh = NULL; @@ -3885,7 +4348,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, goto out; } - ret = ocfs2_read_block(inode, p_blkno, &first_bh); + ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -3902,7 +4365,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, &num_clusters, e_cpos, p_blkno, - &extend); + &extend, + ctxt); if (ret) { mlog_errno(ret); goto out; @@ -3911,6 +4375,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode, if (extend) ret = ocfs2_extend_xattr_bucket(inode, + ctxt->handle, first_bh, header_bh, num_clusters); @@ -3929,7 +4394,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, int block_off = offs >> inode->i_sb->s_blocksize_bits; offs = offs % 
inode->i_sb->s_blocksize; - return bucket->bhs[block_off]->b_data + offs; + return bucket_block(bucket, block_off) + offs; } /* @@ -3984,7 +4449,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode, xe->xe_value_size = 0; val = ocfs2_xattr_bucket_get_val(inode, - &xs->bucket, offs); + xs->bucket, offs); memset(val + OCFS2_XATTR_SIZE(name_len), 0, size - OCFS2_XATTR_SIZE(name_len)); if (OCFS2_XATTR_SIZE(xi->value_len) > 0) @@ -4062,8 +4527,7 @@ set_new_name_value: xh->xh_free_start = cpu_to_le16(offs); } - val = ocfs2_xattr_bucket_get_val(inode, - &xs->bucket, offs - size); + val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size); xe->xe_name_offset = cpu_to_le16(offs - size); memset(val, 0, size); @@ -4079,115 +4543,62 @@ set_new_name_value: return; } -static int ocfs2_xattr_bucket_handle_journal(struct inode *inode, - handle_t *handle, - struct ocfs2_xattr_search *xs, - struct buffer_head **bhs, - u16 bh_num) -{ - int ret = 0, off, block_off; - struct ocfs2_xattr_entry *xe = xs->here; - - /* - * First calculate all the blocks we should journal_access - * and journal_dirty. The first block should always be touched. - */ - ret = ocfs2_journal_dirty(handle, bhs[0]); - if (ret) - mlog_errno(ret); - - /* calc the data. */ - off = le16_to_cpu(xe->xe_name_offset); - block_off = off >> inode->i_sb->s_blocksize_bits; - ret = ocfs2_journal_dirty(handle, bhs[block_off]); - if (ret) - mlog_errno(ret); - - return ret; -} - /* * Set the xattr entry in the specified bucket. * The bucket is indicated by xs->bucket and it should have the enough * space for the xattr insertion. 
*/ static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_info *xi, struct ocfs2_xattr_search *xs, u32 name_hash, int local) { - int i, ret; - handle_t *handle = NULL; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int ret; + u64 blkno; mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", (unsigned long)xi->value_len, xi->name_index, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr); + (unsigned long long)bucket_blkno(xs->bucket)); - if (!xs->bucket.bhs[1]) { - ret = ocfs2_read_blocks(inode, - xs->bucket.bhs[0]->b_blocknr + 1, - blk_per_bucket - 1, &xs->bucket.bhs[1], - 0); + if (!xs->bucket->bu_bhs[1]) { + blkno = bucket_blkno(xs->bucket); + ocfs2_xattr_bucket_relse(xs->bucket); + ret = ocfs2_read_xattr_bucket(xs->bucket, blkno); if (ret) { mlog_errno(ret); goto out; } } - handle = ocfs2_start_trans(osb, blk_per_bucket); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - handle = NULL; + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { mlog_errno(ret); goto out; } - for (i = 0; i < blk_per_bucket; i++) { - ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i], - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret < 0) { - mlog_errno(ret); - goto out; - } - } - ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); - /*Only dirty the blocks we have touched in set xattr. 
*/ - ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, - xs->bucket.bhs, blk_per_bucket); - if (ret) - mlog_errno(ret); out: - ocfs2_commit_trans(osb, handle); - return ret; } static int ocfs2_xattr_value_update_size(struct inode *inode, + handle_t *handle, struct buffer_head *xe_bh, struct ocfs2_xattr_entry *xe, u64 new_size) { int ret; - struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - handle_t *handle = NULL; - - handle = ocfs2_start_trans(osb, 1); - if (IS_ERR(handle)) { - ret = -ENOMEM; - mlog_errno(ret); - goto out; - } ret = ocfs2_journal_access(handle, inode, xe_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (ret < 0) { mlog_errno(ret); - goto out_commit; + goto out; } xe->xe_value_size = cpu_to_le64(new_size); @@ -4196,8 +4607,6 @@ static int ocfs2_xattr_value_update_size(struct inode *inode, if (ret < 0) mlog_errno(ret); -out_commit: - ocfs2_commit_trans(osb, handle); out: return ret; } @@ -4212,7 +4621,8 @@ out: static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, struct buffer_head *header_bh, int xe_off, - int len) + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, offset; u64 value_blk; @@ -4236,7 +4646,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); value_blk += header_bh->b_blocknr; - ret = ocfs2_read_block(inode, value_blk, &value_bh); + ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL); if (ret) { mlog_errno(ret); goto out; @@ -4247,13 +4657,14 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", xe_off, (unsigned long long)header_bh->b_blocknr, len); - ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len); + ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt); if (ret) { mlog_errno(ret); goto out; } - ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); + ret = ocfs2_xattr_value_update_size(inode, ctxt->handle, + header_bh, 
xe, len); if (ret) { mlog_errno(ret); goto out; @@ -4265,18 +4676,19 @@ out: } static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, - struct ocfs2_xattr_search *xs, - int len) + struct ocfs2_xattr_search *xs, + int len, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, offset; struct ocfs2_xattr_entry *xe = xs->here; struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; - BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); + BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe)); offset = xe - xh->xh_entries; - ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], - offset, len); + ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0], + offset, len, ctxt); if (ret) mlog_errno(ret); @@ -4284,6 +4696,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, } static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_search *xs, char *val, int value_len) @@ -4299,7 +4712,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); - return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); + return __ocfs2_xattr_set_value_outside(inode, handle, + xv, val, value_len); } static int ocfs2_rm_xattr_cluster(struct inode *inode, @@ -4343,7 +4757,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, } } - handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); if (IS_ERR(handle)) { ret = -ENOMEM; mlog_errno(ret); @@ -4392,26 +4806,19 @@ out: } static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, + handle_t *handle, struct ocfs2_xattr_search *xs) { - handle_t *handle = NULL; - struct ocfs2_xattr_header *xh = xs->bucket.xh; + struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); struct ocfs2_xattr_entry *last = &xh->xh_entries[ le16_to_cpu(xh->xh_count) - 1]; 
int ret = 0; - handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - return; - } - - ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0], - OCFS2_JOURNAL_ACCESS_WRITE); + ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket, + OCFS2_JOURNAL_ACCESS_WRITE); if (ret) { mlog_errno(ret); - goto out_commit; + return; } /* Remove the old entry. */ @@ -4420,11 +4827,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, memset(last, 0, sizeof(struct ocfs2_xattr_entry)); le16_add_cpu(&xh->xh_count, -1); - ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); - if (ret < 0) - mlog_errno(ret); -out_commit: - ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket); } /* @@ -4440,7 +4843,8 @@ out_commit: */ static int ocfs2_xattr_set_in_bucket(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { int ret, local = 1; size_t value_len; @@ -4468,7 +4872,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, value_len = 0; ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, - value_len); + value_len, + ctxt); if (ret) goto out; @@ -4488,7 +4893,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, xi->value_len = OCFS2_XATTR_ROOT_SIZE; } - ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); + ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs, + name_hash, local); if (ret) { mlog_errno(ret); goto out; @@ -4499,7 +4905,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, /* allocate the space now for the outside block storage. 
*/ ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, - value_len); + value_len, ctxt); if (ret) { mlog_errno(ret); @@ -4509,13 +4915,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode, * storage and we have allocated xattr already, * so need to remove it. */ - ocfs2_xattr_bucket_remove_xs(inode, xs); + ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs); } goto out; } set_value_outside: - ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); + ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle, + xs, val, value_len); out: return ret; } @@ -4530,7 +4937,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, struct ocfs2_xattr_bucket *bucket, const char *name) { - struct ocfs2_xattr_header *xh = bucket->xh; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash)) @@ -4540,7 +4947,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, xh->xh_entries[0].xe_name_hash) { mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " "hash = %u\n", - (unsigned long long)bucket->bhs[0]->b_blocknr, + (unsigned long long)bucket_blkno(bucket), le32_to_cpu(xh->xh_entries[0].xe_name_hash)); return -ENOSPC; } @@ -4550,16 +4957,16 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode, static int ocfs2_xattr_set_entry_index_block(struct inode *inode, struct ocfs2_xattr_info *xi, - struct ocfs2_xattr_search *xs) + struct ocfs2_xattr_search *xs, + struct ocfs2_xattr_set_ctxt *ctxt) { struct ocfs2_xattr_header *xh; struct ocfs2_xattr_entry *xe; u16 count, header_size, xh_free_start; - int i, free, max_free, need, old; + int free, max_free, need, old; size_t value_size = 0, name_len = strlen(xi->name); size_t blocksize = inode->i_sb->s_blocksize; int ret, allocation = 0; - u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); mlog_entry("Set xattr %s in 
xattr index block\n", xi->name); @@ -4574,7 +4981,7 @@ try_again: mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " "of %u which exceed block size\n", - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, + (unsigned long long)bucket_blkno(xs->bucket), header_size); if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) @@ -4614,11 +5021,13 @@ try_again: mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" " %u\n", xs->not_found, - (unsigned long long)xs->bucket.bhs[0]->b_blocknr, + (unsigned long long)bucket_blkno(xs->bucket), free, need, max_free, le16_to_cpu(xh->xh_free_start), le16_to_cpu(xh->xh_name_value_len)); - if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { + if (free < need || + (xs->not_found && + count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) { if (need <= max_free && count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { /* @@ -4626,7 +5035,8 @@ try_again: * name/value will be moved, the xe shouldn't be changed * in xs. */ - ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); + ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle, + xs->bucket); if (ret) { mlog_errno(ret); goto out; @@ -4658,7 +5068,7 @@ try_again: * add a new bucket for the insert. 
*/ ret = ocfs2_check_xattr_bucket_collision(inode, - &xs->bucket, + xs->bucket, xi->name); if (ret) { mlog_errno(ret); @@ -4667,16 +5077,14 @@ try_again: ret = ocfs2_add_new_xattr_bucket(inode, xs->xattr_bh, - xs->bucket.bhs[0]); + xs->bucket->bu_bhs[0], + ctxt); if (ret) { mlog_errno(ret); goto out; } - for (i = 0; i < blk_per_bucket; i++) - brelse(xs->bucket.bhs[i]); - - memset(&xs->bucket, 0, sizeof(xs->bucket)); + ocfs2_xattr_bucket_relse(xs->bucket); ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, xi->name_index, @@ -4689,7 +5097,7 @@ try_again: } xattr_set: - ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); + ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt); out: mlog_exit(ret); return ret; @@ -4700,9 +5108,21 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, void *para) { int ret = 0; - struct ocfs2_xattr_header *xh = bucket->xh; + struct ocfs2_xattr_header *xh = bucket_xh(bucket); u16 i; struct ocfs2_xattr_entry *xe; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; + + ocfs2_init_dealloc_ctxt(&ctxt.dealloc); + + ctxt.handle = ocfs2_start_trans(osb, + ocfs2_remove_extent_credits(osb->sb)); + if (IS_ERR(ctxt.handle)) { + ret = PTR_ERR(ctxt.handle); + mlog_errno(ret); + goto out; + } for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { xe = &xh->xh_entries[i]; @@ -4710,14 +5130,18 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode, continue; ret = ocfs2_xattr_bucket_value_truncate(inode, - bucket->bhs[0], - i, 0); + bucket->bu_bhs[0], + i, 0, &ctxt); if (ret) { mlog_errno(ret); break; } } + ret = ocfs2_commit_trans(osb, ctxt.handle); + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &ctxt.dealloc); +out: return ret; } @@ -4768,6 +5192,71 @@ out: } /* + * 'security' attributes support + */ +static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, + size_t list_size, const char *name, + size_t name_len) +{ + const size_t prefix_len = 
XATTR_SECURITY_PREFIX_LEN; + const size_t total_len = prefix_len + name_len + 1; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(list + prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int ocfs2_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, + buffer, size); +} + +static int ocfs2_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + + return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value, + size, flags); +} + +int ocfs2_init_security_get(struct inode *inode, + struct inode *dir, + struct ocfs2_security_xattr_info *si) +{ + return security_inode_init_security(inode, dir, &si->name, &si->value, + &si->value_len); +} + +int ocfs2_init_security_set(handle_t *handle, + struct inode *inode, + struct buffer_head *di_bh, + struct ocfs2_security_xattr_info *si, + struct ocfs2_alloc_context *xattr_ac, + struct ocfs2_alloc_context *data_ac) +{ + return ocfs2_xattr_set_handle(handle, inode, di_bh, + OCFS2_XATTR_INDEX_SECURITY, + si->name, si->value, si->value_len, 0, + xattr_ac, data_ac); +} + +struct xattr_handler ocfs2_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = ocfs2_xattr_security_list, + .get = ocfs2_xattr_security_get, + .set = ocfs2_xattr_security_set, +}; + +/* * 'trusted' attributes support */ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 1d8314c7656d..9a67e7d8f812 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -30,13 +30,44 @@ enum ocfs2_xattr_type { OCFS2_XATTR_MAX }; +struct ocfs2_security_xattr_info { + int enable; + char *name; + void *value; + size_t value_len; +}; + 
extern struct xattr_handler ocfs2_xattr_user_handler; extern struct xattr_handler ocfs2_xattr_trusted_handler; +extern struct xattr_handler ocfs2_xattr_security_handler; +#ifdef CONFIG_OCFS2_FS_POSIX_ACL +extern struct xattr_handler ocfs2_xattr_acl_access_handler; +extern struct xattr_handler ocfs2_xattr_acl_default_handler; +#endif extern struct xattr_handler *ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); +int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, + const char *, void *, size_t); int ocfs2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); +int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *, + int, const char *, const void *, size_t, int, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); int ocfs2_xattr_remove(struct inode *, struct buffer_head *); +int ocfs2_init_security_get(struct inode *, struct inode *, + struct ocfs2_security_xattr_info *); +int ocfs2_init_security_set(handle_t *, struct inode *, + struct buffer_head *, + struct ocfs2_security_xattr_info *, + struct ocfs2_alloc_context *, + struct ocfs2_alloc_context *); +int ocfs2_calc_security_init(struct inode *, + struct ocfs2_security_xattr_info *, + int *, int *, struct ocfs2_alloc_context **); +int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, + int, struct ocfs2_security_xattr_info *, + int *, int *, struct ocfs2_alloc_context **); #endif /* OCFS2_XATTR_H */ diff --git a/fs/quota.c b/fs/quota.c index b7fe44e01618..4a8c94f05f76 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -73,7 +73,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid case Q_SETQUOTA: case Q_GETQUOTA: /* This is just informative test so we are satisfied without a lock */ - if (!sb_has_quota_enabled(sb, type)) + if (!sb_has_quota_active(sb, type)) return -ESRCH; } @@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type) int cnt; 
sb->s_qcop->quota_sync(sb, type); + + if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE) + return; /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ @@ -175,7 +178,7 @@ static void quota_sync_sb(struct super_block *sb, int type) for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!sb_has_quota_active(sb, cnt)) continue; mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA); truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0); @@ -201,7 +204,7 @@ restart: for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && type != cnt) continue; - if (!sb_has_quota_enabled(sb, cnt)) + if (!sb_has_quota_active(sb, cnt)) continue; if (!info_dirty(&sb_dqopt(sb)->info[cnt]) && list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list)) @@ -245,7 +248,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void __u32 fmt; down_read(&sb_dqopt(sb)->dqptr_sem); - if (!sb_has_quota_enabled(sb, type)) { + if (!sb_has_quota_active(sb, type)) { up_read(&sb_dqopt(sb)->dqptr_sem); return -ESRCH; } diff --git a/fs/quota_tree.c b/fs/quota_tree.c new file mode 100644 index 000000000000..953404c95b17 --- /dev/null +++ b/fs/quota_tree.c @@ -0,0 +1,645 @@ +/* + * vfsv0 quota IO operations on file + */ + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/dqblk_v2.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/quotaops.h> + +#include <asm/byteorder.h> + +#include "quota_tree.h" + +MODULE_AUTHOR("Jan Kara"); +MODULE_DESCRIPTION("Quota trie support"); +MODULE_LICENSE("GPL"); + +#define __QUOTA_QT_PARANOIA + +typedef char *dqbuf_t; + +static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth) +{ + unsigned int epb = 
info->dqi_usable_bs >> 2; + + depth = info->dqi_qtree_depth - depth - 1; + while (depth--) + id /= epb; + return id % epb; +} + +/* Number of entries in one blocks */ +static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info) +{ + return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader)) + / info->dqi_entry_size; +} + +static dqbuf_t getdqbuf(size_t size) +{ + dqbuf_t buf = kmalloc(size, GFP_NOFS); + if (!buf) + printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n"); + return buf; +} + +static inline void freedqbuf(dqbuf_t buf) +{ + kfree(buf); +} + +static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf) +{ + struct super_block *sb = info->dqi_sb; + + memset(buf, 0, info->dqi_usable_bs); + return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf, + info->dqi_usable_bs, blk << info->dqi_blocksize_bits); +} + +static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf) +{ + struct super_block *sb = info->dqi_sb; + + return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf, + info->dqi_usable_bs, blk << info->dqi_blocksize_bits); +} + +/* Remove empty block from list and return it */ +static int get_free_dqblk(struct qtree_mem_dqinfo *info) +{ + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; + int ret, blk; + + if (!buf) + return -ENOMEM; + if (info->dqi_free_blk) { + blk = info->dqi_free_blk; + ret = read_blk(info, blk, buf); + if (ret < 0) + goto out_buf; + info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); + } + else { + memset(buf, 0, info->dqi_usable_bs); + /* Assure block allocation... 
*/ + ret = write_blk(info, info->dqi_blocks, buf); + if (ret < 0) + goto out_buf; + blk = info->dqi_blocks++; + } + mark_info_dirty(info->dqi_sb, info->dqi_type); + ret = blk; +out_buf: + freedqbuf(buf); + return ret; +} + +/* Insert empty block to the list */ +static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; + int err; + + dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk); + dh->dqdh_prev_free = cpu_to_le32(0); + dh->dqdh_entries = cpu_to_le16(0); + err = write_blk(info, blk, buf); + if (err < 0) + return err; + info->dqi_free_blk = blk; + mark_info_dirty(info->dqi_sb, info->dqi_type); + return 0; +} + +/* Remove given block from the list of blocks with free entries */ +static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs); + struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; + uint nextblk = le32_to_cpu(dh->dqdh_next_free); + uint prevblk = le32_to_cpu(dh->dqdh_prev_free); + int err; + + if (!tmpbuf) + return -ENOMEM; + if (nextblk) { + err = read_blk(info, nextblk, tmpbuf); + if (err < 0) + goto out_buf; + ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = + dh->dqdh_prev_free; + err = write_blk(info, nextblk, tmpbuf); + if (err < 0) + goto out_buf; + } + if (prevblk) { + err = read_blk(info, prevblk, tmpbuf); + if (err < 0) + goto out_buf; + ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free = + dh->dqdh_next_free; + err = write_blk(info, prevblk, tmpbuf); + if (err < 0) + goto out_buf; + } else { + info->dqi_free_entry = nextblk; + mark_info_dirty(info->dqi_sb, info->dqi_type); + } + freedqbuf(tmpbuf); + dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); + /* No matter whether write succeeds block is out of list */ + if (write_blk(info, blk, buf) < 0) + printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); + return 0; 
+out_buf: + freedqbuf(tmpbuf); + return err; +} + +/* Insert given block to the beginning of list with free entries */ +static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk) +{ + dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs); + struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf; + int err; + + if (!tmpbuf) + return -ENOMEM; + dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry); + dh->dqdh_prev_free = cpu_to_le32(0); + err = write_blk(info, blk, buf); + if (err < 0) + goto out_buf; + if (info->dqi_free_entry) { + err = read_blk(info, info->dqi_free_entry, tmpbuf); + if (err < 0) + goto out_buf; + ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = + cpu_to_le32(blk); + err = write_blk(info, info->dqi_free_entry, tmpbuf); + if (err < 0) + goto out_buf; + } + freedqbuf(tmpbuf); + info->dqi_free_entry = blk; + mark_info_dirty(info->dqi_sb, info->dqi_type); + return 0; +out_buf: + freedqbuf(tmpbuf); + return err; +} + +/* Is the entry in the block free? */ +int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk) +{ + int i; + + for (i = 0; i < info->dqi_entry_size; i++) + if (disk[i]) + return 0; + return 1; +} +EXPORT_SYMBOL(qtree_entry_unused); + +/* Find space for dquot */ +static uint find_free_dqentry(struct qtree_mem_dqinfo *info, + struct dquot *dquot, int *err) +{ + uint blk, i; + struct qt_disk_dqdbheader *dh; + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + char *ddquot; + + *err = 0; + if (!buf) { + *err = -ENOMEM; + return 0; + } + dh = (struct qt_disk_dqdbheader *)buf; + if (info->dqi_free_entry) { + blk = info->dqi_free_entry; + *err = read_blk(info, blk, buf); + if (*err < 0) + goto out_buf; + } else { + blk = get_free_dqblk(info); + if ((int)blk < 0) { + *err = blk; + freedqbuf(buf); + return 0; + } + memset(buf, 0, info->dqi_usable_bs); + /* This is enough as block is already zeroed and entry list is empty... 
*/ + info->dqi_free_entry = blk; + mark_info_dirty(dquot->dq_sb, dquot->dq_type); + } + /* Block will be full? */ + if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) { + *err = remove_free_dqentry(info, buf, blk); + if (*err < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't " + "remove block (%u) from entry free list.\n", + blk); + goto out_buf; + } + } + le16_add_cpu(&dh->dqdh_entries, 1); + /* Find free structure in block */ + for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader); + i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot); + i++, ddquot += info->dqi_entry_size); +#ifdef __QUOTA_QT_PARANOIA + if (i == qtree_dqstr_in_blk(info)) { + printk(KERN_ERR "VFS: find_free_dqentry(): Data block full " + "but it shouldn't.\n"); + *err = -EIO; + goto out_buf; + } +#endif + *err = write_blk(info, blk, buf); + if (*err < 0) { + printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota " + "data block %u.\n", blk); + goto out_buf; + } + dquot->dq_off = (blk << info->dqi_blocksize_bits) + + sizeof(struct qt_disk_dqdbheader) + + i * info->dqi_entry_size; + freedqbuf(buf); + return blk; +out_buf: + freedqbuf(buf); + return 0; +} + +/* Insert reference to structure into the trie */ +static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + uint *treeblk, int depth) +{ + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + int ret = 0, newson = 0, newact = 0; + __le32 *ref; + uint newblk; + + if (!buf) + return -ENOMEM; + if (!*treeblk) { + ret = get_free_dqblk(info); + if (ret < 0) + goto out_buf; + *treeblk = ret; + memset(buf, 0, info->dqi_usable_bs); + newact = 1; + } else { + ret = read_blk(info, *treeblk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't read tree quota block " + "%u.\n", *treeblk); + goto out_buf; + } + } + ref = (__le32 *)buf; + newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (!newblk) + newson = 1; + if (depth == info->dqi_qtree_depth - 1) { +#ifdef 
__QUOTA_QT_PARANOIA + if (newblk) { + printk(KERN_ERR "VFS: Inserting already present quota " + "entry (block %u).\n", + le32_to_cpu(ref[get_index(info, + dquot->dq_id, depth)])); + ret = -EIO; + goto out_buf; + } +#endif + newblk = find_free_dqentry(info, dquot, &ret); + } else { + ret = do_insert_tree(info, dquot, &newblk, depth+1); + } + if (newson && ret >= 0) { + ref[get_index(info, dquot->dq_id, depth)] = + cpu_to_le32(newblk); + ret = write_blk(info, *treeblk, buf); + } else if (newact && ret < 0) { + put_free_dqblk(info, buf, *treeblk); + } +out_buf: + freedqbuf(buf); + return ret; +} + +/* Wrapper for inserting quota structure into tree */ +static inline int dq_insert_tree(struct qtree_mem_dqinfo *info, + struct dquot *dquot) +{ + int tmp = QT_TREEOFF; + return do_insert_tree(info, dquot, &tmp, 0); +} + +/* + * We don't have to be afraid of deadlocks as we never have quotas on quota files... + */ +int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) +{ + int type = dquot->dq_type; + struct super_block *sb = dquot->dq_sb; + ssize_t ret; + dqbuf_t ddquot = getdqbuf(info->dqi_entry_size); + + if (!ddquot) + return -ENOMEM; + + /* dq_off is guarded by dqio_mutex */ + if (!dquot->dq_off) { + ret = dq_insert_tree(info, dquot); + if (ret < 0) { + printk(KERN_ERR "VFS: Error %zd occurred while " + "creating quota.\n", ret); + freedqbuf(ddquot); + return ret; + } + } + spin_lock(&dq_data_lock); + info->dqi_ops->mem2disk_dqblk(ddquot, dquot); + spin_unlock(&dq_data_lock); + ret = sb->s_op->quota_write(sb, type, (char *)ddquot, + info->dqi_entry_size, dquot->dq_off); + if (ret != info->dqi_entry_size) { + printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", + sb->s_id); + if (ret >= 0) + ret = -ENOSPC; + } else { + ret = 0; + } + dqstats.writes++; + freedqbuf(ddquot); + + return ret; +} +EXPORT_SYMBOL(qtree_write_dquot); + +/* Free dquot entry in data block */ +static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, + 
uint blk) +{ + struct qt_disk_dqdbheader *dh; + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + int ret = 0; + + if (!buf) + return -ENOMEM; + if (dquot->dq_off >> info->dqi_blocksize_bits != blk) { + printk(KERN_ERR "VFS: Quota structure has offset to other " + "block (%u) than it should (%u).\n", blk, + (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); + goto out_buf; + } + ret = read_blk(info, blk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); + goto out_buf; + } + dh = (struct qt_disk_dqdbheader *)buf; + le16_add_cpu(&dh->dqdh_entries, -1); + if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ + ret = remove_free_dqentry(info, buf, blk); + if (ret >= 0) + ret = put_free_dqblk(info, buf, blk); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't move quota data block (%u) " + "to free list.\n", blk); + goto out_buf; + } + } else { + memset(buf + + (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)), + 0, info->dqi_entry_size); + if (le16_to_cpu(dh->dqdh_entries) == + qtree_dqstr_in_blk(info) - 1) { + /* Insert will write block itself */ + ret = insert_free_dqentry(info, buf, blk); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't insert quota data " + "block (%u) to free entry list.\n", blk); + goto out_buf; + } + } else { + ret = write_blk(info, blk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't write quota data " + "block %u\n", blk); + goto out_buf; + } + } + } + dquot->dq_off = 0; /* Quota is now unattached */ +out_buf: + freedqbuf(buf); + return ret; +} + +/* Remove reference to dquot from tree */ +static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, + uint *blk, int depth) +{ + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + int ret = 0; + uint newblk; + __le32 *ref = (__le32 *)buf; + + if (!buf) + return -ENOMEM; + ret = read_blk(info, *blk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); + goto out_buf; + } + newblk = 
le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (depth == info->dqi_qtree_depth - 1) { + ret = free_dqentry(info, dquot, newblk); + newblk = 0; + } else { + ret = remove_tree(info, dquot, &newblk, depth+1); + } + if (ret >= 0 && !newblk) { + int i; + ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0); + /* Block got empty? */ + for (i = 0; + i < (info->dqi_usable_bs >> 2) && !ref[i]; + i++); + /* Don't put the root block into the free block list */ + if (i == (info->dqi_usable_bs >> 2) + && *blk != QT_TREEOFF) { + put_free_dqblk(info, buf, *blk); + *blk = 0; + } else { + ret = write_blk(info, *blk, buf); + if (ret < 0) + printk(KERN_ERR "VFS: Can't write quota tree " + "block %u.\n", *blk); + } + } +out_buf: + freedqbuf(buf); + return ret; +} + +/* Delete dquot from tree */ +int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) +{ + uint tmp = QT_TREEOFF; + + if (!dquot->dq_off) /* Even not allocated? */ + return 0; + return remove_tree(info, dquot, &tmp, 0); +} +EXPORT_SYMBOL(qtree_delete_dquot); + +/* Find entry in block */ +static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info, + struct dquot *dquot, uint blk) +{ + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + loff_t ret = 0; + int i; + char *ddquot; + + if (!buf) + return -ENOMEM; + ret = read_blk(info, blk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader); + i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot); + i++, ddquot += info->dqi_entry_size); + if (i == qtree_dqstr_in_blk(info)) { + printk(KERN_ERR "VFS: Quota for id %u referenced " + "but not present.\n", dquot->dq_id); + ret = -EIO; + goto out_buf; + } else { + ret = (blk << info->dqi_blocksize_bits) + sizeof(struct + qt_disk_dqdbheader) + i * info->dqi_entry_size; + } +out_buf: + freedqbuf(buf); + return ret; +} + +/* Find entry for given id in the 
tree */ +static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, + struct dquot *dquot, uint blk, int depth) +{ + dqbuf_t buf = getdqbuf(info->dqi_usable_bs); + loff_t ret = 0; + __le32 *ref = (__le32 *)buf; + + if (!buf) + return -ENOMEM; + ret = read_blk(info, blk, buf); + if (ret < 0) { + printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); + goto out_buf; + } + ret = 0; + blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (!blk) /* No reference? */ + goto out_buf; + if (depth < info->dqi_qtree_depth - 1) + ret = find_tree_dqentry(info, dquot, blk, depth+1); + else + ret = find_block_dqentry(info, dquot, blk); +out_buf: + freedqbuf(buf); + return ret; +} + +/* Find entry for given id in the tree - wrapper function */ +static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info, + struct dquot *dquot) +{ + return find_tree_dqentry(info, dquot, QT_TREEOFF, 0); +} + +int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) +{ + int type = dquot->dq_type; + struct super_block *sb = dquot->dq_sb; + loff_t offset; + dqbuf_t ddquot; + int ret = 0; + +#ifdef __QUOTA_QT_PARANOIA + /* Invalidated quota? */ + if (!sb_dqopt(dquot->dq_sb)->files[type]) { + printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); + return -EIO; + } +#endif + /* Do we know offset of the dquot entry in the quota file? */ + if (!dquot->dq_off) { + offset = find_dqentry(info, dquot); + if (offset <= 0) { /* Entry not present? 
*/ + if (offset < 0) + printk(KERN_ERR "VFS: Can't read quota " + "structure for id %u.\n", dquot->dq_id); + dquot->dq_off = 0; + set_bit(DQ_FAKE_B, &dquot->dq_flags); + memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); + ret = offset; + goto out; + } + dquot->dq_off = offset; + } + ddquot = getdqbuf(info->dqi_entry_size); + if (!ddquot) + return -ENOMEM; + ret = sb->s_op->quota_read(sb, type, (char *)ddquot, + info->dqi_entry_size, dquot->dq_off); + if (ret != info->dqi_entry_size) { + if (ret >= 0) + ret = -EIO; + printk(KERN_ERR "VFS: Error while reading quota " + "structure for id %u.\n", dquot->dq_id); + set_bit(DQ_FAKE_B, &dquot->dq_flags); + memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); + freedqbuf(ddquot); + goto out; + } + spin_lock(&dq_data_lock); + info->dqi_ops->disk2mem_dqblk(dquot, ddquot); + if (!dquot->dq_dqb.dqb_bhardlimit && + !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && + !dquot->dq_dqb.dqb_isoftlimit) + set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dq_data_lock); + freedqbuf(ddquot); +out: + dqstats.reads++; + return ret; +} +EXPORT_SYMBOL(qtree_read_dquot); + +/* Check whether dquot should not be deleted. We know we are + * the only one operating on dquot (thanks to dq_lock) */ +int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) +{ + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) + return qtree_delete_dquot(info, dquot); + return 0; +} +EXPORT_SYMBOL(qtree_release_dquot); diff --git a/fs/quota_tree.h b/fs/quota_tree.h new file mode 100644 index 000000000000..a1ab8db81a51 --- /dev/null +++ b/fs/quota_tree.h @@ -0,0 +1,25 @@ +/* + * Definitions of structures for vfsv0 quota format + */ + +#ifndef _LINUX_QUOTA_TREE_H +#define _LINUX_QUOTA_TREE_H + +#include <linux/types.h> +#include <linux/quota.h> + +/* + * Structure of header of block with quota structures. 
It is padded to 16 bytes so + * there will be space for exactly 21 quota-entries in a block + */ +struct qt_disk_dqdbheader { + __le32 dqdh_next_free; /* Number of next block with free entry */ + __le32 dqdh_prev_free; /* Number of previous block with free entry */ + __le16 dqdh_entries; /* Number of valid entries in block */ + __le16 dqdh_pad1; + __le32 dqdh_pad2; +}; + +#define QT_TREEOFF 1 /* Offset of tree in file in blocks */ + +#endif /* _LINUX_QUOTAIO_TREE_H */ diff --git a/fs/quota_v1.c b/fs/quota_v1.c index 5ae15b13eeb0..b4af1c69ad16 100644 --- a/fs/quota_v1.c +++ b/fs/quota_v1.c @@ -3,25 +3,39 @@ #include <linux/quota.h> #include <linux/quotaops.h> #include <linux/dqblk_v1.h> -#include <linux/quotaio_v1.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <asm/byteorder.h> +#include "quotaio_v1.h" + MODULE_AUTHOR("Jan Kara"); MODULE_DESCRIPTION("Old quota format support"); MODULE_LICENSE("GPL"); +#define QUOTABLOCK_BITS 10 +#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) + +static inline qsize_t v1_stoqb(qsize_t space) +{ + return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS; +} + +static inline qsize_t v1_qbtos(qsize_t blocks) +{ + return blocks << QUOTABLOCK_BITS; +} + static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d) { m->dqb_ihardlimit = d->dqb_ihardlimit; m->dqb_isoftlimit = d->dqb_isoftlimit; m->dqb_curinodes = d->dqb_curinodes; - m->dqb_bhardlimit = d->dqb_bhardlimit; - m->dqb_bsoftlimit = d->dqb_bsoftlimit; - m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS; + m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit); + m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit); + m->dqb_curspace = v1_qbtos(d->dqb_curblocks); m->dqb_itime = d->dqb_itime; m->dqb_btime = d->dqb_btime; } @@ -31,9 +45,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m) d->dqb_ihardlimit = m->dqb_ihardlimit; d->dqb_isoftlimit = m->dqb_isoftlimit; d->dqb_curinodes = m->dqb_curinodes; 
- d->dqb_bhardlimit = m->dqb_bhardlimit; - d->dqb_bsoftlimit = m->dqb_bsoftlimit; - d->dqb_curblocks = toqb(m->dqb_curspace); + d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit); + d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit); + d->dqb_curblocks = v1_stoqb(m->dqb_curspace); d->dqb_itime = m->dqb_itime; d->dqb_btime = m->dqb_btime; } diff --git a/fs/quota_v2.c b/fs/quota_v2.c index b53827dc02d9..b618b563635c 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -6,7 +6,6 @@ #include <linux/fs.h> #include <linux/mount.h> #include <linux/dqblk_v2.h> -#include <linux/quotaio_v2.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> @@ -15,16 +14,37 @@ #include <asm/byteorder.h> +#include "quota_tree.h" +#include "quotaio_v2.h" + MODULE_AUTHOR("Jan Kara"); MODULE_DESCRIPTION("Quota format v2 support"); MODULE_LICENSE("GPL"); #define __QUOTA_V2_PARANOIA -typedef char *dqbuf_t; +static void v2_mem2diskdqb(void *dp, struct dquot *dquot); +static void v2_disk2memdqb(struct dquot *dquot, void *dp); +static int v2_is_id(void *dp, struct dquot *dquot); + +static struct qtree_fmt_operations v2_qtree_ops = { + .mem2disk_dqblk = v2_mem2diskdqb, + .disk2mem_dqblk = v2_disk2memdqb, + .is_id = v2_is_id, +}; + +#define QUOTABLOCK_BITS 10 +#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) -#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff) -#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader))) +static inline qsize_t v2_stoqb(qsize_t space) +{ + return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS; +} + +static inline qsize_t v2_qbtos(qsize_t blocks) +{ + return blocks << QUOTABLOCK_BITS; +} /* Check whether given file is really vfsv0 quotafile */ static int v2_check_quota_file(struct super_block *sb, int type) @@ -50,7 +70,8 @@ static int v2_check_quota_file(struct super_block *sb, int type) static int v2_read_file_info(struct super_block *sb, int type) { struct v2_disk_dqinfo dinfo; - 
struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct qtree_mem_dqinfo *qinfo; ssize_t size; size = sb->s_op->quota_read(sb, type, (char *)&dinfo, @@ -60,15 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type) sb->s_id); return -1; } + info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS); + if (!info->dqi_priv) { + printk(KERN_WARNING + "Not enough memory for quota information structure.\n"); + return -1; + } + qinfo = info->dqi_priv; /* limits are stored as unsigned 32-bit data */ info->dqi_maxblimit = 0xffffffff; info->dqi_maxilimit = 0xffffffff; info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); info->dqi_flags = le32_to_cpu(dinfo.dqi_flags); - info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); - info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); - info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); + qinfo->dqi_sb = sb; + qinfo->dqi_type = type; + qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); + qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); + qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); + qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS; + qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS; + qinfo->dqi_qtree_depth = qtree_depth(qinfo); + qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk); + qinfo->dqi_ops = &v2_qtree_ops; return 0; } @@ -76,7 +111,8 @@ static int v2_read_file_info(struct super_block *sb, int type) static int v2_write_file_info(struct super_block *sb, int type) { struct v2_disk_dqinfo dinfo; - struct mem_dqinfo *info = sb_dqopt(sb)->info+type; + struct mem_dqinfo *info = sb_dqinfo(sb, type); + struct qtree_mem_dqinfo *qinfo = info->dqi_priv; ssize_t size; spin_lock(&dq_data_lock); @@ -85,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type) dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & 
DQF_MASK); spin_unlock(&dq_data_lock); - dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); - dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk); - dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); + dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); + dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); + dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry); size = sb->s_op->quota_write(sb, type, (char *)&dinfo, sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF); if (size != sizeof(struct v2_disk_dqinfo)) { @@ -98,574 +134,75 @@ static int v2_write_file_info(struct super_block *sb, int type) return 0; } -static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d) +static void v2_disk2memdqb(struct dquot *dquot, void *dp) { + struct v2_disk_dqblk *d = dp, empty; + struct mem_dqblk *m = &dquot->dq_dqb; + m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit); m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit); m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes); m->dqb_itime = le64_to_cpu(d->dqb_itime); - m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit); - m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit); + m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit)); + m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit)); m->dqb_curspace = le64_to_cpu(d->dqb_curspace); m->dqb_btime = le64_to_cpu(d->dqb_btime); + /* We need to escape back all-zero structure */ + memset(&empty, 0, sizeof(struct v2_disk_dqblk)); + empty.dqb_itime = cpu_to_le64(1); + if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk))) + m->dqb_itime = 0; } -static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id) +static void v2_mem2diskdqb(void *dp, struct dquot *dquot) { + struct v2_disk_dqblk *d = dp; + struct mem_dqblk *m = &dquot->dq_dqb; + struct qtree_mem_dqinfo *info = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; + d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit); d->dqb_isoftlimit = 
cpu_to_le32(m->dqb_isoftlimit); d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes); d->dqb_itime = cpu_to_le64(m->dqb_itime); - d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit); - d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit); + d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit)); + d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit)); d->dqb_curspace = cpu_to_le64(m->dqb_curspace); d->dqb_btime = cpu_to_le64(m->dqb_btime); - d->dqb_id = cpu_to_le32(id); -} - -static dqbuf_t getdqbuf(void) -{ - dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS); - if (!buf) - printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n"); - return buf; -} - -static inline void freedqbuf(dqbuf_t buf) -{ - kfree(buf); -} - -static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) -{ - memset(buf, 0, V2_DQBLKSIZE); - return sb->s_op->quota_read(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); -} - -static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf) -{ - return sb->s_op->quota_write(sb, type, (char *)buf, - V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS); -} - -/* Remove empty block from list and return it */ -static int get_free_dqblk(struct super_block *sb, int type) -{ - dqbuf_t buf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); - struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; - int ret, blk; - - if (!buf) - return -ENOMEM; - if (info->u.v2_i.dqi_free_blk) { - blk = info->u.v2_i.dqi_free_blk; - if ((ret = read_blk(sb, type, blk, buf)) < 0) - goto out_buf; - info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free); - } - else { - memset(buf, 0, V2_DQBLKSIZE); - /* Assure block allocation... 
*/ - if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0) - goto out_buf; - blk = info->u.v2_i.dqi_blocks++; - } - mark_info_dirty(sb, type); - ret = blk; -out_buf: - freedqbuf(buf); - return ret; -} - -/* Insert empty block to the list */ -static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk) -{ - struct mem_dqinfo *info = sb_dqinfo(sb, type); - struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; - int err; - - dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk); - dh->dqdh_prev_free = cpu_to_le32(0); - dh->dqdh_entries = cpu_to_le16(0); - info->u.v2_i.dqi_free_blk = blk; - mark_info_dirty(sb, type); - /* Some strange block. We had better leave it... */ - if ((err = write_blk(sb, type, blk, buf)) < 0) - return err; - return 0; + d->dqb_id = cpu_to_le32(dquot->dq_id); + if (qtree_entry_unused(info, dp)) + d->dqb_itime = cpu_to_le64(1); } -/* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) +static int v2_is_id(void *dp, struct dquot *dquot) { - dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); - struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; - uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); - int err; + struct v2_disk_dqblk *d = dp; + struct qtree_mem_dqinfo *info = + sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; - if (!tmpbuf) - return -ENOMEM; - if (nextblk) { - if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0) - goto out_buf; - ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free; - if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0) - goto out_buf; - } - if (prevblk) { - if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0) - goto out_buf; - ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free; - if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0) - 
goto out_buf; - } - else { - info->u.v2_i.dqi_free_entry = nextblk; - mark_info_dirty(sb, type); - } - freedqbuf(tmpbuf); - dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); - /* No matter whether write succeeds block is out of list */ - if (write_blk(sb, type, blk, buf) < 0) - printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk); - return 0; -out_buf: - freedqbuf(tmpbuf); - return err; -} - -/* Insert given block to the beginning of list with free entries */ -static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk) -{ - dqbuf_t tmpbuf = getdqbuf(); - struct mem_dqinfo *info = sb_dqinfo(sb, type); - struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; - int err; - - if (!tmpbuf) - return -ENOMEM; - dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry); - dh->dqdh_prev_free = cpu_to_le32(0); - if ((err = write_blk(sb, type, blk, buf)) < 0) - goto out_buf; - if (info->u.v2_i.dqi_free_entry) { - if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) - goto out_buf; - ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk); - if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0) - goto out_buf; - } - freedqbuf(tmpbuf); - info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(sb, type); - return 0; -out_buf: - freedqbuf(tmpbuf); - return err; -} - -/* Find space for dquot */ -static uint find_free_dqentry(struct dquot *dquot, int *err) -{ - struct super_block *sb = dquot->dq_sb; - struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type; - uint blk, i; - struct v2_disk_dqdbheader *dh; - struct v2_disk_dqblk *ddquot; - struct v2_disk_dqblk fakedquot; - dqbuf_t buf; - - *err = 0; - if (!(buf = getdqbuf())) { - *err = -ENOMEM; + if (qtree_entry_unused(info, dp)) return 0; - } - dh = (struct v2_disk_dqdbheader *)buf; - ddquot = GETENTRIES(buf); - if (info->u.v2_i.dqi_free_entry) { - blk = info->u.v2_i.dqi_free_entry; - if ((*err = 
read_blk(sb, dquot->dq_type, blk, buf)) < 0) - goto out_buf; - } - else { - blk = get_free_dqblk(sb, dquot->dq_type); - if ((int)blk < 0) { - *err = blk; - freedqbuf(buf); - return 0; - } - memset(buf, 0, V2_DQBLKSIZE); - /* This is enough as block is already zeroed and entry list is empty... */ - info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(sb, dquot->dq_type); - } - if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ - if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) { - printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); - goto out_buf; - } - le16_add_cpu(&dh->dqdh_entries, 1); - memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); - /* Find free structure in block */ - for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++); -#ifdef __QUOTA_V2_PARANOIA - if (i == V2_DQSTRINBLK) { - printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n"); - *err = -EIO; - goto out_buf; - } -#endif - if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) { - printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk); - goto out_buf; - } - dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk); - freedqbuf(buf); - return blk; -out_buf: - freedqbuf(buf); - return 0; -} - -/* Insert reference to structure into the trie */ -static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) -{ - struct super_block *sb = dquot->dq_sb; - dqbuf_t buf; - int ret = 0, newson = 0, newact = 0; - __le32 *ref; - uint newblk; - - if (!(buf = getdqbuf())) - return -ENOMEM; - if (!*treeblk) { - ret = get_free_dqblk(sb, dquot->dq_type); - if (ret < 0) - goto out_buf; - *treeblk = ret; - memset(buf, 0, V2_DQBLKSIZE); - newact = 1; - } - else { - if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't read tree 
quota block %u.\n", *treeblk); - goto out_buf; - } - } - ref = (__le32 *)buf; - newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); - if (!newblk) - newson = 1; - if (depth == V2_DQTREEDEPTH-1) { -#ifdef __QUOTA_V2_PARANOIA - if (newblk) { - printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)])); - ret = -EIO; - goto out_buf; - } -#endif - newblk = find_free_dqentry(dquot, &ret); - } - else - ret = do_insert_tree(dquot, &newblk, depth+1); - if (newson && ret >= 0) { - ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk); - ret = write_blk(sb, dquot->dq_type, *treeblk, buf); - } - else if (newact && ret < 0) - put_free_dqblk(sb, dquot->dq_type, buf, *treeblk); -out_buf: - freedqbuf(buf); - return ret; + return le32_to_cpu(d->dqb_id) == dquot->dq_id; } -/* Wrapper for inserting quota structure into tree */ -static inline int dq_insert_tree(struct dquot *dquot) +static int v2_read_dquot(struct dquot *dquot) { - int tmp = V2_DQTREEOFF; - return do_insert_tree(dquot, &tmp, 0); + return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); } -/* - * We don't have to be afraid of deadlocks as we never have quotas on quota files... - */ static int v2_write_dquot(struct dquot *dquot) { - int type = dquot->dq_type; - ssize_t ret; - struct v2_disk_dqblk ddquot, empty; - - /* dq_off is guarded by dqio_mutex */ - if (!dquot->dq_off) - if ((ret = dq_insert_tree(dquot)) < 0) { - printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret); - return ret; - } - spin_lock(&dq_data_lock); - mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); - /* Argh... We may need to write structure full of zeroes but that would be - * treated as an empty place by the rest of the code. 
Format change would - * be definitely cleaner but the problems probably are not worth it */ - memset(&empty, 0, sizeof(struct v2_disk_dqblk)); - if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) - ddquot.dqb_itime = cpu_to_le64(1); - spin_unlock(&dq_data_lock); - ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type, - (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off); - if (ret != sizeof(struct v2_disk_dqblk)) { - printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id); - if (ret >= 0) - ret = -ENOSPC; - } - else - ret = 0; - dqstats.writes++; - - return ret; + return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); } -/* Free dquot entry in data block */ -static int free_dqentry(struct dquot *dquot, uint blk) -{ - struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; - struct v2_disk_dqdbheader *dh; - dqbuf_t buf = getdqbuf(); - int ret = 0; - - if (!buf) - return -ENOMEM; - if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) { - printk(KERN_ERR "VFS: Quota structure has offset to other " - "block (%u) than it should (%u).\n", blk, - (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS)); - goto out_buf; - } - if ((ret = read_blk(sb, type, blk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk); - goto out_buf; - } - dh = (struct v2_disk_dqdbheader *)buf; - le16_add_cpu(&dh->dqdh_entries, -1); - if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? 
*/ - if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 || - (ret = put_free_dqblk(sb, type, buf, blk)) < 0) { - printk(KERN_ERR "VFS: Can't move quota data block (%u) " - "to free list.\n", blk); - goto out_buf; - } - } - else { - memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, - sizeof(struct v2_disk_dqblk)); - if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { - /* Insert will write block itself */ - if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) { - printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); - goto out_buf; - } - } - else - if ((ret = write_blk(sb, type, blk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't write quota data " - "block %u\n", blk); - goto out_buf; - } - } - dquot->dq_off = 0; /* Quota is now unattached */ -out_buf: - freedqbuf(buf); - return ret; -} - -/* Remove reference to dquot from tree */ -static int remove_tree(struct dquot *dquot, uint *blk, int depth) -{ - struct super_block *sb = dquot->dq_sb; - int type = dquot->dq_type; - dqbuf_t buf = getdqbuf(); - int ret = 0; - uint newblk; - __le32 *ref = (__le32 *)buf; - - if (!buf) - return -ENOMEM; - if ((ret = read_blk(sb, type, *blk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk); - goto out_buf; - } - newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); - if (depth == V2_DQTREEDEPTH-1) { - ret = free_dqentry(dquot, newblk); - newblk = 0; - } - else - ret = remove_tree(dquot, &newblk, depth+1); - if (ret >= 0 && !newblk) { - int i; - ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0); - for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? 
*/ - /* Don't put the root block into the free block list */ - if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) { - put_free_dqblk(sb, type, buf, *blk); - *blk = 0; - } - else - if ((ret = write_blk(sb, type, *blk, buf)) < 0) - printk(KERN_ERR "VFS: Can't write quota tree " - "block %u.\n", *blk); - } -out_buf: - freedqbuf(buf); - return ret; -} - -/* Delete dquot from tree */ -static int v2_delete_dquot(struct dquot *dquot) -{ - uint tmp = V2_DQTREEOFF; - - if (!dquot->dq_off) /* Even not allocated? */ - return 0; - return remove_tree(dquot, &tmp, 0); -} - -/* Find entry in block */ -static loff_t find_block_dqentry(struct dquot *dquot, uint blk) -{ - dqbuf_t buf = getdqbuf(); - loff_t ret = 0; - int i; - struct v2_disk_dqblk *ddquot = GETENTRIES(buf); - - if (!buf) - return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); - goto out_buf; - } - if (dquot->dq_id) - for (i = 0; i < V2_DQSTRINBLK && - le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++); - else { /* ID 0 as a bit more complicated searching... 
*/ - struct v2_disk_dqblk fakedquot; - - memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk)); - for (i = 0; i < V2_DQSTRINBLK; i++) - if (!le32_to_cpu(ddquot[i].dqb_id) && - memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk))) - break; - } - if (i == V2_DQSTRINBLK) { - printk(KERN_ERR "VFS: Quota for id %u referenced " - "but not present.\n", dquot->dq_id); - ret = -EIO; - goto out_buf; - } - else - ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct - v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk); -out_buf: - freedqbuf(buf); - return ret; -} - -/* Find entry for given id in the tree */ -static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth) -{ - dqbuf_t buf = getdqbuf(); - loff_t ret = 0; - __le32 *ref = (__le32 *)buf; - - if (!buf) - return -ENOMEM; - if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) { - printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk); - goto out_buf; - } - ret = 0; - blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]); - if (!blk) /* No reference? */ - goto out_buf; - if (depth < V2_DQTREEDEPTH-1) - ret = find_tree_dqentry(dquot, blk, depth+1); - else - ret = find_block_dqentry(dquot, blk); -out_buf: - freedqbuf(buf); - return ret; -} - -/* Find entry for given id in the tree - wrapper function */ -static inline loff_t find_dqentry(struct dquot *dquot) -{ - return find_tree_dqentry(dquot, V2_DQTREEOFF, 0); -} - -static int v2_read_dquot(struct dquot *dquot) +static int v2_release_dquot(struct dquot *dquot) { - int type = dquot->dq_type; - loff_t offset; - struct v2_disk_dqblk ddquot, empty; - int ret = 0; - -#ifdef __QUOTA_V2_PARANOIA - /* Invalidated quota? */ - if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) { - printk(KERN_ERR "VFS: Quota invalidated while reading!\n"); - return -EIO; - } -#endif - offset = find_dqentry(dquot); - if (offset <= 0) { /* Entry not present? 
*/ - if (offset < 0) - printk(KERN_ERR "VFS: Can't read quota " - "structure for id %u.\n", dquot->dq_id); - dquot->dq_off = 0; - set_bit(DQ_FAKE_B, &dquot->dq_flags); - memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); - ret = offset; - } - else { - dquot->dq_off = offset; - if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type, - (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset)) - != sizeof(struct v2_disk_dqblk)) { - if (ret >= 0) - ret = -EIO; - printk(KERN_ERR "VFS: Error while reading quota " - "structure for id %u.\n", dquot->dq_id); - memset(&ddquot, 0, sizeof(struct v2_disk_dqblk)); - } - else { - ret = 0; - /* We need to escape back all-zero structure */ - memset(&empty, 0, sizeof(struct v2_disk_dqblk)); - empty.dqb_itime = cpu_to_le64(1); - if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk))) - ddquot.dqb_itime = 0; - } - disk2memdqb(&dquot->dq_dqb, &ddquot); - if (!dquot->dq_dqb.dqb_bhardlimit && - !dquot->dq_dqb.dqb_bsoftlimit && - !dquot->dq_dqb.dqb_ihardlimit && - !dquot->dq_dqb.dqb_isoftlimit) - set_bit(DQ_FAKE_B, &dquot->dq_flags); - } - dqstats.reads++; - - return ret; + return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot); } -/* Check whether dquot should not be deleted. 
We know we are - * the only one operating on dquot (thanks to dq_lock) */ -static int v2_release_dquot(struct dquot *dquot) +static int v2_free_file_info(struct super_block *sb, int type) { - if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) - return v2_delete_dquot(dquot); + kfree(sb_dqinfo(sb, type)->dqi_priv); return 0; } @@ -673,7 +210,7 @@ static struct quota_format_ops v2_format_ops = { .check_quota_file = v2_check_quota_file, .read_file_info = v2_read_file_info, .write_file_info = v2_write_file_info, - .free_file_info = NULL, + .free_file_info = v2_free_file_info, .read_dqblk = v2_read_dquot, .commit_dqblk = v2_write_dquot, .release_dqblk = v2_release_dquot, diff --git a/include/linux/quotaio_v1.h b/fs/quotaio_v1.h index 746654b5de70..746654b5de70 100644 --- a/include/linux/quotaio_v1.h +++ b/fs/quotaio_v1.h diff --git a/include/linux/quotaio_v2.h b/fs/quotaio_v2.h index 303d7cbe30d4..530fe580685c 100644 --- a/include/linux/quotaio_v2.h +++ b/fs/quotaio_v2.h @@ -21,6 +21,12 @@ 0 /* GRPQUOTA */\ } +/* First generic header */ +struct v2_disk_dqheader { + __le32 dqh_magic; /* Magic number identifying file */ + __le32 dqh_version; /* File version */ +}; + /* * The following structure defines the format of the disk quota file * (as it appears on disk) - the file is a radix tree whose leaves point @@ -38,15 +44,6 @@ struct v2_disk_dqblk { __le64 dqb_itime; /* time limit for excessive inode use */ }; -/* - * Here are header structures as written on disk and their in-memory copies - */ -/* First generic header */ -struct v2_disk_dqheader { - __le32 dqh_magic; /* Magic number identifying file */ - __le32 dqh_version; /* File version */ -}; - /* Header with type and version specific information */ struct v2_disk_dqinfo { __le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */ @@ -57,23 +54,7 @@ struct v2_disk_dqinfo { __le32 dqi_free_entry; /* Number of block with at least one free entry */ }; 
-/* - * Structure of header of block with quota structures. It is padded to 16 bytes so - * there will be space for exactly 21 quota-entries in a block - */ -struct v2_disk_dqdbheader { - __le32 dqdh_next_free; /* Number of next block with free entry */ - __le32 dqdh_prev_free; /* Number of previous block with free entry */ - __le16 dqdh_entries; /* Number of valid entries in block */ - __le16 dqdh_pad1; - __le32 dqdh_pad2; -}; - #define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */ -#define V2_DQBLKSIZE_BITS 10 -#define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */ -#define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */ -#define V2_DQTREEDEPTH 4 /* Depth of quota tree */ -#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */ +#define V2_DQBLKSIZE_BITS 10 /* Size of leaf block in tree */ #endif /* _LINUX_QUOTAIO_V2_H */ diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 0d4c1a2f0f74..f863769fa8aa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -657,6 +657,8 @@ static struct dquot_operations reiserfs_quota_operations = { .release_dquot = reiserfs_release_dquot, .mark_dirty = reiserfs_mark_dquot_dirty, .write_info = reiserfs_write_info, + .alloc_dquot = dquot_alloc, + .destroy_dquot = dquot_destroy, }; static struct quotactl_ops reiserfs_qctl_operations = { @@ -1002,8 +1004,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin if (c == 'u' || c == 'g') { int qtype = c == 'u' ? 
USRQUOTA : GRPQUOTA; - if ((sb_any_quota_enabled(s) || - sb_any_quota_suspended(s)) && + if (sb_any_quota_loaded(s) && (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) { reiserfs_warning(s, "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); @@ -1049,8 +1050,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin "reiserfs_parse_options: unknown quota format specified."); return 0; } - if ((sb_any_quota_enabled(s) || - sb_any_quota_suspended(s)) && + if (sb_any_quota_loaded(s) && *qfmt != REISERFS_SB(s)->s_jquota_fmt) { reiserfs_warning(s, "reiserfs_parse_options: cannot change journaled quota options when quota turned on."); @@ -1075,7 +1075,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin } /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ if (!(*mount_options & (1 << REISERFS_QUOTA)) - && sb_any_quota_enabled(s)) { + && sb_any_quota_loaded(s)) { reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on."); return 0; diff --git a/include/linux/Kbuild b/include/linux/Kbuild index e531783e5d78..4c32642678f8 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -56,8 +56,6 @@ header-y += dlm_device.h header-y += dlm_netlink.h header-y += dm-ioctl.h header-y += dn.h -header-y += dqblk_v1.h -header-y += dqblk_v2.h header-y += dqblk_xfs.h header-y += efs_fs_sb.h header-y += elf-fdpic.h @@ -134,8 +132,6 @@ header-y += posix_types.h header-y += ppdev.h header-y += prctl.h header-y += qnxtypes.h -header-y += quotaio_v1.h -header-y += quotaio_v2.h header-y += radeonfb.h header-y += raw.h header-y += resource.h diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h new file mode 100644 index 000000000000..82a16527b367 --- /dev/null +++ b/include/linux/dqblk_qtree.h @@ -0,0 +1,56 @@ +/* + * Definitions of structures and functions for quota formats using trie + */ + 
+#ifndef _LINUX_DQBLK_QTREE_H +#define _LINUX_DQBLK_QTREE_H + +#include <linux/types.h> + +/* Numbers of blocks needed for updates - we count with the smallest + * possible block size (1024) */ +#define QTREE_INIT_ALLOC 4 +#define QTREE_INIT_REWRITE 2 +#define QTREE_DEL_ALLOC 0 +#define QTREE_DEL_REWRITE 6 + +struct dquot; + +/* Operations */ +struct qtree_fmt_operations { + void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */ + void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */ + int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? */ +}; + +/* Inmemory copy of version specific information */ +struct qtree_mem_dqinfo { + struct super_block *dqi_sb; /* Sb quota is on */ + int dqi_type; /* Quota type */ + unsigned int dqi_blocks; /* # of blocks in quota file */ + unsigned int dqi_free_blk; /* First block in list of free blocks */ + unsigned int dqi_free_entry; /* First block with free entry */ + unsigned int dqi_blocksize_bits; /* Block size of quota file */ + unsigned int dqi_entry_size; /* Size of quota entry in quota file */ + unsigned int dqi_usable_bs; /* Space usable in block for quota data */ + unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */ + struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */ +}; + +int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot); +int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk); +static inline int qtree_depth(struct qtree_mem_dqinfo *info) +{ + unsigned int epb = info->dqi_usable_bs >> 2; + unsigned long long entries = epb; + int i; + + for (i = 1; entries < (1ULL << 
32); i++) + entries *= epb; + return i; +} + +#endif /* _LINUX_DQBLK_QTREE_H */ diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h index 57f1250d5a52..3713a7232dd8 100644 --- a/include/linux/dqblk_v1.h +++ b/include/linux/dqblk_v1.h @@ -5,9 +5,6 @@ #ifndef _LINUX_DQBLK_V1_H #define _LINUX_DQBLK_V1_H -/* Id of quota format */ -#define QFMT_VFS_OLD 1 - /* Root squash turned on */ #define V1_DQF_RSQUASH 1 @@ -17,8 +14,4 @@ #define V1_DEL_ALLOC 0 #define V1_DEL_REWRITE 2 -/* Special information about quotafile */ -struct v1_mem_dqinfo { -}; - #endif /* _LINUX_DQBLK_V1_H */ diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h index 4f853322cb7f..18000a542677 100644 --- a/include/linux/dqblk_v2.h +++ b/include/linux/dqblk_v2.h @@ -1,26 +1,16 @@ /* - * Definitions of structures for vfsv0 quota format + * Definitions for vfsv0 quota format */ #ifndef _LINUX_DQBLK_V2_H #define _LINUX_DQBLK_V2_H -#include <linux/types.h> - -/* id numbers of quota format */ -#define QFMT_VFS_V0 2 +#include <linux/dqblk_qtree.h> /* Numbers of blocks needed for updates */ -#define V2_INIT_ALLOC 4 -#define V2_INIT_REWRITE 2 -#define V2_DEL_ALLOC 0 -#define V2_DEL_REWRITE 6 - -/* Inmemory copy of version specific information */ -struct v2_mem_dqinfo { - unsigned int dqi_blocks; - unsigned int dqi_free_blk; - unsigned int dqi_free_entry; -}; +#define V2_INIT_ALLOC QTREE_INIT_ALLOC +#define V2_INIT_REWRITE QTREE_INIT_REWRITE +#define V2_DEL_ALLOC QTREE_DEL_ALLOC +#define V2_DEL_REWRITE QTREE_DEL_REWRITE #endif /* _LINUX_DQBLK_V2_H */ diff --git a/include/linux/quota.h b/include/linux/quota.h index 40401b554484..d72d5d84fde5 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -36,17 +36,7 @@ #include <linux/errno.h> #include <linux/types.h> -#define __DQUOT_VERSION__ "dquot_6.5.1" -#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1 - -/* Size of blocks in which are counted size limits */ -#define QUOTABLOCK_BITS 10 -#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS) - 
-/* Conversion routines from and to quota blocks */ -#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10)) -#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10)) -#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS) +#define __DQUOT_VERSION__ "dquot_6.5.2" #define MAXQUOTAS 2 #define USRQUOTA 0 /* element used for user quotas */ @@ -80,16 +70,34 @@ #define Q_GETQUOTA 0x800007 /* get user quota structure */ #define Q_SETQUOTA 0x800008 /* set user quota structure */ +/* Quota format type IDs */ +#define QFMT_VFS_OLD 1 +#define QFMT_VFS_V0 2 + +/* Size of block in which space limits are passed through the quota + * interface */ +#define QIF_DQBLKSIZE_BITS 10 +#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS) + /* * Quota structure used for communication with userspace via quotactl * Following flags are used to specify which fields are valid */ -#define QIF_BLIMITS 1 -#define QIF_SPACE 2 -#define QIF_ILIMITS 4 -#define QIF_INODES 8 -#define QIF_BTIME 16 -#define QIF_ITIME 32 +enum { + QIF_BLIMITS_B = 0, + QIF_SPACE_B, + QIF_ILIMITS_B, + QIF_INODES_B, + QIF_BTIME_B, + QIF_ITIME_B, +}; + +#define QIF_BLIMITS (1 << QIF_BLIMITS_B) +#define QIF_SPACE (1 << QIF_SPACE_B) +#define QIF_ILIMITS (1 << QIF_ILIMITS_B) +#define QIF_INODES (1 << QIF_INODES_B) +#define QIF_BTIME (1 << QIF_BTIME_B) +#define QIF_ITIME (1 << QIF_ITIME_B) #define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS) #define QIF_USAGE (QIF_SPACE | QIF_INODES) #define QIF_TIMES (QIF_BTIME | QIF_ITIME) @@ -172,7 +180,7 @@ enum { #include <asm/atomic.h> typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */ -typedef __u64 qsize_t; /* Type in which we store sizes */ +typedef long long qsize_t; /* Type in which we store sizes */ extern spinlock_t dq_data_lock; @@ -187,12 +195,12 @@ extern spinlock_t dq_data_lock; * Data for one user/group kept in memory */ struct mem_dqblk { - __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */ - __u32 dqb_bsoftlimit; /* preferred limit on disk blks */ + qsize_t 
dqb_bhardlimit; /* absolute limit on disk blks alloc */ + qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */ qsize_t dqb_curspace; /* current used space */ - __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */ - __u32 dqb_isoftlimit; /* preferred inode limit */ - __u32 dqb_curinodes; /* current # allocated inodes */ + qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */ + qsize_t dqb_isoftlimit; /* preferred inode limit */ + qsize_t dqb_curinodes; /* current # allocated inodes */ time_t dqb_btime; /* time limit for excessive disk use */ time_t dqb_itime; /* time limit for excessive inode use */ }; @@ -212,10 +220,7 @@ struct mem_dqinfo { unsigned int dqi_igrace; qsize_t dqi_maxblimit; qsize_t dqi_maxilimit; - union { - struct v1_mem_dqinfo v1_i; - struct v2_mem_dqinfo v2_i; - } u; + void *dqi_priv; }; struct super_block; @@ -249,6 +254,11 @@ extern struct dqstats dqstats; #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ +#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ + * for the mask of entries set via SETQUOTA\ + * quotactl. They are set under dq_data_lock\ + * and the quota format handling dquot can\ + * clear them when it sees fit. 
*/ struct dquot { struct hlist_node dq_hash; /* Hash list in memory */ @@ -287,11 +297,13 @@ struct dquot_operations { int (*initialize) (struct inode *, int); int (*drop) (struct inode *); int (*alloc_space) (struct inode *, qsize_t, int); - int (*alloc_inode) (const struct inode *, unsigned long); + int (*alloc_inode) (const struct inode *, qsize_t); int (*free_space) (struct inode *, qsize_t); - int (*free_inode) (const struct inode *, unsigned long); + int (*free_inode) (const struct inode *, qsize_t); int (*transfer) (struct inode *, struct iattr *); int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ + struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */ + void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */ int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */ int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */ int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ @@ -320,12 +332,42 @@ struct quota_format_type { struct quota_format_type *qf_next; }; -#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */ -#define DQUOT_USR_SUSPENDED 0x04 /* User diskquotas are off, but +/* Quota state flags - they actually come in two flavors - for users and groups */ +enum { + _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */ + _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */ + _DQUOT_SUSPENDED, /* User diskquotas are off, but * we have necessary info in * memory to turn them on */ -#define DQUOT_GRP_SUSPENDED 0x08 /* The same for group quotas */ + _DQUOT_STATE_FLAGS +}; +#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED) +#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED) +#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED) +#define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \ + DQUOT_SUSPENDED) +/* Other quota flags */ 
+#define DQUOT_QUOTA_SYS_FILE (1 << 6) /* Quota file is a special + * system file and user cannot + * touch it. Filesystem is + * responsible for setting + * S_NOQUOTA, S_NOATIME flags + */ +#define DQUOT_NEGATIVE_USAGE (1 << 7) /* Allow negative quota usage */ + +static inline unsigned int dquot_state_flag(unsigned int flags, int type) +{ + if (type == USRQUOTA) + return flags; + return flags << _DQUOT_STATE_FLAGS; +} + +static inline unsigned int dquot_generic_flag(unsigned int flags, int type) +{ + if (type == USRQUOTA) + return flags; + return flags >> _DQUOT_STATE_FLAGS; +} struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a558a4c1d35a..21b781a3350f 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -24,12 +24,21 @@ void sync_dquots(struct super_block *sb, int type); int dquot_initialize(struct inode *inode, int type); int dquot_drop(struct inode *inode); +int dquot_drop_locked(struct inode *inode); +struct dquot *dqget(struct super_block *sb, unsigned int id, int type); +void dqput(struct dquot *dquot); +int dquot_is_cached(struct super_block *sb, unsigned int id, int type); +int dquot_scan_active(struct super_block *sb, + int (*fn)(struct dquot *dquot, unsigned long priv), + unsigned long priv); +struct dquot *dquot_alloc(struct super_block *sb, int type); +void dquot_destroy(struct dquot *dquot); int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -int dquot_alloc_inode(const struct inode *inode, unsigned long number); +int dquot_alloc_inode(const struct inode *inode, qsize_t number); int dquot_free_space(struct inode *inode, qsize_t number); -int dquot_free_inode(const struct inode *inode, unsigned long number); +int dquot_free_inode(const struct inode *inode, qsize_t number); int dquot_transfer(struct inode *inode, struct iattr *iattr); int dquot_commit(struct dquot *dquot); @@ -40,11 +49,14 @@ int 
dquot_mark_dquot_dirty(struct dquot *dquot); int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount); +int vfs_quota_enable(struct inode *inode, int type, int format_id, + unsigned int flags); int vfs_quota_on_path(struct super_block *sb, int type, int format_id, struct path *path); int vfs_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int vfs_quota_off(struct super_block *sb, int type, int remount); +int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags); int vfs_quota_sync(struct super_block *sb, int type); int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); @@ -64,24 +76,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type) * Functions for checking status of quota */ -static inline int sb_has_quota_enabled(struct super_block *sb, int type) +static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) { - if (type == USRQUOTA) - return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED; - return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED; + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_USAGE_ENABLED, type); } -static inline int sb_any_quota_enabled(struct super_block *sb) +static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) { - return sb_has_quota_enabled(sb, USRQUOTA) || - sb_has_quota_enabled(sb, GRPQUOTA); + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_LIMITS_ENABLED, type); } static inline int sb_has_quota_suspended(struct super_block *sb, int type) { - if (type == USRQUOTA) - return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED; - return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED; + return sb_dqopt(sb)->flags & + dquot_state_flag(DQUOT_SUSPENDED, type); } static inline int sb_any_quota_suspended(struct super_block *sb) @@ -90,6 +100,31 @@ static inline int sb_any_quota_suspended(struct super_block 
*sb) sb_has_quota_suspended(sb, GRPQUOTA); } +/* Does kernel know about any quota information for given sb + type? */ +static inline int sb_has_quota_loaded(struct super_block *sb, int type) +{ + /* Currently if anything is on, then quota usage is on as well */ + return sb_has_quota_usage_enabled(sb, type); +} + +static inline int sb_any_quota_loaded(struct super_block *sb) +{ + return sb_has_quota_loaded(sb, USRQUOTA) || + sb_has_quota_loaded(sb, GRPQUOTA); +} + +static inline int sb_has_quota_active(struct super_block *sb, int type) +{ + return sb_has_quota_loaded(sb, type) && + !sb_has_quota_suspended(sb, type); +} + +static inline int sb_any_quota_active(struct super_block *sb) +{ + return sb_has_quota_active(sb, USRQUOTA) || + sb_has_quota_active(sb, GRPQUOTA); +} + /* * Operations supported for diskquotas. */ @@ -104,7 +139,7 @@ extern struct quotactl_ops vfs_quotactl_ops; static inline void vfs_dq_init(struct inode *inode) { BUG_ON(!inode->i_sb); - if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) + if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) inode->i_sb->dq_op->initialize(inode, -1); } @@ -112,7 +147,7 @@ static inline void vfs_dq_init(struct inode *inode) * a transaction (deadlocks possible otherwise) */ static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { /* Used space is updated in alloc_space() */ if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) return 1; @@ -132,7 +167,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr) static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { /* Used space is updated in alloc_space() */ if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA) return 1; @@ -152,7 +187,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, 
qsize_t nr) static inline int vfs_dq_alloc_inode(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) { + if (sb_any_quota_active(inode->i_sb)) { vfs_dq_init(inode); if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) return 1; @@ -162,7 +197,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode) static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr) { - if (sb_any_quota_enabled(inode->i_sb)) + if (sb_any_quota_active(inode->i_sb)) inode->i_sb->dq_op->free_space(inode, nr); else inode_sub_bytes(inode, nr); @@ -176,7 +211,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr) static inline void vfs_dq_free_inode(struct inode *inode) { - if (sb_any_quota_enabled(inode->i_sb)) + if (sb_any_quota_active(inode->i_sb)) inode->i_sb->dq_op->free_inode(inode, 1); } @@ -197,12 +232,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount) #else -static inline int sb_has_quota_enabled(struct super_block *sb, int type) +static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type) { return 0; } -static inline int sb_any_quota_enabled(struct super_block *sb) +static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type) { return 0; } @@ -217,6 +252,27 @@ static inline int sb_any_quota_suspended(struct super_block *sb) return 0; } +/* Does kernel know about any quota information for given sb + type? */ +static inline int sb_has_quota_loaded(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_loaded(struct super_block *sb) +{ + return 0; +} + +static inline int sb_has_quota_active(struct super_block *sb, int type) +{ + return 0; +} + +static inline int sb_any_quota_active(struct super_block *sb) +{ + return 0; +} + /* * NO-OP when quota not configured. 
*/ diff --git a/mm/pdflush.c b/mm/pdflush.c index a0a14c4d5072..13af84ddada7 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -223,6 +223,7 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0) return ret; } +EXPORT_SYMBOL(pdflush_operation); static void start_one_pdflush_thread(void) { |