summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2008-11-27 20:58:10 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2008-11-27 20:58:10 +1100
commitff9f78de9c27284081ba661774a4409cd99ae388 (patch)
tree6778f42046468a3c3188b060d8d7a746bca893ee
parent33eed11c04c90f6593b5f583509b4d0b4631fdc4 (diff)
parent5e7e135fc3919b7c862432b88ba3ec46b7e9ad70 (diff)
Merge commit 'ocfs2/linux-next'
Conflicts: fs/ocfs2/namei.c
-rw-r--r--Documentation/filesystems/ocfs2.txt6
-rw-r--r--fs/Kconfig18
-rw-r--r--fs/Makefile1
-rw-r--r--fs/dquot.c436
-rw-r--r--fs/ext3/super.c16
-rw-r--r--fs/ext4/super.c15
-rw-r--r--fs/ocfs2/Makefile6
-rw-r--r--fs/ocfs2/acl.c479
-rw-r--r--fs/ocfs2/acl.h58
-rw-r--r--fs/ocfs2/alloc.c399
-rw-r--r--fs/ocfs2/alloc.h21
-rw-r--r--fs/ocfs2/aops.c51
-rw-r--r--fs/ocfs2/buffer_head_io.c47
-rw-r--r--fs/ocfs2/buffer_head_io.h27
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/dir.c147
-rw-r--r--fs/ocfs2/dlm/dlmfs.c4
-rw-r--r--fs/ocfs2/dlm/userdlm.h2
-rw-r--r--fs/ocfs2/dlmglue.c161
-rw-r--r--fs/ocfs2/dlmglue.h19
-rw-r--r--fs/ocfs2/extent_map.c96
-rw-r--r--fs/ocfs2/extent_map.h24
-rw-r--r--fs/ocfs2/file.c193
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/inode.c144
-rw-r--r--fs/ocfs2/inode.h18
-rw-r--r--fs/ocfs2/journal.c176
-rw-r--r--fs/ocfs2/journal.h96
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/namei.c280
-rw-r--r--fs/ocfs2/ocfs2.h33
-rw-r--r--fs/ocfs2/ocfs2_fs.h126
-rw-r--r--fs/ocfs2/ocfs2_jbd_compat.h82
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/quota.h117
-rw-r--r--fs/ocfs2/quota_global.c990
-rw-r--r--fs/ocfs2/quota_local.c1253
-rw-r--r--fs/ocfs2/resize.c60
-rw-r--r--fs/ocfs2/slot_map.c4
-rw-r--r--fs/ocfs2/stack_user.c3
-rw-r--r--fs/ocfs2/suballoc.c278
-rw-r--r--fs/ocfs2/suballoc.h18
-rw-r--r--fs/ocfs2/super.c310
-rw-r--r--fs/ocfs2/symlink.c2
-rw-r--r--fs/ocfs2/xattr.c2267
-rw-r--r--fs/ocfs2/xattr.h31
-rw-r--r--fs/quota.c11
-rw-r--r--fs/quota_tree.c645
-rw-r--r--fs/quota_tree.h25
-rw-r--r--fs/quota_v1.c28
-rw-r--r--fs/quota_v2.c631
-rw-r--r--fs/quotaio_v1.h (renamed from include/linux/quotaio_v1.h)0
-rw-r--r--fs/quotaio_v2.h (renamed from include/linux/quotaio_v2.h)33
-rw-r--r--fs/reiserfs/super.c10
-rw-r--r--include/linux/Kbuild4
-rw-r--r--include/linux/dqblk_qtree.h56
-rw-r--r--include/linux/dqblk_v1.h7
-rw-r--r--include/linux/dqblk_v2.h22
-rw-r--r--include/linux/quota.h108
-rw-r--r--include/linux/quotaops.h96
-rw-r--r--mm/pdflush.c1
61 files changed, 7656 insertions, 2552 deletions
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
index 4340cc825796..c2a0871280a0 100644
--- a/Documentation/filesystems/ocfs2.txt
+++ b/Documentation/filesystems/ocfs2.txt
@@ -28,13 +28,9 @@ Manish Singh <manish.singh@oracle.com>
Caveats
=======
Features which OCFS2 does not support yet:
- - extended attributes
- quotas
- - cluster aware flock
- - cluster aware lockf
- Directory change notification (F_NOTIFY)
- Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
- - POSIX ACLs
Mount options
=============
@@ -82,3 +78,5 @@ inode64 Indicates that Ocfs2 is allowed to create inodes at
bits of significance.
user_xattr (*) Enables Extended User Attributes.
nouser_xattr Disables Extended User Attributes.
+acl Enables POSIX Access Control Lists support.
+noacl (*) Disables POSIX Access Control Lists support.
diff --git a/fs/Kconfig b/fs/Kconfig
index 522469a7eca3..107f1cda2adc 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -189,6 +189,8 @@ config OCFS2_FS
select CONFIGFS_FS
select JBD2
select CRC32
+ select QUOTA
+ select QUOTA_TREE
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
@@ -258,15 +260,14 @@ config OCFS2_DEBUG_FS
this option for debugging only as it is likely to decrease
performance of the filesystem.
-config OCFS2_COMPAT_JBD
- bool "Use JBD for compatibility"
+config OCFS2_FS_POSIX_ACL
+ bool "OCFS2 POSIX Access Control Lists"
depends on OCFS2_FS
+ select FS_POSIX_ACL
default n
- select JBD
help
- The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
- is backwards compatible with JBD. It is safe to say N here.
- However, if you really want to use the original JBD, say Y here.
+ Posix Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
endif # BLOCK
@@ -340,6 +341,10 @@ config PRINT_QUOTA_WARNING
Note that this behavior is currently deprecated and may go away in
future. Please use notification via netlink socket instead.
+# Generic support for tree structured quota files. Seleted when needed.
+config QUOTA_TREE
+ tristate
+
config QFMT_V1
tristate "Old quota format support"
depends on QUOTA
@@ -351,6 +356,7 @@ config QFMT_V1
config QFMT_V2
tristate "Quota format v2 support"
depends on QUOTA
+ select QUOTA_TREE
help
This quota format allows using quotas with 32-bit UIDs/GIDs. If you
need this functionality say Y here.
diff --git a/fs/Makefile b/fs/Makefile
index d9f8afe6f0c4..cdf655640594 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_QUOTA) += dquot.o
obj-$(CONFIG_QFMT_V1) += quota_v1.o
obj-$(CONFIG_QFMT_V2) += quota_v2.o
+obj-$(CONFIG_QUOTA_TREE) += quota_tree.o
obj-$(CONFIG_QUOTACTL) += quota.o
obj-$(CONFIG_DNOTIFY) += dnotify.o
diff --git a/fs/dquot.c b/fs/dquot.c
index c237ccc8581c..61bfff64e5af 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -211,8 +211,6 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
-static void dqput(struct dquot *dquot);
-
static inline unsigned int
hashfn(const struct super_block *sb, unsigned int id, int type)
{
@@ -415,6 +413,17 @@ out_dqlock:
return ret;
}
+void dquot_destroy(struct dquot *dquot)
+{
+ kmem_cache_free(dquot_cachep, dquot);
+}
+EXPORT_SYMBOL(dquot_destroy);
+
+static inline void do_destroy_dquot(struct dquot *dquot)
+{
+ dquot->dq_sb->dq_op->destroy_dquot(dquot);
+}
+
/* Invalidate all dquots on the list. Note that this function is called after
* quota is disabled and pointers from inodes removed so there cannot be new
* quota users. There can still be some users of quotas due to inodes being
@@ -463,9 +472,44 @@ restart:
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
- kmem_cache_free(dquot_cachep, dquot);
+ do_destroy_dquot(dquot);
+ }
+ spin_unlock(&dq_list_lock);
+}
+
+/* Call callback for every active dquot on given filesystem */
+int dquot_scan_active(struct super_block *sb,
+ int (*fn)(struct dquot *dquot, unsigned long priv),
+ unsigned long priv)
+{
+ struct dquot *dquot, *old_dquot = NULL;
+ int ret = 0;
+
+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+ spin_lock(&dq_list_lock);
+ list_for_each_entry(dquot, &inuse_list, dq_inuse) {
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ continue;
+ if (dquot->dq_sb != sb)
+ continue;
+ /* Now we have active dquot so we can just increase use count */
+ atomic_inc(&dquot->dq_count);
+ dqstats.lookups++;
+ spin_unlock(&dq_list_lock);
+ dqput(old_dquot);
+ old_dquot = dquot;
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ spin_lock(&dq_list_lock);
+ /* We are safe to continue now because our dquot could not
+ * be moved out of the inuse list while we hold the reference */
}
spin_unlock(&dq_list_lock);
+out:
+ dqput(old_dquot);
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ return ret;
}
int vfs_quota_sync(struct super_block *sb, int type)
@@ -479,7 +523,7 @@ int vfs_quota_sync(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
spin_lock(&dq_list_lock);
dirty = &dqopt->info[cnt].dqi_dirty_list;
@@ -504,8 +548,8 @@ int vfs_quota_sync(struct super_block *sb, int type)
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)
- && info_dirty(&dqopt->info[cnt]))
+ if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
+ && info_dirty(&dqopt->info[cnt]))
sb->dq_op->write_info(sb, cnt);
spin_lock(&dq_list_lock);
dqstats.syncs++;
@@ -527,7 +571,7 @@ static void prune_dqcache(int count)
remove_dquot_hash(dquot);
remove_free_dquot(dquot);
remove_inuse(dquot);
- kmem_cache_free(dquot_cachep, dquot);
+ do_destroy_dquot(dquot);
count--;
head = free_dquots.prev;
}
@@ -558,7 +602,7 @@ static struct shrinker dqcache_shrinker = {
* NOTE: If you change this function please check whether dqput_blocks() works right...
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
-static void dqput(struct dquot *dquot)
+void dqput(struct dquot *dquot)
{
int ret;
@@ -584,7 +628,7 @@ we_slept:
/* We have more than one user... nothing to do */
atomic_dec(&dquot->dq_count);
/* Releasing dquot during quotaoff phase? */
- if (!sb_has_quota_enabled(dquot->dq_sb, dquot->dq_type) &&
+ if (!sb_has_quota_active(dquot->dq_sb, dquot->dq_type) &&
atomic_read(&dquot->dq_count) == 1)
wake_up(&dquot->dq_wait_unused);
spin_unlock(&dq_list_lock);
@@ -625,11 +669,17 @@ we_slept:
spin_unlock(&dq_list_lock);
}
+struct dquot *dquot_alloc(struct super_block *sb, int type)
+{
+ return kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+}
+EXPORT_SYMBOL(dquot_alloc);
+
static struct dquot *get_empty_dquot(struct super_block *sb, int type)
{
struct dquot *dquot;
- dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
+ dquot = sb->dq_op->alloc_dquot(sb, type);
if(!dquot)
return NODQUOT;
@@ -647,15 +697,33 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
}
/*
+ * Check whether dquot is in memory.
+ * MUST be called with either dqptr_sem or dqonoff_mutex held
+ */
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type)
+{
+ unsigned int hashent = hashfn(sb, id, type);
+ int ret = 0;
+
+ if (!sb_has_quota_active(sb, type))
+ return 0;
+ spin_lock(&dq_list_lock);
+ if (find_dquot(hashent, sb, id, type) != NODQUOT)
+ ret = 1;
+ spin_unlock(&dq_list_lock);
+ return ret;
+}
+
+/*
* Get reference to dquot
* MUST be called with either dqptr_sem or dqonoff_mutex held
*/
-static struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type)
{
unsigned int hashent = hashfn(sb, id, type);
struct dquot *dquot, *empty = NODQUOT;
- if (!sb_has_quota_enabled(sb, type))
+ if (!sb_has_quota_active(sb, type))
return NODQUOT;
we_slept:
spin_lock(&dq_list_lock);
@@ -682,7 +750,7 @@ we_slept:
dqstats.lookups++;
spin_unlock(&dq_list_lock);
if (empty)
- kmem_cache_free(dquot_cachep, empty);
+ do_destroy_dquot(empty);
}
/* Wait for dq_lock - after this we know that either dquot_release() is already
* finished or it will be canceled due to dq_count > 1 test */
@@ -820,7 +888,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
}
}
-static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_curinodes += number;
}
@@ -830,9 +898,10 @@ static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_curspace += number;
}
-static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
+static inline void dquot_decr_inodes(struct dquot *dquot, qsize_t number)
{
- if (dquot->dq_dqb.dqb_curinodes > number)
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ dquot->dq_dqb.dqb_curinodes >= number)
dquot->dq_dqb.dqb_curinodes -= number;
else
dquot->dq_dqb.dqb_curinodes = 0;
@@ -843,11 +912,12 @@ static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number)
static inline void dquot_decr_space(struct dquot *dquot, qsize_t number)
{
- if (dquot->dq_dqb.dqb_curspace > number)
+ if (sb_dqopt(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE ||
+ dquot->dq_dqb.dqb_curspace >= number)
dquot->dq_dqb.dqb_curspace -= number;
else
dquot->dq_dqb.dqb_curspace = 0;
- if (toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ if (dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
dquot->dq_dqb.dqb_btime = (time_t) 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@@ -1023,10 +1093,11 @@ static inline char ignore_hardlimit(struct dquot *dquot)
}
/* needs dq_data_lock */
-static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
+static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
- if (inodes <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+ if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+ test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_ihardlimit &&
@@ -1058,11 +1129,12 @@ static int check_idq(struct dquot *dquot, ulong inodes, char *warntype)
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype)
{
*warntype = QUOTA_NL_NOWARN;
- if (space <= 0 || test_bit(DQ_FAKE_B, &dquot->dq_flags))
+ if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type) ||
+ test_bit(DQ_FAKE_B, &dquot->dq_flags))
return QUOTA_OK;
if (dquot->dq_dqb.dqb_bhardlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bhardlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
*warntype = QUOTA_NL_BHARDWARN;
@@ -1070,7 +1142,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime && get_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) {
if (!prealloc)
@@ -1079,7 +1151,7 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
}
if (dquot->dq_dqb.dqb_bsoftlimit &&
- toqb(dquot->dq_dqb.dqb_curspace + space) > dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace + space > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) {
*warntype = QUOTA_NL_BSOFTWARN;
@@ -1096,10 +1168,11 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
return QUOTA_OK;
}
-static int info_idq_free(struct dquot *dquot, ulong inodes)
+static int info_idq_free(struct dquot *dquot, qsize_t inodes)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
- dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
+ dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit ||
+ !sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_type))
return QUOTA_NL_NOWARN;
if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
@@ -1113,15 +1186,13 @@ static int info_idq_free(struct dquot *dquot, ulong inodes)
static int info_bdq_free(struct dquot *dquot, qsize_t space)
{
if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
- toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
+ dquot->dq_dqb.dqb_curspace <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_NOWARN;
- if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
- dquot->dq_dqb.dqb_bsoftlimit)
+ if (dquot->dq_dqb.dqb_curspace - space <= dquot->dq_dqb.dqb_bsoftlimit)
return QUOTA_NL_BSOFTBELOW;
- if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
- toqb(dquot->dq_dqb.dqb_curspace - space) <
- dquot->dq_dqb.dqb_bhardlimit)
+ if (dquot->dq_dqb.dqb_curspace >= dquot->dq_dqb.dqb_bhardlimit &&
+ dquot->dq_dqb.dqb_curspace - space < dquot->dq_dqb.dqb_bhardlimit)
return QUOTA_NL_BHARDBELOW;
return QUOTA_NL_NOWARN;
}
@@ -1166,17 +1237,23 @@ out_err:
* Release all quotas referenced by inode
* Transaction must be started at an entry
*/
-int dquot_drop(struct inode *inode)
+int dquot_drop_locked(struct inode *inode)
{
int cnt;
- down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt] != NODQUOT) {
dqput(inode->i_dquot[cnt]);
inode->i_dquot[cnt] = NODQUOT;
}
}
+ return 0;
+}
+
+int dquot_drop(struct inode *inode)
+{
+ down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ dquot_drop_locked(inode);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
return 0;
}
@@ -1264,7 +1341,7 @@ warn_put_all:
/*
* This operation can block, but only after everything is updated
*/
-int dquot_alloc_inode(const struct inode *inode, unsigned long number)
+int dquot_alloc_inode(const struct inode *inode, qsize_t number)
{
int cnt, ret = NO_QUOTA;
char warntype[MAXQUOTAS];
@@ -1349,7 +1426,7 @@ out_sub:
/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_inode(const struct inode *inode, unsigned long number)
+int dquot_free_inode(const struct inode *inode, qsize_t number)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1495,7 +1572,7 @@ warn_put_all:
/* Wrapper for transferring ownership of an inode */
int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
{
- if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
+ if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
vfs_dq_init(inode);
if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
return 1;
@@ -1533,54 +1610,27 @@ struct dquot_operations dquot_operations = {
.acquire_dquot = dquot_acquire,
.release_dquot = dquot_release,
.mark_dirty = dquot_mark_dquot_dirty,
- .write_info = dquot_commit_info
+ .write_info = dquot_commit_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
-static inline void set_enable_flags(struct quota_info *dqopt, int type)
-{
- switch (type) {
- case USRQUOTA:
- dqopt->flags |= DQUOT_USR_ENABLED;
- dqopt->flags &= ~DQUOT_USR_SUSPENDED;
- break;
- case GRPQUOTA:
- dqopt->flags |= DQUOT_GRP_ENABLED;
- dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
- break;
- }
-}
-
-static inline void reset_enable_flags(struct quota_info *dqopt, int type,
- int remount)
-{
- switch (type) {
- case USRQUOTA:
- dqopt->flags &= ~DQUOT_USR_ENABLED;
- if (remount)
- dqopt->flags |= DQUOT_USR_SUSPENDED;
- else
- dqopt->flags &= ~DQUOT_USR_SUSPENDED;
- break;
- case GRPQUOTA:
- dqopt->flags &= ~DQUOT_GRP_ENABLED;
- if (remount)
- dqopt->flags |= DQUOT_GRP_SUSPENDED;
- else
- dqopt->flags &= ~DQUOT_GRP_SUSPENDED;
- break;
- }
-}
-
-
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
{
int cnt, ret = 0;
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *toputinode[MAXQUOTAS];
+ /* Cannot turn off usage accounting without turning off limits, or
+ * suspend quotas and simultaneously turn quotas off. */
+ if ((flags & DQUOT_USAGE_ENABLED && !(flags & DQUOT_LIMITS_ENABLED))
+ || (flags & DQUOT_SUSPENDED && flags & (DQUOT_LIMITS_ENABLED |
+ DQUOT_USAGE_ENABLED)))
+ return -EINVAL;
+
/* We need to serialize quota_off() for device */
mutex_lock(&dqopt->dqonoff_mutex);
@@ -1589,7 +1639,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
* sometimes we are called when fill_super() failed and calling
* sync_fs() in such cases does no good.
*/
- if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) {
+ if (!sb_any_quota_loaded(sb)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
@@ -1597,17 +1647,28 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
toputinode[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
- /* If we keep inodes of quota files after remount and quotaoff
- * is called, drop kept inodes. */
- if (!remount && sb_has_quota_suspended(sb, cnt)) {
- iput(dqopt->files[cnt]);
- dqopt->files[cnt] = NULL;
- reset_enable_flags(dqopt, cnt, 0);
+ if (!sb_has_quota_loaded(sb, cnt))
continue;
+
+ if (flags & DQUOT_SUSPENDED) {
+ dqopt->flags |=
+ dquot_state_flag(DQUOT_SUSPENDED, cnt);
+ } else {
+ dqopt->flags &= ~dquot_state_flag(flags, cnt);
+ /* Turning off suspended quotas? */
+ if (!sb_has_quota_loaded(sb, cnt) &&
+ sb_has_quota_suspended(sb, cnt)) {
+ dqopt->flags &= ~dquot_state_flag(
+ DQUOT_SUSPENDED, cnt);
+ iput(dqopt->files[cnt]);
+ dqopt->files[cnt] = NULL;
+ continue;
+ }
}
- if (!sb_has_quota_enabled(sb, cnt))
+
+ /* We still have to keep quota loaded? */
+ if (sb_has_quota_loaded(sb, cnt) && !(flags & DQUOT_SUSPENDED))
continue;
- reset_enable_flags(dqopt, cnt, remount);
/* Note: these are blocking operations */
drop_dquot_ref(sb, cnt);
@@ -1623,7 +1684,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
put_quota_format(dqopt->info[cnt].dqi_format);
toputinode[cnt] = dqopt->files[cnt];
- if (!remount)
+ if (!sb_has_quota_loaded(sb, cnt))
dqopt->files[cnt] = NULL;
dqopt->info[cnt].dqi_flags = 0;
dqopt->info[cnt].dqi_igrace = 0;
@@ -1631,6 +1692,11 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
dqopt->ops[cnt] = NULL;
}
mutex_unlock(&dqopt->dqonoff_mutex);
+
+ /* Skip syncing and setting flags if quota files are hidden */
+ if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
+ goto put_inodes;
+
/* Sync the superblock so that buffers with quota data are written to
* disk (and so userspace sees correct data afterwards). */
if (sb->s_op->sync_fs)
@@ -1646,7 +1712,7 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mutex_lock(&dqopt->dqonoff_mutex);
/* If quota was reenabled in the meantime, we have
* nothing to do */
- if (!sb_has_quota_enabled(sb, cnt)) {
+ if (!sb_has_quota_loaded(sb, cnt)) {
mutex_lock_nested(&toputinode[cnt]->i_mutex, I_MUTEX_QUOTA);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
@@ -1655,26 +1721,43 @@ int vfs_quota_off(struct super_block *sb, int type, int remount)
mark_inode_dirty(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
+ }
+ if (sb->s_bdev)
+ invalidate_bdev(sb->s_bdev);
+put_inodes:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (toputinode[cnt]) {
/* On remount RO, we keep the inode pointer so that we
- * can reenable quota on the subsequent remount RW.
- * But we have better not keep inode pointer when there
- * is pending delete on the quota file... */
- if (!remount)
+ * can reenable quota on the subsequent remount RW. We
+ * have to check 'flags' variable and not use sb_has_
+ * function because another quotaon / quotaoff could
+ * change global state before we got here. We refuse
+ * to suspend quotas when there is pending delete on
+ * the quota file... */
+ if (!(flags & DQUOT_SUSPENDED))
iput(toputinode[cnt]);
else if (!toputinode[cnt]->i_nlink)
ret = -EBUSY;
}
- if (sb->s_bdev)
- invalidate_bdev(sb->s_bdev);
return ret;
}
+int vfs_quota_off(struct super_block *sb, int type, int remount)
+{
+ return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
+ (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+}
+
/*
* Turn quotas on on a device
*/
-/* Helper function when we already have the inode */
-static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
+/*
+ * Helper function to turn quotas on when we already have the inode of
+ * quota file and no quota information is loaded.
+ */
+static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
+ unsigned int flags)
{
struct quota_format_type *fmt = find_quota_format(format_id);
struct super_block *sb = inode->i_sb;
@@ -1696,27 +1779,37 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
error = -EINVAL;
goto out_fmt;
}
+ /* Usage always has to be set... */
+ if (!(flags & DQUOT_USAGE_ENABLED)) {
+ error = -EINVAL;
+ goto out_fmt;
+ }
- /* As we bypass the pagecache we must now flush the inode so that
- * we see all the changes from userspace... */
- write_inode_now(inode, 1);
- /* And now flush the block cache so that kernel sees the changes */
- invalidate_bdev(sb->s_bdev);
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ /* As we bypass the pagecache we must now flush the inode so
+ * that we see all the changes from userspace... */
+ write_inode_now(inode, 1);
+ /* And now flush the block cache so that kernel sees the
+ * changes */
+ invalidate_bdev(sb->s_bdev);
+ }
mutex_lock(&inode->i_mutex);
mutex_lock(&dqopt->dqonoff_mutex);
- if (sb_has_quota_enabled(sb, type) ||
- sb_has_quota_suspended(sb, type)) {
+ if (sb_has_quota_loaded(sb, type)) {
error = -EBUSY;
goto out_lock;
}
- /* We don't want quota and atime on quota files (deadlocks possible)
- * Also nobody should write to the file - we use special IO operations
- * which ignore the immutable bit. */
- down_write(&dqopt->dqptr_sem);
- oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
- inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
- up_write(&dqopt->dqptr_sem);
- sb->dq_op->drop(inode);
+
+ if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) {
+ /* We don't want quota and atime on quota files (deadlocks
+ * possible) Also nobody should write to the file - we use
+ * special IO operations which ignore the immutable bit. */
+ down_write(&dqopt->dqptr_sem);
+ oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | S_NOQUOTA);
+ inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+ up_write(&dqopt->dqptr_sem);
+ sb->dq_op->drop(inode);
+ }
error = -EIO;
dqopt->files[type] = igrab(inode);
@@ -1737,7 +1830,7 @@ static int vfs_quota_on_inode(struct inode *inode, int type, int format_id)
}
mutex_unlock(&dqopt->dqio_mutex);
mutex_unlock(&inode->i_mutex);
- set_enable_flags(dqopt, type);
+ dqopt->flags |= dquot_state_flag(flags, type);
add_dquot_ref(sb, type);
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -1770,20 +1863,23 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *inode;
int ret;
+ unsigned int flags;
mutex_lock(&dqopt->dqonoff_mutex);
if (!sb_has_quota_suspended(sb, type)) {
mutex_unlock(&dqopt->dqonoff_mutex);
return 0;
}
- BUG_ON(sb_has_quota_enabled(sb, type));
-
inode = dqopt->files[type];
dqopt->files[type] = NULL;
- reset_enable_flags(dqopt, type, 0);
+ flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED, type);
+ dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
mutex_unlock(&dqopt->dqonoff_mutex);
- ret = vfs_quota_on_inode(inode, type, dqopt->info[type].dqi_fmt_id);
+ flags = dquot_generic_flag(flags, type);
+ ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
+ flags);
iput(inode);
return ret;
@@ -1799,12 +1895,12 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
if (path->mnt->mnt_sb != sb)
error = -EXDEV;
else
- error = vfs_quota_on_inode(path->dentry->d_inode, type,
- format_id);
+ error = vfs_load_quota_inode(path->dentry->d_inode, type,
+ format_id, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
return error;
}
-/* Actual function called from quotactl() */
int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
int remount)
{
@@ -1823,6 +1919,50 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
}
/*
+ * More powerful function for turning on quotas allowing setting
+ * of individual quota flags
+ */
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+ unsigned int flags)
+{
+ int ret = 0;
+ struct super_block *sb = inode->i_sb;
+ struct quota_info *dqopt = sb_dqopt(sb);
+
+ /* Just unsuspend quotas? */
+ if (flags & DQUOT_SUSPENDED)
+ return vfs_quota_on_remount(sb, type);
+ if (!flags)
+ return 0;
+ /* Just updating flags needed? */
+ if (sb_has_quota_loaded(sb, type)) {
+ mutex_lock(&dqopt->dqonoff_mutex);
+ /* Now do a reliable test... */
+ if (!sb_has_quota_loaded(sb, type)) {
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ goto load_quota;
+ }
+ if (flags & DQUOT_USAGE_ENABLED &&
+ sb_has_quota_usage_enabled(sb, type)) {
+ ret = -EBUSY;
+ goto out_lock;
+ }
+ if (flags & DQUOT_LIMITS_ENABLED &&
+ sb_has_quota_limits_enabled(sb, type)) {
+ ret = -EBUSY;
+ goto out_lock;
+ }
+ sb_dqopt(sb)->flags |= dquot_state_flag(flags, type);
+out_lock:
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ return ret;
+ }
+
+load_quota:
+ return vfs_load_quota_inode(inode, type, format_id, flags);
+}
+
+/*
* This function is used when filesystem needs to initialize quotas
* during mount time.
*/
@@ -1843,7 +1983,8 @@ int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
error = security_quota_on(dentry);
if (!error)
- error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
+ error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
out:
dput(dentry);
@@ -1866,14 +2007,24 @@ int vfs_dq_quota_on_remount(struct super_block *sb)
return ret;
}
+static inline qsize_t qbtos(qsize_t blocks)
+{
+ return blocks << QIF_DQBLKSIZE_BITS;
+}
+
+static inline qsize_t stoqb(qsize_t space)
+{
+ return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
+}
+
/* Generic routine for getting common part of quota structure */
static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
{
struct mem_dqblk *dm = &dquot->dq_dqb;
spin_lock(&dq_data_lock);
- di->dqb_bhardlimit = dm->dqb_bhardlimit;
- di->dqb_bsoftlimit = dm->dqb_bsoftlimit;
+ di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
+ di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
di->dqb_curspace = dm->dqb_curspace;
di->dqb_ihardlimit = dm->dqb_ihardlimit;
di->dqb_isoftlimit = dm->dqb_isoftlimit;
@@ -1918,28 +2069,36 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
if (di->dqb_valid & QIF_SPACE) {
dm->dqb_curspace = di->dqb_curspace;
check_blim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_BLIMITS) {
- dm->dqb_bsoftlimit = di->dqb_bsoftlimit;
- dm->dqb_bhardlimit = di->dqb_bhardlimit;
+ dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
+ dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
check_blim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_INODES) {
dm->dqb_curinodes = di->dqb_curinodes;
check_ilim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
}
if (di->dqb_valid & QIF_ILIMITS) {
dm->dqb_isoftlimit = di->dqb_isoftlimit;
dm->dqb_ihardlimit = di->dqb_ihardlimit;
check_ilim = 1;
+ __set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
}
- if (di->dqb_valid & QIF_BTIME)
+ if (di->dqb_valid & QIF_BTIME) {
dm->dqb_btime = di->dqb_btime;
- if (di->dqb_valid & QIF_ITIME)
+ __set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+ }
+ if (di->dqb_valid & QIF_ITIME) {
dm->dqb_itime = di->dqb_itime;
+ __set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+ }
if (check_blim) {
- if (!dm->dqb_bsoftlimit || toqb(dm->dqb_curspace) < dm->dqb_bsoftlimit) {
+ if (!dm->dqb_bsoftlimit || dm->dqb_curspace < dm->dqb_bsoftlimit) {
dm->dqb_btime = 0;
clear_bit(DQ_BLKS_B, &dquot->dq_flags);
}
@@ -1970,12 +2129,14 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *d
int rc;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!(dquot = dqget(sb, id, type))) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
+ dquot = dqget(sb, id, type);
+ if (!dquot) {
+ rc = -ESRCH;
+ goto out;
}
rc = do_set_dqblk(dquot, di);
dqput(dquot);
+out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return rc;
}
@@ -1986,7 +2147,7 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
struct mem_dqinfo *mi;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_enabled(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return -ESRCH;
}
@@ -2005,11 +2166,12 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
+ int err = 0;
mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
- if (!sb_has_quota_enabled(sb, type)) {
- mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return -ESRCH;
+ if (!sb_has_quota_active(sb, type)) {
+ err = -ESRCH;
+ goto out;
}
mi = sb_dqopt(sb)->info + type;
spin_lock(&dq_data_lock);
@@ -2023,8 +2185,9 @@ int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mark_info_dirty(sb, type);
/* Force write to disk */
sb->dq_op->write_info(sb, type);
+out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
- return 0;
+ return err;
}
struct quotactl_ops vfs_quotactl_ops = {
@@ -2186,10 +2349,13 @@ EXPORT_SYMBOL(register_quota_format);
EXPORT_SYMBOL(unregister_quota_format);
EXPORT_SYMBOL(dqstats);
EXPORT_SYMBOL(dq_data_lock);
+EXPORT_SYMBOL(vfs_quota_enable);
EXPORT_SYMBOL(vfs_quota_on);
EXPORT_SYMBOL(vfs_quota_on_path);
EXPORT_SYMBOL(vfs_quota_on_mount);
+EXPORT_SYMBOL(vfs_quota_disable);
EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_scan_active);
EXPORT_SYMBOL(vfs_quota_sync);
EXPORT_SYMBOL(vfs_get_dqinfo);
EXPORT_SYMBOL(vfs_set_dqinfo);
@@ -2202,7 +2368,11 @@ EXPORT_SYMBOL(dquot_release);
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
EXPORT_SYMBOL(dquot_initialize);
EXPORT_SYMBOL(dquot_drop);
+EXPORT_SYMBOL(dquot_drop_locked);
EXPORT_SYMBOL(vfs_dq_drop);
+EXPORT_SYMBOL(dqget);
+EXPORT_SYMBOL(dqput);
+EXPORT_SYMBOL(dquot_is_cached);
EXPORT_SYMBOL(dquot_alloc_space);
EXPORT_SYMBOL(dquot_alloc_inode);
EXPORT_SYMBOL(dquot_free_space);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c725b1a63a89..f4fb51811c59 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -721,7 +721,9 @@ static struct dquot_operations ext3_quota_operations = {
.acquire_dquot = ext3_acquire_dquot,
.release_dquot = ext3_release_dquot,
.mark_dirty = ext3_mark_dquot_dirty,
- .write_info = ext3_write_info
+ .write_info = ext3_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext3_qctl_operations = {
@@ -1043,8 +1045,7 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT3-fs: Cannot change journaled "
@@ -1083,8 +1084,7 @@ set_qf_name:
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@@ -1103,8 +1103,7 @@ clear_qf_name:
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT3-fs: Cannot change "
"journaled quota options when "
@@ -1123,8 +1122,7 @@ set_qf_format:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) {
+ if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT3-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 59585d1966de..2570fa91864c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -811,7 +811,9 @@ static struct dquot_operations ext4_quota_operations = {
.acquire_dquot = ext4_acquire_dquot,
.release_dquot = ext4_release_dquot,
.mark_dirty = ext4_mark_dquot_dirty,
- .write_info = ext4_write_info
+ .write_info = ext4_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops ext4_qctl_operations = {
@@ -1150,8 +1152,7 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_grpjquota:
qtype = GRPQUOTA;
set_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
!sbi->s_qf_names[qtype]) {
printk(KERN_ERR
"EXT4-fs: Cannot change journaled "
@@ -1190,8 +1191,7 @@ set_qf_name:
case Opt_offgrpjquota:
qtype = GRPQUOTA;
clear_qf_name:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_qf_names[qtype]) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@@ -1210,8 +1210,7 @@ clear_qf_name:
case Opt_jqfmt_vfsv0:
qfmt = QFMT_VFS_V0;
set_qf_format:
- if ((sb_any_quota_enabled(sb) ||
- sb_any_quota_suspended(sb)) &&
+ if (sb_any_quota_loaded(sb) &&
sbi->s_jquota_fmt != qfmt) {
printk(KERN_ERR "EXT4-fs: Cannot change "
"journaled quota options when "
@@ -1230,7 +1229,7 @@ set_qf_format:
set_opt(sbi->s_mount_opt, GRPQUOTA);
break;
case Opt_noquota:
- if (sb_any_quota_enabled(sb)) {
+ if (sb_any_quota_loaded(sb)) {
printk(KERN_ERR "EXT4-fs: Cannot change quota "
"options when quota turned on.\n");
return 0;
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 589dcdfdfe3c..7e4b361b755c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -35,8 +35,14 @@ ocfs2-objs := \
sysfile.o \
uptodate.o \
ver.o \
+ quota_local.o \
+ quota_global.o \
xattr.o
+ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
+ocfs2-objs += acl.o
+endif
+
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
ocfs2_stack_user-objs := stack_user.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
new file mode 100644
index 000000000000..12dfb44c22e5
--- /dev/null
+++ b/fs/ocfs2/acl.c
@@ -0,0 +1,479 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.c
+ *
+ * Copyright (C) 2004, 2008 Oracle. All rights reserved.
+ *
+ * CREDITS:
+ * Lots of code in this file is copy from linux/fs/ext3/acl.c.
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "file.h"
+#include "ocfs2_fs.h"
+
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Convert from xattr value to acl struct.
+ */
+static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
+{
+ int n, count;
+ struct posix_acl *acl;
+
+ if (!value)
+ return NULL;
+ if (size < sizeof(struct posix_acl_entry))
+ return ERR_PTR(-EINVAL);
+
+ count = size / sizeof(struct posix_acl_entry);
+ if (count < 0)
+ return ERR_PTR(-EINVAL);
+ if (count == 0)
+ return NULL;
+
+ acl = posix_acl_alloc(count, GFP_NOFS);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ for (n = 0; n < count; n++) {
+ struct ocfs2_acl_entry *entry =
+ (struct ocfs2_acl_entry *)value;
+
+ acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
+ acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
+ acl->a_entries[n].e_id = le32_to_cpu(entry->e_id);
+ value += sizeof(struct posix_acl_entry);
+
+ }
+ return acl;
+}
+
+/*
+ * Convert acl struct to xattr value.
+ */
+static void *ocfs2_acl_to_xattr(const struct posix_acl *acl, size_t *size)
+{
+ struct ocfs2_acl_entry *entry = NULL;
+ char *ocfs2_acl;
+ size_t n;
+
+ *size = acl->a_count * sizeof(struct posix_acl_entry);
+
+ ocfs2_acl = kmalloc(*size, GFP_NOFS);
+ if (!ocfs2_acl)
+ return ERR_PTR(-ENOMEM);
+
+ entry = (struct ocfs2_acl_entry *)ocfs2_acl;
+ for (n = 0; n < acl->a_count; n++, entry++) {
+ entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
+ entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
+ entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
+ }
+ return ocfs2_acl;
+}
+
+static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
+ int type,
+ struct buffer_head *di_bh)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int name_index;
+ char *value = NULL;
+ struct posix_acl *acl;
+ int retval;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index, "", NULL, 0);
+ if (retval > 0) {
+ value = kmalloc(retval, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ retval = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+ "", value, retval);
+ }
+
+ if (retval > 0)
+ acl = ocfs2_acl_from_xattr(value, retval);
+ else if (retval == -ENODATA || retval == 0)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
+
+ kfree(value);
+
+ return acl;
+}
+
+
+/*
+ * Get posix acl.
+ */
+static struct posix_acl *ocfs2_get_acl(struct inode *inode, int type)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct buffer_head *di_bh = NULL;
+ struct posix_acl *acl;
+ int ret;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ acl = ERR_PTR(ret);
+ return acl;
+ }
+
+ acl = ocfs2_get_acl_nolock(inode, type, di_bh);
+
+ ocfs2_inode_unlock(inode, 0);
+
+ brelse(di_bh);
+
+ return acl;
+}
+
+/*
+ * Set the access or default ACL of an inode.
+ */
+static int ocfs2_set_acl(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ int type,
+ struct posix_acl *acl,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ int name_index;
+ void *value = NULL;
+ size_t size = 0;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ else {
+ inode->i_mode = mode;
+ if (ret == 0)
+ acl = NULL;
+ }
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ value = ocfs2_acl_to_xattr(acl, &size);
+ if (IS_ERR(value))
+ return (int)PTR_ERR(value);
+ }
+
+ if (handle)
+ ret = ocfs2_xattr_set_handle(handle, inode, di_bh, name_index,
+ "", value, size, 0,
+ meta_ac, data_ac);
+ else
+ ret = ocfs2_xattr_set(inode, name_index, "", value, size, 0);
+
+ kfree(value);
+
+ return ret;
+}
+
+int ocfs2_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ int ret = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return ret;
+ }
+
+ return -EAGAIN;
+}
+
+int ocfs2_acl_chmod(struct inode *inode)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl, *clone;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+ ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+ clone, NULL, NULL);
+ posix_acl_release(clone);
+ return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+ dir_bh);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+ if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ ret = ocfs2_set_acl(handle, inode, di_bh,
+ ACL_TYPE_DEFAULT, acl,
+ meta_ac, data_ac);
+ if (ret)
+ goto cleanup;
+ }
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+
+ mode = inode->i_mode;
+ ret = posix_acl_create_masq(clone, &mode);
+ if (ret >= 0) {
+ inode->i_mode = mode;
+ if (ret > 0) {
+ ret = ocfs2_set_acl(handle, inode,
+ di_bh, ACL_TYPE_ACCESS,
+ clone, meta_ac, data_ac);
+ }
+ }
+ posix_acl_release(clone);
+ }
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
+
+static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+ char *list,
+ size_t list_len,
+ const char *name,
+ size_t name_len)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ if (list && size <= list_len)
+ memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
+ return size;
+}
+
+static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+ char *list,
+ size_t list_len,
+ const char *name,
+ size_t name_len)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ if (list && size <= list_len)
+ memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
+ return size;
+}
+
+static int ocfs2_xattr_get_acl(struct inode *inode,
+ int type,
+ void *buffer,
+ size_t size)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ acl = ocfs2_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ ret = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+static int ocfs2_xattr_get_acl_access(struct inode *inode,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+}
+
+static int ocfs2_xattr_get_acl_default(struct inode *inode,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+}
+
+static int ocfs2_xattr_set_acl(struct inode *inode,
+ int type,
+ const void *value,
+ size_t size)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret = 0;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ if (!is_owner_or_cap(inode))
+ return -EPERM;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ else if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret)
+ goto cleanup;
+ }
+ } else
+ acl = NULL;
+
+ ret = ocfs2_set_acl(NULL, inode, NULL, type, acl, NULL, NULL);
+
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
+
+static int ocfs2_xattr_set_acl_access(struct inode *inode,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int ocfs2_xattr_set_acl_default(struct inode *inode,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+struct xattr_handler ocfs2_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .list = ocfs2_xattr_list_acl_access,
+ .get = ocfs2_xattr_get_acl_access,
+ .set = ocfs2_xattr_set_acl_access,
+};
+
+struct xattr_handler ocfs2_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .list = ocfs2_xattr_list_acl_default,
+ .get = ocfs2_xattr_get_acl_default,
+ .set = ocfs2_xattr_set_acl_default,
+};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
new file mode 100644
index 000000000000..8f6389ed4da5
--- /dev/null
+++ b/fs/ocfs2/acl.h
@@ -0,0 +1,58 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * acl.h
+ *
+ * Copyright (C) 2004, 2008 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OCFS2_ACL_H
+#define OCFS2_ACL_H
+
+#include <linux/posix_acl_xattr.h>
+
+struct ocfs2_acl_entry {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+};
+
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+
+extern int ocfs2_check_acl(struct inode *, int);
+extern int ocfs2_acl_chmod(struct inode *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+ struct buffer_head *, struct buffer_head *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
+
+#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
+#define ocfs2_check_acl NULL
+static inline int ocfs2_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+static inline int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ return 0;
+}
+
+#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
+
+#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 0cc2deb9394c..84a7bd4db5da 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/swap.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_DISK_ALLOC
#include <cluster/masklog.h>
@@ -187,20 +188,12 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
static int ocfs2_dinode_sanity_check(struct inode *inode,
struct ocfs2_extent_tree *et)
{
- int ret = 0;
- struct ocfs2_dinode *di;
+ struct ocfs2_dinode *di = et->et_object;
BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
- di = et->et_object;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- ret = -EIO;
- ocfs2_error(inode->i_sb,
- "Inode %llu has invalid path root",
- (unsigned long long)OCFS2_I(inode)->ip_blkno);
- }
-
- return ret;
+ return 0;
}
static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
@@ -686,6 +679,61 @@ struct ocfs2_merge_ctxt {
int c_split_covers_rec;
};
+static int ocfs2_validate_extent_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ struct ocfs2_extent_block *eb =
+ (struct ocfs2_extent_block *)bh->b_data;
+
+ mlog(0, "Validating extent block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
+ ocfs2_error(sb,
+ "Extent block #%llu has bad signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ eb->h_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid h_blkno "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(eb->h_blkno));
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Extent block #%llu has an invalid "
+ "h_fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(eb->h_fs_generation));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+ struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, eb_blkno, &tmp,
+ ocfs2_validate_extent_block);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+
/*
* How many free extents have we got before we need more meta data?
*/
@@ -705,8 +753,7 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
last_eb_blk = ocfs2_et_get_last_eb_blk(et);
if (last_eb_blk) {
- retval = ocfs2_read_block(inode, last_eb_blk,
- &eb_bh);
+ retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
if (retval < 0) {
mlog_errno(retval);
goto bail;
@@ -908,11 +955,8 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
for(i = 0; i < new_blocks; i++) {
bh = new_eb_bhs[i];
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
status = ocfs2_journal_access(handle, inode, bh,
@@ -1052,11 +1096,8 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
}
eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
+ /* ocfs2_create_new_meta_bhs() should create it right! */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
eb_el = &eb->h_list;
root_el = et->et_root_el;
@@ -1176,18 +1217,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
brelse(bh);
bh = NULL;
- status = ocfs2_read_block(inode, blkno, &bh);
+ status = ocfs2_read_extent_block(inode, blkno, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
eb = (struct ocfs2_extent_block *) bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- status = -EIO;
- goto bail;
- }
el = &eb->h_list;
if (le16_to_cpu(el->l_next_free_rec) <
@@ -1540,7 +1576,7 @@ static int __ocfs2_find_path(struct inode *inode,
brelse(bh);
bh = NULL;
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_extent_block(inode, blkno, &bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1548,11 +1584,6 @@ static int __ocfs2_find_path(struct inode *inode,
eb = (struct ocfs2_extent_block *) bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EIO;
- goto out;
- }
if (le16_to_cpu(el->l_next_free_rec) >
le16_to_cpu(el->l_count)) {
@@ -4097,8 +4128,15 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
le16_to_cpu(new_el->l_count)) {
bh = path_leaf_bh(left_path);
eb = (struct ocfs2_extent_block *)bh->b_data;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
- eb);
+ ocfs2_error(inode->i_sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of "
+ "%d. It should have "
+ "matched the l_count of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec),
+ le16_to_cpu(new_el->l_count));
+ status = -EINVAL;
goto out;
}
rec = &new_el->l_recs[
@@ -4147,8 +4185,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
bh = path_leaf_bh(right_path);
eb = (struct ocfs2_extent_block *)bh->b_data;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb,
- eb);
+ ocfs2_error(inode->i_sb,
+ "Extent block #%llu has an "
+ "invalid l_next_free_rec of %d",
+ (unsigned long long)le64_to_cpu(eb->h_blkno),
+ le16_to_cpu(new_el->l_next_free_rec));
+ status = -EINVAL;
goto out;
}
rec = &new_el->l_recs[1];
@@ -4294,7 +4336,9 @@ static int ocfs2_figure_insert_type(struct inode *inode,
* ocfs2_figure_insert_type() and ocfs2_add_branch()
* may want it later.
*/
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &bh);
if (ret) {
mlog_exit(ret);
goto out;
@@ -4760,20 +4804,15 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
if (path->p_tree_depth) {
struct ocfs2_extent_block *eb;
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
- &last_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret) {
mlog_exit(ret);
goto out;
}
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
-
rightmost_el = &eb->h_list;
} else
rightmost_el = path_root_el(path);
@@ -4918,8 +4957,9 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
depth = path->p_tree_depth;
if (depth > 0) {
- ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
- &last_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ ocfs2_et_get_last_eb_blk(et),
+ &last_eb_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -5255,6 +5295,78 @@ out:
return ret;
}
+int ocfs2_remove_btree_range(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 phys_cpos, u32 len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc)
+{
+ int ret;
+ u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ struct ocfs2_alloc_context *meta_ac = NULL;
+
+ ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
+ dealloc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ocfs2_et_update_clusters(inode, et, -len);
+
+ ret = ocfs2_journal_dirty(handle, et->et_root_bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out_commit;
+ }
+
+ ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+ if (ret)
+ mlog_errno(ret);
+
+out_commit:
+ ocfs2_commit_trans(osb, handle);
+out:
+ mutex_unlock(&tl_inode->i_mutex);
+
+ if (meta_ac)
+ ocfs2_free_alloc_context(meta_ac);
+
+ return ret;
+}
+
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
{
struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -5308,13 +5420,13 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
tl_count = le16_to_cpu(tl->tl_count);
mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
tl_count == 0,
@@ -5464,13 +5576,13 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
BUG_ON(mutex_trylock(&tl_inode->i_mutex));
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, di);
- status = -EIO;
- goto out;
- }
+ /* tl_bh is loaded from ocfs2_truncate_log_init(). It's validated
+ * by the underlying call to ocfs2_read_inode_block(), so any
+ * corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
num_to_flush = le16_to_cpu(tl->tl_used);
mlog(0, "Flush %u records from truncate log #%llu\n",
num_to_flush, (unsigned long long)OCFS2_I(tl_inode)->ip_blkno);
@@ -5586,7 +5698,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
iput(inode);
mlog_errno(status);
@@ -5625,13 +5737,13 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
}
di = (struct ocfs2_dinode *) tl_bh->b_data;
- tl = &di->id2.i_dealloc;
- if (!OCFS2_IS_VALID_DINODE(di)) {
- OCFS2_RO_ON_INVALID_DINODE(tl_inode->i_sb, di);
- status = -EIO;
- goto bail;
- }
+ /* tl_bh is loaded from ocfs2_get_truncate_log_info(). It's
+ * validated by the underlying call to ocfs2_read_inode_block(),
+ * so any corruption is a code bug */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(di));
+
+ tl = &di->id2.i_dealloc;
if (le16_to_cpu(tl->tl_used)) {
mlog(0, "We'll have %u logs to recover\n",
le16_to_cpu(tl->tl_used));
@@ -5800,7 +5912,10 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
*/
/*
- * Describes a single block free from a suballocator
+ * Describe a single bit freed from a suballocator. For the block
+ * suballocators, it represents one block. For the global cluster
+ * allocator, it represents some clusters and free_bit indicates
+ * clusters number.
*/
struct ocfs2_cached_block_free {
struct ocfs2_cached_block_free *free_next;
@@ -5815,10 +5930,10 @@ struct ocfs2_per_slot_free_list {
struct ocfs2_cached_block_free *f_first;
};
-static int ocfs2_free_cached_items(struct ocfs2_super *osb,
- int sysfile_type,
- int slot,
- struct ocfs2_cached_block_free *head)
+static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
+ int sysfile_type,
+ int slot,
+ struct ocfs2_cached_block_free *head)
{
int ret;
u64 bg_blkno;
@@ -5893,6 +6008,82 @@ out:
return ret;
}
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ u64 blkno, unsigned int bit)
+{
+ int ret = 0;
+ struct ocfs2_cached_block_free *item;
+
+ item = kmalloc(sizeof(*item), GFP_NOFS);
+ if (item == NULL) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mlog(0, "Insert clusters: (bit %u, blk %llu)\n",
+ bit, (unsigned long long)blkno);
+
+ item->free_blk = blkno;
+ item->free_bit = bit;
+ item->free_next = ctxt->c_global_allocator;
+
+ ctxt->c_global_allocator = item;
+ return ret;
+}
+
+static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
+ struct ocfs2_cached_block_free *head)
+{
+ struct ocfs2_cached_block_free *tmp;
+ struct inode *tl_inode = osb->osb_tl_inode;
+ handle_t *handle;
+ int ret = 0;
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ while (head) {
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
+ head->free_bit);
+
+ ocfs2_commit_trans(osb, handle);
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ mutex_unlock(&tl_inode->i_mutex);
+
+ while (head) {
+ /* Premature exit may have left some dangling items. */
+ tmp = head;
+ head = head->free_next;
+ kfree(tmp);
+ }
+
+ return ret;
+}
+
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt)
{
@@ -5908,8 +6099,10 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
if (fl->f_first) {
mlog(0, "Free items: (type %u, slot %d)\n",
fl->f_inode_type, fl->f_slot);
- ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
- fl->f_slot, fl->f_first);
+ ret2 = ocfs2_free_cached_blocks(osb,
+ fl->f_inode_type,
+ fl->f_slot,
+ fl->f_first);
if (ret2)
mlog_errno(ret2);
if (!ret)
@@ -5920,6 +6113,17 @@ int ocfs2_run_deallocs(struct ocfs2_super *osb,
kfree(fl);
}
+ if (ctxt->c_global_allocator) {
+ ret2 = ocfs2_free_cached_clusters(osb,
+ ctxt->c_global_allocator);
+ if (ret2)
+ mlog_errno(ret2);
+ if (!ret)
+ ret = ret2;
+
+ ctxt->c_global_allocator = NULL;
+ }
+
return ret;
}
@@ -6075,11 +6279,10 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
eb = (struct ocfs2_extent_block *) bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- ret = -EROFS;
- goto out;
- }
+
+ /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
+ * Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
*new_last_eb = bh;
get_bh(*new_last_eb);
@@ -6350,6 +6553,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
goto bail;
}
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
spin_lock(&OCFS2_I(inode)->ip_lock);
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
clusters_to_del;
@@ -6436,11 +6641,6 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
mlog_errno(ret);
else if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- ret = walk_page_buffers(handle, page_buffers(page),
- from, to, &partial,
- ocfs2_journal_dirty_data);
-#endif
if (ret < 0)
mlog_errno(ret);
}
@@ -6663,6 +6863,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
struct ocfs2_extent_tree et;
+ int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
@@ -6682,7 +6883,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
+ handle = ocfs2_start_trans(osb,
+ ocfs2_inline_to_extents_credits(osb->sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
@@ -6701,6 +6903,13 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
unsigned int page_end;
u64 phys;
+ if (vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
+ did_quota = 1;
+
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
&num);
if (ret) {
@@ -6774,6 +6983,10 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_commit:
+ if (ret < 0 && did_quota)
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+
ocfs2_commit_trans(osb, handle);
out_unlock:
@@ -6984,20 +7197,14 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
if (fe->id2.i_list.l_tree_depth) {
- status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
- &last_eb_bh);
+ status = ocfs2_read_extent_block(inode,
+ le64_to_cpu(fe->i_last_eb_blk),
+ &last_eb_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
-
- brelse(last_eb_bh);
- status = -EIO;
- goto bail;
- }
}
(*tc)->tc_last_eb_bh = last_eb_bh;
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 70257c84cfbe..59d37d1b7d4c 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -73,6 +73,14 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
struct buffer_head *bh,
struct ocfs2_xattr_value_root *xv);
+/*
+ * Read an extent block into *bh. If *bh is NULL, a bh will be
+ * allocated. This is a cached read. The extent block will be validated
+ * with ocfs2_validate_extent_block().
+ */
+int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
+ struct buffer_head **bh);
+
struct ocfs2_alloc_context;
int ocfs2_insert_extent(struct ocfs2_super *osb,
handle_t *handle,
@@ -110,6 +118,11 @@ int ocfs2_remove_extent(struct inode *inode,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
+int ocfs2_remove_btree_range(struct inode *inode,
+ struct ocfs2_extent_tree *et,
+ u32 cpos, u32 phys_cpos, u32 len,
+ struct ocfs2_cached_dealloc_ctxt *dealloc);
+
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_extent_tree *et);
@@ -167,10 +180,18 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
*/
struct ocfs2_cached_dealloc_ctxt {
struct ocfs2_per_slot_free_list *c_first_suballocator;
+ struct ocfs2_cached_block_free *c_global_allocator;
};
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
{
c->c_first_suballocator = NULL;
+ c->c_global_allocator = NULL;
+}
+int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
+ u64 blkno, unsigned int bit);
+static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
+{
+ return c->c_global_allocator != NULL;
}
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c22543b33420..6b647ec87bb3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/pipe_fs_i.h>
#include <linux/mpage.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_FILE_IO
#include <cluster/masklog.h>
@@ -68,20 +69,13 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
goto bail;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature);
- goto bail;
- }
-
if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
le32_to_cpu(fe->i_clusters))) {
mlog(ML_ERROR, "block offset is outside the allocated size: "
@@ -262,7 +256,7 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -481,12 +475,6 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
if (ocfs2_should_order_data(inode)) {
ret = ocfs2_jbd2_file_inode(handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- ret = walk_page_buffers(handle,
- page_buffers(page),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
if (ret < 0)
mlog_errno(ret);
}
@@ -1072,15 +1060,8 @@ static void ocfs2_write_failure(struct inode *inode,
tmppage = wc->w_pages[i];
if (page_has_buffers(tmppage)) {
- if (ocfs2_should_order_data(inode)) {
+ if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- walk_page_buffers(wc->w_handle,
- page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
- }
block_commit_write(tmppage, from, to);
}
@@ -1750,6 +1731,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
wc->w_handle = handle;
+ if (clusters_to_alloc && vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
/*
* We don't want this to fail in ocfs2_write_end(), so do it
* here.
@@ -1758,7 +1744,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
/*
@@ -1771,14 +1757,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
mmap_page);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
len);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out_quota;
}
if (data_ac)
@@ -1790,6 +1776,10 @@ success:
*pagep = wc->w_target_page;
*fsdata = wc;
return 0;
+out_quota:
+ if (clusters_to_alloc)
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_alloc));
out_commit:
ocfs2_commit_trans(osb, handle);
@@ -1919,15 +1909,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (page_has_buffers(tmppage)) {
- if (ocfs2_should_order_data(inode)) {
+ if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
- walk_page_buffers(wc->w_handle,
- page_buffers(tmppage),
- from, to, NULL,
- ocfs2_journal_dirty_data);
-#endif
- }
block_commit_write(tmppage, from, to);
}
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 7e947c672469..15c8e6deee2e 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -39,6 +39,18 @@
#include "buffer_head_io.h"
+/*
+ * Bits on bh->b_state used by ocfs2.
+ *
+ * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart.
+ */
+enum ocfs2_state_bits {
+ BH_NeedsValidate = BH_JBDPrivateStart,
+};
+
+/* Expand the magic b_state functions */
+BUFFER_FNS(NeedsValidate, needs_validate);
+
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
struct inode *inode)
{
@@ -112,7 +124,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
bh = bhs[i];
if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
+ mlog(ML_BH_IO,
"trying to sync read a jbd "
"managed bh (blocknr = %llu), skipping\n",
(unsigned long long)bh->b_blocknr);
@@ -147,15 +159,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
for (i = nr; i > 0; i--) {
bh = bhs[i - 1];
- if (buffer_jbd(bh)) {
- mlog(ML_ERROR,
- "the journal got the buffer while it was "
- "locked for io! (blocknr = %llu)\n",
- (unsigned long long)bh->b_blocknr);
- BUG();
- }
+ /* No need to wait on the buffer if it's managed by JBD. */
+ if (!buffer_jbd(bh))
+ wait_on_buffer(bh);
- wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
@@ -171,7 +178,9 @@ bail:
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
- struct buffer_head *bhs[], int flags)
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
{
int status = 0;
int i, ignore_cache = 0;
@@ -251,8 +260,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
ignore_cache = 1;
}
- /* XXX: Can we ever get this and *not* have the cached
- * flag set? */
if (buffer_jbd(bh)) {
if (ignore_cache)
mlog(ML_BH_IO, "trying to sync read a jbd "
@@ -305,6 +312,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
+ if (validate)
+ set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
continue;
@@ -335,6 +344,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
bhs[i] = NULL;
continue;
}
+
+ if (buffer_needs_validate(bh)) {
+ /* We never set NeedsValidate if the
+ * buffer was held by the journal, so
+ * that better not have changed */
+ BUG_ON(buffer_jbd(bh));
+ clear_buffer_needs_validate(bh);
+ status = validate(inode->i_sb, bh);
+ if (status) {
+ put_bh(bh);
+ bhs[i] = NULL;
+ continue;
+ }
+ }
}
/* Always set the buffer in the cache, even if it was
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 75e1dcb1ade7..c75d682dadd8 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,21 +31,24 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
-static inline int ocfs2_read_block(struct inode *inode,
- u64 off,
- struct buffer_head **bh);
-
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
-int ocfs2_read_blocks(struct inode *inode,
- u64 block,
- int nr,
- struct buffer_head *bhs[],
- int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
+/*
+ * If not NULL, validate() will be called on a buffer that is freshly
+ * read from disk. It will not be called if the buffer was in cache.
+ * Note that if validate() is being used for this buffer, it needs to
+ * be set even for a READAHEAD call, as it marks the buffer for later
+ * validation.
+ */
+int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh));
+
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
@@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct inode *inode, u64 off,
- struct buffer_head **bh)
+ struct buffer_head **bh,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
{
int status = 0;
@@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
goto bail;
}
- status = ocfs2_read_blocks(inode, off, 1, bh, 0);
+ status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
bail:
return status;
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 57670c680471..7e72a81bc2d4 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
+#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
/* bits that are infrequently given and frequently matched in the high word */
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 026e6eb85187..3708fe482e3e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -82,49 +83,6 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct ocfs2_alloc_context *meta_ac,
struct buffer_head **new_bh);
-static struct buffer_head *ocfs2_bread(struct inode *inode,
- int block, int *err, int reada)
-{
- struct buffer_head *bh = NULL;
- int tmperr;
- u64 p_blkno;
- int readflags = 0;
-
- if (reada)
- readflags |= OCFS2_BH_READAHEAD;
-
- if (((u64)block << inode->i_sb->s_blocksize_bits) >=
- i_size_read(inode)) {
- BUG_ON(!reada);
- return NULL;
- }
-
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
- tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
- NULL);
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
- if (tmperr < 0) {
- mlog_errno(tmperr);
- goto fail;
- }
-
- tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
- if (tmperr < 0)
- goto fail;
-
- tmperr = 0;
-
- *err = 0;
- return bh;
-
-fail:
- brelse(bh);
- bh = NULL;
-
- *err = -EIO;
- return NULL;
-}
-
/*
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
@@ -231,7 +189,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
- ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -250,6 +208,43 @@ out:
return NULL;
}
+static int ocfs2_validate_dir_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ /*
+ * Nothing yet. We don't validate dirents here, that's handled
+ * in-place when the code walks them.
+ */
+ mlog(0, "Validating dirblock %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ return 0;
+}
+
+/*
+ * This function forces all errors to -EIO for consistency with its
+ * predecessor, ocfs2_bread(). We haven't audited what returning the
+ * real error codes would do to callers. We log the real codes with
+ * mlog_errno() before we squash them.
+ */
+static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh, int flags)
+{
+ int rc = 0;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
+ ocfs2_validate_dir_block);
+ if (rc)
+ mlog_errno(rc);
+
+ /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc ? -EIO : 0;
+}
+
static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
struct inode *dir,
struct ocfs2_dir_entry **res_dir)
@@ -296,15 +291,17 @@ restart:
}
num++;
- bh = ocfs2_bread(dir, b++, &err, 1);
+ bh = NULL;
+ err = ocfs2_read_dir_block(dir, b++, &bh,
+ OCFS2_BH_READAHEAD);
bh_use[ra_max] = bh;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
goto next;
- if (ocfs2_read_block(dir, block, &bh)) {
+ if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
/* read error, skip block & hope for the best.
- * ocfs2_read_block() has released the bh. */
+ * ocfs2_read_dir_block() has released the bh. */
ocfs2_error(dir->i_sb, "reading directory %llu, "
"offset %lu\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -458,7 +455,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
struct ocfs2_dinode *di;
struct ocfs2_inline_data *data;
- ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(dir, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -636,7 +633,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
struct ocfs2_inline_data *data;
struct ocfs2_dir_entry *de;
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -724,7 +721,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
int i, stored;
struct buffer_head * bh, * tmp;
struct ocfs2_dir_entry * de;
- int err;
struct super_block * sb = inode->i_sb;
unsigned int ra_sectors = 16;
@@ -735,12 +731,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (!error && !stored && *f_pos < i_size_read(inode)) {
blk = (*f_pos) >> sb->s_blocksize_bits;
- bh = ocfs2_bread(inode, blk, &err, 0);
- if (!bh) {
- mlog(ML_ERROR,
- "directory #%llu contains a hole at offset %lld\n",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- *f_pos);
+ if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
+ /* Skip the corrupt dirblock and keep trying */
*f_pos += sb->s_blocksize - offset;
continue;
}
@@ -754,8 +746,10 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
|| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
i > 0; i--) {
- tmp = ocfs2_bread(inode, ++blk, &err, 1);
- brelse(tmp);
+ tmp = NULL;
+ if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
+ OCFS2_BH_READAHEAD))
+ brelse(tmp);
}
last_ra_blk = blk;
ra_sectors = 8;
@@ -828,6 +822,7 @@ revalidate:
}
offset = 0;
brelse(bh);
+ bh = NULL;
}
stored = 0;
@@ -1216,9 +1211,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
unsigned int blocks_wanted,
struct buffer_head **first_block_bh)
{
- int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
u32 alloc, bit_off, len;
struct super_block *sb = dir->i_sb;
+ int ret, credits = ocfs2_inline_to_extents_credits(sb);
u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(dir);
@@ -1227,6 +1222,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
handle_t *handle;
struct ocfs2_extent_tree et;
+ int did_quota = 0;
ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
@@ -1264,6 +1260,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
goto out_sem;
}
+ if (vfs_dq_alloc_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(osb->sb, alloc))) {
+ ret = -EDQUOT;
+ goto out_commit;
+ }
+ did_quota = 1;
/*
* Try to claim as many clusters as the bitmap can give though
* if we only get one now, that's enough to continue. The rest
@@ -1386,6 +1388,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
dirdata_bh = NULL;
out_commit:
+ if (ret < 0 && did_quota)
+ vfs_dq_free_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(osb->sb, 2));
ocfs2_commit_trans(osb, handle);
out_sem:
@@ -1410,7 +1415,7 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
struct buffer_head **new_bh)
{
int status;
- int extend;
+ int extend, did_quota = 0;
u64 p_blkno, v_blkno;
spin_lock(&OCFS2_I(dir)->ip_lock);
@@ -1420,6 +1425,13 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
if (extend) {
u32 offset = OCFS2_I(dir)->ip_clusters;
+ if (vfs_dq_alloc_space_nodirty(dir,
+ ocfs2_clusters_to_bytes(sb, 1))) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota = 1;
+
status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
@@ -1445,6 +1457,8 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
}
status = 0;
bail:
+ if (did_quota && status < 0)
+ vfs_dq_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
mlog_exit(status);
return status;
}
@@ -1680,8 +1694,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
struct super_block *sb = dir->i_sb;
int status;
- bh = ocfs2_bread(dir, 0, &status, 0);
- if (!bh) {
+ status = ocfs2_read_dir_block(dir, 0, &bh, 0);
+ if (status) {
mlog_errno(status);
goto bail;
}
@@ -1702,11 +1716,10 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
status = -ENOSPC;
goto bail;
}
- bh = ocfs2_bread(dir,
- offset >> sb->s_blocksize_bits,
- &status,
- 0);
- if (!bh) {
+ status = ocfs2_read_dir_block(dir,
+ offset >> sb->s_blocksize_bits,
+ &bh, 0);
+ if (status) {
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 3516d8a4166b..6f7a77d54020 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void)
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
dlmfs_init_once);
- if (!dlmfs_inode_cache)
+ if (!dlmfs_inode_cache) {
+ status = -ENOMEM;
goto bail;
+ }
cleanup_inode = 1;
user_dlm_worker = create_singlethread_workqueue("user_dlm");
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 39ec27738499..0c3cc03c61fa 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -33,7 +33,7 @@
#include <linux/workqueue.h>
/* user_lock_res->l_flags flags. */
-#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define USER_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ec684426034b..b1c75911d8ad 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -32,6 +32,7 @@
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
@@ -51,6 +52,7 @@
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -68,6 +70,7 @@ struct ocfs2_mask_waiter {
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres);
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres);
/*
* Return value from ->downconvert_worker functions.
@@ -102,6 +105,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
@@ -258,6 +262,12 @@ static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.flags = 0,
};
+static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+ .set_lvb = ocfs2_set_qinfo_lvb,
+ .get_osb = ocfs2_get_qinfo_osb,
+ .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
+};
+
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -279,6 +289,13 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
+static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_res *lockres)
+{
+ BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_QINFO);
+
+ return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
+}
+
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
if (lockres->l_ops->get_osb)
@@ -507,6 +524,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
return OCFS2_SB(inode->i_sb);
}
+static struct ocfs2_super *ocfs2_get_qinfo_osb(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_mem_dqinfo *info = lockres->l_priv;
+
+ return OCFS2_SB(info->dqi_gi.dqi_sb);
+}
+
static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_file_private *fp = lockres->l_priv;
@@ -609,6 +633,17 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
lockres->l_flags |= OCFS2_LOCK_NOCACHE;
}
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+ struct ocfs2_mem_dqinfo *info)
+{
+ ocfs2_lock_res_init_once(lockres);
+ ocfs2_build_lock_name(OCFS2_LOCK_TYPE_QINFO, info->dqi_gi.dqi_type,
+ 0, lockres->l_name);
+ ocfs2_lock_res_init_common(OCFS2_SB(info->dqi_gi.dqi_sb), lockres,
+ OCFS2_LOCK_TYPE_QINFO, &ocfs2_qinfo_lops,
+ info);
+}
+
void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
mlog_entry_void();
@@ -2024,7 +2059,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
} else {
/* Boo, we have to go to disk. */
/* read bh, cast, ocfs2_refresh_inode */
- status = ocfs2_read_block(inode, oi->ip_blkno, bh);
+ status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
goto bail_refresh;
@@ -2032,18 +2067,14 @@ static int ocfs2_inode_lock_update(struct inode *inode,
fe = (struct ocfs2_dinode *) (*bh)->b_data;
/* This is a good chance to make sure we're not
- * locking an invalid object.
+ * locking an invalid object. ocfs2_read_inode_block()
+ * already checked that the inode block is sane.
*
* We bug on a stale inode here because we checked
* above whether it was wiped from disk. The wiping
* node provides a guarantee that we receive that
* message and can mark the inode before dropping any
* locks associated with it. */
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto bail_refresh;
- }
mlog_bug_on_msg(inode->i_generation !=
le32_to_cpu(fe->i_generation),
"Invalid dinode %llu disk generation: %u "
@@ -2085,7 +2116,7 @@ static int ocfs2_assign_bh(struct inode *inode,
return 0;
}
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
+ status = ocfs2_read_inode_block(inode, ret_bh);
if (status < 0)
mlog_errno(status);
@@ -2841,9 +2872,8 @@ static void ocfs2_unlock_ast(void *opaque, int error)
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
- spin_unlock_irqrestore(&lockres->l_lock, flags);
-
wake_up(&lockres->l_event);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
mlog_exit_void();
}
@@ -3450,6 +3480,117 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
return UNBLOCK_CONTINUE_POST;
}
+static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
+{
+ struct ocfs2_qinfo_lvb *lvb;
+ struct ocfs2_mem_dqinfo *oinfo = ocfs2_lock_res_qinfo(lockres);
+ struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+ oinfo->dqi_gi.dqi_type);
+
+ mlog_entry_void();
+
+ lvb = (struct ocfs2_qinfo_lvb *)ocfs2_dlm_lvb(&lockres->l_lksb);
+ lvb->lvb_version = OCFS2_QINFO_LVB_VERSION;
+ lvb->lvb_bgrace = cpu_to_be32(info->dqi_bgrace);
+ lvb->lvb_igrace = cpu_to_be32(info->dqi_igrace);
+ lvb->lvb_syncms = cpu_to_be32(oinfo->dqi_syncms);
+ lvb->lvb_blocks = cpu_to_be32(oinfo->dqi_gi.dqi_blocks);
+ lvb->lvb_free_blk = cpu_to_be32(oinfo->dqi_gi.dqi_free_blk);
+ lvb->lvb_free_entry = cpu_to_be32(oinfo->dqi_gi.dqi_free_entry);
+
+ mlog_exit_void();
+}
+
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+
+ mlog_entry_void();
+ if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb))
+ ocfs2_cluster_unlock(osb, lockres, level);
+ mlog_exit_void();
+}
+
+static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo)
+{
+ struct mem_dqinfo *info = sb_dqinfo(oinfo->dqi_gi.dqi_sb,
+ oinfo->dqi_gi.dqi_type);
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_qinfo_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+ struct buffer_head *bh = NULL;
+ struct ocfs2_global_disk_dqinfo *gdinfo;
+ int status = 0;
+
+ if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) {
+ info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace);
+ info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace);
+ oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms);
+ oinfo->dqi_gi.dqi_blocks = be32_to_cpu(lvb->lvb_blocks);
+ oinfo->dqi_gi.dqi_free_blk = be32_to_cpu(lvb->lvb_free_blk);
+ oinfo->dqi_gi.dqi_free_entry =
+ be32_to_cpu(lvb->lvb_free_entry);
+ } else {
+ status = ocfs2_read_quota_block(oinfo->dqi_gqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ gdinfo = (struct ocfs2_global_disk_dqinfo *)
+ (bh->b_data + OCFS2_GLOBAL_INFO_OFF);
+ info->dqi_bgrace = le32_to_cpu(gdinfo->dqi_bgrace);
+ info->dqi_igrace = le32_to_cpu(gdinfo->dqi_igrace);
+ oinfo->dqi_syncms = le32_to_cpu(gdinfo->dqi_syncms);
+ oinfo->dqi_gi.dqi_blocks = le32_to_cpu(gdinfo->dqi_blocks);
+ oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(gdinfo->dqi_free_blk);
+ oinfo->dqi_gi.dqi_free_entry =
+ le32_to_cpu(gdinfo->dqi_free_entry);
+ brelse(bh);
+ ocfs2_track_lock_refresh(lockres);
+ }
+
+bail:
+ return status;
+}
+
+/* Lock quota info, this function expects at least shared lock on the quota file
+ * so that we can safely refresh quota info from disk. */
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ struct ocfs2_lock_res *lockres = &oinfo->dqi_gqlock;
+ struct ocfs2_super *osb = OCFS2_SB(oinfo->dqi_gi.dqi_sb);
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+ int status = 0;
+
+ mlog_entry_void();
+
+ /* On RO devices, locking really isn't needed... */
+ if (ocfs2_is_hard_readonly(osb)) {
+ if (ex)
+ status = -EROFS;
+ goto bail;
+ }
+ if (ocfs2_mount_local(osb))
+ goto bail;
+
+ status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ if (!ocfs2_should_refresh_lock_res(lockres))
+ goto bail;
+ /* OK, we have the lock but we need to refresh the quota info */
+ status = ocfs2_refresh_qinfo(oinfo);
+ if (status)
+ ocfs2_qinfo_unlock(oinfo, ex);
+ ocfs2_complete_lock_res_refresh(lockres, status);
+bail:
+ mlog_exit(status);
+ return status;
+}
+
/*
* This is the filesystem locking protocol. It provides the lock handling
* hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 2bb01f09c1b1..3f8d9986b8e0 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -49,6 +49,19 @@ struct ocfs2_meta_lvb {
__be32 lvb_reserved2;
};
+#define OCFS2_QINFO_LVB_VERSION 1
+
+struct ocfs2_qinfo_lvb {
+ __u8 lvb_version;
+ __u8 lvb_reserved[3];
+ __be32 lvb_bgrace;
+ __be32 lvb_igrace;
+ __be32 lvb_syncms;
+ __be32 lvb_blocks;
+ __be32 lvb_free_blk;
+ __be32 lvb_free_entry;
+};
+
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -69,6 +82,9 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
struct ocfs2_file_private;
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp);
+struct ocfs2_mem_dqinfo;
+void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
+ struct ocfs2_mem_dqinfo *info);
void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
int ocfs2_create_new_inode_locks(struct inode *inode);
int ocfs2_drop_inode_locks(struct inode *inode);
@@ -103,6 +119,9 @@ int ocfs2_dentry_lock(struct dentry *dentry, int ex);
void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
int ocfs2_file_lock(struct file *file, int ex, int trylock);
void ocfs2_file_unlock(struct file *file);
+int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
+
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2baedac58234..f2bb1a04d253 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
- ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
+ ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -302,12 +302,6 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
eb = (struct ocfs2_extent_block *) eb_bh->b_data;
el = &eb->h_list;
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
- ret = -EROFS;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
- goto out;
- }
-
if (el->l_tree_depth) {
ocfs2_error(inode->i_sb,
"Inode %lu has non zero tree depth in "
@@ -381,23 +375,16 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
goto no_more_extents;
- ret = ocfs2_read_block(inode,
- le64_to_cpu(eb->h_next_leaf_blk),
- &next_eb_bh);
+ ret = ocfs2_read_extent_block(inode,
+ le64_to_cpu(eb->h_next_leaf_blk),
+ &next_eb_bh);
if (ret) {
mlog_errno(ret);
goto out;
}
- next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
-
- if (!OCFS2_IS_VALID_EXTENT_BLOCK(next_eb)) {
- ret = -EROFS;
- OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, next_eb);
- goto out;
- }
+ next_eb = (struct ocfs2_extent_block *)next_eb_bh->b_data;
el = &next_eb->h_list;
-
i = ocfs2_search_for_hole_index(el, v_cluster);
}
@@ -630,7 +617,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
if (ret == 0)
goto out;
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -819,3 +806,74 @@ out:
return ret;
}
+
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
+{
+ int rc = 0;
+ u64 p_block, p_count;
+ int i, count, done = 0;
+
+ mlog_entry("(inode = %p, v_block = %llu, nr = %d, bhs = %p, "
+ "flags = %x, validate = %p)\n",
+ inode, (unsigned long long)v_block, nr, bhs, flags,
+ validate);
+
+ if (((v_block + nr - 1) << inode->i_sb->s_blocksize_bits) >=
+ i_size_read(inode)) {
+ BUG_ON(!(flags & OCFS2_BH_READAHEAD));
+ goto out;
+ }
+
+ while (done < nr) {
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
+ &p_block, &p_count, NULL);
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+
+ if (!p_block) {
+ rc = -EIO;
+ mlog(ML_ERROR,
+ "Inode #%llu contains a hole at offset %llu\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ (unsigned long long)(v_block + done) <<
+ inode->i_sb->s_blocksize_bits);
+ break;
+ }
+
+ count = nr - done;
+ if (p_count < count)
+ count = p_count;
+
+ /*
+ * If the caller passed us bhs, they should have come
+ * from a previous readahead call to this function. Thus,
+ * they should have the right b_blocknr.
+ */
+ for (i = 0; i < count; i++) {
+ if (!bhs[done + i])
+ continue;
+ BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
+ }
+
+ rc = ocfs2_read_blocks(inode, p_block, count, bhs + done,
+ flags, validate);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+ done += count;
+ }
+
+out:
+ mlog_exit(rc);
+ return rc;
+}
+
+
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1c4aa8b06f34..b7dd9731b462 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -57,4 +57,28 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
struct ocfs2_extent_list *el);
+int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
+ struct buffer_head *bhs[], int flags,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh));
+static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh,
+ int (*validate)(struct super_block *sb,
+ struct buffer_head *bh))
+{
+ int status = 0;
+
+ if (bh == NULL) {
+ printk("ocfs2: bh == NULL\n");
+ status = -EINVAL;
+ goto bail;
+ }
+
+ status = ocfs2_read_virt_blocks(inode, v_block, 1, bh, 0, validate);
+
+bail:
+ return status;
+}
+
+
#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e2570a3bc2b2..9374d374a264 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -35,6 +35,7 @@
#include <linux/mount.h>
#include <linux/writeback.h>
#include <linux/falloc.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -56,6 +57,8 @@
#include "suballoc.h"
#include "super.h"
#include "xattr.h"
+#include "acl.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -303,9 +306,9 @@ bail:
return status;
}
-static int ocfs2_simple_size_update(struct inode *inode,
- struct buffer_head *di_bh,
- u64 new_i_size)
+int ocfs2_simple_size_update(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size)
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -401,12 +404,9 @@ static int ocfs2_truncate_file(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)new_i_size);
+ /* We trust di_bh because it comes from ocfs2_inode_lock(), which
+ * already validated it */
fe = (struct ocfs2_dinode *) di_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
"Inode %llu, inode i_size = %lld != di "
@@ -536,6 +536,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
+ int did_quota = 0;
mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
@@ -545,18 +546,12 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
*/
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
+ status = ocfs2_read_inode_block(inode, &bh);
if (status < 0) {
mlog_errno(status);
goto leave;
}
-
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
- status = -EIO;
- goto leave;
- }
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
@@ -585,6 +580,13 @@ restart_all:
}
restarted_transaction:
+ if (vfs_dq_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb,
+ clusters_to_add))) {
+ status = -EDQUOT;
+ goto leave;
+ }
+ did_quota = 1;
+
/* reserve a write to the file entry early on - that we if we
* run out of credits in the allocation path, we can still
* update i_size. */
@@ -622,6 +624,10 @@ restarted_transaction:
spin_lock(&OCFS2_I(inode)->ip_lock);
clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
spin_unlock(&OCFS2_I(inode)->ip_lock);
+ /* Release unused quota reservation */
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
+ did_quota = 0;
if (why != RESTART_NONE && clusters_to_add) {
if (why == RESTART_META) {
@@ -654,6 +660,9 @@ restarted_transaction:
OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
leave:
+ if (status < 0 && did_quota)
+ vfs_dq_free_space(inode,
+ ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
if (handle) {
ocfs2_commit_trans(osb, handle);
handle = NULL;
@@ -885,6 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct ocfs2_super *osb = OCFS2_SB(sb);
struct buffer_head *bh = NULL;
handle_t *handle = NULL;
+ int locked[MAXQUOTAS] = {0, 0};
+ int credits, qtype;
+ struct ocfs2_mem_dqinfo *oinfo;
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@@ -955,11 +967,47 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
}
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto bail_unlock;
+ if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+ (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+ credits = OCFS2_INODE_UPDATE_CREDITS;
+ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
+ && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto bail_unlock;
+ credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+ ocfs2_calc_qdel_credits(sb, USRQUOTA);
+ locked[USRQUOTA] = 1;
+ }
+ if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
+ && OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto bail_unlock;
+ credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
+ ocfs2_calc_qdel_credits(sb, GRPQUOTA);
+ locked[GRPQUOTA] = 1;
+ }
+ handle = ocfs2_start_trans(osb, credits);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
+ }
+ status = vfs_dq_transfer(inode, attr) ? -EDQUOT : 0;
+ if (status < 0)
+ goto bail_commit;
+ } else {
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
+ }
}
/*
@@ -982,6 +1030,12 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
+ for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
+ if (!locked[qtype])
+ continue;
+ oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
if (size_change)
@@ -989,6 +1043,12 @@ bail_unlock_rw:
bail:
brelse(bh);
+ if (!status && attr->ia_valid & ATTR_MODE) {
+ status = ocfs2_acl_chmod(inode);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
mlog_exit(status);
return status;
}
@@ -1035,7 +1095,7 @@ int ocfs2_permission(struct inode *inode, int mask)
goto out;
}
- ret = generic_permission(inode, mask, NULL);
+ ret = generic_permission(inode, mask, ocfs2_check_acl);
ocfs2_inode_unlock(inode, 0);
out:
@@ -1128,9 +1188,8 @@ static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
struct buffer_head *bh = NULL;
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
- ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
+ ret = ocfs2_read_inode_block(inode, &bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -1156,8 +1215,7 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
struct buffer_head *di_bh = NULL;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
- &di_bh);
+ ret = ocfs2_read_inode_block(inode, &di_bh);
if (ret) {
mlog_errno(ret);
goto out;
@@ -1226,83 +1284,6 @@ out:
return ret;
}
-static int __ocfs2_remove_inode_range(struct inode *inode,
- struct buffer_head *di_bh,
- u32 cpos, u32 phys_cpos, u32 len,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
-{
- int ret;
- u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct inode *tl_inode = osb->osb_tl_inode;
- handle_t *handle;
- struct ocfs2_alloc_context *meta_ac = NULL;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
- struct ocfs2_extent_tree et;
-
- ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
-
- ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
-
- mutex_lock(&tl_inode->i_mutex);
-
- if (ocfs2_truncate_log_needs_flush(osb)) {
- ret = __ocfs2_flush_truncate_log(osb);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
- ret = ocfs2_journal_access(handle, inode, di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
- ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
- dealloc);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- OCFS2_I(inode)->ip_clusters -= len;
- di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
-
- ret = ocfs2_journal_dirty(handle, di_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
- ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
- if (ret)
- mlog_errno(ret);
-
-out_commit:
- ocfs2_commit_trans(osb, handle);
-out:
- mutex_unlock(&tl_inode->i_mutex);
-
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
- return ret;
-}
-
/*
* Truncate a byte range, avoiding pages within partial clusters. This
* preserves those pages for the zeroing code to write to.
@@ -1402,7 +1383,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_cached_dealloc_ctxt dealloc;
struct address_space *mapping = inode->i_mapping;
+ struct ocfs2_extent_tree et;
+ ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
if (byte_len == 0)
@@ -1458,9 +1441,9 @@ static int ocfs2_remove_inode_range(struct inode *inode,
/* Only do work for non-holes */
if (phys_cpos != 0) {
- ret = __ocfs2_remove_inode_range(inode, di_bh, cpos,
- phys_cpos, alloc_size,
- &dealloc);
+ ret = ocfs2_remove_btree_range(inode, &et, cpos,
+ phys_cpos, alloc_size,
+ &dealloc);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index e92382cbca5f..172f9fbc9fc7 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -51,6 +51,9 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason_ret);
+int ocfs2_simple_size_update(struct inode *inode,
+ struct buffer_head *di_bh,
+ u64 new_i_size);
int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
u64 zero_to);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7aa00d511874..288512c9dbc2 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/quotaops.h>
#include <asm/byteorder.h>
@@ -214,12 +215,11 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
return 0;
}
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
- int create_ino)
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+ int create_ino)
{
struct super_block *sb;
struct ocfs2_super *osb;
- int status = -EINVAL;
int use_plocks = 1;
mlog_entry("(0x%p, size:%llu)\n", inode,
@@ -232,25 +232,17 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
use_plocks = 0;
- /* this means that read_inode cannot create a superblock inode
- * today. change if needed. */
- if (!OCFS2_IS_VALID_DINODE(fe) ||
- !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
- mlog(0, "Invalid dinode: i_ino=%lu, i_blkno=%llu, "
- "signature = %.*s, flags = 0x%x\n",
- inode->i_ino,
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature, le32_to_cpu(fe->i_flags));
- goto bail;
- }
+ /*
+ * These have all been checked by ocfs2_read_inode_block() or set
+ * by ocfs2_mknod_locked(), so a failure is a code bug.
+ */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); /* This means that read_inode
+ cannot create a superblock
+ inode today. change if
+ that is needed. */
+ BUG_ON(!(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)));
+ BUG_ON(le32_to_cpu(fe->i_fs_generation) != osb->fs_generation);
- if (le32_to_cpu(fe->i_fs_generation) != osb->fs_generation) {
- mlog(ML_ERROR, "file entry generation does not match "
- "superblock! osb->fs_generation=%x, "
- "fe->i_fs_generation=%x\n",
- osb->fs_generation, le32_to_cpu(fe->i_fs_generation));
- goto bail;
- }
OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
@@ -284,14 +276,18 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_nlink = le16_to_cpu(fe->i_links_count);
- if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
+ if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
+ inode->i_flags |= S_NOQUOTA;
+ }
if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
} else if (fe->i_flags & cpu_to_le32(OCFS2_BITMAP_FL)) {
OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
+ } else if (fe->i_flags & cpu_to_le32(OCFS2_QUOTA_FL)) {
+ inode->i_flags |= S_NOQUOTA;
} else if (fe->i_flags & cpu_to_le32(OCFS2_SUPER_BLOCK_FL)) {
mlog(0, "superblock inode: i_ino=%lu\n", inode->i_ino);
/* we can't actually hit this as read_inode can't
@@ -354,10 +350,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
ocfs2_set_inode_flags(inode);
- status = 0;
-bail:
- mlog_exit(status);
- return status;
+ mlog_exit_void();
}
static int ocfs2_read_locked_inode(struct inode *inode,
@@ -460,11 +453,14 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
}
- if (can_lock)
- status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
- OCFS2_BH_IGNORE_CACHE);
- else
+ if (can_lock) {
+ status = ocfs2_read_inode_block_full(inode, &bh,
+ OCFS2_BH_IGNORE_CACHE);
+ } else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
+ if (!status)
+ status = ocfs2_validate_inode_block(osb->sb, bh);
+ }
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -472,12 +468,6 @@ static int ocfs2_read_locked_inode(struct inode *inode,
status = -EINVAL;
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- mlog(0, "Invalid dinode #%llu: signature = %.*s\n",
- (unsigned long long)args->fi_blkno, 7,
- fe->i_signature);
- goto bail;
- }
/*
* This is a code bug. Right now the caller needs to
@@ -491,10 +481,9 @@ static int ocfs2_read_locked_inode(struct inode *inode,
if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
S_ISBLK(le16_to_cpu(fe->i_mode)))
- inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
+ inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
- if (ocfs2_populate_inode(inode, fe, 0) < 0)
- goto bail;
+ ocfs2_populate_inode(inode, fe, 0);
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
@@ -615,7 +604,8 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail;
}
- handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS);
+ handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS +
+ ocfs2_quota_trans_credits(inode->i_sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -647,6 +637,7 @@ static int ocfs2_remove_inode(struct inode *inode,
}
ocfs2_remove_from_cache(inode, di_bh);
+ vfs_dq_free_inode(inode);
status = ocfs2_free_dinode(handle, inode_alloc_inode,
inode_alloc_bh, di);
@@ -929,7 +920,10 @@ void ocfs2_delete_inode(struct inode *inode)
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
- if (is_bad_inode(inode)) {
+ /* When we fail in read_inode() we mark inode as bad. The second test
+ * catches the case when inode allocation fails before allocating
+ * a block for inode. */
+ if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) {
mlog(0, "Skipping delete of bad inode\n");
goto bail;
}
@@ -1264,3 +1258,71 @@ void ocfs2_refresh_inode(struct inode *inode,
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
+
+int ocfs2_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int rc = -EINVAL;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ mlog(0, "Validating dinode %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ ocfs2_error(sb, "Invalid dinode #%llu: signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7,
+ di->i_signature);
+ goto bail;
+ }
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb, "Invalid dinode #%llu: i_blkno is %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ goto bail;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ ocfs2_error(sb,
+ "Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
+ (unsigned long long)bh->b_blocknr);
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Invalid dinode #%llu: fs_generation is %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ goto bail;
+ }
+
+ rc = 0;
+
+bail:
+ return rc;
+}
+
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+ int flags)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
+ flags, ocfs2_validate_inode_block);
+
+ /* If ocfs2_read_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
+{
+ return ocfs2_read_inode_block_full(inode, bh, 0);
+}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 2f37af9bcc4a..eb3c302b38d3 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -128,8 +128,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
int ocfs2_inode_init_private(struct inode *inode);
int ocfs2_inode_revalidate(struct dentry *dentry);
-int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
- int create_ino);
+void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
+ int create_ino);
void ocfs2_read_inode(struct inode *inode);
void ocfs2_read_inode2(struct inode *inode, void *opaque);
ssize_t ocfs2_rw_direct(int rw, struct file *filp, char *buf,
@@ -142,6 +142,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
struct buffer_head *bh);
int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
+struct buffer_head *ocfs2_bread(struct inode *inode,
+ int block, int *err, int reada);
void ocfs2_set_inode_flags(struct inode *inode);
void ocfs2_get_inode_flags(struct ocfs2_inode_info *oi);
@@ -153,4 +155,16 @@ static inline blkcnt_t ocfs2_inode_sector_count(struct inode *inode)
return (blkcnt_t)(OCFS2_I(inode)->ip_clusters << c_to_s_bits);
}
+/* Validate that a bh contains a valid inode */
+int ocfs2_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+/*
+ * Read an inode block into *bh. If *bh is NULL, a bh will be allocated.
+ * This is a cached read. The inode will be validated with
+ * ocfs2_validate_inode_block().
+ */
+int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
+/* The same, but can be passed OCFS2_BH_* flags */
+int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
+ int flags);
#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 99fe9d584f3c..302f1144a708 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -45,6 +45,7 @@
#include "slot_map.h"
#include "super.h"
#include "sysfile.h"
+#include "quota.h"
#include "buffer_head_io.h"
@@ -52,10 +53,10 @@ DEFINE_SPINLOCK(trans_inc_lock);
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
- int node_num);
+ int node_num, int slot_num);
static int __ocfs2_recovery_thread(void *arg);
static int ocfs2_commit_cache(struct ocfs2_super *osb);
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota);
static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
int dirty, int replayed);
static int ocfs2_trylock_journal(struct ocfs2_super *osb,
@@ -64,6 +65,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
int slot);
static int ocfs2_commit_thread(void *arg);
+static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+{
+ return __ocfs2_wait_on_mount(osb, 0);
+}
+
+static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
+{
+ return __ocfs2_wait_on_mount(osb, 1);
+}
+
+
/*
* The recovery_list is a simple linked list of node numbers to recover.
@@ -256,11 +268,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE);
BUG_ON(max_buffs <= 0);
- /* JBD might support this, but our journalling code doesn't yet. */
- if (journal_current_handle()) {
- mlog(ML_ERROR, "Recursive transaction attempted!\n");
- BUG();
- }
+ /* Nested transaction? Just return the handle... */
+ if (journal_current_handle())
+ return jbd2_journal_start(journal, max_buffs);
down_read(&osb->journal->j_trans_barrier);
@@ -285,16 +295,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle)
{
- int ret;
+ int ret, nested;
struct ocfs2_journal *journal = osb->journal;
BUG_ON(!handle);
+ nested = handle->h_ref > 1;
ret = jbd2_journal_stop(handle);
if (ret < 0)
mlog_errno(ret);
- up_read(&journal->j_trans_barrier);
+ if (!nested)
+ up_read(&journal->j_trans_barrier);
return ret;
}
@@ -434,20 +446,6 @@ int ocfs2_journal_dirty(handle_t *handle,
return status;
}
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
- struct buffer_head *bh)
-{
- int err = journal_dirty_data(handle, bh);
- if (err)
- mlog_errno(err);
- /* TODO: When we can handle it, abort the handle and go RO on
- * error here. */
-
- return err;
-}
-#endif
-
#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
void ocfs2_set_journal_params(struct ocfs2_super *osb)
@@ -587,17 +585,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
mlog_entry_void();
fe = (struct ocfs2_dinode *)bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- /* This is called from startup/shutdown which will
- * handle the errors in a specific manner, so no need
- * to call ocfs2_error() here. */
- mlog(ML_ERROR, "Journal dinode %llu has invalid "
- "signature: %.*s",
- (unsigned long long)le64_to_cpu(fe->i_blkno), 7,
- fe->i_signature);
- status = -EIO;
- goto out;
- }
+
+ /* The journal bh on the osb always comes from ocfs2_journal_init()
+ * and was validated there inside ocfs2_inode_lock_full(). It's a
+ * code bug if we mess it up. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
flags = le32_to_cpu(fe->id1.journal1.ij_flags);
if (dirty)
@@ -613,7 +605,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
if (status < 0)
mlog_errno(status);
-out:
mlog_exit(status);
return status;
}
@@ -878,6 +869,7 @@ struct ocfs2_la_recovery_item {
int lri_slot;
struct ocfs2_dinode *lri_la_dinode;
struct ocfs2_dinode *lri_tl_dinode;
+ struct ocfs2_quota_recovery *lri_qrec;
};
/* Does the second half of the recovery process. By this point, the
@@ -898,6 +890,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
struct ocfs2_super *osb = journal->j_osb;
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item, *n;
+ struct ocfs2_quota_recovery *qrec;
LIST_HEAD(tmp_la_list);
mlog_entry_void();
@@ -913,6 +906,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
+ ocfs2_wait_on_quotas(osb);
+
la_dinode = item->lri_la_dinode;
if (la_dinode) {
mlog(0, "Clean up local alloc %llu\n",
@@ -943,6 +938,16 @@ void ocfs2_complete_recovery(struct work_struct *work)
if (ret < 0)
mlog_errno(ret);
+ qrec = item->lri_qrec;
+ if (qrec) {
+ mlog(0, "Recovering quota files");
+ ret = ocfs2_finish_quota_recovery(osb, qrec,
+ item->lri_slot);
+ if (ret < 0)
+ mlog_errno(ret);
+ /* Recovery info is already freed now */
+ }
+
kfree(item);
}
@@ -956,7 +961,8 @@ void ocfs2_complete_recovery(struct work_struct *work)
static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
int slot_num,
struct ocfs2_dinode *la_dinode,
- struct ocfs2_dinode *tl_dinode)
+ struct ocfs2_dinode *tl_dinode,
+ struct ocfs2_quota_recovery *qrec)
{
struct ocfs2_la_recovery_item *item;
@@ -971,6 +977,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
if (tl_dinode)
kfree(tl_dinode);
+ if (qrec)
+ ocfs2_free_quota_recovery(qrec);
+
mlog_errno(-ENOMEM);
return;
}
@@ -979,6 +988,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
item->lri_la_dinode = la_dinode;
item->lri_slot = slot_num;
item->lri_tl_dinode = tl_dinode;
+ item->lri_qrec = qrec;
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
@@ -998,6 +1008,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
ocfs2_queue_recovery_completion(journal,
osb->slot_num,
osb->local_alloc_copy,
+ NULL,
NULL);
ocfs2_schedule_truncate_log_flush(osb, 0);
@@ -1006,11 +1017,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
}
}
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb)
+{
+ if (osb->quota_rec) {
+ ocfs2_queue_recovery_completion(osb->journal,
+ osb->slot_num,
+ NULL,
+ NULL,
+ osb->quota_rec);
+ osb->quota_rec = NULL;
+ }
+}
+
static int __ocfs2_recovery_thread(void *arg)
{
- int status, node_num;
+ int status, node_num, slot_num;
struct ocfs2_super *osb = arg;
struct ocfs2_recovery_map *rm = osb->recovery_map;
+ int *rm_quota = NULL;
+ int rm_quota_used = 0, i;
+ struct ocfs2_quota_recovery *qrec;
mlog_entry_void();
@@ -1019,6 +1045,11 @@ static int __ocfs2_recovery_thread(void *arg)
goto bail;
}
+ rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS);
+ if (!rm_quota) {
+ status = -ENOMEM;
+ goto bail;
+ }
restart:
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
@@ -1032,8 +1063,28 @@ restart:
* clear it until ocfs2_recover_node() has succeeded. */
node_num = rm->rm_entries[0];
spin_unlock(&osb->osb_lock);
-
- status = ocfs2_recover_node(osb, node_num);
+ mlog(0, "checking node %d\n", node_num);
+ slot_num = ocfs2_node_num_to_slot(osb, node_num);
+ if (slot_num == -ENOENT) {
+ status = 0;
+ mlog(0, "no slot for this node, so no recovery"
+ "required.\n");
+ goto skip_recovery;
+ }
+ mlog(0, "node %d was using slot %d\n", node_num, slot_num);
+
+ /* It is a bit subtle with quota recovery. We cannot do it
+ * immediately because we have to obtain cluster locks from
+ * quota files and we also don't want to just skip it because
+ * then quota usage would be out of sync until some node takes
+ * the slot. So we remember which nodes need quota recovery
+ * and when everything else is done, we recover quotas. */
+ for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
+ if (i == rm_quota_used)
+ rm_quota[rm_quota_used++] = slot_num;
+
+ status = ocfs2_recover_node(osb, node_num, slot_num);
+skip_recovery:
if (!status) {
ocfs2_recovery_map_clear(osb, node_num);
} else {
@@ -1055,13 +1106,27 @@ restart:
if (status < 0)
mlog_errno(status);
+ /* Now it is right time to recover quotas... We have to do this under
+ * superblock lock so that noone can start using the slot (and crash)
+ * before we recover it */
+ for (i = 0; i < rm_quota_used; i++) {
+ qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+ if (IS_ERR(qrec)) {
+ status = PTR_ERR(qrec);
+ mlog_errno(status);
+ continue;
+ }
+ ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
+ NULL, NULL, qrec);
+ }
+
ocfs2_super_unlock(osb, 1);
/* We always run recovery on our own orphan dir - the dead
* node(s) may have disallowd a previos inode delete. Re-processing
* is therefore required. */
ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL,
- NULL);
+ NULL, NULL);
bail:
mutex_lock(&osb->recovery_lock);
@@ -1076,6 +1141,9 @@ bail:
mutex_unlock(&osb->recovery_lock);
+ if (rm_quota)
+ kfree(rm_quota);
+
mlog_exit(status);
/* no one is callint kthread_stop() for us so the kthread() api
* requires that we call do_exit(). And it isn't exported, but
@@ -1135,8 +1203,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
}
SET_INODE_JOURNAL(inode);
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
- OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1304,31 +1371,19 @@ done:
* far less concerning.
*/
static int ocfs2_recover_node(struct ocfs2_super *osb,
- int node_num)
+ int node_num, int slot_num)
{
int status = 0;
- int slot_num;
struct ocfs2_dinode *la_copy = NULL;
struct ocfs2_dinode *tl_copy = NULL;
- mlog_entry("(node_num=%d, osb->node_num = %d)\n",
- node_num, osb->node_num);
-
- mlog(0, "checking node %d\n", node_num);
+ mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n",
+ node_num, slot_num, osb->node_num);
/* Should not ever be called to recover ourselves -- in that
* case we should've called ocfs2_journal_load instead. */
BUG_ON(osb->node_num == node_num);
- slot_num = ocfs2_node_num_to_slot(osb, node_num);
- if (slot_num == -ENOENT) {
- status = 0;
- mlog(0, "no slot for this node, so no recovery required.\n");
- goto done;
- }
-
- mlog(0, "node %d was using slot %d\n", node_num, slot_num);
-
status = ocfs2_replay_journal(osb, node_num, slot_num);
if (status < 0) {
if (status == -EBUSY) {
@@ -1364,7 +1419,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
/* This will kfree the memory pointed to by la_copy and tl_copy */
ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy,
- tl_copy);
+ tl_copy, NULL);
status = 0;
done:
@@ -1659,13 +1714,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
return ret;
}
-static int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota)
{
/* This check is good because ocfs2 will wait on our recovery
* thread before changing it to something other than MOUNTED
* or DISABLED. */
wait_event(osb->osb_mount_event,
- atomic_read(&osb->vol_state) == VOLUME_MOUNTED ||
+ (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) ||
+ atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS ||
atomic_read(&osb->vol_state) == VOLUME_DISABLED);
/* If there's an error on mount, then we may never get to the
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index d4d14e9a3cea..37013bf9ce28 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,12 +27,7 @@
#define OCFS2_JOURNAL_H
#include <linux/fs.h>
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# include <linux/jbd2.h>
-#else
-# include <linux/jbd.h>
-# include "ocfs2_jbd_compat.h"
-#endif
+#include <linux/jbd2.h>
enum ocfs2_journal_state {
OCFS2_JOURNAL_FREE = 0,
@@ -173,6 +168,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb,
int node_num);
int ocfs2_mark_dead_nodes(struct ocfs2_super *osb);
void ocfs2_complete_mount_recovery(struct ocfs2_super *osb);
+void ocfs2_complete_quota_recovery(struct ocfs2_super *osb);
static inline void ocfs2_start_checkpoint(struct ocfs2_super *osb)
{
@@ -273,10 +269,6 @@ int ocfs2_journal_access(handle_t *handle,
*/
int ocfs2_journal_dirty(handle_t *handle,
struct buffer_head *bh);
-#ifdef CONFIG_OCFS2_COMPAT_JBD
-int ocfs2_journal_dirty_data(handle_t *handle,
- struct buffer_head *bh);
-#endif
/*
* Credit Macros:
@@ -293,6 +285,37 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* extended attribute block update */
#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
+/* global quotafile inode update, data block */
+#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+
+/*
+ * The two writes below can accidentally see global info dirty due
+ * to set_info() quotactl so make them prepared for the writes.
+ */
+/* quota data block, global info */
+/* Write to local quota file */
+#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1)
+
+/* global quota data block, local quota data block, global quota inode,
+ * global quota info */
+#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3)
+
+static inline int ocfs2_quota_trans_credits(struct super_block *sb)
+{
+ int credits = 0;
+
+ if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA))
+ credits += OCFS2_QWRITE_CREDITS;
+ if (OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA))
+ credits += OCFS2_QWRITE_CREDITS;
+ return credits;
+}
+
+/* Number of credits needed for removing quota structure from file */
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
+/* Number of credits needed for initialization of new quota structure */
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
+
/* group extend. inode update and last group update. */
#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
@@ -303,8 +326,11 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* prev. group desc. if we relink. */
#define OCFS2_SUBALLOC_ALLOC (3)
-#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
- + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_inline_to_extents_credits(struct super_block *sb)
+{
+ return OCFS2_SUBALLOC_ALLOC + OCFS2_INODE_UPDATE_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* dinode + group descriptor update. We don't relink on free yet. */
#define OCFS2_SUBALLOC_FREE (2)
@@ -313,16 +339,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
+ OCFS2_TRUNCATE_LOG_UPDATE)
-#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
+static inline int ocfs2_remove_extent_credits(struct super_block *sb)
+{
+ return OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)
/* parent fe, parent block, new file entry, inode alloc fe, inode alloc
- * group descriptor + mkdir/symlink blocks */
-#define OCFS2_MKNOD_CREDITS (3 + OCFS2_SUBALLOC_ALLOC \
- + OCFS2_DIR_LINK_ADDITIONAL_CREDITS)
+ * group descriptor + mkdir/symlink blocks + quota update */
+static inline int ocfs2_mknod_credits(struct super_block *sb)
+{
+ return 3 + OCFS2_SUBALLOC_ALLOC + OCFS2_DIR_LINK_ADDITIONAL_CREDITS +
+ ocfs2_quota_trans_credits(sb);
+}
/* local alloc metadata change + main bitmap updates */
#define OCFS2_WINDOW_MOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS \
@@ -332,13 +365,21 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* for the dinode, one for the new block. */
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
-/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
-#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block + quota
+ * update on dir */
+static inline int ocfs2_link_credits(struct super_block *sb)
+{
+ return 2*OCFS2_INODE_UPDATE_CREDITS + 1 +
+ ocfs2_quota_trans_credits(sb);
+}
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
-#define OCFS2_UNLINK_CREDITS (2 * OCFS2_INODE_UPDATE_CREDITS + 1 \
- + OCFS2_LINK_CREDITS)
+static inline int ocfs2_unlink_credits(struct super_block *sb)
+{
+ /* The quota update from ocfs2_link_credits is unused here... */
+ return 2 * OCFS2_INODE_UPDATE_CREDITS + 1 + ocfs2_link_credits(sb);
+}
/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
* inode alloc group descriptor */
@@ -347,8 +388,10 @@ int ocfs2_journal_dirty_data(handle_t *handle,
/* dinode update, old dir dinode update, new dir dinode update, old
* dir dir entry, new dir dir entry, dir entry update for renaming
* directory + target unlink */
-#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
- + OCFS2_UNLINK_CREDITS)
+static inline int ocfs2_rename_credits(struct super_block *sb)
+{
+ return 3 * OCFS2_INODE_UPDATE_CREDITS + 3 + ocfs2_unlink_credits(sb);
+}
/* global bitmap dinode, group desc., relinked group,
* suballocator dinode, group desc., relinked group,
@@ -386,18 +429,19 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
* credit for the dinode there. */
extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
- return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
+ return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
+ ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
{
- int blocks = OCFS2_MKNOD_CREDITS;
+ int blocks = ocfs2_mknod_credits(sb);
/* links can be longer than one block so we may update many
* within our single allocated extent. */
blocks += ocfs2_clusters_to_blocks(sb, 1);
- return blocks;
+ return blocks + ocfs2_quota_trans_credits(sb);
}
static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
@@ -434,6 +478,8 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
/* update to the truncate log. */
credits += OCFS2_TRUNCATE_LOG_UPDATE;
+ credits += ocfs2_quota_trans_credits(sb);
+
return credits;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 687b28713c32..19cfb1b9ce09 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -248,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
goto bail;
}
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
- &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+ OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -459,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
mutex_lock(&inode->i_mutex);
- status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
- &alloc_bh, OCFS2_BH_IGNORE_CACHE);
+ status = ocfs2_read_inode_block_full(inode, &alloc_bh,
+ OCFS2_BH_IGNORE_CACHE);
if (status < 0) {
mlog_errno(status);
goto bail;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2545e7402efe..6173807ba23b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -40,6 +40,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_NAMEI
#include <cluster/masklog.h>
@@ -61,17 +62,18 @@
#include "sysfile.h"
#include "uptodate.h"
#include "xattr.h"
+#include "acl.h"
#include "buffer_head_io.h"
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
- struct dentry *dentry, int mode,
+ struct inode *inode,
+ struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
- struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac);
static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
@@ -186,6 +188,35 @@ bail:
return ret;
}
+static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
+{
+ struct inode *inode;
+
+ inode = new_inode(dir->i_sb);
+ if (!inode) {
+ mlog(ML_ERROR, "new_inode failed!\n");
+ return NULL;
+ }
+
+ /* populate as many fields early on as possible - many of
+ * these are used by the support functions here and in
+ * callers. */
+ if (S_ISDIR(mode))
+ inode->i_nlink = 2;
+ else
+ inode->i_nlink = 1;
+ inode->i_uid = current_fsuid();
+ if (dir->i_mode & S_ISGID) {
+ inode->i_gid = dir->i_gid;
+ if (S_ISDIR(mode))
+ mode |= S_ISGID;
+ } else
+ inode->i_gid = current_fsgid();
+ inode->i_mode = mode;
+ vfs_dq_init(inode);
+ return inode;
+}
+
static int ocfs2_mknod(struct inode *dir,
struct dentry *dentry,
int mode,
@@ -201,6 +232,13 @@ static int ocfs2_mknod(struct inode *dir,
struct inode *inode = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *xattr_ac = NULL;
+ int want_clusters = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
+ .enable = 1,
+ };
+ int did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
(unsigned long)dev, dentry->d_name.len,
@@ -250,17 +288,46 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- /* Reserve a cluster if creating an extent based directory. */
- if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
- status = ocfs2_reserve_clusters(osb, 1, &data_ac);
- if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ inode = ocfs2_get_init_inode(dir, mode);
+ if (!inode) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* get security xattr */
+ status = ocfs2_init_security_get(inode, dir, &si);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ si.enable = 0;
+ else {
+ mlog_errno(status);
goto leave;
}
}
- handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
+ /* calculate meta data/clusters for setting security and acl xattr */
+ status = ocfs2_calc_xattr_init(dir, parent_fe_bh, mode,
+ &si, &want_clusters,
+ &xattr_credits, &xattr_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ /* Reserve a cluster if creating an extent based directory. */
+ if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb))
+ want_clusters += 1;
+
+ status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto leave;
+ }
+
+ handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb) +
+ xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -268,10 +335,19 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
+ /* We don't use standard VFS wrapper because we don't want vfs_dq_init
+ * to be called. */
+ if (sb_any_quota_active(osb->sb) &&
+ osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+ status = -EDQUOT;
+ goto leave;
+ }
+ did_quota_inode = 1;
+
/* do the real work now. */
- status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
+ status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev,
&new_fe_bh, parent_fe_bh, handle,
- &inode, inode_ac);
+ inode_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
@@ -300,6 +376,22 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
+ status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+
+ if (si.enable) {
+ status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto leave;
+ }
+ }
+
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
de_bh);
@@ -320,6 +412,8 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
+ if (status < 0 && did_quota_inode)
+ vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -331,9 +425,13 @@ leave:
brelse(new_fe_bh);
brelse(de_bh);
brelse(parent_fe_bh);
+ kfree(si.name);
+ kfree(si.value);
- if ((status < 0) && inode)
+ if ((status < 0) && inode) {
+ clear_nlink(inode);
iput(inode);
+ }
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
@@ -341,6 +439,9 @@ leave:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
+ if (xattr_ac)
+ ocfs2_free_alloc_context(xattr_ac);
+
mlog_exit(status);
return status;
@@ -348,12 +449,12 @@ leave:
static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct inode *dir,
- struct dentry *dentry, int mode,
+ struct inode *inode,
+ struct dentry *dentry,
dev_t dev,
struct buffer_head **new_fe_bh,
struct buffer_head *parent_fe_bh,
handle_t *handle,
- struct inode **ret_inode,
struct ocfs2_alloc_context *inode_ac)
{
int status = 0;
@@ -361,14 +462,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
struct ocfs2_extent_list *fel;
u64 fe_blkno = 0;
u16 suballoc_bit;
- struct inode *inode = NULL;
- mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
- (unsigned long)dev, dentry->d_name.len,
+ mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
+ inode->i_mode, (unsigned long)dev, dentry->d_name.len,
dentry->d_name.name);
*new_fe_bh = NULL;
- *ret_inode = NULL;
status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
&fe_blkno);
@@ -377,23 +476,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
- inode = new_inode(dir->i_sb);
- if (!inode) {
- status = -ENOMEM;
- mlog(ML_ERROR, "new_inode failed!\n");
- goto leave;
- }
-
/* populate as many fields early on as possible - many of
* these are used by the support functions here and in
* callers. */
inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
OCFS2_I(inode)->ip_blkno = fe_blkno;
- if (S_ISDIR(mode))
- inode->i_nlink = 2;
- else
- inode->i_nlink = 1;
- inode->i_mode = mode;
spin_lock(&osb->osb_lock);
inode->i_generation = osb->s_next_generation++;
spin_unlock(&osb->osb_lock);
@@ -421,17 +508,11 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
fe->i_blkno = cpu_to_le64(fe_blkno);
fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
- fe->i_uid = cpu_to_le32(current_fsuid());
- if (dir->i_mode & S_ISGID) {
- fe->i_gid = cpu_to_le32(dir->i_gid);
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else
- fe->i_gid = cpu_to_le32(current_fsgid());
- fe->i_mode = cpu_to_le16(mode);
- if (S_ISCHR(mode) || S_ISBLK(mode))
+ fe->i_uid = cpu_to_le32(inode->i_uid);
+ fe->i_gid = cpu_to_le32(inode->i_gid);
+ fe->i_mode = cpu_to_le16(inode->i_mode);
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
-
fe->i_links_count = cpu_to_le16(inode->i_nlink);
fe->i_last_eb_blk = 0;
@@ -446,7 +527,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
/*
* If supported, directories start with inline data.
*/
- if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
+ if (S_ISDIR(inode->i_mode) && ocfs2_supports_inline_data(osb)) {
u16 feat = le16_to_cpu(fe->i_dyn_features);
fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
@@ -465,15 +546,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
goto leave;
}
- if (ocfs2_populate_inode(inode, fe, 1) < 0) {
- mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
- "i_blkno=%llu, i_ino=%lu\n",
- (unsigned long long)(*new_fe_bh)->b_blocknr,
- (unsigned long long)le64_to_cpu(fe->i_blkno),
- inode->i_ino);
- BUG();
- }
-
+ ocfs2_populate_inode(inode, fe, 1);
ocfs2_inode_set_new(osb, inode);
if (!ocfs2_mount_local(osb)) {
status = ocfs2_create_new_inode_locks(inode);
@@ -484,17 +557,12 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
status = 0; /* error in ocfs2_create_new_inode_locks is not
* critical */
- *ret_inode = inode;
leave:
if (status < 0) {
if (*new_fe_bh) {
brelse(*new_fe_bh);
*new_fe_bh = NULL;
}
- if (inode) {
- clear_nlink(inode);
- iput(inode);
- }
}
mlog_exit(status);
@@ -588,7 +656,7 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out_unlock_inode;
}
- handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_link_credits(osb->sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
handle = NULL;
@@ -775,7 +843,7 @@ static int ocfs2_unlink(struct inode *dir,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_unlink_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1181,7 +1249,7 @@ static int ocfs2_rename(struct inode *old_dir,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1496,6 +1564,13 @@ static int ocfs2_symlink(struct inode *dir,
handle_t *handle = NULL;
struct ocfs2_alloc_context *inode_ac = NULL;
struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *xattr_ac = NULL;
+ int want_clusters = 0;
+ int xattr_credits = 0;
+ struct ocfs2_security_xattr_info si = {
+ .enable = 1,
+ };
+ int did_quota = 0, did_quota_inode = 0;
mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1542,17 +1617,46 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
- /* don't reserve bitmap space for fast symlinks. */
- if (l > ocfs2_fast_symlink_chars(sb)) {
- status = ocfs2_reserve_clusters(osb, 1, &data_ac);
+ inode = ocfs2_get_init_inode(dir, S_IFLNK | S_IRWXUGO);
+ if (!inode) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto bail;
+ }
+
+ /* get security xattr */
+ status = ocfs2_init_security_get(inode, dir, &si);
+ if (status) {
+ if (status == -EOPNOTSUPP)
+ si.enable = 0;
+ else {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
+ /* calculate meta data/clusters for setting security xattr */
+ if (si.enable) {
+ status = ocfs2_calc_security_init(dir, &si, &want_clusters,
+ &xattr_credits, &xattr_ac);
if (status < 0) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ mlog_errno(status);
goto bail;
}
}
- handle = ocfs2_start_trans(osb, credits);
+ /* don't reserve bitmap space for fast symlinks. */
+ if (l > ocfs2_fast_symlink_chars(sb))
+ want_clusters += 1;
+
+ status = ocfs2_reserve_clusters(osb, want_clusters, &data_ac);
+ if (status < 0) {
+ if (status != -ENOSPC)
+ mlog_errno(status);
+ goto bail;
+ }
+
+ handle = ocfs2_start_trans(osb, credits + xattr_credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
handle = NULL;
@@ -1560,10 +1664,18 @@ static int ocfs2_symlink(struct inode *dir,
goto bail;
}
- status = ocfs2_mknod_locked(osb, dir, dentry,
- S_IFLNK | S_IRWXUGO, 0,
- &new_fe_bh, parent_fe_bh, handle,
- &inode, inode_ac);
+ /* We don't use standard VFS wrapper because we don't want vfs_dq_init
+ * to be called. */
+ if (sb_any_quota_active(osb->sb) &&
+ osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota_inode = 1;
+
+ status = ocfs2_mknod_locked(osb, dir, inode, dentry,
+ 0, &new_fe_bh, parent_fe_bh, handle,
+ inode_ac);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -1576,6 +1688,12 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
+ if (vfs_dq_alloc_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1))) {
+ status = -EDQUOT;
+ goto bail;
+ }
+ did_quota = 1;
status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
@@ -1614,6 +1732,15 @@ static int ocfs2_symlink(struct inode *dir,
}
}
+ if (si.enable) {
+ status = ocfs2_init_security_set(handle, inode, new_fe_bh, &si,
+ xattr_ac, data_ac);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
de_bh);
@@ -1632,6 +1759,11 @@ static int ocfs2_symlink(struct inode *dir,
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
bail:
+ if (status < 0 && did_quota)
+ vfs_dq_free_space_nodirty(inode,
+ ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (status < 0 && did_quota_inode)
+ vfs_dq_free_inode(inode);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -1640,12 +1772,18 @@ bail:
brelse(new_fe_bh);
brelse(parent_fe_bh);
brelse(de_bh);
+ kfree(si.name);
+ kfree(si.value);
if (inode_ac)
ocfs2_free_alloc_context(inode_ac);
if (data_ac)
ocfs2_free_alloc_context(data_ac);
- if ((status < 0) && inode)
+ if (xattr_ac)
+ ocfs2_free_alloc_context(xattr_ac);
+ if ((status < 0) && inode) {
+ clear_nlink(inode);
iput(inode);
+ }
mlog_exit(status);
@@ -1754,9 +1892,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
- status = ocfs2_read_block(orphan_dir_inode,
- OCFS2_I(orphan_dir_inode)->ip_blkno,
- &orphan_dir_bh);
+ status = ocfs2_read_inode_block(orphan_dir_inode, &orphan_dir_bh);
if (status < 0) {
mlog_errno(status);
goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index fef7ece32376..5c777988042f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -85,7 +85,7 @@ enum ocfs2_unlock_action {
};
/* ocfs2_lock_res->l_flags flags. */
-#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized
+#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized
* the lvb */
#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
* dlm_lock */
@@ -161,6 +161,7 @@ enum ocfs2_vol_state
{
VOLUME_INIT = 0,
VOLUME_MOUNTED,
+ VOLUME_MOUNTED_QUOTAS,
VOLUME_DISMOUNTED,
VOLUME_DISABLED
};
@@ -195,6 +196,9 @@ enum ocfs2_mount_options
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
+ OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
+ OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
+ OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
};
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -205,6 +209,7 @@ enum ocfs2_mount_options
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
+struct ocfs2_quota_recovery;
struct ocfs2_super
{
struct task_struct *commit_task;
@@ -286,10 +291,11 @@ struct ocfs2_super
char *local_alloc_debug_buf;
#endif
- /* Next two fields are for local node slot recovery during
+ /* Next three fields are for local node slot recovery during
* mount. */
int dirty;
struct ocfs2_dinode *local_alloc_copy;
+ struct ocfs2_quota_recovery *quota_rec;
struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
@@ -443,35 +449,12 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
#define OCFS2_IS_VALID_DINODE(ptr) \
(!strcmp((ptr)->i_signature, OCFS2_INODE_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_DINODE(__sb, __di) do { \
- typeof(__di) ____di = (__di); \
- ocfs2_error((__sb), \
- "Dinode # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____di)->i_blkno), 7, \
- (____di)->i_signature); \
-} while (0)
-
#define OCFS2_IS_VALID_EXTENT_BLOCK(ptr) \
(!strcmp((ptr)->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_EXTENT_BLOCK(__sb, __eb) do { \
- typeof(__eb) ____eb = (__eb); \
- ocfs2_error((__sb), \
- "Extent Block # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____eb)->h_blkno), 7, \
- (____eb)->h_signature); \
-} while (0)
-
#define OCFS2_IS_VALID_GROUP_DESC(ptr) \
(!strcmp((ptr)->bg_signature, OCFS2_GROUP_DESC_SIGNATURE))
-#define OCFS2_RO_ON_INVALID_GROUP_DESC(__sb, __gd) do { \
- typeof(__gd) ____gd = (__gd); \
- ocfs2_error((__sb), \
- "Group Descriptor # %llu has bad signature %.*s", \
- (unsigned long long)le64_to_cpu((____gd)->bg_blkno), 7, \
- (____gd)->bg_signature); \
-} while (0)
#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
(!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 5f180cf7abbd..4ae3984366ee 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -93,7 +93,9 @@
| OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
| OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
| OCFS2_FEATURE_INCOMPAT_XATTR)
-#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
+ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
/*
* Heartbeat-only devices are missing journals and other files. The
@@ -157,6 +159,12 @@
*/
#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
+/*
+ * Maintain quota information for this filesystem
+ */
+#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002
+#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004
+
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
*/
@@ -186,6 +194,7 @@
#define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */
#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
+#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */
/*
* Flags on ocfs2_dinode.i_dyn_features
@@ -323,13 +332,17 @@ enum {
#define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE
HEARTBEAT_SYSTEM_INODE,
GLOBAL_BITMAP_SYSTEM_INODE,
-#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE
+ USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE,
+#define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE
ORPHAN_DIR_SYSTEM_INODE,
EXTENT_ALLOC_SYSTEM_INODE,
INODE_ALLOC_SYSTEM_INODE,
JOURNAL_SYSTEM_INODE,
LOCAL_ALLOC_SYSTEM_INODE,
TRUNCATE_LOG_SYSTEM_INODE,
+ LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE,
NUM_SYSTEM_INODES
};
@@ -343,6 +356,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 },
[HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 },
[GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 },
+ [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+ [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 },
/* Slot-specific system inodes (one copy per slot) */
[ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 },
@@ -350,7 +365,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = {
[INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_CHAIN_FL, S_IFREG | 0644 },
[JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 },
[LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 },
- [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }
+ [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 },
+ [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
+ [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 },
};
/* Parameter passed from mount.ocfs2 to module */
@@ -862,6 +879,109 @@ static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
}
+/*
+ * On disk structures for global quota file
+ */
+
+/* Magic numbers and known versions for global quota files */
+#define OCFS2_GLOBAL_QMAGICS {\
+ 0x0cf52470, /* USRQUOTA */ \
+ 0x0cf52471 /* GRPQUOTA */ \
+}
+
+#define OCFS2_GLOBAL_QVERSIONS {\
+ 0, \
+ 0, \
+}
+
+
+/* Each block of each quota file has a certain fixed number of bytes reserved
+ * for OCFS2 internal use at its end. OCFS2 can use it for things like
+ * checksums, etc. */
+#define OCFS2_QBLK_RESERVED_SPACE 8
+
+/* Generic header of all quota files */
+struct ocfs2_disk_dqheader {
+ __le32 dqh_magic; /* Magic number identifying file */
+ __le32 dqh_version; /* Quota format version */
+};
+
+#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of global quota file (immediately follows the generic
+ * header) */
+struct ocfs2_global_disk_dqinfo {
+/*00*/ __le32 dqi_bgrace; /* Grace time for space softlimit excess */
+ __le32 dqi_igrace; /* Grace time for inode softlimit excess */
+ __le32 dqi_syncms; /* Time after which we sync local changes to
+ * global quota file */
+ __le32 dqi_blocks; /* Number of blocks in quota file */
+/*10*/ __le32 dqi_free_blk; /* First free block in quota file */
+ __le32 dqi_free_entry; /* First block with free dquot entry in quota
+ * file */
+};
+
+/* Structure with global user / group information. We reserve some space
+ * for future use. */
+struct ocfs2_global_disk_dqblk {
+/*00*/ __le32 dqb_id; /* ID the structure belongs to */
+ __le32 dqb_use_count; /* Number of nodes having reference to this structure */
+ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */
+/*10*/ __le64 dqb_isoftlimit; /* preferred inode limit */
+ __le64 dqb_curinodes; /* current # allocated inodes */
+/*20*/ __le64 dqb_bhardlimit; /* absolute limit on disk space */
+ __le64 dqb_bsoftlimit; /* preferred limit on disk space */
+/*30*/ __le64 dqb_curspace; /* current space occupied */
+ __le64 dqb_btime; /* time limit for excessive disk use */
+/*40*/ __le64 dqb_itime; /* time limit for excessive inode use */
+ __le64 dqb_pad1;
+/*50*/ __le64 dqb_pad2;
+};
+
+/*
+ * On-disk structures for local quota file
+ */
+
+/* Magic numbers and known versions for local quota files */
+#define OCFS2_LOCAL_QMAGICS {\
+ 0x0cf524c0, /* USRQUOTA */ \
+ 0x0cf524c1 /* GRPQUOTA */ \
+}
+
+#define OCFS2_LOCAL_QVERSIONS {\
+ 0, \
+ 0, \
+}
+
+/* Quota flags in dqinfo header */
+#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\
+ * quota has been cleanly turned off) */
+
+#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader))
+
+/* Information header of local quota file (immediately follows the generic
+ * header) */
+struct ocfs2_local_disk_dqinfo {
+ __le32 dqi_flags; /* Flags for quota file */
+ __le32 dqi_chunks; /* Number of chunks of quota structures
+ * with a bitmap */
+ __le32 dqi_blocks; /* Number of blocks allocated for quota file */
+};
+
+/* Header of one chunk of a quota file */
+struct ocfs2_local_disk_chunk {
+ __le32 dqc_free; /* Number of free entries in the bitmap */
+ u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
+ * chunk of quota file */
+};
+
+/* One entry in local quota file */
+struct ocfs2_local_disk_dqblk {
+/*00*/ __le64 dqb_id; /* id this quota applies to */
+ __le64 dqb_spacemod; /* Change in the amount of used space */
+/*10*/ __le64 dqb_inodemod; /* Change in the amount of used inodes */
+};
+
#ifdef __KERNEL__
static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
{
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
deleted file mode 100644
index b91c78f8f558..000000000000
--- a/fs/ocfs2/ocfs2_jbd_compat.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ocfs2_jbd_compat.h
- *
- * Compatibility defines for JBD.
- *
- * Copyright (C) 2008 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-#ifndef OCFS2_JBD_COMPAT_H
-#define OCFS2_JBD_COMPAT_H
-
-#ifndef CONFIG_OCFS2_COMPAT_JBD
-# error Should not have been included
-#endif
-
-struct jbd2_inode {
- unsigned int dummy;
-};
-
-#define JBD2_BARRIER JFS_BARRIER
-#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
-
-#define jbd2_journal_ack_err journal_ack_err
-#define jbd2_journal_clear_err journal_clear_err
-#define jbd2_journal_destroy journal_destroy
-#define jbd2_journal_dirty_metadata journal_dirty_metadata
-#define jbd2_journal_errno journal_errno
-#define jbd2_journal_extend journal_extend
-#define jbd2_journal_flush journal_flush
-#define jbd2_journal_force_commit journal_force_commit
-#define jbd2_journal_get_write_access journal_get_write_access
-#define jbd2_journal_get_undo_access journal_get_undo_access
-#define jbd2_journal_init_inode journal_init_inode
-#define jbd2_journal_invalidatepage journal_invalidatepage
-#define jbd2_journal_load journal_load
-#define jbd2_journal_lock_updates journal_lock_updates
-#define jbd2_journal_restart journal_restart
-#define jbd2_journal_start journal_start
-#define jbd2_journal_start_commit journal_start_commit
-#define jbd2_journal_stop journal_stop
-#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
-#define jbd2_journal_unlock_updates journal_unlock_updates
-#define jbd2_journal_wipe journal_wipe
-#define jbd2_log_wait_commit log_wait_commit
-
-static inline int jbd2_journal_file_inode(handle_t *handle,
- struct jbd2_inode *inode)
-{
- return 0;
-}
-
-static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
- loff_t new_size)
-{
- return 0;
-}
-
-static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
- struct inode *inode)
-{
- return;
-}
-
-static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
- struct jbd2_inode *jinode)
-{
- return;
-}
-
-
-#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 82c200f7a8f1..eb6f50c9ceca 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -46,6 +46,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_DENTRY,
OCFS2_LOCK_TYPE_OPEN,
OCFS2_LOCK_TYPE_FLOCK,
+ OCFS2_LOCK_TYPE_QINFO,
OCFS2_NUM_LOCK_TYPES
};
@@ -77,6 +78,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_FLOCK:
c = 'F';
break;
+ case OCFS2_LOCK_TYPE_QINFO:
+ c = 'Q';
+ break;
default:
c = '\0';
}
@@ -95,6 +99,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
+ [OCFS2_LOCK_TYPE_QINFO] = "Quota",
};
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
new file mode 100644
index 000000000000..abf694155d21
--- /dev/null
+++ b/fs/ocfs2/quota.h
@@ -0,0 +1,117 @@
+/*
+ * quota.h for OCFS2
+ *
+ * On disk quota structures for local and global quota file, in-memory
+ * structures.
+ *
+ */
+
+#ifndef _OCFS2_QUOTA_H
+#define _OCFS2_QUOTA_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/quota.h>
+#include <linux/list.h>
+#include <linux/dqblk_qtree.h>
+#include <linux/timer.h>
+
+#include "ocfs2.h"
+
+/* Common stuff */
+/* id number of quota format */
+#define QFMT_OCFS2 3
+
+/*
+ * In-memory structures
+ */
+struct ocfs2_dquot {
+ struct dquot dq_dquot; /* Generic VFS dquot */
+ loff_t dq_local_off; /* Offset in the local quota file */
+ struct ocfs2_quota_chunk *dq_chunk; /* Chunk dquot is in */
+ unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */
+ s64 dq_origspace; /* Last globally synced space usage */
+ s64 dq_originodes; /* Last globally synced inode usage */
+};
+
+/* Description of one chunk to recover in memory */
+struct ocfs2_recovery_chunk {
+ struct list_head rc_list; /* List of chunks */
+ int rc_chunk; /* Chunk number */
+ unsigned long *rc_bitmap; /* Bitmap of entries to recover */
+};
+
+struct ocfs2_quota_recovery {
+ struct list_head r_list[MAXQUOTAS]; /* List of chunks to recover */
+};
+
+/* In-memory structure with quota header information */
+struct ocfs2_mem_dqinfo {
+ unsigned int dqi_type; /* Quota type this structure describes */
+ unsigned int dqi_chunks; /* Number of chunks in local quota file */
+ unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
+ unsigned int dqi_syncms; /* How often should we sync with other nodes */
+ unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
+ struct list_head dqi_chunk; /* List of chunks */
+ struct inode *dqi_gqinode; /* Global quota file inode */
+ struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
+ struct buffer_head *dqi_gqi_bh; /* Buffer head with global quota file inode - set only if inode lock is obtained */
+ int dqi_gqi_count; /* Number of holders of dqi_gqi_bh */
+ struct buffer_head *dqi_lqi_bh; /* Buffer head with local quota file inode */
+ struct buffer_head *dqi_ibh; /* Buffer with information header */
+ struct qtree_mem_dqinfo dqi_gi; /* Info about global file */
+ struct timer_list dqi_sync_timer; /* Timer for syncing dquots */
+ struct ocfs2_quota_recovery *dqi_rec; /* Pointer to recovery
+ * information, in case we
+ * enable quotas on file
+ * needing it */
+};
+
+static inline struct ocfs2_dquot *OCFS2_DQUOT(struct dquot *dquot)
+{
+ return container_of(dquot, struct ocfs2_dquot, dq_dquot);
+}
+
+struct ocfs2_quota_chunk {
+ struct list_head qc_chunk; /* List of quotafile chunks */
+ int qc_num; /* Number of quota chunk */
+ struct buffer_head *qc_headerbh; /* Buffer head with chunk header */
+};
+
+extern struct kmem_cache *ocfs2_dquot_cachep;
+extern struct kmem_cache *ocfs2_qf_chunk_cachep;
+
+extern struct qtree_fmt_operations ocfs2_global_ops;
+
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+ struct ocfs2_super *osb, int slot_num);
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+ struct ocfs2_quota_recovery *rec,
+ int slot_num);
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec);
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off);
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off);
+int ocfs2_global_read_info(struct super_block *sb, int type);
+int ocfs2_global_write_info(struct super_block *sb, int type);
+int ocfs2_global_read_dquot(struct dquot *dquot);
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
+static inline int ocfs2_sync_dquot(struct dquot *dquot)
+{
+ return __ocfs2_sync_dquot(dquot, 0);
+}
+static inline int ocfs2_global_release_dquot(struct dquot *dquot)
+{
+ return __ocfs2_sync_dquot(dquot, 1);
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex);
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh);
+
+extern struct dquot_operations ocfs2_quota_operations;
+extern struct quota_format_type ocfs2_quota_format;
+
+#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
new file mode 100644
index 000000000000..4b38a2ef7aae
--- /dev/null
+++ b/fs/ocfs2/quota_global.c
@@ -0,0 +1,990 @@
+/*
+ * Implementation of operations over global quota file
+ */
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/dqblk_qtree.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/writeback.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "alloc.h"
+#include "inode.h"
+#include "journal.h"
+#include "file.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "uptodate.h"
+#include "quota.h"
+
+static void qsync_timer_fn(unsigned long oinfo_ptr);
+
+static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
+ /* Update from disk only entries not set by the admin */
+ if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) {
+ m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit);
+ m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit);
+ }
+ if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+ m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes);
+ if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) {
+ m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit);
+ m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit);
+ }
+ if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+ m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
+ if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags))
+ m->dqb_btime = le64_to_cpu(d->dqb_btime);
+ if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags))
+ m->dqb_itime = le64_to_cpu(d->dqb_itime);
+ OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count);
+}
+
+static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
+ d->dqb_id = cpu_to_le32(dquot->dq_id);
+ d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count);
+ d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit);
+ d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit);
+ d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes);
+ d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit);
+ d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit);
+ d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
+ d->dqb_btime = cpu_to_le64(m->dqb_btime);
+ d->dqb_itime = cpu_to_le64(m->dqb_itime);
+}
+
+static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
+{
+ struct ocfs2_global_disk_dqblk *d = dp;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+ if (qtree_entry_unused(&oinfo->dqi_gi, dp))
+ return 0;
+ return le32_to_cpu(d->dqb_id) == dquot->dq_id;
+}
+
+struct qtree_fmt_operations ocfs2_global_ops = {
+ .mem2disk_dqblk = ocfs2_global_mem2diskdqb,
+ .disk2mem_dqblk = ocfs2_global_disk2memdqb,
+ .is_id = ocfs2_global_is_id,
+};
+
+int ocfs2_read_quota_block(struct inode *inode, u64 v_block,
+ struct buffer_head **bh)
+{
+ int rc = 0;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, 0, NULL);
+ if (rc)
+ mlog_errno(rc);
+
+ /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
+static int ocfs2_get_quota_block(struct inode *inode, int block,
+ struct buffer_head **bh)
+{
+ u64 pblock, pcount;
+ int err;
+
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ err = ocfs2_extent_map_get_blocks(inode, block, &pblock, &pcount, NULL);
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ *bh = sb_getblk(inode->i_sb, pblock);
+ if (!*bh) {
+ err = -EIO;
+ mlog_errno(err);
+ }
+ return err;;
+}
+
+/* Read data from global quotafile - avoid pagecache and such because we cannot
+ * afford acquiring the locks... We use quota cluster lock to serialize
+ * operations. Caller is responsible for acquiring it. */
+ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data,
+ size_t len, loff_t off)
+{
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct inode *gqinode = oinfo->dqi_gqinode;
+ loff_t i_size = i_size_read(gqinode);
+ int offset = off & (sb->s_blocksize - 1);
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0;
+ struct buffer_head *bh;
+ size_t toread, tocopy;
+
+ if (off > i_size)
+ return 0;
+ if (off + len > i_size)
+ len = i_size - off;
+ toread = len;
+ while (toread > 0) {
+ tocopy = min((size_t)(sb->s_blocksize - offset), toread);
+ bh = NULL;
+ err = ocfs2_read_quota_block(gqinode, blk, &bh);
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ memcpy(data, bh->b_data + offset, tocopy);
+ brelse(bh);
+ offset = 0;
+ toread -= tocopy;
+ data += tocopy;
+ blk++;
+ }
+ return len;
+}
+
+/* Write to quotafile (we know the transaction is already started and has
+ * enough credits) */
+ssize_t ocfs2_quota_write(struct super_block *sb, int type,
+ const char *data, size_t len, loff_t off)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct inode *gqinode = oinfo->dqi_gqinode;
+ int offset = off & (sb->s_blocksize - 1);
+ sector_t blk = off >> sb->s_blocksize_bits;
+ int err = 0, new = 0, ja_type;
+ struct buffer_head *bh = NULL;
+ handle_t *handle = journal_current_handle();
+
+ if (!handle) {
+ mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled "
+ "because transaction was not started.\n",
+ (unsigned long long)off, (unsigned long long)len);
+ return -EIO;
+ }
+ if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) {
+ WARN_ON(1);
+ len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
+ }
+
+ mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
+ if (gqinode->i_size < off + len) {
+ down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ err = ocfs2_extend_no_holes(gqinode, off + len, off);
+ up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
+ if (err < 0)
+ goto out;
+ err = ocfs2_simple_size_update(gqinode,
+ oinfo->dqi_gqi_bh,
+ off + len);
+ if (err < 0)
+ goto out;
+ new = 1;
+ }
+ /* Not rewriting whole block? */
+ if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) &&
+ !new) {
+ err = ocfs2_read_quota_block(gqinode, blk, &bh);
+ ja_type = OCFS2_JOURNAL_ACCESS_WRITE;
+ } else {
+ err = ocfs2_get_quota_block(gqinode, blk, &bh);
+ ja_type = OCFS2_JOURNAL_ACCESS_CREATE;
+ }
+ if (err) {
+ mlog_errno(err);
+ return err;
+ }
+ lock_buffer(bh);
+ if (new)
+ memset(bh->b_data, 0, sb->s_blocksize);
+ memcpy(bh->b_data + offset, data, len);
+ flush_dcache_page(bh->b_page);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ ocfs2_set_buffer_uptodate(gqinode, bh);
+ err = ocfs2_journal_access(handle, gqinode, bh, ja_type);
+ if (err < 0) {
+ brelse(bh);
+ goto out;
+ }
+ err = ocfs2_journal_dirty(handle, bh);
+ brelse(bh);
+ if (err < 0)
+ goto out;
+out:
+ if (err) {
+ mutex_unlock(&gqinode->i_mutex);
+ mlog_errno(err);
+ return err;
+ }
+ gqinode->i_version++;
+ ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh);
+ mutex_unlock(&gqinode->i_mutex);
+ return len;
+}
+
+int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ int status;
+ struct buffer_head *bh = NULL;
+
+ status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
+ if (status < 0)
+ return status;
+ spin_lock(&dq_data_lock);
+ if (!oinfo->dqi_gqi_count++)
+ oinfo->dqi_gqi_bh = bh;
+ else
+ WARN_ON(bh != oinfo->dqi_gqi_bh);
+ spin_unlock(&dq_data_lock);
+ return 0;
+}
+
+void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
+{
+ ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
+ brelse(oinfo->dqi_gqi_bh);
+ spin_lock(&dq_data_lock);
+ if (!--oinfo->dqi_gqi_count)
+ oinfo->dqi_gqi_bh = NULL;
+ spin_unlock(&dq_data_lock);
+}
+
+/* Read information header from global quota file */
+int ocfs2_global_read_info(struct super_block *sb, int type)
+{
+ struct inode *gqinode = NULL;
+ unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE };
+ struct ocfs2_global_disk_dqinfo dinfo;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ int status;
+
+ mlog_entry_void();
+
+ /* Read global header */
+ gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+ OCFS2_INVALID_SLOT);
+ if (!gqinode) {
+ mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n",
+ type);
+ status = -EINVAL;
+ goto out_err;
+ }
+ oinfo->dqi_gi.dqi_sb = sb;
+ oinfo->dqi_gi.dqi_type = type;
+ ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo);
+ oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk);
+ oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops;
+ oinfo->dqi_gqi_bh = NULL;
+ oinfo->dqi_gqi_count = 0;
+ oinfo->dqi_gqinode = gqinode;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
+ sizeof(struct ocfs2_global_disk_dqinfo),
+ OCFS2_GLOBAL_INFO_OFF);
+ ocfs2_unlock_global_qf(oinfo, 0);
+ if (status != sizeof(struct ocfs2_global_disk_dqinfo)) {
+ mlog(ML_ERROR, "Cannot read global quota info (%d).\n",
+ status);
+ if (status >= 0)
+ status = -EIO;
+ mlog_errno(status);
+ goto out_err;
+ }
+ info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
+ info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
+ oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
+ oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
+ oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+ oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+ oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits;
+ oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize -
+ OCFS2_QBLK_RESERVED_SPACE;
+ oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
+ setup_timer(&oinfo->dqi_sync_timer, qsync_timer_fn,
+ (unsigned long)oinfo);
+ mod_timer(&oinfo->dqi_sync_timer,
+ round_jiffies(jiffies + oinfo->dqi_syncjiff));
+out_err:
+ mlog_exit(status);
+ return status;
+}
+
+/* Write information to global quota file. Expects exlusive lock on quota
+ * file inode and quota info */
+static int __ocfs2_global_write_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_global_disk_dqinfo dinfo;
+ ssize_t size;
+
+ spin_lock(&dq_data_lock);
+ info->dqi_flags &= ~DQF_INFO_DIRTY;
+ dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
+ dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
+ spin_unlock(&dq_data_lock);
+ dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
+ dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
+ dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
+ dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry);
+ size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
+ sizeof(struct ocfs2_global_disk_dqinfo),
+ OCFS2_GLOBAL_INFO_OFF);
+ if (size != sizeof(struct ocfs2_global_disk_dqinfo)) {
+ mlog(ML_ERROR, "Cannot write global quota info structure\n");
+ if (size >= 0)
+ size = -EIO;
+ return size;
+ }
+ return 0;
+}
+
+int ocfs2_global_write_info(struct super_block *sb, int type)
+{
+ int err;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+
+ err = ocfs2_qinfo_lock(info, 1);
+ if (err < 0)
+ return err;
+ err = __ocfs2_global_write_info(sb, type);
+ ocfs2_qinfo_unlock(info, 1);
+ return err;
+}
+
+/* Read in information from global quota file and acquire a reference to it.
+ * dquot_acquire() has already started the transaction and locked quota file */
+int ocfs2_global_read_dquot(struct dquot *dquot)
+{
+ int err, err2, ex = 0;
+ struct ocfs2_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
+ err = ocfs2_qinfo_lock(info, 0);
+ if (err < 0)
+ goto out;
+ err = qtree_read_dquot(&info->dqi_gi, dquot);
+ if (err < 0)
+ goto out_qlock;
+ OCFS2_DQUOT(dquot)->dq_use_count++;
+ OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+ OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ if (!dquot->dq_off) { /* No real quota entry? */
+ /* Upgrade to exclusive lock for allocation */
+ err = ocfs2_qinfo_lock(info, 1);
+ if (err < 0)
+ goto out_qlock;
+ ex = 1;
+ }
+ err = qtree_write_dquot(&info->dqi_gi, dquot);
+ if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
+ err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
+ if (!err)
+ err = err2;
+ }
+out_qlock:
+ if (ex)
+ ocfs2_qinfo_unlock(info, 1);
+ ocfs2_qinfo_unlock(info, 0);
+out:
+ if (err < 0)
+ mlog_errno(err);
+ return err;
+}
+
+/* Sync local information about quota modifications with global quota file.
+ * Caller must have started the transaction and obtained exclusive lock for
+ * global quota file inode */
+int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
+{
+ int err, err2;
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_global_disk_dqblk dqblk;
+ s64 spacechange, inodechange;
+ time_t olditime, oldbtime;
+
+ err = sb->s_op->quota_read(sb, type, (char *)&dqblk,
+ sizeof(struct ocfs2_global_disk_dqblk),
+ dquot->dq_off);
+ if (err != sizeof(struct ocfs2_global_disk_dqblk)) {
+ if (err >= 0) {
+ mlog(ML_ERROR, "Short read from global quota file "
+ "(%u read)\n", err);
+ err = -EIO;
+ }
+ goto out;
+ }
+
+ /* Update space and inode usage. Get also other information from
+ * global quota file so that we don't overwrite any changes there.
+ * We are */
+ spin_lock(&dq_data_lock);
+ spacechange = dquot->dq_dqb.dqb_curspace -
+ OCFS2_DQUOT(dquot)->dq_origspace;
+ inodechange = dquot->dq_dqb.dqb_curinodes -
+ OCFS2_DQUOT(dquot)->dq_originodes;
+ olditime = dquot->dq_dqb.dqb_itime;
+ oldbtime = dquot->dq_dqb.dqb_btime;
+ ocfs2_global_disk2memdqb(dquot, &dqblk);
+ mlog(0, "Syncing global dquot %u space %lld+%lld, inodes %lld+%lld\n",
+ dquot->dq_id, dquot->dq_dqb.dqb_curspace, (long long)spacechange,
+ dquot->dq_dqb.dqb_curinodes, (long long)inodechange);
+ if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags))
+ dquot->dq_dqb.dqb_curspace += spacechange;
+ if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags))
+ dquot->dq_dqb.dqb_curinodes += inodechange;
+ /* Set properly space grace time... */
+ if (dquot->dq_dqb.dqb_bsoftlimit &&
+ dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) {
+ if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) &&
+ oldbtime > 0) {
+ if (dquot->dq_dqb.dqb_btime > 0)
+ dquot->dq_dqb.dqb_btime =
+ min(dquot->dq_dqb.dqb_btime, oldbtime);
+ else
+ dquot->dq_dqb.dqb_btime = oldbtime;
+ }
+ } else {
+ dquot->dq_dqb.dqb_btime = 0;
+ clear_bit(DQ_BLKS_B, &dquot->dq_flags);
+ }
+ /* Set properly inode grace time... */
+ if (dquot->dq_dqb.dqb_isoftlimit &&
+ dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) {
+ if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) &&
+ olditime > 0) {
+ if (dquot->dq_dqb.dqb_itime > 0)
+ dquot->dq_dqb.dqb_itime =
+ min(dquot->dq_dqb.dqb_itime, olditime);
+ else
+ dquot->dq_dqb.dqb_itime = olditime;
+ }
+ } else {
+ dquot->dq_dqb.dqb_itime = 0;
+ clear_bit(DQ_INODES_B, &dquot->dq_flags);
+ }
+ /* All information is properly updated, clear the flags */
+ __clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
+ __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
+ OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
+ OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
+ spin_unlock(&dq_data_lock);
+ err = ocfs2_qinfo_lock(info, freeing);
+ if (err < 0) {
+ mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
+ " (type=%d, id=%u)\n", dquot->dq_type,
+ (unsigned)dquot->dq_id);
+ goto out;
+ }
+ if (freeing)
+ OCFS2_DQUOT(dquot)->dq_use_count--;
+ err = qtree_write_dquot(&info->dqi_gi, dquot);
+ if (err < 0)
+ goto out_qlock;
+ if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) {
+ err = qtree_release_dquot(&info->dqi_gi, dquot);
+ if (info_dirty(sb_dqinfo(sb, type))) {
+ err2 = __ocfs2_global_write_info(sb, type);
+ if (!err)
+ err = err2;
+ }
+ }
+out_qlock:
+ ocfs2_qinfo_unlock(info, freeing);
+out:
+ if (err < 0)
+ mlog_errno(err);
+ return err;
+}
+
+/*
+ * Functions for periodic syncing of dquots with global file
+ */
+static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
+{
+ handle_t *handle;
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+ int status = 0;
+
+ mlog_entry("id=%u qtype=%u type=%lu device=%s\n", dquot->dq_id,
+ dquot->dq_type, type, sb->s_id);
+ if (type != dquot->dq_type)
+ goto out;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+
+ handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ status = ocfs2_sync_dquot(dquot);
+ mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ if (status < 0)
+ mlog_errno(status);
+ /* We have to write local structure as well... */
+ dquot_mark_dquot_dirty(dquot);
+ status = dquot_commit(dquot);
+ if (status < 0)
+ mlog_errno(status);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static void ocfs2_do_qsync(unsigned long oinfo_ptr)
+{
+ struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr;
+ struct super_block *sb = oinfo->dqi_gqinode->i_sb;
+
+ dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
+}
+
+static void qsync_timer_fn(unsigned long oinfo_ptr)
+{
+ struct ocfs2_mem_dqinfo *oinfo = (struct ocfs2_mem_dqinfo *)oinfo_ptr;
+
+ pdflush_operation(ocfs2_do_qsync, oinfo_ptr);
+ mod_timer(&oinfo->dqi_sync_timer,
+ round_jiffies(jiffies + oinfo->dqi_syncjiff));
+}
+
+/*
+ * Wrappers for generic quota functions
+ */
+
+static int ocfs2_write_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+ handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ status = dquot_commit(dquot);
+ ocfs2_commit_trans(osb, handle);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
+{
+ struct ocfs2_mem_dqinfo *oinfo;
+ int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+ return 0;
+
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ /* We modify tree, leaf block, global info, local chunk header,
+ * global and local inode */
+ return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
+ 2 * OCFS2_INODE_UPDATE_CREDITS;
+}
+
+static int ocfs2_release_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb,
+ ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_type));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_release(dquot);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
+{
+ struct ocfs2_mem_dqinfo *oinfo;
+ int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
+ struct ocfs2_dinode *lfe, *gfe;
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
+ return 0;
+
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
+ lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
+ /* We can extend local file + global file. In local file we
+ * can modify info, chunk header block and dquot block. In
+ * global file we can modify info, tree and leaf block */
+ return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
+ ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
+ 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
+}
+
+static int ocfs2_acquire_dquot(struct dquot *dquot)
+{
+ handle_t *handle;
+ struct ocfs2_mem_dqinfo *oinfo =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+ struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
+ int status = 0;
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
+ /* We need an exclusive lock, because we're going to update use count
+ * and instantiate possibly new dquot structure */
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb,
+ ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_acquire(dquot);
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
+{
+ unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
+ (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) |
+ (1 << (DQ_LASTSET_B + QIF_INODES_B)) |
+ (1 << (DQ_LASTSET_B + QIF_SPACE_B)) |
+ (1 << (DQ_LASTSET_B + QIF_BTIME_B)) |
+ (1 << (DQ_LASTSET_B + QIF_ITIME_B));
+ int sync = 0;
+ int status;
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ handle_t *handle;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+ mlog_entry("id=%u, type=%d", dquot->dq_id, type);
+ dquot_mark_dquot_dirty(dquot);
+
+ /* In case user set some limits, sync dquot immediately to global
+ * quota file so that information propagates quicker */
+ spin_lock(&dq_data_lock);
+ if (dquot->dq_flags & mask)
+ sync = 1;
+ spin_unlock(&dq_data_lock);
+ if (!sync) {
+ status = ocfs2_write_dquot(dquot);
+ goto out;
+ }
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = ocfs2_sync_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ /* Now write updated local dquot structure */
+ status = dquot_commit(dquot);
+out_trans:
+ ocfs2_commit_trans(osb, handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+/* This should happen only after set_dqinfo(). */
+static int ocfs2_write_info(struct super_block *sb, int type)
+{
+ handle_t *handle;
+ int status = 0;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+
+ mlog_entry_void();
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ status = dquot_commit_info(sb, type);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_ilock:
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+/* This is difficult. We have to lock quota inode and start transaction
+ * in this function but we don't want to take the penalty of exlusive
+ * quota file lock when we are just going to use cached structures. So
+ * we just take read lock check whether we have dquot cached and if so,
+ * we don't have to take the write lock... */
+static int ocfs2_dquot_initialize(struct inode *inode, int type)
+{
+ handle_t *handle = NULL;
+ int status = 0;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+ int exclusive = 0;
+ int cnt;
+ qid_t id;
+
+ mlog_entry_void();
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0)
+ goto out;
+ /* This is just a performance optimization not a reliable test.
+ * Since we hold an inode lock, noone can actually release
+ * the structure until we are finished with initialization. */
+ if (inode->i_dquot[cnt] != NODQUOT) {
+ ocfs2_unlock_global_qf(oinfo, 0);
+ continue;
+ }
+ /* When we have inode lock, we know that no dquot_release() can
+ * run and thus we can safely check whether we need to
+ * read+modify global file to get quota information or whether
+ * our node already has it. */
+ if (cnt == USRQUOTA)
+ id = inode->i_uid;
+ else if (cnt == GRPQUOTA)
+ id = inode->i_gid;
+ else
+ BUG();
+ /* Obtain exclusion from quota off... */
+ down_write(&sb_dqopt(sb)->dqptr_sem);
+ exclusive = !dquot_is_cached(sb, id, cnt);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+ if (exclusive) {
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0) {
+ exclusive = 0;
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ ocfs2_calc_qinit_credits(sb, cnt));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_ilock;
+ }
+ }
+ dquot_initialize(inode, cnt);
+ if (exclusive) {
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
+ ocfs2_unlock_global_qf(oinfo, 0);
+ }
+ mlog_exit(0);
+ return 0;
+out_ilock:
+ if (exclusive)
+ ocfs2_unlock_global_qf(oinfo, 1);
+ ocfs2_unlock_global_qf(oinfo, 0);
+out:
+ mlog_exit(status);
+ return status;
+}
+
+static int ocfs2_dquot_drop_slow(struct inode *inode)
+{
+ int status = 0;
+ int cnt;
+ int got_lock[MAXQUOTAS] = {0, 0};
+ handle_t *handle;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+ got_lock[cnt] = 1;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ ocfs2_calc_qinit_credits(sb, USRQUOTA) +
+ ocfs2_calc_qinit_credits(sb, GRPQUOTA));
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ dquot_drop(inode);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (got_lock[cnt]) {
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 1);
+ }
+ return status;
+}
+
+/* See the comment before ocfs2_dquot_initialize. */
+static int ocfs2_dquot_drop(struct inode *inode)
+{
+ int status = 0;
+ struct super_block *sb = inode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo;
+ int exclusive = 0;
+ int cnt;
+ int got_lock[MAXQUOTAS] = {0, 0};
+
+ mlog_entry_void();
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (!sb_has_quota_active(sb, cnt))
+ continue;
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ status = ocfs2_lock_global_qf(oinfo, 0);
+ if (status < 0)
+ goto out;
+ got_lock[cnt] = 1;
+ }
+ /* Lock against anyone releasing references so that when when we check
+ * we know we are not going to be last ones to release dquot */
+ down_write(&sb_dqopt(sb)->dqptr_sem);
+ /* Urgh, this is a terrible hack :( */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (inode->i_dquot[cnt] != NODQUOT &&
+ atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
+ exclusive = 1;
+ break;
+ }
+ }
+ if (!exclusive)
+ dquot_drop_locked(inode);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+out:
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ if (got_lock[cnt]) {
+ oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
+ ocfs2_unlock_global_qf(oinfo, 0);
+ }
+ /* In case we bailed out because we had to do expensive locking
+ * do it now... */
+ if (exclusive)
+ status = ocfs2_dquot_drop_slow(inode);
+ mlog_exit(status);
+ return status;
+}
+
+static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
+{
+ struct ocfs2_dquot *dquot =
+ kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS);
+
+ if (!dquot)
+ return NULL;
+ return &dquot->dq_dquot;
+}
+
+static void ocfs2_destroy_dquot(struct dquot *dquot)
+{
+ kmem_cache_free(ocfs2_dquot_cachep, dquot);
+}
+
+struct dquot_operations ocfs2_quota_operations = {
+ .initialize = ocfs2_dquot_initialize,
+ .drop = ocfs2_dquot_drop,
+ .alloc_space = dquot_alloc_space,
+ .alloc_inode = dquot_alloc_inode,
+ .free_space = dquot_free_space,
+ .free_inode = dquot_free_inode,
+ .transfer = dquot_transfer,
+ .write_dquot = ocfs2_write_dquot,
+ .acquire_dquot = ocfs2_acquire_dquot,
+ .release_dquot = ocfs2_release_dquot,
+ .mark_dirty = ocfs2_mark_dquot_dirty,
+ .write_info = ocfs2_write_info,
+ .alloc_dquot = ocfs2_alloc_dquot,
+ .destroy_dquot = ocfs2_destroy_dquot,
+};
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
new file mode 100644
index 000000000000..5353c423829f
--- /dev/null
+++ b/fs/ocfs2/quota_local.c
@@ -0,0 +1,1253 @@
+/*
+ * Implementation of operations over local quota file
+ */
+
+#include <linux/fs.h>
+#include <linux/quota.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+
+#define MLOG_MASK_PREFIX ML_QUOTA
+#include <cluster/masklog.h>
+
+#include "ocfs2_fs.h"
+#include "ocfs2.h"
+#include "inode.h"
+#include "alloc.h"
+#include "file.h"
+#include "buffer_head_io.h"
+#include "journal.h"
+#include "sysfile.h"
+#include "dlmglue.h"
+#include "quota.h"
+
+/* Number of local quota structures per block */
+static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
+{
+ return ((sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) /
+ sizeof(struct ocfs2_local_disk_dqblk));
+}
+
+/* Number of blocks with entries in one chunk */
+static inline unsigned int ol_chunk_blocks(struct super_block *sb)
+{
+ return ((sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+ OCFS2_QBLK_RESERVED_SPACE) << 3) /
+ ol_quota_entries_per_block(sb);
+}
+
+/* Number of entries in a chunk bitmap */
+static unsigned int ol_chunk_entries(struct super_block *sb)
+{
+ return ol_chunk_blocks(sb) * ol_quota_entries_per_block(sb);
+}
+
+/* Offset of the chunk in quota file */
+static unsigned int ol_quota_chunk_block(struct super_block *sb, int c)
+{
+ /* 1 block for local quota file info, 1 block per chunk for chunk info */
+ return 1 + (ol_chunk_blocks(sb) + 1) * c;
+}
+
+static unsigned int ol_dqblk_block(struct super_block *sb, int c, int off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return ol_quota_chunk_block(sb, c) + 1 + off / epb;
+}
+
+static unsigned int ol_dqblk_block_off(struct super_block *sb, int c, int off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return (off % epb) * sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Offset of the dquot structure in the quota file */
+static loff_t ol_dqblk_off(struct super_block *sb, int c, int off)
+{
+ return (ol_dqblk_block(sb, c, off) << sb->s_blocksize_bits) +
+ ol_dqblk_block_off(sb, c, off);
+}
+
+/* Compute block number from given offset */
+static inline unsigned int ol_dqblk_file_block(struct super_block *sb, loff_t off)
+{
+ return off >> sb->s_blocksize_bits;
+}
+
+static inline unsigned int ol_dqblk_block_offset(struct super_block *sb, loff_t off)
+{
+ return off & ((1 << sb->s_blocksize_bits) - 1);
+}
+
+/* Compute offset in the chunk of a structure with the given offset */
+static int ol_dqblk_chunk_off(struct super_block *sb, int c, loff_t off)
+{
+ int epb = ol_quota_entries_per_block(sb);
+
+ return ((off >> sb->s_blocksize_bits) -
+ ol_quota_chunk_block(sb, c) - 1) * epb
+ + ((unsigned int)(off & ((1 << sb->s_blocksize_bits) - 1))) /
+ sizeof(struct ocfs2_local_disk_dqblk);
+}
+
+/* Write bufferhead into the fs */
+static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
+ void (*modify)(struct buffer_head *, void *), void *private)
+{
+ struct super_block *sb = inode->i_sb;
+ handle_t *handle;
+ int status;
+
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 1);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ return status;
+ }
+ status = ocfs2_journal_access(handle, inode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ return status;
+ }
+ lock_buffer(bh);
+ modify(bh, private);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0) {
+ mlog_errno(status);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ return status;
+ }
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ return status;
+ }
+ return 0;
+}
+
+/* Check whether we understand format of quota files */
+static int ocfs2_local_check_quota_file(struct super_block *sb, int type)
+{
+ unsigned int lmagics[MAXQUOTAS] = OCFS2_LOCAL_QMAGICS;
+ unsigned int lversions[MAXQUOTAS] = OCFS2_LOCAL_QVERSIONS;
+ unsigned int gmagics[MAXQUOTAS] = OCFS2_GLOBAL_QMAGICS;
+ unsigned int gversions[MAXQUOTAS] = OCFS2_GLOBAL_QVERSIONS;
+ unsigned int ino[MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE,
+ GROUP_QUOTA_SYSTEM_INODE };
+ struct buffer_head *bh = NULL;
+ struct inode *linode = sb_dqopt(sb)->files[type];
+ struct inode *ginode = NULL;
+ struct ocfs2_disk_dqheader *dqhead;
+ int status, ret = 0;
+
+ /* First check whether we understand local quota file */
+ status = ocfs2_read_quota_block(linode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file header (type=%d)\n",
+ type);
+ goto out_err;
+ }
+ dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+ if (le32_to_cpu(dqhead->dqh_magic) != lmagics[type]) {
+ mlog(ML_ERROR, "quota file magic does not match (%u != %u),"
+ " type=%d\n", le32_to_cpu(dqhead->dqh_magic),
+ lmagics[type], type);
+ goto out_err;
+ }
+ if (le32_to_cpu(dqhead->dqh_version) != lversions[type]) {
+ mlog(ML_ERROR, "quota file version does not match (%u != %u),"
+ " type=%d\n", le32_to_cpu(dqhead->dqh_version),
+ lversions[type], type);
+ goto out_err;
+ }
+ brelse(bh);
+ bh = NULL;
+
+ /* Next check whether we understand global quota file */
+ ginode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type],
+ OCFS2_INVALID_SLOT);
+ if (!ginode) {
+ mlog(ML_ERROR, "cannot get global quota file inode "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ /* Since the header is read only, we don't care about locking */
+ status = ocfs2_read_quota_block(ginode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read global quota file header "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ dqhead = (struct ocfs2_disk_dqheader *)(bh->b_data);
+ if (le32_to_cpu(dqhead->dqh_magic) != gmagics[type]) {
+ mlog(ML_ERROR, "global quota file magic does not match "
+ "(%u != %u), type=%d\n",
+ le32_to_cpu(dqhead->dqh_magic), gmagics[type], type);
+ goto out_err;
+ }
+ if (le32_to_cpu(dqhead->dqh_version) != gversions[type]) {
+ mlog(ML_ERROR, "global quota file version does not match "
+ "(%u != %u), type=%d\n",
+ le32_to_cpu(dqhead->dqh_version), gversions[type],
+ type);
+ goto out_err;
+ }
+
+ ret = 1;
+out_err:
+ brelse(bh);
+ iput(ginode);
+ return ret;
+}
+
+/* Release given list of quota file chunks */
+static void ocfs2_release_local_quota_bitmaps(struct list_head *head)
+{
+ struct ocfs2_quota_chunk *pos, *next;
+
+ list_for_each_entry_safe(pos, next, head, qc_chunk) {
+ list_del(&pos->qc_chunk);
+ brelse(pos->qc_headerbh);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, pos);
+ }
+}
+
+/* Load quota bitmaps into memory */
+static int ocfs2_load_local_quota_bitmaps(struct inode *inode,
+ struct ocfs2_local_disk_dqinfo *ldinfo,
+ struct list_head *head)
+{
+ struct ocfs2_quota_chunk *newchunk;
+ int i, status;
+
+ INIT_LIST_HEAD(head);
+ for (i = 0; i < le32_to_cpu(ldinfo->dqi_chunks); i++) {
+ newchunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+ if (!newchunk) {
+ ocfs2_release_local_quota_bitmaps(head);
+ return -ENOMEM;
+ }
+ newchunk->qc_num = i;
+ newchunk->qc_headerbh = NULL;
+ status = ocfs2_read_quota_block(inode,
+ ol_quota_chunk_block(inode->i_sb, i),
+ &newchunk->qc_headerbh);
+ if (status) {
+ mlog_errno(status);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, newchunk);
+ ocfs2_release_local_quota_bitmaps(head);
+ return status;
+ }
+ list_add_tail(&newchunk->qc_chunk, head);
+ }
+ return 0;
+}
+
+static void olq_update_info(struct buffer_head *bh, void *private)
+{
+ struct mem_dqinfo *info = private;
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ spin_lock(&dq_data_lock);
+ ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
+ ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
+ ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
+ spin_unlock(&dq_data_lock);
+}
+
+static int ocfs2_add_recovery_chunk(struct super_block *sb,
+ struct ocfs2_local_disk_chunk *dchunk,
+ int chunk,
+ struct list_head *head)
+{
+ struct ocfs2_recovery_chunk *rc;
+
+ rc = kmalloc(sizeof(struct ocfs2_recovery_chunk), GFP_NOFS);
+ if (!rc)
+ return -ENOMEM;
+ rc->rc_chunk = chunk;
+ rc->rc_bitmap = kmalloc(sb->s_blocksize, GFP_NOFS);
+ if (!rc->rc_bitmap) {
+ kfree(rc);
+ return -ENOMEM;
+ }
+ memcpy(rc->rc_bitmap, dchunk->dqc_bitmap,
+ (ol_chunk_entries(sb) + 7) >> 3);
+ list_add_tail(&rc->rc_list, head);
+ return 0;
+}
+
+static void free_recovery_list(struct list_head *head)
+{
+ struct ocfs2_recovery_chunk *next;
+ struct ocfs2_recovery_chunk *rchunk;
+
+ list_for_each_entry_safe(rchunk, next, head, rc_list) {
+ list_del(&rchunk->rc_list);
+ kfree(rchunk->rc_bitmap);
+ kfree(rchunk);
+ }
+}
+
+void ocfs2_free_quota_recovery(struct ocfs2_quota_recovery *rec)
+{
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ free_recovery_list(&(rec->r_list[type]));
+ kfree(rec);
+}
+
+/* Load entries in our quota file we have to recover*/
+static int ocfs2_recovery_load_quota(struct inode *lqinode,
+ struct ocfs2_local_disk_dqinfo *ldinfo,
+ int type,
+ struct list_head *head)
+{
+ struct super_block *sb = lqinode->i_sb;
+ struct buffer_head *hbh;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int i, chunks = le32_to_cpu(ldinfo->dqi_chunks);
+ int status = 0;
+
+ for (i = 0; i < chunks; i++) {
+ hbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_quota_chunk_block(sb, i),
+ &hbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+ if (le32_to_cpu(dchunk->dqc_free) < ol_chunk_entries(sb))
+ status = ocfs2_add_recovery_chunk(sb, dchunk, i, head);
+ brelse(hbh);
+ if (status < 0)
+ break;
+ }
+ if (status < 0)
+ free_recovery_list(head);
+ return status;
+}
+
+static struct ocfs2_quota_recovery *ocfs2_alloc_quota_recovery(void)
+{
+ int type;
+ struct ocfs2_quota_recovery *rec;
+
+ rec = kmalloc(sizeof(struct ocfs2_quota_recovery), GFP_NOFS);
+ if (!rec)
+ return NULL;
+ for (type = 0; type < MAXQUOTAS; type++)
+ INIT_LIST_HEAD(&(rec->r_list[type]));
+ return rec;
+}
+
+/* Load information we need for quota recovery into memory */
+struct ocfs2_quota_recovery *ocfs2_begin_quota_recovery(
+ struct ocfs2_super *osb,
+ int slot_num)
+{
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ struct super_block *sb = osb->sb;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct inode *lqinode;
+ struct buffer_head *bh;
+ int type;
+ int status = 0;
+ struct ocfs2_quota_recovery *rec;
+
+ mlog(ML_NOTICE, "Beginning quota recovery in slot %u\n", slot_num);
+ rec = ocfs2_alloc_quota_recovery();
+ if (!rec)
+ return ERR_PTR(-ENOMEM);
+ /* First init... */
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ /* At this point, journal of the slot is already replayed so
+ * we can trust metadata and data of the quota file */
+ lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+ if (!lqinode) {
+ status = -ENOENT;
+ goto out;
+ }
+ status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+ OCFS2_META_LOCK_RECOVERY);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_put;
+ }
+ /* Now read local header */
+ bh = NULL;
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(slot=%d type=%d)\n", slot_num, type);
+ goto out_lock;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+ &rec->r_list[type]);
+ brelse(bh);
+out_lock:
+ ocfs2_inode_unlock(lqinode, 1);
+out_put:
+ iput(lqinode);
+ if (status < 0)
+ break;
+ }
+out:
+ if (status < 0) {
+ ocfs2_free_quota_recovery(rec);
+ rec = ERR_PTR(status);
+ }
+ return rec;
+}
+
+/* Sync changes in local quota file into global quota file and
+ * reinitialize local quota file.
+ * The function expects local quota file to be already locked and
+ * dqonoff_mutex locked. */
+static int ocfs2_recover_local_quota_file(struct inode *lqinode,
+ int type,
+ struct ocfs2_quota_recovery *rec)
+{
+ struct super_block *sb = lqinode->i_sb;
+ struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ struct ocfs2_local_disk_chunk *dchunk;
+ struct ocfs2_local_disk_dqblk *dqblk;
+ struct dquot *dquot;
+ handle_t *handle;
+ struct buffer_head *hbh = NULL, *qbh = NULL;
+ int status = 0;
+ int bit, chunk;
+ struct ocfs2_recovery_chunk *rchunk, *next;
+ qsize_t spacechange, inodechange;
+
+ mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
+
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0)
+ goto out;
+
+ list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
+ chunk = rchunk->rc_chunk;
+ hbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_quota_chunk_block(sb, chunk),
+ &hbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)hbh->b_data;
+ for_each_bit(bit, rchunk->rc_bitmap, ol_chunk_entries(sb)) {
+ qbh = NULL;
+ status = ocfs2_read_quota_block(lqinode,
+ ol_dqblk_block(sb, chunk, bit),
+ &qbh);
+ if (status) {
+ mlog_errno(status);
+ break;
+ }
+ dqblk = (struct ocfs2_local_disk_dqblk *)(qbh->b_data +
+ ol_dqblk_block_off(sb, chunk, bit));
+ dquot = dqget(sb, le64_to_cpu(dqblk->dqb_id), type);
+ if (!dquot) {
+ status = -EIO;
+ mlog(ML_ERROR, "Failed to get quota structure "
+ "for id %u, type %d. Cannot finish quota "
+ "file recovery.\n",
+ (unsigned)le64_to_cpu(dqblk->dqb_id),
+ type);
+ goto out_put_bh;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb),
+ OCFS2_QSYNC_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_put_dquot;
+ }
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
+ spin_lock(&dq_data_lock);
+ /* Add usage from quota entry into quota changes
+ * of our node. Auxiliary variables are important
+ * due to signedness */
+ spacechange = le64_to_cpu(dqblk->dqb_spacemod);
+ inodechange = le64_to_cpu(dqblk->dqb_inodemod);
+ dquot->dq_dqb.dqb_curspace += spacechange;
+ dquot->dq_dqb.dqb_curinodes += inodechange;
+ spin_unlock(&dq_data_lock);
+ /* We want to drop reference held by the crashed
+ * node. Since we have our own reference we know
+ * global structure actually won't be freed. */
+ status = ocfs2_global_release_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_commit;
+ }
+ /* Release local quota file entry */
+ status = ocfs2_journal_access(handle, lqinode,
+ qbh, OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_commit;
+ }
+ lock_buffer(qbh);
+ WARN_ON(!ocfs2_test_bit(bit, dchunk->dqc_bitmap));
+ ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, 1);
+ unlock_buffer(qbh);
+ status = ocfs2_journal_dirty(handle, qbh);
+ if (status < 0)
+ mlog_errno(status);
+out_commit:
+ mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_put_dquot:
+ dqput(dquot);
+out_put_bh:
+ brelse(qbh);
+ if (status < 0)
+ break;
+ }
+ brelse(hbh);
+ list_del(&rchunk->rc_list);
+ kfree(rchunk->rc_bitmap);
+ kfree(rchunk);
+ if (status < 0)
+ break;
+ }
+ ocfs2_unlock_global_qf(oinfo, 1);
+out:
+ if (status < 0)
+ free_recovery_list(&(rec->r_list[type]));
+ mlog_exit(status);
+ return status;
+}
+
+/* Recover local quota files for given node different from us */
+int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
+ struct ocfs2_quota_recovery *rec,
+ int slot_num)
+{
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ struct super_block *sb = osb->sb;
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct buffer_head *bh;
+ handle_t *handle;
+ int type;
+ int status = 0;
+ struct inode *lqinode;
+ unsigned int flags;
+
+ mlog(ML_NOTICE, "Finishing quota recovery in slot %u\n", slot_num);
+ mutex_lock(&sb_dqopt(sb)->dqonoff_mutex);
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (list_empty(&(rec->r_list[type])))
+ continue;
+ mlog(0, "Recovering quota in slot %d\n", slot_num);
+ lqinode = ocfs2_get_system_file_inode(osb, ino[type], slot_num);
+ if (!lqinode) {
+ status = -ENOENT;
+ goto out;
+ }
+ status = ocfs2_inode_lock_full(lqinode, NULL, 1,
+ OCFS2_META_LOCK_NOQUEUE);
+ /* Someone else is holding the lock? Then he must be
+ * doing the recovery. Just skip the file... */
+ if (status == -EAGAIN) {
+ mlog(ML_NOTICE, "skipping quota recovery for slot %d "
+ "because quota file is locked.\n", slot_num);
+ status = 0;
+ goto out_put;
+ } else if (status < 0) {
+ mlog_errno(status);
+ goto out_put;
+ }
+ /* Now read local header */
+ bh = NULL;
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(slot=%d type=%d)\n", slot_num, type);
+ goto out_lock;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ /* Is recovery still needed? */
+ flags = le32_to_cpu(ldinfo->dqi_flags);
+ if (!(flags & OLQF_CLEAN))
+ status = ocfs2_recover_local_quota_file(lqinode,
+ type,
+ rec);
+ /* We don't want to mark file as clean when it is actually
+ * active */
+ if (slot_num == osb->slot_num)
+ goto out_bh;
+ /* Mark quota file as clean if we are recovering quota file of
+ * some other node. */
+ handle = ocfs2_start_trans(osb, 1);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out_bh;
+ }
+ status = ocfs2_journal_access(handle, lqinode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(bh);
+ ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0)
+ mlog_errno(status);
+out_trans:
+ ocfs2_commit_trans(osb, handle);
+out_bh:
+ brelse(bh);
+out_lock:
+ ocfs2_inode_unlock(lqinode, 1);
+out_put:
+ iput(lqinode);
+ if (status < 0)
+ break;
+ }
+out:
+ mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
+ kfree(rec);
+ return status;
+}
+
+/* Read information header from quota file */
+static int ocfs2_local_read_info(struct super_block *sb, int type)
+{
+ struct ocfs2_local_disk_dqinfo *ldinfo;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ int status;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_quota_recovery *rec;
+ int locked = 0;
+
+ info->dqi_maxblimit = 0x7fffffffffffffffLL;
+ info->dqi_maxilimit = 0x7fffffffffffffffLL;
+ oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
+ if (!oinfo) {
+ mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
+ " info.");
+ goto out_err;
+ }
+ info->dqi_priv = oinfo;
+ oinfo->dqi_type = type;
+ INIT_LIST_HEAD(&oinfo->dqi_chunk);
+ oinfo->dqi_rec = NULL;
+ oinfo->dqi_lqi_bh = NULL;
+ oinfo->dqi_ibh = NULL;
+
+ status = ocfs2_global_read_info(sb, type);
+ if (status < 0)
+ goto out_err;
+
+ status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ locked = 1;
+
+ /* Now read local header */
+ status = ocfs2_read_quota_block(lqinode, 0, &bh);
+ if (status) {
+ mlog_errno(status);
+ mlog(ML_ERROR, "failed to read quota file info header "
+ "(type=%d)\n", type);
+ goto out_err;
+ }
+ ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
+ OCFS2_LOCAL_INFO_OFF);
+ info->dqi_flags = le32_to_cpu(ldinfo->dqi_flags);
+ oinfo->dqi_chunks = le32_to_cpu(ldinfo->dqi_chunks);
+ oinfo->dqi_blocks = le32_to_cpu(ldinfo->dqi_blocks);
+ oinfo->dqi_ibh = bh;
+
+ /* We crashed when using local quota file? */
+ if (!(info->dqi_flags & OLQF_CLEAN)) {
+ rec = OCFS2_SB(sb)->quota_rec;
+ if (!rec) {
+ rec = ocfs2_alloc_quota_recovery();
+ if (!rec) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out_err;
+ }
+ OCFS2_SB(sb)->quota_rec = rec;
+ }
+
+ status = ocfs2_recovery_load_quota(lqinode, ldinfo, type,
+ &rec->r_list[type]);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ }
+
+ status = ocfs2_load_local_quota_bitmaps(lqinode,
+ ldinfo,
+ &oinfo->dqi_chunk);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ /* Now mark quota file as used */
+ info->dqi_flags &= ~OLQF_CLEAN;
+ status = ocfs2_modify_bh(lqinode, bh, olq_update_info, info);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ return 0;
+out_err:
+ if (oinfo) {
+ iput(oinfo->dqi_gqinode);
+ ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+ ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+ brelse(oinfo->dqi_lqi_bh);
+ if (locked)
+ ocfs2_inode_unlock(lqinode, 1);
+ ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+ kfree(oinfo);
+ }
+ brelse(bh);
+ return -1;
+}
+
+/* Write local info to quota file */
+static int ocfs2_local_write_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct buffer_head *bh = ((struct ocfs2_mem_dqinfo *)info->dqi_priv)
+ ->dqi_ibh;
+ int status;
+
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[type], bh, olq_update_info,
+ info);
+ if (status < 0) {
+ mlog_errno(status);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Release info from memory */
+static int ocfs2_local_free_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int mark_clean = 1, len;
+ int status;
+
+ /* At this point we know there are no more dquots and thus
+ * even if there's some sync in the pdflush queue, it won't
+ * find any dquots and return without doing anything */
+ del_timer_sync(&oinfo->dqi_sync_timer);
+ iput(oinfo->dqi_gqinode);
+ ocfs2_simple_drop_lockres(OCFS2_SB(sb), &oinfo->dqi_gqlock);
+ ocfs2_lock_res_free(&oinfo->dqi_gqlock);
+ list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ (chunk->qc_headerbh->b_data);
+ if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+ len = ol_chunk_entries(sb);
+ } else {
+ len = (oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+ * ol_quota_entries_per_block(sb);
+ }
+ /* Not all entries free? Bug! */
+ if (le32_to_cpu(dchunk->dqc_free) != len) {
+ mlog(ML_ERROR, "releasing quota file with used "
+ "entries (type=%d)\n", type);
+ mark_clean = 0;
+ }
+ }
+ ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+
+ /* dqonoff_mutex protects us against racing with recovery thread... */
+ if (oinfo->dqi_rec) {
+ ocfs2_free_quota_recovery(oinfo->dqi_rec);
+ mark_clean = 0;
+ }
+
+ if (!mark_clean)
+ goto out;
+
+ /* Mark local file as clean */
+ info->dqi_flags |= OLQF_CLEAN;
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[type],
+ oinfo->dqi_ibh,
+ olq_update_info,
+ info);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+out:
+ ocfs2_inode_unlock(sb_dqopt(sb)->files[type], 1);
+ brelse(oinfo->dqi_ibh);
+ brelse(oinfo->dqi_lqi_bh);
+ kfree(oinfo);
+ return 0;
+}
+
+static void olq_set_dquot(struct buffer_head *bh, void *private)
+{
+ struct ocfs2_dquot *od = private;
+ struct ocfs2_local_disk_dqblk *dqblk;
+ struct super_block *sb = od->dq_dquot.dq_sb;
+
+ dqblk = (struct ocfs2_local_disk_dqblk *)(bh->b_data
+ + ol_dqblk_block_offset(sb, od->dq_local_off));
+
+ dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
+ spin_lock(&dq_data_lock);
+ dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
+ od->dq_origspace);
+ dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
+ od->dq_originodes);
+ spin_unlock(&dq_data_lock);
+ mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
+ od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
+ (long long)le64_to_cpu(dqblk->dqb_inodemod));
+}
+
+/* Write dquot to local quota file */
+static int ocfs2_local_write_dquot(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ struct buffer_head *bh = NULL;
+ int status;
+
+ status = ocfs2_read_quota_block(sb_dqopt(sb)->files[dquot->dq_type],
+ ol_dqblk_file_block(sb, od->dq_local_off),
+ &bh);
+ if (status) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_modify_bh(sb_dqopt(sb)->files[dquot->dq_type], bh,
+ olq_set_dquot, od);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+out:
+ brelse(bh);
+ return status;
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_find_free_entry(struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int found = 0, len;
+
+ list_for_each_entry(chunk, &oinfo->dqi_chunk, qc_chunk) {
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ chunk->qc_headerbh->b_data;
+ if (le32_to_cpu(dchunk->dqc_free) > 0) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return NULL;
+
+ if (chunk->qc_num < oinfo->dqi_chunks - 1) {
+ len = ol_chunk_entries(sb);
+ } else {
+ len = (oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1)
+ * ol_quota_entries_per_block(sb);
+ }
+
+ found = ocfs2_find_next_zero_bit(dchunk->dqc_bitmap, len, 0);
+ /* We failed? */
+ if (found == len) {
+ mlog(ML_ERROR, "Did not find empty entry in chunk %d with %u"
+ " entries free (type=%d)\n", chunk->qc_num,
+ le32_to_cpu(dchunk->dqc_free), type);
+ return ERR_PTR(-EIO);
+ }
+ *offset = found;
+ return chunk;
+}
+
+/* Add new chunk to the local quota file */
+static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
+ struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_quota_chunk *chunk = NULL;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int status;
+ handle_t *handle;
+ struct buffer_head *bh = NULL;
+ u64 p_blkno;
+
+ /* We are protected by dqio_sem so no locking needed */
+ status = ocfs2_extend_no_holes(lqinode,
+ lqinode->i_size + 2 * sb->s_blocksize,
+ lqinode->i_size);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+ lqinode->i_size + 2 * sb->s_blocksize);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ chunk = kmem_cache_alloc(ocfs2_qf_chunk_cachep, GFP_NOFS);
+ if (!chunk) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+
+ down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
+ &p_blkno, NULL, NULL);
+ up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ bh = sb_getblk(sb, p_blkno);
+ if (!bh) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ goto out;
+ }
+ dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access(handle, lqinode, bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ lock_buffer(bh);
+ dchunk->dqc_free = cpu_to_le32(ol_quota_entries_per_block(sb));
+ memset(dchunk->dqc_bitmap, 0,
+ sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
+ OCFS2_QBLK_RESERVED_SPACE);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+ status = ocfs2_journal_dirty(handle, bh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ oinfo->dqi_blocks += 2;
+ oinfo->dqi_chunks++;
+ status = ocfs2_local_write_info(sb, type);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ list_add_tail(&chunk->qc_chunk, &oinfo->dqi_chunk);
+ chunk->qc_num = list_entry(chunk->qc_chunk.prev,
+ struct ocfs2_quota_chunk,
+ qc_chunk)->qc_num + 1;
+ chunk->qc_headerbh = bh;
+ *offset = 0;
+ return chunk;
+out_trans:
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ brelse(bh);
+ kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
+ return ERR_PTR(status);
+}
+
+/* Find free entry in local quota file */
+static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
+ struct super_block *sb,
+ int type,
+ int *offset)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
+ struct ocfs2_quota_chunk *chunk;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_local_disk_chunk *dchunk;
+ int epb = ol_quota_entries_per_block(sb);
+ unsigned int chunk_blocks;
+ int status;
+ handle_t *handle;
+
+ if (list_empty(&oinfo->dqi_chunk))
+ return ocfs2_local_quota_add_chunk(sb, type, offset);
+ /* Is the last chunk full? */
+ chunk = list_entry(oinfo->dqi_chunk.prev,
+ struct ocfs2_quota_chunk, qc_chunk);
+ chunk_blocks = oinfo->dqi_blocks -
+ ol_quota_chunk_block(sb, chunk->qc_num) - 1;
+ if (ol_chunk_blocks(sb) == chunk_blocks)
+ return ocfs2_local_quota_add_chunk(sb, type, offset);
+
+ /* We are protected by dqio_sem so no locking needed */
+ status = ocfs2_extend_no_holes(lqinode,
+ lqinode->i_size + sb->s_blocksize,
+ lqinode->i_size);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
+ lqinode->i_size + sb->s_blocksize);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto out;
+ }
+ status = ocfs2_journal_access(handle, lqinode, chunk->qc_headerbh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ dchunk = (struct ocfs2_local_disk_chunk *)chunk->qc_headerbh->b_data;
+ lock_buffer(chunk->qc_headerbh);
+ le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
+ unlock_buffer(chunk->qc_headerbh);
+ status = ocfs2_journal_dirty(handle, chunk->qc_headerbh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+ oinfo->dqi_blocks++;
+ status = ocfs2_local_write_info(sb, type);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_trans;
+ }
+
+ status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ *offset = chunk_blocks * epb;
+ return chunk;
+out_trans:
+ ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out:
+ return ERR_PTR(status);
+}
+
+static void olq_alloc_dquot(struct buffer_head *bh, void *private)
+{
+ int *offset = private;
+ struct ocfs2_local_disk_chunk *dchunk;
+
+ dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
+ ocfs2_set_bit(*offset, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, -1);
+}
+
+/* Create dquot in the local file for given id */
+static int ocfs2_create_local_dquot(struct dquot *dquot)
+{
+ struct super_block *sb = dquot->dq_sb;
+ int type = dquot->dq_type;
+ struct inode *lqinode = sb_dqopt(sb)->files[type];
+ struct ocfs2_quota_chunk *chunk;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ int offset;
+ int status;
+
+ chunk = ocfs2_find_free_entry(sb, type, &offset);
+ if (!chunk) {
+ chunk = ocfs2_extend_local_quota_file(sb, type, &offset);
+ if (IS_ERR(chunk))
+ return PTR_ERR(chunk);
+ } else if (IS_ERR(chunk)) {
+ return PTR_ERR(chunk);
+ }
+ od->dq_local_off = ol_dqblk_off(sb, chunk->qc_num, offset);
+ od->dq_chunk = chunk;
+
+ /* Initialize dquot structure on disk */
+ status = ocfs2_local_write_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ /* Mark structure as allocated */
+ status = ocfs2_modify_bh(lqinode, chunk->qc_headerbh, olq_alloc_dquot,
+ &offset);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+out:
+ return status;
+}
+
+/* Create entry in local file for dquot, load data from the global file */
+static int ocfs2_local_read_dquot(struct dquot *dquot)
+{
+ int status;
+
+ mlog_entry("id=%u, type=%d\n", dquot->dq_id, dquot->dq_type);
+
+ status = ocfs2_global_read_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+
+ /* Now create entry in the local quota file */
+ status = ocfs2_create_local_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_err;
+ }
+ mlog_exit(0);
+ return 0;
+out_err:
+ mlog_exit(status);
+ return status;
+}
+
+/* Release dquot structure from local quota file. ocfs2_release_dquot() has
+ * already started a transaction and obtained exclusive lock for global
+ * quota file. */
+static int ocfs2_local_release_dquot(struct dquot *dquot)
+{
+ int status;
+ int type = dquot->dq_type;
+ struct ocfs2_dquot *od = OCFS2_DQUOT(dquot);
+ struct super_block *sb = dquot->dq_sb;
+ struct ocfs2_local_disk_chunk *dchunk;
+ int offset;
+ handle_t *handle = journal_current_handle();
+
+ BUG_ON(!handle);
+ /* First write all local changes to global file */
+ status = ocfs2_global_release_dquot(dquot);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access(handle, sb_dqopt(sb)->files[type],
+ od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ offset = ol_dqblk_chunk_off(sb, od->dq_chunk->qc_num,
+ od->dq_local_off);
+ dchunk = (struct ocfs2_local_disk_chunk *)
+ (od->dq_chunk->qc_headerbh->b_data);
+ /* Mark structure as freed */
+ lock_buffer(od->dq_chunk->qc_headerbh);
+ ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
+ le32_add_cpu(&dchunk->dqc_free, 1);
+ unlock_buffer(od->dq_chunk->qc_headerbh);
+ status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+ status = 0;
+out:
+ /* Clear the read bit so that next time someone uses this
+ * dquot he reads fresh info from disk and allocates local
+ * dquot structure */
+ clear_bit(DQ_READ_B, &dquot->dq_flags);
+ return status;
+}
+
+static struct quota_format_ops ocfs2_format_ops = {
+ .check_quota_file = ocfs2_local_check_quota_file,
+ .read_file_info = ocfs2_local_read_info,
+ .write_file_info = ocfs2_global_write_info,
+ .free_file_info = ocfs2_local_free_info,
+ .read_dqblk = ocfs2_local_read_dquot,
+ .commit_dqblk = ocfs2_local_write_dquot,
+ .release_dqblk = ocfs2_local_release_dquot,
+};
+
+struct quota_format_type ocfs2_quota_format = {
+ .qf_fmt_id = QFMT_OCFS2,
+ .qf_ops = &ocfs2_format_ops,
+ .qf_owner = THIS_MODULE
+};
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index ffd48db229a7..867de3ebfcaf 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -314,6 +314,10 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
+ /* main_bm_bh is validated by inode read inside ocfs2_inode_lock(),
+ * so any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
ocfs2_group_bitmap_size(osb->sb) * 8) {
mlog(ML_ERROR, "The disk is too old and small. "
@@ -322,30 +326,18 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
goto out_unlock;
}
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(main_bm_inode->i_sb, fe);
- ret = -EIO;
- goto out_unlock;
- }
-
first_new_cluster = le32_to_cpu(fe->i_clusters);
lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
first_new_cluster - 1);
- ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
+ ret = ocfs2_read_group_descriptor(main_bm_inode, fe, lgd_blkno,
+ &group_bh);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
}
-
group = (struct ocfs2_group_desc *)group_bh->b_data;
- ret = ocfs2_check_group_descriptor(inode->i_sb, fe, group);
- if (ret) {
- mlog_errno(ret);
- goto out_unlock;
- }
-
cl_bpc = le16_to_cpu(fe->id2.i_chain.cl_bpc);
if (le16_to_cpu(group->bg_bits) / cl_bpc + new_clusters >
le16_to_cpu(fe->id2.i_chain.cl_cpg)) {
@@ -398,41 +390,16 @@ static int ocfs2_check_new_group(struct inode *inode,
struct buffer_head *group_bh)
{
int ret;
- struct ocfs2_group_desc *gd;
+ struct ocfs2_group_desc *gd =
+ (struct ocfs2_group_desc *)group_bh->b_data;
u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
- unsigned int max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) *
- le16_to_cpu(di->id2.i_chain.cl_bpc);
-
- gd = (struct ocfs2_group_desc *)group_bh->b_data;
+ ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
+ if (ret)
+ goto out;
- ret = -EIO;
- if (!OCFS2_IS_VALID_GROUP_DESC(gd))
- mlog(ML_ERROR, "Group descriptor # %llu isn't valid.\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno));
- else if (di->i_blkno != gd->bg_parent_dinode)
- mlog(ML_ERROR, "Group descriptor # %llu has bad parent "
- "pointer (%llu, expected %llu)\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
- (unsigned long long)le64_to_cpu(di->i_blkno));
- else if (le16_to_cpu(gd->bg_bits) > max_bits)
- mlog(ML_ERROR, "Group descriptor # %llu has bit count of %u\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits));
- else if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits))
- mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
- "claims that %u are free\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- le16_to_cpu(gd->bg_free_bits_count));
- else if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size)))
- mlog(ML_ERROR, "Group descriptor # %llu has bit count %u but "
- "max bitmap bits of %u\n",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- 8 * le16_to_cpu(gd->bg_size));
- else if (le16_to_cpu(gd->bg_chain) != input->chain)
+ ret = -EINVAL;
+ if (le16_to_cpu(gd->bg_chain) != input->chain)
mlog(ML_ERROR, "Group descriptor # %llu has bad chain %u "
"while input has %u set.\n",
(unsigned long long)le64_to_cpu(gd->bg_blkno),
@@ -451,6 +418,7 @@ static int ocfs2_check_new_group(struct inode *inode,
else
ret = 0;
+out:
return ret;
}
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bdda2d8f8508..40661e7824e9 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
- OCFS2_BH_IGNORE_CACHE);
+ OCFS2_BH_IGNORE_CACHE, NULL);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
@@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
- OCFS2_BH_IGNORE_CACHE);
+ OCFS2_BH_IGNORE_CACHE, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index faec2d879357..9b76d41a8ac6 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
{
+ if (!lksb->lksb_fsdlm.sb_lvbptr)
+ lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
+ sizeof(struct dlm_lksb);
return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
}
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index c5ff18b46b57..226fe21f2608 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -145,62 +145,151 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
-/* somewhat more expensive than our other checks, so use sparingly. */
-int ocfs2_check_group_descriptor(struct super_block *sb,
- struct ocfs2_dinode *di,
- struct ocfs2_group_desc *gd)
+#define do_error(fmt, ...) \
+ do{ \
+ if (clean_error) \
+ mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__); \
+ else \
+ ocfs2_error(sb, fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static int ocfs2_validate_gd_self(struct super_block *sb,
+ struct buffer_head *bh,
+ int clean_error)
{
- unsigned int max_bits;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
- return -EIO;
+ do_error("Group descriptor #%llu has bad signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ gd->bg_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
+ do_error("Group descriptor #%llu has an invalid bg_blkno "
+ "of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(gd->bg_blkno));
+ return -EINVAL;
}
+ if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
+ do_error("Group descriptor #%llu has an invalid "
+ "fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(gd->bg_generation));
+ return -EINVAL;
+ }
+
+ if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+ do_error("Group descriptor #%llu has bit count %u but "
+ "claims that %u are free",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits),
+ le16_to_cpu(gd->bg_free_bits_count));
+ return -EINVAL;
+ }
+
+ if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+ do_error("Group descriptor #%llu has bit count %u but "
+ "max bitmap bits of %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits),
+ 8 * le16_to_cpu(gd->bg_size));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_validate_gd_parent(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct buffer_head *bh,
+ int clean_error)
+{
+ unsigned int max_bits;
+ struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
+
if (di->i_blkno != gd->bg_parent_dinode) {
- ocfs2_error(sb, "Group descriptor # %llu has bad parent "
- "pointer (%llu, expected %llu)",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
- (unsigned long long)le64_to_cpu(di->i_blkno));
- return -EIO;
+ do_error("Group descriptor #%llu has bad parent "
+ "pointer (%llu, expected %llu)",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ return -EINVAL;
}
max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
if (le16_to_cpu(gd->bg_bits) > max_bits) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits));
- return -EIO;
+ do_error("Group descriptor #%llu has bit count of %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_bits));
+ return -EINVAL;
}
if (le16_to_cpu(gd->bg_chain) >=
le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
- ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_chain));
- return -EIO;
+ do_error("Group descriptor #%llu has bad chain %u",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(gd->bg_chain));
+ return -EINVAL;
}
- if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
- "claims that %u are free",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- le16_to_cpu(gd->bg_free_bits_count));
- return -EIO;
- }
+ return 0;
+}
- if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
- ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
- "max bitmap bits of %u",
- (unsigned long long)le64_to_cpu(gd->bg_blkno),
- le16_to_cpu(gd->bg_bits),
- 8 * le16_to_cpu(gd->bg_size));
- return -EIO;
+#undef do_error
+
+/*
+ * This version only prints errors. It does not fail the filesystem, and
+ * exists only for resize.
+ */
+int ocfs2_check_group_descriptor(struct super_block *sb,
+ struct ocfs2_dinode *di,
+ struct buffer_head *bh)
+{
+ int rc;
+
+ rc = ocfs2_validate_gd_self(sb, bh, 1);
+ if (!rc)
+ rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
+
+ return rc;
+}
+
+static int ocfs2_validate_group_descriptor(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ mlog(0, "Validating group descriptor %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ return ocfs2_validate_gd_self(sb, bh, 0);
+}
+
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+ u64 gd_blkno, struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, gd_blkno, &tmp,
+ ocfs2_validate_group_descriptor);
+ if (rc)
+ goto out;
+
+ rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
+ if (rc) {
+ brelse(tmp);
+ goto out;
}
- return 0;
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!*bh)
+ *bh = tmp;
+
+out:
+ return rc;
}
static int ocfs2_block_group_fill(handle_t *handle,
@@ -441,11 +530,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
ac->ac_alloc_slot = slot;
fe = (struct ocfs2_dinode *) bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
+
+ /* The bh was validated by the inode read inside
+ * ocfs2_inode_lock(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
(unsigned long long)le64_to_cpu(fe->i_blkno));
@@ -790,10 +879,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
- return -EIO;
- }
+ /* Callers got this descriptor from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
@@ -858,11 +946,9 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto bail;
- }
+ /* All callers get the descriptor via
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
@@ -931,21 +1017,10 @@ static int ocfs2_relink_block_group(handle_t *handle,
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto out;
- }
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto out;
- }
- if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
- status = -EIO;
- goto out;
- }
+ /* The caller got these descriptors from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
(unsigned long long)le64_to_cpu(fe->i_blkno), chain,
@@ -1008,7 +1083,7 @@ out_rollback:
bg->bg_next_group = cpu_to_le64(bg_ptr);
prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
}
-out:
+
mlog_exit(status);
return status;
}
@@ -1170,21 +1245,17 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
u16 found;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *gd;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
struct inode *alloc_inode = ac->ac_inode;
- ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
+ ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno,
+ &group_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
gd = (struct ocfs2_group_desc *) group_bh->b_data;
- if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
- ret = -EIO;
- goto out;
- }
-
ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
ac->ac_max_block, bit_off, &found);
if (ret < 0) {
@@ -1241,19 +1312,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bits_wanted, chain,
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
- status = ocfs2_read_block(alloc_inode,
- le64_to_cpu(cl->cl_recs[chain].c_blkno),
- &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe,
+ le64_to_cpu(cl->cl_recs[chain].c_blkno),
+ &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
@@ -1271,18 +1337,13 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
next_group = le64_to_cpu(bg->bg_next_group);
prev_group_bh = group_bh;
group_bh = NULL;
- status = ocfs2_read_block(alloc_inode,
- next_group, &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe,
+ next_group, &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
bg = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
}
if (status < 0) {
if (status != -ENOSPC)
@@ -1392,11 +1453,11 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
BUG_ON(!ac->ac_bh);
fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
- status = -EIO;
- goto bail;
- }
+
+ /* The bh was validated by the inode read during
+ * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
+
if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
le32_to_cpu(fe->id1.bitmap1.i_total)) {
ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
@@ -1725,11 +1786,9 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
- OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
- status = -EIO;
- goto bail;
- }
+ /* The caller got this descriptor from
+ * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
@@ -1782,29 +1841,26 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
mlog_entry_void();
- if (!OCFS2_IS_VALID_DINODE(fe)) {
- OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
- status = -EIO;
- goto bail;
- }
+ /* The alloc_bh comes from ocfs2_free_dinode() or
+ * ocfs2_free_clusters(). The callers have all locked the
+ * allocator and gotten alloc_bh from the lock call. This
+ * validates the dinode buffer. Any corruption that has happended
+ * is a code bug. */
+ BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
(unsigned long long)bg_blkno, start_bit);
- status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
+ status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
+ &group_bh);
if (status < 0) {
mlog_errno(status);
goto bail;
}
-
group = (struct ocfs2_group_desc *) group_bh->b_data;
- status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
- if (status) {
- mlog_errno(status);
- goto bail;
- }
+
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 4df159d8f450..e3c13c77f9e8 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -164,10 +164,24 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
* and return that block offset. */
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
-/* somewhat more expensive than our other checks, so use sparingly. */
+/*
+ * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
+ * finds a problem. A caller that wants to check a group descriptor
+ * without going readonly should read the block with ocfs2_read_block[s]()
+ * and then checking it with this function. This is only resize, really.
+ * Everyone else should be using ocfs2_read_group_descriptor().
+ */
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
- struct ocfs2_group_desc *gd);
+ struct buffer_head *bh);
+/*
+ * Read a group descriptor block into *bh. If *bh is NULL, a bh will be
+ * allocated. This is a cached read. The descriptor will be validated with
+ * ocfs2_validate_group_descriptor().
+ */
+int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
+ u64 gd_blkno, struct buffer_head **bh);
+
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 304b63ac78cf..bc431386443e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,6 +41,7 @@
#include <linux/debugfs.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
+#include <linux/quotaops.h>
#define MLOG_MASK_PREFIX ML_SUPER
#include <cluster/masklog.h>
@@ -65,10 +66,13 @@
#include "uptodate.h"
#include "ver.h"
#include "xattr.h"
+#include "quota.h"
#include "buffer_head_io.h"
static struct kmem_cache *ocfs2_inode_cachep = NULL;
+struct kmem_cache *ocfs2_dquot_cachep;
+struct kmem_cache *ocfs2_qf_chunk_cachep;
/* OCFS2 needs to schedule several differnt types of work which
* require cluster locking, disk I/O, recovery waits, etc. Since these
@@ -124,6 +128,9 @@ static int ocfs2_get_sector(struct super_block *sb,
static void ocfs2_write_super(struct super_block *sb);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_destroy_inode(struct inode *inode);
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
+static int ocfs2_enable_quotas(struct ocfs2_super *osb);
+static void ocfs2_disable_quotas(struct ocfs2_super *osb);
static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
@@ -137,6 +144,8 @@ static const struct super_operations ocfs2_sops = {
.put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
+ .quota_read = ocfs2_quota_read,
+ .quota_write = ocfs2_quota_write,
};
enum {
@@ -158,6 +167,10 @@ enum {
Opt_user_xattr,
Opt_nouser_xattr,
Opt_inode64,
+ Opt_acl,
+ Opt_noacl,
+ Opt_usrquota,
+ Opt_grpquota,
Opt_err,
};
@@ -180,6 +193,10 @@ static const match_table_t tokens = {
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_inode64, "inode64"},
+ {Opt_acl, "acl"},
+ {Opt_noacl, "noacl"},
+ {Opt_usrquota, "usrquota"},
+ {Opt_grpquota, "grpquota"},
{Opt_err, NULL}
};
@@ -221,6 +238,19 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
return 0;
}
+static int ocfs2_need_system_inode(struct ocfs2_super *osb, int ino)
+{
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+ && (ino == USER_QUOTA_SYSTEM_INODE
+ || ino == LOCAL_USER_QUOTA_SYSTEM_INODE))
+ return 0;
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
+ && (ino == GROUP_QUOTA_SYSTEM_INODE
+ || ino == LOCAL_GROUP_QUOTA_SYSTEM_INODE))
+ return 0;
+ return 1;
+}
+
static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
{
struct inode *new = NULL;
@@ -247,6 +277,8 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_FIRST_ONLINE_SYSTEM_INODE;
i <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; i++) {
+ if (!ocfs2_need_system_inode(osb, i))
+ continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@@ -277,6 +309,8 @@ static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb)
for (i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1;
i < NUM_SYSTEM_INODES;
i++) {
+ if (!ocfs2_need_system_inode(osb, i))
+ continue;
new = ocfs2_get_system_file_inode(osb, i, osb->slot_num);
if (!new) {
ocfs2_release_system_inodes(osb);
@@ -426,6 +460,12 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
/* We're going to/from readonly mode. */
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+ /* Disable quota accounting before remounting RO */
+ if (*flags & MS_RDONLY) {
+ ret = ocfs2_susp_quotas(osb, 0);
+ if (ret < 0)
+ goto out;
+ }
/* Lock here so the check of HARD_RO and the potential
* setting of SOFT_RO is atomic. */
spin_lock(&osb->osb_lock);
@@ -461,11 +501,28 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
}
unlock_osb:
spin_unlock(&osb->osb_lock);
+ /* Enable quota accounting after remounting RW */
+ if (!ret && !(*flags & MS_RDONLY)) {
+ if (sb_any_quota_suspended(sb))
+ ret = ocfs2_susp_quotas(osb, 1);
+ else
+ ret = ocfs2_enable_quotas(osb);
+ if (ret < 0) {
+ /* Return back changes... */
+ spin_lock(&osb->osb_lock);
+ sb->s_flags |= MS_RDONLY;
+ osb->osb_flags |= OCFS2_OSB_SOFT_RO;
+ spin_unlock(&osb->osb_lock);
+ goto out;
+ }
+ }
}
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
+ if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+ parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
@@ -619,6 +676,131 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
return 0;
}
+static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
+{
+ int type;
+ struct super_block *sb = osb->sb;
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ int status = 0;
+
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ if (unsuspend)
+ status = vfs_quota_enable(
+ sb_dqopt(sb)->files[type],
+ type, QFMT_OCFS2,
+ DQUOT_SUSPENDED);
+ else
+ status = vfs_quota_disable(sb, type,
+ DQUOT_SUSPENDED);
+ if (status < 0)
+ break;
+ }
+ if (status < 0)
+ mlog(ML_ERROR, "Failed to suspend/unsuspend quotas on "
+ "remount (error = %d).\n", status);
+ return status;
+}
+
+static int ocfs2_enable_quotas(struct ocfs2_super *osb)
+{
+ struct inode *inode[MAXQUOTAS] = { NULL, NULL };
+ struct super_block *sb = osb->sb;
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+ unsigned int ino[MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
+ LOCAL_GROUP_QUOTA_SYSTEM_INODE };
+ int status;
+ int type;
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE;
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ continue;
+ inode[type] = ocfs2_get_system_file_inode(osb, ino[type],
+ osb->slot_num);
+ if (!inode[type]) {
+ status = -ENOENT;
+ goto out_quota_off;
+ }
+ status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
+ DQUOT_USAGE_ENABLED);
+ if (status < 0)
+ goto out_quota_off;
+ }
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ iput(inode[type]);
+ return 0;
+out_quota_off:
+ ocfs2_disable_quotas(osb);
+ for (type = 0; type < MAXQUOTAS; type++)
+ iput(inode[type]);
+ mlog_errno(status);
+ return status;
+}
+
+static void ocfs2_disable_quotas(struct ocfs2_super *osb)
+{
+ int type;
+ struct inode *inode;
+ struct super_block *sb = osb->sb;
+
+ /* We mostly ignore errors in this function because there's not much
+ * we can do when we see them */
+ for (type = 0; type < MAXQUOTAS; type++) {
+ if (!sb_has_quota_loaded(sb, type))
+ continue;
+ inode = igrab(sb->s_dquot.files[type]);
+ /* Turn off quotas. This will remove all dquot structures from
+ * memory and so they will be automatically synced to global
+ * quota files */
+ vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
+ if (!inode)
+ continue;
+ iput(inode);
+ }
+}
+
+/* Handle quota on quotactl */
+static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
+ char *path, int remount)
+{
+ unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
+
+ if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
+ return -EINVAL;
+
+ if (remount)
+ return 0; /* Just ignore it has been handled in
+ * ocfs2_remount() */
+ return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
+ format_id, DQUOT_LIMITS_ENABLED);
+}
+
+/* Handle quota off quotactl */
+static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
+{
+ if (remount)
+ return 0; /* Ignore now and handle later in
+ * ocfs2_remount() */
+ return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
+}
+
+static struct quotactl_ops ocfs2_quotactl_ops = {
+ .quota_on = ocfs2_quota_on,
+ .quota_off = ocfs2_quota_off,
+ .quota_sync = vfs_quota_sync,
+ .get_info = vfs_get_dqinfo,
+ .set_info = vfs_set_dqinfo,
+ .get_dqblk = vfs_get_dqblk,
+ .set_dqblk = vfs_set_dqblk,
+};
+
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
{
struct dentry *root;
@@ -651,12 +833,32 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
brelse(bh);
bh = NULL;
+
+ if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
+ parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
+ if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "User quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ goto read_super_error;
+ }
+ if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ status = -EINVAL;
+ mlog(ML_ERROR, "Group quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ goto read_super_error;
+ }
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
@@ -664,6 +866,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_magic = OCFS2_SUPER_MAGIC;
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+
/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
* heartbeat=none */
if (bdev_read_only(sb->s_bdev)) {
@@ -758,6 +963,28 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
+ /* Now we can initialize quotas because we can afford to wait
+ * for cluster locks recovery now. That also means that truncation
+ * log recovery can happen but that waits for proper quota setup */
+ if (!(sb->s_flags & MS_RDONLY)) {
+ status = ocfs2_enable_quotas(osb);
+ if (status < 0) {
+ /* We have to err-out specially here because
+ * s_root is already set */
+ mlog_errno(status);
+ atomic_set(&osb->vol_state, VOLUME_DISABLED);
+ wake_up(&osb->osb_mount_event);
+ mlog_exit(status);
+ return status;
+ }
+ }
+
+ ocfs2_complete_quota_recovery(osb);
+
+ /* Now we wake up again for processes waiting for quotas */
+ atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS);
+ wake_up(&osb->osb_mount_event);
+
mlog_exit(status);
return status;
@@ -945,6 +1172,41 @@ static int ocfs2_parse_options(struct super_block *sb,
case Opt_inode64:
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
break;
+ case Opt_usrquota:
+ /* We check only on remount, otherwise features
+ * aren't yet initialized. */
+ if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ mlog(ML_ERROR, "User quota requested but "
+ "filesystem feature is not set\n");
+ status = 0;
+ goto bail;
+ }
+ mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
+ break;
+ case Opt_grpquota:
+ if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ mlog(ML_ERROR, "Group quota requested but "
+ "filesystem feature is not set\n");
+ status = 0;
+ goto bail;
+ }
+ mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
+ break;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ case Opt_acl:
+ mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ break;
+ case Opt_noacl:
+ mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+ break;
+#else
+ case Opt_acl:
+ case Opt_noacl:
+ printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
+ break;
+#endif
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1008,6 +1270,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (osb->osb_cluster_stack[0])
seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
osb->osb_cluster_stack);
+ if (opts & OCFS2_MOUNT_USRQUOTA)
+ seq_printf(s, ",usrquota");
+ if (opts & OCFS2_MOUNT_GRPQUOTA)
+ seq_printf(s, ",grpquota");
if (opts & OCFS2_MOUNT_NOUSERXATTR)
seq_printf(s, ",nouser_xattr");
@@ -1017,6 +1283,13 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_INODE64)
seq_printf(s, ",inode64");
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ if (opts & OCFS2_MOUNT_POSIX_ACL)
+ seq_printf(s, ",acl");
+ else
+ seq_printf(s, ",noacl");
+#endif
+
return 0;
}
@@ -1054,6 +1327,7 @@ static int __init ocfs2_init(void)
ocfs2_set_locking_protocol();
+ status = register_quota_format(&ocfs2_quota_format);
leave:
if (status < 0) {
ocfs2_free_mem_caches();
@@ -1077,6 +1351,8 @@ static void __exit ocfs2_exit(void)
destroy_workqueue(ocfs2_wq);
}
+ unregister_quota_format(&ocfs2_quota_format);
+
debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
@@ -1192,8 +1468,27 @@ static int ocfs2_initialize_mem_caches(void)
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD),
ocfs2_inode_init_once);
- if (!ocfs2_inode_cachep)
+ ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
+ sizeof(struct ocfs2_dquot),
+ 0,
+ (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD),
+ NULL);
+ ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
+ sizeof(struct ocfs2_quota_chunk),
+ 0,
+ (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ NULL);
+ if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
+ !ocfs2_qf_chunk_cachep) {
+ if (ocfs2_inode_cachep)
+ kmem_cache_destroy(ocfs2_inode_cachep);
+ if (ocfs2_dquot_cachep)
+ kmem_cache_destroy(ocfs2_dquot_cachep);
+ if (ocfs2_qf_chunk_cachep)
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
return -ENOMEM;
+ }
return 0;
}
@@ -1202,8 +1497,15 @@ static void ocfs2_free_mem_caches(void)
{
if (ocfs2_inode_cachep)
kmem_cache_destroy(ocfs2_inode_cachep);
-
ocfs2_inode_cachep = NULL;
+
+ if (ocfs2_dquot_cachep)
+ kmem_cache_destroy(ocfs2_dquot_cachep);
+ ocfs2_dquot_cachep = NULL;
+
+ if (ocfs2_qf_chunk_cachep)
+ kmem_cache_destroy(ocfs2_qf_chunk_cachep);
+ ocfs2_qf_chunk_cachep = NULL;
}
static int ocfs2_get_sector(struct super_block *sb,
@@ -1303,6 +1605,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
+ ocfs2_disable_quotas(osb);
+
ocfs2_shutdown_local_alloc(osb);
ocfs2_truncate_log_shutdown(osb);
@@ -1413,6 +1717,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
sb->s_export_op = &ocfs2_export_ops;
+ sb->s_qcop = &ocfs2_quotactl_ops;
+ sb->dq_op = &ocfs2_quota_operations;
sb->s_xattr = ocfs2_xattr_handlers;
sb->s_time_gran = 1;
sb->s_flags |= MS_NOATIME;
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index cbd03dfdc7b9..ed0a0cfd68d2 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -84,7 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
mlog_entry_void();
- status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
+ status = ocfs2_read_inode_block(inode, bh);
if (status < 0) {
mlog_errno(status);
link = ERR_PTR(status);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 054e2efb0b7e..7e0d62ac441b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -35,6 +35,7 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <linux/security.h>
#define MLOG_MASK_PREFIX ML_XATTR
#include <cluster/masklog.h>
@@ -61,12 +62,32 @@ struct ocfs2_xattr_def_value_root {
};
struct ocfs2_xattr_bucket {
- struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
- struct ocfs2_xattr_header *xh;
+ /* The inode these xattrs are associated with */
+ struct inode *bu_inode;
+
+ /* The actual buffers that make up the bucket */
+ struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
+
+ /* How many blocks make up one bucket for this filesystem */
+ int bu_blocks;
+};
+
+struct ocfs2_xattr_set_ctxt {
+ handle_t *handle;
+ struct ocfs2_alloc_context *meta_ac;
+ struct ocfs2_alloc_context *data_ac;
+ struct ocfs2_cached_dealloc_ctxt dealloc;
};
#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
#define OCFS2_XATTR_INLINE_SIZE 80
+#define OCFS2_XATTR_FREE_IN_IBODY (OCFS2_MIN_XATTR_INLINE_SIZE \
+ - sizeof(struct ocfs2_xattr_header) \
+ - sizeof(__u32))
+#define OCFS2_XATTR_FREE_IN_BLOCK(ptr) ((ptr)->i_sb->s_blocksize \
+ - sizeof(struct ocfs2_xattr_block) \
+ - sizeof(struct ocfs2_xattr_header) \
+ - sizeof(__u32))
static struct ocfs2_xattr_def_value_root def_xv = {
.xv.xr_list.l_count = cpu_to_le16(1),
@@ -74,13 +95,25 @@ static struct ocfs2_xattr_def_value_root def_xv = {
struct xattr_handler *ocfs2_xattr_handlers[] = {
&ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ &ocfs2_xattr_acl_access_handler,
+ &ocfs2_xattr_acl_default_handler,
+#endif
&ocfs2_xattr_trusted_handler,
+ &ocfs2_xattr_security_handler,
NULL
};
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
[OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+ [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
+ = &ocfs2_xattr_acl_access_handler,
+ [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
+ = &ocfs2_xattr_acl_default_handler,
+#endif
[OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
+ [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
};
struct ocfs2_xattr_info {
@@ -98,7 +131,7 @@ struct ocfs2_xattr_search {
*/
struct buffer_head *xattr_bh;
struct ocfs2_xattr_header *header;
- struct ocfs2_xattr_bucket bucket;
+ struct ocfs2_xattr_bucket *bucket;
void *base;
void *end;
struct ocfs2_xattr_entry *here;
@@ -127,11 +160,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
size_t buffer_size);
static int ocfs2_xattr_create_index_block(struct inode *inode,
- struct ocfs2_xattr_search *xs);
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt);
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs);
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt);
static int ocfs2_delete_xattr_index_block(struct inode *inode,
struct buffer_head *xb_bh);
@@ -154,6 +189,187 @@ static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
return len / sizeof(struct ocfs2_xattr_entry);
}
+#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
+#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
+#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
+
+static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
+{
+ struct ocfs2_xattr_bucket *bucket;
+ int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+ BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
+
+ bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
+ if (bucket) {
+ bucket->bu_inode = inode;
+ bucket->bu_blocks = blks;
+ }
+
+ return bucket;
+}
+
+static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
+{
+ int i;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ brelse(bucket->bu_bhs[i]);
+ bucket->bu_bhs[i] = NULL;
+ }
+}
+
+static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
+{
+ if (bucket) {
+ ocfs2_xattr_bucket_relse(bucket);
+ bucket->bu_inode = NULL;
+ kfree(bucket);
+ }
+}
+
+/*
+ * A bucket that has never been written to disk doesn't need to be
+ * read. We just need the buffer_heads. Don't call this for
+ * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes
+ * them fully.
+ */
+static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
+ u64 xb_blkno)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
+ xb_blkno + i);
+ if (!bucket->bu_bhs[i]) {
+ rc = -EIO;
+ mlog_errno(rc);
+ break;
+ }
+
+ if (!ocfs2_buffer_uptodate(bucket->bu_inode,
+ bucket->bu_bhs[i]))
+ ocfs2_set_new_buffer_uptodate(bucket->bu_inode,
+ bucket->bu_bhs[i]);
+ }
+
+ if (rc)
+ ocfs2_xattr_bucket_relse(bucket);
+ return rc;
+}
+
+/* Read the xattr bucket at xb_blkno */
+static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
+ u64 xb_blkno)
+{
+ int rc;
+
+ rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
+ bucket->bu_blocks, bucket->bu_bhs, 0,
+ NULL);
+ if (rc)
+ ocfs2_xattr_bucket_relse(bucket);
+ return rc;
+}
+
+static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
+ struct ocfs2_xattr_bucket *bucket,
+ int type)
+{
+ int i, rc = 0;
+
+ for (i = 0; i < bucket->bu_blocks; i++) {
+ rc = ocfs2_journal_access(handle, bucket->bu_inode,
+ bucket->bu_bhs[i], type);
+ if (rc) {
+ mlog_errno(rc);
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
+ struct ocfs2_xattr_bucket *bucket)
+{
+ int i;
+
+ for (i = 0; i < bucket->bu_blocks; i++)
+ ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
+}
+
+static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
+ struct ocfs2_xattr_bucket *src)
+{
+ int i;
+ int blocksize = src->bu_inode->i_sb->s_blocksize;
+
+ BUG_ON(dest->bu_blocks != src->bu_blocks);
+ BUG_ON(dest->bu_inode != src->bu_inode);
+
+ for (i = 0; i < src->bu_blocks; i++) {
+ memcpy(bucket_block(dest, i), bucket_block(src, i),
+ blocksize);
+ }
+}
+
+static int ocfs2_validate_xattr_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ struct ocfs2_xattr_block *xb =
+ (struct ocfs2_xattr_block *)bh->b_data;
+
+ mlog(0, "Validating xattr block %llu\n",
+ (unsigned long long)bh->b_blocknr);
+
+ if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has bad "
+ "signature %.*s",
+ (unsigned long long)bh->b_blocknr, 7,
+ xb->xb_signature);
+ return -EINVAL;
+ }
+
+ if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has an "
+ "invalid xb_blkno of %llu",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(xb->xb_blkno));
+ return -EINVAL;
+ }
+
+ if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
+ ocfs2_error(sb,
+ "Extended attribute block #%llu has an invalid "
+ "xb_fs_generation of #%u",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(xb->xb_fs_generation));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
+ struct buffer_head **bh)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ rc = ocfs2_read_block(inode, xb_blkno, &tmp,
+ ocfs2_validate_xattr_block);
+
+ /* If ocfs2_read_block() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
static inline const char *ocfs2_xattr_prefix(int name_index)
{
struct xattr_handler *handler = NULL;
@@ -200,17 +416,135 @@ static void ocfs2_xattr_hash_entry(struct inode *inode,
return;
}
+static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
+{
+ int size = 0;
+
+ if (value_len <= OCFS2_XATTR_INLINE_SIZE)
+ size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
+ else
+ size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
+ size += sizeof(struct ocfs2_xattr_entry);
+
+ return size;
+}
+
+int ocfs2_calc_security_init(struct inode *dir,
+ struct ocfs2_security_xattr_info *si,
+ int *want_clusters,
+ int *xattr_credits,
+ struct ocfs2_alloc_context **xattr_ac)
+{
+ int ret = 0;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+ si->value_len);
+
+ /*
+ * The max space of security xattr taken inline is
+ * 256(name) + 80(value) + 16(entry) = 352 bytes,
+ * So reserve one metadata block for it is ok.
+ */
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ s_size > OCFS2_XATTR_FREE_IN_IBODY) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+ *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+
+ /* reserve clusters for xattr value which will be set in B tree*/
+ if (si->value_len > OCFS2_XATTR_INLINE_SIZE)
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ si->value_len);
+ return ret;
+}
+
+int ocfs2_calc_xattr_init(struct inode *dir,
+ struct buffer_head *dir_bh,
+ int mode,
+ struct ocfs2_security_xattr_info *si,
+ int *want_clusters,
+ int *xattr_credits,
+ struct ocfs2_alloc_context **xattr_ac)
+{
+ int ret = 0;
+ struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+ int s_size = 0;
+ int a_size = 0;
+ int acl_len = 0;
+
+ if (si->enable)
+ s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
+ si->value_len);
+
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
+ OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
+ "", NULL, 0);
+ if (acl_len > 0) {
+ a_size = ocfs2_xattr_entry_real_size(0, acl_len);
+ if (S_ISDIR(mode))
+ a_size <<= 1;
+ } else if (acl_len != 0 && acl_len != -ENODATA) {
+ mlog_errno(ret);
+ return ret;
+ }
+ }
+
+ if (!(s_size + a_size))
+ return ret;
+
+ /*
+ * The max space of security xattr taken inline is
+ * 256(name) + 80(value) + 16(entry) = 352 bytes,
+ * The max space of acl xattr taken inline is
+ * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
+ * when blocksize = 512, may reserve one more cluser for
+ * xattr bucket, otherwise reserve one metadata block
+ * for them is ok.
+ */
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+ (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+ *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+
+ if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
+ (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
+ *want_clusters += 1;
+ *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
+ }
+
+ /* reserve clusters for xattr value which will be set in B tree*/
+ if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE)
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ si->value_len);
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
+ acl_len > OCFS2_XATTR_INLINE_SIZE) {
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
+ if (S_ISDIR(mode))
+ *want_clusters += ocfs2_clusters_for_bytes(dir->i_sb,
+ acl_len);
+ }
+
+ return ret;
+}
+
static int ocfs2_xattr_extend_allocation(struct inode *inode,
u32 clusters_to_add,
struct buffer_head *xattr_bh,
- struct ocfs2_xattr_value_root *xv)
+ struct ocfs2_xattr_value_root *xv,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int status = 0;
- int restart_func = 0;
- int credits = 0;
- handle_t *handle = NULL;
- struct ocfs2_alloc_context *data_ac = NULL;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
enum ocfs2_alloc_restarted why;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
@@ -220,26 +554,6 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
-restart_all:
-
- status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
- &data_ac, &meta_ac);
- if (status) {
- mlog_errno(status);
- goto leave;
- }
-
- credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
- clusters_to_add);
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(status);
- goto leave;
- }
-
-restarted_transaction:
status = ocfs2_journal_access(handle, inode, xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
@@ -255,12 +569,11 @@ restarted_transaction:
0,
&et,
handle,
- data_ac,
- meta_ac,
+ ctxt->data_ac,
+ ctxt->meta_ac,
&why);
- if ((status < 0) && (status != -EAGAIN)) {
- if (status != -ENOSPC)
- mlog_errno(status);
+ if (status < 0) {
+ mlog_errno(status);
goto leave;
}
@@ -272,47 +585,13 @@ restarted_transaction:
clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
- if (why != RESTART_NONE && clusters_to_add) {
- if (why == RESTART_META) {
- mlog(0, "restarting function.\n");
- restart_func = 1;
- } else {
- BUG_ON(why != RESTART_TRANS);
-
- mlog(0, "restarting transaction.\n");
- /* TODO: This can be more intelligent. */
- credits = ocfs2_calc_extend_credits(osb->sb,
- et.et_root_el,
- clusters_to_add);
- status = ocfs2_extend_trans(handle, credits);
- if (status < 0) {
- /* handle still has to be committed at
- * this point. */
- status = -ENOMEM;
- mlog_errno(status);
- goto leave;
- }
- goto restarted_transaction;
- }
- }
+ /*
+ * We should have already allocated enough space before the transaction,
+ * so no need to restart.
+ */
+ BUG_ON(why != RESTART_NONE || clusters_to_add);
leave:
- if (handle) {
- ocfs2_commit_trans(osb, handle);
- handle = NULL;
- }
- if (data_ac) {
- ocfs2_free_alloc_context(data_ac);
- data_ac = NULL;
- }
- if (meta_ac) {
- ocfs2_free_alloc_context(meta_ac);
- meta_ac = NULL;
- }
- if ((!status) && restart_func) {
- restart_func = 0;
- goto restart_all;
- }
return status;
}
@@ -321,53 +600,27 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
struct buffer_head *root_bh,
struct ocfs2_xattr_value_root *xv,
u32 cpos, u32 phys_cpos, u32 len,
- struct ocfs2_cached_dealloc_ctxt *dealloc)
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct inode *tl_inode = osb->osb_tl_inode;
- handle_t *handle;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_extent_tree et;
ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
- ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
-
- mutex_lock(&tl_inode->i_mutex);
-
- if (ocfs2_truncate_log_needs_flush(osb)) {
- ret = __ocfs2_flush_truncate_log(osb);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_journal_access(handle, inode, root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
- dealloc);
+ ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac,
+ &ctxt->dealloc);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
le32_add_cpu(&xv->xr_clusters, -len);
@@ -375,21 +628,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
ret = ocfs2_journal_dirty(handle, root_bh);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
+ ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len);
if (ret)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
out:
- mutex_unlock(&tl_inode->i_mutex);
-
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
return ret;
}
@@ -397,15 +643,12 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
u32 old_clusters,
u32 new_clusters,
struct buffer_head *root_bh,
- struct ocfs2_xattr_value_root *xv)
+ struct ocfs2_xattr_value_root *xv,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret = 0;
u32 trunc_len, cpos, phys_cpos, alloc_size;
u64 block;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct ocfs2_cached_dealloc_ctxt dealloc;
-
- ocfs2_init_dealloc_ctxt(&dealloc);
if (old_clusters <= new_clusters)
return 0;
@@ -425,7 +668,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
phys_cpos, alloc_size,
- &dealloc);
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
@@ -439,16 +682,14 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
}
out:
- ocfs2_schedule_truncate_log_flush(osb, 1);
- ocfs2_run_deallocs(osb, &dealloc);
-
return ret;
}
static int ocfs2_xattr_value_truncate(struct inode *inode,
struct buffer_head *root_bh,
struct ocfs2_xattr_value_root *xv,
- int len)
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret;
u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
@@ -460,11 +701,11 @@ static int ocfs2_xattr_value_truncate(struct inode *inode,
if (new_clusters > old_clusters)
ret = ocfs2_xattr_extend_allocation(inode,
new_clusters - old_clusters,
- root_bh, xv);
+ root_bh, xv, ctxt);
else
ret = ocfs2_xattr_shrink_size(inode,
old_clusters, new_clusters,
- root_bh, xv);
+ root_bh, xv, ctxt);
return ret;
}
@@ -554,18 +795,14 @@ static int ocfs2_xattr_block_list(struct inode *inode,
if (!di->i_xattr_loc)
return ret;
- ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+ &blk_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto cleanup;
- }
-
if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
ret = ocfs2_xattr_list_entries(inode, header,
@@ -575,7 +812,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
ret = ocfs2_xattr_tree_list_index_block(inode, xt,
buffer, buffer_size);
}
-cleanup:
+
brelse(blk_bh);
return ret;
@@ -685,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
/* Copy ocfs2_xattr_value */
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -769,7 +1006,12 @@ static int ocfs2_xattr_block_get(struct inode *inode,
size_t size;
int ret = -ENODATA, name_offset, name_len, block_off, i;
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ xs->bucket = ocfs2_xattr_bucket_new(inode);
+ if (!xs->bucket) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto cleanup;
+ }
ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
if (ret) {
@@ -795,11 +1037,11 @@ static int ocfs2_xattr_block_get(struct inode *inode,
if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
ret = ocfs2_xattr_bucket_get_name_value(inode,
- xs->bucket.xh,
+ bucket_xh(xs->bucket),
i,
&block_off,
&name_offset);
- xs->base = xs->bucket.bhs[block_off]->b_data;
+ xs->base = bucket_block(xs->bucket, block_off);
}
if (ocfs2_xattr_is_local(xs->here)) {
memcpy(buffer, (void *)xs->base +
@@ -817,21 +1059,15 @@ static int ocfs2_xattr_block_get(struct inode *inode,
}
ret = size;
cleanup:
- for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
- brelse(xs->bucket.bhs[i]);
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ ocfs2_xattr_bucket_free(xs->bucket);
brelse(xs->xattr_bh);
xs->xattr_bh = NULL;
return ret;
}
-/* ocfs2_xattr_get()
- *
- * Copy an extended attribute into the buffer provided.
- * Buffer is NULL to compute the size of buffer required.
- */
-static int ocfs2_xattr_get(struct inode *inode,
+int ocfs2_xattr_get_nolock(struct inode *inode,
+ struct buffer_head *di_bh,
int name_index,
const char *name,
void *buffer,
@@ -839,7 +1075,6 @@ static int ocfs2_xattr_get(struct inode *inode,
{
int ret;
struct ocfs2_dinode *di = NULL;
- struct buffer_head *di_bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_xattr_search xis = {
.not_found = -ENODATA,
@@ -854,11 +1089,6 @@ static int ocfs2_xattr_get(struct inode *inode,
if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
ret = -ENODATA;
- ret = ocfs2_inode_lock(inode, &di_bh, 0);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
- }
xis.inode_bh = xbs.inode_bh = di_bh;
di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -869,6 +1099,32 @@ static int ocfs2_xattr_get(struct inode *inode,
ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
buffer_size, &xbs);
up_read(&oi->ip_xattr_sem);
+
+ return ret;
+}
+
+/* ocfs2_xattr_get()
+ *
+ * Copy an extended attribute into the buffer provided.
+ * Buffer is NULL to compute the size of buffer required.
+ */
+static int ocfs2_xattr_get(struct inode *inode,
+ int name_index,
+ const char *name,
+ void *buffer,
+ size_t buffer_size)
+{
+ int ret;
+ struct buffer_head *di_bh = NULL;
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ return ret;
+ }
+ ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
+ name, buffer, buffer_size);
+
ocfs2_inode_unlock(inode, 0);
brelse(di_bh);
@@ -877,6 +1133,7 @@ static int ocfs2_xattr_get(struct inode *inode,
}
static int __ocfs2_xattr_set_value_outside(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_value_root *xv,
const void *value,
int value_len)
@@ -888,14 +1145,17 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
u64 blkno;
struct buffer_head *bh = NULL;
- handle_t *handle;
BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
+ /*
+ * In __ocfs2_xattr_set_value_outside has already been dirtied,
+ * so we don't need to worry about whether ocfs2_extend_trans
+ * will create a new transactio for us or not.
+ */
credits = clusters * bpc;
- handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
+ ret = ocfs2_extend_trans(handle, credits);
+ if (ret) {
mlog_errno(ret);
goto out;
}
@@ -905,16 +1165,16 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
&num_clusters, &xv->xr_list);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
ret = ocfs2_journal_access(handle,
@@ -923,7 +1183,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
cp_len = value_len > blocksize ? blocksize : value_len;
@@ -937,7 +1197,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
brelse(bh);
bh = NULL;
@@ -951,8 +1211,6 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
}
cpos += num_clusters;
}
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
brelse(bh);
@@ -960,28 +1218,21 @@ out:
}
static int ocfs2_xattr_cleanup(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
size_t offs)
{
- handle_t *handle = NULL;
int ret = 0;
size_t name_len = strlen(xi->name);
void *val = xs->base + offs;
size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
/* Decrease xattr count */
le16_add_cpu(&xs->header->xh_count, -1);
@@ -992,32 +1243,23 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
return ret;
}
static int ocfs2_xattr_update_entry(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
size_t offs)
{
- handle_t *handle = NULL;
- int ret = 0;
+ int ret;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
xs->here->xe_name_offset = cpu_to_le16(offs);
@@ -1031,8 +1273,6 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
return ret;
}
@@ -1045,6 +1285,7 @@ out:
static int ocfs2_xattr_set_value_outside(struct inode *inode,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
size_t offs)
{
size_t name_len = strlen(xi->name);
@@ -1064,18 +1305,18 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
xv->xr_list.l_next_free_rec = 0;
ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
- xi->value_len);
+ xi->value_len, ctxt);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
- xi->value_len);
+ ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, offs);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
+ ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, xv,
+ xi->value, xi->value_len);
if (ret < 0)
mlog_errno(ret);
@@ -1195,6 +1436,7 @@ static void ocfs2_xattr_set_entry_local(struct inode *inode,
static int ocfs2_xattr_set_entry(struct inode *inode,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
int flag)
{
struct ocfs2_xattr_entry *last;
@@ -1202,7 +1444,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
size_t size_l = 0;
- handle_t *handle = NULL;
+ handle_t *handle = ctxt->handle;
int free, i, ret;
struct ocfs2_xattr_info xi_l = {
.name_index = xi->name_index,
@@ -1265,7 +1507,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
/* Replace existing local xattr with tree root */
ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
- offs);
+ ctxt, offs);
if (ret < 0)
mlog_errno(ret);
goto out;
@@ -1284,25 +1526,28 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
ret = ocfs2_xattr_value_truncate(inode,
xs->xattr_bh,
xv,
- xi->value_len);
+ xi->value_len,
+ ctxt);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- ret = __ocfs2_xattr_set_value_outside(inode,
- xv,
- xi->value,
- xi->value_len);
+ ret = ocfs2_xattr_update_entry(inode,
+ handle,
+ xi,
+ xs,
+ offs);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_xattr_update_entry(inode,
- xi,
- xs,
- offs);
+ ret = __ocfs2_xattr_set_value_outside(inode,
+ handle,
+ xv,
+ xi->value,
+ xi->value_len);
if (ret < 0)
mlog_errno(ret);
goto out;
@@ -1312,44 +1557,29 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
* just trucate old value to zero.
*/
ret = ocfs2_xattr_value_truncate(inode,
- xs->xattr_bh,
- xv,
- 0);
+ xs->xattr_bh,
+ xv,
+ 0,
+ ctxt);
if (ret < 0)
mlog_errno(ret);
}
}
}
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
- OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
if (!(flag & OCFS2_INLINE_XATTR_FL)) {
- /* set extended attribute in external block. */
- ret = ocfs2_extend_trans(handle,
- OCFS2_INODE_UPDATE_CREDITS +
- OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
}
@@ -1363,7 +1593,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
}
@@ -1400,16 +1630,13 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
-
if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
/*
* Set value outside in B tree.
* This is the second step for value size > INLINE_SIZE.
*/
size_t offs = le16_to_cpu(xs->here->xe_name_offset);
- ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
+ ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt, offs);
if (ret < 0) {
int ret2;
@@ -1418,14 +1645,14 @@ out_commit:
* If set value outside failed, we have to clean
* the junk tree root we have already set in local.
*/
- ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
+ ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
+ xi, xs, offs);
if (ret2 < 0)
mlog_errno(ret2);
}
}
out:
return ret;
-
}
static int ocfs2_remove_value_outside(struct inode*inode,
@@ -1433,6 +1660,18 @@ static int ocfs2_remove_value_outside(struct inode*inode,
struct ocfs2_xattr_header *header)
{
int ret = 0, i;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+
+ ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
+
+ ctxt.handle = ocfs2_start_trans(osb,
+ ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto out;
+ }
for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
@@ -1445,14 +1684,19 @@ static int ocfs2_remove_value_outside(struct inode*inode,
le16_to_cpu(entry->xe_name_offset);
xv = (struct ocfs2_xattr_value_root *)
(val + OCFS2_XATTR_SIZE(entry->xe_name_len));
- ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
+ ret = ocfs2_xattr_value_truncate(inode, bh, xv,
+ 0, &ctxt);
if (ret < 0) {
mlog_errno(ret);
- return ret;
+ break;
}
}
}
+ ocfs2_commit_trans(osb, ctxt.handle);
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
+out:
return ret;
}
@@ -1502,24 +1746,19 @@ static int ocfs2_xattr_free_block(struct inode *inode,
u64 blk, bg_blkno;
u16 bit;
- ret = ocfs2_read_block(inode, block, &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto out;
- }
-
ret = ocfs2_xattr_block_remove(inode, blk_bh);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
+ xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
blk = le64_to_cpu(xb->xb_blkno);
bit = le16_to_cpu(xb->xb_suballoc_bit);
bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1714,7 +1953,8 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
*/
static int ocfs2_xattr_ibody_set(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
@@ -1731,7 +1971,7 @@ static int ocfs2_xattr_ibody_set(struct inode *inode,
}
}
- ret = ocfs2_xattr_set_entry(inode, xi, xs,
+ ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
out:
up_write(&oi->ip_alloc_sem);
@@ -1758,19 +1998,15 @@ static int ocfs2_xattr_block_find(struct inode *inode,
if (!di->i_xattr_loc)
return ret;
- ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
+ ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
+ &blk_bh);
if (ret < 0) {
mlog_errno(ret);
return ret;
}
- xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
- if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
- ret = -EIO;
- goto cleanup;
- }
-
xs->xattr_bh = blk_bh;
+ xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
xs->header = &xb->xb_attrs.xb_header;
@@ -1804,13 +2040,13 @@ cleanup:
*/
static int ocfs2_xattr_block_set(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct buffer_head *new_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
- struct ocfs2_alloc_context *meta_ac = NULL;
- handle_t *handle = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_xattr_block *xblk = NULL;
u16 suballoc_bit_start;
u32 num_got;
@@ -1818,35 +2054,19 @@ static int ocfs2_xattr_block_set(struct inode *inode,
int ret;
if (!xs->xattr_bh) {
- /*
- * Alloc one external block for extended attribute
- * outside of inode.
- */
- ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- handle = ocfs2_start_trans(osb,
- OCFS2_XATTR_BLOCK_CREATE_CREDITS);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
- ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
+ ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
&suballoc_bit_start, &num_got,
&first_blkno);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
new_bh = sb_getblk(inode->i_sb, first_blkno);
@@ -1856,7 +2076,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
OCFS2_JOURNAL_ACCESS_CREATE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
/* Initialize ocfs2_xattr_block */
@@ -1874,44 +2094,512 @@ static int ocfs2_xattr_block_set(struct inode *inode,
xs->end = (void *)xblk + inode->i_sb->s_blocksize;
xs->here = xs->header->xh_entries;
-
ret = ocfs2_journal_dirty(handle, new_bh);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto end;
}
di->i_xattr_loc = cpu_to_le64(first_blkno);
- ret = ocfs2_journal_dirty(handle, xs->inode_bh);
- if (ret < 0)
- mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
-out:
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
- if (ret < 0)
- return ret;
+ ocfs2_journal_dirty(handle, xs->inode_bh);
} else
xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
/* Set extended attribute into external block */
- ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
+ ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
+ OCFS2_HAS_XATTR_FL);
if (!ret || ret != -ENOSPC)
goto end;
- ret = ocfs2_xattr_create_index_block(inode, xs);
+ ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
if (ret)
goto end;
}
- ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
+ ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
end:
return ret;
}
+/* Check whether the new xattr can be inserted into the inode. */
+static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xs)
+{
+ u64 value_size;
+ struct ocfs2_xattr_entry *last;
+ int free, i;
+ size_t min_offs = xs->end - xs->base;
+
+ if (!xs->header)
+ return 0;
+
+ last = xs->header->xh_entries;
+
+ for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ size_t offs = le16_to_cpu(last->xe_name_offset);
+ if (offs < min_offs)
+ min_offs = offs;
+ last += 1;
+ }
+
+ free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+ if (free < 0)
+ return 0;
+
+ BUG_ON(!xs->not_found);
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
+ value_size = OCFS2_XATTR_ROOT_SIZE;
+ else
+ value_size = OCFS2_XATTR_SIZE(xi->value_len);
+
+ if (free >= sizeof(struct ocfs2_xattr_entry) +
+ OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
+ return 1;
+
+ return 0;
+}
+
+static int ocfs2_calc_xattr_set_need(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ int *clusters_need,
+ int *meta_need,
+ int *credits_need)
+{
+ int ret = 0, old_in_xb = 0;
+ int clusters_add = 0, meta_add = 0, credits = 0;
+ struct buffer_head *bh = NULL;
+ struct ocfs2_xattr_block *xb = NULL;
+ struct ocfs2_xattr_entry *xe = NULL;
+ struct ocfs2_xattr_value_root *xv = NULL;
+ char *base = NULL;
+ int name_offset, name_len = 0;
+ u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
+ xi->value_len);
+ u64 value_size;
+
+ if (xis->not_found && xbs->not_found) {
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
+ clusters_add += new_clusters;
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ &def_xv.xv.xr_list,
+ new_clusters);
+ }
+
+ goto meta_guess;
+ }
+
+ if (!xis->not_found) {
+ xe = xis->here;
+ name_offset = le16_to_cpu(xe->xe_name_offset);
+ name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+ base = xis->base;
+ credits += OCFS2_INODE_UPDATE_CREDITS;
+ } else {
+ int i, block_off = 0;
+ xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+ xe = xbs->here;
+ name_offset = le16_to_cpu(xe->xe_name_offset);
+ name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
+ i = xbs->here - xbs->header->xh_entries;
+ old_in_xb = 1;
+
+ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+ ret = ocfs2_xattr_bucket_get_name_value(inode,
+ bucket_xh(xbs->bucket),
+ i, &block_off,
+ &name_offset);
+ base = bucket_block(xbs->bucket, block_off);
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ } else {
+ base = xbs->base;
+ credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
+ }
+ }
+
+ /*
+ * delete a xattr doesn't need metadata and cluster allocation.
+ * so just calculate the credits and return.
+ *
+ * The credits for removing the value tree will be extended
+ * by ocfs2_remove_extent itself.
+ */
+ if (!xi->value) {
+ if (!ocfs2_xattr_is_local(xe))
+ credits += ocfs2_remove_extent_credits(inode->i_sb);
+
+ goto out;
+ }
+
+ /* do cluster allocation guess first. */
+ value_size = le64_to_cpu(xe->xe_value_size);
+
+ if (old_in_xb) {
+ /*
+ * In xattr set, we always try to set the xe in inode first,
+ * so if it can be inserted into inode successfully, the old
+ * one will be removed from the xattr block, and this xattr
+ * will be inserted into inode as a new xattr in inode.
+ */
+ if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
+ clusters_add += new_clusters;
+ credits += ocfs2_remove_extent_credits(inode->i_sb) +
+ OCFS2_INODE_UPDATE_CREDITS;
+ if (!ocfs2_xattr_is_local(xe))
+ credits += ocfs2_calc_extend_credits(
+ inode->i_sb,
+ &def_xv.xv.xr_list,
+ new_clusters);
+ goto out;
+ }
+ }
+
+ if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
+ /* the new values will be stored outside. */
+ u32 old_clusters = 0;
+
+ if (!ocfs2_xattr_is_local(xe)) {
+ old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
+ value_size);
+ xv = (struct ocfs2_xattr_value_root *)
+ (base + name_offset + name_len);
+ value_size = OCFS2_XATTR_ROOT_SIZE;
+ } else
+ xv = &def_xv.xv;
+
+ if (old_clusters >= new_clusters) {
+ credits += ocfs2_remove_extent_credits(inode->i_sb);
+ goto out;
+ } else {
+ meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
+ clusters_add += new_clusters - old_clusters;
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ &xv->xr_list,
+ new_clusters -
+ old_clusters);
+ if (value_size >= OCFS2_XATTR_ROOT_SIZE)
+ goto out;
+ }
+ } else {
+ /*
+ * Now the new value will be stored inside. So if the new
+ * value is smaller than the size of value root or the old
+ * value, we don't need any allocation, otherwise we have
+ * to guess metadata allocation.
+ */
+ if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
+ (!ocfs2_xattr_is_local(xe) &&
+ OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
+ goto out;
+ }
+
+meta_guess:
+ /* calculate metadata allocation. */
+ if (di->i_xattr_loc) {
+ if (!xbs->xattr_bh) {
+ ret = ocfs2_read_xattr_block(inode,
+ le64_to_cpu(di->i_xattr_loc),
+ &bh);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ xb = (struct ocfs2_xattr_block *)bh->b_data;
+ } else
+ xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
+
+ if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
+ struct ocfs2_extent_list *el =
+ &xb->xb_attrs.xb_root.xt_list;
+ meta_add += ocfs2_extend_meta_needed(el);
+ credits += ocfs2_calc_extend_credits(inode->i_sb,
+ el, 1);
+ }
+
+ /*
+ * This cluster will be used either for new bucket or for
+ * new xattr block.
+ * If the cluster size is the same as the bucket size, one
+ * more is needed since we may need to extend the bucket
+ * also.
+ */
+ clusters_add += 1;
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ if (OCFS2_XATTR_BUCKET_SIZE ==
+ OCFS2_SB(inode->i_sb)->s_clustersize) {
+ credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ clusters_add += 1;
+ }
+ } else {
+ meta_add += 1;
+ credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
+ }
+out:
+ if (clusters_need)
+ *clusters_need = clusters_add;
+ if (meta_need)
+ *meta_need = meta_add;
+ if (credits_need)
+ *credits_need = credits;
+ brelse(bh);
+ return ret;
+}
+
+static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ struct ocfs2_xattr_set_ctxt *ctxt,
+ int *credits)
+{
+ int clusters_add, meta_add, ret;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+
+ memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
+
+ ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
+
+ ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
+ &clusters_add, &meta_add, credits);
+ if (ret) {
+ mlog_errno(ret);
+ return ret;
+ }
+
+ mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
+ "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
+
+ if (meta_add) {
+ ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
+ &ctxt->meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+
+ if (clusters_add) {
+ ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
+ if (ret)
+ mlog_errno(ret);
+ }
+out:
+ if (ret) {
+ if (ctxt->meta_ac) {
+ ocfs2_free_alloc_context(ctxt->meta_ac);
+ ctxt->meta_ac = NULL;
+ }
+
+ /*
+ * We cannot have an error and a non null ctxt->data_ac.
+ */
+ }
+
+ return ret;
+}
+
+static int __ocfs2_xattr_set_handle(struct inode *inode,
+ struct ocfs2_dinode *di,
+ struct ocfs2_xattr_info *xi,
+ struct ocfs2_xattr_search *xis,
+ struct ocfs2_xattr_search *xbs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
+{
+ int ret = 0, credits, old_found;
+
+ if (!xi->value) {
+ /* Remove existing extended attribute */
+ if (!xis->not_found)
+ ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+ else if (!xbs->not_found)
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ } else {
+ /* We always try to set extended attribute into inode first*/
+ ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
+ if (!ret && !xbs->not_found) {
+ /*
+ * If succeed and that extended attribute existing in
+ * external block, then we will remove it.
+ */
+ xi->value = NULL;
+ xi->value_len = 0;
+
+ old_found = xis->not_found;
+ xis->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ xis->not_found = old_found;
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ } else if (ret == -ENOSPC) {
+ if (di->i_xattr_loc && !xbs->xattr_bh) {
+ ret = ocfs2_xattr_block_find(inode,
+ xi->name_index,
+ xi->name, xbs);
+ if (ret)
+ goto out;
+
+ old_found = xis->not_found;
+ xis->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ xis->not_found = old_found;
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ }
+ /*
+ * If no space in inode, we will set extended attribute
+ * into external block.
+ */
+ ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
+ if (ret)
+ goto out;
+ if (!xis->not_found) {
+ /*
+ * If succeed and that extended attribute
+ * existing in inode, we will remove it.
+ */
+ xi->value = NULL;
+ xi->value_len = 0;
+ xbs->not_found = -ENODATA;
+ ret = ocfs2_calc_xattr_set_need(inode,
+ di,
+ xi,
+ xis,
+ xbs,
+ NULL,
+ NULL,
+ &credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_extend_trans(ctxt->handle, credits +
+ ctxt->handle->h_buffer_credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+ ret = ocfs2_xattr_ibody_set(inode, xi,
+ xis, ctxt);
+ }
+ }
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * This function only called duing creating inode
+ * for init security/acl xattrs of the new inode.
+ * The xattrs could be put into ibody or extent block,
+ * xattr bucket would not be use in this case.
+ * transanction credits also be reserved in here.
+ */
+int ocfs2_xattr_set_handle(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ int name_index,
+ const char *name,
+ const void *value,
+ size_t value_len,
+ int flags,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_dinode *di;
+ int ret;
+
+ struct ocfs2_xattr_info xi = {
+ .name_index = name_index,
+ .name = name,
+ .value = value,
+ .value_len = value_len,
+ };
+
+ struct ocfs2_xattr_search xis = {
+ .not_found = -ENODATA,
+ };
+
+ struct ocfs2_xattr_search xbs = {
+ .not_found = -ENODATA,
+ };
+
+ struct ocfs2_xattr_set_ctxt ctxt = {
+ .handle = handle,
+ .meta_ac = meta_ac,
+ .data_ac = data_ac,
+ };
+
+ if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
+ return -EOPNOTSUPP;
+
+ xis.inode_bh = xbs.inode_bh = di_bh;
+ di = (struct ocfs2_dinode *)di_bh->b_data;
+
+ down_write(&OCFS2_I(inode)->ip_xattr_sem);
+
+ ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
+ if (ret)
+ goto cleanup;
+ if (xis.not_found) {
+ ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
+ if (ret)
+ goto cleanup;
+ }
+
+ ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+cleanup:
+ up_write(&OCFS2_I(inode)->ip_xattr_sem);
+ brelse(xbs.xattr_bh);
+
+ return ret;
+}
+
/*
* ocfs2_xattr_set()
*
@@ -1928,8 +2616,10 @@ int ocfs2_xattr_set(struct inode *inode,
{
struct buffer_head *di_bh = NULL;
struct ocfs2_dinode *di;
- int ret;
- u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ int ret, credits;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct inode *tl_inode = osb->osb_tl_inode;
+ struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
struct ocfs2_xattr_info xi = {
.name_index = name_index,
@@ -1949,10 +2639,20 @@ int ocfs2_xattr_set(struct inode *inode,
if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
+ /*
+ * Only xbs will be used on indexed trees. xis doesn't need a
+ * bucket.
+ */
+ xbs.bucket = ocfs2_xattr_bucket_new(inode);
+ if (!xbs.bucket) {
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
+ }
+
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret < 0) {
mlog_errno(ret);
- return ret;
+ goto cleanup_nolock;
}
xis.inode_bh = xbs.inode_bh = di_bh;
di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1984,55 +2684,51 @@ int ocfs2_xattr_set(struct inode *inode,
goto cleanup;
}
- if (!value) {
- /* Remove existing extended attribute */
- if (!xis.not_found)
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- else if (!xbs.not_found)
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- } else {
- /* We always try to set extended attribute into inode first*/
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- if (!ret && !xbs.not_found) {
- /*
- * If succeed and that extended attribute existing in
- * external block, then we will remove it.
- */
- xi.value = NULL;
- xi.value_len = 0;
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- } else if (ret == -ENOSPC) {
- if (di->i_xattr_loc && !xbs.xattr_bh) {
- ret = ocfs2_xattr_block_find(inode, name_index,
- name, &xbs);
- if (ret)
- goto cleanup;
- }
- /*
- * If no space in inode, we will set extended attribute
- * into external block.
- */
- ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
- if (ret)
- goto cleanup;
- if (!xis.not_found) {
- /*
- * If succeed and that extended attribute
- * existing in inode, we will remove it.
- */
- xi.value = NULL;
- xi.value_len = 0;
- ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
- }
+
+ mutex_lock(&tl_inode->i_mutex);
+
+ if (ocfs2_truncate_log_needs_flush(osb)) {
+ ret = __ocfs2_flush_truncate_log(osb);
+ if (ret < 0) {
+ mutex_unlock(&tl_inode->i_mutex);
+ mlog_errno(ret);
+ goto cleanup;
}
}
+ mutex_unlock(&tl_inode->i_mutex);
+
+ ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
+ &xbs, &ctxt, &credits);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
+
+ ctxt.handle = ocfs2_start_trans(osb, credits);
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto cleanup;
+ }
+
+ ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
+
+ ocfs2_commit_trans(osb, ctxt.handle);
+
+ if (ctxt.data_ac)
+ ocfs2_free_alloc_context(ctxt.data_ac);
+ if (ctxt.meta_ac)
+ ocfs2_free_alloc_context(ctxt.meta_ac);
+ if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
cleanup:
up_write(&OCFS2_I(inode)->ip_xattr_sem);
ocfs2_inode_unlock(inode, 1);
+cleanup_nolock:
brelse(di_bh);
brelse(xbs.xattr_bh);
- for (i = 0; i < blk_per_bucket; i++)
- brelse(xbs.bucket.bhs[i]);
+ ocfs2_xattr_bucket_free(xbs.bucket);
return ret;
}
@@ -2107,7 +2803,7 @@ typedef int (xattr_bucket_func)(struct inode *inode,
void *para);
static int ocfs2_find_xe_in_bucket(struct inode *inode,
- struct buffer_head *header_bh,
+ struct ocfs2_xattr_bucket *bucket,
int name_index,
const char *name,
u32 name_hash,
@@ -2115,11 +2811,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
int *found)
{
int i, ret = 0, cmp = 1, block_off, new_offset;
- struct ocfs2_xattr_header *xh =
- (struct ocfs2_xattr_header *)header_bh->b_data;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
size_t name_len = strlen(name);
struct ocfs2_xattr_entry *xe = NULL;
- struct buffer_head *name_bh = NULL;
char *xe_name;
/*
@@ -2150,19 +2844,9 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
break;
}
- ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
- &name_bh);
- if (ret) {
- mlog_errno(ret);
- break;
- }
- xe_name = name_bh->b_data + new_offset;
-
- cmp = memcmp(name, xe_name, name_len);
- brelse(name_bh);
- name_bh = NULL;
- if (cmp == 0) {
+ xe_name = bucket_block(bucket, block_off) + new_offset;
+ if (!memcmp(name, xe_name, name_len)) {
*xe_index = i;
*found = 1;
ret = 0;
@@ -2192,39 +2876,42 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
struct ocfs2_xattr_search *xs)
{
int ret, found = 0;
- struct buffer_head *bh = NULL;
- struct buffer_head *lower_bh = NULL;
struct ocfs2_xattr_header *xh = NULL;
struct ocfs2_xattr_entry *xe = NULL;
u16 index = 0;
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
int low_bucket = 0, bucket, high_bucket;
+ struct ocfs2_xattr_bucket *search;
u32 last_hash;
- u64 blkno;
+ u64 blkno, lower_blkno = 0;
+
+ search = ocfs2_xattr_bucket_new(inode);
+ if (!search) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
- ret = ocfs2_read_block(inode, p_blkno, &bh);
+ ret = ocfs2_read_xattr_bucket(search, p_blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
- xh = (struct ocfs2_xattr_header *)bh->b_data;
+ xh = bucket_xh(search);
high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
-
while (low_bucket <= high_bucket) {
- brelse(bh);
- bh = NULL;
- bucket = (low_bucket + high_bucket) / 2;
+ ocfs2_xattr_bucket_relse(search);
+ bucket = (low_bucket + high_bucket) / 2;
blkno = p_blkno + bucket * blk_per_bucket;
-
- ret = ocfs2_read_block(inode, blkno, &bh);
+ ret = ocfs2_read_xattr_bucket(search, blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
- xh = (struct ocfs2_xattr_header *)bh->b_data;
+ xh = bucket_xh(search);
xe = &xh->xh_entries[0];
if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
high_bucket = bucket - 1;
@@ -2241,10 +2928,8 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
last_hash = le32_to_cpu(xe->xe_name_hash);
- /* record lower_bh which may be the insert place. */
- brelse(lower_bh);
- lower_bh = bh;
- bh = NULL;
+ /* record lower_blkno which may be the insert place. */
+ lower_blkno = blkno;
if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
low_bucket = bucket + 1;
@@ -2252,7 +2937,7 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
}
/* the searched xattr should reside in this bucket if exists. */
- ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
+ ret = ocfs2_find_xe_in_bucket(inode, search,
name_index, name, name_hash,
&index, &found);
if (ret) {
@@ -2267,46 +2952,29 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
* When the xattr's hash value is in the gap of 2 buckets, we will
* always set it to the previous bucket.
*/
- if (!lower_bh) {
- /*
- * We can't find any bucket whose first name_hash is less
- * than the find name_hash.
- */
- BUG_ON(bh->b_blocknr != p_blkno);
- lower_bh = bh;
- bh = NULL;
+ if (!lower_blkno)
+ lower_blkno = p_blkno;
+
+ /* This should be in cache - we just read it during the search */
+ ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
- xs->bucket.bhs[0] = lower_bh;
- xs->bucket.xh = (struct ocfs2_xattr_header *)
- xs->bucket.bhs[0]->b_data;
- lower_bh = NULL;
- xs->header = xs->bucket.xh;
- xs->base = xs->bucket.bhs[0]->b_data;
+ xs->header = bucket_xh(xs->bucket);
+ xs->base = bucket_block(xs->bucket, 0);
xs->end = xs->base + inode->i_sb->s_blocksize;
if (found) {
- /*
- * If we have found the xattr enty, read all the blocks in
- * this bucket.
- */
- ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
xs->here = &xs->header->xh_entries[index];
mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
+ (unsigned long long)bucket_blkno(xs->bucket), index);
} else
ret = -ENODATA;
out:
- brelse(bh);
- brelse(lower_bh);
+ ocfs2_xattr_bucket_free(search);
return ret;
}
@@ -2357,53 +3025,50 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
xattr_bucket_func *func,
void *para)
{
- int i, j, ret = 0;
- int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ int i, ret = 0;
u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
u32 num_buckets = clusters * bpc;
- struct ocfs2_xattr_bucket bucket;
+ struct ocfs2_xattr_bucket *bucket;
- memset(&bucket, 0, sizeof(bucket));
+ bucket = ocfs2_xattr_bucket_new(inode);
+ if (!bucket) {
+ mlog_errno(-ENOMEM);
+ return -ENOMEM;
+ }
mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
clusters, (unsigned long long)blkno);
- for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
- ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
- bucket.bhs, 0);
+ for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
+ ret = ocfs2_read_xattr_bucket(bucket, blkno);
if (ret) {
mlog_errno(ret);
- goto out;
+ break;
}
- bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
/*
* The real bucket num in this series of blocks is stored
* in the 1st bucket.
*/
if (i == 0)
- num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
+ num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
mlog(0, "iterating xattr bucket %llu, first hash %u\n",
(unsigned long long)blkno,
- le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
+ le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
if (func) {
- ret = func(inode, &bucket, para);
- if (ret) {
- mlog_errno(ret);
- break;
- }
+ ret = func(inode, bucket, para);
+ if (ret)
+ mlog_errno(ret);
+ /* Fall through to bucket_relse() */
}
- for (j = 0; j < blk_per_bucket; j++)
- brelse(bucket.bhs[j]);
- memset(&bucket, 0, sizeof(bucket));
+ ocfs2_xattr_bucket_relse(bucket);
+ if (ret)
+ break;
}
-out:
- for (j = 0; j < blk_per_bucket; j++)
- brelse(bucket.bhs[j]);
-
+ ocfs2_xattr_bucket_free(bucket);
return ret;
}
@@ -2441,21 +3106,21 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
int i, block_off, new_offset;
const char *prefix, *name;
- for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
- struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
+ for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
+ struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
type = ocfs2_xattr_get_type(entry);
prefix = ocfs2_xattr_prefix(type);
if (prefix) {
ret = ocfs2_xattr_bucket_get_name_value(inode,
- bucket->xh,
+ bucket_xh(bucket),
i,
&block_off,
&new_offset);
if (ret)
break;
- name = (const char *)bucket->bhs[block_off]->b_data +
+ name = (const char *)bucket_block(bucket, block_off) +
new_offset;
ret = ocfs2_xattr_list_entry(xl->buffer,
xl->buffer_size,
@@ -2540,32 +3205,34 @@ static void swap_xe(void *a, void *b, int size)
/*
* When the ocfs2_xattr_block is filled up, new bucket will be created
* and all the xattr entries will be moved to the new bucket.
+ * The header goes at the start of the bucket, and the names+values are
+ * filled from the end. This is why *target starts as the last buffer.
* Note: we need to sort the entries since they are not saved in order
* in the ocfs2_xattr_block.
*/
static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
struct buffer_head *xb_bh,
- struct buffer_head *xh_bh,
- struct buffer_head *data_bh)
+ struct ocfs2_xattr_bucket *bucket)
{
int i, blocksize = inode->i_sb->s_blocksize;
+ int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
u16 offset, size, off_change;
struct ocfs2_xattr_entry *xe;
struct ocfs2_xattr_block *xb =
(struct ocfs2_xattr_block *)xb_bh->b_data;
struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
- struct ocfs2_xattr_header *xh =
- (struct ocfs2_xattr_header *)xh_bh->b_data;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u16 count = le16_to_cpu(xb_xh->xh_count);
- char *target = xh_bh->b_data, *src = xb_bh->b_data;
+ char *src = xb_bh->b_data;
+ char *target = bucket_block(bucket, blks - 1);
mlog(0, "cp xattr from block %llu to bucket %llu\n",
(unsigned long long)xb_bh->b_blocknr,
- (unsigned long long)xh_bh->b_blocknr);
+ (unsigned long long)bucket_blkno(bucket));
+
+ for (i = 0; i < blks; i++)
+ memset(bucket_block(bucket, i), 0, blocksize);
- memset(xh_bh->b_data, 0, blocksize);
- if (data_bh)
- memset(data_bh->b_data, 0, blocksize);
/*
* Since the xe_name_offset is based on ocfs2_xattr_header,
* there is a offset change corresponding to the change of
@@ -2577,8 +3244,6 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
size = blocksize - offset;
/* copy all the names and values. */
- if (data_bh)
- target = data_bh->b_data;
memcpy(target + offset, src + offset, size);
/* Init new header now. */
@@ -2588,7 +3253,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
/* copy all the entries. */
- target = xh_bh->b_data;
+ target = bucket_block(bucket, 0);
offset = offsetof(struct ocfs2_xattr_header, xh_entries);
size = count * sizeof(struct ocfs2_xattr_entry);
memcpy(target + offset, (char *)xb_xh + offset, size);
@@ -2614,73 +3279,53 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
* While if the entry is in index b-tree, "bucket" indicates the
* real place of the xattr.
*/
-static int ocfs2_xattr_update_xattr_search(struct inode *inode,
- struct ocfs2_xattr_search *xs,
- struct buffer_head *old_bh,
- struct buffer_head *new_bh)
+static void ocfs2_xattr_update_xattr_search(struct inode *inode,
+ struct ocfs2_xattr_search *xs,
+ struct buffer_head *old_bh)
{
- int ret = 0;
char *buf = old_bh->b_data;
struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
- int i, blocksize = inode->i_sb->s_blocksize;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
- xs->bucket.bhs[0] = new_bh;
- get_bh(new_bh);
- xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
- xs->header = xs->bucket.xh;
+ int i;
- xs->base = new_bh->b_data;
+ xs->header = bucket_xh(xs->bucket);
+ xs->base = bucket_block(xs->bucket, 0);
xs->end = xs->base + inode->i_sb->s_blocksize;
- if (!xs->not_found) {
- if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
- ret = ocfs2_read_blocks(inode,
- xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
+ if (xs->not_found)
+ return;
- i = xs->here - old_xh->xh_entries;
- xs->here = &xs->header->xh_entries[i];
- }
+ /*
+ * If a bucket is more than one block, the name+value moved when
+ * we went to a bucket.
+ */
+ if (xs->bucket->bu_blocks > 1) {
+ i = xs->here - old_xh->xh_entries;
+ xs->here = &xs->header->xh_entries[i];
}
-
- return ret;
}
static int ocfs2_xattr_create_index_block(struct inode *inode,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
- int ret, credits = OCFS2_SUBALLOC_ALLOC;
+ int ret;
u32 bit_off, len;
u64 blkno;
- handle_t *handle;
+ handle_t *handle = ctxt->handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- struct ocfs2_alloc_context *data_ac;
- struct buffer_head *xh_bh = NULL, *data_bh = NULL;
struct buffer_head *xb_bh = xs->xattr_bh;
struct ocfs2_xattr_block *xb =
(struct ocfs2_xattr_block *)xb_bh->b_data;
struct ocfs2_xattr_tree_root *xr;
u16 xb_flags = le16_to_cpu(xb->xb_flags);
- u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
mlog(0, "create xattr index block for %llu\n",
(unsigned long long)xb_bh->b_blocknr);
BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
-
- ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ BUG_ON(!xs->bucket);
/*
* XXX:
@@ -2689,29 +3334,18 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
*/
down_write(&oi->ip_alloc_sem);
- /*
- * 3 more credits, one for xattr block update, one for the 1st block
- * of the new xattr bucket and one for the value/data.
- */
- credits += 3;
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- goto out_sem;
- }
-
ret = ocfs2_journal_access(handle, inode, xb_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
+ ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
+ 1, 1, &bit_off, &len);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
/*
@@ -2724,51 +3358,23 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
mlog(0, "allocate 1 cluster from %llu to xattr block\n",
(unsigned long long)blkno);
- xh_bh = sb_getblk(inode->i_sb, blkno);
- if (!xh_bh) {
- ret = -EIO;
+ ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
+ if (ret) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
- ocfs2_set_new_buffer_uptodate(inode, xh_bh);
-
- ret = ocfs2_journal_access(handle, inode, xh_bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_CREATE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
- }
-
- if (bpb > 1) {
- data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
- if (!data_bh) {
- ret = -EIO;
- mlog_errno(ret);
- goto out_commit;
- }
-
- ocfs2_set_new_buffer_uptodate(inode, data_bh);
-
- ret = ocfs2_journal_access(handle, inode, data_bh,
- OCFS2_JOURNAL_ACCESS_CREATE);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
+ goto out;
}
- ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
+ ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
- ocfs2_journal_dirty(handle, xh_bh);
- if (data_bh)
- ocfs2_journal_dirty(handle, data_bh);
-
- ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
+ ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
/* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2787,24 +3393,10 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
- ret = ocfs2_journal_dirty(handle, xb_bh);
- if (ret) {
- mlog_errno(ret);
- goto out_commit;
- }
-
-out_commit:
- ocfs2_commit_trans(osb, handle);
-
-out_sem:
- up_write(&oi->ip_alloc_sem);
+ ocfs2_journal_dirty(handle, xb_bh);
out:
- if (data_ac)
- ocfs2_free_alloc_context(data_ac);
-
- brelse(xh_bh);
- brelse(data_bh);
+ up_write(&oi->ip_alloc_sem);
return ret;
}
@@ -2829,29 +3421,18 @@ static int cmp_xe_offset(const void *a, const void *b)
* so that we can spare some space for insertion.
*/
static int ocfs2_defrag_xattr_bucket(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_bucket *bucket)
{
int ret, i;
size_t end, offset, len, value_len;
struct ocfs2_xattr_header *xh;
char *entries, *buf, *bucket_buf = NULL;
- u64 blkno = bucket->bhs[0]->b_blocknr;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
+ u64 blkno = bucket_blkno(bucket);
u16 xh_free_start;
size_t blocksize = inode->i_sb->s_blocksize;
- handle_t *handle;
- struct buffer_head **bhs;
struct ocfs2_xattr_entry *xe;
- bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0);
- if (ret)
- goto out;
-
/*
* In order to make the operation more efficient and generic,
* we copy all the blocks into a contiguous memory and do the
@@ -2865,26 +3446,16 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
}
buf = bucket_buf;
- for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
- memcpy(buf, bhs[i]->b_data, blocksize);
+ for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+ memcpy(buf, bucket_block(bucket, i), blocksize);
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, bhs[i],
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto commit;
- }
- }
-
xh = (struct ocfs2_xattr_header *)bucket_buf;
entries = (char *)xh->xh_entries;
xh_free_start = le16_to_cpu(xh->xh_free_start);
@@ -2940,7 +3511,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
"bucket %llu\n", (unsigned long long)blkno);
if (xh_free_start == end)
- goto commit;
+ goto out;
memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
xh->xh_free_start = cpu_to_le16(end);
@@ -2951,21 +3522,11 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
cmp_xe, swap_xe);
buf = bucket_buf;
- for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
- memcpy(bhs[i]->b_data, buf, blocksize);
- ocfs2_journal_dirty(handle, bhs[i]);
- }
+ for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
+ memcpy(bucket_block(bucket, i), buf, blocksize);
+ ocfs2_xattr_bucket_journal_dirty(handle, bucket);
-commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
out:
-
- if (bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(bhs[i]);
- }
- kfree(bhs);
-
kfree(bucket_buf);
return ret;
}
@@ -3015,7 +3576,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
* 1 more for the update of the 1st bucket of the previous
* extent record.
*/
- credits = bpc / 2 + 1;
+ credits = bpc / 2 + 1 + handle->h_buffer_credits;
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
@@ -3048,7 +3609,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
goto out;
}
- ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
+ ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
if (ret < 0) {
mlog_errno(ret);
brelse(new_bh);
@@ -3092,31 +3653,6 @@ out:
return ret;
}
-static int ocfs2_read_xattr_bucket(struct inode *inode,
- u64 blkno,
- struct buffer_head **bhs,
- int new)
-{
- int ret = 0;
- u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
-
- if (!new)
- return ocfs2_read_blocks(inode, blkno,
- blk_per_bucket, bhs, 0);
-
- for (i = 0; i < blk_per_bucket; i++) {
- bhs[i] = sb_getblk(inode->i_sb, blkno + i);
- if (bhs[i] == NULL) {
- ret = -EIO;
- mlog_errno(ret);
- break;
- }
- ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
- }
-
- return ret;
-}
-
/*
* Find the suitable pos when we divide a bucket into 2.
* We have to make sure the xattrs with the same hash value exist
@@ -3178,8 +3714,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
{
int ret, i;
int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- struct buffer_head **s_bhs, **t_bhs = NULL;
+ struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
struct ocfs2_xattr_header *xh;
struct ocfs2_xattr_entry *xe;
int blocksize = inode->i_sb->s_blocksize;
@@ -3187,47 +3722,47 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
mlog(0, "move some of xattrs from bucket %llu to %llu\n",
(unsigned long long)blk, (unsigned long long)new_blk);
- s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
- if (!s_bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
- if (ret) {
+ s_bucket = ocfs2_xattr_bucket_new(inode);
+ t_bucket = ocfs2_xattr_bucket_new(inode);
+ if (!s_bucket || !t_bucket) {
+ ret = -ENOMEM;
mlog_errno(ret);
goto out;
}
- ret = ocfs2_journal_access(handle, inode, s_bhs[0],
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_read_xattr_bucket(s_bucket, blk);
if (ret) {
mlog_errno(ret);
goto out;
}
- t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
- if (!t_bhs) {
- ret = -ENOMEM;
+ ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
goto out;
}
- ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
+ /*
+ * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
+ * there's no need to read it.
+ */
+ ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
if (ret) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, t_bhs[i],
- new_bucket_head ?
- OCFS2_JOURNAL_ACCESS_CREATE :
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
+ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
+ new_bucket_head ?
+ OCFS2_JOURNAL_ACCESS_CREATE :
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
}
- xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+ xh = bucket_xh(s_bucket);
count = le16_to_cpu(xh->xh_count);
start = ocfs2_xattr_find_divide_pos(xh);
@@ -3239,10 +3774,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
* The hash value is set as one larger than
* that of the last entry in the previous bucket.
*/
- for (i = 0; i < blk_per_bucket; i++)
- memset(t_bhs[i]->b_data, 0, blocksize);
+ for (i = 0; i < t_bucket->bu_blocks; i++)
+ memset(bucket_block(t_bucket, i), 0, blocksize);
- xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+ xh = bucket_xh(t_bucket);
xh->xh_free_start = cpu_to_le16(blocksize);
xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
@@ -3251,11 +3786,10 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode,
}
/* copy the whole bucket to the new first. */
- for (i = 0; i < blk_per_bucket; i++)
- memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
+ ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
/* update the new bucket. */
- xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
+ xh = bucket_xh(t_bucket);
/*
* Calculate the total name/value len and xh_free_start for
@@ -3319,11 +3853,7 @@ set_num_buckets:
else
xh->xh_num_buckets = 0;
- for (i = 0; i < blk_per_bucket; i++) {
- ocfs2_journal_dirty(handle, t_bhs[i]);
- if (ret)
- mlog_errno(ret);
- }
+ ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
/* store the first_hash of the new bucket. */
if (first_hash)
@@ -3337,29 +3867,18 @@ set_num_buckets:
if (start == count)
goto out;
- xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
+ xh = bucket_xh(s_bucket);
memset(&xh->xh_entries[start], 0,
sizeof(struct ocfs2_xattr_entry) * (count - start));
xh->xh_count = cpu_to_le16(start);
xh->xh_free_start = cpu_to_le16(name_offset);
xh->xh_name_value_len = cpu_to_le16(name_value_len);
- ocfs2_journal_dirty(handle, s_bhs[0]);
- if (ret)
- mlog_errno(ret);
+ ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
out:
- if (s_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(s_bhs[i]);
- }
- kfree(s_bhs);
-
- if (t_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(t_bhs[i]);
- }
- kfree(t_bhs);
+ ocfs2_xattr_bucket_free(s_bucket);
+ ocfs2_xattr_bucket_free(t_bucket);
return ret;
}
@@ -3376,10 +3895,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
u64 t_blkno,
int t_is_new)
{
- int ret, i;
- int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- int blocksize = inode->i_sb->s_blocksize;
- struct buffer_head **s_bhs, **t_bhs = NULL;
+ int ret;
+ struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
BUG_ON(s_blkno == t_blkno);
@@ -3387,52 +3904,39 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
(unsigned long long)s_blkno, (unsigned long long)t_blkno,
t_is_new);
- s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!s_bhs)
- return -ENOMEM;
-
- ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
- if (ret)
- goto out;
-
- t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
- GFP_NOFS);
- if (!t_bhs) {
+ s_bucket = ocfs2_xattr_bucket_new(inode);
+ t_bucket = ocfs2_xattr_bucket_new(inode);
+ if (!s_bucket || !t_bucket) {
ret = -ENOMEM;
+ mlog_errno(ret);
goto out;
}
+
+ ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
+ if (ret)
+ goto out;
- ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
+ /*
+ * Even if !t_is_new, we're overwriting t_bucket. Thus,
+ * there's no need to read it.
+ */
+ ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
if (ret)
goto out;
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, t_bhs[i],
- t_is_new ?
- OCFS2_JOURNAL_ACCESS_CREATE :
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret)
- goto out;
- }
+ ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
+ t_is_new ?
+ OCFS2_JOURNAL_ACCESS_CREATE :
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret)
+ goto out;
- for (i = 0; i < blk_per_bucket; i++) {
- memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
- ocfs2_journal_dirty(handle, t_bhs[i]);
- }
+ ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
+ ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
out:
- if (s_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(s_bhs[i]);
- }
- kfree(s_bhs);
-
- if (t_bhs) {
- for (i = 0; i < blk_per_bucket; i++)
- brelse(t_bhs[i]);
- }
- kfree(t_bhs);
+ ocfs2_xattr_bucket_free(t_bucket);
+ ocfs2_xattr_bucket_free(s_bucket);
return ret;
}
@@ -3464,7 +3968,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
* We need to update the new cluster and 1 more for the update of
* the 1st bucket of the previous extent rec.
*/
- credits = bpc + 1;
+ credits = bpc + 1 + handle->h_buffer_credits;
ret = ocfs2_extend_trans(handle, credits);
if (ret) {
mlog_errno(ret);
@@ -3497,7 +4001,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
ocfs2_journal_dirty(handle, first_bh);
/* update the new bucket header. */
- ret = ocfs2_read_block(inode, to_blk_start, &bh);
+ ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -3534,7 +4038,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
u32 *first_hash)
{
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- int ret, credits = 2 * blk_per_bucket;
+ int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
@@ -3644,16 +4148,15 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
u32 *num_clusters,
u32 prev_cpos,
u64 prev_blkno,
- int *extend)
+ int *extend,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
- int ret, credits;
+ int ret;
u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
u32 prev_clusters = *num_clusters;
u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
u64 block;
- handle_t *handle = NULL;
- struct ocfs2_alloc_context *data_ac = NULL;
- struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = ctxt->handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_extent_tree et;
@@ -3664,23 +4167,6 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
- ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
- &data_ac, &meta_ac);
- if (ret) {
- mlog_errno(ret);
- goto leave;
- }
-
- credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
- clusters_to_add);
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
- mlog_errno(ret);
- goto leave;
- }
-
ret = ocfs2_journal_access(handle, inode, root_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
@@ -3688,7 +4174,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
goto leave;
}
- ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
+ ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
clusters_to_add, &bit_off, &num_bits);
if (ret < 0) {
if (ret != -ENOSPC)
@@ -3734,41 +4220,20 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
}
}
- if (handle->h_buffer_credits < credits) {
- /*
- * The journal has been restarted before, and don't
- * have enough space for the insertion, so extend it
- * here.
- */
- ret = ocfs2_extend_trans(handle, credits);
- if (ret) {
- mlog_errno(ret);
- goto leave;
- }
- }
mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
num_bits, (unsigned long long)block, v_start);
ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
- num_bits, 0, meta_ac);
+ num_bits, 0, ctxt->meta_ac);
if (ret < 0) {
mlog_errno(ret);
goto leave;
}
ret = ocfs2_journal_dirty(handle, root_bh);
- if (ret < 0) {
+ if (ret < 0)
mlog_errno(ret);
- goto leave;
- }
leave:
- if (handle)
- ocfs2_commit_trans(osb, handle);
- if (data_ac)
- ocfs2_free_alloc_context(data_ac);
- if (meta_ac)
- ocfs2_free_alloc_context(meta_ac);
-
return ret;
}
@@ -3777,6 +4242,7 @@ leave:
* We meet with start_bh. Only move half of the xattrs to the bucket after it.
*/
static int ocfs2_extend_xattr_bucket(struct inode *inode,
+ handle_t *handle,
struct buffer_head *first_bh,
struct buffer_head *start_bh,
u32 num_clusters)
@@ -3786,7 +4252,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
u64 start_blk = start_bh->b_blocknr, end_blk;
u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
- handle_t *handle;
struct ocfs2_xattr_header *first_xh =
(struct ocfs2_xattr_header *)first_bh->b_data;
u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
@@ -3801,13 +4266,12 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
/*
* We will touch all the buckets after the start_bh(include it).
- * Add one more bucket and modify the first_bh.
+ * Then we add one more bucket.
*/
- credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
- handle = ocfs2_start_trans(osb, credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ credits = end_blk - start_blk + 3 * blk_per_bucket + 1 +
+ handle->h_buffer_credits;
+ ret = ocfs2_extend_trans(handle, credits);
+ if (ret) {
mlog_errno(ret);
goto out;
}
@@ -3816,14 +4280,14 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto commit;
+ goto out;
}
while (end_blk != start_blk) {
ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
end_blk + blk_per_bucket, 0);
if (ret)
- goto commit;
+ goto out;
end_blk -= blk_per_bucket;
}
@@ -3834,8 +4298,6 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
le16_add_cpu(&first_xh->xh_num_buckets, 1);
ocfs2_journal_dirty(handle, first_bh);
-commit:
- ocfs2_commit_trans(osb, handle);
out:
return ret;
}
@@ -3851,7 +4313,8 @@ out:
*/
static int ocfs2_add_new_xattr_bucket(struct inode *inode,
struct buffer_head *xb_bh,
- struct buffer_head *header_bh)
+ struct buffer_head *header_bh,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_xattr_header *first_xh = NULL;
struct buffer_head *first_bh = NULL;
@@ -3885,7 +4348,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
goto out;
}
- ret = ocfs2_read_block(inode, p_blkno, &first_bh);
+ ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3902,7 +4365,8 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
&num_clusters,
e_cpos,
p_blkno,
- &extend);
+ &extend,
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
@@ -3911,6 +4375,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
if (extend)
ret = ocfs2_extend_xattr_bucket(inode,
+ ctxt->handle,
first_bh,
header_bh,
num_clusters);
@@ -3929,7 +4394,7 @@ static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
int block_off = offs >> inode->i_sb->s_blocksize_bits;
offs = offs % inode->i_sb->s_blocksize;
- return bucket->bhs[block_off]->b_data + offs;
+ return bucket_block(bucket, block_off) + offs;
}
/*
@@ -3984,7 +4449,7 @@ static void ocfs2_xattr_set_entry_normal(struct inode *inode,
xe->xe_value_size = 0;
val = ocfs2_xattr_bucket_get_val(inode,
- &xs->bucket, offs);
+ xs->bucket, offs);
memset(val + OCFS2_XATTR_SIZE(name_len), 0,
size - OCFS2_XATTR_SIZE(name_len));
if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
@@ -4062,8 +4527,7 @@ set_new_name_value:
xh->xh_free_start = cpu_to_le16(offs);
}
- val = ocfs2_xattr_bucket_get_val(inode,
- &xs->bucket, offs - size);
+ val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
xe->xe_name_offset = cpu_to_le16(offs - size);
memset(val, 0, size);
@@ -4079,115 +4543,62 @@ set_new_name_value:
return;
}
-static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
- handle_t *handle,
- struct ocfs2_xattr_search *xs,
- struct buffer_head **bhs,
- u16 bh_num)
-{
- int ret = 0, off, block_off;
- struct ocfs2_xattr_entry *xe = xs->here;
-
- /*
- * First calculate all the blocks we should journal_access
- * and journal_dirty. The first block should always be touched.
- */
- ret = ocfs2_journal_dirty(handle, bhs[0]);
- if (ret)
- mlog_errno(ret);
-
- /* calc the data. */
- off = le16_to_cpu(xe->xe_name_offset);
- block_off = off >> inode->i_sb->s_blocksize_bits;
- ret = ocfs2_journal_dirty(handle, bhs[block_off]);
- if (ret)
- mlog_errno(ret);
-
- return ret;
-}
-
/*
* Set the xattr entry in the specified bucket.
* The bucket is indicated by xs->bucket and it should have the enough
* space for the xattr insertion.
*/
static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_info *xi,
struct ocfs2_xattr_search *xs,
u32 name_hash,
int local)
{
- int i, ret;
- handle_t *handle = NULL;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int ret;
+ u64 blkno;
mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
(unsigned long)xi->value_len, xi->name_index,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
+ (unsigned long long)bucket_blkno(xs->bucket));
- if (!xs->bucket.bhs[1]) {
- ret = ocfs2_read_blocks(inode,
- xs->bucket.bhs[0]->b_blocknr + 1,
- blk_per_bucket - 1, &xs->bucket.bhs[1],
- 0);
+ if (!xs->bucket->bu_bhs[1]) {
+ blkno = bucket_blkno(xs->bucket);
+ ocfs2_xattr_bucket_relse(xs->bucket);
+ ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
if (ret) {
mlog_errno(ret);
goto out;
}
}
- handle = ocfs2_start_trans(osb, blk_per_bucket);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- handle = NULL;
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret < 0) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++) {
- ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
- /*Only dirty the blocks we have touched in set xattr. */
- ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
- xs->bucket.bhs, blk_per_bucket);
- if (ret)
- mlog_errno(ret);
out:
- ocfs2_commit_trans(osb, handle);
-
return ret;
}
static int ocfs2_xattr_value_update_size(struct inode *inode,
+ handle_t *handle,
struct buffer_head *xe_bh,
struct ocfs2_xattr_entry *xe,
u64 new_size)
{
int ret;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- handle_t *handle = NULL;
-
- handle = ocfs2_start_trans(osb, 1);
- if (IS_ERR(handle)) {
- ret = -ENOMEM;
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access(handle, inode, xe_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
- goto out_commit;
+ goto out;
}
xe->xe_value_size = cpu_to_le64(new_size);
@@ -4196,8 +4607,6 @@ static int ocfs2_xattr_value_update_size(struct inode *inode,
if (ret < 0)
mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(osb, handle);
out:
return ret;
}
@@ -4212,7 +4621,8 @@ out:
static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
struct buffer_head *header_bh,
int xe_off,
- int len)
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, offset;
u64 value_blk;
@@ -4236,7 +4646,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
value_blk += header_bh->b_blocknr;
- ret = ocfs2_read_block(inode, value_blk, &value_bh);
+ ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4247,13 +4657,14 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
xe_off, (unsigned long long)header_bh->b_blocknr, len);
- ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
+ ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len, ctxt);
if (ret) {
mlog_errno(ret);
goto out;
}
- ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
+ ret = ocfs2_xattr_value_update_size(inode, ctxt->handle,
+ header_bh, xe, len);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4265,18 +4676,19 @@ out:
}
static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
- struct ocfs2_xattr_search *xs,
- int len)
+ struct ocfs2_xattr_search *xs,
+ int len,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, offset;
struct ocfs2_xattr_entry *xe = xs->here;
struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
- BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
+ BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
offset = xe - xh->xh_entries;
- ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
- offset, len);
+ ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket->bu_bhs[0],
+ offset, len, ctxt);
if (ret)
mlog_errno(ret);
@@ -4284,6 +4696,7 @@ static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
}
static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_search *xs,
char *val,
int value_len)
@@ -4299,7 +4712,8 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
- return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
+ return __ocfs2_xattr_set_value_outside(inode, handle,
+ xv, val, value_len);
}
static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -4343,7 +4757,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
}
}
- handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
+ handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
if (IS_ERR(handle)) {
ret = -ENOMEM;
mlog_errno(ret);
@@ -4392,26 +4806,19 @@ out:
}
static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
+ handle_t *handle,
struct ocfs2_xattr_search *xs)
{
- handle_t *handle = NULL;
- struct ocfs2_xattr_header *xh = xs->bucket.xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
struct ocfs2_xattr_entry *last = &xh->xh_entries[
le16_to_cpu(xh->xh_count) - 1];
int ret = 0;
- handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- mlog_errno(ret);
- return;
- }
-
- ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
- OCFS2_JOURNAL_ACCESS_WRITE);
+ ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (ret) {
mlog_errno(ret);
- goto out_commit;
+ return;
}
/* Remove the old entry. */
@@ -4420,11 +4827,7 @@ static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
memset(last, 0, sizeof(struct ocfs2_xattr_entry));
le16_add_cpu(&xh->xh_count, -1);
- ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
- if (ret < 0)
- mlog_errno(ret);
-out_commit:
- ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+ ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
}
/*
@@ -4440,7 +4843,8 @@ out_commit:
*/
static int ocfs2_xattr_set_in_bucket(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
int ret, local = 1;
size_t value_len;
@@ -4468,7 +4872,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
value_len = 0;
ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
- value_len);
+ value_len,
+ ctxt);
if (ret)
goto out;
@@ -4488,7 +4893,8 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
xi->value_len = OCFS2_XATTR_ROOT_SIZE;
}
- ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
+ ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
+ name_hash, local);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4499,7 +4905,7 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
/* allocate the space now for the outside block storage. */
ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
- value_len);
+ value_len, ctxt);
if (ret) {
mlog_errno(ret);
@@ -4509,13 +4915,14 @@ static int ocfs2_xattr_set_in_bucket(struct inode *inode,
* storage and we have allocated xattr already,
* so need to remove it.
*/
- ocfs2_xattr_bucket_remove_xs(inode, xs);
+ ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
}
goto out;
}
set_value_outside:
- ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
+ ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
+ xs, val, value_len);
out:
return ret;
}
@@ -4530,7 +4937,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
struct ocfs2_xattr_bucket *bucket,
const char *name)
{
- struct ocfs2_xattr_header *xh = bucket->xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
@@ -4540,7 +4947,7 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
xh->xh_entries[0].xe_name_hash) {
mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
"hash = %u\n",
- (unsigned long long)bucket->bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(bucket),
le32_to_cpu(xh->xh_entries[0].xe_name_hash));
return -ENOSPC;
}
@@ -4550,16 +4957,16 @@ static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
struct ocfs2_xattr_info *xi,
- struct ocfs2_xattr_search *xs)
+ struct ocfs2_xattr_search *xs,
+ struct ocfs2_xattr_set_ctxt *ctxt)
{
struct ocfs2_xattr_header *xh;
struct ocfs2_xattr_entry *xe;
u16 count, header_size, xh_free_start;
- int i, free, max_free, need, old;
+ int free, max_free, need, old;
size_t value_size = 0, name_len = strlen(xi->name);
size_t blocksize = inode->i_sb->s_blocksize;
int ret, allocation = 0;
- u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
mlog_entry("Set xattr %s in xattr index block\n", xi->name);
@@ -4574,7 +4981,7 @@ try_again:
mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
"of %u which exceed block size\n",
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(xs->bucket),
header_size);
if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
@@ -4614,11 +5021,13 @@ try_again:
mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
"need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
" %u\n", xs->not_found,
- (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
+ (unsigned long long)bucket_blkno(xs->bucket),
free, need, max_free, le16_to_cpu(xh->xh_free_start),
le16_to_cpu(xh->xh_name_value_len));
- if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
+ if (free < need ||
+ (xs->not_found &&
+ count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
if (need <= max_free &&
count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
/*
@@ -4626,7 +5035,8 @@ try_again:
* name/value will be moved, the xe shouldn't be changed
* in xs.
*/
- ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
+ ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
+ xs->bucket);
if (ret) {
mlog_errno(ret);
goto out;
@@ -4658,7 +5068,7 @@ try_again:
* add a new bucket for the insert.
*/
ret = ocfs2_check_xattr_bucket_collision(inode,
- &xs->bucket,
+ xs->bucket,
xi->name);
if (ret) {
mlog_errno(ret);
@@ -4667,16 +5077,14 @@ try_again:
ret = ocfs2_add_new_xattr_bucket(inode,
xs->xattr_bh,
- xs->bucket.bhs[0]);
+ xs->bucket->bu_bhs[0],
+ ctxt);
if (ret) {
mlog_errno(ret);
goto out;
}
- for (i = 0; i < blk_per_bucket; i++)
- brelse(xs->bucket.bhs[i]);
-
- memset(&xs->bucket, 0, sizeof(xs->bucket));
+ ocfs2_xattr_bucket_relse(xs->bucket);
ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
xi->name_index,
@@ -4689,7 +5097,7 @@ try_again:
}
xattr_set:
- ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
+ ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
out:
mlog_exit(ret);
return ret;
@@ -4700,9 +5108,21 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
void *para)
{
int ret = 0;
- struct ocfs2_xattr_header *xh = bucket->xh;
+ struct ocfs2_xattr_header *xh = bucket_xh(bucket);
u16 i;
struct ocfs2_xattr_entry *xe;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
+
+ ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
+
+ ctxt.handle = ocfs2_start_trans(osb,
+ ocfs2_remove_extent_credits(osb->sb));
+ if (IS_ERR(ctxt.handle)) {
+ ret = PTR_ERR(ctxt.handle);
+ mlog_errno(ret);
+ goto out;
+ }
for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
xe = &xh->xh_entries[i];
@@ -4710,14 +5130,18 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
continue;
ret = ocfs2_xattr_bucket_value_truncate(inode,
- bucket->bhs[0],
- i, 0);
+ bucket->bu_bhs[0],
+ i, 0, &ctxt);
if (ret) {
mlog_errno(ret);
break;
}
}
+ ret = ocfs2_commit_trans(osb, ctxt.handle);
+ ocfs2_schedule_truncate_log_flush(osb, 1);
+ ocfs2_run_deallocs(osb, &ctxt.dealloc);
+out:
return ret;
}
@@ -4768,6 +5192,71 @@ out:
}
/*
+ * 'security' attributes support
+ */
+static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+ size_t list_size, const char *name,
+ size_t name_len)
+{
+ const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
+ const size_t total_len = prefix_len + name_len + 1;
+
+ if (list && total_len <= list_size) {
+ memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
+ memcpy(list + prefix_len, name, name_len);
+ list[prefix_len + name_len] = '\0';
+ }
+ return total_len;
+}
+
+static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+ return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
+ buffer, size);
+}
+
+static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (strcmp(name, "") == 0)
+ return -EINVAL;
+
+ return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
+ size, flags);
+}
+
+int ocfs2_init_security_get(struct inode *inode,
+ struct inode *dir,
+ struct ocfs2_security_xattr_info *si)
+{
+ return security_inode_init_security(inode, dir, &si->name, &si->value,
+ &si->value_len);
+}
+
+int ocfs2_init_security_set(handle_t *handle,
+ struct inode *inode,
+ struct buffer_head *di_bh,
+ struct ocfs2_security_xattr_info *si,
+ struct ocfs2_alloc_context *xattr_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ return ocfs2_xattr_set_handle(handle, inode, di_bh,
+ OCFS2_XATTR_INDEX_SECURITY,
+ si->name, si->value, si->value_len, 0,
+ xattr_ac, data_ac);
+}
+
+struct xattr_handler ocfs2_xattr_security_handler = {
+ .prefix = XATTR_SECURITY_PREFIX,
+ .list = ocfs2_xattr_security_list,
+ .get = ocfs2_xattr_security_get,
+ .set = ocfs2_xattr_security_set,
+};
+
+/*
* 'trusted' attributes support
*/
static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1d8314c7656d..9a67e7d8f812 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,13 +30,44 @@ enum ocfs2_xattr_type {
OCFS2_XATTR_MAX
};
+struct ocfs2_security_xattr_info {
+ int enable;
+ char *name;
+ void *value;
+ size_t value_len;
+};
+
extern struct xattr_handler ocfs2_xattr_user_handler;
extern struct xattr_handler ocfs2_xattr_trusted_handler;
+extern struct xattr_handler ocfs2_xattr_security_handler;
+#ifdef CONFIG_OCFS2_FS_POSIX_ACL
+extern struct xattr_handler ocfs2_xattr_acl_access_handler;
+extern struct xattr_handler ocfs2_xattr_acl_default_handler;
+#endif
extern struct xattr_handler *ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
+int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,
+ const char *, void *, size_t);
int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
size_t, int);
+int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
+ int, const char *, const void *, size_t, int,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
+int ocfs2_init_security_get(struct inode *, struct inode *,
+ struct ocfs2_security_xattr_info *);
+int ocfs2_init_security_set(handle_t *, struct inode *,
+ struct buffer_head *,
+ struct ocfs2_security_xattr_info *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
+int ocfs2_calc_security_init(struct inode *,
+ struct ocfs2_security_xattr_info *,
+ int *, int *, struct ocfs2_alloc_context **);
+int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
+ int, struct ocfs2_security_xattr_info *,
+ int *, int *, struct ocfs2_alloc_context **);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/quota.c b/fs/quota.c
index b7fe44e01618..4a8c94f05f76 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -73,7 +73,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid
case Q_SETQUOTA:
case Q_GETQUOTA:
/* This is just informative test so we are satisfied without a lock */
- if (!sb_has_quota_enabled(sb, type))
+ if (!sb_has_quota_active(sb, type))
return -ESRCH;
}
@@ -160,6 +160,9 @@ static void quota_sync_sb(struct super_block *sb, int type)
int cnt;
sb->s_qcop->quota_sync(sb, type);
+
+ if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
+ return;
/* This is not very clever (and fast) but currently I don't know about
* any other simple way of getting quota data to disk and we must get
* them there for userspace to be visible... */
@@ -175,7 +178,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && cnt != type)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
mutex_lock_nested(&sb_dqopt(sb)->files[cnt]->i_mutex, I_MUTEX_QUOTA);
truncate_inode_pages(&sb_dqopt(sb)->files[cnt]->i_data, 0);
@@ -201,7 +204,7 @@ restart:
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (type != -1 && type != cnt)
continue;
- if (!sb_has_quota_enabled(sb, cnt))
+ if (!sb_has_quota_active(sb, cnt))
continue;
if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
@@ -245,7 +248,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, void
__u32 fmt;
down_read(&sb_dqopt(sb)->dqptr_sem);
- if (!sb_has_quota_enabled(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
up_read(&sb_dqopt(sb)->dqptr_sem);
return -ESRCH;
}
diff --git a/fs/quota_tree.c b/fs/quota_tree.c
new file mode 100644
index 000000000000..953404c95b17
--- /dev/null
+++ b/fs/quota_tree.c
@@ -0,0 +1,645 @@
+/*
+ * vfsv0 quota IO operations on file
+ */
+
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/dqblk_v2.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/quotaops.h>
+
+#include <asm/byteorder.h>
+
+#include "quota_tree.h"
+
+MODULE_AUTHOR("Jan Kara");
+MODULE_DESCRIPTION("Quota trie support");
+MODULE_LICENSE("GPL");
+
+#define __QUOTA_QT_PARANOIA
+
+typedef char *dqbuf_t;
+
+static int get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
+{
+ unsigned int epb = info->dqi_usable_bs >> 2;
+
+ depth = info->dqi_qtree_depth - depth - 1;
+ while (depth--)
+ id /= epb;
+ return id % epb;
+}
+
+/* Number of entries in one blocks */
+static inline int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
+{
+ return (info->dqi_usable_bs - sizeof(struct qt_disk_dqdbheader))
+ / info->dqi_entry_size;
+}
+
+static dqbuf_t getdqbuf(size_t size)
+{
+ dqbuf_t buf = kmalloc(size, GFP_NOFS);
+ if (!buf)
+ printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
+ return buf;
+}
+
+static inline void freedqbuf(dqbuf_t buf)
+{
+ kfree(buf);
+}
+
+static inline ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+ struct super_block *sb = info->dqi_sb;
+
+ memset(buf, 0, info->dqi_usable_bs);
+ return sb->s_op->quota_read(sb, info->dqi_type, (char *)buf,
+ info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+static inline ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, dqbuf_t buf)
+{
+ struct super_block *sb = info->dqi_sb;
+
+ return sb->s_op->quota_write(sb, info->dqi_type, (char *)buf,
+ info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+}
+
+/* Remove empty block from list and return it */
+static int get_free_dqblk(struct qtree_mem_dqinfo *info)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int ret, blk;
+
+ if (!buf)
+ return -ENOMEM;
+ if (info->dqi_free_blk) {
+ blk = info->dqi_free_blk;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0)
+ goto out_buf;
+ info->dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
+ }
+ else {
+ memset(buf, 0, info->dqi_usable_bs);
+ /* Assure block allocation... */
+ ret = write_blk(info, info->dqi_blocks, buf);
+ if (ret < 0)
+ goto out_buf;
+ blk = info->dqi_blocks++;
+ }
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ ret = blk;
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Insert empty block to the list */
+static int put_free_dqblk(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int err;
+
+ dh->dqdh_next_free = cpu_to_le32(info->dqi_free_blk);
+ dh->dqdh_prev_free = cpu_to_le32(0);
+ dh->dqdh_entries = cpu_to_le16(0);
+ err = write_blk(info, blk, buf);
+ if (err < 0)
+ return err;
+ info->dqi_free_blk = blk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ return 0;
+}
+
+/* Remove given block from the list of blocks with free entries */
+static int remove_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ uint nextblk = le32_to_cpu(dh->dqdh_next_free);
+ uint prevblk = le32_to_cpu(dh->dqdh_prev_free);
+ int err;
+
+ if (!tmpbuf)
+ return -ENOMEM;
+ if (nextblk) {
+ err = read_blk(info, nextblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+ dh->dqdh_prev_free;
+ err = write_blk(info, nextblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ }
+ if (prevblk) {
+ err = read_blk(info, prevblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_next_free =
+ dh->dqdh_next_free;
+ err = write_blk(info, prevblk, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ } else {
+ info->dqi_free_entry = nextblk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ }
+ freedqbuf(tmpbuf);
+ dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
+ /* No matter whether write succeeds block is out of list */
+ if (write_blk(info, blk, buf) < 0)
+ printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
+ return 0;
+out_buf:
+ freedqbuf(tmpbuf);
+ return err;
+}
+
+/* Insert given block to the beginning of list with free entries */
+static int insert_free_dqentry(struct qtree_mem_dqinfo *info, dqbuf_t buf, uint blk)
+{
+ dqbuf_t tmpbuf = getdqbuf(info->dqi_usable_bs);
+ struct qt_disk_dqdbheader *dh = (struct qt_disk_dqdbheader *)buf;
+ int err;
+
+ if (!tmpbuf)
+ return -ENOMEM;
+ dh->dqdh_next_free = cpu_to_le32(info->dqi_free_entry);
+ dh->dqdh_prev_free = cpu_to_le32(0);
+ err = write_blk(info, blk, buf);
+ if (err < 0)
+ goto out_buf;
+ if (info->dqi_free_entry) {
+ err = read_blk(info, info->dqi_free_entry, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ ((struct qt_disk_dqdbheader *)tmpbuf)->dqdh_prev_free =
+ cpu_to_le32(blk);
+ err = write_blk(info, info->dqi_free_entry, tmpbuf);
+ if (err < 0)
+ goto out_buf;
+ }
+ freedqbuf(tmpbuf);
+ info->dqi_free_entry = blk;
+ mark_info_dirty(info->dqi_sb, info->dqi_type);
+ return 0;
+out_buf:
+ freedqbuf(tmpbuf);
+ return err;
+}
+
+/* Is the entry in the block free? */
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk)
+{
+ int i;
+
+ for (i = 0; i < info->dqi_entry_size; i++)
+ if (disk[i])
+ return 0;
+ return 1;
+}
+EXPORT_SYMBOL(qtree_entry_unused);
+
+/* Find space for dquot */
+static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, int *err)
+{
+ uint blk, i;
+ struct qt_disk_dqdbheader *dh;
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ char *ddquot;
+
+ *err = 0;
+ if (!buf) {
+ *err = -ENOMEM;
+ return 0;
+ }
+ dh = (struct qt_disk_dqdbheader *)buf;
+ if (info->dqi_free_entry) {
+ blk = info->dqi_free_entry;
+ *err = read_blk(info, blk, buf);
+ if (*err < 0)
+ goto out_buf;
+ } else {
+ blk = get_free_dqblk(info);
+ if ((int)blk < 0) {
+ *err = blk;
+ freedqbuf(buf);
+ return 0;
+ }
+ memset(buf, 0, info->dqi_usable_bs);
+ /* This is enough as block is already zeroed and entry list is empty... */
+ info->dqi_free_entry = blk;
+ mark_info_dirty(dquot->dq_sb, dquot->dq_type);
+ }
+ /* Block will be full? */
+ if (le16_to_cpu(dh->dqdh_entries) + 1 >= qtree_dqstr_in_blk(info)) {
+ *err = remove_free_dqentry(info, buf, blk);
+ if (*err < 0) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Can't "
+ "remove block (%u) from entry free list.\n",
+ blk);
+ goto out_buf;
+ }
+ }
+ le16_add_cpu(&dh->dqdh_entries, 1);
+ /* Find free structure in block */
+ for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+ i < qtree_dqstr_in_blk(info) && !qtree_entry_unused(info, ddquot);
+ i++, ddquot += info->dqi_entry_size);
+#ifdef __QUOTA_QT_PARANOIA
+ if (i == qtree_dqstr_in_blk(info)) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Data block full "
+ "but it shouldn't.\n");
+ *err = -EIO;
+ goto out_buf;
+ }
+#endif
+ *err = write_blk(info, blk, buf);
+ if (*err < 0) {
+ printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota "
+ "data block %u.\n", blk);
+ goto out_buf;
+ }
+ dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+ sizeof(struct qt_disk_dqdbheader) +
+ i * info->dqi_entry_size;
+ freedqbuf(buf);
+ return blk;
+out_buf:
+ freedqbuf(buf);
+ return 0;
+}
+
+/* Insert reference to structure into the trie */
+static int do_insert_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint *treeblk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0, newson = 0, newact = 0;
+ __le32 *ref;
+ uint newblk;
+
+ if (!buf)
+ return -ENOMEM;
+ if (!*treeblk) {
+ ret = get_free_dqblk(info);
+ if (ret < 0)
+ goto out_buf;
+ *treeblk = ret;
+ memset(buf, 0, info->dqi_usable_bs);
+ newact = 1;
+ } else {
+ ret = read_blk(info, *treeblk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read tree quota block "
+ "%u.\n", *treeblk);
+ goto out_buf;
+ }
+ }
+ ref = (__le32 *)buf;
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (!newblk)
+ newson = 1;
+ if (depth == info->dqi_qtree_depth - 1) {
+#ifdef __QUOTA_QT_PARANOIA
+ if (newblk) {
+ printk(KERN_ERR "VFS: Inserting already present quota "
+ "entry (block %u).\n",
+ le32_to_cpu(ref[get_index(info,
+ dquot->dq_id, depth)]));
+ ret = -EIO;
+ goto out_buf;
+ }
+#endif
+ newblk = find_free_dqentry(info, dquot, &ret);
+ } else {
+ ret = do_insert_tree(info, dquot, &newblk, depth+1);
+ }
+ if (newson && ret >= 0) {
+ ref[get_index(info, dquot->dq_id, depth)] =
+ cpu_to_le32(newblk);
+ ret = write_blk(info, *treeblk, buf);
+ } else if (newact && ret < 0) {
+ put_free_dqblk(info, buf, *treeblk);
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Wrapper for inserting quota structure into tree */
+static inline int dq_insert_tree(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+{
+ int tmp = QT_TREEOFF;
+ return do_insert_tree(info, dquot, &tmp, 0);
+}
+
+/*
+ * We don't have to be afraid of deadlocks as we never have quotas on quota files...
+ */
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ int type = dquot->dq_type;
+ struct super_block *sb = dquot->dq_sb;
+ ssize_t ret;
+ dqbuf_t ddquot = getdqbuf(info->dqi_entry_size);
+
+ if (!ddquot)
+ return -ENOMEM;
+
+ /* dq_off is guarded by dqio_mutex */
+ if (!dquot->dq_off) {
+ ret = dq_insert_tree(info, dquot);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Error %zd occurred while "
+ "creating quota.\n", ret);
+ freedqbuf(ddquot);
+ return ret;
+ }
+ }
+ spin_lock(&dq_data_lock);
+ info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
+ spin_unlock(&dq_data_lock);
+ ret = sb->s_op->quota_write(sb, type, (char *)ddquot,
+ info->dqi_entry_size, dquot->dq_off);
+ if (ret != info->dqi_entry_size) {
+ printk(KERN_WARNING "VFS: dquota write failed on dev %s\n",
+ sb->s_id);
+ if (ret >= 0)
+ ret = -ENOSPC;
+ } else {
+ ret = 0;
+ }
+ dqstats.writes++;
+ freedqbuf(ddquot);
+
+ return ret;
+}
+EXPORT_SYMBOL(qtree_write_dquot);
+
+/* Free dquot entry in data block */
+static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint blk)
+{
+ struct qt_disk_dqdbheader *dh;
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0;
+
+ if (!buf)
+ return -ENOMEM;
+ if (dquot->dq_off >> info->dqi_blocksize_bits != blk) {
+ printk(KERN_ERR "VFS: Quota structure has offset to other "
+ "block (%u) than it should (%u).\n", blk,
+ (uint)(dquot->dq_off >> info->dqi_blocksize_bits));
+ goto out_buf;
+ }
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
+ goto out_buf;
+ }
+ dh = (struct qt_disk_dqdbheader *)buf;
+ le16_add_cpu(&dh->dqdh_entries, -1);
+ if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
+ ret = remove_free_dqentry(info, buf, blk);
+ if (ret >= 0)
+ ret = put_free_dqblk(info, buf, blk);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't move quota data block (%u) "
+ "to free list.\n", blk);
+ goto out_buf;
+ }
+ } else {
+ memset(buf +
+ (dquot->dq_off & ((1 << info->dqi_blocksize_bits) - 1)),
+ 0, info->dqi_entry_size);
+ if (le16_to_cpu(dh->dqdh_entries) ==
+ qtree_dqstr_in_blk(info) - 1) {
+ /* Insert will write block itself */
+ ret = insert_free_dqentry(info, buf, blk);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't insert quota data "
+ "block (%u) to free entry list.\n", blk);
+ goto out_buf;
+ }
+ } else {
+ ret = write_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't write quota data "
+ "block %u\n", blk);
+ goto out_buf;
+ }
+ }
+ }
+ dquot->dq_off = 0; /* Quota is now unattached */
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Remove reference to dquot from tree */
+static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot,
+ uint *blk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ int ret = 0;
+ uint newblk;
+ __le32 *ref = (__le32 *)buf;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, *blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
+ goto out_buf;
+ }
+ newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (depth == info->dqi_qtree_depth - 1) {
+ ret = free_dqentry(info, dquot, newblk);
+ newblk = 0;
+ } else {
+ ret = remove_tree(info, dquot, &newblk, depth+1);
+ }
+ if (ret >= 0 && !newblk) {
+ int i;
+ ref[get_index(info, dquot->dq_id, depth)] = cpu_to_le32(0);
+ /* Block got empty? */
+ for (i = 0;
+ i < (info->dqi_usable_bs >> 2) && !ref[i];
+ i++);
+ /* Don't put the root block into the free block list */
+ if (i == (info->dqi_usable_bs >> 2)
+ && *blk != QT_TREEOFF) {
+ put_free_dqblk(info, buf, *blk);
+ *blk = 0;
+ } else {
+ ret = write_blk(info, *blk, buf);
+ if (ret < 0)
+ printk(KERN_ERR "VFS: Can't write quota tree "
+ "block %u.\n", *blk);
+ }
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Delete dquot from tree */
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ uint tmp = QT_TREEOFF;
+
+ if (!dquot->dq_off) /* Even not allocated? */
+ return 0;
+ return remove_tree(info, dquot, &tmp, 0);
+}
+EXPORT_SYMBOL(qtree_delete_dquot);
+
+/* Find entry in block */
+static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, uint blk)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ loff_t ret = 0;
+ int i;
+ char *ddquot;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+ goto out_buf;
+ }
+ for (i = 0, ddquot = ((char *)buf) + sizeof(struct qt_disk_dqdbheader);
+ i < qtree_dqstr_in_blk(info) && !info->dqi_ops->is_id(ddquot, dquot);
+ i++, ddquot += info->dqi_entry_size);
+ if (i == qtree_dqstr_in_blk(info)) {
+ printk(KERN_ERR "VFS: Quota for id %u referenced "
+ "but not present.\n", dquot->dq_id);
+ ret = -EIO;
+ goto out_buf;
+ } else {
+ ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+ qt_disk_dqdbheader) + i * info->dqi_entry_size;
+ }
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Find entry for given id in the tree */
+static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot, uint blk, int depth)
+{
+ dqbuf_t buf = getdqbuf(info->dqi_usable_bs);
+ loff_t ret = 0;
+ __le32 *ref = (__le32 *)buf;
+
+ if (!buf)
+ return -ENOMEM;
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
+ goto out_buf;
+ }
+ ret = 0;
+ blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]);
+ if (!blk) /* No reference? */
+ goto out_buf;
+ if (depth < info->dqi_qtree_depth - 1)
+ ret = find_tree_dqentry(info, dquot, blk, depth+1);
+ else
+ ret = find_block_dqentry(info, dquot, blk);
+out_buf:
+ freedqbuf(buf);
+ return ret;
+}
+
+/* Find entry for given id in the tree - wrapper function */
+static inline loff_t find_dqentry(struct qtree_mem_dqinfo *info,
+ struct dquot *dquot)
+{
+ return find_tree_dqentry(info, dquot, QT_TREEOFF, 0);
+}
+
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ int type = dquot->dq_type;
+ struct super_block *sb = dquot->dq_sb;
+ loff_t offset;
+ dqbuf_t ddquot;
+ int ret = 0;
+
+#ifdef __QUOTA_QT_PARANOIA
+ /* Invalidated quota? */
+ if (!sb_dqopt(dquot->dq_sb)->files[type]) {
+ printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
+ return -EIO;
+ }
+#endif
+ /* Do we know offset of the dquot entry in the quota file? */
+ if (!dquot->dq_off) {
+ offset = find_dqentry(info, dquot);
+ if (offset <= 0) { /* Entry not present? */
+ if (offset < 0)
+ printk(KERN_ERR "VFS: Can't read quota "
+ "structure for id %u.\n", dquot->dq_id);
+ dquot->dq_off = 0;
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+ ret = offset;
+ goto out;
+ }
+ dquot->dq_off = offset;
+ }
+ ddquot = getdqbuf(info->dqi_entry_size);
+ if (!ddquot)
+ return -ENOMEM;
+ ret = sb->s_op->quota_read(sb, type, (char *)ddquot,
+ info->dqi_entry_size, dquot->dq_off);
+ if (ret != info->dqi_entry_size) {
+ if (ret >= 0)
+ ret = -EIO;
+ printk(KERN_ERR "VFS: Error while reading quota "
+ "structure for id %u.\n", dquot->dq_id);
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
+ freedqbuf(ddquot);
+ goto out;
+ }
+ spin_lock(&dq_data_lock);
+ info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
+ if (!dquot->dq_dqb.dqb_bhardlimit &&
+ !dquot->dq_dqb.dqb_bsoftlimit &&
+ !dquot->dq_dqb.dqb_ihardlimit &&
+ !dquot->dq_dqb.dqb_isoftlimit)
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ spin_unlock(&dq_data_lock);
+ freedqbuf(ddquot);
+out:
+ dqstats.reads++;
+ return ret;
+}
+EXPORT_SYMBOL(qtree_read_dquot);
+
+/* Check whether dquot should not be deleted. We know we are
+ * the only one operating on dquot (thanks to dq_lock) */
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
+{
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
+ return qtree_delete_dquot(info, dquot);
+ return 0;
+}
+EXPORT_SYMBOL(qtree_release_dquot);
diff --git a/fs/quota_tree.h b/fs/quota_tree.h
new file mode 100644
index 000000000000..a1ab8db81a51
--- /dev/null
+++ b/fs/quota_tree.h
@@ -0,0 +1,25 @@
+/*
+ * Definitions of structures for vfsv0 quota format
+ */
+
+#ifndef _LINUX_QUOTA_TREE_H
+#define _LINUX_QUOTA_TREE_H
+
+#include <linux/types.h>
+#include <linux/quota.h>
+
+/*
+ * Structure of header of block with quota structures. It is padded to 16 bytes so
+ * there will be space for exactly 21 quota-entries in a block
+ */
+struct qt_disk_dqdbheader {
+ __le32 dqdh_next_free; /* Number of next block with free entry */
+ __le32 dqdh_prev_free; /* Number of previous block with free entry */
+ __le16 dqdh_entries; /* Number of valid entries in block */
+ __le16 dqdh_pad1;
+ __le32 dqdh_pad2;
+};
+
+#define QT_TREEOFF 1 /* Offset of tree in file in blocks */
+
+#endif /* _LINUX_QUOTAIO_TREE_H */
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index 5ae15b13eeb0..b4af1c69ad16 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -3,25 +3,39 @@
#include <linux/quota.h>
#include <linux/quotaops.h>
#include <linux/dqblk_v1.h>
-#include <linux/quotaio_v1.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <asm/byteorder.h>
+#include "quotaio_v1.h"
+
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Old quota format support");
MODULE_LICENSE("GPL");
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
+
+static inline qsize_t v1_stoqb(qsize_t space)
+{
+ return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v1_qbtos(qsize_t blocks)
+{
+ return blocks << QUOTABLOCK_BITS;
+}
+
static void v1_disk2mem_dqblk(struct mem_dqblk *m, struct v1_disk_dqblk *d)
{
m->dqb_ihardlimit = d->dqb_ihardlimit;
m->dqb_isoftlimit = d->dqb_isoftlimit;
m->dqb_curinodes = d->dqb_curinodes;
- m->dqb_bhardlimit = d->dqb_bhardlimit;
- m->dqb_bsoftlimit = d->dqb_bsoftlimit;
- m->dqb_curspace = ((qsize_t)d->dqb_curblocks) << QUOTABLOCK_BITS;
+ m->dqb_bhardlimit = v1_qbtos(d->dqb_bhardlimit);
+ m->dqb_bsoftlimit = v1_qbtos(d->dqb_bsoftlimit);
+ m->dqb_curspace = v1_qbtos(d->dqb_curblocks);
m->dqb_itime = d->dqb_itime;
m->dqb_btime = d->dqb_btime;
}
@@ -31,9 +45,9 @@ static void v1_mem2disk_dqblk(struct v1_disk_dqblk *d, struct mem_dqblk *m)
d->dqb_ihardlimit = m->dqb_ihardlimit;
d->dqb_isoftlimit = m->dqb_isoftlimit;
d->dqb_curinodes = m->dqb_curinodes;
- d->dqb_bhardlimit = m->dqb_bhardlimit;
- d->dqb_bsoftlimit = m->dqb_bsoftlimit;
- d->dqb_curblocks = toqb(m->dqb_curspace);
+ d->dqb_bhardlimit = v1_stoqb(m->dqb_bhardlimit);
+ d->dqb_bsoftlimit = v1_stoqb(m->dqb_bsoftlimit);
+ d->dqb_curblocks = v1_stoqb(m->dqb_curspace);
d->dqb_itime = m->dqb_itime;
d->dqb_btime = m->dqb_btime;
}
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index b53827dc02d9..b618b563635c 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -6,7 +6,6 @@
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/dqblk_v2.h>
-#include <linux/quotaio_v2.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -15,16 +14,37 @@
#include <asm/byteorder.h>
+#include "quota_tree.h"
+#include "quotaio_v2.h"
+
MODULE_AUTHOR("Jan Kara");
MODULE_DESCRIPTION("Quota format v2 support");
MODULE_LICENSE("GPL");
#define __QUOTA_V2_PARANOIA
-typedef char *dqbuf_t;
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot);
+static void v2_disk2memdqb(struct dquot *dquot, void *dp);
+static int v2_is_id(void *dp, struct dquot *dquot);
+
+static struct qtree_fmt_operations v2_qtree_ops = {
+ .mem2disk_dqblk = v2_mem2diskdqb,
+ .disk2mem_dqblk = v2_disk2memdqb,
+ .is_id = v2_is_id,
+};
+
+#define QUOTABLOCK_BITS 10
+#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-#define GETIDINDEX(id, depth) (((id) >> ((V2_DQTREEDEPTH-(depth)-1)*8)) & 0xff)
-#define GETENTRIES(buf) ((struct v2_disk_dqblk *)(((char *)buf)+sizeof(struct v2_disk_dqdbheader)))
+static inline qsize_t v2_stoqb(qsize_t space)
+{
+ return (space + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS;
+}
+
+static inline qsize_t v2_qbtos(qsize_t blocks)
+{
+ return blocks << QUOTABLOCK_BITS;
+}
/* Check whether given file is really vfsv0 quotafile */
static int v2_check_quota_file(struct super_block *sb, int type)
@@ -50,7 +70,8 @@ static int v2_check_quota_file(struct super_block *sb, int type)
static int v2_read_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct qtree_mem_dqinfo *qinfo;
ssize_t size;
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
@@ -60,15 +81,29 @@ static int v2_read_file_info(struct super_block *sb, int type)
sb->s_id);
return -1;
}
+ info->dqi_priv = kmalloc(sizeof(struct qtree_mem_dqinfo), GFP_NOFS);
+ if (!info->dqi_priv) {
+ printk(KERN_WARNING
+ "Not enough memory for quota information structure.\n");
+ return -1;
+ }
+ qinfo = info->dqi_priv;
/* limits are stored as unsigned 32-bit data */
info->dqi_maxblimit = 0xffffffff;
info->dqi_maxilimit = 0xffffffff;
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
info->dqi_flags = le32_to_cpu(dinfo.dqi_flags);
- info->u.v2_i.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
- info->u.v2_i.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
- info->u.v2_i.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ qinfo->dqi_sb = sb;
+ qinfo->dqi_type = type;
+ qinfo->dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
+ qinfo->dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
+ qinfo->dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
+ qinfo->dqi_blocksize_bits = V2_DQBLKSIZE_BITS;
+ qinfo->dqi_usable_bs = 1 << V2_DQBLKSIZE_BITS;
+ qinfo->dqi_qtree_depth = qtree_depth(qinfo);
+ qinfo->dqi_entry_size = sizeof(struct v2_disk_dqblk);
+ qinfo->dqi_ops = &v2_qtree_ops;
return 0;
}
@@ -76,7 +111,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
static int v2_write_file_info(struct super_block *sb, int type)
{
struct v2_disk_dqinfo dinfo;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+type;
+ struct mem_dqinfo *info = sb_dqinfo(sb, type);
+ struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
ssize_t size;
spin_lock(&dq_data_lock);
@@ -85,9 +121,9 @@ static int v2_write_file_info(struct super_block *sb, int type)
dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
spin_unlock(&dq_data_lock);
- dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks);
- dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk);
- dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry);
+ dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
+ dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
+ dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
size = sb->s_op->quota_write(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
if (size != sizeof(struct v2_disk_dqinfo)) {
@@ -98,574 +134,75 @@ static int v2_write_file_info(struct super_block *sb, int type)
return 0;
}
-static void disk2memdqb(struct mem_dqblk *m, struct v2_disk_dqblk *d)
+static void v2_disk2memdqb(struct dquot *dquot, void *dp)
{
+ struct v2_disk_dqblk *d = dp, empty;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+
m->dqb_ihardlimit = le32_to_cpu(d->dqb_ihardlimit);
m->dqb_isoftlimit = le32_to_cpu(d->dqb_isoftlimit);
m->dqb_curinodes = le32_to_cpu(d->dqb_curinodes);
m->dqb_itime = le64_to_cpu(d->dqb_itime);
- m->dqb_bhardlimit = le32_to_cpu(d->dqb_bhardlimit);
- m->dqb_bsoftlimit = le32_to_cpu(d->dqb_bsoftlimit);
+ m->dqb_bhardlimit = v2_qbtos(le32_to_cpu(d->dqb_bhardlimit));
+ m->dqb_bsoftlimit = v2_qbtos(le32_to_cpu(d->dqb_bsoftlimit));
m->dqb_curspace = le64_to_cpu(d->dqb_curspace);
m->dqb_btime = le64_to_cpu(d->dqb_btime);
+ /* We need to escape back all-zero structure */
+ memset(&empty, 0, sizeof(struct v2_disk_dqblk));
+ empty.dqb_itime = cpu_to_le64(1);
+ if (!memcmp(&empty, dp, sizeof(struct v2_disk_dqblk)))
+ m->dqb_itime = 0;
}
-static void mem2diskdqb(struct v2_disk_dqblk *d, struct mem_dqblk *m, qid_t id)
+static void v2_mem2diskdqb(void *dp, struct dquot *dquot)
{
+ struct v2_disk_dqblk *d = dp;
+ struct mem_dqblk *m = &dquot->dq_dqb;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
+
d->dqb_ihardlimit = cpu_to_le32(m->dqb_ihardlimit);
d->dqb_isoftlimit = cpu_to_le32(m->dqb_isoftlimit);
d->dqb_curinodes = cpu_to_le32(m->dqb_curinodes);
d->dqb_itime = cpu_to_le64(m->dqb_itime);
- d->dqb_bhardlimit = cpu_to_le32(m->dqb_bhardlimit);
- d->dqb_bsoftlimit = cpu_to_le32(m->dqb_bsoftlimit);
+ d->dqb_bhardlimit = cpu_to_le32(v2_stoqb(m->dqb_bhardlimit));
+ d->dqb_bsoftlimit = cpu_to_le32(v2_stoqb(m->dqb_bsoftlimit));
d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
d->dqb_btime = cpu_to_le64(m->dqb_btime);
- d->dqb_id = cpu_to_le32(id);
-}
-
-static dqbuf_t getdqbuf(void)
-{
- dqbuf_t buf = kmalloc(V2_DQBLKSIZE, GFP_NOFS);
- if (!buf)
- printk(KERN_WARNING "VFS: Not enough memory for quota buffers.\n");
- return buf;
-}
-
-static inline void freedqbuf(dqbuf_t buf)
-{
- kfree(buf);
-}
-
-static inline ssize_t read_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
- memset(buf, 0, V2_DQBLKSIZE);
- return sb->s_op->quota_read(sb, type, (char *)buf,
- V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-static inline ssize_t write_blk(struct super_block *sb, int type, uint blk, dqbuf_t buf)
-{
- return sb->s_op->quota_write(sb, type, (char *)buf,
- V2_DQBLKSIZE, blk << V2_DQBLKSIZE_BITS);
-}
-
-/* Remove empty block from list and return it */
-static int get_free_dqblk(struct super_block *sb, int type)
-{
- dqbuf_t buf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int ret, blk;
-
- if (!buf)
- return -ENOMEM;
- if (info->u.v2_i.dqi_free_blk) {
- blk = info->u.v2_i.dqi_free_blk;
- if ((ret = read_blk(sb, type, blk, buf)) < 0)
- goto out_buf;
- info->u.v2_i.dqi_free_blk = le32_to_cpu(dh->dqdh_next_free);
- }
- else {
- memset(buf, 0, V2_DQBLKSIZE);
- /* Assure block allocation... */
- if ((ret = write_blk(sb, type, info->u.v2_i.dqi_blocks, buf)) < 0)
- goto out_buf;
- blk = info->u.v2_i.dqi_blocks++;
- }
- mark_info_dirty(sb, type);
- ret = blk;
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Insert empty block to the list */
-static int put_free_dqblk(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int err;
-
- dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_blk);
- dh->dqdh_prev_free = cpu_to_le32(0);
- dh->dqdh_entries = cpu_to_le16(0);
- info->u.v2_i.dqi_free_blk = blk;
- mark_info_dirty(sb, type);
- /* Some strange block. We had better leave it... */
- if ((err = write_blk(sb, type, blk, buf)) < 0)
- return err;
- return 0;
+ d->dqb_id = cpu_to_le32(dquot->dq_id);
+ if (qtree_entry_unused(info, dp))
+ d->dqb_itime = cpu_to_le64(1);
}
-/* Remove given block from the list of blocks with free entries */
-static int remove_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
+static int v2_is_id(void *dp, struct dquot *dquot)
{
- dqbuf_t tmpbuf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free);
- int err;
+ struct v2_disk_dqblk *d = dp;
+ struct qtree_mem_dqinfo *info =
+ sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
- if (!tmpbuf)
- return -ENOMEM;
- if (nextblk) {
- if ((err = read_blk(sb, type, nextblk, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = dh->dqdh_prev_free;
- if ((err = write_blk(sb, type, nextblk, tmpbuf)) < 0)
- goto out_buf;
- }
- if (prevblk) {
- if ((err = read_blk(sb, type, prevblk, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_next_free = dh->dqdh_next_free;
- if ((err = write_blk(sb, type, prevblk, tmpbuf)) < 0)
- goto out_buf;
- }
- else {
- info->u.v2_i.dqi_free_entry = nextblk;
- mark_info_dirty(sb, type);
- }
- freedqbuf(tmpbuf);
- dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0);
- /* No matter whether write succeeds block is out of list */
- if (write_blk(sb, type, blk, buf) < 0)
- printk(KERN_ERR "VFS: Can't write block (%u) with free entries.\n", blk);
- return 0;
-out_buf:
- freedqbuf(tmpbuf);
- return err;
-}
-
-/* Insert given block to the beginning of list with free entries */
-static int insert_free_dqentry(struct super_block *sb, int type, dqbuf_t buf, uint blk)
-{
- dqbuf_t tmpbuf = getdqbuf();
- struct mem_dqinfo *info = sb_dqinfo(sb, type);
- struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf;
- int err;
-
- if (!tmpbuf)
- return -ENOMEM;
- dh->dqdh_next_free = cpu_to_le32(info->u.v2_i.dqi_free_entry);
- dh->dqdh_prev_free = cpu_to_le32(0);
- if ((err = write_blk(sb, type, blk, buf)) < 0)
- goto out_buf;
- if (info->u.v2_i.dqi_free_entry) {
- if ((err = read_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
- goto out_buf;
- ((struct v2_disk_dqdbheader *)tmpbuf)->dqdh_prev_free = cpu_to_le32(blk);
- if ((err = write_blk(sb, type, info->u.v2_i.dqi_free_entry, tmpbuf)) < 0)
- goto out_buf;
- }
- freedqbuf(tmpbuf);
- info->u.v2_i.dqi_free_entry = blk;
- mark_info_dirty(sb, type);
- return 0;
-out_buf:
- freedqbuf(tmpbuf);
- return err;
-}
-
-/* Find space for dquot */
-static uint find_free_dqentry(struct dquot *dquot, int *err)
-{
- struct super_block *sb = dquot->dq_sb;
- struct mem_dqinfo *info = sb_dqopt(sb)->info+dquot->dq_type;
- uint blk, i;
- struct v2_disk_dqdbheader *dh;
- struct v2_disk_dqblk *ddquot;
- struct v2_disk_dqblk fakedquot;
- dqbuf_t buf;
-
- *err = 0;
- if (!(buf = getdqbuf())) {
- *err = -ENOMEM;
+ if (qtree_entry_unused(info, dp))
return 0;
- }
- dh = (struct v2_disk_dqdbheader *)buf;
- ddquot = GETENTRIES(buf);
- if (info->u.v2_i.dqi_free_entry) {
- blk = info->u.v2_i.dqi_free_entry;
- if ((*err = read_blk(sb, dquot->dq_type, blk, buf)) < 0)
- goto out_buf;
- }
- else {
- blk = get_free_dqblk(sb, dquot->dq_type);
- if ((int)blk < 0) {
- *err = blk;
- freedqbuf(buf);
- return 0;
- }
- memset(buf, 0, V2_DQBLKSIZE);
- /* This is enough as block is already zeroed and entry list is empty... */
- info->u.v2_i.dqi_free_entry = blk;
- mark_info_dirty(sb, dquot->dq_type);
- }
- if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */
- if ((*err = remove_free_dqentry(sb, dquot->dq_type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
- goto out_buf;
- }
- le16_add_cpu(&dh->dqdh_entries, 1);
- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
- /* Find free structure in block */
- for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
-#ifdef __QUOTA_V2_PARANOIA
- if (i == V2_DQSTRINBLK) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Data block full but it shouldn't.\n");
- *err = -EIO;
- goto out_buf;
- }
-#endif
- if ((*err = write_blk(sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: find_free_dqentry(): Can't write quota data block %u.\n", blk);
- goto out_buf;
- }
- dquot->dq_off = (blk<<V2_DQBLKSIZE_BITS)+sizeof(struct v2_disk_dqdbheader)+i*sizeof(struct v2_disk_dqblk);
- freedqbuf(buf);
- return blk;
-out_buf:
- freedqbuf(buf);
- return 0;
-}
-
-/* Insert reference to structure into the trie */
-static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth)
-{
- struct super_block *sb = dquot->dq_sb;
- dqbuf_t buf;
- int ret = 0, newson = 0, newact = 0;
- __le32 *ref;
- uint newblk;
-
- if (!(buf = getdqbuf()))
- return -ENOMEM;
- if (!*treeblk) {
- ret = get_free_dqblk(sb, dquot->dq_type);
- if (ret < 0)
- goto out_buf;
- *treeblk = ret;
- memset(buf, 0, V2_DQBLKSIZE);
- newact = 1;
- }
- else {
- if ((ret = read_blk(sb, dquot->dq_type, *treeblk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read tree quota block %u.\n", *treeblk);
- goto out_buf;
- }
- }
- ref = (__le32 *)buf;
- newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (!newblk)
- newson = 1;
- if (depth == V2_DQTREEDEPTH-1) {
-#ifdef __QUOTA_V2_PARANOIA
- if (newblk) {
- printk(KERN_ERR "VFS: Inserting already present quota entry (block %u).\n", le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]));
- ret = -EIO;
- goto out_buf;
- }
-#endif
- newblk = find_free_dqentry(dquot, &ret);
- }
- else
- ret = do_insert_tree(dquot, &newblk, depth+1);
- if (newson && ret >= 0) {
- ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(newblk);
- ret = write_blk(sb, dquot->dq_type, *treeblk, buf);
- }
- else if (newact && ret < 0)
- put_free_dqblk(sb, dquot->dq_type, buf, *treeblk);
-out_buf:
- freedqbuf(buf);
- return ret;
+ return le32_to_cpu(d->dqb_id) == dquot->dq_id;
}
-/* Wrapper for inserting quota structure into tree */
-static inline int dq_insert_tree(struct dquot *dquot)
+static int v2_read_dquot(struct dquot *dquot)
{
- int tmp = V2_DQTREEOFF;
- return do_insert_tree(dquot, &tmp, 0);
+ return qtree_read_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/*
- * We don't have to be afraid of deadlocks as we never have quotas on quota files...
- */
static int v2_write_dquot(struct dquot *dquot)
{
- int type = dquot->dq_type;
- ssize_t ret;
- struct v2_disk_dqblk ddquot, empty;
-
- /* dq_off is guarded by dqio_mutex */
- if (!dquot->dq_off)
- if ((ret = dq_insert_tree(dquot)) < 0) {
- printk(KERN_ERR "VFS: Error %zd occurred while creating quota.\n", ret);
- return ret;
- }
- spin_lock(&dq_data_lock);
- mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id);
- /* Argh... We may need to write structure full of zeroes but that would be
- * treated as an empty place by the rest of the code. Format change would
- * be definitely cleaner but the problems probably are not worth it */
- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
- ddquot.dqb_itime = cpu_to_le64(1);
- spin_unlock(&dq_data_lock);
- ret = dquot->dq_sb->s_op->quota_write(dquot->dq_sb, type,
- (char *)&ddquot, sizeof(struct v2_disk_dqblk), dquot->dq_off);
- if (ret != sizeof(struct v2_disk_dqblk)) {
- printk(KERN_WARNING "VFS: dquota write failed on dev %s\n", dquot->dq_sb->s_id);
- if (ret >= 0)
- ret = -ENOSPC;
- }
- else
- ret = 0;
- dqstats.writes++;
-
- return ret;
+ return qtree_write_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/* Free dquot entry in data block */
-static int free_dqentry(struct dquot *dquot, uint blk)
-{
- struct super_block *sb = dquot->dq_sb;
- int type = dquot->dq_type;
- struct v2_disk_dqdbheader *dh;
- dqbuf_t buf = getdqbuf();
- int ret = 0;
-
- if (!buf)
- return -ENOMEM;
- if (dquot->dq_off >> V2_DQBLKSIZE_BITS != blk) {
- printk(KERN_ERR "VFS: Quota structure has offset to other "
- "block (%u) than it should (%u).\n", blk,
- (uint)(dquot->dq_off >> V2_DQBLKSIZE_BITS));
- goto out_buf;
- }
- if ((ret = read_blk(sb, type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota data block %u\n", blk);
- goto out_buf;
- }
- dh = (struct v2_disk_dqdbheader *)buf;
- le16_add_cpu(&dh->dqdh_entries, -1);
- if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
- if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
- (ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: Can't move quota data block (%u) "
- "to free list.\n", blk);
- goto out_buf;
- }
- }
- else {
- memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0,
- sizeof(struct v2_disk_dqblk));
- if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) {
- /* Insert will write block itself */
- if ((ret = insert_free_dqentry(sb, type, buf, blk)) < 0) {
- printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk);
- goto out_buf;
- }
- }
- else
- if ((ret = write_blk(sb, type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't write quota data "
- "block %u\n", blk);
- goto out_buf;
- }
- }
- dquot->dq_off = 0; /* Quota is now unattached */
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Remove reference to dquot from tree */
-static int remove_tree(struct dquot *dquot, uint *blk, int depth)
-{
- struct super_block *sb = dquot->dq_sb;
- int type = dquot->dq_type;
- dqbuf_t buf = getdqbuf();
- int ret = 0;
- uint newblk;
- __le32 *ref = (__le32 *)buf;
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(sb, type, *blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota data block %u\n", *blk);
- goto out_buf;
- }
- newblk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (depth == V2_DQTREEDEPTH-1) {
- ret = free_dqentry(dquot, newblk);
- newblk = 0;
- }
- else
- ret = remove_tree(dquot, &newblk, depth+1);
- if (ret >= 0 && !newblk) {
- int i;
- ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0);
- for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? */
- /* Don't put the root block into the free block list */
- if (i == V2_DQBLKSIZE && *blk != V2_DQTREEOFF) {
- put_free_dqblk(sb, type, buf, *blk);
- *blk = 0;
- }
- else
- if ((ret = write_blk(sb, type, *blk, buf)) < 0)
- printk(KERN_ERR "VFS: Can't write quota tree "
- "block %u.\n", *blk);
- }
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Delete dquot from tree */
-static int v2_delete_dquot(struct dquot *dquot)
-{
- uint tmp = V2_DQTREEOFF;
-
- if (!dquot->dq_off) /* Even not allocated? */
- return 0;
- return remove_tree(dquot, &tmp, 0);
-}
-
-/* Find entry in block */
-static loff_t find_block_dqentry(struct dquot *dquot, uint blk)
-{
- dqbuf_t buf = getdqbuf();
- loff_t ret = 0;
- int i;
- struct v2_disk_dqblk *ddquot = GETENTRIES(buf);
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
- goto out_buf;
- }
- if (dquot->dq_id)
- for (i = 0; i < V2_DQSTRINBLK &&
- le32_to_cpu(ddquot[i].dqb_id) != dquot->dq_id; i++);
- else { /* ID 0 as a bit more complicated searching... */
- struct v2_disk_dqblk fakedquot;
-
- memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
- for (i = 0; i < V2_DQSTRINBLK; i++)
- if (!le32_to_cpu(ddquot[i].dqb_id) &&
- memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)))
- break;
- }
- if (i == V2_DQSTRINBLK) {
- printk(KERN_ERR "VFS: Quota for id %u referenced "
- "but not present.\n", dquot->dq_id);
- ret = -EIO;
- goto out_buf;
- }
- else
- ret = (blk << V2_DQBLKSIZE_BITS) + sizeof(struct
- v2_disk_dqdbheader) + i * sizeof(struct v2_disk_dqblk);
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Find entry for given id in the tree */
-static loff_t find_tree_dqentry(struct dquot *dquot, uint blk, int depth)
-{
- dqbuf_t buf = getdqbuf();
- loff_t ret = 0;
- __le32 *ref = (__le32 *)buf;
-
- if (!buf)
- return -ENOMEM;
- if ((ret = read_blk(dquot->dq_sb, dquot->dq_type, blk, buf)) < 0) {
- printk(KERN_ERR "VFS: Can't read quota tree block %u.\n", blk);
- goto out_buf;
- }
- ret = 0;
- blk = le32_to_cpu(ref[GETIDINDEX(dquot->dq_id, depth)]);
- if (!blk) /* No reference? */
- goto out_buf;
- if (depth < V2_DQTREEDEPTH-1)
- ret = find_tree_dqentry(dquot, blk, depth+1);
- else
- ret = find_block_dqentry(dquot, blk);
-out_buf:
- freedqbuf(buf);
- return ret;
-}
-
-/* Find entry for given id in the tree - wrapper function */
-static inline loff_t find_dqentry(struct dquot *dquot)
-{
- return find_tree_dqentry(dquot, V2_DQTREEOFF, 0);
-}
-
-static int v2_read_dquot(struct dquot *dquot)
+static int v2_release_dquot(struct dquot *dquot)
{
- int type = dquot->dq_type;
- loff_t offset;
- struct v2_disk_dqblk ddquot, empty;
- int ret = 0;
-
-#ifdef __QUOTA_V2_PARANOIA
- /* Invalidated quota? */
- if (!dquot->dq_sb || !sb_dqopt(dquot->dq_sb)->files[type]) {
- printk(KERN_ERR "VFS: Quota invalidated while reading!\n");
- return -EIO;
- }
-#endif
- offset = find_dqentry(dquot);
- if (offset <= 0) { /* Entry not present? */
- if (offset < 0)
- printk(KERN_ERR "VFS: Can't read quota "
- "structure for id %u.\n", dquot->dq_id);
- dquot->dq_off = 0;
- set_bit(DQ_FAKE_B, &dquot->dq_flags);
- memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk));
- ret = offset;
- }
- else {
- dquot->dq_off = offset;
- if ((ret = dquot->dq_sb->s_op->quota_read(dquot->dq_sb, type,
- (char *)&ddquot, sizeof(struct v2_disk_dqblk), offset))
- != sizeof(struct v2_disk_dqblk)) {
- if (ret >= 0)
- ret = -EIO;
- printk(KERN_ERR "VFS: Error while reading quota "
- "structure for id %u.\n", dquot->dq_id);
- memset(&ddquot, 0, sizeof(struct v2_disk_dqblk));
- }
- else {
- ret = 0;
- /* We need to escape back all-zero structure */
- memset(&empty, 0, sizeof(struct v2_disk_dqblk));
- empty.dqb_itime = cpu_to_le64(1);
- if (!memcmp(&empty, &ddquot, sizeof(struct v2_disk_dqblk)))
- ddquot.dqb_itime = 0;
- }
- disk2memdqb(&dquot->dq_dqb, &ddquot);
- if (!dquot->dq_dqb.dqb_bhardlimit &&
- !dquot->dq_dqb.dqb_bsoftlimit &&
- !dquot->dq_dqb.dqb_ihardlimit &&
- !dquot->dq_dqb.dqb_isoftlimit)
- set_bit(DQ_FAKE_B, &dquot->dq_flags);
- }
- dqstats.reads++;
-
- return ret;
+ return qtree_release_dquot(sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv, dquot);
}
-/* Check whether dquot should not be deleted. We know we are
- * the only one operating on dquot (thanks to dq_lock) */
-static int v2_release_dquot(struct dquot *dquot)
+static int v2_free_file_info(struct super_block *sb, int type)
{
- if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace))
- return v2_delete_dquot(dquot);
+ kfree(sb_dqinfo(sb, type)->dqi_priv);
return 0;
}
@@ -673,7 +210,7 @@ static struct quota_format_ops v2_format_ops = {
.check_quota_file = v2_check_quota_file,
.read_file_info = v2_read_file_info,
.write_file_info = v2_write_file_info,
- .free_file_info = NULL,
+ .free_file_info = v2_free_file_info,
.read_dqblk = v2_read_dquot,
.commit_dqblk = v2_write_dquot,
.release_dqblk = v2_release_dquot,
diff --git a/include/linux/quotaio_v1.h b/fs/quotaio_v1.h
index 746654b5de70..746654b5de70 100644
--- a/include/linux/quotaio_v1.h
+++ b/fs/quotaio_v1.h
diff --git a/include/linux/quotaio_v2.h b/fs/quotaio_v2.h
index 303d7cbe30d4..530fe580685c 100644
--- a/include/linux/quotaio_v2.h
+++ b/fs/quotaio_v2.h
@@ -21,6 +21,12 @@
0 /* GRPQUOTA */\
}
+/* First generic header */
+struct v2_disk_dqheader {
+ __le32 dqh_magic; /* Magic number identifying file */
+ __le32 dqh_version; /* File version */
+};
+
/*
* The following structure defines the format of the disk quota file
* (as it appears on disk) - the file is a radix tree whose leaves point
@@ -38,15 +44,6 @@ struct v2_disk_dqblk {
__le64 dqb_itime; /* time limit for excessive inode use */
};
-/*
- * Here are header structures as written on disk and their in-memory copies
- */
-/* First generic header */
-struct v2_disk_dqheader {
- __le32 dqh_magic; /* Magic number identifying file */
- __le32 dqh_version; /* File version */
-};
-
/* Header with type and version specific information */
struct v2_disk_dqinfo {
__le32 dqi_bgrace; /* Time before block soft limit becomes hard limit */
@@ -57,23 +54,7 @@ struct v2_disk_dqinfo {
__le32 dqi_free_entry; /* Number of block with at least one free entry */
};
-/*
- * Structure of header of block with quota structures. It is padded to 16 bytes so
- * there will be space for exactly 21 quota-entries in a block
- */
-struct v2_disk_dqdbheader {
- __le32 dqdh_next_free; /* Number of next block with free entry */
- __le32 dqdh_prev_free; /* Number of previous block with free entry */
- __le16 dqdh_entries; /* Number of valid entries in block */
- __le16 dqdh_pad1;
- __le32 dqdh_pad2;
-};
-
#define V2_DQINFOOFF sizeof(struct v2_disk_dqheader) /* Offset of info header in file */
-#define V2_DQBLKSIZE_BITS 10
-#define V2_DQBLKSIZE (1 << V2_DQBLKSIZE_BITS) /* Size of block with quota structures */
-#define V2_DQTREEOFF 1 /* Offset of tree in file in blocks */
-#define V2_DQTREEDEPTH 4 /* Depth of quota tree */
-#define V2_DQSTRINBLK ((V2_DQBLKSIZE - sizeof(struct v2_disk_dqdbheader)) / sizeof(struct v2_disk_dqblk)) /* Number of entries in one blocks */
+#define V2_DQBLKSIZE_BITS 10 /* Size of leaf block in tree */
#endif /* _LINUX_QUOTAIO_V2_H */
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0d4c1a2f0f74..f863769fa8aa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -657,6 +657,8 @@ static struct dquot_operations reiserfs_quota_operations = {
.release_dquot = reiserfs_release_dquot,
.mark_dirty = reiserfs_mark_dquot_dirty,
.write_info = reiserfs_write_info,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
};
static struct quotactl_ops reiserfs_qctl_operations = {
@@ -1002,8 +1004,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
if (c == 'u' || c == 'g') {
int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
- if ((sb_any_quota_enabled(s) ||
- sb_any_quota_suspended(s)) &&
+ if (sb_any_quota_loaded(s) &&
(!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1049,8 +1050,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
"reiserfs_parse_options: unknown quota format specified.");
return 0;
}
- if ((sb_any_quota_enabled(s) ||
- sb_any_quota_suspended(s)) &&
+ if (sb_any_quota_loaded(s) &&
*qfmt != REISERFS_SB(s)->s_jquota_fmt) {
reiserfs_warning(s,
"reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
@@ -1075,7 +1075,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
}
/* This checking is not precise wrt the quota type but for our purposes it is sufficient */
if (!(*mount_options & (1 << REISERFS_QUOTA))
- && sb_any_quota_enabled(s)) {
+ && sb_any_quota_loaded(s)) {
reiserfs_warning(s,
"reiserfs_parse_options: quota options must be present when quota is turned on.");
return 0;
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e531783e5d78..4c32642678f8 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -56,8 +56,6 @@ header-y += dlm_device.h
header-y += dlm_netlink.h
header-y += dm-ioctl.h
header-y += dn.h
-header-y += dqblk_v1.h
-header-y += dqblk_v2.h
header-y += dqblk_xfs.h
header-y += efs_fs_sb.h
header-y += elf-fdpic.h
@@ -134,8 +132,6 @@ header-y += posix_types.h
header-y += ppdev.h
header-y += prctl.h
header-y += qnxtypes.h
-header-y += quotaio_v1.h
-header-y += quotaio_v2.h
header-y += radeonfb.h
header-y += raw.h
header-y += resource.h
diff --git a/include/linux/dqblk_qtree.h b/include/linux/dqblk_qtree.h
new file mode 100644
index 000000000000..82a16527b367
--- /dev/null
+++ b/include/linux/dqblk_qtree.h
@@ -0,0 +1,56 @@
+/*
+ * Definitions of structures and functions for quota formats using trie
+ */
+
+#ifndef _LINUX_DQBLK_QTREE_H
+#define _LINUX_DQBLK_QTREE_H
+
+#include <linux/types.h>
+
+/* Numbers of blocks needed for updates - we count with the smallest
+ * possible block size (1024) */
+#define QTREE_INIT_ALLOC 4
+#define QTREE_INIT_REWRITE 2
+#define QTREE_DEL_ALLOC 0
+#define QTREE_DEL_REWRITE 6
+
+struct dquot;
+
+/* Operations */
+struct qtree_fmt_operations {
+ void (*mem2disk_dqblk)(void *disk, struct dquot *dquot); /* Convert given entry from in memory format to disk one */
+ void (*disk2mem_dqblk)(struct dquot *dquot, void *disk); /* Convert given entry from disk format to in memory one */
+ int (*is_id)(void *disk, struct dquot *dquot); /* Is this structure for given id? */
+};
+
+/* Inmemory copy of version specific information */
+struct qtree_mem_dqinfo {
+ struct super_block *dqi_sb; /* Sb quota is on */
+ int dqi_type; /* Quota type */
+ unsigned int dqi_blocks; /* # of blocks in quota file */
+ unsigned int dqi_free_blk; /* First block in list of free blocks */
+ unsigned int dqi_free_entry; /* First block with free entry */
+ unsigned int dqi_blocksize_bits; /* Block size of quota file */
+ unsigned int dqi_entry_size; /* Size of quota entry in quota file */
+ unsigned int dqi_usable_bs; /* Space usable in block for quota data */
+ unsigned int dqi_qtree_depth; /* Precomputed depth of quota tree */
+ struct qtree_fmt_operations *dqi_ops; /* Operations for entry manipulation */
+};
+
+int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_delete_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot);
+int qtree_entry_unused(struct qtree_mem_dqinfo *info, char *disk);
+static inline int qtree_depth(struct qtree_mem_dqinfo *info)
+{
+ unsigned int epb = info->dqi_usable_bs >> 2;
+ unsigned long long entries = epb;
+ int i;
+
+ for (i = 1; entries < (1ULL << 32); i++)
+ entries *= epb;
+ return i;
+}
+
+#endif /* _LINUX_DQBLK_QTREE_H */
diff --git a/include/linux/dqblk_v1.h b/include/linux/dqblk_v1.h
index 57f1250d5a52..3713a7232dd8 100644
--- a/include/linux/dqblk_v1.h
+++ b/include/linux/dqblk_v1.h
@@ -5,9 +5,6 @@
#ifndef _LINUX_DQBLK_V1_H
#define _LINUX_DQBLK_V1_H
-/* Id of quota format */
-#define QFMT_VFS_OLD 1
-
/* Root squash turned on */
#define V1_DQF_RSQUASH 1
@@ -17,8 +14,4 @@
#define V1_DEL_ALLOC 0
#define V1_DEL_REWRITE 2
-/* Special information about quotafile */
-struct v1_mem_dqinfo {
-};
-
#endif /* _LINUX_DQBLK_V1_H */
diff --git a/include/linux/dqblk_v2.h b/include/linux/dqblk_v2.h
index 4f853322cb7f..18000a542677 100644
--- a/include/linux/dqblk_v2.h
+++ b/include/linux/dqblk_v2.h
@@ -1,26 +1,16 @@
/*
- * Definitions of structures for vfsv0 quota format
+ * Definitions for vfsv0 quota format
*/
#ifndef _LINUX_DQBLK_V2_H
#define _LINUX_DQBLK_V2_H
-#include <linux/types.h>
-
-/* id numbers of quota format */
-#define QFMT_VFS_V0 2
+#include <linux/dqblk_qtree.h>
/* Numbers of blocks needed for updates */
-#define V2_INIT_ALLOC 4
-#define V2_INIT_REWRITE 2
-#define V2_DEL_ALLOC 0
-#define V2_DEL_REWRITE 6
-
-/* Inmemory copy of version specific information */
-struct v2_mem_dqinfo {
- unsigned int dqi_blocks;
- unsigned int dqi_free_blk;
- unsigned int dqi_free_entry;
-};
+#define V2_INIT_ALLOC QTREE_INIT_ALLOC
+#define V2_INIT_REWRITE QTREE_INIT_REWRITE
+#define V2_DEL_ALLOC QTREE_DEL_ALLOC
+#define V2_DEL_REWRITE QTREE_DEL_REWRITE
#endif /* _LINUX_DQBLK_V2_H */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 40401b554484..d72d5d84fde5 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -36,17 +36,7 @@
#include <linux/errno.h>
#include <linux/types.h>
-#define __DQUOT_VERSION__ "dquot_6.5.1"
-#define __DQUOT_NUM_VERSION__ 6*10000+5*100+1
-
-/* Size of blocks in which are counted size limits */
-#define QUOTABLOCK_BITS 10
-#define QUOTABLOCK_SIZE (1 << QUOTABLOCK_BITS)
-
-/* Conversion routines from and to quota blocks */
-#define qb2kb(x) ((x) << (QUOTABLOCK_BITS-10))
-#define kb2qb(x) ((x) >> (QUOTABLOCK_BITS-10))
-#define toqb(x) (((x) + QUOTABLOCK_SIZE - 1) >> QUOTABLOCK_BITS)
+#define __DQUOT_VERSION__ "dquot_6.5.2"
#define MAXQUOTAS 2
#define USRQUOTA 0 /* element used for user quotas */
@@ -80,16 +70,34 @@
#define Q_GETQUOTA 0x800007 /* get user quota structure */
#define Q_SETQUOTA 0x800008 /* set user quota structure */
+/* Quota format type IDs */
+#define QFMT_VFS_OLD 1
+#define QFMT_VFS_V0 2
+
+/* Size of block in which space limits are passed through the quota
+ * interface */
+#define QIF_DQBLKSIZE_BITS 10
+#define QIF_DQBLKSIZE (1 << QIF_DQBLKSIZE_BITS)
+
/*
* Quota structure used for communication with userspace via quotactl
* Following flags are used to specify which fields are valid
*/
-#define QIF_BLIMITS 1
-#define QIF_SPACE 2
-#define QIF_ILIMITS 4
-#define QIF_INODES 8
-#define QIF_BTIME 16
-#define QIF_ITIME 32
+enum {
+ QIF_BLIMITS_B = 0,
+ QIF_SPACE_B,
+ QIF_ILIMITS_B,
+ QIF_INODES_B,
+ QIF_BTIME_B,
+ QIF_ITIME_B,
+};
+
+#define QIF_BLIMITS (1 << QIF_BLIMITS_B)
+#define QIF_SPACE (1 << QIF_SPACE_B)
+#define QIF_ILIMITS (1 << QIF_ILIMITS_B)
+#define QIF_INODES (1 << QIF_INODES_B)
+#define QIF_BTIME (1 << QIF_BTIME_B)
+#define QIF_ITIME (1 << QIF_ITIME_B)
#define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
#define QIF_USAGE (QIF_SPACE | QIF_INODES)
#define QIF_TIMES (QIF_BTIME | QIF_ITIME)
@@ -172,7 +180,7 @@ enum {
#include <asm/atomic.h>
typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
-typedef __u64 qsize_t; /* Type in which we store sizes */
+typedef long long qsize_t; /* Type in which we store sizes */
extern spinlock_t dq_data_lock;
@@ -187,12 +195,12 @@ extern spinlock_t dq_data_lock;
* Data for one user/group kept in memory
*/
struct mem_dqblk {
- __u32 dqb_bhardlimit; /* absolute limit on disk blks alloc */
- __u32 dqb_bsoftlimit; /* preferred limit on disk blks */
+ qsize_t dqb_bhardlimit; /* absolute limit on disk blks alloc */
+ qsize_t dqb_bsoftlimit; /* preferred limit on disk blks */
qsize_t dqb_curspace; /* current used space */
- __u32 dqb_ihardlimit; /* absolute limit on allocated inodes */
- __u32 dqb_isoftlimit; /* preferred inode limit */
- __u32 dqb_curinodes; /* current # allocated inodes */
+ qsize_t dqb_ihardlimit; /* absolute limit on allocated inodes */
+ qsize_t dqb_isoftlimit; /* preferred inode limit */
+ qsize_t dqb_curinodes; /* current # allocated inodes */
time_t dqb_btime; /* time limit for excessive disk use */
time_t dqb_itime; /* time limit for excessive inode use */
};
@@ -212,10 +220,7 @@ struct mem_dqinfo {
unsigned int dqi_igrace;
qsize_t dqi_maxblimit;
qsize_t dqi_maxilimit;
- union {
- struct v1_mem_dqinfo v1_i;
- struct v2_mem_dqinfo v2_i;
- } u;
+ void *dqi_priv;
};
struct super_block;
@@ -249,6 +254,11 @@ extern struct dqstats dqstats;
#define DQ_FAKE_B 3 /* no limits only usage */
#define DQ_READ_B 4 /* dquot was read into memory */
#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */
+#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\
+ * for the mask of entries set via SETQUOTA\
+ * quotactl. They are set under dq_data_lock\
+ * and the quota format handling dquot can\
+ * clear them when it sees fit. */
struct dquot {
struct hlist_node dq_hash; /* Hash list in memory */
@@ -287,11 +297,13 @@ struct dquot_operations {
int (*initialize) (struct inode *, int);
int (*drop) (struct inode *);
int (*alloc_space) (struct inode *, qsize_t, int);
- int (*alloc_inode) (const struct inode *, unsigned long);
+ int (*alloc_inode) (const struct inode *, qsize_t);
int (*free_space) (struct inode *, qsize_t);
- int (*free_inode) (const struct inode *, unsigned long);
+ int (*free_inode) (const struct inode *, qsize_t);
int (*transfer) (struct inode *, struct iattr *);
int (*write_dquot) (struct dquot *); /* Ordinary dquot write */
+ struct dquot *(*alloc_dquot)(struct super_block *, int); /* Allocate memory for new dquot */
+ void (*destroy_dquot)(struct dquot *); /* Free memory for dquot */
int (*acquire_dquot) (struct dquot *); /* Quota is going to be created on disk */
int (*release_dquot) (struct dquot *); /* Quota is going to be deleted from disk */
int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */
@@ -320,12 +332,42 @@ struct quota_format_type {
struct quota_format_type *qf_next;
};
-#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
-#define DQUOT_GRP_ENABLED 0x02 /* Group diskquotas enabled */
-#define DQUOT_USR_SUSPENDED 0x04 /* User diskquotas are off, but
+/* Quota state flags - they actually come in two flavors - for users and groups */
+enum {
+ _DQUOT_USAGE_ENABLED = 0, /* Track disk usage for users */
+ _DQUOT_LIMITS_ENABLED, /* Enforce quota limits for users */
+ _DQUOT_SUSPENDED, /* User diskquotas are off, but
* we have necessary info in
* memory to turn them on */
-#define DQUOT_GRP_SUSPENDED 0x08 /* The same for group quotas */
+ _DQUOT_STATE_FLAGS
+};
+#define DQUOT_USAGE_ENABLED (1 << _DQUOT_USAGE_ENABLED)
+#define DQUOT_LIMITS_ENABLED (1 << _DQUOT_LIMITS_ENABLED)
+#define DQUOT_SUSPENDED (1 << _DQUOT_SUSPENDED)
+#define DQUOT_STATE_FLAGS (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED | \
+ DQUOT_SUSPENDED)
+/* Other quota flags */
+#define DQUOT_QUOTA_SYS_FILE (1 << 6) /* Quota file is a special
+ * system file and user cannot
+ * touch it. Filesystem is
+ * responsible for setting
+ * S_NOQUOTA, S_NOATIME flags
+ */
+#define DQUOT_NEGATIVE_USAGE (1 << 7) /* Allow negative quota usage */
+
+static inline unsigned int dquot_state_flag(unsigned int flags, int type)
+{
+ if (type == USRQUOTA)
+ return flags;
+ return flags << _DQUOT_STATE_FLAGS;
+}
+
+static inline unsigned int dquot_generic_flag(unsigned int flags, int type)
+{
+ if (type == USRQUOTA)
+ return flags;
+ return flags >> _DQUOT_STATE_FLAGS;
+}
struct quota_info {
unsigned int flags; /* Flags for diskquotas on this device */
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index a558a4c1d35a..21b781a3350f 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -24,12 +24,21 @@ void sync_dquots(struct super_block *sb, int type);
int dquot_initialize(struct inode *inode, int type);
int dquot_drop(struct inode *inode);
+int dquot_drop_locked(struct inode *inode);
+struct dquot *dqget(struct super_block *sb, unsigned int id, int type);
+void dqput(struct dquot *dquot);
+int dquot_is_cached(struct super_block *sb, unsigned int id, int type);
+int dquot_scan_active(struct super_block *sb,
+ int (*fn)(struct dquot *dquot, unsigned long priv),
+ unsigned long priv);
+struct dquot *dquot_alloc(struct super_block *sb, int type);
+void dquot_destroy(struct dquot *dquot);
int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc);
-int dquot_alloc_inode(const struct inode *inode, unsigned long number);
+int dquot_alloc_inode(const struct inode *inode, qsize_t number);
int dquot_free_space(struct inode *inode, qsize_t number);
-int dquot_free_inode(const struct inode *inode, unsigned long number);
+int dquot_free_inode(const struct inode *inode, qsize_t number);
int dquot_transfer(struct inode *inode, struct iattr *iattr);
int dquot_commit(struct dquot *dquot);
@@ -40,11 +49,14 @@ int dquot_mark_dquot_dirty(struct dquot *dquot);
int vfs_quota_on(struct super_block *sb, int type, int format_id,
char *path, int remount);
+int vfs_quota_enable(struct inode *inode, int type, int format_id,
+ unsigned int flags);
int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
struct path *path);
int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
int format_id, int type);
int vfs_quota_off(struct super_block *sb, int type, int remount);
+int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
int vfs_quota_sync(struct super_block *sb, int type);
int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
@@ -64,24 +76,22 @@ static inline struct mem_dqinfo *sb_dqinfo(struct super_block *sb, int type)
* Functions for checking status of quota
*/
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
{
- if (type == USRQUOTA)
- return sb_dqopt(sb)->flags & DQUOT_USR_ENABLED;
- return sb_dqopt(sb)->flags & DQUOT_GRP_ENABLED;
+ return sb_dqopt(sb)->flags &
+ dquot_state_flag(DQUOT_USAGE_ENABLED, type);
}
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
{
- return sb_has_quota_enabled(sb, USRQUOTA) ||
- sb_has_quota_enabled(sb, GRPQUOTA);
+ return sb_dqopt(sb)->flags &
+ dquot_state_flag(DQUOT_LIMITS_ENABLED, type);
}
static inline int sb_has_quota_suspended(struct super_block *sb, int type)
{
- if (type == USRQUOTA)
- return sb_dqopt(sb)->flags & DQUOT_USR_SUSPENDED;
- return sb_dqopt(sb)->flags & DQUOT_GRP_SUSPENDED;
+ return sb_dqopt(sb)->flags &
+ dquot_state_flag(DQUOT_SUSPENDED, type);
}
static inline int sb_any_quota_suspended(struct super_block *sb)
@@ -90,6 +100,31 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
sb_has_quota_suspended(sb, GRPQUOTA);
}
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+ /* Currently if anything is on, then quota usage is on as well */
+ return sb_has_quota_usage_enabled(sb, type);
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+ return sb_has_quota_loaded(sb, USRQUOTA) ||
+ sb_has_quota_loaded(sb, GRPQUOTA);
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+ return sb_has_quota_loaded(sb, type) &&
+ !sb_has_quota_suspended(sb, type);
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+ return sb_has_quota_active(sb, USRQUOTA) ||
+ sb_has_quota_active(sb, GRPQUOTA);
+}
+
/*
* Operations supported for diskquotas.
*/
@@ -104,7 +139,7 @@ extern struct quotactl_ops vfs_quotactl_ops;
static inline void vfs_dq_init(struct inode *inode)
{
BUG_ON(!inode->i_sb);
- if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode))
+ if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
inode->i_sb->dq_op->initialize(inode, -1);
}
@@ -112,7 +147,7 @@ static inline void vfs_dq_init(struct inode *inode)
* a transaction (deadlocks possible otherwise) */
static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
- if (sb_any_quota_enabled(inode->i_sb)) {
+ if (sb_any_quota_active(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
return 1;
@@ -132,7 +167,7 @@ static inline int vfs_dq_prealloc_space(struct inode *inode, qsize_t nr)
static inline int vfs_dq_alloc_space_nodirty(struct inode *inode, qsize_t nr)
{
- if (sb_any_quota_enabled(inode->i_sb)) {
+ if (sb_any_quota_active(inode->i_sb)) {
/* Used space is updated in alloc_space() */
if (inode->i_sb->dq_op->alloc_space(inode, nr, 0) == NO_QUOTA)
return 1;
@@ -152,7 +187,7 @@ static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
static inline int vfs_dq_alloc_inode(struct inode *inode)
{
- if (sb_any_quota_enabled(inode->i_sb)) {
+ if (sb_any_quota_active(inode->i_sb)) {
vfs_dq_init(inode);
if (inode->i_sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA)
return 1;
@@ -162,7 +197,7 @@ static inline int vfs_dq_alloc_inode(struct inode *inode)
static inline void vfs_dq_free_space_nodirty(struct inode *inode, qsize_t nr)
{
- if (sb_any_quota_enabled(inode->i_sb))
+ if (sb_any_quota_active(inode->i_sb))
inode->i_sb->dq_op->free_space(inode, nr);
else
inode_sub_bytes(inode, nr);
@@ -176,7 +211,7 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
static inline void vfs_dq_free_inode(struct inode *inode)
{
- if (sb_any_quota_enabled(inode->i_sb))
+ if (sb_any_quota_active(inode->i_sb))
inode->i_sb->dq_op->free_inode(inode, 1);
}
@@ -197,12 +232,12 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
#else
-static inline int sb_has_quota_enabled(struct super_block *sb, int type)
+static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
{
return 0;
}
-static inline int sb_any_quota_enabled(struct super_block *sb)
+static inline int sb_has_quota_limits_enabled(struct super_block *sb, int type)
{
return 0;
}
@@ -217,6 +252,27 @@ static inline int sb_any_quota_suspended(struct super_block *sb)
return 0;
}
+/* Does kernel know about any quota information for given sb + type? */
+static inline int sb_has_quota_loaded(struct super_block *sb, int type)
+{
+ return 0;
+}
+
+static inline int sb_any_quota_loaded(struct super_block *sb)
+{
+ return 0;
+}
+
+static inline int sb_has_quota_active(struct super_block *sb, int type)
+{
+ return 0;
+}
+
+static inline int sb_any_quota_active(struct super_block *sb)
+{
+ return 0;
+}
+
/*
* NO-OP when quota not configured.
*/
diff --git a/mm/pdflush.c b/mm/pdflush.c
index a0a14c4d5072..13af84ddada7 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -223,6 +223,7 @@ int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0)
return ret;
}
+EXPORT_SYMBOL(pdflush_operation);
static void start_one_pdflush_thread(void)
{