summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2010-12-31 10:55:42 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2010-12-31 10:55:42 +1100
commit46491588c6335cbc71ca660d9feb2c0d4f0f6401 (patch)
tree7f650ddd7b6e934c63332b75b0cd598006a714a9 /fs
parentd4a2c96152ed0290c0d6c7791f5315b2f01105ff (diff)
parent6245d166245845e40bd290b0df1ae99a2d5bbf63 (diff)
Merge remote branch 'vfs-scale/vfs-scale-working'
Conflicts: Documentation/filesystems/Locking fs/cifs/dir.c fs/fuse/inode.c fs/hfsplus/hfsplus_fs.h fs/hfsplus/unicode.c
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_inode.c39
-rw-r--r--fs/adfs/dir.c13
-rw-r--r--fs/adfs/super.c11
-rw-r--r--fs/affs/amigaffs.c4
-rw-r--r--fs/affs/namei.c68
-rw-r--r--fs/affs/super.c11
-rw-r--r--fs/afs/dir.c6
-rw-r--r--fs/afs/super.c10
-rw-r--r--fs/anon_inodes.c6
-rw-r--r--fs/autofs4/autofs_i.h21
-rw-r--r--fs/autofs4/expire.c141
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c78
-rw-r--r--fs/autofs4/waitq.c23
-rw-r--r--fs/befs/linuxvfs.c10
-rw-r--r--fs/bfs/inode.c9
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/acl.c19
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/inode.c27
-rw-r--r--fs/ceph/dir.c21
-rw-r--r--fs/ceph/inode.c27
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/cifs/cifsfs.c9
-rw-r--r--fs/cifs/dir.c33
-rw-r--r--fs/cifs/inode.c14
-rw-r--r--fs/cifs/link.c4
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/coda/cache.c4
-rw-r--r--fs/coda/dir.c8
-rw-r--r--fs/coda/inode.c9
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c13
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/dcache.c1388
-rw-r--r--fs/ecryptfs/dentry.c15
-rw-r--r--fs/ecryptfs/inode.c8
-rw-r--r--fs/ecryptfs/main.c4
-rw-r--r--fs/ecryptfs/super.c12
-rw-r--r--fs/efs/super.c9
-rw-r--r--fs/exofs/super.c9
-rw-r--r--fs/exportfs/expfs.c14
-rw-r--r--fs/ext2/acl.c11
-rw-r--r--fs/ext2/acl.h8
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext2/super.c9
-rw-r--r--fs/ext3/acl.c11
-rw-r--r--fs/ext3/acl.h8
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/namei.c4
-rw-r--r--fs/ext3/super.c9
-rw-r--r--fs/ext4/acl.c11
-rw-r--r--fs/ext4/acl.h4
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fat/namei_msdos.c23
-rw-r--r--fs/fat/namei_vfat.c63
-rw-r--r--fs/filesystems.c3
-rw-r--r--fs/freevxfs/vxfs_inode.c9
-rw-r--r--fs/fs_struct.c36
-rw-r--r--fs/fuse/dir.c9
-rw-r--r--fs/fuse/inode.c13
-rw-r--r--fs/generic_acl.c20
-rw-r--r--fs/gfs2/dentry.c5
-rw-r--r--fs/gfs2/export.c4
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/ops_inode.c2
-rw-r--r--fs/gfs2/super.c9
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs_fs.h8
-rw-r--r--fs/hfs/string.c17
-rw-r--r--fs/hfs/super.c11
-rw-r--r--fs/hfsplus/dir.c2
-rw-r--r--fs/hfsplus/hfsplus_fs.h9
-rw-r--r--fs/hfsplus/super.c12
-rw-r--r--fs/hfsplus/unicode.c19
-rw-r--r--fs/hostfs/hostfs_kern.c37
-rw-r--r--fs/hpfs/dentry.c27
-rw-r--r--fs/hpfs/super.c9
-rw-r--r--fs/hppfs/hppfs.c9
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/inode.c50
-rw-r--r--fs/internal.h1
-rw-r--r--fs/isofs/inode.c131
-rw-r--r--fs/isofs/namei.c5
-rw-r--r--fs/jffs2/super.c9
-rw-r--r--fs/jfs/namei.c61
-rw-r--r--fs/jfs/super.c12
-rw-r--r--fs/libfs.c63
-rw-r--r--fs/locks.c2
-rw-r--r--fs/logfs/inode.c9
-rw-r--r--fs/minix/inode.c9
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/namei.c937
-rw-r--r--fs/namespace.c271
-rw-r--r--fs/ncpfs/dir.c84
-rw-r--r--fs/ncpfs/inode.c11
-rw-r--r--fs/ncpfs/ncplib_kernel.h16
-rw-r--r--fs/nfs/dir.c17
-rw-r--r--fs/nfs/getroot.c10
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/namespace.c17
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/super.c12
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/ntfs/inode.c9
-rw-r--r--fs/ocfs2/dcache.c10
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c9
-rw-r--r--fs/ocfs2/export.c4
-rw-r--r--fs/ocfs2/namei.c10
-rw-r--r--fs/ocfs2/super.c9
-rw-r--r--fs/openpromfs/inode.c9
-rw-r--r--fs/pipe.c12
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/proc/base.c14
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/inode.c9
-rw-r--r--fs/proc/proc_sysctl.c23
-rw-r--r--fs/qnx4/inode.c9
-rw-r--r--fs/reiserfs/super.c9
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/romfs/super.c9
-rw-r--r--fs/squashfs/super.c9
-rw-r--r--fs/super.c5
-rw-r--r--fs/sysfs/dir.c22
-rw-r--r--fs/sysv/inode.c9
-rw-r--r--fs/sysv/namei.c5
-rw-r--r--fs/sysv/super.c2
-rw-r--r--fs/ubifs/super.c10
-rw-r--r--fs/udf/super.c9
-rw-r--r--fs/ufs/super.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c8
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_iget.c13
141 files changed, 3321 insertions, 1320 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f3933..466d2a4fc5cb 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
*
*/
-static int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(const struct dentry *dentry)
{
P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
*
*/
-static int v9fs_cached_dentry_delete(struct dentry *dentry)
+static int v9fs_cached_dentry_delete(const struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 34bf71b56542..59782981b225 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
*
*/
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
}
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, v9fs_i_callback);
+}
#endif
/**
@@ -270,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
{
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
/* Directory should have only one entry. */
BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return dentry;
}
@@ -628,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
}
if (v9ses->cache)
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
else
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
@@ -742,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
err);
goto error;
}
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -760,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
err = PTR_ERR(inode);
goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_instantiate(dentry, inode);
}
/* Now set the ACL based on the default value */
@@ -949,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
err);
goto error;
}
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -966,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
err = PTR_ERR(inode);
goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_instantiate(dentry, inode);
}
/* Now set the ACL based on the default value */
@@ -1034,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
inst_out:
if (v9ses->cache)
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
else
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_add(dentry, inode);
return NULL;
@@ -1702,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
err);
goto error;
}
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -1715,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_instantiate(dentry, inode);
}
@@ -1849,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
ihold(old_dentry->d_inode);
}
- dentry->d_op = old_dentry->d_op;
+ d_set_d_op(dentry, old_dentry->d_op);
d_instantiate(dentry, old_dentry->d_inode);
return err;
@@ -1973,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
err);
goto error;
}
- dentry->d_op = &v9fs_cached_dentry_operations;
+ d_set_d_op(dentry, &v9fs_cached_dentry_operations);
d_instantiate(dentry, inode);
err = v9fs_fid_add(dentry, fid);
if (err < 0)
@@ -1989,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
err = PTR_ERR(inode);
goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ d_set_d_op(dentry, &v9fs_dentry_operations);
d_instantiate(dentry, inode);
}
/* Now set the ACL based on the default value */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4de744..bf7693c384f9 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
};
static int
-adfs_hash(struct dentry *parent, struct qstr *qstr)
+adfs_hash(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr)
{
const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
const unsigned char *name;
@@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
* requirements of the underlying filesystem.
*/
static int
-adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name)
+adfs_compare(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
int i;
- if (entry->len != name->len)
+ if (len != name->len)
return 1;
for (i = 0; i < name->len; i++) {
char a, b;
- a = entry->name[i];
+ a = str[i];
b = name->name[i];
if (a >= 'A' && a <= 'Z')
@@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
struct object_info obj;
int error;
- dentry->d_op = &adfs_dentry_operations;
+ d_set_d_op(dentry, &adfs_dentry_operations);
lock_kernel();
error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42d..a4041b52fbca 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
}
+static void adfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
adfs_error(sb, "get root inode failed\n");
goto error;
} else
- sb->s_root->d_op = &adfs_dentry_operations;
+ d_set_d_op(sb->s_root, &adfs_dentry_operations);
unlock_kernel();
return 0;
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a30f7a3..3a4557e8325c 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
void *data = dentry->d_fsdata;
struct list_head *head, *next;
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
head = &inode->i_dentry;
next = head->next;
while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
}
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0bc07a..944a4042fb65 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,11 +13,19 @@
typedef int (*toupper_t)(int);
static int affs_toupper(int ch);
-static int affs_hash_dentry(struct dentry *, struct qstr *);
-static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_hash_dentry(const struct dentry *,
+ const struct inode *, struct qstr *);
+static int affs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_intl_hash_dentry(const struct dentry *,
+ const struct inode *, struct qstr *);
+static int affs_intl_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
const struct dentry_operations affs_dentry_operations = {
.d_hash = affs_hash_dentry,
@@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
* Note: the dentry argument is the parent dentry.
*/
static inline int
-__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
{
const u8 *name = qstr->name;
unsigned long hash;
int i;
- i = affs_check_name(qstr->name,qstr->len);
+ i = affs_check_name(qstr->name, qstr->len);
if (i)
return i;
@@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
}
static int
-affs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- return __affs_hash_dentry(dentry, qstr, affs_toupper);
+ return __affs_hash_dentry(qstr, affs_toupper);
}
static int
-affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- return __affs_hash_dentry(dentry, qstr, affs_intl_toupper);
+ return __affs_hash_dentry(qstr, affs_intl_toupper);
}
-static inline int
-__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper)
+static inline int __affs_compare_dentry(unsigned int len,
+ const char *str, const struct qstr *name, toupper_t toupper)
{
- const u8 *aname = a->name;
- const u8 *bname = b->name;
- int len;
+ const u8 *aname = str;
+ const u8 *bname = name->name;
- /* 'a' is the qstr of an already existing dentry, so the name
- * must be valid. 'b' must be validated first.
+ /*
+ * 'str' is the name of an already existing dentry, so the name
+ * must be valid. 'name' must be validated first.
*/
- if (affs_check_name(b->name,b->len))
+ if (affs_check_name(name->name, name->len))
return 1;
- /* If the names are longer than the allowed 30 chars,
+ /*
+ * If the names are longer than the allowed 30 chars,
* the excess is ignored, so their length may differ.
*/
- len = a->len;
if (len >= 30) {
- if (b->len < 30)
+ if (name->len < 30)
return 1;
len = 30;
- } else if (len != b->len)
+ } else if (len != name->len)
return 1;
for (; len > 0; len--)
@@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
}
static int
-affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(dentry, a, b, affs_toupper);
+ return __affs_compare_dentry(len, str, name, affs_toupper);
}
static int
-affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return __affs_compare_dentry(dentry, a, b, affs_intl_toupper);
+ return __affs_compare_dentry(len, str, name, affs_intl_toupper);
}
/*
@@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
if (IS_ERR(inode))
return ERR_CAST(inode);
}
- dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
+ d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
d_add(dentry, inode);
return NULL;
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cbd..d39081bbe7ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
return &i->vfs_inode;
}
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
}
+static void affs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -475,7 +482,7 @@ got_root:
printk(KERN_ERR "AFFS: Get root inode failed\n");
goto out_error;
}
- sb->s_root->d_op = &affs_dentry_operations;
+ d_set_d_op(sb->s_root, &affs_dentry_operations);
pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a86..b8bb7e7148d0 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
static int afs_dir_open(struct inode *inode, struct file *file);
static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
-static int afs_d_delete(struct dentry *dentry);
+static int afs_d_delete(const struct dentry *dentry);
static void afs_d_release(struct dentry *dentry);
static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
loff_t fpos, u64 ino, unsigned dtype);
@@ -581,7 +581,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
}
success:
- dentry->d_op = &afs_fs_dentry_operations;
+ d_set_d_op(dentry, &afs_fs_dentry_operations);
d_add(dentry, inode);
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
@@ -730,7 +730,7 @@ out_bad:
* - called from dput() when d_count is going to 0.
* - return 1 to request dentry be unhashed, 0 otherwise
*/
-static int afs_d_delete(struct dentry *dentry)
+static int afs_d_delete(const struct dentry *dentry)
{
_enter("%s", dentry->d_name.name);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece4..f901a9d7c111 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
return &vnode->vfs_inode;
}
+static void afs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(afs_inode_cachep, vnode);
+}
+
/*
* destroy an AFS inode struct
*/
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
ASSERTCMP(vnode->server, ==, NULL);
- kmem_cache_free(afs_inode_cachep, vnode);
+ call_rcu(&inode->i_rcu, afs_i_callback);
atomic_dec(&afs_count_active_inodes);
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 57ce55b2564c..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
this.name = name;
this.len = strlen(name);
this.hash = 0;
- path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+ path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
if (!path.dentry)
goto err_module;
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
*/
ihold(anon_inode_inode);
- path.dentry->d_op = &anon_inodefs_dentry_operations;
+ d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
d_instantiate(path.dentry, anon_inode_inode);
error = -ENFILE;
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
return 0;
err_mntput:
- mntput(anon_inode_mnt);
+ mntput_long(anon_inode_mnt);
err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type);
err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3d283abf67d7..0fffe1c24cec 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
#include <linux/auto_fs4.h>
#include <linux/auto_dev-ioctl.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/list.h>
/* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do { \
current->pid, __func__, ##args); \
} while (0)
+extern spinlock_t autofs4_lock;
+
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
structure. It holds a reference to the dentry, so dentries are never
@@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry)
return dentry->d_inode && !d_unhashed(dentry);
}
-static inline int __simple_empty(struct dentry *dentry)
+static inline void __autofs4_add_expiring(struct dentry *dentry)
{
- struct dentry *child;
- int ret = 0;
-
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
- if (simple_positive(child))
- goto out;
- ret = 1;
-out:
- return ret;
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ if (ino) {
+ if (list_empty(&ino->expiring))
+ list_add(&ino->expiring, &sbi->expiring_list);
+ }
+ return;
}
static inline void autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c9417fb1..cc1d01365905 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
}
/*
- * Calculate next entry in top down tree traversal.
- * From next_mnt in namespace.c - elegant.
+ * Calculate and dget next entry in top down tree traversal.
*/
-static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+static struct dentry *get_next_positive_dentry(struct dentry *prev,
+ struct dentry *root)
{
- struct list_head *next = p->d_subdirs.next;
+ struct list_head *next;
+ struct dentry *p, *ret;
+
+ if (prev == NULL)
+ return dget(prev);
+ spin_lock(&autofs4_lock);
+relock:
+ p = prev;
+ spin_lock(&p->d_lock);
+again:
+ next = p->d_subdirs.next;
if (next == &p->d_subdirs) {
while (1) {
- if (p == root)
+ struct dentry *parent;
+
+ if (p == root) {
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+ dput(prev);
return NULL;
+ }
+
+ parent = p->d_parent;
+ if (!spin_trylock(&parent->d_lock)) {
+ spin_unlock(&p->d_lock);
+ cpu_relax();
+ goto relock;
+ }
+ spin_unlock(&p->d_lock);
next = p->d_u.d_child.next;
- if (next != &p->d_parent->d_subdirs)
+ p = parent;
+ if (next != &parent->d_subdirs)
break;
- p = p->d_parent;
}
}
- return list_entry(next, struct dentry, d_u.d_child);
+ ret = list_entry(next, struct dentry, d_u.d_child);
+
+ spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
+ /* Negative dentry - try next */
+ if (!simple_positive(ret)) {
+ spin_unlock(&ret->d_lock);
+ p = ret;
+ goto again;
+ }
+ dget_dlock(ret);
+ spin_unlock(&ret->d_lock);
+ spin_unlock(&p->d_lock);
+ spin_unlock(&autofs4_lock);
+
+ dput(prev);
+
+ return ret;
}
/*
@@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
if (!simple_positive(top))
return 1;
- spin_lock(&dcache_lock);
- for (p = top; p; p = next_dentry(p, top)) {
- /* Negative dentry - give up */
- if (!simple_positive(p))
- continue;
-
+ p = NULL;
+ while ((p = get_next_positive_dentry(p, top))) {
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
- p = dget(p);
- spin_unlock(&dcache_lock);
-
/*
* Is someone visiting anywhere in the subtree ?
* If there's no mount we need to check the usage
@@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
else
ino_count++;
- if (atomic_read(&p->d_count) > ino_count) {
+ if (p->d_count > ino_count) {
top_ino->last_used = jiffies;
dput(p);
return 1;
}
}
- dput(p);
- spin_lock(&dcache_lock);
}
- spin_unlock(&dcache_lock);
/* Timeout of a tree mount is ultimately determined by its top dentry */
if (!autofs4_can_expire(top, timeout, do_now))
@@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
DPRINTK("parent %p %.*s",
parent, (int)parent->d_name.len, parent->d_name.name);
- spin_lock(&dcache_lock);
- for (p = parent; p; p = next_dentry(p, parent)) {
- /* Negative dentry - give up */
- if (!simple_positive(p))
- continue;
-
+ p = NULL;
+ while ((p = get_next_positive_dentry(p, parent))) {
DPRINTK("dentry %p %.*s",
p, (int) p->d_name.len, p->d_name.name);
- p = dget(p);
- spin_unlock(&dcache_lock);
-
if (d_mountpoint(p)) {
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, p))
- goto cont;
+ continue;
/* Can we expire this guy */
if (autofs4_can_expire(p, timeout, do_now))
return p;
}
-cont:
- dput(p);
- spin_lock(&dcache_lock);
}
- spin_unlock(&dcache_lock);
return NULL;
}
@@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
struct autofs_info *ino = autofs4_dentry_ino(root);
if (d_mountpoint(root)) {
ino->flags |= AUTOFS_INF_MOUNTPOINT;
- root->d_mounted--;
+ spin_lock(&root->d_lock);
+ root->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&root->d_lock);
}
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
@@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
{
unsigned long timeout;
struct dentry *root = sb->s_root;
+ struct dentry *dentry;
struct dentry *expired = NULL;
- struct list_head *next;
int do_now = how & AUTOFS_EXP_IMMEDIATE;
int exp_leaves = how & AUTOFS_EXP_LEAVES;
struct autofs_info *ino;
@@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
now = jiffies;
timeout = sbi->exp_timeout;
- spin_lock(&dcache_lock);
- next = root->d_subdirs.next;
-
- /* On exit from the loop expire is set to a dgot dentry
- * to expire or it's NULL */
- while ( next != &root->d_subdirs ) {
- struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
- /* Negative dentry - give up */
- if (!simple_positive(dentry)) {
- next = next->next;
- continue;
- }
-
- dentry = dget(dentry);
- spin_unlock(&dcache_lock);
-
+ dentry = NULL;
+ while ((dentry = get_next_positive_dentry(dentry, root))) {
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
@@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 2;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
/* Can we umount this guy */
@@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
if (!exp_leaves) {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
} else {
/* Path walk currently on this dentry? */
ino_count = atomic_read(&ino->count) + 1;
- if (atomic_read(&dentry->d_count) > ino_count)
+ if (dentry->d_count > ino_count)
goto next;
expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
@@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
}
next:
spin_unlock(&sbi->fs_lock);
- dput(dentry);
- spin_lock(&dcache_lock);
- next = next->next;
}
- spin_unlock(&dcache_lock);
return NULL;
found:
@@ -408,9 +410,13 @@ found:
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&expired->d_parent->d_lock);
+ spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
- spin_unlock(&dcache_lock);
+ spin_unlock(&expired->d_lock);
+ spin_unlock(&expired->d_parent->d_lock);
+ spin_unlock(&autofs4_lock);
return expired;
}
@@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
- sb->s_root->d_mounted++;
+ spin_lock(&sb->s_root->d_lock);
+ /*
+ * If we haven't been expired away, then reset
+ * mounted status.
+ */
+ if (mnt->mnt_parent != mnt)
+ sb->s_root->d_flags |= DCACHE_MOUNTED;
+ spin_unlock(&sb->s_root->d_lock);
ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
}
ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa706..a7bdb9dcac84 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
goto fail_iput;
pipe = NULL;
- root->d_op = &autofs4_sb_dentry_operations;
+ d_set_d_op(root, &autofs4_sb_dentry_operations);
root->d_fsdata = ino;
/* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d34896cfb19f..bfe3f2eb684d 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
#include "autofs_i.h"
+DEFINE_SPINLOCK(autofs4_lock);
+
static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
static int autofs4_dir_unlink(struct inode *,struct dentry *);
static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
* autofs file system so just let the libfs routines handle
* it.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
return -ENOENT;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
out:
return dcache_dir_open(inode, file);
@@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
/* We trigger a mount for almost all flags */
lookup_type = autofs4_need_mount(nd->flags);
spin_lock(&sbi->fs_lock);
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
goto follow;
}
@@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
*/
if (ino->flags & AUTOFS_INF_PENDING ||
(!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
status = try_to_fill_dentry(dentry, nd->flags);
@@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
goto follow;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
spin_unlock(&sbi->fs_lock);
follow:
/*
@@ -346,12 +355,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
/* Check for a non-mountpoint directory with no contents */
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&dentry->d_lock);
if (S_ISDIR(dentry->d_inode->i_mode) &&
!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
DPRINTK("dentry=%p %.*s, emptydir",
dentry, dentry->d_name.len, dentry->d_name.name);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
/* The daemon never causes a mount to trigger */
if (oz_mode)
@@ -367,7 +378,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
return status;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&autofs4_lock);
return 1;
}
@@ -422,7 +434,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->active_list;
list_for_each(p, head) {
@@ -436,7 +448,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
spin_lock(&active->d_lock);
/* Already gone? */
- if (atomic_read(&active->d_count) == 0)
+ if (active->d_count == 0)
goto next;
qstr = &active->d_name;
@@ -452,17 +464,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
goto next;
if (d_unhashed(active)) {
- dget(active);
+ dget_dlock(active);
spin_unlock(&active->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return active;
}
next:
spin_unlock(&active->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return NULL;
}
@@ -477,7 +489,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
const unsigned char *str = name->name;
struct list_head *p, *head;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
spin_lock(&sbi->lookup_lock);
head = &sbi->expiring_list;
list_for_each(p, head) {
@@ -507,17 +519,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
goto next;
if (d_unhashed(expiring)) {
- dget(expiring);
+ dget_dlock(expiring);
spin_unlock(&expiring->d_lock);
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return expiring;
}
next:
spin_unlock(&expiring->d_lock);
}
spin_unlock(&sbi->lookup_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return NULL;
}
@@ -559,7 +571,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
* we check for the hashed dentry and return the newly
* hashed dentry.
*/
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
/*
* And we need to ensure that the same dentry is used for
@@ -698,9 +710,9 @@ static int autofs4_dir_symlink(struct inode *dir,
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
else
- dentry->d_op = &autofs4_dentry_operations;
+ d_set_d_op(dentry, &autofs4_dentry_operations);
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
@@ -753,12 +765,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
dir->i_mtime = CURRENT_TIME;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
autofs4_add_expiring(dentry);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
return 0;
}
@@ -775,16 +787,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
if (!autofs4_oz_mode(sbi))
return -EACCES;
- spin_lock(&dcache_lock);
+ spin_lock(&autofs4_lock);
+ spin_lock(&sbi->lookup_lock);
+ spin_lock(&dentry->d_lock);
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&sbi->lookup_lock);
+ spin_unlock(&autofs4_lock);
return -ENOTEMPTY;
}
- autofs4_add_expiring(dentry);
- spin_lock(&dentry->d_lock);
+ __autofs4_add_expiring(dentry);
+ spin_unlock(&sbi->lookup_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -829,9 +845,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_add(dentry, inode);
if (dir == dir->i_sb->s_root->d_inode)
- dentry->d_op = &autofs4_root_dentry_operations;
+ d_set_d_op(dentry, &autofs4_root_dentry_operations);
else
- dentry->d_op = &autofs4_dentry_operations;
+ d_set_d_op(dentry, &autofs4_dentry_operations);
dentry->d_fsdata = ino;
ino->dentry = dget(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375386f8..c5f8459c905e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
{
struct dentry *root = sbi->sb->s_root;
struct dentry *tmp;
- char *buf = *name;
+ char *buf;
char *p;
- int len = 0;
+ int len;
+ unsigned seq;
- spin_lock(&dcache_lock);
+rename_retry:
+ buf = *name;
+ len = 0;
+
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
+ spin_lock(&autofs4_lock);
for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
len += tmp->d_name.len + 1;
if (!len || --len > NAME_MAX) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
return 0;
}
@@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
p -= tmp->d_name.len;
strncpy(p, tmp->d_name.name, tmp->d_name.len);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&autofs4_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
return len;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c6..de93581b79a2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void
-befs_destroy_inode(struct inode *inode)
+static void befs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
}
+static void befs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, befs_i_callback);
+}
+
static void init_once(void *foo)
{
struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49bb..a8e37f81d097 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
return &bi->vfs_inode;
}
-static void bfs_destroy_inode(struct inode *inode)
+static void bfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
}
+static void bfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct bfs_inode_info *bi = foo;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4230252fd689..771f23527010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void bdev_destroy_inode(struct inode *inode)
+static void bdev_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct bdev_inode *bdi = BDEV_I(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(bdev_cachep, bdi);
}
+static void bdev_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, bdev_i_callback);
+}
+
static void init_once(void *foo)
{
struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b6..c03c75317995 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
return ret;
}
-int btrfs_check_acl(struct inode *inode, int mask)
+int btrfs_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
{
struct posix_acl *acl;
int error = -EAGAIN;
- acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ } else {
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
}
return error;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index af52f6d7a4d8..4b978b608b5c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2544,9 +2544,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
-int btrfs_check_acl(struct inode *inode, int mask);
+int btrfs_check_acl_rcu(struct inode *inode, int mask, unsigned int flags);
#else
-#define btrfs_check_acl NULL
+#define btrfs_check_acl_rcu NULL
#endif
int btrfs_init_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 659f532d26a0..0ccf9a8afcdf 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -225,7 +225,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
key.offset = 0;
dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
if (!IS_ERR(dentry))
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
return dentry;
fail:
btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 72f31ecb5c90..74b451464718 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
int index;
int ret;
- dentry->d_op = &btrfs_dentry_operations;
+ d_set_d_op(dentry, &btrfs_dentry_operations);
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return inode;
}
-static int btrfs_dentry_delete(struct dentry *dentry)
+static int btrfs_dentry_delete(const struct dentry *dentry)
{
struct btrfs_root *root;
@@ -6495,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+static void btrfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+
void btrfs_destroy_inode(struct inode *inode)
{
struct btrfs_ordered_extent *ordered;
@@ -6564,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+ call_rcu(&inode->i_rcu, btrfs_i_callback);
}
int btrfs_drop_inode(struct inode *inode)
@@ -7204,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission_rcu(struct inode *inode, int mask, unsigned int flags)
{
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
return -EACCES;
- return generic_permission(inode, mask, btrfs_check_acl);
+ return generic_permission_rcu(inode, mask, flags, btrfs_check_acl_rcu);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7227,11 +7234,11 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
- .permission = btrfs_permission,
+ .permission_rcu = btrfs_permission_rcu,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
- .permission = btrfs_permission,
+ .permission_rcu = btrfs_permission_rcu,
};
static const struct file_operations btrfs_dir_file_operations = {
@@ -7300,14 +7307,14 @@ static const struct inode_operations btrfs_file_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
- .permission = btrfs_permission,
+ .permission_rcu = btrfs_permission_rcu,
.fallocate = btrfs_fallocate,
.fiemap = btrfs_fiemap,
};
static const struct inode_operations btrfs_special_inode_operations = {
.getattr = btrfs_getattr,
.setattr = btrfs_setattr,
- .permission = btrfs_permission,
+ .permission_rcu = btrfs_permission_rcu,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
@@ -7318,7 +7325,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.getattr = btrfs_getattr,
- .permission = btrfs_permission,
+ .permission_rcu = btrfs_permission_rcu,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 562f9884a4d9..46c1b89e0b15 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -42,11 +42,11 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- dentry->d_op = &ceph_dentry_ops;
+ d_set_d_op(dentry->d_op, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- dentry->d_op = &ceph_snapdir_dentry_ops;
+ d_set_d_op(dentry->d_op, &ceph_snapdir_dentry_ops);
else
- dentry->d_op = &ceph_snap_dentry_ops;
+ d_set_d_op(dentry->d_op, &ceph_snap_dentry_ops);
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
@@ -112,7 +112,7 @@ static int __dcache_readdir(struct file *filp,
dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
last);
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
/* start at beginning? */
if (filp->f_pos == 2 || last == NULL ||
@@ -136,6 +136,7 @@ more:
fi->at_end = 1;
goto out_unlock;
}
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (!d_unhashed(dentry) && dentry->d_inode &&
ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -145,13 +146,15 @@ more:
dentry->d_name.len, dentry->d_name.name, di->offset,
filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
!dentry->d_inode ? " null" : "");
+ spin_unlock(&dentry->d_lock);
p = p->prev;
dentry = list_entry(p, struct dentry, d_u.d_child);
di = ceph_dentry(dentry);
}
- atomic_inc(&dentry->d_count);
- spin_unlock(&dcache_lock);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -177,19 +180,19 @@ more:
filp->f_pos++;
- /* make sure a dentry wasn't dropped while we didn't have dcache_lock */
+ /* make sure a dentry wasn't dropped while we didn't have parent lock */
if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
err = -EAGAIN;
goto out;
}
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
p = p->prev; /* advance to next dentry */
goto more;
out_unlock:
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
out:
if (last)
dput(last);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e791fa34b23d..b0b6f9e1faad 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -370,6 +370,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
return &ci->vfs_inode;
}
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
void ceph_destroy_inode(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -409,7 +418,7 @@ void ceph_destroy_inode(struct inode *inode)
if (ci->i_xattrs.prealloc_blob)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
- kmem_cache_free(ceph_inode_cachep, ci);
+ call_rcu(&inode->i_rcu, ceph_i_callback);
}
@@ -845,13 +854,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
di->offset = ceph_inode(inode)->i_max_offset++;
spin_unlock(&inode->i_lock);
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&dir->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dir->d_lock);
}
/*
@@ -883,8 +892,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
} else if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
- dn, atomic_read(&dn->d_count),
- realdn, atomic_read(&realdn->d_count),
+ dn, dn->d_count,
+ realdn, realdn->d_count,
realdn->d_inode, ceph_vinop(realdn->d_inode));
dput(dn);
dn = realdn;
@@ -1235,11 +1244,11 @@ retry_lookup:
goto retry_lookup;
} else {
/* reorder parent's d_subdirs */
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&parent->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
di = dn->d_fsdata;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 509339ceef72..1e30d194a8e3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1498,7 +1498,7 @@ retry:
*base = ceph_ino(temp->d_inode);
*plen = len;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, atomic_read(&dentry->d_count), *base, len, path);
+ dentry, dentry->d_count, *base, len, path);
return path;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9df5c0b94d0f..a5db37c099da 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb)
return &cifs_inode->vfs_inode;
}
+static void cifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+}
+
static void
cifs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
+ call_rcu(&inode->i_rcu, cifs_i_callback);
}
static void
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 521d841b1fd1..e3b10ca6d453 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
struct inode *newinode)
{
if (tcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
}
@@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
if (rc == 0)
d_instantiate(direntry, newinode);
@@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if ((rc == 0) && (newInode != NULL)) {
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_add(direntry, newInode);
if (posix_open) {
filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
rc = 0;
direntry->d_time = jiffies;
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_add(direntry, NULL);
/* if it was once a directory (but how can we tell?) we could do
shrink_dcache_parent(direntry); */
@@ -700,9 +700,10 @@ const struct dentry_operations cifs_dentry_ops = {
/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
};
-static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
+static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *q)
{
- struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+ struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
unsigned long hash;
int i;
@@ -715,13 +716,15 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
return 0;
}
-static int cifs_ci_compare(struct dentry *dentry, struct qstr *a,
- struct qstr *b)
+static int cifs_ci_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls;
+ struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
- if ((a->len == b->len) &&
- (nls_strnicmp(codepage, a->name, b->name, a->len) == 0))
+ if ((name->len == len) &&
+ (nls_strnicmp(codepage, name->name, str, len) == 0))
return 0;
return 1;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 589f3e3f6e00..a853a89857a5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -809,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode)
{
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return true;
}
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return false;
}
@@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
to set uid/gid */
inc_nlink(inode);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1363,9 +1363,9 @@ mkdir_get_info:
inode->i_sb, xid, NULL);
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
/* setting nlink not necessary except in cases where we
* failed to get it from the server or was set bogus */
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7b..fe2f6a93c49e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
rc);
} else {
if (pTcon->nocase)
- direntry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(direntry, &cifs_ci_dentry_ops);
else
- direntry->d_op = &cifs_dentry_ops;
+ d_set_d_op(direntry, &cifs_dentry_ops);
d_instantiate(direntry, newinode);
}
}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index a73eb9f4bdaf..ec5b68e3b928 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
cFYI(1, "For %s", name->name);
if (parent->d_op && parent->d_op->d_hash)
- parent->d_op->d_hash(parent, name);
+ parent->d_op->d_hash(parent, parent->d_inode, name);
else
name->hash = full_name_hash(name->name, name->len);
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
}
if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
- dentry->d_op = &cifs_ci_dentry_ops;
+ d_set_d_op(dentry, &cifs_ci_dentry_ops);
else
- dentry->d_op = &cifs_dentry_ops;
+ d_set_d_op(dentry, &cifs_dentry_ops);
alias = d_materialise_unique(dentry, inode);
if (alias != NULL) {
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08e70cf..5525e1c660fd 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
struct list_head *child;
struct dentry *de;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
list_for_each(child, &parent->d_subdirs)
{
de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
continue;
coda_flag_inode(de->d_inode, flag);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
return;
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b35539601..aa40c811f8d2 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -47,7 +47,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
/* dentry ops */
static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
-static int coda_dentry_delete(struct dentry *);
+static int coda_dentry_delete(const struct dentry *);
/* support routines */
static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -125,7 +125,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
return ERR_PTR(error);
exit:
- entry->d_op = &coda_dentry_operations;
+ d_set_d_op(entry, &coda_dentry_operations);
if (inode && (type & CODA_NOCACHE))
coda_flag_inode(inode, C_VATTR | C_PURGE);
@@ -559,7 +559,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
if (cii->c_flags & C_FLUSH)
coda_flag_inode_children(inode, C_FLUSH);
- if (atomic_read(&de->d_count) > 1)
+ if (de->d_count > 1)
/* pretend it's valid, but don't change the flags */
goto out;
@@ -577,7 +577,7 @@ out:
* This is the callback from dput() when d_count is going to 0.
* We use this to unhash dentries with bad inodes.
*/
-static int coda_dentry_delete(struct dentry * dentry)
+static int coda_dentry_delete(const struct dentry * dentry)
{
int flags;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5ea57c8c7f97..50dc7d189f56 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void coda_destroy_inode(struct inode *inode)
+static void coda_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(coda_inode_cachep, ITOC(inode));
}
+static void coda_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, coda_i_callback);
+}
+
static void init_once(void *foo)
{
struct coda_inode_info *ei = (struct coda_inode_info *) foo;
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da6061a6df40..026cf68553a4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
{
struct config_item * item = NULL;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (!d_unhashed(dentry)) {
struct configfs_dirent * sd = dentry->d_fsdata;
if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
} else
item = config_item_get(sd->s_element);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return item;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0b502f80c691..8cd00c35363c 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
* We _must_ delete our dentries on last dput, as the chain-to-parent
* behavior is required to clear the parents of default_groups.
*/
-static int configfs_d_delete(struct dentry *dentry)
+static int configfs_d_delete(const struct dentry *dentry)
{
return 1;
}
@@ -234,7 +234,7 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
sd->s_dentry = dentry;
if (dentry) {
dentry->d_fsdata = configfs_get(sd);
- dentry->d_op = &configfs_dentry_ops;
+ d_set_d_op(dentry, &configfs_dentry_ops);
}
return 0;
@@ -278,7 +278,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_create(d, mode, init_dir);
if (!error) {
inc_nlink(p->d_inode);
- (d)->d_op = &configfs_dentry_ops;
+ d_set_d_op((d), &configfs_dentry_ops);
} else {
struct configfs_dirent *sd = d->d_fsdata;
if (sd) {
@@ -372,7 +372,7 @@ int configfs_create_link(struct configfs_symlink *sl,
if (!err) {
err = configfs_create(dentry, mode, init_symlink);
if (!err)
- dentry->d_op = &configfs_dentry_ops;
+ d_set_d_op(dentry, &configfs_dentry_ops);
else {
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
@@ -399,8 +399,7 @@ static void remove_dir(struct dentry * d)
if (d->d_inode)
simple_rmdir(parent->d_inode,d);
- pr_debug(" o %s removing done (%d)\n",d->d_name.name,
- atomic_read(&d->d_count));
+ pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
dput(parent);
}
@@ -448,7 +447,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
return error;
}
- dentry->d_op = &configfs_dentry_ops;
+ d_set_d_op(dentry, &configfs_dentry_ops);
d_rehash(dentry);
return 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d78ed8..c83f4768eeaa 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
struct dentry * dentry = sd->s_dentry;
if (dentry) {
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry) && dentry->d_inode)) {
- dget_locked(dentry);
+ dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, dentry);
- } else {
+ } else
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
- }
}
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6d..ec6cfcd04af3 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
#include <linux/bootmem.h>
#include <linux/fs_struct.h>
#include <linux/hardirq.h>
+#include <linux/bit_spinlock.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
+/*
+ * Usage:
+ * dcache->d_inode->i_lock protects:
+ * - i_dentry, d_alias, d_inode of aliases
+ * dcache_hash_bucket lock protects:
+ * - the dcache hash table
+ * s_anon bl list spinlock protects:
+ * - the s_anon list (see __d_drop)
+ * dcache_lru_lock protects:
+ * - the dcache lru lists and counters
+ * d_lock protects:
+ * - d_flags
+ * - d_name
+ * - d_lru
+ * - d_count
+ * - d_unhashed()
+ * - d_parent and d_subdirs
+ * - childrens' d_child and d_parent
+ * - d_alias, d_inode
+ *
+ * Ordering:
+ * dentry->d_inode->i_lock
+ * dentry->d_lock
+ * dcache_lru_lock
+ * dcache_hash_bucket lock
+ * s_anon lock
+ *
+ * If there is an ancestor relationship:
+ * dentry->d_parent->...->d_parent->d_lock
+ * ...
+ * dentry->d_parent->d_lock
+ * dentry->d_lock
+ *
+ * If no ancestor relationship:
+ * if (dentry1 < dentry2)
+ * dentry1->d_lock
+ * dentry2->d_lock
+ */
int sysctl_vfs_cache_pressure __read_mostly = 100;
EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
- __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock);
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
-EXPORT_SYMBOL(dcache_lock);
+EXPORT_SYMBOL(rename_lock);
static struct kmem_cache *dentry_cache __read_mostly;
-#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
-
/*
* This is the single most critical data structure when it comes
* to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
-static struct hlist_head *dentry_hashtable __read_mostly;
+
+struct dcache_hash_bucket {
+ struct hlist_bl_head head;
+};
+static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
+
+static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
+ unsigned long hash)
+{
+ hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
+ hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
+ return dentry_hashtable + (hash & D_HASHMASK);
+}
+
+static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
+{
+ bit_spin_lock(0, (unsigned long *)&b->head.first);
+}
+
+static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
+{
+ __bit_spin_unlock(0, (unsigned long *)&b->head.first);
+}
/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {
.age_limit = 45,
};
-static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
-static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(unsigned int, nr_dentry);
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+static int get_nr_dentry(void)
+{
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_dentry, i);
+ return sum < 0 ? 0 : sum;
+}
+
int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
- dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
- dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ dentry_stat.nr_dentry = get_nr_dentry();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
}
/*
- * no dcache_lock, please.
+ * no locks, please.
*/
static void d_free(struct dentry *dentry)
{
- percpu_counter_dec(&nr_dentry);
+ BUG_ON(dentry->d_count);
+ this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
dentry->d_op->d_release(dentry);
/* if dentry was never inserted into hash, immediate free is OK */
- if (hlist_unhashed(&dentry->d_hash))
+ if (hlist_bl_unhashed(&dentry->d_hash))
__d_free(&dentry->d_u.d_rcu);
else
call_rcu(&dentry->d_u.d_rcu, __d_free);
}
+/**
+ * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
+ * After this call, in-progress rcu-walk path lookup will fail. This
+ * should be called after unhashing, and after changing d_inode (if
+ * the dentry has not already been unhashed).
+ */
+static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
+{
+ assert_spin_locked(&dentry->d_lock);
+ /* Go through a barrier */
+ write_seqcount_barrier(&dentry->d_seq);
+}
+
/*
* Release the dentry's inode, using the filesystem
- * d_iput() operation if defined.
+ * d_iput() operation if defined. Dentry has no refcount
+ * and is unhashed.
*/
static void dentry_iput(struct dentry * dentry)
__releases(dentry->d_lock)
- __releases(dcache_lock)
+ __releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
list_del_init(&dentry->d_alias);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
fsnotify_inoderemove(inode);
if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
iput(inode);
} else {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
}
}
/*
- * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
+ * Release the dentry's inode, using the filesystem
+ * d_iput() operation if defined. dentry remains in-use.
+ */
+static void dentry_unlink_inode(struct dentry * dentry)
+ __releases(dentry->d_lock)
+ __releases(dentry->d_inode->i_lock)
+{
+ struct inode *inode = dentry->d_inode;
+ dentry->d_inode = NULL;
+ list_del_init(&dentry->d_alias);
+ dentry_rcuwalk_barrier(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&inode->i_lock);
+ if (!inode->i_nlink)
+ fsnotify_inoderemove(inode);
+ if (dentry->d_op && dentry->d_op->d_iput)
+ dentry->d_op->d_iput(dentry, inode);
+ else
+ iput(inode);
+}
+
+/*
+ * dentry_lru_(add|del|move_tail) must be called with d_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
if (list_empty(&dentry->d_lru)) {
+ spin_lock(&dcache_lru_lock);
list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
- percpu_counter_inc(&nr_dentry_unused);
+ dentry_stat.nr_unused++;
+ spin_unlock(&dcache_lru_lock);
}
}
+static void __dentry_lru_del(struct dentry *dentry)
+{
+ list_del_init(&dentry->d_lru);
+ dentry->d_sb->s_nr_dentry_unused--;
+ dentry_stat.nr_unused--;
+}
+
static void dentry_lru_del(struct dentry *dentry)
{
if (!list_empty(&dentry->d_lru)) {
- list_del_init(&dentry->d_lru);
- dentry->d_sb->s_nr_dentry_unused--;
- percpu_counter_dec(&nr_dentry_unused);
+ spin_lock(&dcache_lru_lock);
+ __dentry_lru_del(dentry);
+ spin_unlock(&dcache_lru_lock);
}
}
static void dentry_lru_move_tail(struct dentry *dentry)
{
+ spin_lock(&dcache_lru_lock);
if (list_empty(&dentry->d_lru)) {
list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
- percpu_counter_inc(&nr_dentry_unused);
+ dentry_stat.nr_unused++;
} else {
list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
}
+ spin_unlock(&dcache_lru_lock);
}
/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
* The dentry must already be unhashed and removed from the LRU.
*
* If this is the root of the dentry tree, return NULL.
+ *
+ * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
+ * d_kill.
*/
-static struct dentry *d_kill(struct dentry *dentry)
+static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(dentry->d_lock)
- __releases(dcache_lock)
+ __releases(parent->d_lock)
+ __releases(dentry->d_inode->i_lock)
{
- struct dentry *parent;
-
+ dentry->d_parent = NULL;
list_del(&dentry->d_u.d_child);
- /*drops the locks, at that point nobody can reach this dentry */
+ if (parent)
+ spin_unlock(&parent->d_lock);
dentry_iput(dentry);
+ /*
+ * dentry_iput drops the locks, at which point nobody (except
+ * transient RCU lookups) can reach this dentry.
+ */
+ d_free(dentry);
+ return parent;
+}
+
+/**
+ * d_drop - drop a dentry
+ * @dentry: dentry to drop
+ *
+ * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
+ * be found through a VFS lookup any more. Note that this is different from
+ * deleting the dentry - d_delete will try to mark the dentry negative if
+ * possible, giving a successful _negative_ lookup, while d_drop will
+ * just make the cache lookup fail.
+ *
+ * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
+ * reason (NFS timeouts or autofs deletes).
+ *
+ * __d_drop requires dentry->d_lock.
+ */
+void __d_drop(struct dentry *dentry)
+{
+ if (!(dentry->d_flags & DCACHE_UNHASHED)) {
+ if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
+ bit_spin_lock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_init(&dentry->d_hash);
+ __bit_spin_unlock(0,
+ (unsigned long *)&dentry->d_sb->s_anon.first);
+ } else {
+ struct dcache_hash_bucket *b;
+ b = d_hash(dentry->d_parent, dentry->d_name.hash);
+ spin_lock_bucket(b);
+ /*
+ * We may not actually need to put DCACHE_UNHASHED
+ * manipulations under the hash lock, but follow
+ * the principle of least surprise.
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;
+ hlist_bl_del_rcu(&dentry->d_hash);
+ spin_unlock_bucket(b);
+ dentry_rcuwalk_barrier(dentry);
+ }
+ }
+}
+EXPORT_SYMBOL(__d_drop);
+
+void d_drop(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_drop);
+
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If ref is non-zero, then decrement the refcount too.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
+ __releases(dentry->d_lock)
+{
+ struct inode *inode;
+ struct dentry *parent;
+
+ inode = dentry->d_inode;
+ if (inode && !spin_trylock(&inode->i_lock)) {
+relock:
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ return dentry; /* try again with same dentry */
+ }
if (IS_ROOT(dentry))
parent = NULL;
else
parent = dentry->d_parent;
- d_free(dentry);
- return parent;
+ if (parent && !spin_trylock(&parent->d_lock)) {
+ if (inode)
+ spin_unlock(&inode->i_lock);
+ goto relock;
+ }
+
+ if (ref)
+ dentry->d_count--;
+ /* if dentry was on the d_lru list delete it from there */
+ dentry_lru_del(dentry);
+ /* if it was on the hash then remove it */
+ __d_drop(dentry);
+ return d_kill(dentry, parent);
}
/*
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
* call the dentry unlink method as well as removing it from the queues and
* releasing its resources. If the parent dentries were scheduled for release
* they too may now get deleted.
- *
- * no dcache lock, please.
*/
-
void dput(struct dentry *dentry)
{
if (!dentry)
return;
repeat:
- if (atomic_read(&dentry->d_count) == 1)
+ if (dentry->d_count == 1)
might_sleep();
- if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
- return;
-
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
+ BUG_ON(!dentry->d_count);
+ if (dentry->d_count > 1) {
+ dentry->d_count--;
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return;
}
- /*
- * AV: ->d_delete() is _NOT_ allowed to block now.
- */
- if (dentry->d_op && dentry->d_op->d_delete) {
+ if (dentry->d_flags & DCACHE_OP_DELETE) {
if (dentry->d_op->d_delete(dentry))
- goto unhash_it;
+ goto kill_it;
}
/* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
- spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
return;
-unhash_it:
- __d_drop(dentry);
kill_it:
- /* if dentry was on the d_lru list delete it from there */
- dentry_lru_del(dentry);
- dentry = d_kill(dentry);
+ dentry = dentry_kill(dentry, 1);
if (dentry)
goto repeat;
}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
/*
* If it's already been dropped, return OK.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (d_unhashed(dentry)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return 0;
}
/*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
* to get rid of unused child entries.
*/
if (!list_empty(&dentry->d_subdirs)) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
shrink_dcache_parent(dentry);
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
}
/*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
* we might still populate it if it was a
* working directory or similar).
*/
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return -EBUSY;
}
}
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
return 0;
}
EXPORT_SYMBOL(d_invalidate);
-/* This should be called _only_ with dcache_lock held */
-static inline struct dentry * __dget_locked(struct dentry *dentry)
+/* This must be called with d_lock held */
+static inline void __dget_dlock(struct dentry *dentry)
{
- atomic_inc(&dentry->d_count);
- dentry_lru_del(dentry);
- return dentry;
+ dentry->d_count++;
}
-struct dentry * dget_locked(struct dentry *dentry)
+static inline void __dget(struct dentry *dentry)
{
- return __dget_locked(dentry);
+ spin_lock(&dentry->d_lock);
+ __dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
}
-EXPORT_SYMBOL(dget_locked);
+
+struct dentry *dget_parent(struct dentry *dentry)
+{
+ struct dentry *ret;
+
+repeat:
+ /*
+ * Don't need rcu_dereference because we re-check it was correct under
+ * the lock.
+ */
+ rcu_read_lock();
+ ret = dentry->d_parent;
+ if (!ret) {
+ rcu_read_unlock();
+ goto out;
+ }
+ spin_lock(&ret->d_lock);
+ if (unlikely(ret != dentry->d_parent)) {
+ spin_unlock(&ret->d_lock);
+ rcu_read_unlock();
+ goto repeat;
+ }
+ rcu_read_unlock();
+ BUG_ON(!ret->d_count);
+ ret->d_count++;
+ spin_unlock(&ret->d_lock);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(dget_parent);
/**
* d_find_alias - grab a hashed alias of inode
@@ -355,42 +576,56 @@ EXPORT_SYMBOL(dget_locked);
* any other hashed alias over that one unless @want_discon is set,
* in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
*/
-
-static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
+static struct dentry *___d_find_alias(struct inode *inode, int want_discon)
{
- struct list_head *head, *next, *tmp;
- struct dentry *alias, *discon_alias=NULL;
+ struct dentry *alias, *discon_alias;
- head = &inode->i_dentry;
- next = inode->i_dentry.next;
- while (next != head) {
- tmp = next;
- next = tmp->next;
- prefetch(next);
- alias = list_entry(tmp, struct dentry, d_alias);
+again:
+ discon_alias = NULL;
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
(alias->d_flags & DCACHE_DISCONNECTED))
discon_alias = alias;
- else if (!want_discon) {
- __dget_locked(alias);
+ else if (!want_discon)
+ return alias;
+ }
+ spin_unlock(&alias->d_lock);
+ }
+ if (discon_alias) {
+ alias = discon_alias;
+ spin_lock(&alias->d_lock);
+ if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
+ if (IS_ROOT(alias) &&
+ (alias->d_flags & DCACHE_DISCONNECTED))
return alias;
- }
}
+ spin_unlock(&alias->d_lock);
+ goto again;
+ }
+ return NULL;
+}
+
+static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
+{
+ struct dentry *alias;
+ alias = ___d_find_alias(inode, want_discon);
+ if (alias) {
+ __dget_dlock(alias);
+ spin_unlock(&alias->d_lock);
}
- if (discon_alias)
- __dget_locked(discon_alias);
- return discon_alias;
+ return alias;
}
-struct dentry * d_find_alias(struct inode *inode)
+struct dentry *d_find_alias(struct inode *inode)
{
struct dentry *de = NULL;
if (!list_empty(&inode->i_dentry)) {
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
de = __d_find_alias(inode, 0);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
return de;
}
@@ -404,54 +639,61 @@ void d_prune_aliases(struct inode *inode)
{
struct dentry *dentry;
restart:
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
spin_lock(&dentry->d_lock);
- if (!atomic_read(&dentry->d_count)) {
- __dget_locked(dentry);
+ if (!dentry->d_count) {
+ __dget_dlock(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
dput(dentry);
goto restart;
}
spin_unlock(&dentry->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_prune_aliases);
/*
- * Throw away a dentry - free the inode, dput the parent. This requires that
- * the LRU list has already been removed.
+ * Try to throw away a dentry - free the inode, dput the parent.
+ * Requires dentry->d_lock is held, and dentry->d_count == 0.
+ * Releases dentry->d_lock.
*
- * Try to prune ancestors as well. This is necessary to prevent
- * quadratic behavior of shrink_dcache_parent(), but is also expected
- * to be beneficial in reducing dentry cache fragmentation.
+ * This may fail if locks cannot be acquired no problem, just try again.
*/
-static void prune_one_dentry(struct dentry * dentry)
+static void try_prune_one_dentry(struct dentry *dentry)
__releases(dentry->d_lock)
- __releases(dcache_lock)
- __acquires(dcache_lock)
{
- __d_drop(dentry);
- dentry = d_kill(dentry);
+ struct dentry *parent;
+ parent = dentry_kill(dentry, 0);
/*
- * Prune ancestors. Locking is simpler than in dput(),
- * because dcache_lock needs to be taken anyway.
+ * If dentry_kill returns NULL, we have nothing more to do.
+ * if it returns the same dentry, trylocks failed. In either
+ * case, just loop again.
+ *
+ * Otherwise, we need to prune ancestors too. This is necessary
+ * to prevent quadratic behavior of shrink_dcache_parent(), but
+ * is also expected to be beneficial in reducing dentry cache
+ * fragmentation.
*/
- spin_lock(&dcache_lock);
+ if (!parent)
+ return;
+ if (parent == dentry)
+ return;
+
+ /* Prune ancestors. */
+ dentry = parent;
while (dentry) {
- if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock))
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_count > 1) {
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
return;
-
- if (dentry->d_op && dentry->d_op->d_delete)
- dentry->d_op->d_delete(dentry);
- dentry_lru_del(dentry);
- __d_drop(dentry);
- dentry = d_kill(dentry);
- spin_lock(&dcache_lock);
+ }
+ dentry = dentry_kill(dentry, 1);
}
}
@@ -459,24 +701,35 @@ static void shrink_dentry_list(struct list_head *list)
{
struct dentry *dentry;
- while (!list_empty(list)) {
- dentry = list_entry(list->prev, struct dentry, d_lru);
- dentry_lru_del(dentry);
+ rcu_read_lock();
+ for (;;) {
+ dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
+ if (&dentry->d_lru == list)
+ break; /* empty */
+ spin_lock(&dentry->d_lock);
+ if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
+ spin_unlock(&dentry->d_lock);
+ continue;
+ }
/*
* We found an inuse dentry which was not removed from
* the LRU because of laziness during lookup. Do not free
* it - just keep it off the LRU list.
*/
- spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count)) {
+ if (dentry->d_count) {
+ dentry_lru_del(dentry);
spin_unlock(&dentry->d_lock);
continue;
}
- prune_one_dentry(dentry);
- /* dentry->d_lock was dropped in prune_one_dentry() */
- cond_resched_lock(&dcache_lock);
+
+ rcu_read_unlock();
+
+ try_prune_one_dentry(dentry);
+
+ rcu_read_lock();
}
+ rcu_read_unlock();
}
/**
@@ -495,42 +748,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
LIST_HEAD(tmp);
int cnt = *count;
- spin_lock(&dcache_lock);
+relock:
+ spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
dentry = list_entry(sb->s_dentry_lru.prev,
struct dentry, d_lru);
BUG_ON(dentry->d_sb != sb);
+ if (!spin_trylock(&dentry->d_lock)) {
+ spin_unlock(&dcache_lru_lock);
+ cpu_relax();
+ goto relock;
+ }
+
/*
* If we are honouring the DCACHE_REFERENCED flag and the
* dentry has this flag set, don't free it. Clear the flag
* and put it back on the LRU.
*/
- if (flags & DCACHE_REFERENCED) {
- spin_lock(&dentry->d_lock);
- if (dentry->d_flags & DCACHE_REFERENCED) {
- dentry->d_flags &= ~DCACHE_REFERENCED;
- list_move(&dentry->d_lru, &referenced);
- spin_unlock(&dentry->d_lock);
- cond_resched_lock(&dcache_lock);
- continue;
- }
+ if (flags & DCACHE_REFERENCED &&
+ dentry->d_flags & DCACHE_REFERENCED) {
+ dentry->d_flags &= ~DCACHE_REFERENCED;
+ list_move(&dentry->d_lru, &referenced);
+ spin_unlock(&dentry->d_lock);
+ } else {
+ list_move_tail(&dentry->d_lru, &tmp);
spin_unlock(&dentry->d_lock);
+ if (!--cnt)
+ break;
}
-
- list_move_tail(&dentry->d_lru, &tmp);
- if (!--cnt)
- break;
- cond_resched_lock(&dcache_lock);
+ cond_resched_lock(&dcache_lru_lock);
}
-
- *count = cnt;
- shrink_dentry_list(&tmp);
-
if (!list_empty(&referenced))
list_splice(&referenced, &sb->s_dentry_lru);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dcache_lru_lock);
+ shrink_dentry_list(&tmp);
+
+ *count = cnt;
}
/**
@@ -546,13 +801,12 @@ static void prune_dcache(int count)
{
struct super_block *sb, *p = NULL;
int w_count;
- int unused = percpu_counter_sum_positive(&nr_dentry_unused);
+ int unused = dentry_stat.nr_unused;
int prune_ratio;
int pruned;
if (unused == 0 || count == 0)
return;
- spin_lock(&dcache_lock);
if (count >= unused)
prune_ratio = 1;
else
@@ -589,11 +843,9 @@ static void prune_dcache(int count)
if (down_read_trylock(&sb->s_umount)) {
if ((sb->s_root != NULL) &&
(!list_empty(&sb->s_dentry_lru))) {
- spin_unlock(&dcache_lock);
__shrink_dcache_sb(sb, &w_count,
DCACHE_REFERENCED);
pruned -= w_count;
- spin_lock(&dcache_lock);
}
up_read(&sb->s_umount);
}
@@ -609,7 +861,6 @@ static void prune_dcache(int count)
if (p)
__put_super(p);
spin_unlock(&sb_lock);
- spin_unlock(&dcache_lock);
}
/**
@@ -623,12 +874,14 @@ void shrink_dcache_sb(struct super_block *sb)
{
LIST_HEAD(tmp);
- spin_lock(&dcache_lock);
+ spin_lock(&dcache_lru_lock);
while (!list_empty(&sb->s_dentry_lru)) {
list_splice_init(&sb->s_dentry_lru, &tmp);
+ spin_unlock(&dcache_lru_lock);
shrink_dentry_list(&tmp);
+ spin_lock(&dcache_lru_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dcache_lru_lock);
}
EXPORT_SYMBOL(shrink_dcache_sb);
@@ -645,10 +898,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
BUG_ON(!IS_ROOT(dentry));
/* detach this root from the system */
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
dentry_lru_del(dentry);
__d_drop(dentry);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
for (;;) {
/* descend to the first leaf in the current subtree */
@@ -657,14 +910,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* this is a branch with children - detach all of them
* from the system in one go */
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
list_for_each_entry(loop, &dentry->d_subdirs,
d_u.d_child) {
+ spin_lock_nested(&loop->d_lock,
+ DENTRY_D_LOCK_NESTED);
dentry_lru_del(loop);
__d_drop(loop);
- cond_resched_lock(&dcache_lock);
+ spin_unlock(&loop->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
/* move to the first child */
dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +931,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
do {
struct inode *inode;
- if (atomic_read(&dentry->d_count) != 0) {
+ if (dentry->d_count != 0) {
printk(KERN_ERR
"BUG: Dentry %p{i=%lx,n=%s}"
" still in use (%d)"
@@ -685,20 +940,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
dentry->d_inode ?
dentry->d_inode->i_ino : 0UL,
dentry->d_name.name,
- atomic_read(&dentry->d_count),
+ dentry->d_count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
BUG();
}
- if (IS_ROOT(dentry))
+ if (IS_ROOT(dentry)) {
parent = NULL;
- else {
+ list_del(&dentry->d_u.d_child);
+ } else {
parent = dentry->d_parent;
- atomic_dec(&parent->d_count);
+ spin_lock(&parent->d_lock);
+ parent->d_count--;
+ list_del(&dentry->d_u.d_child);
+ spin_unlock(&parent->d_lock);
}
- list_del(&dentry->d_u.d_child);
detached++;
inode = dentry->d_inode;
@@ -728,8 +986,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/*
* destroy the dentries attached to a superblock on unmounting
- * - we don't need to use dentry->d_lock, and only need dcache_lock when
- * removing the dentry from the system lists and hashes because:
+ * - we don't need to use dentry->d_lock because:
* - the superblock is detached from all mountings and open files, so the
* dentry trees will not be rearranged by the VFS
* - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +1003,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
dentry = sb->s_root;
sb->s_root = NULL;
- atomic_dec(&dentry->d_count);
+ spin_lock(&dentry->d_lock);
+ dentry->d_count--;
+ spin_unlock(&dentry->d_lock);
shrink_dcache_for_umount_subtree(dentry);
- while (!hlist_empty(&sb->s_anon)) {
- dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
+ while (!hlist_bl_empty(&sb->s_anon)) {
+ dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
shrink_dcache_for_umount_subtree(dentry);
}
}
@@ -768,15 +1027,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
* Return true if the parent or its subdirectories contain
* a mount point
*/
-
int have_submounts(struct dentry *parent)
{
- struct dentry *this_parent = parent;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
+ int locked = 0;
+
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = parent;
- spin_lock(&dcache_lock);
if (d_mountpoint(parent))
goto positive;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -784,27 +1048,65 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
/* Have we found a mount point ? */
- if (d_mountpoint(dentry))
+ if (d_mountpoint(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&this_parent->d_lock);
goto positive;
+ }
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
+ spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_u.d_child.next;
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return 0; /* No mount points found in tree */
positive:
- spin_unlock(&dcache_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return 1;
+
+rename_retry:
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
EXPORT_SYMBOL(have_submounts);
@@ -824,11 +1126,16 @@ EXPORT_SYMBOL(have_submounts);
*/
static int select_parent(struct dentry * parent)
{
- struct dentry *this_parent = parent;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
int found = 0;
+ int locked = 0;
- spin_lock(&dcache_lock);
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = parent;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -837,11 +1144,13 @@ resume:
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
*/
- if (!atomic_read(&dentry->d_count)) {
+ if (!dentry->d_count) {
dentry_lru_move_tail(dentry);
found++;
} else {
@@ -853,28 +1162,63 @@ resume:
* ensures forward progress). We'll be coming back to find
* the rest.
*/
- if (found && need_resched())
+ if (found && need_resched()) {
+ spin_unlock(&dentry->d_lock);
goto out;
+ }
/*
* Descend a level if the d_subdirs list is non-empty.
*/
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
+
+ spin_unlock(&dentry->d_lock);
}
/*
* All done at this level ... ascend and resume the search.
*/
if (this_parent != parent) {
- next = this_parent->d_u.d_child.next;
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
out:
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
return found;
+
+rename_retry:
+ if (found)
+ return found;
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
/**
@@ -908,16 +1252,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
*/
static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
- int nr_unused;
-
if (nr) {
if (!(gfp_mask & __GFP_FS))
return -1;
prune_dcache(nr);
}
- nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
- return (nr_unused / 100) * sysctl_vfs_cache_pressure;
+ return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dcache_shrinker = {
@@ -960,38 +1301,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
memcpy(dname, name->name, name->len);
dname[name->len] = 0;
- atomic_set(&dentry->d_count, 1);
+ dentry->d_count = 1;
dentry->d_flags = DCACHE_UNHASHED;
spin_lock_init(&dentry->d_lock);
+ seqcount_init(&dentry->d_seq);
dentry->d_inode = NULL;
dentry->d_parent = NULL;
dentry->d_sb = NULL;
dentry->d_op = NULL;
dentry->d_fsdata = NULL;
- dentry->d_mounted = 0;
- INIT_HLIST_NODE(&dentry->d_hash);
+ INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
INIT_LIST_HEAD(&dentry->d_alias);
+ INIT_LIST_HEAD(&dentry->d_u.d_child);
if (parent) {
- dentry->d_parent = dget(parent);
+ spin_lock(&parent->d_lock);
+ /*
+ * don't need child lock because it is not subject
+ * to concurrency here
+ */
+ __dget_dlock(parent);
+ dentry->d_parent = parent;
dentry->d_sb = parent->d_sb;
- } else {
- INIT_LIST_HEAD(&dentry->d_u.d_child);
- }
-
- spin_lock(&dcache_lock);
- if (parent)
list_add(&dentry->d_u.d_child, &parent->d_subdirs);
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
+ }
- percpu_counter_inc(&nr_dentry);
+ this_cpu_inc(nr_dentry);
return dentry;
}
EXPORT_SYMBOL(d_alloc);
+struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
+{
+ struct dentry *dentry = d_alloc(NULL, name);
+ if (dentry) {
+ dentry->d_sb = sb;
+ dentry->d_parent = dentry;
+ dentry->d_flags |= DCACHE_DISCONNECTED;
+ }
+ return dentry;
+}
+EXPORT_SYMBOL(d_alloc_pseudo);
+
struct dentry *d_alloc_name(struct dentry *parent, const char *name)
{
struct qstr q;
@@ -1003,12 +1358,39 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
}
EXPORT_SYMBOL(d_alloc_name);
-/* the caller must hold dcache_lock */
+void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
+{
+ BUG_ON(dentry->d_op);
+ BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
+ DCACHE_OP_COMPARE |
+ DCACHE_OP_REVALIDATE |
+ DCACHE_OP_REVALIDATE_RCU|
+ DCACHE_OP_DELETE ));
+ dentry->d_op = op;
+ if (!op)
+ return;
+ if (op->d_hash)
+ dentry->d_flags |= DCACHE_OP_HASH;
+ if (op->d_compare)
+ dentry->d_flags |= DCACHE_OP_COMPARE;
+ if (op->d_revalidate)
+ dentry->d_flags |= DCACHE_OP_REVALIDATE;
+ if (op->d_revalidate_rcu)
+ dentry->d_flags |= DCACHE_OP_REVALIDATE_RCU;
+ if (op->d_delete)
+ dentry->d_flags |= DCACHE_OP_DELETE;
+
+}
+EXPORT_SYMBOL(d_set_d_op);
+
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
{
+ spin_lock(&dentry->d_lock);
if (inode)
list_add(&dentry->d_alias, &inode->i_dentry);
dentry->d_inode = inode;
+ dentry_rcuwalk_barrier(dentry);
+ spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
@@ -1030,9 +1412,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
- spin_unlock(&dcache_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1453,20 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
+ /*
+ * Don't need alias->d_lock here, because aliases with
+ * d_parent == entry->d_parent are not subject to name or
+ * parent changes, because the parent inode i_mutex is held.
+ */
if (qstr->hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (qstr->len != len)
continue;
- if (memcmp(qstr->name, name, len))
+ if (dentry_memcmp(qstr->name, name, len))
continue;
- dget_locked(alias);
+ __dget(alias);
return alias;
}
@@ -1091,9 +1480,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
BUG_ON(!list_empty(&entry->d_alias));
- spin_lock(&dcache_lock);
+ if (inode)
+ spin_lock(&inode->i_lock);
result = __d_instantiate_unique(entry, inode);
- spin_unlock(&dcache_lock);
+ if (inode)
+ spin_unlock(&inode->i_lock);
if (!result) {
security_d_instantiate(entry, inode);
@@ -1134,14 +1525,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
}
EXPORT_SYMBOL(d_alloc_root);
-static inline struct hlist_head *d_hash(struct dentry *parent,
- unsigned long hash)
-{
- hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
- hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
-}
-
/**
* d_obtain_alias - find or allocate a dentry for a given inode
* @inode: inode to allocate the dentry for
@@ -1182,10 +1565,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
}
tmp->d_parent = tmp; /* make sure dput doesn't croak */
- spin_lock(&dcache_lock);
+
+ spin_lock(&inode->i_lock);
res = __d_find_alias(inode, 0);
if (res) {
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
dput(tmp);
goto out_iput;
}
@@ -1195,12 +1579,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
tmp->d_sb = inode->i_sb;
tmp->d_inode = inode;
tmp->d_flags |= DCACHE_DISCONNECTED;
- tmp->d_flags &= ~DCACHE_UNHASHED;
list_add(&tmp->d_alias, &inode->i_dentry);
- hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
+ bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
+ tmp->d_flags &= ~DCACHE_UNHASHED;
+ hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
+ __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
spin_unlock(&tmp->d_lock);
+ spin_unlock(&inode->i_lock);
- spin_unlock(&dcache_lock);
return tmp;
out_iput:
@@ -1230,18 +1616,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
struct dentry *new = NULL;
if (inode && S_ISDIR(inode->i_mode)) {
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
new = __d_find_alias(inode, 1);
if (new) {
BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(new, inode);
d_move(new, dentry);
iput(inode);
} else {
- /* already taking dcache_lock, so d_add() by hand */
+ /* already taking inode->i_lock, so d_add() by hand */
__d_instantiate(dentry, inode);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(dentry, inode);
d_rehash(dentry);
}
@@ -1314,10 +1700,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* Negative dentry: instantiate it unless the inode is a directory and
* already has a dentry.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
__d_instantiate(found, inode);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
return found;
}
@@ -1327,8 +1713,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
* reference to it, move it in place and use it.
*/
new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
- dget_locked(new);
- spin_unlock(&dcache_lock);
+ __dget(new);
+ spin_unlock(&inode->i_lock);
security_d_instantiate(found, inode);
d_move(new, found);
iput(inode);
@@ -1342,6 +1728,114 @@ err_out:
EXPORT_SYMBOL(d_add_ci);
/**
+ * __d_lookup_rcu - search for a dentry (racy, store-free)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * @seq: returns d_seq value at the point where the dentry was found
+ * @inode: returns dentry->d_inode when the inode was found valid.
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup_rcu is the dcache lookup function for rcu-walk name
+ * resolution (store-free path walking) design described in
+ * Documentation/filesystems/path-lookup.txt.
+ *
+ * This is not to be used outside core vfs.
+ *
+ * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
+ * held, and rcu_read_lock held. The returned dentry must not be stored into
+ * without taking d_lock and checking d_seq sequence count against @seq
+ * returned here.
+ *
+ * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
+ * function.
+ *
+ * Alternatively, __d_lookup_rcu may be called again to look up the child of
+ * the returned dentry, so long as its parent's seqlock is checked after the
+ * child is looked up. Thus, an interlocking stepping of sequence lock checks
+ * is formed, giving integrity down the path walk.
+ */
+struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
+ unsigned *seq, struct inode **inode)
+{
+ unsigned int len = name->len;
+ unsigned int hash = name->hash;
+ const unsigned char *str = name->name;
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
+ struct dentry *dentry;
+
+ /*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
+ * The hash list is protected using RCU.
+ *
+ * Carefully use d_seq when comparing a candidate dentry, to avoid
+ * races with d_move().
+ *
+ * It is possible that concurrent renames can mess up our list
+ * walk here and result in missing our dentry, resulting in the
+ * false-negative result. d_lookup() protects against concurrent
+ * renames using rename_lock seqlock.
+ *
+ * See Documentation/vfs/dcache-locking.txt for more details.
+ */
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ struct inode *i;
+ const char *tname;
+ int tlen;
+
+ if (dentry->d_name.hash != hash)
+ continue;
+
+seqretry:
+ *seq = read_seqcount_begin(&dentry->d_seq);
+ if (dentry->d_parent != parent)
+ continue;
+ if (d_unhashed(dentry))
+ continue;
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ i = dentry->d_inode;
+ prefetch(tname);
+ if (i)
+ prefetch(i);
+ /*
+ * This seqcount check is required to ensure name and
+ * len are loaded atomically, so as not to walk off the
+ * edge of memory when walking. If we could load this
+ * atomically some other way, we could drop this check.
+ */
+ if (read_seqcount_retry(&dentry->d_seq, *seq))
+ goto seqretry;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ if (parent->d_op->d_compare(parent, *inode,
+ dentry, i,
+ tlen, tname, name))
+ continue;
+ } else {
+ if (tlen != len)
+ continue;
+ if (dentry_memcmp(tname, str, tlen))
+ continue;
+ }
+ /*
+ * No extra seqcount check is required after the name
+ * compare. The caller must perform a seqcount check in
+ * order to do anything useful with the returned dentry
+ * anyway.
+ */
+ *inode = i;
+ return dentry;
+ }
+ return NULL;
+}
+
+/**
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
@@ -1352,10 +1846,10 @@ EXPORT_SYMBOL(d_add_ci);
* dentry is returned. The caller must use dput to free the entry when it has
* finished using it. %NULL is returned if the dentry does not exist.
*/
-struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
{
- struct dentry * dentry = NULL;
- unsigned long seq;
+ struct dentry *dentry;
+ unsigned seq;
do {
seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1861,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
}
EXPORT_SYMBOL(d_lookup);
-/*
+/**
* __d_lookup - search for a dentry (racy)
* @parent: parent dentry
* @name: qstr of name we wish to find
@@ -1382,17 +1876,24 @@ EXPORT_SYMBOL(d_lookup);
*
* __d_lookup callers must be commented.
*/
-struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
+struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
const unsigned char *str = name->name;
- struct hlist_head *head = d_hash(parent,hash);
+ struct dcache_hash_bucket *b = d_hash(parent, hash);
+ struct hlist_bl_node *node;
struct dentry *found = NULL;
- struct hlist_node *node;
struct dentry *dentry;
/*
+ * Note: There is significant duplication with __d_lookup_rcu which is
+ * required to prevent single threaded performance regressions
+ * especially on architectures where smp_rmb (in seqcounts) are costly.
+ * Keep the two functions in sync.
+ */
+
+ /*
* The hash list is protected using RCU.
*
* Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1908,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
*/
rcu_read_lock();
- hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
- struct qstr *qstr;
+ hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
+ const char *tname;
+ int tlen;
if (dentry->d_name.hash != hash)
continue;
- if (dentry->d_parent != parent)
- continue;
spin_lock(&dentry->d_lock);
-
- /*
- * Recheck the dentry after taking the lock - d_move may have
- * changed things. Don't bother checking the hash because
- * we're about to compare the whole name anyway.
- */
if (dentry->d_parent != parent)
goto next;
-
- /* non-existing due to RCU? */
if (d_unhashed(dentry))
goto next;
@@ -1433,18 +1925,21 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
* It is safe to compare names since d_move() cannot
* change the qstr (protected by d_lock).
*/
- qstr = &dentry->d_name;
- if (parent->d_op && parent->d_op->d_compare) {
- if (parent->d_op->d_compare(parent, qstr, name))
+ tlen = dentry->d_name.len;
+ tname = dentry->d_name.name;
+ if (parent->d_flags & DCACHE_OP_COMPARE) {
+ if (parent->d_op->d_compare(parent, parent->d_inode,
+ dentry, dentry->d_inode,
+ tlen, tname, name))
goto next;
} else {
- if (qstr->len != len)
+ if (tlen != len)
goto next;
- if (memcmp(qstr->name, str, len))
+ if (dentry_memcmp(tname, str, tlen))
goto next;
}
- atomic_inc(&dentry->d_count);
+ dentry->d_count++;
found = dentry;
spin_unlock(&dentry->d_lock);
break;
@@ -1473,8 +1968,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
* routine may choose to leave the hash value unchanged.
*/
name->hash = full_name_hash(name->name, name->len);
- if (dir->d_op && dir->d_op->d_hash) {
- if (dir->d_op->d_hash(dir, name) < 0)
+ if (dir->d_flags & DCACHE_OP_HASH) {
+ if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
goto out;
}
dentry = d_lookup(dir, name);
@@ -1483,34 +1978,32 @@ out:
}
/**
- * d_validate - verify dentry provided from insecure source
+ * d_validate - verify dentry provided from insecure source (deprecated)
* @dentry: The dentry alleged to be valid child of @dparent
* @dparent: The parent dentry (known to be valid)
*
* An insecure source has sent us a dentry, here we verify it and dget() it.
* This is used by ncpfs in its readdir implementation.
* Zero is returned in the dentry is invalid.
+ *
+ * This function is slow for big directories, and deprecated, do not use it.
*/
-int d_validate(struct dentry *dentry, struct dentry *parent)
+int d_validate(struct dentry *dentry, struct dentry *dparent)
{
- struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
- struct hlist_node *node;
- struct dentry *d;
+ struct dentry *child;
- /* Check whether the ptr might be valid at all.. */
- if (!kmem_ptr_validate(dentry_cache, dentry))
- return 0;
- if (dentry->d_parent != parent)
- return 0;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(d, node, head, d_hash) {
- if (d == dentry) {
- dget(dentry);
+ spin_lock(&dparent->d_lock);
+ list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+ if (dentry == child) {
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ __dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dparent->d_lock);
return 1;
}
}
- rcu_read_unlock();
+ spin_unlock(&dparent->d_lock);
+
return 0;
}
EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2031,23 @@ EXPORT_SYMBOL(d_validate);
void d_delete(struct dentry * dentry)
{
+ struct inode *inode;
int isdir = 0;
/*
* Are we the only user?
*/
- spin_lock(&dcache_lock);
+again:
spin_lock(&dentry->d_lock);
- isdir = S_ISDIR(dentry->d_inode->i_mode);
- if (atomic_read(&dentry->d_count) == 1) {
+ inode = dentry->d_inode;
+ isdir = S_ISDIR(inode->i_mode);
+ if (dentry->d_count == 1) {
+ if (inode && !spin_trylock(&inode->i_lock)) {
+ spin_unlock(&dentry->d_lock);
+ cpu_relax();
+ goto again;
+ }
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
- dentry_iput(dentry);
+ dentry_unlink_inode(dentry);
fsnotify_nameremove(dentry, isdir);
return;
}
@@ -1556,17 +2056,18 @@ void d_delete(struct dentry * dentry)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
fsnotify_nameremove(dentry, isdir);
}
EXPORT_SYMBOL(d_delete);
-static void __d_rehash(struct dentry * entry, struct hlist_head *list)
+static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
{
-
+ BUG_ON(!d_unhashed(entry));
+ spin_lock_bucket(b);
entry->d_flags &= ~DCACHE_UNHASHED;
- hlist_add_head_rcu(&entry->d_hash, list);
+ hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
+ spin_unlock_bucket(b);
}
static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2084,39 @@ static void _d_rehash(struct dentry * entry)
void d_rehash(struct dentry * entry)
{
- spin_lock(&dcache_lock);
spin_lock(&entry->d_lock);
_d_rehash(entry);
spin_unlock(&entry->d_lock);
- spin_unlock(&dcache_lock);
}
EXPORT_SYMBOL(d_rehash);
-/*
- * When switching names, the actual string doesn't strictly have to
- * be preserved in the target - because we're dropping the target
- * anyway. As such, we can just do a simple memcpy() to copy over
- * the new name before we switch.
+/**
+ * dentry_update_name_case - update case insensitive dentry with a new name
+ * @dentry: dentry to be updated
+ * @name: new name
*
- * Note that we have to be a lot more careful about getting the hash
- * switched - we have to switch the hash value properly even if it
- * then no longer matches the actual (corrupted) string of the target.
- * The hash value has to match the hash queue that the dentry is on..
+ * Update a case insensitive dentry with new case of name.
+ *
+ * dentry must have been returned by d_lookup with name @name. Old and new
+ * name lengths must match (ie. no d_compare which allows mismatched name
+ * lengths).
+ *
+ * Parent inode i_mutex must be held over d_lookup and into this call (to
+ * keep renames and concurrent inserts, and readdir(2) away).
*/
+void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
+{
+ BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
+
+ spin_lock(&dentry->d_lock);
+ write_seqcount_begin(&dentry->d_seq);
+ memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
+ write_seqcount_end(&dentry->d_seq);
+ spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(dentry_update_name_case);
+
static void switch_names(struct dentry *dentry, struct dentry *target)
{
if (dname_external(target)) {
@@ -1643,54 +2158,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
swap(dentry->d_name.len, target->d_name.len);
}
+static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
+{
+ /*
+ * XXXX: do we really need to take target->d_lock?
+ */
+ if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
+ spin_lock(&target->d_parent->d_lock);
+ else {
+ if (d_ancestor(dentry->d_parent, target->d_parent)) {
+ spin_lock(&dentry->d_parent->d_lock);
+ spin_lock_nested(&target->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ } else {
+ spin_lock(&target->d_parent->d_lock);
+ spin_lock_nested(&dentry->d_parent->d_lock,
+ DENTRY_D_LOCK_NESTED);
+ }
+ }
+ if (target < dentry) {
+ spin_lock_nested(&target->d_lock, 2);
+ spin_lock_nested(&dentry->d_lock, 3);
+ } else {
+ spin_lock_nested(&dentry->d_lock, 2);
+ spin_lock_nested(&target->d_lock, 3);
+ }
+}
+
+static void dentry_unlock_parents_for_move(struct dentry *dentry,
+ struct dentry *target)
+{
+ if (target->d_parent != dentry->d_parent)
+ spin_unlock(&dentry->d_parent->d_lock);
+ if (target->d_parent != target)
+ spin_unlock(&target->d_parent->d_lock);
+}
+
/*
- * We cannibalize "target" when moving dentry on top of it,
- * because it's going to be thrown away anyway. We could be more
- * polite about it, though.
- *
- * This forceful removal will result in ugly /proc output if
- * somebody holds a file open that got deleted due to a rename.
- * We could be nicer about the deleted file, and let it show
- * up under the name it had before it was deleted rather than
- * under the original name of the file that was moved on top of it.
+ * When switching names, the actual string doesn't strictly have to
+ * be preserved in the target - because we're dropping the target
+ * anyway. As such, we can just do a simple memcpy() to copy over
+ * the new name before we switch.
+ *
+ * Note that we have to be a lot more careful about getting the hash
+ * switched - we have to switch the hash value properly even if it
+ * then no longer matches the actual (corrupted) string of the target.
+ * The hash value has to match the hash queue that the dentry is on..
*/
-
/*
- * d_move_locked - move a dentry
+ * d_move - move a dentry
* @dentry: entry to move
* @target: new dentry
*
* Update the dcache to reflect the move of a file name. Negative
* dcache entries should not be moved in this way.
*/
-static void d_move_locked(struct dentry * dentry, struct dentry * target)
+void d_move(struct dentry * dentry, struct dentry * target)
{
- struct hlist_head *list;
-
if (!dentry->d_inode)
printk(KERN_WARNING "VFS: moving negative dcache entry\n");
+ BUG_ON(d_ancestor(dentry, target));
+ BUG_ON(d_ancestor(target, dentry));
+
write_seqlock(&rename_lock);
- /*
- * XXXX: do we really need to take target->d_lock?
- */
- if (target < dentry) {
- spin_lock(&target->d_lock);
- spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
- } else {
- spin_lock(&dentry->d_lock);
- spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
- }
- /* Move the dentry to the target hash queue, if on different bucket */
- if (d_unhashed(dentry))
- goto already_unhashed;
+ dentry_lock_for_move(dentry, target);
- hlist_del_rcu(&dentry->d_hash);
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&target->d_seq);
-already_unhashed:
- list = d_hash(target->d_parent, target->d_name.hash);
- __d_rehash(dentry, list);
+ /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
+
+ /*
+ * Move the dentry to the target hash queue. Don't bother checking
+ * for the same hash queue because of how unlikely it is.
+ */
+ __d_drop(dentry);
+ __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
@@ -1715,27 +2260,16 @@ already_unhashed:
}
list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+
+ write_seqcount_end(&target->d_seq);
+ write_seqcount_end(&dentry->d_seq);
+
+ dentry_unlock_parents_for_move(dentry, target);
spin_unlock(&target->d_lock);
fsnotify_d_move(dentry);
spin_unlock(&dentry->d_lock);
write_sequnlock(&rename_lock);
}
-
-/**
- * d_move - move a dentry
- * @dentry: entry to move
- * @target: new dentry
- *
- * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
- */
-
-void d_move(struct dentry * dentry, struct dentry * target)
-{
- spin_lock(&dcache_lock);
- d_move_locked(dentry, target);
- spin_unlock(&dcache_lock);
-}
EXPORT_SYMBOL(d_move);
/**
@@ -1761,13 +2295,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
* This helper attempts to cope with remotely renamed directories
*
* It assumes that the caller is already holding
- * dentry->d_parent->d_inode->i_mutex and the dcache_lock
+ * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
*
* Note: If ever the locking in lock_rename() changes, then please
* remember to update this too...
*/
-static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
- __releases(dcache_lock)
+static struct dentry *__d_unalias(struct inode *inode,
+ struct dentry *dentry, struct dentry *alias)
{
struct mutex *m1 = NULL, *m2 = NULL;
struct dentry *ret;
@@ -1790,10 +2324,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
- d_move_locked(alias, dentry);
+ d_move(alias, dentry);
ret = alias;
out_err:
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (m2)
mutex_unlock(m2);
if (m1)
@@ -1804,17 +2338,23 @@ out_err:
/*
* Prepare an anonymous dentry for life in the superblock's dentry tree as a
* named dentry in place of the dentry to be replaced.
+ * returns with anon->d_lock held!
*/
static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
{
struct dentry *dparent, *aparent;
- switch_names(dentry, anon);
- swap(dentry->d_name.hash, anon->d_name.hash);
+ dentry_lock_for_move(anon, dentry);
+
+ write_seqcount_begin(&dentry->d_seq);
+ write_seqcount_begin(&anon->d_seq);
dparent = dentry->d_parent;
aparent = anon->d_parent;
+ switch_names(dentry, anon);
+ swap(dentry->d_name.hash, anon->d_name.hash);
+
dentry->d_parent = (aparent == anon) ? dentry : aparent;
list_del(&dentry->d_u.d_child);
if (!IS_ROOT(dentry))
@@ -1829,6 +2369,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
else
INIT_LIST_HEAD(&anon->d_u.d_child);
+ write_seqcount_end(&dentry->d_seq);
+ write_seqcount_end(&anon->d_seq);
+
+ dentry_unlock_parents_for_move(anon, dentry);
+ spin_unlock(&dentry->d_lock);
+
+ /* anon->d_lock still locked, returns locked */
anon->d_flags &= ~DCACHE_DISCONNECTED;
}
@@ -1846,31 +2393,36 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
BUG_ON(!d_unhashed(dentry));
- spin_lock(&dcache_lock);
-
if (!inode) {
actual = dentry;
__d_instantiate(dentry, NULL);
- goto found_lock;
+ d_rehash(actual);
+ goto out_nolock;
}
+ spin_lock(&inode->i_lock);
+
if (S_ISDIR(inode->i_mode)) {
struct dentry *alias;
/* Does an aliased dentry already exist? */
alias = __d_find_alias(inode, 0);
if (alias) {
+ /*
+ * XXX: after dcache_lock removal, we no longer
+ * guarantee a !d_unhashed alias here. Is that going to
+ * be a problem?
+ */
actual = alias;
/* Is this an anonymous mountpoint that we could splice
* into our tree? */
if (IS_ROOT(alias)) {
- spin_lock(&alias->d_lock);
__d_materialise_dentry(dentry, alias);
__d_drop(alias);
goto found;
}
/* Nope, but we must(!) avoid directory aliasing */
- actual = __d_unalias(dentry, alias);
+ actual = __d_unalias(inode, dentry, alias);
if (IS_ERR(actual))
dput(alias);
goto out_nolock;
@@ -1881,15 +2433,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
actual = __d_instantiate_unique(dentry, inode);
if (!actual)
actual = dentry;
- else if (unlikely(!d_unhashed(actual)))
- goto shouldnt_be_hashed;
+ else
+ BUG_ON(!d_unhashed(actual));
-found_lock:
spin_lock(&actual->d_lock);
found:
_d_rehash(actual);
spin_unlock(&actual->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
out_nolock:
if (actual == dentry) {
security_d_instantiate(dentry, inode);
@@ -1898,10 +2449,6 @@ out_nolock:
iput(inode);
return actual;
-
-shouldnt_be_hashed:
- spin_unlock(&dcache_lock);
- BUG();
}
EXPORT_SYMBOL_GPL(d_materialise_unique);
@@ -1928,7 +2475,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
* @buffer: pointer to the end of the buffer
* @buflen: pointer to buffer length
*
- * Caller holds the dcache_lock.
+ * Caller holds the rename_lock.
*
* If path is not reachable from the supplied root, then the value of
* root is changed (without modifying refcounts).
@@ -1956,7 +2503,9 @@ static int prepend_path(const struct path *path, struct path *root,
}
parent = dentry->d_parent;
prefetch(parent);
+ spin_lock(&dentry->d_lock);
error = prepend_name(buffer, buflen, &dentry->d_name);
+ spin_unlock(&dentry->d_lock);
if (!error)
error = prepend(buffer, buflen, "/", 1);
if (error)
@@ -2012,9 +2561,9 @@ char *__d_path(const struct path *path, struct path *root,
int error;
prepend(&res, &buflen, "\0", 1);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
error = prepend_path(path, root, &res, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (error)
return ERR_PTR(error);
@@ -2076,12 +2625,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
tmp = root;
error = path_with_deleted(path, &tmp, &res, &buflen);
if (error)
res = ERR_PTR(error);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
path_put(&root);
return res;
}
@@ -2107,12 +2656,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
tmp = root;
error = path_with_deleted(path, &tmp, &res, &buflen);
if (!error && !path_equal(&tmp, &root))
error = prepend_unreachable(&res, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
path_put(&root);
if (error)
res = ERR_PTR(error);
@@ -2144,7 +2693,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
-char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
+static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
{
char *end = buf + buflen;
char *retval;
@@ -2158,10 +2707,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
while (!IS_ROOT(dentry)) {
struct dentry *parent = dentry->d_parent;
+ int error;
prefetch(parent);
- if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) ||
- (prepend(&end, &buflen, "/", 1) != 0))
+ spin_lock(&dentry->d_lock);
+ error = prepend_name(&end, &buflen, &dentry->d_name);
+ spin_unlock(&dentry->d_lock);
+ if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
goto Elong;
retval = end;
@@ -2171,14 +2723,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
-EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
+{
+ char *retval;
+
+ write_seqlock(&rename_lock);
+ retval = __dentry_path(dentry, buf, buflen);
+ write_sequnlock(&rename_lock);
+
+ return retval;
+}
+EXPORT_SYMBOL(dentry_path_raw);
char *dentry_path(struct dentry *dentry, char *buf, int buflen)
{
char *p = NULL;
char *retval;
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
if (d_unlinked(dentry)) {
p = buf + buflen;
if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2749,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
buflen++;
}
retval = __dentry_path(dentry, buf, buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (!IS_ERR(retval) && p)
*p = '/'; /* restore '/' overriden with '\0' */
return retval;
Elong:
- spin_unlock(&dcache_lock);
return ERR_PTR(-ENAMETOOLONG);
}
@@ -2225,7 +2787,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
get_fs_root_and_pwd(current->fs, &root, &pwd);
error = -ENOENT;
- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
if (!d_unlinked(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
@@ -2234,7 +2796,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
prepend(&cwd, &buflen, "\0", 1);
error = prepend_path(&pwd, &tmp, &cwd, &buflen);
- spin_unlock(&dcache_lock);
+ write_sequnlock(&rename_lock);
if (error)
goto out;
@@ -2253,8 +2815,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
if (copy_to_user(buf, cwd, len))
error = -EFAULT;
}
- } else
- spin_unlock(&dcache_lock);
+ } else {
+ write_sequnlock(&rename_lock);
+ }
out:
path_put(&pwd);
@@ -2282,25 +2845,25 @@ out:
int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
{
int result;
- unsigned long seq;
+ unsigned seq;
if (new_dentry == old_dentry)
return 1;
- /*
- * Need rcu_readlock to protect against the d_parent trashing
- * due to d_move
- */
- rcu_read_lock();
do {
/* for restarting inner loop in case of seq retry */
seq = read_seqbegin(&rename_lock);
+ /*
+ * Need rcu_readlock to protect against the d_parent trashing
+ * due to d_move
+ */
+ rcu_read_lock();
if (d_ancestor(old_dentry, new_dentry))
result = 1;
else
result = 0;
+ rcu_read_unlock();
} while (read_seqretry(&rename_lock, seq));
- rcu_read_unlock();
return result;
}
@@ -2332,10 +2895,15 @@ EXPORT_SYMBOL(path_is_under);
void d_genocide(struct dentry *root)
{
- struct dentry *this_parent = root;
+ struct dentry *this_parent;
struct list_head *next;
+ unsigned seq;
+ int locked = 0;
- spin_lock(&dcache_lock);
+ seq = read_seqbegin(&rename_lock);
+again:
+ this_parent = root;
+ spin_lock(&this_parent->d_lock);
repeat:
next = this_parent->d_subdirs.next;
resume:
@@ -2343,21 +2911,62 @@ resume:
struct list_head *tmp = next;
struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
next = tmp->next;
- if (d_unhashed(dentry)||!dentry->d_inode)
+
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (d_unhashed(dentry) || !dentry->d_inode) {
+ spin_unlock(&dentry->d_lock);
continue;
+ }
if (!list_empty(&dentry->d_subdirs)) {
+ spin_unlock(&this_parent->d_lock);
+ spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
this_parent = dentry;
+ spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
goto repeat;
}
- atomic_dec(&dentry->d_count);
+ if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
+ dentry->d_flags |= DCACHE_GENOCIDE;
+ dentry->d_count--;
+ }
+ spin_unlock(&dentry->d_lock);
}
if (this_parent != root) {
- next = this_parent->d_u.d_child.next;
- atomic_dec(&this_parent->d_count);
- this_parent = this_parent->d_parent;
+ struct dentry *tmp;
+ struct dentry *child;
+
+ tmp = this_parent->d_parent;
+ if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
+ this_parent->d_flags |= DCACHE_GENOCIDE;
+ this_parent->d_count--;
+ }
+ rcu_read_lock();
+ spin_unlock(&this_parent->d_lock);
+ child = this_parent;
+ this_parent = tmp;
+ spin_lock(&this_parent->d_lock);
+ /* might go back up the wrong parent if we have had a rename
+ * or deletion */
+ if (this_parent != child->d_parent ||
+ (!locked && read_seqretry(&rename_lock, seq))) {
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
+ goto rename_retry;
+ }
+ rcu_read_unlock();
+ next = child->d_u.d_child.next;
goto resume;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&this_parent->d_lock);
+ if (!locked && read_seqretry(&rename_lock, seq))
+ goto rename_retry;
+ if (locked)
+ write_sequnlock(&rename_lock);
+ return;
+
+rename_retry:
+ locked = 1;
+ write_seqlock(&rename_lock);
+ goto again;
}
/**
@@ -2411,7 +3020,7 @@ static void __init dcache_init_early(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
HASH_EARLY,
@@ -2420,16 +3029,13 @@ static void __init dcache_init_early(void)
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
static void __init dcache_init(void)
{
int loop;
- percpu_counter_init(&nr_dentry, 0);
- percpu_counter_init(&nr_dentry_unused, 0);
-
/*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
@@ -2446,7 +3052,7 @@ static void __init dcache_init(void)
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
- sizeof(struct hlist_head),
+ sizeof(struct dcache_hash_bucket),
dhash_entries,
13,
0,
@@ -2455,7 +3061,7 @@ static void __init dcache_init(void)
0);
for (loop = 0; loop < (1 << d_hash_shift); loop++)
- INIT_HLIST_HEAD(&dentry_hashtable[loop]);
+ INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
}
/* SLAB cache for __getname() consumers */
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803f7f79..f9a8be854f30 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -30,7 +30,7 @@
#include "ecryptfs_kernel.h"
/**
- * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
+ * ecryptfs_d_revalidate_rcu - revalidate an ecryptfs dentry
* @dentry: The ecryptfs dentry
* @nd: The associated nameidata
*
@@ -42,7 +42,7 @@
* Returns 1 if valid, 0 otherwise.
*
*/
-static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int ecryptfs_d_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
@@ -50,13 +50,18 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
struct vfsmount *vfsmount_save;
int rc = 1;
- if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
+ if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER))
goto out;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
dentry_save = nd->path.dentry;
vfsmount_save = nd->path.mnt;
nd->path.dentry = lower_dentry;
nd->path.mnt = lower_mnt;
- rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+ if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE)
+ rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
+ else
+ rc = lower_dentry->d_op->d_revalidate_rcu(lower_dentry, nd);
nd->path.dentry = dentry_save;
nd->path.mnt = vfsmount_save;
if (dentry->d_inode) {
@@ -91,6 +96,6 @@ static void ecryptfs_d_release(struct dentry *dentry)
}
const struct dentry_operations ecryptfs_dops = {
- .d_revalidate = ecryptfs_d_revalidate,
+ .d_revalidate_rcu = ecryptfs_d_revalidate_rcu,
.d_release = ecryptfs_d_release,
};
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 133f0a4ce719..aeb242de74f2 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -258,7 +258,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
ecryptfs_dentry->d_parent));
lower_inode = lower_dentry->d_inode;
fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
- BUG_ON(!atomic_read(&lower_dentry->d_count));
+ BUG_ON(!lower_dentry->d_count);
ecryptfs_set_dentry_private(ecryptfs_dentry,
kmem_cache_alloc(ecryptfs_dentry_info_cache,
GFP_KERNEL));
@@ -437,7 +437,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
struct qstr lower_name;
int rc = 0;
- ecryptfs_dentry->d_op = &ecryptfs_dops;
+ d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
if ((ecryptfs_dentry->d_name.len == 1
&& !strcmp(ecryptfs_dentry->d_name.name, "."))
|| (ecryptfs_dentry->d_name.len == 2
@@ -450,7 +450,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
lower_name.hash = ecryptfs_dentry->d_name.hash;
if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- &lower_name);
+ lower_dir_dentry->d_inode, &lower_name);
if (rc < 0)
goto out_d_drop;
}
@@ -485,7 +485,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
- &lower_name);
+ lower_dir_dentry->d_inode, &lower_name);
if (rc < 0)
goto out_d_drop;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 70009a7f2c99..84c6a0ddadf1 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
if (special_file(lower_inode->i_mode))
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
- dentry->d_op = &ecryptfs_dops;
+ d_set_d_op(dentry, &ecryptfs_dops);
fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
@@ -595,7 +595,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
deactivate_locked_super(s);
goto out;
}
- s->s_root->d_op = &ecryptfs_dops;
+ d_set_d_op(s->s_root, &ecryptfs_dops);
s->s_root->d_sb = s;
s->s_root->d_parent = s->s_root;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 2720178b7718..3042fe123a34 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -62,6 +62,16 @@ out:
return inode;
}
+static void ecryptfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ecryptfs_inode_info *inode_info;
+ inode_info = ecryptfs_inode_to_private(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+}
+
/**
* ecryptfs_destroy_inode
* @inode: The ecryptfs inode
@@ -88,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
}
}
ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
- kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
+ call_rcu(&inode->i_rcu, ecryptfs_i_callback);
}
/**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652cc..0f31acb0131c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void efs_destroy_inode(struct inode *inode)
+static void efs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
}
+static void efs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, efs_i_callback);
+}
+
static void init_once(void *foo)
{
struct efs_inode_info *ei = (struct efs_inode_info *) foo;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e0456..8c6c4669b381 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
+static void exofs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+}
+
/*
* Remove an inode from the cache
*/
static void exofs_destroy_inode(struct inode *inode)
{
- kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
+ call_rcu(&inode->i_rcu, exofs_i_callback);
}
/*
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b304056f10..4b6825740dd5 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
void *context)
{
struct dentry *dentry, *toput = NULL;
+ struct inode *inode;
if (acceptable(context, result))
return result;
- spin_lock(&dcache_lock);
- list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) {
- dget_locked(dentry);
- spin_unlock(&dcache_lock);
+ inode = result->d_inode;
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ dget(dentry);
+ spin_unlock(&inode->i_lock);
if (toput)
dput(toput);
if (dentry != result && acceptable(context, dentry)) {
dput(result);
return dentry;
}
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
toput = dentry;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
if (toput)
dput(toput);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc0431bada..43196d7a31c0 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
int
-ext2_check_acl(struct inode *inode, int mask)
+ext2_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb9ac44..fb493933c60d 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,15 +54,15 @@ static inline int ext2_acl_count(size_t size)
#ifdef CONFIG_EXT2_FS_POSIX_ACL
/* acl.c */
-extern int ext2_check_acl (struct inode *, int);
+extern int ext2_check_acl_rcu(struct inode *, int, unsigned int);
extern int ext2_acl_chmod (struct inode *);
extern int ext2_init_acl (struct inode *, struct inode *);
#else
#include <linux/sched.h>
-#define ext2_check_acl NULL
-#define ext2_get_acl NULL
-#define ext2_set_acl NULL
+#define ext2_check_acl_rcu NULL
+#define ext2_get_acl NULL
+#define ext2_set_acl NULL
static inline int
ext2_acl_chmod (struct inode *inode)
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 49eec9456c5b..a34fc5ade9b1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -102,6 +102,6 @@ const struct inode_operations ext2_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .check_acl = ext2_check_acl,
+ .check_acl_rcu = ext2_check_acl_rcu,
.fiemap = ext2_fiemap,
};
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f8aecd2e3297..5366e02484ce 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -417,7 +417,7 @@ const struct inode_operations ext2_dir_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .check_acl = ext2_check_acl,
+ .check_acl_rcu = ext2_check_acl_rcu,
};
const struct inode_operations ext2_special_inode_operations = {
@@ -428,5 +428,5 @@ const struct inode_operations ext2_special_inode_operations = {
.removexattr = generic_removexattr,
#endif
.setattr = ext2_setattr,
- .check_acl = ext2_check_acl,
+ .check_acl_rcu = ext2_check_acl_rcu,
};
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d78..e0c6380ff992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ext2_destroy_inode(struct inode *inode)
+static void ext2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
}
+static void ext2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ext2_i_callback);
+}
+
static void init_once(void *foo)
{
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe212183..cff7c134342f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext3_check_acl(struct inode *inode, int mask)
+ext3_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 597334626de9..f2536e6129c5 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_check_acl (struct inode *, int);
-extern int ext3_acl_chmod (struct inode *);
-extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
+extern int ext3_check_acl_rcu(struct inode *, int, unsigned int);
+extern int ext3_acl_chmod(struct inode *);
+extern int ext3_init_acl(handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT3_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext3_check_acl NULL
+#define ext3_check_acl_rcu NULL
static inline int
ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index f55df0e61cbd..74e5ef7b70dd 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -79,7 +79,7 @@ const struct inode_operations ext3_file_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .check_acl_rcu = ext3_check_acl_rcu,
.fiemap = ext3_fiemap,
};
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index bce9dce639b8..9b1043846ced 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2464,7 +2464,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .check_acl_rcu = ext3_check_acl_rcu,
};
const struct inode_operations ext3_special_inode_operations = {
@@ -2475,5 +2475,5 @@ const struct inode_operations ext3_special_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .check_acl_rcu = ext3_check_acl_rcu,
};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index acf8695fa8f0..77ce1616f725 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -479,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static void ext3_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+}
+
static void ext3_destroy_inode(struct inode *inode)
{
if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -489,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode)
false);
dump_stack();
}
- kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
+ call_rcu(&inode->i_rcu, ext3_i_callback);
}
static void init_once(void *foo)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ead..ebc7127e8c9b 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
}
int
-ext4_check_acl(struct inode *inode, int mask)
+ext4_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
{
- struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
+ struct posix_acl *acl;
+
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+ acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac4..71e8da73d0cf 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
/* acl.c */
-extern int ext4_check_acl(struct inode *, int);
+extern int ext4_check_acl_rcu(struct inode *, int, unsigned int);
extern int ext4_acl_chmod(struct inode *);
extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT4_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext4_check_acl NULL
+#define ext4_check_acl_rcu NULL
static inline int
ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 5a5c55ddceef..f6be0af7c0a0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -200,7 +200,7 @@ const struct inode_operations ext4_file_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext4_check_acl,
+ .check_acl_rcu = ext4_check_acl_rcu,
.fallocate = ext4_fallocate,
.fiemap = ext4_fiemap,
};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6dfc5b9de3e6..b3c1e86ba49b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2518,7 +2518,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext4_check_acl,
+ .check_acl_rcu = ext4_check_acl_rcu,
.fiemap = ext4_fiemap,
};
@@ -2530,5 +2530,5 @@ const struct inode_operations ext4_special_inode_operations = {
.listxattr = ext4_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext4_check_acl,
+ .check_acl_rcu = ext4_check_acl_rcu,
};
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7728a4ca3d6c..6994aab3a8a3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -851,6 +851,13 @@ static int ext4_drop_inode(struct inode *inode)
return drop;
}
+static void ext4_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+}
+
static void ext4_destroy_inode(struct inode *inode)
{
ext4_ioend_wait(inode);
@@ -863,7 +870,7 @@ static void ext4_destroy_inode(struct inode *inode)
true);
dump_stack();
}
- kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
+ call_rcu(&inode->i_rcu, ext4_i_callback);
}
static void init_once(void *foo)
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c30..206351af7c58 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void fat_destroy_inode(struct inode *inode)
+static void fat_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
+static void fat_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, fat_i_callback);
+}
+
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
@@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
*/
result = d_obtain_alias(inode);
if (!IS_ERR(result))
- result->d_op = sb->s_root->d_op;
+ d_set_d_op(result, sb->s_root->d_op);
return result;
}
@@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
parent = d_obtain_alias(inode);
if (!IS_ERR(parent))
- parent->d_op = sb->s_root->d_op;
+ d_set_d_op(parent, sb->s_root->d_op);
out:
unlock_super(sb);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3345aabd1dd7..35ffe43afa4b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
* that the existing dentry can be used. The msdos fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
+static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
unsigned char msdos_name[MSDOS_NAME];
@@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
* Compare two msdos names. If either of the names are invalid,
* we fall back to doing the standard name comparison.
*/
-static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
+ struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
int error;
- error = msdos_format_name(a->name, a->len, a_msdos_name, options);
+ error = msdos_format_name(name->name, name->len, a_msdos_name, options);
if (error)
goto old_compare;
- error = msdos_format_name(b->name, b->len, b_msdos_name, options);
+ error = msdos_format_name(str, len, b_msdos_name, options);
if (error)
goto old_compare;
error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +185,8 @@ out:
old_compare:
error = 1;
- if (a->len == b->len)
- error = memcmp(a->name, b->name, a->len);
+ if (name->len == len)
+ error = memcmp(name->name, str, len);
goto out;
}
@@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
}
out:
unlock_super(sb);
- dentry->d_op = &msdos_dentry_operations;
+ d_set_d_op(dentry, &msdos_dentry_operations);
dentry = d_splice_alias(inode, dentry);
if (dentry)
- dentry->d_op = &msdos_dentry_operations;
+ d_set_d_op(dentry, &msdos_dentry_operations);
return dentry;
error:
@@ -670,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
}
sb->s_flags |= MS_NOATIME;
- sb->s_root->d_op = &msdos_dentry_operations;
+ d_set_d_op(sb->s_root, &msdos_dentry_operations);
unlock_super(sb);
return 0;
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b936703b8924..9419ca54dee9 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -31,7 +31,7 @@
* If it happened, the negative dentry isn't actually negative
* anymore. So, drop it.
*/
-static int vfat_revalidate_shortname(struct dentry *dentry)
+static int vfat_revalidate_rcu_shortname(struct dentry *dentry)
{
int ret = 1;
spin_lock(&dentry->d_lock);
@@ -41,15 +41,15 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
return ret;
}
-static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
{
/* This is not negative dentry. Always valid. */
if (dentry->d_inode)
return 1;
- return vfat_revalidate_shortname(dentry);
+ return vfat_revalidate_rcu_shortname(dentry);
}
-static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
+static int vfat_revalidate_rcu_ci(struct dentry *dentry, struct nameidata *nd)
{
/*
* This is not negative dentry. Always valid.
@@ -81,26 +81,30 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
return 0;
}
- return vfat_revalidate_shortname(dentry);
+ return vfat_revalidate_rcu_shortname(dentry);
}
/* returns the length of a struct qstr, ignoring trailing dots */
-static unsigned int vfat_striptail_len(struct qstr *qstr)
+static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
{
- unsigned int len = qstr->len;
-
- while (len && qstr->name[len - 1] == '.')
+ while (len && name[len - 1] == '.')
len--;
return len;
}
+static unsigned int vfat_striptail_len(const struct qstr *qstr)
+{
+ return __vfat_striptail_len(qstr->len, qstr->name);
+}
+
/*
* Compute the hash for the vfat name corresponding to the dentry.
* Note: if the name is invalid, we leave the hash code unchanged so
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
return 0;
@@ -112,9 +116,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
* that the existing dentry can be used. The vfat fs routines will
* return ENOENT or EINVAL as appropriate.
*/
-static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
+static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
- struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+ struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
const unsigned char *name;
unsigned int len;
unsigned long hash;
@@ -133,16 +138,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
/*
* Case insensitive compare of two vfat names.
*/
-static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io;
+ struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = vfat_striptail_len(a);
- blen = vfat_striptail_len(b);
+ alen = vfat_striptail_len(name);
+ blen = __vfat_striptail_len(len, str);
if (alen == blen) {
- if (nls_strnicmp(t, a->name, b->name, alen) == 0)
+ if (nls_strnicmp(t, name->name, str, alen) == 0)
return 0;
}
return 1;
@@ -151,28 +158,30 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
/*
* Case sensitive compare of two vfat names.
*/
-static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
unsigned int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = vfat_striptail_len(a);
- blen = vfat_striptail_len(b);
+ alen = vfat_striptail_len(name);
+ blen = __vfat_striptail_len(len, str);
if (alen == blen) {
- if (strncmp(a->name, b->name, alen) == 0)
+ if (strncmp(name->name, str, alen) == 0)
return 0;
}
return 1;
}
static const struct dentry_operations vfat_ci_dentry_ops = {
- .d_revalidate = vfat_revalidate_ci,
+ .d_revalidate = vfat_revalidate_rcu_ci,
.d_hash = vfat_hashi,
.d_compare = vfat_cmpi,
};
static const struct dentry_operations vfat_dentry_ops = {
- .d_revalidate = vfat_revalidate,
+ .d_revalidate = vfat_revalidate_rcu,
.d_hash = vfat_hash,
.d_compare = vfat_cmp,
};
@@ -757,11 +766,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
out:
unlock_super(sb);
- dentry->d_op = sb->s_root->d_op;
+ d_set_d_op(dentry, sb->s_root->d_op);
dentry->d_time = dentry->d_parent->d_inode->i_version;
dentry = d_splice_alias(inode, dentry);
if (dentry) {
- dentry->d_op = sb->s_root->d_op;
+ d_set_d_op(dentry, sb->s_root->d_op);
dentry->d_time = dentry->d_parent->d_inode->i_version;
}
return dentry;
@@ -1063,9 +1072,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
}
if (MSDOS_SB(sb)->options.name_check != 's')
- sb->s_root->d_op = &vfat_ci_dentry_ops;
+ d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
else
- sb->s_root->d_op = &vfat_dentry_ops;
+ d_set_d_op(sb->s_root, &vfat_dentry_ops);
unlock_super(sb);
return 0;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492d8eef..751d6b255a12 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
tmp = &(*tmp)->next;
}
write_unlock(&file_systems_lock);
+
+ synchronize_rcu();
+
return -EINVAL;
}
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079d..2ba6719ac612 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
return ip;
}
+static void vxfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(vxfs_inode_cachep, inode->i_private);
+}
+
/**
* vxfs_evict_inode - remove inode from main memory
* @ip: inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
{
truncate_inode_pages(&ip->i_data, 0);
end_writeback(ip);
- kmem_cache_free(vxfs_inode_cachep, ip->i_private);
+ call_rcu(&ip->i_rcu, vxfs_i_callback);
}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9cf5f3d..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
struct path old_root;
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
old_root = fs->root;
fs->root = *path;
- path_get(path);
+ path_get_long(path);
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_root.dentry)
- path_put(&old_root);
+ path_put_long(&old_root);
}
/*
@@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
struct path old_pwd;
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd;
fs->pwd = *path;
- path_get(path);
+ path_get_long(path);
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
if (old_pwd.dentry)
- path_put(&old_pwd);
+ path_put_long(&old_pwd);
}
void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
fs = p->fs;
if (fs) {
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
- path_get(new_root);
+ path_get_long(new_root);
fs->root = *new_root;
count++;
}
if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) {
- path_get(new_root);
+ path_get_long(new_root);
fs->pwd = *new_root;
count++;
}
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
}
task_unlock(p);
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
while (count--)
- path_put(old_root);
+ path_put_long(old_root);
}
void free_fs_struct(struct fs_struct *fs)
{
- path_put(&fs->root);
- path_put(&fs->pwd);
+ path_put_long(&fs->root);
+ path_put_long(&fs->pwd);
kmem_cache_free(fs_cachep, fs);
}
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
int kill;
task_lock(tsk);
spin_lock(&fs->lock);
+ write_seqcount_begin(&fs->seq);
tsk->fs = NULL;
kill = !--fs->users;
+ write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock);
task_unlock(tsk);
if (kill)
@@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
fs->users = 1;
fs->in_exec = 0;
spin_lock_init(&fs->lock);
+ seqcount_init(&fs->seq);
fs->umask = old->umask;
- get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
+
+ spin_lock(&old->lock);
+ fs->root = old->root;
+ path_get_long(&fs->root);
+ fs->pwd = old->pwd;
+ path_get_long(&fs->pwd);
+ spin_unlock(&old->lock);
}
return fs;
}
@@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask);
struct fs_struct init_fs = {
.users = 1,
.lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
+ .seq = SEQCNT_ZERO,
.umask = 0022,
};
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 6ea42e98cb17..99388899f9e8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
* the lookup once more. If the lookup results in the same inode,
* then refresh the attributes, timeouts and mark the dentry valid.
*/
-static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
+static int fuse_dentry_revalidate_rcu(struct dentry *entry, struct nameidata *nd)
{
struct inode *inode = entry->d_inode;
@@ -169,6 +169,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
struct dentry *parent;
u64 attr_version;
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
/* For negative dentries, always do a fresh lookup */
if (!inode)
return 0;
@@ -224,7 +227,7 @@ static int invalid_nodeid(u64 nodeid)
}
const struct dentry_operations fuse_dentry_operations = {
- .d_revalidate = fuse_dentry_revalidate,
+ .d_revalidate_rcu = fuse_dentry_revalidate_rcu,
};
int fuse_valid_type(int m)
@@ -346,7 +349,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
}
entry = newent ? newent : entry;
- entry->d_op = &fuse_dentry_operations;
+ d_set_d_op(entry, &fuse_dentry_operations);
if (outarg_valid)
fuse_change_entry_timeout(entry, &outarg);
else
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 7ba4d351da65..f62b32cffea9 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -104,13 +104,20 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
return inode;
}
+static void fuse_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(fuse_inode_cachep, inode);
+}
+
static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files));
BUG_ON(!list_empty(&fi->queued_writes));
kfree(fi->forget);
- kmem_cache_free(fuse_inode_cachep, inode);
+ call_rcu(&inode->i_rcu, fuse_i_callback);
}
static void fuse_evict_inode(struct inode *inode)
@@ -611,7 +618,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
entry = d_obtain_alias(inode);
if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
- entry->d_op = &fuse_dentry_operations;
+ d_set_d_op(entry, &fuse_dentry_operations);
fuse_invalidate_entry_cache(entry);
}
@@ -713,7 +720,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
parent = d_obtain_alias(inode);
if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
- parent->d_op = &fuse_dentry_operations;
+ d_set_d_op(parent, &fuse_dentry_operations);
fuse_invalidate_entry_cache(parent);
}
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a5a693..3b250e5b8be8 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,6 +190,26 @@ generic_acl_chmod(struct inode *inode)
}
int
+generic_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
+{
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ } else {
+ struct posix_acl *acl;
+
+ acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
+ }
+ return -EAGAIN;
+}
+
+int
generic_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b3858..50497f65763b 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -100,13 +100,14 @@ fail:
return 0;
}
-static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
+static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *str)
{
str->hash = gfs2_disk_hash(str->name, str->len);
return 0;
}
-static int gfs2_dentry_delete(struct dentry *dentry)
+static int gfs2_dentry_delete(const struct dentry *dentry)
{
struct gfs2_inode *ginode;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5ab3839dfcb9..97012ecff560 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
if (!IS_ERR(dentry))
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
return dentry;
}
@@ -158,7 +158,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
out_inode:
dentry = d_obtain_alias(inode);
if (!IS_ERR(dentry))
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
return dentry;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..2aeabd4218cc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
iput(inode);
return -ENOMEM;
}
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
*dptr = dentry;
return 0;
}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1db6b7343229..aa112c608295 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
{
struct inode *inode = NULL;
- dentry->d_op = &gfs2_dops;
+ d_set_d_op(dentry, &gfs2_dops);
inode = gfs2_lookupi(dir, &dentry->d_name, 0);
if (inode && IS_ERR(inode))
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430b..16c2ecac7eb7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
return &ip->i_inode;
}
-static void gfs2_destroy_inode(struct inode *inode)
+static void gfs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(gfs2_inode_cachep, inode);
}
+static void gfs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, gfs2_i_callback);
+}
+
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.destroy_inode = gfs2_destroy_inode,
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41b..ea4aefe7c652 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
int res;
- dentry->d_op = &hfs_dentry_operations;
+ d_set_d_op(dentry, &hfs_dentry_operations);
hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb81e849..ad97c2d58287 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
/* string.c */
extern const struct dentry_operations hfs_dentry_operations;
-extern int hfs_hash_dentry(struct dentry *, struct qstr *);
+extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
+ struct qstr *);
extern int hfs_strcmp(const unsigned char *, unsigned int,
const unsigned char *, unsigned int);
-extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+extern int hfs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
/* trans.c */
extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af79428..495a976a3cc9 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
/*
* Hash a string to an integer in a case-independent way
*/
-int hfs_hash_dentry(struct dentry *dentry, struct qstr *this)
+int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *this)
{
const unsigned char *name = this->name;
unsigned int hash, len = this->len;
@@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
* Test for equality of two strings in the HFS filename character ordering.
* return 1 on failure and 0 on success
*/
-int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
+int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
const unsigned char *n1, *n2;
- int len;
- len = s1->len;
if (len >= HFS_NAMELEN) {
- if (s2->len < HFS_NAMELEN)
+ if (name->len < HFS_NAMELEN)
return 1;
len = HFS_NAMELEN;
- } else if (len != s2->len)
+ } else if (len != name->len)
return 1;
- n1 = s1->name;
- n2 = s2->name;
+ n1 = str;
+ n2 = name->name;
while (len--) {
if (caseorder[*n1++] != caseorder[*n2++])
return 1;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb8..0bef62aa4f42 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfs_destroy_inode(struct inode *inode)
+static void hfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
}
+static void hfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hfs_i_callback);
+}
+
static const struct super_operations hfs_super_operations = {
.alloc_inode = hfs_alloc_inode,
.destroy_inode = hfs_destroy_inode,
@@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!sb->s_root)
goto bail_iput;
- sb->s_root->d_op = &hfs_dentry_operations;
+ d_set_d_op(sb->s_root, &hfs_dentry_operations);
/* everything's okay */
return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f611d55c9f5e..f896dc843026 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
- dentry->d_op = &hfsplus_dentry_operations;
+ d_set_d_op(dentry, &hfsplus_dentry_operations);
dentry->d_fsdata = NULL;
hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index f7cbdf89ac9b..52f4a78455fe 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -426,9 +426,12 @@ int hfsplus_uni2asc(struct super_block *,
const struct hfsplus_unistr *, char *, int *);
int hfsplus_asc2uni(struct super_block *,
struct hfsplus_unistr *, const char *, int);
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str);
-int hfsplus_compare_dentry(struct dentry *dentry,
- struct qstr *s1, struct qstr *s2);
+int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *str);
+int hfsplus_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
/* wrapper.c */
int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 3c9f30e9cd36..6ee6ad20acf2 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -450,7 +450,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = -ENOMEM;
goto cleanup;
}
- sb->s_root->d_op = &hfsplus_dentry_operations;
+ d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
str.name = HFSP_HIDDENDIR_NAME;
@@ -516,11 +516,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
return i ? &i->vfs_inode : NULL;
}
-static void hfsplus_destroy_inode(struct inode *inode)
+static void hfsplus_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
}
+static void hfsplus_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hfsplus_i_callback);
+}
+
#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 7dd90a540546..a3f0bfcc881e 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -324,7 +324,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
+int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *str)
{
struct super_block *sb = dentry->d_sb;
const char *astr;
@@ -367,10 +368,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
* Composed unicode characters are decomposed and case-folding is performed
* if the appropriate bits are (un)set on the superblock.
*/
-int hfsplus_compare_dentry(struct dentry *dentry,
- struct qstr *s1, struct qstr *s2)
+int hfsplus_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct super_block *sb = dentry->d_sb;
+ struct super_block *sb = parent->d_sb;
int casefold, decompose, size;
int dsize1, dsize2, len1, len2;
const u16 *dstr1, *dstr2;
@@ -380,10 +383,10 @@ int hfsplus_compare_dentry(struct dentry *dentry,
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
- astr1 = s1->name;
- len1 = s1->len;
- astr2 = s2->name;
- len2 = s2->len;
+ astr1 = str;
+ len1 = len;
+ astr2 = name->name;
+ len2 = name->len;
dsize1 = dsize2 = 0;
dstr1 = dstr2 = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e6..0bc81cf256b8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
-static int hostfs_d_delete(struct dentry *dentry)
+static int hostfs_d_delete(const struct dentry *dentry)
{
return 1;
}
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
static char *__dentry_name(struct dentry *dentry, char *name)
{
- char *p = __dentry_path(dentry, name, PATH_MAX);
+ char *p = dentry_path_raw(dentry, name, PATH_MAX);
char *root;
size_t len;
- spin_unlock(&dcache_lock);
-
root = dentry->d_sb->s_fs_info;
len = strlen(root);
if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
if (!name)
return NULL;
- spin_lock(&dcache_lock);
return __dentry_name(dentry, name); /* will unlock */
}
static char *inode_name(struct inode *ino)
{
struct dentry *dentry;
- char *name = __getname();
- if (!name)
- return NULL;
+ char *name;
- spin_lock(&dcache_lock);
- if (list_empty(&ino->i_dentry)) {
- spin_unlock(&dcache_lock);
- __putname(name);
+ dentry = d_find_alias(ino);
+ if (!dentry)
return NULL;
- }
- dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
- return __dentry_name(dentry, name); /* will unlock */
+
+ name = dentry_name(dentry);
+
+ dput(dentry);
+
+ return name;
}
static char *follow_link(char *link)
@@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
}
}
-static void hostfs_destroy_inode(struct inode *inode)
+static void hostfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kfree(HOSTFS_I(inode));
}
+static void hostfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hostfs_i_callback);
+}
+
static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
const char *root_path = vfs->mnt_sb->s_fs_info;
@@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
goto out_put;
d_add(dentry, inode);
- dentry->d_op = &hostfs_dentry_ops;
+ d_set_d_op(dentry, &hostfs_dentry_ops);
return NULL;
out_put:
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36b3d5f..32c13a94e1e9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
* Note: the dentry argument is the parent dentry.
*/
-static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
unsigned long hash;
int i;
@@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
return 0;
}
-static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+static int hpfs_compare_dentry(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- unsigned al=a->len;
- unsigned bl=b->len;
- hpfs_adjust_length(a->name, &al);
+ unsigned al = len;
+ unsigned bl = name->len;
+
+ hpfs_adjust_length(str, &al);
/*hpfs_adjust_length(b->name, &bl);*/
- /* 'a' is the qstr of an already existing dentry, so the name
- * must be valid. 'b' must be validated first.
+
+ /*
+ * 'str' is the nane of an already existing dentry, so the name
+ * must be valid. 'name' must be validated first.
*/
- if (hpfs_chk_name(b->name, &bl))
+ if (hpfs_chk_name(name->name, &bl))
return 1;
- if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0))
+ if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
return 1;
return 0;
}
@@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
void hpfs_set_dentry_operations(struct dentry *dentry)
{
- dentry->d_op = &hpfs_dentry_operations;
+ d_set_d_op(dentry, &hpfs_dentry_operations);
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3a..49935ba78db8 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void hpfs_destroy_inode(struct inode *inode)
+static void hpfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
}
+static void hpfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hpfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713fc..87ed48e0343d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
mntput(ino->i_sb->s_fs_info);
}
-static void hppfs_destroy_inode(struct inode *inode)
+static void hppfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kfree(HPPFS_I(inode));
}
+static void hppfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, hppfs_i_callback);
+}
+
static const struct super_operations hppfs_sbops = {
.alloc_inode = hppfs_alloc_inode,
.destroy_inode = hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a5fe68189eed..9885082b470f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
return &p->vfs_inode;
}
+static void hugetlbfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+}
+
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
- kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
+ call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
}
static const struct address_space_operations hugetlbfs_aops = {
diff --git a/fs/inode.c b/fs/inode.c
index ae2727ab0c3a..da85e56378f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem);
*/
struct inodes_stat_t inodes_stat;
-static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
-static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(unsigned int, nr_inodes);
static struct kmem_cache *inode_cachep __read_mostly;
-static inline int get_nr_inodes(void)
+static int get_nr_inodes(void)
{
- return percpu_counter_sum_positive(&nr_inodes);
+ int i;
+ int sum = 0;
+ for_each_possible_cpu(i)
+ sum += per_cpu(nr_inodes, i);
+ return sum < 0 ? 0 : sum;
}
static inline int get_nr_inodes_unused(void)
{
- return percpu_counter_sum_positive(&nr_inodes_unused);
+ return inodes_stat.nr_unused;
}
int get_nr_dirty_inodes(void)
{
+ /* not actually dirty inodes, but a wild approximation */
int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
return nr_dirty > 0 ? nr_dirty : 0;
-
}
/*
@@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
inodes_stat.nr_inodes = get_nr_inodes();
- inodes_stat.nr_unused = get_nr_inodes_unused();
return proc_dointvec(table, write, buffer, lenp, ppos);
}
#endif
@@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
inode->i_fsnotify_mask = 0;
#endif
- percpu_counter_inc(&nr_inodes);
+ this_cpu_inc(nr_inodes);
return 0;
out:
@@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
return inode;
}
+void free_inode_nonrcu(struct inode *inode)
+{
+ kmem_cache_free(inode_cachep, inode);
+}
+EXPORT_SYMBOL(free_inode_nonrcu);
+
void __destroy_inode(struct inode *inode)
{
BUG_ON(inode_has_buffers(inode));
@@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode)
if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
posix_acl_release(inode->i_default_acl);
#endif
- percpu_counter_dec(&nr_inodes);
+ this_cpu_dec(nr_inodes);
}
EXPORT_SYMBOL(__destroy_inode);
+static void i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(inode_cachep, inode);
+}
+
static void destroy_inode(struct inode *inode)
{
BUG_ON(!list_empty(&inode->i_lru));
@@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode)
if (inode->i_sb->s_op->destroy_inode)
inode->i_sb->s_op->destroy_inode(inode);
else
- kmem_cache_free(inode_cachep, (inode));
+ call_rcu(&inode->i_rcu, i_callback);
}
/*
@@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode)
{
if (list_empty(&inode->i_lru)) {
list_add(&inode->i_lru, &inode_lru);
- percpu_counter_inc(&nr_inodes_unused);
+ inodes_stat.nr_unused++;
}
}
@@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode)
{
if (!list_empty(&inode->i_lru)) {
list_del_init(&inode->i_lru);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
}
@@ -430,6 +445,7 @@ void end_writeback(struct inode *inode)
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
inode_sync_wait(inode);
+ /* don't need i_lock here, no concurrent mods to i_state */
inode->i_state = I_FREEING | I_CLEAR;
}
EXPORT_SYMBOL(end_writeback);
@@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb)
list_move(&inode->i_lru, &dispose);
list_del_init(&inode->i_wb_list);
if (!(inode->i_state & (I_DIRTY | I_SYNC)))
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
spin_unlock(&inode_lock);
@@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb)
list_move(&inode->i_lru, &dispose);
list_del_init(&inode->i_wb_list);
if (!(inode->i_state & (I_DIRTY | I_SYNC)))
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
spin_unlock(&inode_lock);
@@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan)
if (atomic_read(&inode->i_count) ||
(inode->i_state & ~I_REFERENCED)) {
list_del_init(&inode->i_lru);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
continue;
}
@@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan)
*/
list_move(&inode->i_lru, &freeable);
list_del_init(&inode->i_wb_list);
- percpu_counter_dec(&nr_inodes_unused);
+ inodes_stat.nr_unused--;
}
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1664,6 @@ void __init inode_init(void)
SLAB_MEM_SPREAD),
init_once);
register_shrinker(&icache_shrinker);
- percpu_counter_init(&nr_inodes, 0);
- percpu_counter_init(&nr_inodes_unused, 0);
/* Hash may have been set up in inode_init_early */
if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..3a73ae2679af 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
extern void free_vfsmnt(struct vfsmount *);
extern struct vfsmount *alloc_vfsmnt(const char *);
+extern unsigned int count_mnt_count(struct vfsmount *mnt);
extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
struct vfsmount *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bfdeb82a53be..844a7903c72f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,16 +26,32 @@
#define BEQUIET
-static int isofs_hashi(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_hash(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_dentry_cmpi(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
#ifdef CONFIG_JOLIET
-static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr);
-static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
-static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b);
+static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
+ struct qstr *qstr);
+static int isofs_dentry_cmpi_ms(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
+static int isofs_dentry_cmp_ms(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name);
#endif
static void isofs_put_super(struct super_block *sb)
@@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void isofs_destroy_inode(struct inode *inode)
+static void isofs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
}
+static void isofs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, isofs_i_callback);
+}
+
static void init_once(void *foo)
{
struct iso_inode_info *ei = foo;
@@ -160,7 +183,7 @@ struct iso9660_options{
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
* Compute the hash for the isofs name corresponding to the dentry.
*/
static int
-isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
+isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
{
const char *name;
int len;
@@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
}
/*
- * Case insensitive compare of two isofs names.
- */
-static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
- struct qstr *b, int ms)
-{
- int alen, blen;
-
- /* A filename cannot end in '.' or we treat it like it has none */
- alen = a->len;
- blen = b->len;
- if (ms) {
- while (alen && a->name[alen-1] == '.')
- alen--;
- while (blen && b->name[blen-1] == '.')
- blen--;
- }
- if (alen == blen) {
- if (strnicmp(a->name, b->name, alen) == 0)
- return 0;
- }
- return 1;
-}
-
-/*
- * Case sensitive compare of two isofs names.
+ * Compare of two isofs names.
*/
-static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a,
- struct qstr *b, int ms)
+static int isofs_dentry_cmp_common(
+ unsigned int len, const char *str,
+ const struct qstr *name, int ms, int ci)
{
int alen, blen;
/* A filename cannot end in '.' or we treat it like it has none */
- alen = a->len;
- blen = b->len;
+ alen = name->len;
+ blen = len;
if (ms) {
- while (alen && a->name[alen-1] == '.')
+ while (alen && name->name[alen-1] == '.')
alen--;
- while (blen && b->name[blen-1] == '.')
+ while (blen && str[blen-1] == '.')
blen--;
}
if (alen == blen) {
- if (strncmp(a->name, b->name, alen) == 0)
- return 0;
+ if (ci) {
+ if (strnicmp(name->name, str, alen) == 0)
+ return 0;
+ } else {
+ if (strncmp(name->name, str, alen) == 0)
+ return 0;
+ }
}
return 1;
}
static int
-isofs_hash(struct dentry *dentry, struct qstr *qstr)
+isofs_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 0);
}
static int
-isofs_hashi(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 0);
}
static int
-isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmp_common(dentry, a, b, 0);
+ return isofs_dentry_cmp_common(len, str, name, 0, 0);
}
static int
-isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmpi_common(dentry, a, b, 0);
+ return isofs_dentry_cmp_common(len, str, name, 0, 1);
}
#ifdef CONFIG_JOLIET
static int
-isofs_hash_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hash_common(dentry, qstr, 1);
}
static int
-isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr)
+isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
return isofs_hashi_common(dentry, qstr, 1);
}
static int
-isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmp_common(dentry, a, b, 1);
+ return isofs_dentry_cmp_common(len, str, name, 1, 0);
}
static int
-isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b)
+isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- return isofs_dentry_cmpi_common(dentry, a, b, 1);
+ return isofs_dentry_cmp_common(len, str, name, 1, 1);
}
#endif
@@ -932,7 +949,7 @@ root_found:
table += 2;
if (opt.check == 'r')
table++;
- s->s_root->d_op = &isofs_dentry_ops[table];
+ d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
kfree(opt.iocharset);
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abfd4280..679a849c3b27 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
qstr.name = compare;
qstr.len = dlen;
- return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr);
+ return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
+ dentry->d_name.len, dentry->d_name.name, &qstr);
}
/*
@@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
struct inode *inode;
struct page *page;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
page = alloc_page(GFP_USER);
if (!page)
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a4..853b8e300084 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
return &f->vfs_inode;
}
-static void jffs2_destroy_inode(struct inode *inode)
+static void jffs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
}
+static void jffs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, jffs2_i_callback);
+}
+
static void jffs2_i_init_once(void *foo)
{
struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 231ca4af9bce..a151cbdec626 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -18,6 +18,7 @@
*/
#include <linux/fs.h>
+#include <linux/namei.h>
#include <linux/ctype.h>
#include <linux/quotaops.h>
#include <linux/exportfs.h>
@@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
jfs_info("jfs_lookup: name = %s", name);
if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
- dentry->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(dentry, &jfs_ci_dentry_operations);
if ((name[0] == '.') && (len == 1))
inum = dip->i_ino;
@@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
dentry = d_splice_alias(ip, dentry);
if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
- dentry->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(dentry, &jfs_ci_dentry_operations);
return dentry;
}
@@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
.llseek = generic_file_llseek,
};
-static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
+static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
+ struct qstr *this)
{
unsigned long hash;
int i;
@@ -1586,32 +1588,61 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
return 0;
}
-static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b)
+static int jfs_ci_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
int i, result = 1;
- if (a->len != b->len)
+ if (len != name->len)
goto out;
- for (i=0; i < a->len; i++) {
- if (tolower(a->name[i]) != tolower(b->name[i]))
+ for (i=0; i < len; i++) {
+ if (tolower(str[i]) != tolower(name->name[i]))
goto out;
}
result = 0;
+out:
+ return result;
+}
+static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
/*
- * We want creates to preserve case. A negative dentry, a, that
- * has a different case than b may cause a new entry to be created
- * with the wrong case. Since we can't tell if a comes from a negative
- * dentry, we blindly replace it with b. This should be harmless if
- * a is not a negative dentry.
+ * This is not negative dentry. Always valid.
+ *
+ * Note, rename() to existing directory entry will have ->d_inode,
+ * and will use existing name which isn't specified name by user.
+ *
+ * We may be able to drop this positive dentry here. But dropping
+ * positive dentry isn't good idea. So it's unsupported like
+ * rename("filename", "FILENAME") for now.
*/
- memcpy((unsigned char *)a->name, b->name, a->len);
-out:
- return result;
+ if (dentry->d_inode)
+ return 1;
+
+ /*
+ * This may be nfsd (or something), anyway, we can't see the
+ * intent of this. So, since this can be for creation, drop it.
+ */
+ if (!nd)
+ return 0;
+
+ /*
+ * Drop the negative dentry, in order to make sure to use the
+ * case sensitive name which is specified by user if this is
+ * for creation.
+ */
+ if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
+ if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+ return 0;
+ }
+ return 1;
}
const struct dentry_operations jfs_ci_dentry_operations =
{
.d_hash = jfs_ci_hash,
.d_compare = jfs_ci_compare,
+ .d_revalidate = jfs_ci_revalidate,
};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3bf..3150d766e0d4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
return &jfs_inode->vfs_inode;
}
+static void jfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct jfs_inode_info *ji = JFS_IP(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(jfs_inode_cachep, ji);
+}
+
static void jfs_destroy_inode(struct inode *inode)
{
struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
ji->active_ag = -1;
}
spin_unlock_irq(&ji->ag_lock);
- kmem_cache_free(jfs_inode_cachep, ji);
+ call_rcu(&inode->i_rcu, jfs_i_callback);
}
static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
goto out_no_root;
if (sbi->mntflag & JFS_OS2)
- sb->s_root->d_op = &jfs_ci_dentry_operations;
+ d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
/* logical blocks are represented by 40 bits in pxd_t, etc. */
sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528ad..889311e3d06b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,11 @@
#include <asm/uaccess.h>
+static inline int simple_positive(struct dentry *dentry)
+{
+ return dentry->d_inode && !d_unhashed(dentry);
+}
+
int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
@@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
* Retaining negative dentries for an in-memory filesystem just wastes
* memory and lookup time: arrange for them to be deleted immediately.
*/
-static int simple_delete_dentry(struct dentry *dentry)
+static int simple_delete_dentry(const struct dentry *dentry)
{
return 1;
}
@@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
- dentry->d_op = &simple_dentry_operations;
+ d_set_d_op(dentry, &simple_dentry_operations);
d_add(dentry, NULL);
return NULL;
}
@@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
{
- mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
+ struct dentry *dentry = file->f_path.dentry;
+ mutex_lock(&dentry->d_inode->i_mutex);
switch (origin) {
case 1:
offset += file->f_pos;
@@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
if (offset >= 0)
break;
default:
- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
struct dentry *cursor = file->private_data;
loff_t n = file->f_pos - 2;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ /* d_lock not required for cursor */
list_del(&cursor->d_u.d_child);
- p = file->f_path.dentry->d_subdirs.next;
- while (n && p != &file->f_path.dentry->d_subdirs) {
+ p = dentry->d_subdirs.next;
+ while (n && p != &dentry->d_subdirs) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- if (!d_unhashed(next) && next->d_inode)
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(next))
n--;
+ spin_unlock(&next->d_lock);
p = p->next;
}
list_add_tail(&cursor->d_u.d_child, p);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
}
}
- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ mutex_unlock(&dentry->d_inode->i_mutex);
return offset;
}
@@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
i++;
/* fallthrough */
default:
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
if (filp->f_pos == 2)
list_move(q, &dentry->d_subdirs);
for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
struct dentry *next;
next = list_entry(p, struct dentry, d_u.d_child);
- if (d_unhashed(next) || !next->d_inode)
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
+ if (!simple_positive(next)) {
+ spin_unlock(&next->d_lock);
continue;
+ }
- spin_unlock(&dcache_lock);
+ spin_unlock(&next->d_lock);
+ spin_unlock(&dentry->d_lock);
if (filldir(dirent, next->d_name.name,
next->d_name.len, filp->f_pos,
next->d_inode->i_ino,
dt_type(next->d_inode)) < 0)
return 0;
- spin_lock(&dcache_lock);
+ spin_lock(&dentry->d_lock);
+ spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
/* next is still alive */
list_move(q, p);
+ spin_unlock(&next->d_lock);
p = q;
filp->f_pos++;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
}
return 0;
}
@@ -259,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
return 0;
}
-static inline int simple_positive(struct dentry *dentry)
-{
- return dentry->d_inode && !d_unhashed(dentry);
-}
-
int simple_empty(struct dentry *dentry)
{
struct dentry *child;
int ret = 0;
- spin_lock(&dcache_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
- if (simple_positive(child))
+ spin_lock(&dentry->d_lock);
+ list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(child)) {
+ spin_unlock(&child->d_lock);
goto out;
+ }
+ spin_unlock(&child->d_lock);
+ }
ret = 1;
out:
- spin_unlock(&dcache_lock);
+ spin_unlock(&dentry->d_lock);
return ret;
}
diff --git a/fs/locks.c b/fs/locks.c
index 8729347bcd1a..08415b2a6d36 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1389,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out;
if ((arg == F_WRLCK)
- && ((atomic_read(&dentry->d_count) > 1)
+ && ((dentry->d_count > 1)
|| (atomic_read(&inode->i_count) > 1)))
goto out;
}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098f..03b8c240aeda 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
return __logfs_iget(sb, ino);
}
+static void logfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
+}
+
static void __logfs_destroy_inode(struct inode *inode)
{
struct logfs_inode *li = logfs_inode(inode);
BUG_ON(li->li_block);
list_del(&li->li_freeing_list);
- kmem_cache_free(logfs_inode_cache, li);
+ call_rcu(&inode->i_rcu, logfs_i_callback);
}
static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a34..ae0b83f476a6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void minix_destroy_inode(struct inode *inode)
+static void minix_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(minix_inode_cachep, minix_i(inode));
}
+static void minix_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, minix_i_callback);
+}
+
static void init_once(void *foo)
{
struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3accef..1b9e07728a9f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
struct inode * inode = NULL;
ino_t ino;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/namei.c b/fs/namei.c
index 4ff7ca530533..1e2f2197d809 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
/*
* This does basic POSIX ACL permission checking
*/
-static int acl_permission_check(struct inode *inode, int mask,
- int (*check_acl)(struct inode *inode, int mask))
+static int acl_permission_check_rcu(struct inode *inode, int mask, unsigned int flags,
+ int (*check_acl_rcu)(struct inode *inode, int mask, unsigned int flags))
{
umode_t mode = inode->i_mode;
@@ -179,8 +179,8 @@ static int acl_permission_check(struct inode *inode, int mask,
if (current_fsuid() == inode->i_uid)
mode >>= 6;
else {
- if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
- int error = check_acl(inode, mask);
+ if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl_rcu) {
+ int error = check_acl_rcu(inode, mask, flags);
if (error != -EAGAIN)
return error;
}
@@ -197,6 +197,86 @@ static int acl_permission_check(struct inode *inode, int mask,
return -EACCES;
}
+static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
+ int (*check_acl)(struct inode *inode, int mask))
+{
+ umode_t mode = inode->i_mode;
+
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+
+ if (current_fsuid() == inode->i_uid)
+ mode >>= 6;
+ else {
+ if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
+ if (flags) {
+ return -ECHILD;
+ } else {
+ int error = check_acl(inode, mask);
+ if (error != -EAGAIN)
+ return error;
+ }
+ }
+
+ if (in_group_p(inode->i_gid))
+ mode >>= 3;
+ }
+
+ /*
+ * If the DACs are ok we don't need any capability check.
+ */
+ if ((mask & ~mode) == 0)
+ return 0;
+ return -EACCES;
+}
+
+/**
+ * generic_permission_rcu - check for access rights on a Posix-like filesystem
+ * @inode: inode to check access rights for
+ * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @check_acl_rcu: optional callback to check for Posix ACLs
+ * @flags IPERM_FLAG_ flags.
+ *
+ * Used to check for read/write/execute permissions on a file.
+ * We use "fsuid" for this, letting us set arbitrary permissions
+ * for filesystem access without changing the "normal" uids which
+ * are used for other things.
+ *
+ * generic_permission_rcu must be rcu-walk aware. It should return
+ * -ECHILD in case an rcu-walk request cannot be satisfied (eg.
+ * requires blocking or too much thought!). It would then be called
+ * again in ref-walk mode.
+ */
+int generic_permission_rcu(struct inode *inode, int mask, unsigned int flags,
+ int (*check_acl_rcu)(struct inode *inode, int mask, unsigned int flags))
+{
+ int ret;
+
+ /*
+ * Do the basic POSIX ACL permission checks.
+ */
+ ret = acl_permission_check_rcu(inode, mask, flags, check_acl_rcu);
+ if (ret != -EACCES)
+ return ret;
+
+ /*
+ * Read/write DACs are always overridable.
+ * Executable DACs are overridable if at least one exec bit is set.
+ */
+ if (!(mask & MAY_EXEC) || execute_ok(inode))
+ if (capable(CAP_DAC_OVERRIDE))
+ return 0;
+
+ /*
+ * Searching includes executable on directories, else just read.
+ */
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
+ if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
+ if (capable(CAP_DAC_READ_SEARCH))
+ return 0;
+
+ return -EACCES;
+}
+
/**
* generic_permission - check for access rights on a Posix-like filesystem
* @inode: inode to check access rights for
@@ -216,7 +296,7 @@ int generic_permission(struct inode *inode, int mask,
/*
* Do the basic POSIX ACL permission checks.
*/
- ret = acl_permission_check(inode, mask, check_acl);
+ ret = acl_permission_check(inode, mask, 0, check_acl);
if (ret != -EACCES)
return ret;
@@ -272,8 +352,14 @@ int inode_permission(struct inode *inode, int mask)
if (inode->i_op->permission)
retval = inode->i_op->permission(inode, mask);
+ else if (inode->i_op->permission_rcu)
+ retval = inode->i_op->permission_rcu(inode, mask, 0);
+ else if (inode->i_op->check_acl_rcu)
+ retval = generic_permission_rcu(inode, mask, 0,
+ inode->i_op->check_acl_rcu);
else
- retval = generic_permission(inode, mask, inode->i_op->check_acl);
+ retval = generic_permission(inode, mask,
+ inode->i_op->check_acl);
if (retval)
return retval;
@@ -362,6 +448,18 @@ void path_get(struct path *path)
EXPORT_SYMBOL(path_get);
/**
+ * path_get_long - get a long reference to a path
+ * @path: path to get the reference to
+ *
+ * Given a path increment the reference count to the dentry and the vfsmount.
+ */
+void path_get_long(struct path *path)
+{
+ mntget_long(path->mnt);
+ dget(path->dentry);
+}
+
+/**
* path_put - put a reference to a path
* @path: path to put the reference to
*
@@ -375,6 +473,185 @@ void path_put(struct path *path)
EXPORT_SYMBOL(path_put);
/**
+ * path_put_long - put a long reference to a path
+ * @path: path to put the reference to
+ *
+ * Given a path decrement the reference count to the dentry and the vfsmount.
+ */
+void path_put_long(struct path *path)
+{
+ dput(path->dentry);
+ mntput_long(path->mnt);
+}
+
+/**
+ * nameidata_drop_rcu - drop this nameidata out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * Path walking has 2 modes, rcu-walk and ref-walk (see
+ * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
+ * to drop out of rcu-walk mode and take normal reference counts on dentries
+ * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
+ * refcounts at the last known good point before rcu-walk got stuck, so
+ * ref-walk may continue from there. If this is not successful (eg. a seqcount
+ * has changed), then failure is returned and path walk restarts from the
+ * beginning in ref-walk mode.
+ *
+ * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
+ * ref-walk. Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu(struct nameidata *nd)
+{
+ struct fs_struct *fs = current->fs;
+ struct dentry *dentry = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ if (nd->root.mnt) {
+ spin_lock(&fs->lock);
+ if (nd->root.mnt != fs->root.mnt ||
+ nd->root.dentry != fs->root.dentry)
+ goto err_root;
+ }
+ spin_lock(&dentry->d_lock);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err;
+ BUG_ON(nd->inode != dentry->d_inode);
+ spin_unlock(&dentry->d_lock);
+ if (nd->root.mnt) {
+ path_get(&nd->root);
+ spin_unlock(&fs->lock);
+ }
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ nd->flags &= ~LOOKUP_RCU;
+ return 0;
+err:
+ spin_unlock(&dentry->d_lock);
+err_root:
+ if (nd->root.mnt)
+ spin_unlock(&fs->lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU)
+ return nameidata_drop_rcu(nd);
+ return 0;
+}
+
+/**
+ * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @dentry: dentry to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
+ * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
+ * @nd. Must be called from rcu-walk context.
+ */
+static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
+{
+ struct fs_struct *fs = current->fs;
+ struct dentry *parent = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ if (nd->root.mnt) {
+ spin_lock(&fs->lock);
+ if (nd->root.mnt != fs->root.mnt ||
+ nd->root.dentry != fs->root.dentry)
+ goto err_root;
+ }
+ spin_lock(&parent->d_lock);
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err;
+ /*
+ * If the sequence check on the child dentry passed, then the child has
+ * not been removed from its parent. This means the parent dentry must
+ * be valid and able to take a reference at this point.
+ */
+ BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
+ BUG_ON(!parent->d_count);
+ parent->d_count++;
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
+ if (nd->root.mnt) {
+ path_get(&nd->root);
+ spin_unlock(&fs->lock);
+ }
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ nd->flags &= ~LOOKUP_RCU;
+ return 0;
+err:
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
+err_root:
+ if (nd->root.mnt)
+ spin_unlock(&fs->lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
+{
+ if (nd->flags & LOOKUP_RCU)
+ return nameidata_dentry_drop_rcu(nd, dentry);
+ return 0;
+}
+
+/**
+ * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
+ * @nd: nameidata pathwalk data to drop
+ * @Returns: 0 on success, -ECHLID on failure
+ *
+ * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
+ * nd->path should be the final element of the lookup, so nd->root is discarded.
+ * Must be called from rcu-walk context.
+ */
+static int nameidata_drop_rcu_last(struct nameidata *nd)
+{
+ struct dentry *dentry = nd->path.dentry;
+
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ nd->flags &= ~LOOKUP_RCU;
+ nd->root.mnt = NULL;
+ spin_lock(&dentry->d_lock);
+ if (!__d_rcu_to_refcount(dentry, nd->seq))
+ goto err_unlock;
+ BUG_ON(nd->inode != dentry->d_inode);
+ spin_unlock(&dentry->d_lock);
+
+ mntget(nd->path.mnt);
+
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+
+ return 0;
+
+err_unlock:
+ spin_unlock(&dentry->d_lock);
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ return -ECHILD;
+}
+
+/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
+static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
+{
+ if (likely(nd->flags & LOOKUP_RCU))
+ return nameidata_drop_rcu_last(nd);
+ return 0;
+}
+
+/**
* release_open_intent - free up open intent resources
* @nd: pointer to nameidata
*/
@@ -386,10 +663,30 @@ void release_open_intent(struct nameidata *nd)
fput(nd->intent.open.file);
}
+static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ if (dentry->d_op->d_revalidate_rcu) {
+ int status;
+ status = dentry->d_op->d_revalidate_rcu(dentry, nd);
+ if (!status && (nd->flags & LOOKUP_RCU))
+ status = -ECHILD;
+ if (status == -ECHILD) {
+ BUG_ON(!(nd->flags & LOOKUP_RCU));
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return status;
+ status = dentry->d_op->d_revalidate_rcu(dentry, nd);
+ }
+ return status;
+ } else
+ return dentry->d_op->d_revalidate(dentry, nd);
+}
+
static inline struct dentry *
do_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- int status = dentry->d_op->d_revalidate(dentry, nd);
+ int status;
+
+ status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
/*
* The dentry failed validation.
@@ -410,6 +707,17 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
return dentry;
}
+static inline int need_reval_dot(struct dentry *dentry)
+{
+ if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER)))
+ return 0;
+
+ if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
+ return 0;
+
+ return 1;
+}
+
/*
* force_reval_path - force revalidation of a dentry
*
@@ -433,13 +741,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
/*
* only check on filesystems where it's possible for the dentry to
- * become stale. It's assumed that if this flag is set then the
- * d_revalidate op will also be defined.
+ * become stale.
*/
- if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
+ if (!need_reval_dot(dentry))
return 0;
- status = dentry->d_op->d_revalidate(dentry, nd);
+ status = d_revalidate(dentry, nd);
if (status > 0)
return 0;
@@ -459,26 +766,34 @@ force_reval_path(struct path *path, struct nameidata *nd)
* short-cut DAC fails, then call ->permission() to do more
* complete permission check.
*/
-static int exec_permission(struct inode *inode)
+static inline int exec_permission(struct inode *inode, unsigned int flags)
{
int ret;
- if (inode->i_op->permission) {
+ if (inode->i_op->permission_rcu) {
+ ret = inode->i_op->permission_rcu(inode, MAY_EXEC, flags);
+ } else if (inode->i_op->permission) {
+ if (flags)
+ return -ECHILD;
ret = inode->i_op->permission(inode, MAY_EXEC);
- if (!ret)
- goto ok;
- return ret;
+ } else if (inode->i_op->check_acl_rcu) {
+ ret = acl_permission_check_rcu(inode, MAY_EXEC, flags,
+ inode->i_op->check_acl_rcu);
+ } else {
+ ret = acl_permission_check(inode, MAY_EXEC, flags,
+ inode->i_op->check_acl);
}
- ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
- if (!ret)
+ if (likely(!ret))
goto ok;
+ if (ret == -ECHILD)
+ return ret;
if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
goto ok;
return ret;
ok:
- return security_inode_permission(inode, MAY_EXEC);
+ return security_inode_exec_permission(inode, flags);
}
static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +804,23 @@ static __always_inline void set_root(struct nameidata *nd)
static int link_path_walk(const char *, struct nameidata *);
+static __always_inline void set_root_rcu(struct nameidata *nd)
+{
+ if (!nd->root.mnt) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->root = fs->root;
+ } while (read_seqcount_retry(&fs->seq, seq));
+ }
+}
+
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
+ int ret;
+
if (IS_ERR(link))
goto fail;
@@ -500,8 +830,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
nd->path = nd->root;
path_get(&nd->root);
}
+ nd->inode = nd->path.dentry->d_inode;
- return link_path_walk(link, nd);
+ ret = link_path_walk(link, nd);
+ return ret;
fail:
path_put(&nd->path);
return PTR_ERR(link);
@@ -516,11 +848,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
{
- dput(nd->path.dentry);
- if (nd->path.mnt != path->mnt) {
- mntput(nd->path.mnt);
- nd->path.mnt = path->mnt;
+ if (!(nd->flags & LOOKUP_RCU)) {
+ dput(nd->path.dentry);
+ if (nd->path.mnt != path->mnt)
+ mntput(nd->path.mnt);
}
+ nd->path.mnt = path->mnt;
nd->path.dentry = path->dentry;
}
@@ -535,9 +868,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
if (path->mnt != nd->path.mnt) {
path_to_nameidata(path, nd);
+ nd->inode = nd->path.dentry->d_inode;
dget(dentry);
}
mntget(path->mnt);
+
nd->last_type = LAST_BIND;
*p = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(*p);
@@ -591,6 +926,20 @@ loop:
return err;
}
+static int follow_up_rcu(struct path *path)
+{
+ struct vfsmount *parent;
+ struct dentry *mountpoint;
+
+ parent = path->mnt->mnt_parent;
+ if (parent == path->mnt)
+ return 0;
+ mountpoint = path->mnt->mnt_mountpoint;
+ path->dentry = mountpoint;
+ path->mnt = parent;
+ return 1;
+}
+
int follow_up(struct path *path)
{
struct vfsmount *parent;
@@ -612,9 +961,24 @@ int follow_up(struct path *path)
return 1;
}
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
+/*
+ * serialization is taken care of in namespace.c
*/
+static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
+ struct inode **inode)
+{
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted;
+ mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+ if (!mounted)
+ return;
+ path->mnt = mounted;
+ path->dentry = mounted->mnt_root;
+ nd->seq = read_seqcount_begin(&path->dentry->d_seq);
+ *inode = path->dentry->d_inode;
+ }
+}
+
static int __follow_mount(struct path *path)
{
int res = 0;
@@ -645,9 +1009,6 @@ static void follow_mount(struct path *path)
}
}
-/* no need for dcache_lock, as serialization is taken care in
- * namespace.c
- */
int follow_down(struct path *path)
{
struct vfsmount *mounted;
@@ -663,7 +1024,42 @@ int follow_down(struct path *path)
return 0;
}
-static __always_inline void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot_rcu(struct nameidata *nd)
+{
+ struct inode *inode = nd->inode;
+
+ set_root_rcu(nd);
+
+ while(1) {
+ if (nd->path.dentry == nd->root.dentry &&
+ nd->path.mnt == nd->root.mnt) {
+ break;
+ }
+ if (nd->path.dentry != nd->path.mnt->mnt_root) {
+ struct dentry *old = nd->path.dentry;
+ struct dentry *parent = old->d_parent;
+ unsigned seq;
+
+ seq = read_seqcount_begin(&parent->d_seq);
+ if (read_seqcount_retry(&old->d_seq, nd->seq))
+ return -ECHILD;
+ inode = parent->d_inode;
+ nd->path.dentry = parent;
+ nd->seq = seq;
+ break;
+ }
+ if (!follow_up_rcu(&nd->path))
+ break;
+ nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+ inode = nd->path.dentry->d_inode;
+ }
+ __follow_mount_rcu(nd, &nd->path, &inode);
+ nd->inode = inode;
+
+ return 0;
+}
+
+static void follow_dotdot(struct nameidata *nd)
{
set_root(nd);
@@ -684,6 +1080,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
break;
}
follow_mount(&nd->path);
+ nd->inode = nd->path.dentry->d_inode;
}
/*
@@ -721,17 +1118,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
* It _is_ time-critical.
*/
static int do_lookup(struct nameidata *nd, struct qstr *name,
- struct path *path)
+ struct path *path, struct inode **inode)
{
struct vfsmount *mnt = nd->path.mnt;
- struct dentry *dentry, *parent;
+ struct dentry *dentry, *parent = nd->path.dentry;
struct inode *dir;
/*
* See if the low-level filesystem might want
* to use its own hash..
*/
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
- int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
+ if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
+ int err = parent->d_op->d_hash(parent, nd->inode, name);
if (err < 0)
return err;
}
@@ -741,21 +1138,49 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
* of a false negative due to a concurrent rename, we're going to
* do the non-racy lookup, below.
*/
- dentry = __d_lookup(nd->path.dentry, name);
- if (!dentry)
- goto need_lookup;
+ if (nd->flags & LOOKUP_RCU) {
+ unsigned seq;
+
+ *inode = nd->inode;
+ dentry = __d_lookup_rcu(parent, name, &seq, inode);
+ if (!dentry) {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ goto need_lookup;
+ }
+ /* Memory barrier in read_seqcount_begin of child is enough */
+ if (__read_seqcount_retry(&parent->d_seq, nd->seq))
+ return -ECHILD;
+
+ nd->seq = seq;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER) {
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return -ECHILD;
+ }
+ goto need_revalidate;
+ }
+ path->mnt = mnt;
+ path->dentry = dentry;
+ __follow_mount_rcu(nd, path, inode);
+ } else {
+ dentry = __d_lookup(parent, name);
+ if (!dentry)
+ goto need_lookup;
found:
- if (dentry->d_op && dentry->d_op->d_revalidate)
- goto need_revalidate;
+ if (dentry->d_flags & DCACHE_OP_REVALIDATE)
+ goto need_revalidate;
done:
- path->mnt = mnt;
- path->dentry = dentry;
- __follow_mount(path);
+ path->mnt = mnt;
+ path->dentry = dentry;
+ __follow_mount(path);
+ *inode = path->dentry->d_inode;
+ }
return 0;
need_lookup:
- parent = nd->path.dentry;
dir = parent->d_inode;
+ BUG_ON(nd->inode != dir);
mutex_lock(&dir->i_mutex);
/*
@@ -785,8 +1210,11 @@ need_lookup:
need_revalidate:
dentry = do_revalidate(dentry, nd);
- if (!dentry)
+ if (!dentry) {
+ if (nameidata_drop_rcu_maybe(nd))
+ return -ECHILD;
goto need_lookup;
+ }
if (IS_ERR(dentry))
goto fail;
goto done;
@@ -817,7 +1245,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
static int link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
- struct inode *inode;
int err;
unsigned int lookup_flags = nd->flags;
@@ -826,18 +1253,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (!*name)
goto return_reval;
- inode = nd->path.dentry->d_inode;
if (nd->depth)
lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
/* At this point we know we have a real path component. */
for(;;) {
+ struct inode *inode;
unsigned long hash;
struct qstr this;
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
- err = exec_permission(inode);
+ if (nd->flags & LOOKUP_RCU) {
+ err = exec_permission(nd->inode, IPERM_FLAG_RCU);
+ if (err == -ECHILD) {
+ if (nameidata_drop_rcu(nd))
+ return -ECHILD;
+ goto exec_again;
+ }
+ } else {
+exec_again:
+ err = exec_permission(nd->inode, 0);
+ }
if (err)
break;
@@ -868,37 +1305,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
- follow_dotdot(nd);
- inode = nd->path.dentry->d_inode;
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
/* fallthrough */
case 1:
continue;
}
/* This does the actual lookups.. */
- err = do_lookup(nd, &this, &next);
+ err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
-
err = -ENOENT;
- inode = next.dentry->d_inode;
if (!inode)
goto out_dput;
if (inode->i_op->follow_link) {
+ /* We commonly drop rcu-walk here */
+ if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+ return -ECHILD;
+ BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
goto return_err;
+ nd->inode = nd->path.dentry->d_inode;
err = -ENOENT;
- inode = nd->path.dentry->d_inode;
- if (!inode)
+ if (!nd->inode)
break;
- } else
+ } else {
path_to_nameidata(&next, nd);
+ nd->inode = inode;
+ }
err = -ENOTDIR;
- if (!inode->i_op->lookup)
+ if (!nd->inode->i_op->lookup)
break;
continue;
/* here ends the main loop */
@@ -913,32 +1357,39 @@ last_component:
if (this.name[0] == '.') switch (this.len) {
default:
break;
- case 2:
+ case 2:
if (this.name[1] != '.')
break;
- follow_dotdot(nd);
- inode = nd->path.dentry->d_inode;
+ if (nd->flags & LOOKUP_RCU) {
+ if (follow_dotdot_rcu(nd))
+ return -ECHILD;
+ } else
+ follow_dotdot(nd);
/* fallthrough */
case 1:
goto return_reval;
}
- err = do_lookup(nd, &this, &next);
+ err = do_lookup(nd, &this, &next, &inode);
if (err)
break;
- inode = next.dentry->d_inode;
if (follow_on_final(inode, lookup_flags)) {
+ if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
+ return -ECHILD;
+ BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
goto return_err;
- inode = nd->path.dentry->d_inode;
- } else
+ nd->inode = nd->path.dentry->d_inode;
+ } else {
path_to_nameidata(&next, nd);
+ nd->inode = inode;
+ }
err = -ENOENT;
- if (!inode)
+ if (!nd->inode)
break;
if (lookup_flags & LOOKUP_DIRECTORY) {
err = -ENOTDIR;
- if (!inode->i_op->lookup)
+ if (!nd->inode->i_op->lookup)
break;
}
goto return_base;
@@ -958,25 +1409,43 @@ return_reval:
* We bypassed the ordinary revalidation routines.
* We may need to check the cached dentry for staleness.
*/
- if (nd->path.dentry && nd->path.dentry->d_sb &&
- (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
- err = -ESTALE;
+ if (need_reval_dot(nd->path.dentry)) {
/* Note: we do not d_invalidate() */
- if (!nd->path.dentry->d_op->d_revalidate(
- nd->path.dentry, nd))
+ err = d_revalidate(nd->path.dentry, nd);
+ if (!err)
+ err = -ESTALE;
+ if (err < 0)
break;
}
return_base:
+ if (nameidata_drop_rcu_last_maybe(nd))
+ return -ECHILD;
return 0;
out_dput:
- path_put_conditional(&next, nd);
+ if (!(nd->flags & LOOKUP_RCU))
+ path_put_conditional(&next, nd);
break;
}
- path_put(&nd->path);
+ if (!(nd->flags & LOOKUP_RCU))
+ path_put(&nd->path);
return_err:
return err;
}
+static inline int path_walk_rcu(const char *name, struct nameidata *nd)
+{
+ current->total_link_count = 0;
+
+ return link_path_walk(name, nd);
+}
+
+static inline int path_walk_simple(const char *name, struct nameidata *nd)
+{
+ current->total_link_count = 0;
+
+ return link_path_walk(name, nd);
+}
+
static int path_walk(const char *name, struct nameidata *nd)
{
struct path save = nd->path;
@@ -1002,6 +1471,93 @@ static int path_walk(const char *name, struct nameidata *nd)
return result;
}
+static void path_finish_rcu(struct nameidata *nd)
+{
+ if (nd->flags & LOOKUP_RCU) {
+ /* RCU dangling. Cancel it. */
+ nd->flags &= ~LOOKUP_RCU;
+ nd->root.mnt = NULL;
+ rcu_read_unlock();
+ br_read_unlock(vfsmount_lock);
+ }
+ if (nd->file)
+ fput(nd->file);
+}
+
+static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
+{
+ int retval = 0;
+ int fput_needed;
+ struct file *file;
+
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags | LOOKUP_RCU;
+ nd->depth = 0;
+ nd->root.mnt = NULL;
+ nd->file = NULL;
+
+ if (*name=='/') {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->root = fs->root;
+ nd->path = nd->root;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+
+ } else if (dfd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
+ unsigned seq;
+
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+
+ do {
+ seq = read_seqcount_begin(&fs->seq);
+ nd->path = fs->pwd;
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ } while (read_seqcount_retry(&fs->seq, seq));
+
+ } else {
+ struct dentry *dentry;
+
+ file = fget_light(dfd, &fput_needed);
+ retval = -EBADF;
+ if (!file)
+ goto out_fail;
+
+ dentry = file->f_path.dentry;
+
+ retval = -ENOTDIR;
+ if (!S_ISDIR(dentry->d_inode->i_mode))
+ goto fput_fail;
+
+ retval = file_permission(file, MAY_EXEC);
+ if (retval)
+ goto fput_fail;
+
+ nd->path = file->f_path;
+ if (fput_needed)
+ nd->file = file;
+
+ nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+ br_read_lock(vfsmount_lock);
+ rcu_read_lock();
+ }
+ nd->inode = nd->path.dentry->d_inode;
+ return 0;
+
+fput_fail:
+ fput_light(file, fput_needed);
+out_fail:
+ return retval;
+}
+
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
int retval = 0;
@@ -1042,6 +1598,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
fput_light(file, fput_needed);
}
+ nd->inode = nd->path.dentry->d_inode;
return 0;
fput_fail:
@@ -1054,16 +1611,53 @@ out_fail:
static int do_path_lookup(int dfd, const char *name,
unsigned int flags, struct nameidata *nd)
{
- int retval = path_init(dfd, name, flags, nd);
- if (!retval)
- retval = path_walk(name, nd);
- if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->path.dentry->d_inode))
- audit_inode(name, nd->path.dentry);
+ int retval;
+
+ /*
+ * Path walking is largely split up into 2 different synchronisation
+ * schemes, rcu-walk and ref-walk (explained in
+ * Documentation/filesystems/path-lookup.txt). These share much of the
+ * path walk code, but some things particularly setup, cleanup, and
+ * following mounts are sufficiently divergent that functions are
+ * duplicated. Typically there is a function foo(), and its RCU
+ * analogue, foo_rcu().
+ *
+ * -ECHILD is the error number of choice (just to avoid clashes) that
+ * is returned if some aspect of an rcu-walk fails. Such an error must
+ * be handled by restarting a traditional ref-walk (which will always
+ * be able to complete).
+ */
+ retval = path_init_rcu(dfd, name, flags, nd);
+ if (unlikely(retval))
+ return retval;
+ retval = path_walk_rcu(name, nd);
+ path_finish_rcu(nd);
if (nd->root.mnt) {
path_put(&nd->root);
nd->root.mnt = NULL;
}
+
+ if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
+ /* slower, locked walk */
+ if (retval == -ESTALE)
+ flags |= LOOKUP_REVAL;
+ retval = path_init(dfd, name, flags, nd);
+ if (unlikely(retval))
+ return retval;
+ retval = path_walk(name, nd);
+ if (nd->root.mnt) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
+ }
+
+ if (likely(!retval)) {
+ if (unlikely(!audit_dummy_context())) {
+ if (nd->path.dentry && nd->inode)
+ audit_inode(name, nd->path.dentry);
+ }
+ }
+
return retval;
}
@@ -1106,10 +1700,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
path_get(&nd->path);
nd->root = nd->path;
path_get(&nd->root);
+ nd->inode = nd->path.dentry->d_inode;
retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
- nd->path.dentry->d_inode))
+ nd->inode))
audit_inode(name, nd->path.dentry);
path_put(&nd->root);
@@ -1125,7 +1720,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *dentry;
int err;
- err = exec_permission(inode);
+ err = exec_permission(inode, 0);
if (err)
return ERR_PTR(err);
@@ -1133,8 +1728,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
* See if the low-level filesystem might want
* to use its own hash..
*/
- if (base->d_op && base->d_op->d_hash) {
- err = base->d_op->d_hash(base, name);
+ if (base->d_flags & DCACHE_OP_HASH) {
+ err = base->d_op->d_hash(base, inode, name);
dentry = ERR_PTR(err);
if (err < 0)
goto out;
@@ -1147,7 +1742,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
*/
dentry = d_lookup(base, name);
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+ if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE_EITHER))
dentry = do_revalidate(dentry, nd);
if (!dentry)
@@ -1490,6 +2085,7 @@ out_unlock:
mutex_unlock(&dir->d_inode->i_mutex);
dput(nd->path.dentry);
nd->path.dentry = path->dentry;
+
if (error)
return error;
/* Don't check for write permission, don't truncate */
@@ -1584,6 +2180,9 @@ exit:
return ERR_PTR(error);
}
+/*
+ * Handle O_CREAT case for do_filp_open
+ */
static struct file *do_last(struct nameidata *nd, struct path *path,
int open_flag, int acc_mode,
int mode, const char *pathname)
@@ -1597,50 +2196,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
follow_dotdot(nd);
dir = nd->path.dentry;
case LAST_DOT:
- if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
- if (!dir->d_op->d_revalidate(dir, nd)) {
+ if (need_reval_dot(dir)) {
+ error = d_revalidate(nd->path.dentry, nd);
+ if (!error)
error = -ESTALE;
+ if (error < 0)
goto exit;
- }
}
/* fallthrough */
case LAST_ROOT:
- if (open_flag & O_CREAT)
- goto exit;
- /* fallthrough */
+ goto exit;
case LAST_BIND:
audit_inode(pathname, dir);
goto ok;
}
/* trailing slashes? */
- if (nd->last.name[nd->last.len]) {
- if (open_flag & O_CREAT)
- goto exit;
- nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
- }
-
- /* just plain open? */
- if (!(open_flag & O_CREAT)) {
- error = do_lookup(nd, &nd->last, path);
- if (error)
- goto exit;
- error = -ENOENT;
- if (!path->dentry->d_inode)
- goto exit_dput;
- if (path->dentry->d_inode->i_op->follow_link)
- return NULL;
- error = -ENOTDIR;
- if (nd->flags & LOOKUP_DIRECTORY) {
- if (!path->dentry->d_inode->i_op->lookup)
- goto exit_dput;
- }
- path_to_nameidata(path, nd);
- audit_inode(pathname, nd->path.dentry);
- goto ok;
- }
+ if (nd->last.name[nd->last.len])
+ goto exit;
- /* OK, it's O_CREAT */
mutex_lock(&dir->d_inode->i_mutex);
path->dentry = lookup_hash(nd);
@@ -1711,8 +2285,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
return NULL;
path_to_nameidata(path, nd);
+ nd->inode = path->dentry->d_inode;
error = -EISDIR;
- if (S_ISDIR(path->dentry->d_inode->i_mode))
+ if (S_ISDIR(nd->inode->i_mode))
goto exit;
ok:
filp = finish_open(nd, open_flag, acc_mode);
@@ -1743,7 +2318,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
struct path path;
int count = 0;
int flag = open_to_namei_flags(open_flag);
- int force_reval = 0;
+ int flags;
if (!(open_flag & O_CREAT))
mode = 0;
@@ -1772,54 +2347,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
if (open_flag & O_APPEND)
acc_mode |= MAY_APPEND;
- /* find the parent */
-reval:
- error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
+ flags = LOOKUP_OPEN;
+ if (open_flag & O_CREAT) {
+ flags |= LOOKUP_CREATE;
+ if (open_flag & O_EXCL)
+ flags |= LOOKUP_EXCL;
+ }
+ if (open_flag & O_DIRECTORY)
+ flags |= LOOKUP_DIRECTORY;
+ if (!(open_flag & O_NOFOLLOW))
+ flags |= LOOKUP_FOLLOW;
+
+ filp = get_empty_filp();
+ if (!filp)
+ return ERR_PTR(-ENFILE);
+
+ filp->f_flags = open_flag;
+ nd.intent.open.file = filp;
+ nd.intent.open.flags = flag;
+ nd.intent.open.create_mode = mode;
+
+ if (open_flag & O_CREAT)
+ goto creat;
+
+ /* !O_CREAT, simple open */
+ error = do_path_lookup(dfd, pathname, flags, &nd);
+ if (unlikely(error))
+ goto out_filp;
+ error = -ELOOP;
+ if (!(nd.flags & LOOKUP_FOLLOW)) {
+ if (nd.inode->i_op->follow_link)
+ goto out_path;
+ }
+ error = -ENOTDIR;
+ if (nd.flags & LOOKUP_DIRECTORY) {
+ if (!nd.inode->i_op->lookup)
+ goto out_path;
+ }
+ audit_inode(pathname, nd.path.dentry);
+ filp = finish_open(&nd, open_flag, acc_mode);
+ return filp;
+
+creat:
+ /* OK, have to create the file. Find the parent. */
+ error = path_init_rcu(dfd, pathname,
+ LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
if (error)
- return ERR_PTR(error);
- if (force_reval)
- nd.flags |= LOOKUP_REVAL;
+ goto out_filp;
+ error = path_walk_rcu(pathname, &nd);
+ path_finish_rcu(&nd);
+ if (unlikely(error == -ECHILD || error == -ESTALE)) {
+ /* slower, locked walk */
+ if (error == -ESTALE) {
+reval:
+ flags |= LOOKUP_REVAL;
+ }
+ error = path_init(dfd, pathname,
+ LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
+ if (error)
+ goto out_filp;
- current->total_link_count = 0;
- error = link_path_walk(pathname, &nd);
- if (error) {
- filp = ERR_PTR(error);
- goto out;
+ error = path_walk_simple(pathname, &nd);
}
- if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))
+ if (unlikely(error))
+ goto out_filp;
+ if (unlikely(!audit_dummy_context()))
audit_inode(pathname, nd.path.dentry);
/*
* We have the parent and last component.
*/
-
- error = -ENFILE;
- filp = get_empty_filp();
- if (filp == NULL)
- goto exit_parent;
- nd.intent.open.file = filp;
- filp->f_flags = open_flag;
- nd.intent.open.flags = flag;
- nd.intent.open.create_mode = mode;
- nd.flags &= ~LOOKUP_PARENT;
- nd.flags |= LOOKUP_OPEN;
- if (open_flag & O_CREAT) {
- nd.flags |= LOOKUP_CREATE;
- if (open_flag & O_EXCL)
- nd.flags |= LOOKUP_EXCL;
- }
- if (open_flag & O_DIRECTORY)
- nd.flags |= LOOKUP_DIRECTORY;
- if (!(open_flag & O_NOFOLLOW))
- nd.flags |= LOOKUP_FOLLOW;
+ nd.flags = flags;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
while (unlikely(!filp)) { /* trailing symlink */
struct path holder;
- struct inode *inode = path.dentry->d_inode;
void *cookie;
error = -ELOOP;
/* S_ISDIR part is a temporary automount kludge */
- if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
+ if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
goto exit_dput;
if (count++ == 32)
goto exit_dput;
@@ -1840,36 +2445,33 @@ reval:
goto exit_dput;
error = __do_follow_link(&path, &nd, &cookie);
if (unlikely(error)) {
+ if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
+ nd.inode->i_op->put_link(path.dentry, &nd, cookie);
/* nd.path had been dropped */
- if (!IS_ERR(cookie) && inode->i_op->put_link)
- inode->i_op->put_link(path.dentry, &nd, cookie);
- path_put(&path);
- release_open_intent(&nd);
- filp = ERR_PTR(error);
- goto out;
+ nd.path = path;
+ goto out_path;
}
holder = path;
nd.flags &= ~LOOKUP_PARENT;
filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
- if (inode->i_op->put_link)
- inode->i_op->put_link(holder.dentry, &nd, cookie);
+ if (nd.inode->i_op->put_link)
+ nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
path_put(&holder);
}
out:
if (nd.root.mnt)
path_put(&nd.root);
- if (filp == ERR_PTR(-ESTALE) && !force_reval) {
- force_reval = 1;
+ if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
goto reval;
- }
return filp;
exit_dput:
path_put_conditional(&path, &nd);
+out_path:
+ path_put(&nd.path);
+out_filp:
if (!IS_ERR(nd.intent.open.file))
release_open_intent(&nd);
-exit_parent:
- path_put(&nd.path);
filp = ERR_PTR(error);
goto out;
}
@@ -2130,12 +2732,10 @@ void dentry_unhash(struct dentry *dentry)
{
dget(dentry);
shrink_dcache_parent(dentry);
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) == 2)
+ if (dentry->d_count == 2)
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
}
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -2907,6 +3507,7 @@ EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
+EXPORT_SYMBOL(generic_permission_rcu);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
diff --git a/fs/namespace.c b/fs/namespace.c
index 3dbfc072ec70..744fcf675e74 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
mnt->mnt_group_id = 0;
}
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void add_mnt_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_add(mnt->mnt_pcp->mnt_count, n);
+#else
+ preempt_disable();
+ mnt->mnt_count += n;
+ preempt_enable();
+#endif
+}
+
+static inline void set_mnt_count(struct vfsmount *mnt, int n)
+{
+#ifdef CONFIG_SMP
+ this_cpu_write(mnt->mnt_pcp->mnt_count, n);
+#else
+ mnt->mnt_count = n;
+#endif
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void inc_mnt_count(struct vfsmount *mnt)
+{
+ add_mnt_count(mnt, 1);
+}
+
+/*
+ * vfsmount lock must be held for read
+ */
+static inline void dec_mnt_count(struct vfsmount *mnt)
+{
+ add_mnt_count(mnt, -1);
+}
+
+/*
+ * vfsmount lock must be held for write
+ */
+unsigned int count_mnt_count(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ unsigned int count = atomic_read(&mnt->mnt_longrefs);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
+ }
+
+ return count;
+#else
+ return mnt->mnt_count;
+#endif
+}
+
struct vfsmount *alloc_vfsmnt(const char *name)
{
struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -154,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
goto out_free_id;
}
- atomic_set(&mnt->mnt_count, 1);
+#ifdef CONFIG_SMP
+ mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
+ if (!mnt->mnt_pcp)
+ goto out_free_devname;
+
+ atomic_set(&mnt->mnt_longrefs, 1);
+#else
+ mnt->mnt_count = 1;
+ mnt->mnt_writers = 0;
+#endif
+
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -166,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
-#ifdef CONFIG_SMP
- mnt->mnt_writers = alloc_percpu(int);
- if (!mnt->mnt_writers)
- goto out_free_devname;
-#else
- mnt->mnt_writers = 0;
-#endif
}
return mnt;
@@ -219,7 +280,7 @@ EXPORT_SYMBOL_GPL(__mnt_is_readonly);
static inline void inc_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+ this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers++;
#endif
@@ -228,7 +289,7 @@ static inline void inc_mnt_writers(struct vfsmount *mnt)
static inline void dec_mnt_writers(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
- (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+ this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
mnt->mnt_writers--;
#endif
@@ -241,7 +302,7 @@ static unsigned int count_mnt_writers(struct vfsmount *mnt)
int cpu;
for_each_possible_cpu(cpu) {
- count += *per_cpu_ptr(mnt->mnt_writers, cpu);
+ count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
}
return count;
@@ -418,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
kfree(mnt->mnt_devname);
mnt_free_id(mnt);
#ifdef CONFIG_SMP
- free_percpu(mnt->mnt_writers);
+ free_percpu(mnt->mnt_pcp);
#endif
kmem_cache_free(mnt_cache, mnt);
}
@@ -492,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
/*
+ * Clear dentry's mounted state if it has no remaining mounts.
+ * vfsmount_lock must be held for write.
+ */
+static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
+{
+ unsigned u;
+
+ for (u = 0; u < HASH_SIZE; u++) {
+ struct vfsmount *p;
+
+ list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
+ if (p->mnt_mountpoint == dentry)
+ return;
+ }
+ }
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
+}
+
+/*
* vfsmount lock must be held for write
*/
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
@@ -502,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
mnt->mnt_mountpoint = mnt->mnt_root;
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_hash);
- old_path->dentry->d_mounted--;
+ dentry_reset_mounted(old_path->mnt, old_path->dentry);
}
/*
@@ -513,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
{
child_mnt->mnt_parent = mntget(mnt);
child_mnt->mnt_mountpoint = dget(dentry);
- dentry->d_mounted++;
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags |= DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
}
/*
@@ -629,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
return NULL;
}
-static inline void __mntput(struct vfsmount *mnt)
+static inline void mntfree(struct vfsmount *mnt)
{
struct super_block *sb = mnt->mnt_sb;
+
/*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
@@ -639,8 +724,8 @@ static inline void __mntput(struct vfsmount *mnt)
* to make r/w->r/o transitions.
*/
/*
- * atomic_dec_and_lock() used to deal with ->mnt_count decrements
- * provides barriers, so count_mnt_writers() below is safe. AV
+ * The locking used to deal with mnt_count decrement provides barriers,
+ * so count_mnt_writers() below is safe.
*/
WARN_ON(count_mnt_writers(mnt));
fsnotify_vfsmount_delete(mnt);
@@ -649,28 +734,113 @@ static inline void __mntput(struct vfsmount *mnt)
deactivate_super(sb);
}
-void mntput_no_expire(struct vfsmount *mnt)
-{
-repeat:
- if (atomic_add_unless(&mnt->mnt_count, -1, 1))
- return;
+#ifdef CONFIG_SMP
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+ if (!longrefs) {
+put_again:
+ br_read_lock(vfsmount_lock);
+ if (likely(atomic_read(&mnt->mnt_longrefs))) {
+ dec_mnt_count(mnt);
+ br_read_unlock(vfsmount_lock);
+ return;
+ }
+ br_read_unlock(vfsmount_lock);
+ } else {
+ BUG_ON(!atomic_read(&mnt->mnt_longrefs));
+ if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
+ return;
+ }
+
br_write_lock(vfsmount_lock);
- if (!atomic_dec_and_test(&mnt->mnt_count)) {
+ if (!longrefs)
+ dec_mnt_count(mnt);
+ else
+ atomic_dec(&mnt->mnt_longrefs);
+ if (count_mnt_count(mnt)) {
br_write_unlock(vfsmount_lock);
return;
}
- if (likely(!mnt->mnt_pinned)) {
+ if (unlikely(mnt->mnt_pinned)) {
+ add_mnt_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- __mntput(mnt);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
+ }
+ br_write_unlock(vfsmount_lock);
+ mntfree(mnt);
+}
+#else
+static inline void __mntput(struct vfsmount *mnt, int longrefs)
+{
+put_again:
+ dec_mnt_count(mnt);
+ if (likely(count_mnt_count(mnt)))
return;
+ br_write_lock(vfsmount_lock);
+ if (unlikely(mnt->mnt_pinned)) {
+ add_mnt_count(mnt, mnt->mnt_pinned + 1);
+ mnt->mnt_pinned = 0;
+ br_write_unlock(vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ goto put_again;
}
- atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
- mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto repeat;
+ mntfree(mnt);
+}
+#endif
+
+static void mntput_no_expire(struct vfsmount *mnt)
+{
+ __mntput(mnt, 0);
+}
+
+void mntput(struct vfsmount *mnt)
+{
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 0);
+ }
+}
+EXPORT_SYMBOL(mntput);
+
+struct vfsmount *mntget(struct vfsmount *mnt)
+{
+ if (mnt)
+ inc_mnt_count(mnt);
+ return mnt;
}
-EXPORT_SYMBOL(mntput_no_expire);
+EXPORT_SYMBOL(mntget);
+
+void mntput_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt) {
+ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
+ if (unlikely(mnt->mnt_expiry_mark))
+ mnt->mnt_expiry_mark = 0;
+ __mntput(mnt, 1);
+ }
+#else
+ mntput(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntput_long);
+
+struct vfsmount *mntget_long(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ if (mnt)
+ atomic_inc(&mnt->mnt_longrefs);
+ return mnt;
+#else
+ return mntget(mnt);
+#endif
+}
+EXPORT_SYMBOL(mntget_long);
void mnt_pin(struct vfsmount *mnt)
{
@@ -678,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
mnt->mnt_pinned++;
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
br_write_lock(vfsmount_lock);
if (mnt->mnt_pinned) {
- atomic_inc(&mnt->mnt_count);
+ inc_mnt_count(mnt);
mnt->mnt_pinned--;
}
br_write_unlock(vfsmount_lock);
}
-
EXPORT_SYMBOL(mnt_unpin);
static inline void mangle(struct seq_file *m, const char *s)
@@ -985,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
int minimum_refs = 0;
struct vfsmount *p;
- br_read_lock(vfsmount_lock);
+ /* write lock needed for count_mnt_count */
+ br_write_lock(vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
- actual_refs += atomic_read(&p->mnt_count);
+ actual_refs += count_mnt_count(p);
minimum_refs += 2;
}
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -1017,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- br_read_lock(vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- br_read_unlock(vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
@@ -1047,7 +1216,7 @@ void release_mounts(struct list_head *head)
dput(dentry);
mntput(m);
}
- mntput(mnt);
+ mntput_long(mnt);
}
}
@@ -1073,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
list_del_init(&p->mnt_child);
if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++;
- p->mnt_mountpoint->d_mounted--;
+ dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
}
change_mnt_propagation(p, MS_PRIVATE);
}
@@ -1102,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
flags & (MNT_FORCE | MNT_DETACH))
return -EINVAL;
- if (atomic_read(&mnt->mnt_count) != 2)
+ /*
+ * probably don't strictly need the lock here if we examined
+ * all race cases, but it's a slowpath.
+ */
+ br_write_lock(vfsmount_lock);
+ if (count_mnt_count(mnt) != 2) {
+ br_write_lock(vfsmount_lock);
return -EBUSY;
+ }
+ br_write_unlock(vfsmount_lock);
if (!xchg(&mnt->mnt_expiry_mark, 1))
return -EAGAIN;
@@ -1792,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
unlock:
up_write(&namespace_sem);
- mntput(newmnt);
+ mntput_long(newmnt);
return err;
}
@@ -2125,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
if (fs) {
if (p == fs->root.mnt) {
rootmnt = p;
- fs->root.mnt = mntget(q);
+ fs->root.mnt = mntget_long(q);
}
if (p == fs->pwd.mnt) {
pwdmnt = p;
- fs->pwd.mnt = mntget(q);
+ fs->pwd.mnt = mntget_long(q);
}
}
p = next_mnt(p, mnt_ns->root);
@@ -2138,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem);
if (rootmnt)
- mntput(rootmnt);
+ mntput_long(rootmnt);
if (pwdmnt)
- mntput(pwdmnt);
+ mntput_long(pwdmnt);
return new_ns;
}
@@ -2327,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
touch_mnt_namespace(current->nsproxy->mnt_ns);
br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new);
+
error = 0;
path_put(&root_parent);
path_put(&parent_path);
@@ -2353,6 +2531,7 @@ static void __init init_mount_tree(void)
mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
if (IS_ERR(mnt))
panic("Can't create rootfs");
+
ns = create_mnt_ns(mnt);
if (IS_ERR(ns))
panic("Can't allocate initial namespace");
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f22b12e7d337..4b9cbb28d7fa 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -74,9 +74,12 @@ const struct inode_operations ncp_dir_inode_operations =
* Dentry operations routines
*/
static int ncp_lookup_validate(struct dentry *, struct nameidata *);
-static int ncp_hash_dentry(struct dentry *, struct qstr *);
-static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *);
-static int ncp_delete_dentry(struct dentry *);
+static int ncp_hash_dentry(const struct dentry *, const struct inode *,
+ struct qstr *);
+static int ncp_compare_dentry(const struct dentry *, const struct inode *,
+ const struct dentry *, const struct inode *,
+ unsigned int, const char *, const struct qstr *);
+static int ncp_delete_dentry(const struct dentry *);
static const struct dentry_operations ncp_dentry_operations =
{
@@ -113,10 +116,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
-static inline int ncp_case_sensitive(struct dentry *dentry)
+static inline int ncp_case_sensitive(const struct inode *i)
{
#ifdef CONFIG_NCPFS_NFS_NS
- return ncp_namespace(dentry->d_inode) == NW_NS_NFS;
+ return ncp_namespace(i) == NW_NS_NFS;
#else
return 0;
#endif /* CONFIG_NCPFS_NFS_NS */
@@ -127,14 +130,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
* is case-sensitive.
*/
static int
-ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
+ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *this)
{
- if (!ncp_case_sensitive(dentry)) {
+ if (!ncp_case_sensitive(inode)) {
+ struct super_block *sb = dentry->d_sb;
struct nls_table *t;
unsigned long hash;
int i;
- t = NCP_IO_TABLE(dentry);
+ t = NCP_IO_TABLE(sb);
hash = init_name_hash();
for (i=0; i<this->len ; i++)
hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -145,15 +150,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
}
static int
-ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- if (a->len != b->len)
+ if (len != name->len)
return 1;
- if (ncp_case_sensitive(dentry))
- return strncmp(a->name, b->name, a->len);
+ if (ncp_case_sensitive(pinode))
+ return strncmp(str, name->name, len);
- return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len);
+ return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
}
/*
@@ -162,7 +169,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
* Closing files can be safely postponed until iput() - it's done there anyway.
*/
static int
-ncp_delete_dentry(struct dentry * dentry)
+ncp_delete_dentry(const struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
@@ -384,21 +391,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
}
/* If a pointer is invalid, we search the dentry. */
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dent = list_entry(next, struct dentry, d_u.d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
- dget_locked(dent);
+ dget(dent);
else
dent = NULL;
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
goto out;
}
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
return NULL;
out:
@@ -592,7 +599,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
qname.hash = full_name_hash(qname.name, qname.len);
if (dentry->d_op && dentry->d_op->d_hash)
- if (dentry->d_op->d_hash(dentry, &qname) != 0)
+ if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
goto end_advance;
newdent = d_lookup(dentry, &qname);
@@ -611,35 +618,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
shrink_dcache_parent(newdent);
/*
- * It is not as dangerous as it looks. NetWare's OS2 namespace is
- * case preserving yet case insensitive. So we update dentry's name
- * as received from server. We found dentry via d_lookup with our
- * hash, so we know that hash does not change, and so replacing name
- * should be reasonably safe.
+ * NetWare's OS2 namespace is case preserving yet case
+ * insensitive. So we update dentry's name as received from
+ * server. Parent dir's i_mutex is locked because we're in
+ * readdir.
*/
- if (qname.len == newdent->d_name.len &&
- memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
- struct inode *inode = newdent->d_inode;
-
- /*
- * Inside ncpfs all uses of d_name are either for debugging,
- * or on functions which acquire inode mutex (mknod, creat,
- * lookup). So grab i_mutex here, to be sure. d_path
- * uses dcache_lock when generating path, so we should too.
- * And finally d_compare is protected by dentry's d_lock, so
- * here we go.
- */
- if (inode)
- mutex_lock(&inode->i_mutex);
- spin_lock(&dcache_lock);
- spin_lock(&newdent->d_lock);
- memcpy((char *) newdent->d_name.name, qname.name,
- newdent->d_name.len);
- spin_unlock(&newdent->d_lock);
- spin_unlock(&dcache_lock);
- if (inode)
- mutex_unlock(&inode->i_mutex);
- }
+ dentry_update_name_case(newdent, &qname);
}
if (!newdent->d_inode) {
@@ -649,7 +633,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
entry->ino = iunique(dir->i_sb, 2);
inode = ncp_iget(dir->i_sb, entry);
if (inode) {
- newdent->d_op = &ncp_dentry_operations;
+ d_set_d_op(newdent, &ncp_dentry_operations);
d_instantiate(newdent, inode);
if (!hashed)
d_rehash(newdent);
@@ -657,7 +641,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
} else {
struct inode *inode = newdent->d_inode;
- mutex_lock(&inode->i_mutex);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
ncp_update_inode2(inode, entry);
mutex_unlock(&inode->i_mutex);
}
@@ -905,7 +889,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
if (inode) {
ncp_new_dentry(dentry);
add_entry:
- dentry->d_op = &ncp_dentry_operations;
+ d_set_d_op(dentry, &ncp_dentry_operations);
d_add(dentry, inode);
error = 0;
}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 8fb93b604e73..0c75a5f3cafd 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -58,11 +58,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ncp_destroy_inode(struct inode *inode)
+static void ncp_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
}
+static void ncp_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ncp_i_callback);
+}
+
static void init_once(void *foo)
{
struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
@@ -710,7 +717,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
sb->s_root = d_alloc_root(root_inode);
if (!sb->s_root)
goto out_no_root;
- sb->s_root->d_op = &ncp_root_dentry_operations;
+ d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
return 0;
out_no_root:
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca634ce..1220df75ff22 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
const unsigned char *, unsigned int, int);
#define NCP_ESC ':'
-#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io)
+#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
#define ncp_tolower(t, c) nls_tolower(t, c)
#define ncp_toupper(t, c) nls_toupper(t, c)
#define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
int ncp__vol2io(unsigned char *, unsigned int *,
const unsigned char *, unsigned int, int);
-#define NCP_IO_TABLE(dentry) NULL
+#define NCP_IO_TABLE(sb) NULL
#define ncp_tolower(t, c) tolower(c)
#define ncp_toupper(t, c) toupper(c)
#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
-static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1,
- const unsigned char *s2, int len)
+static inline int ncp_strnicmp(const struct nls_table *t,
+ const unsigned char *s1, const unsigned char *s2, int len)
{
while (len--) {
if (tolower(*s1++) != tolower(*s2++))
@@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent)
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
static inline void
@@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
struct list_head *next;
struct dentry *dentry;
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
ncp_age_dentry(server, dentry);
next = next->next;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
struct ncp_cache_head {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3e2123fe79f5..4a97375d6bcf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -438,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
if (dentry == NULL)
return;
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
if (IS_ERR(inode))
goto out;
@@ -1118,7 +1118,7 @@ out_error:
/*
* This is called from dput() when d_count is going to 0.
*/
-static int nfs_dentry_delete(struct dentry *dentry)
+static int nfs_dentry_delete(const struct dentry *dentry)
{
dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -1189,7 +1189,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
goto out;
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
/*
* If we're doing an exclusive create, optimize away the lookup
@@ -1334,7 +1334,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
res = ERR_PTR(-ENAMETOOLONG);
goto out;
}
- dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+ d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
/* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
* the dentry. */
@@ -1719,11 +1719,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
dir->i_ino, dentry->d_name.name);
- spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
- if (atomic_read(&dentry->d_count) > 1) {
+ if (dentry->d_count > 1) {
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(dentry->d_inode, 0);
error = nfs_sillyrename(dir, dentry);
@@ -1734,7 +1732,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
need_rehash = 1;
}
spin_unlock(&dentry->d_lock);
- spin_unlock(&dcache_lock);
error = nfs_safe_remove(dentry);
if (!error || error == -ENOENT) {
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1869,7 +1866,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
- atomic_read(&new_dentry->d_count));
+ new_dentry->d_count);
/*
* For non-directories, check whether the target is busy and if so,
@@ -1887,7 +1884,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
rehash = new_dentry;
}
- if (atomic_read(&new_dentry->d_count) > 2) {
+ if (new_dentry->d_count > 2) {
int err;
/* copy the target dentry's name */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814ce162..5596c6a2881e 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
* This again causes shrink_dcache_for_umount_subtree() to
* Oops, since the test for IS_ROOT() will fail.
*/
- spin_lock(&dcache_lock);
+ spin_lock(&sb->s_root->d_inode->i_lock);
+ spin_lock(&sb->s_root->d_lock);
list_del_init(&sb->s_root->d_alias);
- spin_unlock(&dcache_lock);
+ spin_unlock(&sb->s_root->d_lock);
+ spin_unlock(&sb->s_root->d_inode->i_lock);
}
return 0;
}
@@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
security_d_instantiate(ret, inode);
if (ret->d_op == NULL)
- ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
out:
nfs_free_fattr(fsinfo.fattr);
return ret;
@@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
security_d_instantiate(ret, inode);
if (ret->d_op == NULL)
- ret->d_op = server->nfs_client->rpc_ops->dentry_ops;
+ d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
out:
nfs_free_fattr(fattr);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e67e31c73416..017daa3bed38 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1438,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return &nfsi->vfs_inode;
}
-void nfs_destroy_inode(struct inode *inode)
+static void nfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
}
+void nfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nfs_i_callback);
+}
+
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa3673cf3..74aaf3963c10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,12 +49,17 @@ char *nfs_path(const char *base,
const struct dentry *dentry,
char *buffer, ssize_t buflen)
{
- char *end = buffer+buflen;
+ char *end;
int namelen;
+ unsigned seq;
+rename_retry:
+ end = buffer+buflen;
*--end = '\0';
buflen--;
- spin_lock(&dcache_lock);
+
+ seq = read_seqbegin(&rename_lock);
+ rcu_read_lock();
while (!IS_ROOT(dentry) && dentry != droot) {
namelen = dentry->d_name.len;
buflen -= namelen + 1;
@@ -65,7 +70,9 @@ char *nfs_path(const char *base,
*--end = '/';
dentry = dentry->d_parent;
}
- spin_unlock(&dcache_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
if (*end != '/') {
if (--buflen < 0)
goto Elong;
@@ -82,7 +89,9 @@ char *nfs_path(const char *base,
memcpy(end, base, namelen);
return end;
Elong_unlock:
- spin_unlock(&dcache_lock);
+ rcu_read_unlock();
+ if (read_seqretry(&rename_lock, seq))
+ goto rename_retry;
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3bf1e53c4a3f..e313a51acdd1 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
- atomic_read(&dentry->d_count));
+ dentry->d_count);
nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff04..3a359023c9f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
goto out_dput_new;
if (svc_msnfs(ffhp) &&
- ((atomic_read(&odentry->d_count) > 1)
- || (atomic_read(&ndentry->d_count) > 1))) {
+ ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
host_err = -EPERM;
goto out_dput_new;
}
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (type != S_IFDIR) { /* It's UNLINK */
#ifdef MSNFS
if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
- (atomic_read(&rdentry->d_count) > 1)) {
+ (rdentry->d_count > 1)) {
host_err = -EPERM;
} else
#endif
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 69f3144b327b..6ea32d9b1b9d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -171,10 +171,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
return &ii->vfs_inode;
}
-void nilfs_destroy_inode(struct inode *inode)
+static void nilfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+
if (mdi) {
kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
kfree(mdi);
@@ -182,6 +185,11 @@ void nilfs_destroy_inode(struct inode *inode)
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
}
+void nilfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, nilfs_i_callback);
+}
+
static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -847,7 +855,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
static int nilfs_tree_was_touched(struct dentry *root_dentry)
{
- return atomic_read(&root_dentry->d_count) > 1;
+ return root_dentry->d_count > 1;
}
/**
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 20dc218707ca..79b47cbb5cd8 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* determine if the children should tell inode about their events */
watched = fsnotify_inode_watches_children(inode);
- spin_lock(&dcache_lock);
+ spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -68,19 +68,21 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
+ spin_lock(&alias->d_lock);
list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
if (!child->d_inode)
continue;
- spin_lock(&child->d_lock);
+ spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
if (watched)
child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
else
child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
spin_unlock(&child->d_lock);
}
+ spin_unlock(&alias->d_lock);
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
}
/* Notify this dentry's parent about a child's events. */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc7..a627ed82c0a3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
return NULL;
}
+static void ntfs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+}
+
void ntfs_destroy_big_inode(struct inode *inode)
{
ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
BUG_ON(ni->page);
if (!atomic_dec_and_test(&ni->count))
BUG();
- kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
+ call_rcu(&inode->i_rcu, ntfs_i_callback);
}
static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 895532ac4d98..b4d3fe9c1c5c 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -169,23 +169,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
struct list_head *p;
struct dentry *dentry = NULL;
- spin_lock(&dcache_lock);
-
+ spin_lock(&inode->i_lock);
list_for_each(p, &inode->i_dentry) {
dentry = list_entry(p, struct dentry, d_alias);
+ spin_lock(&dentry->d_lock);
if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
mlog(0, "dentry found: %.*s\n",
dentry->d_name.len, dentry->d_name.name);
- dget_locked(dentry);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
break;
}
+ spin_unlock(&dentry->d_lock);
dentry = NULL;
}
- spin_unlock(&dcache_lock);
+ spin_unlock(&inode->i_lock);
return dentry;
}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19ed..8c5c0eddc365 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
return &ip->ip_vfs_inode;
}
-static void dlmfs_destroy_inode(struct inode *inode)
+static void dlmfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
}
+static void dlmfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, dlmfs_i_callback);
+}
+
static void dlmfs_evict_inode(struct inode *inode)
{
int status;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af3..6adafa576065 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
result = d_obtain_alias(inode);
if (!IS_ERR(result))
- result->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(result, &ocfs2_dentry_ops);
else
mlog_errno(PTR_ERR(result));
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
if (!IS_ERR(parent))
- parent->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(parent, &ocfs2_dentry_ops);
bail_unlock:
ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ca35f81a13bb..30c523144452 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
spin_unlock(&oi->ip_lock);
bail_add:
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
ret = d_splice_alias(inode, dentry);
if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
mlog_errno(status);
goto leave;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
status = ocfs2_add_entry(handle, dentry, inode,
OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
}
ihold(inode);
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
d_instantiate(dentry, inode);
out_commit:
@@ -1797,7 +1797,7 @@ static int ocfs2_symlink(struct inode *dir,
mlog_errno(status);
goto bail;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
status = ocfs2_add_entry(handle, dentry, inode,
le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2462,7 +2462,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
goto out_commit;
}
- dentry->d_op = &ocfs2_dentry_ops;
+ d_set_d_op(dentry, &ocfs2_dentry_ops);
d_instantiate(dentry, inode);
status = 0;
out_commit:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index cfeab7ce3697..17ff46fa8a10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -569,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void ocfs2_destroy_inode(struct inode *inode)
+static void ocfs2_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
}
+static void ocfs2_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ocfs2_i_callback);
+}
+
static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
unsigned int cbits)
{
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 911e61f348fc..a2a5bff774e3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
return &oi->vfs_inode;
}
-static void openprom_destroy_inode(struct inode *inode)
+static void openprom_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(op_inode_cachep, OP_I(inode));
}
+static void openprom_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, openprom_i_callback);
+}
+
static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
{
struct inode *inode;
diff --git a/fs/pipe.c b/fs/pipe.c
index 04629f36e397..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags)
goto err;
err = -ENOMEM;
- path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+ path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
if (!path.dentry)
goto err_inode;
path.mnt = mntget(pipe_mnt);
- path.dentry->d_op = &pipefs_dentry_operations;
+ d_set_d_op(path.dentry, &pipefs_dentry_operations);
d_instantiate(path.dentry, inode);
err = -ENFILE;
@@ -1253,6 +1253,10 @@ out:
return ret;
}
+static const struct super_operations pipefs_ops = {
+ .destroy_inode = free_inode_nonrcu,
+};
+
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
@@ -1262,7 +1266,7 @@ out:
static struct dentry *pipefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
+ return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
}
static struct file_system_type pipe_fs_type = {
@@ -1288,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void)
{
unregister_filesystem(&pipe_fs_type);
- mntput(pipe_mnt);
+ mntput_long(pipe_mnt);
}
fs_initcall(init_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..646ab86d8ae4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
*/
static inline int do_refcount_check(struct vfsmount *mnt, int count)
{
- int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts;
+ int mycount = count_mnt_count(mnt) - mnt->mnt_ghosts;
return (mycount > count);
}
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
* Check if any of these mounts that **do not have submounts**
* have more references than 'refcnt'. If so return busy.
*
- * vfsmount lock must be held for read or write
+ * vfsmount lock must be held for write
*/
int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
{
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 182845147fe4..85f0a80912aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1744,7 +1744,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
return 0;
}
-static int pid_delete_dentry(struct dentry * dentry)
+static int pid_delete_dentry(const struct dentry * dentry)
{
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
@@ -1969,7 +1969,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
@@ -2137,7 +2137,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
- dentry->d_op = &tid_fd_dentry_operations;
+ d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2196,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
@@ -2615,7 +2615,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
if (p->fop)
inode->i_fop = p->fop;
ei->op = p->op;
- dentry->d_op = &proc_base_dentry_operations;
+ d_set_d_op(dentry, &proc_base_dentry_operations);
d_add(dentry, inode);
error = NULL;
out:
@@ -2926,7 +2926,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
ARRAY_SIZE(tgid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3169,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
ARRAY_SIZE(tid_base_stuff));
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..f766be29d2c7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
* smarter: we could keep a "volatile" flag in the
* inode to indicate which ones to keep.
*/
-static int proc_delete_dentry(struct dentry * dentry)
+static int proc_delete_dentry(const struct dentry * dentry)
{
return 1;
}
@@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
out_unlock:
if (inode) {
- dentry->d_op = &proc_dentry_operations;
+ d_set_d_op(dentry, &proc_dentry_operations);
d_add(dentry, inode);
return NULL;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 3ddb6068177c..6bcb926b101b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -65,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
return inode;
}
-static void proc_destroy_inode(struct inode *inode)
+static void proc_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(proc_inode_cachep, PROC_I(inode));
}
+static void proc_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, proc_i_callback);
+}
+
static void init_once(void *foo)
{
struct proc_inode *ei = (struct proc_inode *) foo;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906b..35efd85a4d32 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -120,7 +120,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
goto out;
err = NULL;
- dentry->d_op = &proc_sys_dentry_operations;
+ d_set_d_op(dentry, &proc_sys_dentry_operations);
d_add(dentry, inode);
out:
@@ -201,7 +201,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
dput(child);
return -ENOMEM;
} else {
- child->d_op = &proc_sys_dentry_operations;
+ d_set_d_op(child, &proc_sys_dentry_operations);
d_add(child, inode);
}
} else {
@@ -392,20 +392,25 @@ static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
return !PROC_I(dentry->d_inode)->sysctl->unregistering;
}
-static int proc_sys_delete(struct dentry *dentry)
+static int proc_sys_delete(const struct dentry *dentry)
{
return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
}
-static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
- struct qstr *name)
+static int proc_sys_compare(const struct dentry *parent,
+ const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *name)
{
- struct dentry *dentry = container_of(qstr, struct dentry, d_name);
- if (qstr->len != name->len)
+ /* Although proc doesn't have negative dentries, rcu-walk means
+ * that inode here can be NULL */
+ if (!inode)
+ return 0;
+ if (name->len != len)
return 1;
- if (memcmp(qstr->name, name->name, name->len))
+ if (memcmp(name->name, str, len))
return 1;
- return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
+ return !sysctl_is_seen(PROC_I(inode)->sysctl);
}
static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa3..e63b4171d583 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void qnx4_destroy_inode(struct inode *inode)
+static void qnx4_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
}
+static void qnx4_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, qnx4_i_callback);
+}
+
static void init_once(void *foo)
{
struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b243117b8752..2575682a9ead 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -529,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void reiserfs_destroy_inode(struct inode *inode)
+static void reiserfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
}
+static void reiserfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, reiserfs_i_callback);
+}
+
static void init_once(void *foo)
{
struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7a..e0f0d7ea10a1 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -990,7 +990,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
REISERFS_SB(s)->priv_root = dentry;
- dentry->d_op = &xattr_lookup_poison_ops;
+ d_set_d_op(dentry, &xattr_lookup_poison_ops);
if (dentry->d_inode)
dentry->d_inode->i_flags |= S_PRIVATE;
} else
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55cd..2305e3121cb1 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
/*
* return a spent inode to the slab cache
*/
-static void romfs_destroy_inode(struct inode *inode)
+static void romfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
+static void romfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, romfs_i_callback);
+}
+
/*
* get filesystem statistics
*/
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c1..20700b9f2b4c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
}
-static void squashfs_destroy_inode(struct inode *inode)
+static void squashfs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
}
+static void squashfs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, squashfs_i_callback);
+}
+
static struct file_system_type squashfs_fs_type = {
.owner = THIS_MODULE,
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
+#include <linux/rculist_bl.h>
#include "internal.h"
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_LIST_HEAD(&s->s_files);
#endif
INIT_LIST_HEAD(&s->s_instances);
- INIT_HLIST_HEAD(&s->s_anon);
+ INIT_HLIST_BL_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
init_rwsem(&s->s_umount);
@@ -1139,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt;
err:
- mntput(mnt);
+ mntput_long(mnt);
return ERR_PTR(err);
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b0..c3fff5788b51 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,18 +231,27 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
goto repeat;
}
-static int sysfs_dentry_delete(struct dentry *dentry)
+static int sysfs_dentry_delete(const struct dentry *dentry)
{
struct sysfs_dirent *sd = dentry->d_fsdata;
return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
}
-static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+static int sysfs_dentry_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
{
struct sysfs_dirent *sd = dentry->d_fsdata;
int is_dir;
- mutex_lock(&sysfs_mutex);
+ if (nd->flags & LOOKUP_RCU) {
+ if (!mutex_trylock(&sysfs_mutex))
+ return -ECHILD;
+ /* ensure dentry still exists, now under the sysfs_mutex */
+ if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
+ mutex_unlock(&sysfs_mutex);
+ return -ECHILD;
+ }
+ } else
+ mutex_lock(&sysfs_mutex);
/* The sysfs dirent has been deleted */
if (sd->s_flags & SYSFS_FLAG_REMOVED)
@@ -272,6 +281,9 @@ out_bad:
*/
is_dir = (sysfs_type(sd) == SYSFS_DIR);
mutex_unlock(&sysfs_mutex);
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
if (is_dir) {
/* If we have submounts we must allow the vfs caches
* to lie about the state of the filesystem to prevent
@@ -294,7 +306,7 @@ static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
}
static const struct dentry_operations sysfs_dentry_ops = {
- .d_revalidate = sysfs_dentry_revalidate,
+ .d_revalidate_rcu = sysfs_dentry_revalidate_rcu,
.d_delete = sysfs_dentry_delete,
.d_iput = sysfs_dentry_iput,
};
@@ -701,7 +713,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
/* instantiate and hash dentry */
ret = d_find_alias(inode);
if (!ret) {
- dentry->d_op = &sysfs_dentry_ops;
+ d_set_d_op(dentry, &sysfs_dentry_ops);
dentry->d_fsdata = sysfs_get(sd);
d_add(dentry, inode);
} else {
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e6..0630eb969a28 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
return &si->vfs_inode;
}
-static void sysv_destroy_inode(struct inode *inode)
+static void sysv_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
}
+static void sysv_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, sysv_i_callback);
+}
+
static void init_once(void *p)
{
struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d11cd0..b5e68da2db32 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
return err;
}
-static int sysv_hash(struct dentry *dentry, struct qstr *qstr)
+static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
+ struct qstr *qstr)
{
/* Truncate the name in place, avoids having to define a compare
function. */
@@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
struct inode * inode = NULL;
ino_t ino;
- dentry->d_op = dir->i_sb->s_root->d_op;
+ d_set_d_op(dentry, dir->i_sb->s_root->d_op);
if (dentry->d_name.len > SYSV_NAMELEN)
return ERR_PTR(-ENAMETOOLONG);
ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c10..76712aefc4ab 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
if (sbi->s_forced_ro)
sb->s_flags |= MS_RDONLY;
if (sbi->s_truncate)
- sb->s_root->d_op = &sysv_dentry_operations;
+ d_set_d_op(sb->s_root, &sysv_dentry_operations);
return 1;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e3..6e11c2975dcf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
return &ui->vfs_inode;
};
+static void ubifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ubifs_inode_slab, ui);
+}
+
static void ubifs_destroy_inode(struct inode *inode)
{
struct ubifs_inode *ui = ubifs_inode(inode);
kfree(ui->data);
- kmem_cache_free(ubifs_inode_slab, inode);
+ call_rcu(&inode->i_rcu, ubifs_i_callback);
}
/*
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 536f89da4af2..7b27b063ff6d 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void udf_destroy_inode(struct inode *inode)
+static void udf_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(udf_inode_cachep, UDF_I(inode));
}
+static void udf_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, udf_i_callback);
+}
+
static void init_once(void *foo)
{
struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56da..2c61ac5d4e48 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void ufs_destroy_inode(struct inode *inode)
+static void ufs_i_callback(struct rcu_head *head)
{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ INIT_LIST_HEAD(&inode->i_dentry);
kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
}
+static void ufs_destroy_inode(struct inode *inode)
+{
+ call_rcu(&inode->i_rcu, ufs_i_callback);
+}
+
static void init_once(void *foo)
{
struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..70053f6fd208 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,7 +219,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
}
int
-xfs_check_acl(struct inode *inode, int mask)
+xfs_check_acl_rcu(struct inode *inode, int mask, unsigned int flags)
{
struct xfs_inode *ip = XFS_I(inode);
struct posix_acl *acl;
@@ -234,6 +234,12 @@ xfs_check_acl(struct inode *inode, int mask)
if (!XFS_IFORK_Q(ip))
return -EAGAIN;
+ if (flags & IPERM_FLAG_RCU) {
+ if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
+ return -ECHILD;
+ return -EAGAIN;
+ }
+
acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 94d5fd6a2973..c8751c70383f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -643,7 +643,7 @@ xfs_vn_fiemap(
}
static const struct inode_operations xfs_inode_operations = {
- .check_acl = xfs_check_acl,
+ .check_acl_rcu = xfs_check_acl_rcu,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -670,7 +670,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
+ .check_acl_rcu = xfs_check_acl_rcu,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -695,7 +695,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.rmdir = xfs_vn_unlink,
.mknod = xfs_vn_mknod,
.rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
+ .check_acl_rcu = xfs_check_acl_rcu,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -708,7 +708,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
.put_link = xfs_vn_put_link,
- .check_acl = xfs_check_acl,
+ .check_acl_rcu = xfs_check_acl_rcu,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d7..6560d563a557 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask);
+extern int xfs_check_acl_rcu(struct inode *inode, int mask, unsigned int flags);
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
extern int xfs_acl_chmod(struct inode *inode);
@@ -52,7 +52,7 @@ extern int posix_acl_default_exists(struct inode *inode);
extern const struct xattr_handler xfs_xattr_acl_access_handler;
extern const struct xattr_handler xfs_xattr_acl_default_handler;
#else
-# define xfs_check_acl NULL
+# define xfs_check_acl_rcu NULL
# define xfs_get_acl(inode, type) NULL
# define xfs_inherit_acl(inode, default_acl) 0
# define xfs_acl_chmod(inode) 0
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8e..d7de5a3f7867 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
return ip;
}
+STATIC void
+xfs_inode_free_callback(
+ struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct xfs_inode *ip = XFS_I(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_zone_free(xfs_inode_zone, ip);
+}
+
void
xfs_inode_free(
struct xfs_inode *ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- kmem_zone_free(xfs_inode_zone, ip);
+ call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
}
/*