summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/inode.c3
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/server.c5
-rw-r--r--fs/afs/write.c4
-rw-r--r--fs/aio.c6
-rw-r--r--fs/anon_inodes.c2
-rw-r--r--fs/attr.c50
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/binfmt_elf_fdpic.c26
-rw-r--r--fs/binfmt_flat.c27
-rw-r--r--fs/block_dev.c88
-rw-r--r--fs/btrfs/acl.c8
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/file.c15
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/relocation.c7
-rw-r--r--fs/btrfs/root-tree.c3
-rw-r--r--fs/btrfs/super.c6
-rw-r--r--fs/buffer.c123
-rw-r--r--fs/ceph/auth.c7
-rw-r--r--fs/ceph/auth.h6
-rw-r--r--fs/ceph/auth_none.c8
-rw-r--r--fs/ceph/auth_x.c14
-rw-r--r--fs/ceph/caps.c118
-rw-r--r--fs/ceph/ceph_fs.h21
-rw-r--r--fs/ceph/crush/mapper.c41
-rw-r--r--fs/ceph/debugfs.c2
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/export.c2
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c23
-rw-r--r--fs/ceph/mds_client.c55
-rw-r--r--fs/ceph/mds_client.h6
-rw-r--r--fs/ceph/messenger.c10
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/mon_client.c10
-rw-r--r--fs/ceph/osd_client.c10
-rw-r--r--fs/ceph/osdmap.c3
-rw-r--r--fs/ceph/super.c16
-rw-r--r--fs/ceph/super.h3
-rw-r--r--fs/cifs/cifsfs.c16
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/dir.c76
-rw-r--r--fs/cifs/file.c105
-rw-r--r--fs/cifs/inode.c4
-rw-r--r--fs/cifs/sess.c10
-rw-r--r--fs/coda/coda_int.h3
-rw-r--r--fs/coda/file.c4
-rw-r--r--fs/compat.c2
-rw-r--r--fs/configfs/inode.c14
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/debugfs/file.c21
-rw-r--r--fs/direct-io.c61
-rw-r--r--fs/ecryptfs/file.c2
-rw-r--r--fs/ecryptfs/inode.c4
-rw-r--r--fs/exofs/file.c7
-rw-r--r--fs/ext2/acl.c1
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/file.c7
-rw-r--r--fs/ext2/inode.c153
-rw-r--r--fs/ext2/super.c20
-rw-r--r--fs/ext3/acl.c1
-rw-r--r--fs/ext3/dir.c2
-rw-r--r--fs/ext3/fsync.c4
-rw-r--r--fs/ext3/super.c38
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/fsync.c8
-rw-r--r--fs/ext4/inode.c40
-rw-r--r--fs/ext4/move_extent.c3
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c40
-rw-r--r--fs/fat/inode.c35
-rw-r--r--fs/fcntl.c13
-rw-r--r--fs/file_table.c21
-rw-r--r--fs/fs-writeback.c509
-rw-r--r--fs/fscache/page.c36
-rw-r--r--fs/fuse/dev.c527
-rw-r--r--fs/fuse/dir.c5
-rw-r--r--fs/fuse/file.c48
-rw-r--r--fs/fuse/fuse_i.h6
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c1
-rw-r--r--fs/gfs2/dir.c2
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/inode.c12
-rw-r--r--fs/gfs2/ops_inode.c5
-rw-r--r--fs/gfs2/quota.c8
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hpfs/file.c4
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hppfs/hppfs.c2
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/jffs2/acl.c3
-rw-r--r--fs/jffs2/dir.c127
-rw-r--r--fs/jffs2/file.c4
-rw-r--r--fs/jffs2/fs.c11
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/file.c4
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/super.c16
-rw-r--r--fs/libfs.c111
-rw-r--r--fs/logfs/file.c4
-rw-r--r--fs/logfs/logfs.h2
-rw-r--r--fs/minix/dir.c11
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/minix/itree_v2.c27
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/nfs/client.c122
-rw-r--r--fs/nfs/dir.c6
-rw-r--r--fs/nfs/file.c5
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/super.c22
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/btree.h2
-rw-r--r--fs/nilfs2/file.c4
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/segbuf.h2
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/super.c8
-rw-r--r--fs/ntfs/dir.c5
-rw-r--r--fs/ntfs/file.c9
-rw-r--r--fs/ocfs2/file.c15
-rw-r--r--fs/ocfs2/reservations.c1
-rw-r--r--fs/ocfs2/super.c50
-rw-r--r--fs/omfs/file.c2
-rw-r--r--fs/pipe.c102
-rw-r--r--fs/proc/proc_devtree.c3
-rw-r--r--fs/proc/task_nommu.c20
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/quota/dquot.c190
-rw-r--r--fs/quota/quota.c4
-rw-r--r--fs/ramfs/file-mmu.c3
-rw-r--r--fs/ramfs/file-nommu.c9
-rw-r--r--fs/reiserfs/dir.c8
-rw-r--r--fs/reiserfs/file.c5
-rw-r--r--fs/reiserfs/super.c48
-rw-r--r--fs/smbfs/file.c3
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/splice.c11
-rw-r--r--fs/super.c23
-rw-r--r--fs/sync.c10
-rw-r--r--fs/sysfs/inode.c8
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/sysv/ialloc.c6
-rw-r--r--fs/sysv/inode.c1
-rw-r--r--fs/ubifs/budget.c2
-rw-r--r--fs/ubifs/file.c17
-rw-r--r--fs/ubifs/ubifs.h4
-rw-r--r--fs/udf/balloc.c43
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c28
-rw-r--r--fs/udf/ialloc.c21
-rw-r--r--fs/udf/inode.c5
-rw-r--r--fs/udf/namei.c20
-rw-r--r--fs/udf/super.c13
-rw-r--r--fs/udf/udfdecl.h1
-rw-r--r--fs/ufs/balloc.c24
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/file.c5
-rw-r--r--fs/ufs/ialloc.c13
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/ufs/namei.c16
-rw-r--r--fs/ufs/super.c110
-rw-r--r--fs/ufs/truncate.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h356
-rw-r--r--fs/xfs/quota/xfs_qm.c22
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c27
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_dfrag.c5
-rw-r--r--fs/xfs/xfs_ialloc.c142
-rw-r--r--fs/xfs/xfs_iget.c39
-rw-r--r--fs/xfs/xfs_inode.c149
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_itable.c285
-rw-r--r--fs/xfs/xfs_itable.h17
-rw-r--r--fs/xfs/xfs_log_recover.c13
-rw-r--r--fs/xfs/xfs_mount.c70
-rw-r--r--fs/xfs/xfs_rtalloc.c8
-rw-r--r--fs/xfs/xfs_rtalloc.h11
-rw-r--r--fs/xfs/xfs_trans.c446
-rw-r--r--fs/xfs/xfs_trans.h411
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
209 files changed, 3270 insertions, 2997 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 25b300e1c9d7..2bedc6c94fc2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,15 +257,13 @@ v9fs_file_write(struct file *filp, const char __user * data,
return total;
}
-static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int v9fs_file_fsync(struct file *filp, int datasync)
{
struct p9_fid *fid;
struct p9_wstat wstat;
int retval;
- P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
- dentry, datasync);
+ P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 23aa52f548a0..f4287e4de744 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -197,7 +197,7 @@ const struct file_operations adfs_dir_operations = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
.readdir = adfs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static int
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 005ea34d1758..a36da5382b40 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -26,7 +26,7 @@ const struct file_operations adfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 0f5e30978135..6f850b06ab62 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -322,8 +322,9 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
if (error)
goto out;
+ /* XXX: this is missing some actual on-disk truncation.. */
if (ia_valid & ATTR_SIZE)
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
goto out;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 861dae68ac12..f05b6155ccc8 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -183,7 +183,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
-int affs_file_fsync(struct file *, struct dentry *, int);
+int affs_file_fsync(struct file *, int);
/* dir.c */
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 184e55c1c9ba..322710c3eedf 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -916,9 +916,9 @@ affs_truncate(struct inode *inode)
affs_free_prealloc(inode);
}
-int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int affs_file_fsync(struct file *filp, int datasync)
{
- struct inode * inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int ret, err;
ret = write_inode_now(inode, 0);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 807f284cc75e..5f679b77ce24 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -740,7 +740,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, struct dentry *, int);
+extern int afs_fsync(struct file *, int);
/*****************************************************************************/
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f49099516675..9fdc7fe3a7bc 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -91,9 +91,10 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell,
memcpy(&server->addr, addr, sizeof(struct in_addr));
server->addr.s_addr = addr->s_addr;
+ _leave(" = %p{%d}", server, atomic_read(&server->usage));
+ } else {
+ _leave(" = NULL [nomem]");
}
-
- _leave(" = %p{%d}", server, atomic_read(&server->usage));
return server;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3bed54a294d4..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
{
struct address_space *mapping = vnode->vfs_inode.i_mapping;
struct writeback_control wbc = {
- .bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_cyclic = 1,
@@ -701,8 +700,9 @@ int afs_writeback_all(struct afs_vnode *vnode)
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
-int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int afs_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
int ret;
diff --git a/fs/aio.c b/fs/aio.c
index 48fdeebdb544..1ccf25cef1f0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -527,7 +527,7 @@ static void aio_fput_routine(struct work_struct *data)
/* Complete the fput(s) */
if (req->ki_filp != NULL)
- __fput(req->ki_filp);
+ fput(req->ki_filp);
/* Link the iocb into the context's free list */
spin_lock_irq(&ctx->ctx_lock);
@@ -560,11 +560,11 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
/*
* Try to optimize the aio and eventfd file* puts, by avoiding to
- * schedule work in case it is not __fput() time. In normal cases,
+ * schedule work in case it is not final fput() time. In normal cases,
* we would not be holding the last reference to the file*, so
* this function will be executed w/out any aio kthread wakeup.
*/
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
+ if (unlikely(!fput_atomic(req->ki_filp))) {
get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 9bd4b3876c99..e4b75d6eda83 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -205,7 +205,7 @@ static struct inode *anon_inode_mkinode(void)
* that it already _is_ on the dirty list.
*/
inode->i_state = I_DIRTY;
- inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
+ inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;
diff --git a/fs/attr.c b/fs/attr.c
index 0815e93bb487..b4fa3b0aa596 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,14 +67,14 @@ EXPORT_SYMBOL(inode_change_ok);
* @offset: the new size to assign to the inode
* @Returns: 0 on success, -ve errno on failure
*
+ * inode_newsize_ok must be called with i_mutex held.
+ *
* inode_newsize_ok will check filesystem limits and ulimits to check that the
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
* when necessary. Caller must not proceed with inode size change if failure is
* returned. @inode must be a file (not directory), with appropriate
* permissions to allow truncate (inode_newsize_ok does NOT check these
* conditions).
- *
- * inode_newsize_ok must be called with i_mutex held.
*/
int inode_newsize_ok(const struct inode *inode, loff_t offset)
{
@@ -104,17 +104,25 @@ out_big:
}
EXPORT_SYMBOL(inode_newsize_ok);
-int inode_setattr(struct inode * inode, struct iattr * attr)
+/**
+ * generic_setattr - copy simple metadata updates into the generic inode
+ * @inode: the inode to be updated
+ * @attr: the new attributes
+ *
+ * generic_setattr must be called with i_mutex held.
+ *
+ * generic_setattr updates the inode's metadata with that specified
+ * in attr. Noticably missing is inode size update, which is more complex
+ * as it requires pagecache updates. See simple_setsize.
+ *
+ * The inode is not marked as dirty after this operation. The rationale is
+ * that for "simple" filesystems, the struct inode is the inode storage.
+ * The caller is free to mark the inode dirty afterwards if needed.
+ */
+void generic_setattr(struct inode *inode, const struct iattr *attr)
{
unsigned int ia_valid = attr->ia_valid;
- if (ia_valid & ATTR_SIZE &&
- attr->ia_size != i_size_read(inode)) {
- int error = vmtruncate(inode, attr->ia_size);
- if (error)
- return error;
- }
-
if (ia_valid & ATTR_UID)
inode->i_uid = attr->ia_uid;
if (ia_valid & ATTR_GID)
@@ -135,6 +143,28 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
mode &= ~S_ISGID;
inode->i_mode = mode;
}
+}
+EXPORT_SYMBOL(generic_setattr);
+
+/*
+ * note this function is deprecated, the new truncate sequence should be
+ * used instead -- see eg. simple_setsize, generic_setattr.
+ */
+int inode_setattr(struct inode *inode, const struct iattr *attr)
+{
+ unsigned int ia_valid = attr->ia_valid;
+
+ if (ia_valid & ATTR_SIZE &&
+ attr->ia_size != i_size_read(inode)) {
+ int error;
+
+ error = vmtruncate(inode, attr->ia_size);
+ if (error)
+ return error;
+ }
+
+ generic_setattr(inode, attr);
+
mark_inode_dirty(inode);
return 0;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index a05287a23f62..52e59bf4aa5f 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -93,8 +93,7 @@ static int bad_file_release(struct inode *inode, struct file *filp)
return -EIO;
}
-static int bad_file_fsync(struct file *file, struct dentry *dentry,
- int datasync)
+static int bad_file_fsync(struct file *file, int datasync)
{
return -EIO;
}
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 8f73841fc974..d967e052b779 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -78,7 +78,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
const struct file_operations bfs_dir_operations = {
.read = generic_read_dir,
.readdir = bfs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.llseek = generic_file_llseek,
};
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c5f9a0e5d72..63039ed9576f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -990,10 +990,9 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
/* clear any space allocated but not loaded */
if (phdr->p_filesz < phdr->p_memsz) {
- ret = clear_user((void *) (seg->addr + phdr->p_filesz),
- phdr->p_memsz - phdr->p_filesz);
- if (ret)
- return ret;
+ if (clear_user((void *) (seg->addr + phdr->p_filesz),
+ phdr->p_memsz - phdr->p_filesz))
+ return -EFAULT;
}
if (mm) {
@@ -1027,7 +1026,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, delta_vaddr;
- int loop, dvset, ret;
+ int loop, dvset;
load_addr = params->load_addr;
delta_vaddr = 0;
@@ -1127,9 +1126,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
* PT_LOAD */
if (prot & PROT_WRITE && disp > 0) {
kdebug("clear[%d] ad=%lx sz=%lx", loop, maddr, disp);
- ret = clear_user((void __user *) maddr, disp);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr, disp))
+ return -EFAULT;
maddr += disp;
}
@@ -1164,19 +1162,17 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (prot & PROT_WRITE && excess1 > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess1);
- ret = clear_user((void __user *) maddr + phdr->p_filesz,
- excess1);
- if (ret)
- return ret;
+ if (clear_user((void __user *) maddr + phdr->p_filesz,
+ excess1))
+ return -EFAULT;
}
#else
if (excess > 0) {
kdebug("clear[%d] ad=%lx sz=%lx",
loop, maddr + phdr->p_filesz, excess);
- ret = clear_user((void *) maddr + phdr->p_filesz, excess);
- if (ret)
- return ret;
+ if (clear_user((void *) maddr + phdr->p_filesz, excess))
+ return -EFAULT;
}
#endif
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 49566c1687d8..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -56,16 +56,19 @@
#endif
/*
- * User data (stack, data section and bss) needs to be aligned
- * for the same reasons as SLAB memory is, and to the same amount.
- * Avoid duplicating architecture specific code by using the same
- * macro as with SLAB allocation:
+ * User data (data section and bss) needs to be aligned.
+ * We pick 0x20 here because it is the max value elf2flt has always
+ * used in producing FLAT files, and because it seems to be large
+ * enough to make all the gcc alignment related tests happy.
*/
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_DATA_ALIGN (ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_DATA_ALIGN (sizeof(void *))
-#endif
+#define FLAT_DATA_ALIGN (0x20)
+
+/*
+ * User data (stack) also needs to be aligned.
+ * Here we can be a bit looser than the data sections since this
+ * needs to only meet arch ABI requirements.
+ */
+#define FLAT_STACK_ALIGN max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
#define RELOC_FAILED 0xff00ff01 /* Relocation incorrect somewhere */
#define UNLOADED_LIB 0x7ff000ff /* Placeholder for unused library */
@@ -129,7 +132,7 @@ static unsigned long create_flat_tables(
sp = (unsigned long *)p;
sp -= (envc + argc + 2) + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
- sp = (unsigned long *) ((unsigned long)sp & -FLAT_DATA_ALIGN);
+ sp = (unsigned long *) ((unsigned long)sp & -FLAT_STACK_ALIGN);
argv = sp + 1 + (flat_argvp_envp_on_stack() ? 2 : 0);
envp = argv + (argc + 1);
@@ -589,7 +592,7 @@ static int load_flat_file(struct linux_binprm * bprm,
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
do_munmap(current->mm, textpos, text_len);
- do_munmap(current->mm, realdatastart, data_len + extra);
+ do_munmap(current->mm, realdatastart, len);
ret = result;
goto err;
}
@@ -876,7 +879,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
stack_len = TOP_OF_ARGS - bprm->p; /* the strings */
stack_len += (bprm->argc + 1) * sizeof(char *); /* the argv array */
stack_len += (bprm->envc + 1) * sizeof(char *); /* the envp array */
- stack_len += FLAT_DATA_ALIGN - 1; /* reserve for upcoming alignment */
+ stack_len += FLAT_STACK_ALIGN - 1; /* reserve for upcoming alignment */
res = load_flat_file(bprm, &libinfo, 0, &stack_len);
if (IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 26e5f5026620..99d6af811747 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,8 +172,9 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
- iov, offset, nr_segs, blkdev_get_blocks, NULL);
+ return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
+ I_BDEV(inode), iov, offset, nr_segs,
+ blkdev_get_blocks, NULL);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -309,8 +310,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
*pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- blkdev_get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, blkdev_get_block);
}
static int blkdev_write_end(struct file *file, struct address_space *mapping,
@@ -358,12 +359,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
return retval;
}
-/*
- * Filp is never NULL; the only case when ->fsync() is called with
- * NULL first argument is nfsd_sync_dir() and that's not a directory.
- */
-
-int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int blkdev_fsync(struct file *filp, int datasync)
{
struct inode *bd_inode = filp->f_mapping->host;
struct block_device *bdev = I_BDEV(bd_inode);
@@ -710,8 +706,13 @@ retry:
* @bdev is about to be opened exclusively. Check @bdev can be opened
* exclusively and mark that an exclusive open is in progress. Each
* successful call to this function must be matched with a call to
- * either bd_claim() or bd_abort_claiming(). If this function
- * succeeds, the matching bd_claim() is guaranteed to succeed.
+ * either bd_finish_claiming() or bd_abort_claiming() (which do not
+ * fail).
+ *
+ * This function is used to gain exclusive access to the block device
+ * without actually causing other exclusive open attempts to fail. It
+ * should be used when the open sequence itself requires exclusive
+ * access but may subsequently fail.
*
* CONTEXT:
* Might sleep.
@@ -738,6 +739,7 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
return ERR_PTR(-ENXIO);
whole = bdget_disk(disk, 0);
+ module_put(disk->fops->owner);
put_disk(disk);
if (!whole)
return ERR_PTR(-ENOMEM);
@@ -786,15 +788,46 @@ static void bd_abort_claiming(struct block_device *whole, void *holder)
__bd_abort_claiming(whole, holder); /* releases bdev_lock */
}
+/* increment holders when we have a legitimate claim. requires bdev_lock */
+static void __bd_claim(struct block_device *bdev, struct block_device *whole,
+ void *holder)
+{
+ /* note that for a whole device bd_holders
+ * will be incremented twice, and bd_holder will
+ * be set to bd_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+}
+
+/**
+ * bd_finish_claiming - finish claiming a block device
+ * @bdev: block device of interest (passed to bd_start_claiming())
+ * @whole: whole block device returned by bd_start_claiming()
+ * @holder: holder trying to claim @bdev
+ *
+ * Finish a claiming block started by bd_start_claiming().
+ *
+ * CONTEXT:
+ * Grabs and releases bdev_lock.
+ */
+static void bd_finish_claiming(struct block_device *bdev,
+ struct block_device *whole, void *holder)
+{
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ __bd_claim(bdev, whole, holder);
+ __bd_abort_claiming(whole, holder); /* not actually an abort */
+}
+
/**
* bd_claim - claim a block device
* @bdev: block device to claim
* @holder: holder trying to claim @bdev
*
- * Try to claim @bdev which must have been opened successfully. This
- * function may be called with or without preceding
- * blk_start_claiming(). In the former case, this function is always
- * successful and terminates the claiming block.
+ * Try to claim @bdev which must have been opened successfully.
*
* CONTEXT:
* Might sleep.
@@ -810,23 +843,10 @@ int bd_claim(struct block_device *bdev, void *holder)
might_sleep();
spin_lock(&bdev_lock);
-
res = bd_prepare_to_claim(bdev, whole, holder);
- if (res == 0) {
- /* note that for a whole device bd_holders
- * will be incremented twice, and bd_holder will
- * be set to bd_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- }
-
- if (whole->bd_claiming)
- __bd_abort_claiming(whole, holder); /* releases bdev_lock */
- else
- spin_unlock(&bdev_lock);
+ if (res == 0)
+ __bd_claim(bdev, whole, holder);
+ spin_unlock(&bdev_lock);
return res;
}
@@ -1480,7 +1500,7 @@ static int blkdev_open(struct inode * inode, struct file * filp)
if (whole) {
if (res == 0)
- BUG_ON(bd_claim(bdev, filp) != 0);
+ bd_finish_claiming(bdev, whole, filp);
else
bd_abort_claiming(whole, filp);
}
@@ -1716,7 +1736,7 @@ struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *h
if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
goto out_blkdev_put;
- BUG_ON(bd_claim(bdev, holder) != 0);
+ bd_finish_claiming(bdev, whole, holder);
return bdev;
out_blkdev_put:
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 8d432cd9d580..2222d161c7b6 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return acl;
set_cached_acl(inode, type, acl);
}
kfree(value);
@@ -160,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
int ret;
struct posix_acl *acl = NULL;
+ if (!is_owner_or_cap(dentry->d_inode))
+ return -EPERM;
+
+ if (!IS_POSIXACL(dentry->d_inode))
+ return -EOPNOTSUPP;
+
if (value) {
acl = posix_acl_from_xattr(value, size);
if (acl == NULL) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e9bf86415e86..29c20092847e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2434,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode);
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
/* file.c */
-int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync);
+int btrfs_sync_file(struct file *file, int datasync);
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f3b287c22caf..34f7c375567e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1941,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_level_size(tree_root,
btrfs_super_log_root_level(disk_super));
- log_tree_root = kzalloc(sizeof(struct btrfs_root),
- GFP_NOFS);
+ log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+ if (!log_tree_root) {
+ err = -ENOMEM;
+ goto fail_trans_kthread;
+ }
__setup_root(nodesize, leafsize, sectorsize, stripesize,
log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
@@ -1982,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (IS_ERR(fs_info->fs_root)) {
+ err = PTR_ERR(fs_info->fs_root);
+ goto fail_trans_kthread;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
down_read(&fs_info->cleanup_work_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b9080d71991a..32d094002a57 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4360,7 +4360,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- BUG_ON(block_rsv->space_info != cache->space_info);
+ if (block_rsv->space_info != cache->space_info)
+ goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = wbc->bdi,
.sync_mode = wbc->sync_mode,
.older_than_this = NULL,
.nr_to_write = 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
.sync_io = mode == WB_SYNC_ALL,
};
struct writeback_control wbc_writepages = {
- .bdi = inode->i_mapping->backing_dev_info,
.sync_mode = mode,
.older_than_this = NULL,
.nr_to_write = nr_pages * 2,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 79437c5eeb1e..e354c33df082 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1101,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* important optimization for directories because holding the mutex prevents
* new operations on the dir while we write to disk.
*/
-int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int btrfs_sync_file(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
@@ -1139,7 +1140,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
/*
* ok we haven't committed the transaction yet, lets do a commit
*/
- if (file && file->private_data)
+ if (file->private_data)
btrfs_ioctl_trans_end(file);
trans = btrfs_start_transaction(root, 0);
@@ -1189,14 +1190,22 @@ static const struct vm_operations_struct btrfs_file_vm_ops = {
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
- vma->vm_ops = &btrfs_file_vm_ops;
+ struct address_space *mapping = filp->f_mapping;
+
+ if (!mapping->a_ops->readpage)
+ return -ENOEXEC;
+
file_accessed(filp);
+ vma->vm_ops = &btrfs_file_vm_ops;
+ vma->vm_flags |= VM_CAN_NONLINEAR;
+
return 0;
}
const struct file_operations btrfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.splice_read = generic_file_splice_read,
.aio_write = btrfs_file_aio_write,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fa6ccc1bfe2a..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2673,7 +2673,7 @@ static int check_path_shared(struct btrfs_root *root,
struct extent_buffer *eb;
int level;
int ret;
- u64 refs;
+ u64 refs = 1;
for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
if (!path->nodes[level])
@@ -6884,7 +6884,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, 0, cur_offset,
+ ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4cdb98cf26de..4dbaf89b1337 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1280,7 +1280,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto out;
+ goto out_up_write;
}
trans->block_rsv = &root->fs_info->global_block_rsv;
@@ -1845,7 +1845,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
dir_id, "default", 7, 1);
- if (!di) {
+ if (IS_ERR_OR_NULL(di)) {
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
printk(KERN_ERR "Umm, you don't have the default dir item, "
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 05d41e569236..b37d723b9d4a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -784,16 +784,17 @@ again:
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
- root = find_tree_root(rc, eb, ref0);
- if (!root->ref_cows)
- cur->cowonly = 1;
if (key.objectid == key.offset) {
+ root = find_tree_root(rc, eb, ref0);
if (root && !should_ignore_root(root))
cur->root = root;
else
list_add(&cur->list, &useless);
break;
}
+ if (is_cowonly_root(btrfs_ref_root_v0(eb,
+ ref0)))
+ cur->cowonly = 1;
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index b91ccd972644..2d958be761c8 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -330,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct btrfs_path *path;
int ret;
- u32 refs;
struct btrfs_root_item *ri;
struct extent_buffer *leaf;
@@ -344,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
leaf = path->nodes[0];
ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item);
- refs = btrfs_disk_root_refs(leaf, ri);
- BUG_ON(refs != 0);
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d34b2dfc9628..f2393b390318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -360,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb,
*/
dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ if (IS_ERR(di))
+ return ERR_CAST(di);
if (!di) {
/*
* Ok the default dir item isn't there. This is weird since
@@ -390,8 +392,8 @@ setup_root:
location.offset = 0;
inode = btrfs_iget(sb, &location, new_root, &new);
- if (!inode)
- return ERR_PTR(-ENOMEM);
+ if (IS_ERR(inode))
+ return ERR_CAST(inode);
/*
* If we're just mounting the root most subvol put the inode and return
diff --git a/fs/buffer.c b/fs/buffer.c
index e8aa7081d25c..d54812b198e9 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
}
/*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int block_write_begin(struct file *file, struct address_space *mapping,
+int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
}
}
out:
return status;
}
+EXPORT_SYMBOL(block_write_begin_newtrunc);
+
+/*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block)
+{
+ int ret;
+
+ ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block);
+
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ *
+ * Filesystems which pass down their own page also cannot
+ * call into vmtruncate here because it would lead to lock
+ * inversion problems (*pagep is locked). This is a further
+ * example of where the old truncate sequence is inadequate.
+ */
+ if (unlikely(ret) && *pagep == NULL) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(block_write_begin);
int block_write_end(struct file *file, struct address_space *mapping,
@@ -2324,7 +2351,7 @@ out:
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
-int cont_write_begin(struct file *file, struct address_space *mapping,
+int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
@@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
}
*pagep = NULL;
- err = block_write_begin(file, mapping, pos, len,
+ err = block_write_begin_newtrunc(file, mapping, pos, len,
flags, pagep, fsdata, get_block);
out:
return err;
}
+EXPORT_SYMBOL(cont_write_begin_newtrunc);
+
+int cont_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block, loff_t *bytes)
+{
+ int ret;
+
+ ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block, bytes);
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(cont_write_begin);
int block_prepare_write(struct page *page, unsigned from, unsigned to,
@@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
*
* We are not allowed to take the i_mutex here so we have to play games to
* protect against truncate races as the page could now be beyond EOF. Because
- * vmtruncate() writes the inode size before removing pages, once we have the
+ * truncate writes the inode size before removing pages, once we have the
* page lock we can determine safely if the page is beyond EOF. If it is not
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
@@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
}
/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
+ * Filesystems implementing the new truncate sequence should use the
+ * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int nobh_write_begin(struct file *file, struct address_space *mapping,
+int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep,
- fsdata, get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len,
+ flags, pagep, fsdata, get_block);
}
if (PageMappedToDisk(page))
@@ -2605,8 +2652,34 @@ out_release:
page_cache_release(page);
*pagep = NULL;
- if (pos + len > inode->i_size)
- vmtruncate(inode, inode->i_size);
+ return ret;
+}
+EXPORT_SYMBOL(nobh_write_begin_newtrunc);
+
+/*
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
+ */
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata,
+ get_block_t *get_block)
+{
+ int ret;
+
+ ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, get_block);
+
+ /*
+ * prepare_write() may have instantiated a few blocks
+ * outside i_size. Trim these off again. Don't need
+ * i_size_read because we hold i_mutex.
+ */
+ if (unlikely(ret)) {
+ loff_t isize = mapping->host->i_size;
+ if (pos + len > isize)
+ vmtruncate(mapping->host, isize);
+ }
return ret;
}
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index 9f46de2ba7a7..89490beaf537 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -1,7 +1,6 @@
#include "ceph_debug.h"
#include <linux/module.h>
-#include <linux/slab.h>
#include <linux/err.h>
#include <linux/slab.h>
@@ -217,8 +216,8 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol);
if (ret) {
- pr_err("error %d on auth method %s init\n",
- ret, ac->ops->name);
+ pr_err("error %d on auth protocol %d init\n",
+ ret, protocol);
goto out;
}
}
@@ -247,7 +246,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
if (!ac->protocol)
return ceph_auth_build_hello(ac, msg_buf, msg_len);
BUG_ON(!ac->ops);
- if (!ac->ops->is_authenticated(ac))
+ if (ac->ops->should_authenticate(ac))
return ceph_build_auth_request(ac, msg_buf, msg_len);
return 0;
}
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h
index 4429a707c021..d38a2fb4a137 100644
--- a/fs/ceph/auth.h
+++ b/fs/ceph/auth.h
@@ -24,6 +24,12 @@ struct ceph_auth_client_ops {
int (*is_authenticated)(struct ceph_auth_client *ac);
/*
+ * true if we should (re)authenticate, e.g., when our tickets
+ * are getting old and crusty.
+ */
+ int (*should_authenticate)(struct ceph_auth_client *ac);
+
+ /*
* build requests and process replies during monitor
* handshake. if handle_reply returns -EAGAIN, we build
* another request.
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c
index 24407c119291..ad1dc21286c7 100644
--- a/fs/ceph/auth_none.c
+++ b/fs/ceph/auth_none.c
@@ -31,6 +31,13 @@ static int is_authenticated(struct ceph_auth_client *ac)
return !xi->starting;
}
+static int should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_auth_none_info *xi = ac->private;
+
+ return xi->starting;
+}
+
/*
* the generic auth code decode the global_id, and we carry no actual
* authenticate state, so nothing happens here.
@@ -98,6 +105,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,
+ .should_authenticate = should_authenticate,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index 7b206231566d..3fe49042d8ad 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -27,6 +27,17 @@ static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
return (ac->want_keys & xi->have_keys) == ac->want_keys;
}
+static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
+{
+ struct ceph_x_info *xi = ac->private;
+ int need;
+
+ ceph_x_validate_tickets(ac, &need);
+ dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
+ ac->want_keys, need, xi->have_keys);
+ return need != 0;
+}
+
static int ceph_x_encrypt_buflen(int ilen)
{
return sizeof(struct ceph_x_encrypt_header) + ilen + 16 +
@@ -482,7 +493,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
- op = le32_to_cpu(head->op);
+ op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
@@ -620,6 +631,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
.is_authenticated = ceph_x_is_authenticated,
+ .should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request,
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0dd0b81e64f7..74144d6389f0 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -244,8 +244,14 @@ static struct ceph_cap *get_cap(struct ceph_cap_reservation *ctx)
struct ceph_cap *cap = NULL;
/* temporary, until we do something about cap import/export */
- if (!ctx)
- return kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (!ctx) {
+ cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
+ if (cap) {
+ caps_use_count++;
+ caps_total_count++;
+ }
+ return cap;
+ }
spin_lock(&caps_list_lock);
dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
@@ -981,6 +987,46 @@ static int send_cap_msg(struct ceph_mds_session *session,
return 0;
}
+static void __queue_cap_release(struct ceph_mds_session *session,
+ u64 ino, u64 cap_id, u32 migrate_seq,
+ u32 issue_seq)
+{
+ struct ceph_msg *msg;
+ struct ceph_mds_cap_release *head;
+ struct ceph_mds_cap_item *item;
+
+ spin_lock(&session->s_cap_lock);
+ BUG_ON(!session->s_num_cap_releases);
+ msg = list_first_entry(&session->s_cap_releases,
+ struct ceph_msg, list_head);
+
+ dout(" adding %llx release to mds%d msg %p (%d left)\n",
+ ino, session->s_mds, msg, session->s_num_cap_releases);
+
+ BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
+ head = msg->front.iov_base;
+ head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
+ item = msg->front.iov_base + msg->front.iov_len;
+ item->ino = cpu_to_le64(ino);
+ item->cap_id = cpu_to_le64(cap_id);
+ item->migrate_seq = cpu_to_le32(migrate_seq);
+ item->seq = cpu_to_le32(issue_seq);
+
+ session->s_num_cap_releases--;
+
+ msg->front.iov_len += sizeof(*item);
+ if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
+ dout(" release msg %p full\n", msg);
+ list_move_tail(&msg->list_head, &session->s_cap_releases_done);
+ } else {
+ dout(" release msg %p at %d/%d (%d)\n", msg,
+ (int)le32_to_cpu(head->num),
+ (int)CEPH_CAPS_PER_RELEASE,
+ (int)msg->front.iov_len);
+ }
+ spin_unlock(&session->s_cap_lock);
+}
+
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_lock.
@@ -994,41 +1040,9 @@ void ceph_queue_caps_release(struct inode *inode)
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
struct ceph_mds_session *session = cap->session;
- struct ceph_msg *msg;
- struct ceph_mds_cap_release *head;
- struct ceph_mds_cap_item *item;
- spin_lock(&session->s_cap_lock);
- BUG_ON(!session->s_num_cap_releases);
- msg = list_first_entry(&session->s_cap_releases,
- struct ceph_msg, list_head);
-
- dout(" adding %p release to mds%d msg %p (%d left)\n",
- inode, session->s_mds, msg, session->s_num_cap_releases);
-
- BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE);
- head = msg->front.iov_base;
- head->num = cpu_to_le32(le32_to_cpu(head->num) + 1);
- item = msg->front.iov_base + msg->front.iov_len;
- item->ino = cpu_to_le64(ceph_ino(inode));
- item->cap_id = cpu_to_le64(cap->cap_id);
- item->migrate_seq = cpu_to_le32(cap->mseq);
- item->seq = cpu_to_le32(cap->issue_seq);
-
- session->s_num_cap_releases--;
-
- msg->front.iov_len += sizeof(*item);
- if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
- dout(" release msg %p full\n", msg);
- list_move_tail(&msg->list_head,
- &session->s_cap_releases_done);
- } else {
- dout(" release msg %p at %d/%d (%d)\n", msg,
- (int)le32_to_cpu(head->num),
- (int)CEPH_CAPS_PER_RELEASE,
- (int)msg->front.iov_len);
- }
- spin_unlock(&session->s_cap_lock);
+ __queue_cap_release(session, ceph_ino(inode), cap->cap_id,
+ cap->mseq, cap->issue_seq);
p = rb_next(p);
__ceph_remove_cap(cap);
}
@@ -1776,9 +1790,9 @@ out:
spin_unlock(&ci->i_unsafe_lock);
}
-int ceph_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ceph_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned flush_tid;
int ret;
@@ -2655,7 +2669,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_caps *h;
int mds = session->s_mds;
int op;
- u32 seq;
+ u32 seq, mseq;
struct ceph_vino vino;
u64 cap_id;
u64 size, max_size;
@@ -2675,6 +2689,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap = CEPH_NOSNAP;
cap_id = le64_to_cpu(h->cap_id);
seq = le32_to_cpu(h->seq);
+ mseq = le32_to_cpu(h->migrate_seq);
size = le64_to_cpu(h->size);
max_size = le64_to_cpu(h->max_size);
@@ -2689,6 +2704,18 @@ void ceph_handle_caps(struct ceph_mds_session *session,
vino.snap, inode);
if (!inode) {
dout(" i don't have ino %llx\n", vino.ino);
+
+ if (op == CEPH_CAP_OP_IMPORT)
+ __queue_cap_release(session, vino.ino, cap_id,
+ mseq, seq);
+
+ /*
+ * send any full release message to try to move things
+ * along for the mds (who clearly thinks we still have this
+ * cap).
+ */
+ ceph_add_cap_releases(mdsc, session, -1);
+ ceph_send_cap_releases(mdsc, session);
goto done;
}
@@ -2714,7 +2741,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
spin_lock(&inode->i_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
- dout("no cap on %p ino %llx.%llx from mds%d, releasing\n",
+ dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
spin_unlock(&inode->i_lock);
goto done;
@@ -2865,18 +2892,19 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_cap *cap;
struct ceph_mds_request_release *rel = *p;
+ int used, dirty;
int ret = 0;
- int used = 0;
spin_lock(&inode->i_lock);
used = __ceph_caps_used(ci);
+ dirty = __ceph_caps_dirty(ci);
- dout("encode_inode_release %p mds%d used %s drop %s unless %s\n", inode,
- mds, ceph_cap_string(used), ceph_cap_string(drop),
+ dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
+ inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
ceph_cap_string(unless));
- /* only drop unused caps */
- drop &= ~used;
+ /* only drop unused, clean caps */
+ drop &= ~(used | dirty);
cap = __get_cap_for_mds(ci, mds);
if (cap && __cap_is_valid(cap)) {
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h
index 3b9eeed097b3..2fa992eaf7da 100644
--- a/fs/ceph/ceph_fs.h
+++ b/fs/ceph/ceph_fs.h
@@ -265,16 +265,17 @@ extern const char *ceph_mds_state_name(int s);
* - they also define the lock ordering by the MDS
* - a few of these are internal to the mds
*/
-#define CEPH_LOCK_DN 1
-#define CEPH_LOCK_ISNAP 2
-#define CEPH_LOCK_IVERSION 4 /* mds internal */
-#define CEPH_LOCK_IFILE 8 /* mds internal */
-#define CEPH_LOCK_IAUTH 32
-#define CEPH_LOCK_ILINK 64
-#define CEPH_LOCK_IDFT 128 /* dir frag tree */
-#define CEPH_LOCK_INEST 256 /* mds internal */
-#define CEPH_LOCK_IXATTR 512
-#define CEPH_LOCK_INO 2048 /* immutable inode bits; not a lock */
+#define CEPH_LOCK_DVERSION 1
+#define CEPH_LOCK_DN 2
+#define CEPH_LOCK_ISNAP 16
+#define CEPH_LOCK_IVERSION 32 /* mds internal */
+#define CEPH_LOCK_IFILE 64
+#define CEPH_LOCK_IAUTH 128
+#define CEPH_LOCK_ILINK 256
+#define CEPH_LOCK_IDFT 512 /* dir frag tree */
+#define CEPH_LOCK_INEST 1024 /* mds internal */
+#define CEPH_LOCK_IXATTR 2048
+#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
/* client_session ops */
enum {
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c
index 9ba54efb6543..a4eec133258e 100644
--- a/fs/ceph/crush/mapper.c
+++ b/fs/ceph/crush/mapper.c
@@ -238,7 +238,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
- dprintk("choose %d x=%d r=%d\n", in->id, x, r);
+ dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -264,7 +264,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
*/
static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
{
- if (weight[item] >= 0x1000)
+ if (weight[item] >= 0x10000)
return 0;
if (weight[item] == 0)
return 1;
@@ -305,7 +305,9 @@ static int crush_choose(struct crush_map *map,
int itemtype;
int collide, reject;
const int orig_tries = 5; /* attempts before we fall back to search */
- dprintk("choose bucket %d x %d outpos %d\n", bucket->id, x, outpos);
+
+ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
+ bucket->id, x, outpos, numrep);
for (rep = outpos; rep < numrep; rep++) {
/* keep trying until we get a non-out, non-colliding item */
@@ -366,6 +368,7 @@ static int crush_choose(struct crush_map *map,
BUG_ON(item >= 0 ||
(-1-item) >= map->max_buckets);
in = map->buckets[-1-item];
+ retry_bucket = 1;
continue;
}
@@ -377,15 +380,25 @@ static int crush_choose(struct crush_map *map,
}
}
- if (recurse_to_leaf &&
- item < 0 &&
- crush_choose(map, map->buckets[-1-item],
- weight,
- x, outpos+1, 0,
- out2, outpos,
- firstn, 0, NULL) <= outpos) {
- reject = 1;
- } else {
+ reject = 0;
+ if (recurse_to_leaf) {
+ if (item < 0) {
+ if (crush_choose(map,
+ map->buckets[-1-item],
+ weight,
+ x, outpos+1, 0,
+ out2, outpos,
+ firstn, 0,
+ NULL) <= outpos)
+ /* didn't get leaf */
+ reject = 1;
+ } else {
+ /* we already have a leaf! */
+ out2[outpos] = item;
+ }
+ }
+
+ if (!reject) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
@@ -424,12 +437,12 @@ reject:
continue;
}
- dprintk("choose got %d\n", item);
+ dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
}
- dprintk("choose returns %d\n", outpos);
+ dprintk("CHOOSE returns %d\n", outpos);
return outpos;
}
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 3be33fb066cc..f2f5332ddbba 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -261,7 +261,7 @@ static int osdc_show(struct seq_file *s, void *pp)
static int caps_show(struct seq_file *s, void *p)
{
- struct ceph_client *client = p;
+ struct ceph_client *client = s->private;
int total, avail, used, reserved, min;
ceph_reservation_status(client, &total, &avail, &used, &reserved, &min);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 4fd30900eff7..f85719310db2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -587,7 +587,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
/* we only need inode linkage */
@@ -1107,10 +1107,9 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(struct file *file, struct dentry *dentry,
- int datasync)
+static int ceph_dir_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_path.dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct list_head *head = &ci->i_unsafe_dirops;
struct ceph_mds_request *req;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 17447644d675..4480cb1c63e7 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -133,7 +133,7 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_ino1 = vino;
req->r_ino2.ino = cfh->parent_ino;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6512b6701b9e..6251a1574b94 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -230,7 +230,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
/* do the open */
req = prepare_open_request(dir->i_sb, flags, mode);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
if (flags & O_CREAT) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a81b8b662c7b..8f9b9fe8ef9f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -69,7 +69,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
BUG_ON(!S_ISDIR(parent->i_mode));
if (IS_ERR(inode))
- return ERR_PTR(PTR_ERR(inode));
+ return inode;
inode->i_mode = parent->i_mode;
inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid;
@@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dcache_lock);
spin_lock(&dn->d_lock);
- list_move_tail(&dir->d_subdirs, &dn->d_u.d_child);
+ list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
@@ -854,8 +854,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
d_drop(dn);
realdn = d_materialise_unique(dn, in);
if (IS_ERR(realdn)) {
- pr_err("splice_dentry error %p inode %p ino %llx.%llx\n",
- dn, in, ceph_vinop(in));
+ pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
+ PTR_ERR(realdn), dn, in, ceph_vinop(in));
if (prehash)
*prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
@@ -1234,18 +1234,23 @@ retry_lookup:
goto out;
}
dn = splice_dentry(dn, in, NULL);
+ if (IS_ERR(dn))
+ dn = NULL;
}
if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
- dput(dn);
- continue;
+ goto next_item;
}
- update_dentry_lease(dn, rinfo->dir_dlease[i],
- req->r_session, req->r_request_started);
- dput(dn);
+ if (dn)
+ update_dentry_lease(dn, rinfo->dir_dlease[i],
+ req->r_session,
+ req->r_request_started);
+next_item:
+ if (dn)
+ dput(dn);
}
req->r_did_prepopulate = true;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 885aa5710cfd..3ab79f6c4ce8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc,
*
* Called under s_mutex.
*/
-static int add_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session,
- int extra)
+int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra)
{
struct ceph_msg *msg;
struct ceph_mds_cap_release *head;
@@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
/*
* called under s_mutex
*/
-static void send_cap_releases(struct ceph_mds_client *mdsc,
- struct ceph_mds_session *session)
+void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
{
struct ceph_msg *msg;
@@ -1768,12 +1768,12 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
mutex_unlock(&mdsc->mutex);
dout("do_request waiting\n");
if (req->r_timeout) {
- err = (long)wait_for_completion_interruptible_timeout(
+ err = (long)wait_for_completion_killable_timeout(
&req->r_completion, req->r_timeout);
if (err == 0)
err = -EIO;
} else {
- err = wait_for_completion_interruptible(&req->r_completion);
+ err = wait_for_completion_killable(&req->r_completion);
}
dout("do_request waited, got %d\n", err);
mutex_lock(&mdsc->mutex);
@@ -1980,7 +1980,7 @@ out_err:
}
mutex_unlock(&mdsc->mutex);
- add_cap_releases(mdsc, req->r_session, -1);
+ ceph_add_cap_releases(mdsc, req->r_session, -1);
mutex_unlock(&session->s_mutex);
/* kick calling process */
@@ -2014,16 +2014,21 @@ static void handle_forward(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
req = __lookup_request(mdsc, tid);
if (!req) {
- dout("forward %llu to mds%d - req dne\n", tid, next_mds);
+ dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
goto out; /* dup reply? */
}
- if (fwd_seq <= req->r_num_fwd) {
- dout("forward %llu to mds%d - old seq %d <= %d\n",
+ if (req->r_aborted) {
+ dout("forward tid %llu aborted, unregistering\n", tid);
+ __unregister_request(mdsc, req);
+ } else if (fwd_seq <= req->r_num_fwd) {
+ dout("forward tid %llu to mds%d - old seq %d <= %d\n",
tid, next_mds, req->r_num_fwd, fwd_seq);
} else {
/* resend. forward race not possible; mds would drop */
- dout("forward %llu to mds%d (we resend)\n", tid, next_mds);
+ dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
+ BUG_ON(req->r_err);
+ BUG_ON(req->r_got_result);
req->r_num_fwd = fwd_seq;
req->r_resend_mds = next_mds;
put_request_session(req);
@@ -2428,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_dentry_info *di;
int mds = session->s_mds;
struct ceph_mds_lease *h = msg->front.iov_base;
+ u32 seq;
struct ceph_vino vino;
int mask;
struct qstr dname;
@@ -2441,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
mask = le16_to_cpu(h->mask);
+ seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
if (dname.len != get_unaligned_le32(h+1))
@@ -2451,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease '%s', mask %d, ino %llx %p\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode);
+ dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
@@ -2477,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
switch (h->action) {
case CEPH_MDS_LEASE_REVOKE:
if (di && di->lease_session == session) {
- h->seq = cpu_to_le32(di->lease_seq);
+ if (ceph_seq_cmp(di->lease_seq, seq) > 0)
+ h->seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
}
release = 1;
@@ -2491,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
unsigned long duration =
le32_to_cpu(h->duration_ms) * HZ / 1000;
- di->lease_seq = le32_to_cpu(h->seq);
+ di->lease_seq = seq;
dentry->d_time = di->lease_renew_from + duration;
di->lease_renew_after = di->lease_renew_from +
(duration >> 1);
@@ -2541,7 +2550,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
return;
lease = msg->front.iov_base;
lease->action = action;
- lease->mask = cpu_to_le16(CEPH_LOCK_DN);
+ lease->mask = cpu_to_le16(1);
lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq);
@@ -2571,7 +2580,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
BUG_ON(inode == NULL);
BUG_ON(dentry == NULL);
- BUG_ON(mask != CEPH_LOCK_DN);
+ BUG_ON(mask == 0);
/* is dentry lease valid? */
spin_lock(&dentry->d_lock);
@@ -2681,10 +2690,10 @@ static void delayed_work(struct work_struct *work)
send_renew_caps(mdsc, s);
else
ceph_con_keepalive(&s->s_con);
- add_cap_releases(mdsc, s, -1);
+ ceph_add_cap_releases(mdsc, s, -1);
if (s->s_state == CEPH_MDS_SESSION_OPEN ||
s->s_state == CEPH_MDS_SESSION_HUNG)
- send_cap_releases(mdsc, s);
+ ceph_send_cap_releases(mdsc, s);
mutex_unlock(&s->s_mutex);
ceph_put_mds_session(s);
@@ -2774,6 +2783,12 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
drop_leases(mdsc);
ceph_flush_dirty_caps(mdsc);
wait_requests(mdsc);
+
+ /*
+ * wait for reply handlers to drop their request refs and
+ * their inode/dcache refs
+ */
+ ceph_msgr_flush();
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d9936c4f1212..b292fa42a66d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -322,6 +322,12 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req)
kref_put(&req->r_kref, ceph_mdsc_release_request);
}
+extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session,
+ int extra);
+extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 60b74839ebec..9ad43a310a41 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -120,6 +120,12 @@ void ceph_msgr_exit(void)
destroy_workqueue(ceph_msgr_wq);
}
+void ceph_msgr_flush()
+{
+ flush_workqueue(ceph_msgr_wq);
+}
+
+
/*
* socket callback functions
*/
@@ -651,7 +657,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);
- con->out_connect.features = CEPH_FEATURE_SUPPORTED_CLIENT;
+ con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED_CLIENT);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
@@ -1390,10 +1396,12 @@ static int read_partial_message(struct ceph_connection *con)
if (!con->in_msg) {
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
+ skip = 0;
con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
+ BUG_ON(con->in_msg);
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index 00a9430b1ffc..76fbc957bc13 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -213,6 +213,7 @@ extern int ceph_parse_ips(const char *c, const char *end,
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
+extern void ceph_msgr_flush(void);
extern struct ceph_messenger *ceph_messenger_create(
struct ceph_entity_addr *myaddr);
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c
index f6510a476e7e..cc115eafae11 100644
--- a/fs/ceph/mon_client.c
+++ b/fs/ceph/mon_client.c
@@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref)
ceph_msg_put(req->reply);
if (req->request)
ceph_msg_put(req->request);
+
+ kfree(req);
}
static void put_generic_request(struct ceph_mon_generic_request *req)
@@ -704,8 +706,11 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
int ret;
+ int was_auth = 0;
mutex_lock(&monc->mutex);
+ if (monc->auth->ops)
+ was_auth = monc->auth->ops->is_authenticated(monc->auth);
monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
@@ -716,11 +721,12 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
wake_up(&monc->client->auth_wq);
} else if (ret > 0) {
__send_prepared_auth_request(monc, ret);
- } else if (monc->auth->ops->is_authenticated(monc->auth)) {
+ } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num = monc->auth->global_id;
+ monc->client->msgr->inst.name.num =
+ cpu_to_le64(monc->auth->global_id);
__send_subscribe(monc);
__resend_generic_request(monc);
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index afa7bb3895c4..92b7251a53f1 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -361,8 +361,13 @@ static void put_osd(struct ceph_osd *osd)
{
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
- if (atomic_dec_and_test(&osd->o_ref))
+ if (atomic_dec_and_test(&osd->o_ref)) {
+ struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
+
+ if (osd->o_authorizer)
+ ac->ops->destroy_authorizer(ac, osd->o_authorizer);
kfree(osd);
+ }
}
/*
@@ -1339,7 +1344,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
int type = le16_to_cpu(msg->hdr.type);
if (!osd)
- return;
+ goto out;
osdc = osd->o_osdc;
switch (type) {
@@ -1354,6 +1359,7 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
pr_err("received unknown message type %d %s\n", type,
ceph_msg_type_name(type));
}
+out:
ceph_msg_put(msg);
}
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index cfdd8f4388b7..50ce64ebd330 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -706,7 +706,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
len, *p, end);
newcrush = crush_decode(*p, min(*p+len, end));
if (IS_ERR(newcrush))
- return ERR_PTR(PTR_ERR(newcrush));
+ return ERR_CAST(newcrush);
+ *p += len;
}
/* new flags? */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7c663d9b9f81..fa87f51e38e1 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_files = le64_to_cpu(st.num_objects);
buf->f_ffree = -1;
- buf->f_namelen = PATH_MAX;
+ buf->f_namelen = NAME_MAX;
buf->f_frsize = PAGE_CACHE_SIZE;
/* leave fsid little-endian, regardless of host endianness */
@@ -669,9 +669,17 @@ static void ceph_destroy_client(struct ceph_client *client)
/* unmount */
ceph_mdsc_stop(&client->mdsc);
- ceph_monc_stop(&client->monc);
ceph_osdc_stop(&client->osdc);
+ /*
+ * make sure mds and osd connections close out before destroying
+ * the auth module, which is needed to free those connections'
+ * ceph_authorizers.
+ */
+ ceph_msgr_flush();
+
+ ceph_monc_stop(&client->monc);
+
ceph_adjust_min_caps(-client->min_caps);
ceph_debugfs_client_cleanup(client);
@@ -738,7 +746,7 @@ static struct dentry *open_root_dentry(struct ceph_client *client,
dout("open_root_inode opening '%s'\n", path);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
- return ERR_PTR(PTR_ERR(req));
+ return ERR_CAST(req);
req->r_path1 = kstrdup(path, GFP_NOFS);
req->r_ino1.ino = CEPH_INO_ROOT;
req->r_ino1.snap = CEPH_NOSNAP;
@@ -918,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data)
/*
* construct our own bdi so we can control readahead, etc.
*/
-static atomic_long_t bdi_seq = ATOMIC_INIT(0);
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
{
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 3725c9ee9d08..10a4a406e887 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,7 +10,6 @@
#include <linux/fs.h>
#include <linux/mempool.h>
#include <linux/pagemap.h>
-#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/slab.h>
@@ -811,7 +810,7 @@ extern void ceph_put_cap(struct ceph_cap *cap);
extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int ceph_fsync(struct file *file, int datasync);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern int ceph_get_cap_mds(struct inode *inode);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
+void cifs_drop_inode(struct inode *inode)
+{
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+ return generic_drop_inode(inode);
+
+ return generic_delete_inode(inode);
+}
+
static const struct super_operations cifs_super_ops = {
.put_super = cifs_put_super,
.statfs = cifs_statfs,
.alloc_inode = cifs_alloc_inode,
.destroy_inode = cifs_destroy_inode,
-/* .drop_inode = generic_delete_inode,
- .delete_inode = cifs_delete_inode, */ /* Do not need above two
- functions unless later we add lazy close of inodes or unless the
+ .drop_inode = cifs_drop_inode,
+/* .delete_inode = cifs_delete_inode, */ /* Do not need above
+ function unless later we add lazy close of inodes or unless the
kernel forgets to call us with the same number of releases (closes)
as opens */
.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 0242ff9cbf41..a7eb65c84b1c 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -84,7 +84,7 @@ extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
size_t write_size, loff_t *poffset);
extern int cifs_lock(struct file *, int, struct file_lock *);
-extern int cifs_fsync(struct file *, struct dentry *, int);
+extern int cifs_fsync(struct file *, int);
extern int cifs_flush(struct file *, fl_owner_t id);
extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
extern const struct file_operations cifs_dir_ops;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
__u16 fileHandle, struct file *file,
struct vfsmount *mnt, unsigned int oflags);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt,
struct super_block *sb,
int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
+#include <linux/file.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
}
write_unlock(&GlobalSMBSeslock);
+ file->private_data = pCifsFile;
+
return pCifsFile;
}
int cifs_posix_open(char *full_path, struct inode **pinode,
- struct vfsmount *mnt, struct super_block *sb,
- int mode, int oflags,
+ struct super_block *sb, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
- * file->private_data.
- */
- if (mnt) {
- struct cifsFileInfo *pfile_info;
-
- pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
- oflags);
- if (pfile_info == NULL)
- rc = -ENOMEM;
- }
-
posix_open_ret:
kfree(presp_data);
return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
int create_options = CREATE_NOT_DIR;
__u32 oplock = 0;
int oflags;
- bool posix_create = false;
/*
* BB below access is probably too much for mknod to request
* but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode,
- nd ? nd->path.mnt : NULL,
inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
handled in posix open */
if (rc == 0) {
- posix_create = true;
if (newinode == NULL) /* query inode info */
goto cifs_create_get_file_info;
else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
else
cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
- /* nfsd case - nfs srv does not set nd */
- if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
- /* mknod case - do not leave file open */
- CIFSSMBClose(xid, tcon, fileHandle);
- } else if (!(posix_create) && (newinode)) {
+ if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
struct cifsFileInfo *pfile_info;
- /*
- * cifs_fill_filedata() takes care of setting cifsFileInfo
- * pointer to file->private_data.
- */
- pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+ struct file *filp;
+
+ filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
+ goto cifs_create_out;
+ }
+
+ pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
nd->path.mnt, oflags);
- if (pfile_info == NULL)
+ if (pfile_info == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, tcon, fileHandle);
rc = -ENOMEM;
+ }
+ } else {
+ CIFSSMBClose(xid, tcon, fileHandle);
}
+
cifs_create_out:
kfree(buf);
kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *pTcon;
+ struct cifsFileInfo *cfile;
struct inode *newInode = NULL;
char *full_path = NULL;
struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
- rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+ rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
else
direntry->d_op = &cifs_dentry_ops;
d_add(direntry, newInode);
- if (posix_open)
- filp = lookup_instantiate_filp(nd, direntry, NULL);
+ if (posix_open) {
+ filp = lookup_instantiate_filp(nd, direntry,
+ generic_file_open);
+ if (IS_ERR(filp)) {
+ rc = PTR_ERR(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ goto lookup_out;
+ }
+
+ cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+ nd->path.mnt,
+ nd->intent.open.flags);
+ if (cfile == NULL) {
+ fput(filp);
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ rc = -ENOMEM;
+ goto lookup_out;
+ }
+ }
/* since paths are not looked up by component - the parent
directories are presumed to be good here */
renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
is a common return code */
}
+lookup_out:
kfree(full_path);
FreeXid(xid);
return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a83541ec9713..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
return 0;
}
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
- struct list_head *tmp;
- struct cifsFileInfo *pCifsFile = NULL;
- struct cifsInodeInfo *pCifsInode = NULL;
-
- /* search inode for this file and fill in file->private_data */
- pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- read_lock(&GlobalSMBSeslock);
- list_for_each(tmp, &pCifsInode->openFileList) {
- pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
- if ((pCifsFile->pfile == NULL) &&
- (pCifsFile->pid == current->tgid)) {
- /* mode set in cifs_create */
-
- /* needed for writepage */
- pCifsFile->pfile = file;
- file->private_data = pCifsFile;
- break;
- }
- }
- read_unlock(&GlobalSMBSeslock);
-
- if (file->private_data != NULL) {
- return pCifsFile;
- } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
- cERROR(1, "could not find file instance for "
- "new file %p", file);
- return NULL;
-}
-
/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
- struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
char *full_path, int xid)
{
+ struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
struct timespec temp;
int rc;
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
/* if not oplocked, invalidate inode pages if mtime or file
size changed */
temp = cifs_NTtimeToUnix(buf->LastWriteTime);
- if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
- (file->f_path.dentry->d_inode->i_size ==
+ if (timespec_equal(&inode->i_mtime, &temp) &&
+ (inode->i_size ==
(loff_t)le64_to_cpu(buf->EndOfFile))) {
cFYI(1, "inode unchanged on server");
} else {
- if (file->f_path.dentry->d_inode->i_mapping) {
+ if (inode->i_mapping) {
/* BB no need to lock inode until after invalidate
since namei code should already have it locked? */
- rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+ rc = filemap_write_and_wait(inode->i_mapping);
if (rc != 0)
- CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+ pCifsInode->write_behind_rc = rc;
}
cFYI(1, "invalidating remote inode since open detected it "
"changed");
- invalidate_remote_inode(file->f_path.dentry->d_inode);
+ invalidate_remote_inode(inode);
}
client_can_cache:
if (pTcon->unix_ext)
- rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
- full_path, inode->i_sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+ xid);
else
- rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
- full_path, buf, inode->i_sb, xid, NULL);
+ rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+ xid, NULL);
if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
pCifsInode->clientCanCacheAll = true;
pCifsInode->clientCanCacheRead = true;
- cFYI(1, "Exclusive Oplock granted on inode %p",
- file->f_path.dentry->d_inode);
+ cFYI(1, "Exclusive Oplock granted on inode %p", inode);
} else if ((*oplock & 0xF) == OPLOCK_READ)
pCifsInode->clientCanCacheRead = true;
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
__u32 oplock;
struct cifs_sb_info *cifs_sb;
struct cifsTconInfo *tcon;
- struct cifsFileInfo *pCifsFile;
+ struct cifsFileInfo *pCifsFile = NULL;
struct cifsInodeInfo *pCifsInode;
char *full_path = NULL;
int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
tcon = cifs_sb->tcon;
pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
- pCifsFile = cifs_fill_filedata(file);
- if (pCifsFile) {
- rc = 0;
- FreeXid(xid);
- return rc;
- }
full_path = build_path_from_dentry(file->f_path.dentry);
if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
oflags |= SMB_O_CREAT;
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, &inode, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
/* no need for special case handling of setting mode
on read only files needed here */
- pCifsFile = cifs_fill_filedata(file);
- cifs_posix_open_inode_helper(inode, file, pCifsInode,
- oplock, netfid);
+ rc = cifs_posix_open_inode_helper(inode, file,
+ pCifsInode, oplock, netfid);
+ if (rc != 0) {
+ CIFSSMBClose(xid, tcon, netfid);
+ goto out;
+ }
+
+ pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+ file->f_path.mnt,
+ oflags);
+ if (pCifsFile == NULL) {
+ CIFSSMBClose(xid, tcon, netfid);
+ rc = -ENOMEM;
+ }
goto out;
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
goto out;
}
+ rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+ if (rc != 0)
+ goto out;
+
pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
file->f_flags);
- file->private_data = pCifsFile;
- if (file->private_data == NULL) {
+ if (pCifsFile == NULL) {
rc = -ENOMEM;
goto out;
}
- rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
- &oplock, buf, full_path, xid);
-
if (oplock & CIFS_CREATE_ACTION) {
/* time to set mode which we can not set earlier due to
problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
- rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
- inode->i_sb,
+ rc = cifs_posix_open(full_path, NULL, inode->i_sb,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@@ -1676,7 +1646,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
return rc;
}
-int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int cifs_fsync(struct file *file, int datasync)
{
int xid;
int rc = 0;
@@ -1688,7 +1658,7 @@ int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
xid = GetXid();
cFYI(1, "Sync file - name: %s datasync: 0x%x",
- dentry->d_name.name, datasync);
+ file->f_path.dentry->d_name.name, datasync);
rc = filemap_write_and_wait(inode->i_mapping);
if (rc == 0) {
@@ -1952,6 +1922,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping,
bytes_read -= PAGE_CACHE_SIZE;
continue;
}
+ page_cache_release(page);
target = kmap_atomic(page, KM_USER0);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
if (rc == 0 || rc != -ETXTBSY)
return rc;
+ /* open-file renames don't work across directories */
+ if (to_dentry->d_parent != from_dentry->d_parent)
+ return rc;
+
/* open the file to be renamed -- we need DELETE perms */
rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
/* calculate session key */
setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
- if (first_time) /* should this be moved into common code
- with similar ntlmv2 path? */
- /* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
- response BB FIXME, v2_sess_key); */
-
- /* copy session key */
-
- /* memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
- bcc_ptr += LM2_SESS_KEY_SIZE; */
+ /* FIXME: calculate MAC key */
memcpy(bcc_ptr, (char *)v2_sess_key,
sizeof(struct ntlmv2_resp));
bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index d99860a33890..6b443ff43a19 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,8 +11,7 @@ extern int coda_fake_statfs;
void coda_destroy_inodecache(void);
int coda_init_inodecache(void);
-int coda_fsync(struct file *coda_file, struct dentry *coda_dentry,
- int datasync);
+int coda_fsync(struct file *coda_file, int datasync);
void coda_sysctl_init(void);
void coda_sysctl_clean(void);
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 7196077b1688..ad3cd2abeeb4 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -202,10 +202,10 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
return 0;
}
-int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, int datasync)
+int coda_fsync(struct file *coda_file, int datasync)
{
struct file *host_file;
- struct inode *coda_inode = coda_dentry->d_inode;
+ struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
struct coda_file_info *cfi;
int err = 0;
diff --git a/fs/compat.c b/fs/compat.c
index f0b391c50552..6490d2134ff3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -626,7 +626,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
tot_len += len;
if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
goto out;
- if (!access_ok(vrfy_dir(type), buf, len)) {
+ if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c8af2d91174b..cf78d44a8d6a 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -73,15 +73,6 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
return -EINVAL;
sd_iattr = sd->s_iattr;
-
- error = inode_change_ok(inode, iattr);
- if (error)
- return error;
-
- error = inode_setattr(inode, iattr);
- if (error)
- return error;
-
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
@@ -94,9 +85,12 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
-
/* attributes were changed atleast once in past */
+ error = simple_setattr(dentry, iattr);
+ if (error)
+ return error;
+
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..c8c78ba07827 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
up_read(&sb->s_umount);
}
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
count -= pruned;
__put_super(sb);
/* more work left to do? */
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4d74fc72c195..0210898458b2 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -277,8 +277,10 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set, "0x%08llx\n"
DEFINE_SIMPLE_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n");
+
/*
- * debugfs_create_x{8,16,32} - create a debugfs file that is used to read and write an unsigned {8,16,32}-bit value
+ * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read and write an unsigned {8,16,32,64}-bit value
*
* These functions are exactly the same as the above functions (but use a hex
* output for the decimal challenged). For details look at the above unsigned
@@ -357,6 +359,23 @@ struct dentry *debugfs_create_x32(const char *name, mode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_x32);
+/**
+ * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value
+ * @name: a pointer to a string containing the name of the file to create.
+ * @mode: the permission that the file should have
+ * @parent: a pointer to the parent dentry for this file. This should be a
+ * directory dentry if set. If this parameter is %NULL, then the
+ * file will be created in the root of the debugfs filesystem.
+ * @value: a pointer to the variable that the file should read to and write
+ * from.
+ */
+struct dentry *debugfs_create_x64(const char *name, mode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file(name, mode, parent, value, &fops_x64);
+}
+EXPORT_SYMBOL_GPL(debugfs_create_x64);
+
static int debugfs_size_t_set(void *data, u64 val)
{
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da111aacb46e..7600aacf531d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1134,27 +1134,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
return ret;
}
-/*
- * This is a library function for use by filesystem drivers.
- *
- * The locking rules are governed by the flags parameter:
- * - if the flags value contains DIO_LOCKING we use a fancy locking
- * scheme for dumb filesystems.
- * For writes this function is called under i_mutex and returns with
- * i_mutex held, for reads, i_mutex is not held on entry, but it is
- * taken and dropped again before returning.
- * For reads and writes i_alloc_sem is taken in shared mode and released
- * on I/O completion (which may happen asynchronously after returning to
- * the caller).
- *
- * - if the flags value does NOT contain DIO_LOCKING we don't use any
- * internal locking but rather rely on the filesystem to synchronize
- * direct I/O reads/writes versus each other and truncate.
- * For reads and writes both i_mutex and i_alloc_sem are not held on
- * entry and are never taken.
- */
ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, int flags)
@@ -1247,9 +1228,46 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
nr_segs, blkbits, get_block, end_io,
submit_io, dio);
+out:
+ return retval;
+}
+EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
+
+/*
+ * This is a library function for use by filesystem drivers.
+ *
+ * The locking rules are governed by the flags parameter:
+ * - if the flags value contains DIO_LOCKING we use a fancy locking
+ * scheme for dumb filesystems.
+ * For writes this function is called under i_mutex and returns with
+ * i_mutex held, for reads, i_mutex is not held on entry, but it is
+ * taken and dropped again before returning.
+ * For reads and writes i_alloc_sem is taken in shared mode and released
+ * on I/O completion (which may happen asynchronously after returning to
+ * the caller).
+ *
+ * - if the flags value does NOT contain DIO_LOCKING we don't use any
+ * internal locking but rather rely on the filesystem to synchronize
+ * direct I/O reads/writes versus each other and truncate.
+ * For reads and writes both i_mutex and i_alloc_sem are not held on
+ * entry and are never taken.
+ */
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
+{
+ ssize_t retval;
+
+ retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
+ offset, nr_segs, get_block, end_io, submit_io, flags);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again for DIO_LOCKING.
+ * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
+ * their own manner. This is a further example of where the old
+ * truncate sequence is inadequate.
*
* NOTE: filesystems with their own locking have to handle this
* on their own.
@@ -1257,12 +1275,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (flags & DIO_LOCKING) {
if (unlikely((rw & WRITE) && retval < 0)) {
loff_t isize = i_size_read(inode);
+ loff_t end = offset + iov_length(iov, nr_segs);
+
if (end > isize)
vmtruncate(inode, isize);
}
}
-out:
return retval;
}
EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 3bdddbcc785f..e8fcf4e2ed7d 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -274,7 +274,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
}
static int
-ecryptfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+ecryptfs_fsync(struct file *file, int datasync)
{
return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 65dee2f336ae..31ef5252f0fe 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -805,7 +805,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
- (ia->ia_size & ~PAGE_CACHE_MASK));
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = vmtruncate(inode, ia->ia_size);
+ rc = simple_setsize(inode, ia->ia_size);
if (rc)
goto out;
lower_ia->ia_size = ia->ia_size;
@@ -830,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
goto out;
}
}
- vmtruncate(inode, ia->ia_size);
+ simple_setsize(inode, ia->ia_size);
rc = ecryptfs_write_inode_size_to_metadata(inode);
if (rc) {
printk(KERN_ERR "Problem with "
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 839b9dc1e70f..fef6899be397 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -40,12 +40,11 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
return 0;
}
-static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int exofs_file_fsync(struct file *filp, int datasync)
{
int ret;
struct address_space *mapping = filp->f_mapping;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = mapping->host;
struct super_block *sb;
ret = filemap_write_and_wait(mapping);
@@ -66,7 +65,7 @@ static int exofs_file_fsync(struct file *filp, struct dentry *dentry,
static int exofs_flush(struct file *file, fl_owner_t id)
{
- exofs_file_fsync(file, file->f_path.dentry, 1);
+ exofs_file_fsync(file, 1);
/* TODO: Flush the OSD target */
return 0;
}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 0b038e47ad2f..52b34f1d2738 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -122,7 +122,6 @@ extern int ext2_write_inode (struct inode *, struct writeback_control *);
extern void ext2_delete_inode (struct inode *);
extern int ext2_sync_inode (struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern void ext2_truncate (struct inode *);
extern int ext2_setattr (struct dentry *, struct iattr *);
extern void ext2_set_inode_flags(struct inode *inode);
extern void ext2_get_inode_flags(struct ext2_inode_info *);
@@ -155,7 +154,7 @@ extern void ext2_write_super (struct super_block *);
extern const struct file_operations ext2_dir_operations;
/* file.c */
-extern int ext2_fsync(struct file *file, struct dentry *dentry, int datasync);
+extern int ext2_fsync(struct file *file, int datasync);
extern const struct inode_operations ext2_file_inode_operations;
extern const struct file_operations ext2_file_operations;
extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5d198d0697fb..49eec9456c5b 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
return 0;
}
-int ext2_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ext2_fsync(struct file *file, int datasync)
{
int ret;
- struct super_block *sb = dentry->d_inode->i_sb;
+ struct super_block *sb = file->f_mapping->host->i_sb;
struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
- ret = simple_fsync(file, dentry, datasync);
+ ret = generic_file_fsync(file, datasync);
if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
@@ -95,7 +95,6 @@ const struct file_operations ext2_xip_file_operations = {
#endif
const struct inode_operations ext2_file_inode_operations = {
- .truncate = ext2_truncate,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 527c46d9bc1f..3675088cb88c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -54,6 +54,18 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode)
inode->i_blocks - ea_blocks == 0);
}
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset);
+
+static void ext2_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, to, inode->i_size);
+ ext2_truncate_blocks(inode, inode->i_size);
+ }
+}
+
/*
* Called at the last iput() if i_nlink is zero.
*/
@@ -71,7 +83,7 @@ void ext2_delete_inode (struct inode * inode)
inode->i_size = 0;
if (inode->i_blocks)
- ext2_truncate (inode);
+ ext2_truncate_blocks(inode, 0);
ext2_free_inode (inode);
return;
@@ -757,8 +769,8 @@ int __ext2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- ext2_get_block);
+ return block_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, ext2_get_block);
}
static int
@@ -766,8 +778,25 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
*pagep = NULL;
- return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
+ ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+ if (ret < 0)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
+}
+
+static int ext2_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ if (ret < len)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
}
static int
@@ -775,13 +804,18 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int ret;
+
/*
* Dir-in-pagecache still uses ext2_write_begin. Would have to rework
* directory handling code to pass around offsets rather than struct
* pages in order to make this work easily.
*/
- return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- ext2_get_block);
+ ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
+ fsdata, ext2_get_block);
+ if (ret < 0)
+ ext2_write_failed(mapping, pos + len);
+ return ret;
}
static int ext2_nobh_writepage(struct page *page,
@@ -800,10 +834,15 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
-
- return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, ext2_get_block, NULL);
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+
+ ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs, ext2_get_block, NULL);
+ if (ret < 0 && (rw & WRITE))
+ ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+ return ret;
}
static int
@@ -818,7 +857,7 @@ const struct address_space_operations ext2_aops = {
.writepage = ext2_writepage,
.sync_page = block_sync_page,
.write_begin = ext2_write_begin,
- .write_end = generic_write_end,
+ .write_end = ext2_write_end,
.bmap = ext2_bmap,
.direct_IO = ext2_direct_IO,
.writepages = ext2_writepages,
@@ -1027,7 +1066,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
ext2_free_data(inode, p, q);
}
-void ext2_truncate(struct inode *inode)
+static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
{
__le32 *i_data = EXT2_I(inode)->i_data;
struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1039,27 +1078,8 @@ void ext2_truncate(struct inode *inode)
int n;
long iblock;
unsigned blocksize;
-
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return;
- if (ext2_inode_is_fast_symlink(inode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return;
-
blocksize = inode->i_sb->s_blocksize;
- iblock = (inode->i_size + blocksize-1)
- >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (mapping_is_xip(inode->i_mapping))
- xip_truncate_page(inode->i_mapping, inode->i_size);
- else if (test_opt(inode->i_sb, NOBH))
- nobh_truncate_page(inode->i_mapping,
- inode->i_size, ext2_get_block);
- else
- block_truncate_page(inode->i_mapping,
- inode->i_size, ext2_get_block);
+ iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
n = ext2_block_to_path(inode, iblock, offsets, NULL);
if (n == 0)
@@ -1127,6 +1147,62 @@ do_indirects:
ext2_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex);
+}
+
+static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
+{
+ /*
+ * XXX: it seems like a bug here that we don't allow
+ * IS_APPEND inode to have blocks-past-i_size trimmed off.
+ * review and fix this.
+ *
+ * Also would be nice to be able to handle IO errors and such,
+ * but that's probably too much to ask.
+ */
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
+ if (ext2_inode_is_fast_symlink(inode))
+ return;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+ __ext2_truncate_blocks(inode, offset);
+}
+
+int ext2_setsize(struct inode *inode, loff_t newsize)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, newsize);
+ if (error)
+ return error;
+
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return -EINVAL;
+ if (ext2_inode_is_fast_symlink(inode))
+ return -EINVAL;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (mapping_is_xip(inode->i_mapping))
+ error = xip_truncate_page(inode->i_mapping, newsize);
+ else if (test_opt(inode->i_sb, NOBH))
+ error = nobh_truncate_page(inode->i_mapping,
+ newsize, ext2_get_block);
+ else
+ error = block_truncate_page(inode->i_mapping,
+ newsize, ext2_get_block);
+ if (error)
+ return error;
+
+ oldsize = inode->i_size;
+ i_size_write(inode, newsize);
+ truncate_pagecache(inode, oldsize, newsize);
+
+ __ext2_truncate_blocks(inode, newsize);
+
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
@@ -1134,6 +1210,8 @@ do_indirects:
} else {
mark_inode_dirty(inode);
}
+
+ return 0;
}
static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino,
@@ -1474,8 +1552,15 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
return error;
}
- error = inode_setattr(inode, iattr);
- if (!error && (iattr->ia_valid & ATTR_MODE))
+ if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) {
+ error = ext2_setsize(inode, iattr->ia_size);
+ if (error)
+ return error;
+ }
+ generic_setattr(inode, iattr);
+ if (iattr->ia_valid & ATTR_MODE)
error = ext2_acl_chmod(inode);
+ mark_inode_dirty(inode);
+
return error;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 71e9eb1fa696..7ff43f4a59cd 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -119,6 +119,8 @@ static void ext2_put_super (struct super_block * sb)
int i;
struct ext2_sb_info *sbi = EXT2_SB(sb);
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
if (sb->s_dirt)
ext2_write_super(sb);
@@ -1063,6 +1065,12 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ext2_sops;
sb->s_export_op = &ext2_export_ops;
sb->s_xattr = ext2_xattr_handlers;
+
+#ifdef CONFIG_QUOTA
+ sb->dq_op = &dquot_operations;
+ sb->s_qcop = &dquot_quotactl_ops;
+#endif
+
root = ext2_iget(sb, EXT2_ROOT_INO);
if (IS_ERR(root)) {
ret = PTR_ERR(root);
@@ -1241,6 +1249,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
spin_unlock(&sbi->s_lock);
return 0;
}
+
/*
* OK, we are remounting a valid rw partition rdonly, so set
* the rdonly flag and then mark the partition as valid again.
@@ -1248,6 +1257,13 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
es->s_state = cpu_to_le16(sbi->s_mount_state);
es->s_mtime = cpu_to_le32(get_seconds());
spin_unlock(&sbi->s_lock);
+
+ err = dquot_suspend(sb, -1);
+ if (err < 0) {
+ spin_lock(&sbi->s_lock);
+ goto restore_opts;
+ }
+
ext2_sync_super(sb, es, 1);
} else {
__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1269,8 +1285,12 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
if (!ext2_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
spin_unlock(&sbi->s_lock);
+
ext2_write_super(sb);
+
+ dquot_resume(sb, -1);
}
+
return 0;
restore_opts:
sbi->s_mount_opt = old_opts.s_mount_opt;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
else {
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
acl = NULL;
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 373fa90c796a..e2e72c367cf6 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -297,7 +297,7 @@ static void free_rb_tree_fname(struct rb_root *root)
kfree (old);
}
if (!parent)
- root->rb_node = NULL;
+ *root = RB_ROOT;
else if (parent->rb_left == n)
parent->rb_left = NULL;
else if (parent->rb_right == n)
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index fcf7487734b6..d7e9f74dc3a6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -43,9 +43,9 @@
* inode to disk.
*/
-int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync)
+int ext3_sync_file(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
int ret, needs_barrier = 0;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 0fc1293d0e96..6c953bb255e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -410,6 +410,8 @@ static void ext3_put_super (struct super_block * sb)
struct ext3_super_block *es = sbi->s_es;
int i, err;
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
lock_kernel();
ext3_xattr_put_super(sb);
@@ -748,7 +750,7 @@ static int ext3_release_dquot(struct dquot *dquot);
static int ext3_mark_dquot_dirty(struct dquot *dquot);
static int ext3_write_info(struct super_block *sb, int type);
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
+ char *path);
static int ext3_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -767,12 +769,12 @@ static const struct dquot_operations ext3_quota_operations = {
static const struct quotactl_ops ext3_qctl_operations = {
.quota_on = ext3_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
#endif
@@ -1527,7 +1529,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
- vfs_quota_off(sb, i, 0);
+ dquot_quota_off(sb, i);
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2551,6 +2553,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
ext3_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext3_mount_options old_opts;
+ int enable_quota = 0;
int err;
#ifdef CONFIG_QUOTA
int i;
@@ -2597,6 +2600,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
}
if (*flags & MS_RDONLY) {
+ err = dquot_suspend(sb, -1);
+ if (err < 0)
+ goto restore_opts;
+
/*
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
@@ -2651,6 +2658,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
goto restore_opts;
if (!ext3_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
+ enable_quota = 1;
}
}
#ifdef CONFIG_QUOTA
@@ -2662,6 +2670,9 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
#endif
unlock_super(sb);
unlock_kernel();
+
+ if (enable_quota)
+ dquot_resume(sb, -1);
return 0;
restore_opts:
sb->s_flags = old_sb_flags;
@@ -2851,24 +2862,21 @@ static int ext3_write_info(struct super_block *sb, int type)
*/
static int ext3_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
- EXT3_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
+ EXT3_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int ext3_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* When remounting, no checks are needed and in fact, name is NULL */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, remount);
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
@@ -2906,7 +2914,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
}
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
return err;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 60bd31026e7c..19a4de57128a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1519,7 +1519,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
-extern int ext4_sync_file(struct file *, struct dentry *, int);
+extern int ext4_sync_file(struct file *, int);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index b6a74f991bf4..592adf2e546e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -71,9 +71,9 @@ static void ext4_sync_parent(struct inode *inode)
* i_mutex lock is held when entering and exiting this function
*/
-int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int ext4_sync_file(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret;
@@ -81,7 +81,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
J_ASSERT(ext4_journal_current_handle() == NULL);
- trace_ext4_sync_file(file, dentry, datasync);
+ trace_ext4_sync_file(file, datasync);
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
@@ -91,7 +91,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
return ret;
if (!journal) {
- ret = simple_fsync(file, dentry, datasync);
+ ret = generic_file_fsync(file, datasync);
if (!ret && !list_empty(&inode->i_dentry))
ext4_sync_parent(inode);
return ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 19df61c321fd..42272d67955a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4942,20 +4942,26 @@ void ext4_set_inode_flags(struct inode *inode)
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
void ext4_get_inode_flags(struct ext4_inode_info *ei)
{
- unsigned int flags = ei->vfs_inode.i_flags;
-
- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
- if (flags & S_SYNC)
- ei->i_flags |= EXT4_SYNC_FL;
- if (flags & S_APPEND)
- ei->i_flags |= EXT4_APPEND_FL;
- if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT4_IMMUTABLE_FL;
- if (flags & S_NOATIME)
- ei->i_flags |= EXT4_NOATIME_FL;
- if (flags & S_DIRSYNC)
- ei->i_flags |= EXT4_DIRSYNC_FL;
+ unsigned int vfs_fl;
+ unsigned long old_fl, new_fl;
+
+ do {
+ vfs_fl = ei->vfs_inode.i_flags;
+ old_fl = ei->i_flags;
+ new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+ EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+ EXT4_DIRSYNC_FL);
+ if (vfs_fl & S_SYNC)
+ new_fl |= EXT4_SYNC_FL;
+ if (vfs_fl & S_APPEND)
+ new_fl |= EXT4_APPEND_FL;
+ if (vfs_fl & S_IMMUTABLE)
+ new_fl |= EXT4_IMMUTABLE_FL;
+ if (vfs_fl & S_NOATIME)
+ new_fl |= EXT4_NOATIME_FL;
+ if (vfs_fl & S_DIRSYNC)
+ new_fl |= EXT4_DIRSYNC_FL;
+ } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -5191,7 +5197,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = 0;
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
return 0;
}
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5204,9 +5210,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
*/
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
- ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+ ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
} else {
- ei->i_flags |= EXT4_HUGE_FILE_FL;
+ ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
/* i_block is stored in file system block size */
i_blocks = i_blocks >> (inode->i_blkbits - 9);
raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 3a6c92ac131c..52abfa12762a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -960,6 +960,9 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
+ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
+ return -EPERM;
+
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 49d88c0597c4..4e8983a9811b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -646,6 +646,8 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
@@ -1062,7 +1064,7 @@ static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount);
+ char *path);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -1084,12 +1086,12 @@ static const struct dquot_operations ext4_quota_operations = {
static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
#endif
@@ -2054,7 +2056,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
if (sb_dqopt(sb)->files[i])
- vfs_quota_off(sb, i, 0);
+ dquot_quota_off(sb, i);
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -3576,6 +3578,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_fsblk_t n_blocks_count = 0;
unsigned long old_sb_flags;
struct ext4_mount_options old_opts;
+ int enable_quota = 0;
ext4_group_t g;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
int err;
@@ -3633,6 +3636,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
if (*flags & MS_RDONLY) {
+ err = dquot_suspend(sb, -1);
+ if (err < 0)
+ goto restore_opts;
+
/*
* First of all, the unconditional stuff we have to do
* to disable replay of the journal when we next remount
@@ -3701,6 +3708,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
if (!ext4_setup_super(sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
+ enable_quota = 1;
}
}
ext4_setup_system_zone(sb);
@@ -3716,6 +3724,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
#endif
unlock_super(sb);
unlock_kernel();
+ if (enable_quota)
+ dquot_resume(sb, -1);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
@@ -3913,24 +3923,21 @@ static int ext4_write_info(struct super_block *sb, int type)
*/
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
- EXT4_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
+ EXT4_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
if (!test_opt(sb, QUOTA))
return -EINVAL;
- /* When remounting, no checks are needed and in fact, name is NULL */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, remount);
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
@@ -3969,7 +3976,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
}
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
return err;
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 53dba57b49a1..27ac25725954 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,11 +306,11 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
extern const struct file_operations fat_file_operations;
extern const struct inode_operations fat_file_inode_operations;
extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
-extern void fat_truncate(struct inode *inode);
+extern int fat_setsize(struct inode *inode, loff_t offset);
+extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
-extern int fat_file_fsync(struct file *file, struct dentry *dentry,
- int datasync);
+extern int fat_file_fsync(struct file *file, int datasync);
/* fat/inode.c */
extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a14c2f6a489e..990dfae022e5 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp)
return 0;
}
-int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int fat_file_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int res, err;
- res = simple_fsync(filp, dentry, datasync);
+ res = generic_file_fsync(filp, datasync);
err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
return res ? res : err;
@@ -283,7 +283,7 @@ static int fat_free(struct inode *inode, int skip)
return fat_free_clusters(inode, free_start);
}
-void fat_truncate(struct inode *inode)
+void fat_truncate_blocks(struct inode *inode, loff_t offset)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
const unsigned int cluster_size = sbi->cluster_size;
@@ -293,10 +293,10 @@ void fat_truncate(struct inode *inode)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > inode->i_size)
- MSDOS_I(inode)->mmu_private = inode->i_size;
+ if (MSDOS_I(inode)->mmu_private > offset)
+ MSDOS_I(inode)->mmu_private = offset;
- nr_clusters = (inode->i_size + (cluster_size - 1)) >> sbi->cluster_bits;
+ nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
fat_free(inode, nr_clusters);
fat_flush_inodes(inode->i_sb, inode, NULL);
@@ -364,6 +364,18 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
return 0;
}
+int fat_setsize(struct inode *inode, loff_t offset)
+{
+ int error;
+
+ error = simple_setsize(inode, offset);
+ if (error)
+ return error;
+ fat_truncate_blocks(inode, offset);
+
+ return error;
+}
+
#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
/* valid file mode bits */
#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -378,7 +390,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
/*
* Expand the file. Since inode_setattr() updates ->i_size
* before calling the ->truncate(), but FAT needs to fill the
- * hole before it.
+ * hole before it. XXX: this is no longer true with new truncate
+ * sequence.
*/
if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size > inode->i_size) {
@@ -427,15 +440,20 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_valid &= ~ATTR_MODE;
}
- if (attr->ia_valid)
- error = inode_setattr(inode, attr);
+ if (attr->ia_valid & ATTR_SIZE) {
+ error = fat_setsize(inode, attr->ia_size);
+ if (error)
+ goto out;
+ }
+
+ generic_setattr(inode, attr);
+ mark_inode_dirty(inode);
out:
return error;
}
EXPORT_SYMBOL_GPL(fat_setattr);
const struct inode_operations fat_file_inode_operations = {
- .truncate = fat_truncate,
.setattr = fat_setattr,
.getattr = fat_getattr,
};
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ed33904926ee..7bf45aee56d7 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -142,14 +142,29 @@ static int fat_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
}
+static void fat_write_failed(struct address_space *mapping, loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ truncate_pagecache(inode, to, inode->i_size);
+ fat_truncate_blocks(inode, inode->i_size);
+ }
+}
+
static int fat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
+ int err;
+
*pagep = NULL;
- return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- fat_get_block,
+ err = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+ pagep, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
+ if (err < 0)
+ fat_write_failed(mapping, pos + len);
+ return err;
}
static int fat_write_end(struct file *file, struct address_space *mapping,
@@ -159,6 +174,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
int err;
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ if (err < len)
+ fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
@@ -172,7 +189,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
if (rw == WRITE) {
/*
@@ -193,8 +212,12 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
- return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs, fat_get_block, NULL);
+ ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs, fat_get_block, NULL);
+ if (ret < 0 && (rw & WRITE))
+ fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+
+ return ret;
}
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
@@ -429,7 +452,7 @@ static void fat_delete_inode(struct inode *inode)
{
truncate_inode_pages(&inode->i_data, 0);
inode->i_size = 0;
- fat_truncate(inode);
+ fat_truncate_blocks(inode, 0);
clear_inode(inode);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f74d270ba155..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,7 +274,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
ret = copy_from_user(&owner, owner_p, sizeof(owner));
if (ret)
- return ret;
+ return -EFAULT;
switch (owner.type) {
case F_OWNER_TID:
@@ -332,8 +332,11 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
}
read_unlock(&filp->f_owner.lock);
- if (!ret)
+ if (!ret) {
ret = copy_to_user(owner_p, &owner, sizeof(owner));
+ if (ret)
+ ret = -EFAULT;
+ }
return ret;
}
@@ -730,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
while (fa) {
struct fown_struct *fown;
+ unsigned long flags;
+
if (fa->magic != FASYNC_MAGIC) {
printk(KERN_ERR "kill_fasync: bad magic number in "
"fasync_struct!\n");
return;
}
- spin_lock(&fa->fa_lock);
+ spin_lock_irqsave(&fa->fa_lock, flags);
if (fa->fa_file) {
fown = &fa->fa_file->f_owner;
/* Don't send SIGURG to processes which have not set a
@@ -744,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
if (!(sig == SIGURG && fown->signum == 0))
send_sigio(fown, fa->fa_fd, band);
}
- spin_unlock(&fa->fa_lock);
+ spin_unlock_irqrestore(&fa->fa_lock, flags);
fa = rcu_dereference(fa->fa_next);
}
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 32d12b78bac8..5c7d10ead4ad 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -194,14 +194,6 @@ struct file *alloc_file(struct path *path, fmode_t mode,
}
EXPORT_SYMBOL(alloc_file);
-void fput(struct file *file)
-{
- if (atomic_long_dec_and_test(&file->f_count))
- __fput(file);
-}
-
-EXPORT_SYMBOL(fput);
-
/**
* drop_file_write_access - give up ability to write to a file
* @file: the file to which we will stop writing
@@ -227,10 +219,9 @@ void drop_file_write_access(struct file *file)
}
EXPORT_SYMBOL_GPL(drop_file_write_access);
-/* __fput is called from task context when aio completion releases the last
- * last use of a struct file *. Do not use otherwise.
+/* the real guts of fput() - releasing the last reference to file
*/
-void __fput(struct file *file)
+static void __fput(struct file *file)
{
struct dentry *dentry = file->f_path.dentry;
struct vfsmount *mnt = file->f_path.mnt;
@@ -268,6 +259,14 @@ void __fput(struct file *file)
mntput(mnt);
}
+void fput(struct file *file)
+{
+ if (atomic_long_dec_and_test(&file->f_count))
+ __fput(file);
+}
+
+EXPORT_SYMBOL(fput);
+
struct file *fget(unsigned int fd)
{
struct file *file;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ea8592b90696..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,52 +38,18 @@ int nr_pdflush_threads;
/*
* Passed into wb_writeback(), essentially a subset of writeback_control
*/
-struct wb_writeback_args {
+struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
enum writeback_sync_modes sync_mode;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
- unsigned int sb_pinned:1;
-};
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
struct list_head list; /* pending work list */
- struct rcu_head rcu_head; /* for RCU free/clear of work */
-
- unsigned long seen; /* threads that have seen this work */
- atomic_t pending; /* number of threads still to do work */
-
- struct wb_writeback_args args; /* writeback arguments */
-
- unsigned long state; /* flag bits, see WS_* */
+ struct completion *done; /* set if the caller waits */
};
-enum {
- WS_USED_B = 0,
- WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
- return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
- struct wb_writeback_args *args)
-{
- INIT_RCU_HEAD(&work->rcu_head);
- work->args = *args;
- work->state = WS_USED;
-}
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -96,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
return !list_empty(&bdi->work_list);
}
-static void bdi_work_clear(struct bdi_work *work)
-{
- clear_bit(WS_USED_B, &work->state);
- smp_mb__after_clear_bit();
- /*
- * work can have disappeared at this point. bit waitq functions
- * should be able to tolerate this, provided bdi_sched_wait does
- * not dereference it's pointer argument.
- */
- wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
- struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
- if (!bdi_work_on_stack(work))
- kfree(work);
- else
- bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
- const enum writeback_sync_modes sync_mode = work->args.sync_mode;
- int onstack = bdi_work_on_stack(work);
-
- /*
- * For allocated work, we can clear the done/seen bit right here.
- * For on-stack work, we need to postpone both the clear and free
- * to after the RCU grace period, since the stack could be invalidated
- * as soon as bdi_work_clear() has done the wakeup.
- */
- if (!onstack)
- bdi_work_clear(work);
- if (sync_mode == WB_SYNC_NONE || onstack)
- call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
- /*
- * The caller has retrieved the work arguments from this work,
- * drop our reference. If this is the last ref, delete and free it
- */
- if (atomic_dec_and_test(&work->pending)) {
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock(&bdi->wb_lock);
- list_del_rcu(&work->list);
- spin_unlock(&bdi->wb_lock);
-
- wb_work_complete(work);
- }
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+ struct wb_writeback_work *work)
{
- work->seen = bdi->wb_mask;
- BUG_ON(!work->seen);
- atomic_set(&work->pending, bdi->wb_cnt);
- BUG_ON(!bdi->wb_cnt);
-
- /*
- * list_add_tail_rcu() contains the necessary barriers to
- * make sure the above stores are seen before the item is
- * noticed on the list
- */
spin_lock(&bdi->wb_lock);
- list_add_tail_rcu(&work->list, &bdi->work_list);
+ list_add_tail(&work->list, &bdi->work_list);
spin_unlock(&bdi->wb_lock);
/*
@@ -182,107 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
}
}
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
- wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
- TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
- struct wb_writeback_args *args,
- int wait)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+ bool range_cyclic, bool for_background)
{
- struct bdi_work *work;
+ struct wb_writeback_work *work;
/*
* This is WB_SYNC_NONE writeback, so if allocation fails just
* wakeup the thread for old dirty data writeback
*/
- work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (work) {
- bdi_work_init(work, args);
- bdi_queue_work(bdi, work);
- if (wait)
- bdi_wait_on_work_clear(work);
- } else {
- struct bdi_writeback *wb = &bdi->wb;
-
- if (wb->task)
- wake_up_process(wb->task);
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ if (bdi->wb.task)
+ wake_up_process(bdi->wb.task);
+ return;
}
+
+ work->sync_mode = WB_SYNC_NONE;
+ work->nr_pages = nr_pages;
+ work->range_cyclic = range_cyclic;
+ work->for_background = for_background;
+
+ bdi_queue_work(bdi, work);
}
/**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
*
* Description:
- * This does WB_SYNC_ALL data integrity writeback and waits for the
- * IO to complete. Callers must hold the sb s_umount semaphore for
- * reading, to avoid having the super disappear before we are done.
+ * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * started when this function returns, we make no guarentees on
+ * completion. Caller need not hold sb s_umount semaphore.
+ *
*/
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
- struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_ALL,
- .nr_pages = LONG_MAX,
- .range_cyclic = 0,
- /*
- * Setting sb_pinned is not necessary for WB_SYNC_ALL, but
- * lets make it explicitly clear.
- */
- .sb_pinned = 1,
- };
- struct bdi_work work;
-
- bdi_work_init(&work, &args);
- work.state |= WS_ONSTACK;
-
- bdi_queue_work(bdi, &work);
- bdi_wait_on_work_clear(&work);
+ __bdi_start_writeback(bdi, nr_pages, true, false);
}
/**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
* @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
- * @sb_locked: caller already holds sb umount sem.
*
* Description:
- * This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ * This does WB_SYNC_NONE background writeback. The IO is only
* started when this function returns, we make no guarentees on
- * completion. Caller specifies whether sb umount sem is held already or not.
- *
+ * completion. Caller need not hold sb s_umount semaphore.
*/
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
- long nr_pages, int sb_locked)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .sync_mode = WB_SYNC_NONE,
- .nr_pages = nr_pages,
- .range_cyclic = 1,
- .sb_pinned = sb_locked,
- };
-
- /*
- * We treat @nr_pages=0 as the special case to do background writeback,
- * ie. to sync pages until the background dirty threshold is reached.
- */
- if (!nr_pages) {
- args.nr_pages = LONG_MAX;
- args.for_background = 1;
- }
-
- bdi_alloc_queue_work(bdi, &args, sb_locked);
+ __bdi_start_writeback(bdi, LONG_MAX, true, true);
}
/*
@@ -572,75 +425,69 @@ select_queue:
return ret;
}
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
- up_read(&sb->s_umount);
- put_super(sb);
-}
-
-enum sb_pin_state {
- SB_PINNED,
- SB_NOT_PINNED,
- SB_PIN_FAILED
-};
-
/*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
* before calling writeback. So make sure that we do pin it, so it doesn't
* go away while we are writing inodes from it.
*/
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
- struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
{
- /*
- * Caller must already hold the ref for this
- */
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->sb_pinned) {
- WARN_ON(!rwsem_is_locked(&sb->s_umount));
- return SB_NOT_PINNED;
- }
spin_lock(&sb_lock);
+ if (list_empty(&sb->s_instances)) {
+ spin_unlock(&sb_lock);
+ return false;
+ }
+
sb->s_count++;
+ spin_unlock(&sb_lock);
+
if (down_read_trylock(&sb->s_umount)) {
- if (sb->s_root) {
- spin_unlock(&sb_lock);
- return SB_PINNED;
- }
- /*
- * umounted, drop rwsem again and fall through to failure
- */
+ if (sb->s_root)
+ return true;
up_read(&sb->s_umount);
}
- sb->s_count--;
- spin_unlock(&sb_lock);
- return SB_PIN_FAILED;
+
+ put_super(sb);
+ return false;
}
/*
* Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
* inodes. Otherwise write only ones which go sequentially
* in reverse order.
+ *
* Return 1, if the caller writeback routine should be
* interrupted. Otherwise return 0.
*/
-static int writeback_sb_inodes(struct super_block *sb,
- struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
+ struct writeback_control *wbc, bool only_this_sb)
{
while (!list_empty(&wb->b_io)) {
long pages_skipped;
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
- if (wbc->sb && sb != inode->i_sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- if (sb != inode->i_sb)
- /* finish with this superblock */
+
+ if (inode->i_sb != sb) {
+ if (only_this_sb) {
+ /*
+ * We only want to write back data for this
+ * superblock, move all inodes not belonging
+ * to it back onto the dirty list.
+ */
+ redirty_tail(inode);
+ continue;
+ }
+
+ /*
+ * The inode belongs to a different superblock.
+ * Bounce back to the caller to unpin this and
+ * pin the next superblock.
+ */
return 0;
+ }
+
if (inode->i_state & (I_NEW | I_WILL_FREE)) {
requeue_io(inode);
continue;
@@ -678,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
return 1;
}
-static void writeback_inodes_wb(struct bdi_writeback *wb,
- struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
+ struct writeback_control *wbc)
{
int ret = 0;
@@ -692,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = list_entry(wb->b_io.prev,
struct inode, i_list);
struct super_block *sb = inode->i_sb;
- enum sb_pin_state state;
-
- if (wbc->sb && sb != wbc->sb) {
- /* super block given and doesn't
- match, skip this inode */
- redirty_tail(inode);
- continue;
- }
- state = pin_sb_for_writeback(wbc, sb);
- if (state == SB_PIN_FAILED) {
+ if (!pin_sb_for_writeback(sb)) {
requeue_io(inode);
continue;
}
- ret = writeback_sb_inodes(sb, wb, wbc);
+ ret = writeback_sb_inodes(sb, wb, wbc, false);
+ drop_super(sb);
- if (state == SB_PINNED)
- unpin_sb_for_writeback(sb);
if (ret)
break;
}
@@ -717,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
/* Leave any unwritten inodes on b_io */
}
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
+ struct bdi_writeback *wb, struct writeback_control *wbc)
{
- struct backing_dev_info *bdi = wbc->bdi;
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
- writeback_inodes_wb(&bdi->wb, wbc);
+ wbc->wb_start = jiffies; /* livelock avoidance */
+ spin_lock(&inode_lock);
+ if (!wbc->for_kupdate || list_empty(&wb->b_io))
+ queue_io(wb, wbc->older_than_this);
+ writeback_sb_inodes(sb, wb, wbc, true);
+ spin_unlock(&inode_lock);
}
/*
@@ -759,17 +602,14 @@ static inline bool over_bground_thresh(void)
* all dirty pages if they are all attached to "old" mappings.
*/
static long wb_writeback(struct bdi_writeback *wb,
- struct wb_writeback_args *args)
+ struct wb_writeback_work *work)
{
struct writeback_control wbc = {
- .bdi = wb->bdi,
- .sb = args->sb,
- .sync_mode = args->sync_mode,
+ .sync_mode = work->sync_mode,
.older_than_this = NULL,
- .for_kupdate = args->for_kupdate,
- .for_background = args->for_background,
- .range_cyclic = args->range_cyclic,
- .sb_pinned = args->sb_pinned,
+ .for_kupdate = work->for_kupdate,
+ .for_background = work->for_background,
+ .range_cyclic = work->range_cyclic,
};
unsigned long oldest_jif;
long wrote = 0;
@@ -789,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
/*
* Stop writeback when nr_pages has been consumed
*/
- if (args->nr_pages <= 0)
+ if (work->nr_pages <= 0)
break;
/*
* For background writeout, stop when we are below the
* background dirty threshold
*/
- if (args->for_background && !over_bground_thresh())
+ if (work->for_background && !over_bground_thresh())
break;
wbc.more_io = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
- writeback_inodes_wb(wb, &wbc);
- args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+ if (work->sb)
+ __writeback_inodes_sb(work->sb, wb, &wbc);
+ else
+ writeback_inodes_wb(wb, &wbc);
+ work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
/*
@@ -839,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
}
/*
- * Return the next bdi_work struct that hasn't been processed by this
- * wb thread yet. ->seen is initially set for each thread that exists
- * for this device, when a thread first notices a piece of work it
- * clears its bit. Depending on writeback type, the thread will notify
- * completion on either receiving the work (WB_SYNC_NONE) or after
- * it is done (WB_SYNC_ALL).
+ * Return the next wb_writeback_work struct that hasn't been processed yet.
*/
-static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
- struct bdi_writeback *wb)
+static struct wb_writeback_work *
+get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
{
- struct bdi_work *work, *ret = NULL;
+ struct wb_writeback_work *work = NULL;
- rcu_read_lock();
-
- list_for_each_entry_rcu(work, &bdi->work_list, list) {
- if (!test_bit(wb->nr, &work->seen))
- continue;
- clear_bit(wb->nr, &work->seen);
-
- ret = work;
- break;
+ spin_lock(&bdi->wb_lock);
+ if (!list_empty(&bdi->work_list)) {
+ work = list_entry(bdi->work_list.next,
+ struct wb_writeback_work, list);
+ list_del_init(&work->list);
}
-
- rcu_read_unlock();
- return ret;
+ spin_unlock(&bdi->wb_lock);
+ return work;
}
static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -888,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
if (nr_pages) {
- struct wb_writeback_args args = {
+ struct wb_writeback_work work = {
.nr_pages = nr_pages,
.sync_mode = WB_SYNC_NONE,
.for_kupdate = 1,
.range_cyclic = 1,
};
- return wb_writeback(wb, &args);
+ return wb_writeback(wb, &work);
}
return 0;
@@ -907,36 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
struct backing_dev_info *bdi = wb->bdi;
- struct bdi_work *work;
+ struct wb_writeback_work *work;
long wrote = 0;
while ((work = get_next_work_item(bdi, wb)) != NULL) {
- struct wb_writeback_args args = work->args;
- int post_clear;
-
/*
* Override sync mode, in case we must wait for completion
+ * because this thread is exiting now.
*/
if (force_wait)
- work->args.sync_mode = args.sync_mode = WB_SYNC_ALL;
-
- post_clear = WB_SYNC_ALL || args.sb_pinned;
-
- /*
- * If this isn't a data integrity operation, just notify
- * that we have seen this work and we are now starting it.
- */
- if (!post_clear)
- wb_clear_pending(wb, work);
+ work->sync_mode = WB_SYNC_ALL;
- wrote += wb_writeback(wb, &args);
+ wrote += wb_writeback(wb, work);
/*
- * This is a data integrity writeback, so only do the
- * notification when we have completed the work.
+ * Notify the caller of completion if this is a synchronous
+ * work item, otherwise just free it.
*/
- if (post_clear)
- wb_clear_pending(wb, work);
+ if (work->done)
+ complete(work->done);
+ else
+ kfree(work);
}
/*
@@ -993,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
}
/*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
+ * the whole world.
*/
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
{
- struct wb_writeback_args args = {
- .sb = sb,
- .nr_pages = nr_pages,
- .sync_mode = WB_SYNC_NONE,
- };
struct backing_dev_info *bdi;
- rcu_read_lock();
+ if (!nr_pages) {
+ nr_pages = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ }
+ rcu_read_lock();
list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
if (!bdi_has_dirty_io(bdi))
continue;
-
- bdi_alloc_queue_work(bdi, &args, 0);
+ __bdi_start_writeback(bdi, nr_pages, false, false);
}
-
rcu_read_unlock();
}
-/*
- * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
- if (nr_pages == 0)
- nr_pages = global_page_state(NR_FILE_DIRTY) +
- global_page_state(NR_UNSTABLE_NFS);
- bdi_writeback_all(NULL, nr_pages);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1220,18 +1029,6 @@ static void wait_sb_inodes(struct super_block *sb)
iput(old_inode);
}
-static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
-{
- unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
- unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
- long nr_to_write;
-
- nr_to_write = nr_dirty + nr_unstable +
- (inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
- bdi_start_writeback(sb->s_bdi, sb, nr_to_write, sb_locked);
-}
-
/**
* writeback_inodes_sb - writeback dirty inodes from given super_block
* @sb: the superblock
@@ -1243,21 +1040,24 @@ static void __writeback_inodes_sb(struct super_block *sb, int sb_locked)
*/
void writeback_inodes_sb(struct super_block *sb)
{
- __writeback_inodes_sb(sb, 0);
-}
-EXPORT_SYMBOL(writeback_inodes_sb);
+ unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
+ unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_NONE,
+ .done = &done,
+ };
-/**
- * writeback_inodes_sb_locked - writeback dirty inodes from given super_block
- * @sb: the superblock
- *
- * Like writeback_inodes_sb(), except the caller already holds the
- * sb umount sem.
- */
-void writeback_inodes_sb_locked(struct super_block *sb)
-{
- __writeback_inodes_sb(sb, 1);
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ work.nr_pages = nr_dirty + nr_unstable +
+ (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
}
+EXPORT_SYMBOL(writeback_inodes_sb);
/**
* writeback_inodes_sb_if_idle - start writeback if none underway
@@ -1269,7 +1069,9 @@ void writeback_inodes_sb_locked(struct super_block *sb)
int writeback_inodes_sb_if_idle(struct super_block *sb)
{
if (!writeback_in_progress(sb->s_bdi)) {
+ down_read(&sb->s_umount);
writeback_inodes_sb(sb);
+ up_read(&sb->s_umount);
return 1;
} else
return 0;
@@ -1285,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
*/
void sync_inodes_sb(struct super_block *sb)
{
- bdi_sync_writeback(sb->s_bdi, sb);
+ DECLARE_COMPLETION_ONSTACK(done);
+ struct wb_writeback_work work = {
+ .sb = sb,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_pages = LONG_MAX,
+ .range_cyclic = 0,
+ .done = &done,
+ };
+
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ bdi_queue_work(sb->s_bdi, &work);
+ wait_for_completion(&done);
+
wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 47aefd376e54..723b889fd219 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -710,30 +710,26 @@ static void fscache_write_op(struct fscache_operation *_op)
goto superseded;
}
- if (page) {
- radix_tree_tag_set(&cookie->stores, page->index,
- FSCACHE_COOKIE_STORING_TAG);
- radix_tree_tag_clear(&cookie->stores, page->index,
- FSCACHE_COOKIE_PENDING_TAG);
- }
+ radix_tree_tag_set(&cookie->stores, page->index,
+ FSCACHE_COOKIE_STORING_TAG);
+ radix_tree_tag_clear(&cookie->stores, page->index,
+ FSCACHE_COOKIE_PENDING_TAG);
spin_unlock(&cookie->stores_lock);
spin_unlock(&object->lock);
- if (page) {
- fscache_set_op_state(&op->op, "Store");
- fscache_stat(&fscache_n_store_pages);
- fscache_stat(&fscache_n_cop_write_page);
- ret = object->cache->ops->write_page(op, page);
- fscache_stat_d(&fscache_n_cop_write_page);
- fscache_set_op_state(&op->op, "EndWrite");
- fscache_end_page_write(object, page);
- if (ret < 0) {
- fscache_set_op_state(&op->op, "Abort");
- fscache_abort_object(object);
- } else {
- fscache_enqueue_operation(&op->op);
- }
+ fscache_set_op_state(&op->op, "Store");
+ fscache_stat(&fscache_n_store_pages);
+ fscache_stat(&fscache_n_cop_write_page);
+ ret = object->cache->ops->write_page(op, page);
+ fscache_stat_d(&fscache_n_cop_write_page);
+ fscache_set_op_state(&op->op, "EndWrite");
+ fscache_end_page_write(object, page);
+ if (ret < 0) {
+ fscache_set_op_state(&op->op, "Abort");
+ fscache_abort_object(object);
+ } else {
+ fscache_enqueue_operation(&op->op);
}
_leave("");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e53df5ebb2b8..9424796d6634 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -16,6 +16,9 @@
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/swap.h>
+#include <linux/splice.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
@@ -499,6 +502,9 @@ struct fuse_copy_state {
int write;
struct fuse_req *req;
const struct iovec *iov;
+ struct pipe_buffer *pipebufs;
+ struct pipe_buffer *currbuf;
+ struct pipe_inode_info *pipe;
unsigned long nr_segs;
unsigned long seglen;
unsigned long addr;
@@ -506,16 +512,16 @@ struct fuse_copy_state {
void *mapaddr;
void *buf;
unsigned len;
+ unsigned move_pages:1;
};
static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
- int write, struct fuse_req *req,
+ int write,
const struct iovec *iov, unsigned long nr_segs)
{
memset(cs, 0, sizeof(*cs));
cs->fc = fc;
cs->write = write;
- cs->req = req;
cs->iov = iov;
cs->nr_segs = nr_segs;
}
@@ -523,7 +529,18 @@ static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
- if (cs->mapaddr) {
+ if (cs->currbuf) {
+ struct pipe_buffer *buf = cs->currbuf;
+
+ if (!cs->write) {
+ buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+ } else {
+ kunmap_atomic(cs->mapaddr, KM_USER0);
+ buf->len = PAGE_SIZE - cs->len;
+ }
+ cs->currbuf = NULL;
+ cs->mapaddr = NULL;
+ } else if (cs->mapaddr) {
kunmap_atomic(cs->mapaddr, KM_USER0);
if (cs->write) {
flush_dcache_page(cs->pg);
@@ -545,26 +562,61 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
unlock_request(cs->fc, cs->req);
fuse_copy_finish(cs);
- if (!cs->seglen) {
- BUG_ON(!cs->nr_segs);
- cs->seglen = cs->iov[0].iov_len;
- cs->addr = (unsigned long) cs->iov[0].iov_base;
- cs->iov++;
- cs->nr_segs--;
+ if (cs->pipebufs) {
+ struct pipe_buffer *buf = cs->pipebufs;
+
+ if (!cs->write) {
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->len = buf->len;
+ cs->buf = cs->mapaddr + buf->offset;
+ cs->pipebufs++;
+ cs->nr_segs--;
+ } else {
+ struct page *page;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ return -ENOMEM;
+
+ buf->page = page;
+ buf->offset = 0;
+ buf->len = 0;
+
+ cs->currbuf = buf;
+ cs->mapaddr = kmap_atomic(page, KM_USER0);
+ cs->buf = cs->mapaddr;
+ cs->len = PAGE_SIZE;
+ cs->pipebufs++;
+ cs->nr_segs++;
+ }
+ } else {
+ if (!cs->seglen) {
+ BUG_ON(!cs->nr_segs);
+ cs->seglen = cs->iov[0].iov_len;
+ cs->addr = (unsigned long) cs->iov[0].iov_base;
+ cs->iov++;
+ cs->nr_segs--;
+ }
+ err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
+ if (err < 0)
+ return err;
+ BUG_ON(err != 1);
+ offset = cs->addr % PAGE_SIZE;
+ cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
+ cs->buf = cs->mapaddr + offset;
+ cs->len = min(PAGE_SIZE - offset, cs->seglen);
+ cs->seglen -= cs->len;
+ cs->addr += cs->len;
}
- down_read(&current->mm->mmap_sem);
- err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
- &cs->pg, NULL);
- up_read(&current->mm->mmap_sem);
- if (err < 0)
- return err;
- BUG_ON(err != 1);
- offset = cs->addr % PAGE_SIZE;
- cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
- cs->buf = cs->mapaddr + offset;
- cs->len = min(PAGE_SIZE - offset, cs->seglen);
- cs->seglen -= cs->len;
- cs->addr += cs->len;
return lock_request(cs->fc, cs->req);
}
@@ -586,23 +638,178 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
return ncpy;
}
+static int fuse_check_page(struct page *page)
+{
+ if (page_mapcount(page) ||
+ page->mapping != NULL ||
+ page_count(page) != 1 ||
+ (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+ ~(1 << PG_locked |
+ 1 << PG_referenced |
+ 1 << PG_uptodate |
+ 1 << PG_lru |
+ 1 << PG_active |
+ 1 << PG_reclaim))) {
+ printk(KERN_WARNING "fuse: trying to steal weird page\n");
+ printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
+ return 1;
+ }
+ return 0;
+}
+
+static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
+{
+ int err;
+ struct page *oldpage = *pagep;
+ struct page *newpage;
+ struct pipe_buffer *buf = cs->pipebufs;
+ struct address_space *mapping;
+ pgoff_t index;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ err = buf->ops->confirm(cs->pipe, buf);
+ if (err)
+ return err;
+
+ BUG_ON(!cs->nr_segs);
+ cs->currbuf = buf;
+ cs->len = buf->len;
+ cs->pipebufs++;
+ cs->nr_segs--;
+
+ if (cs->len != PAGE_SIZE)
+ goto out_fallback;
+
+ if (buf->ops->steal(cs->pipe, buf) != 0)
+ goto out_fallback;
+
+ newpage = buf->page;
+
+ if (WARN_ON(!PageUptodate(newpage)))
+ return -EIO;
+
+ ClearPageMappedToDisk(newpage);
+
+ if (fuse_check_page(newpage) != 0)
+ goto out_fallback_unlock;
+
+ mapping = oldpage->mapping;
+ index = oldpage->index;
+
+ /*
+ * This is a new and locked page, it shouldn't be mapped or
+ * have any special flags on it
+ */
+ if (WARN_ON(page_mapped(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(page_has_private(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+ goto out_fallback_unlock;
+ if (WARN_ON(PageMlocked(oldpage)))
+ goto out_fallback_unlock;
+
+ remove_from_page_cache(oldpage);
+ page_cache_release(oldpage);
+
+ err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
+ if (err) {
+ printk(KERN_WARNING "fuse_try_move_page: failed to add page");
+ goto out_fallback_unlock;
+ }
+ page_cache_get(newpage);
+
+ if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+ lru_cache_add_file(newpage);
+
+ err = 0;
+ spin_lock(&cs->fc->lock);
+ if (cs->req->aborted)
+ err = -ENOENT;
+ else
+ *pagep = newpage;
+ spin_unlock(&cs->fc->lock);
+
+ if (err) {
+ unlock_page(newpage);
+ page_cache_release(newpage);
+ return err;
+ }
+
+ unlock_page(oldpage);
+ page_cache_release(oldpage);
+ cs->len = 0;
+
+ return 0;
+
+out_fallback_unlock:
+ unlock_page(newpage);
+out_fallback:
+ cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+ cs->buf = cs->mapaddr + buf->offset;
+
+ err = lock_request(cs->fc, cs->req);
+ if (err)
+ return err;
+
+ return 1;
+}
+
+static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
+ unsigned offset, unsigned count)
+{
+ struct pipe_buffer *buf;
+
+ if (cs->nr_segs == cs->pipe->buffers)
+ return -EIO;
+
+ unlock_request(cs->fc, cs->req);
+ fuse_copy_finish(cs);
+
+ buf = cs->pipebufs;
+ page_cache_get(page);
+ buf->page = page;
+ buf->offset = offset;
+ buf->len = count;
+
+ cs->pipebufs++;
+ cs->nr_segs++;
+ cs->len = 0;
+
+ return 0;
+}
+
/*
* Copy a page in the request to/from the userspace buffer. Must be
* done atomically
*/
-static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
+static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
unsigned offset, unsigned count, int zeroing)
{
+ int err;
+ struct page *page = *pagep;
+
if (page && zeroing && count < PAGE_SIZE) {
void *mapaddr = kmap_atomic(page, KM_USER1);
memset(mapaddr, 0, PAGE_SIZE);
kunmap_atomic(mapaddr, KM_USER1);
}
while (count) {
- if (!cs->len) {
- int err = fuse_copy_fill(cs);
- if (err)
- return err;
+ if (cs->write && cs->pipebufs && page) {
+ return fuse_ref_page(cs, page, offset, count);
+ } else if (!cs->len) {
+ if (cs->move_pages && page &&
+ offset == 0 && count == PAGE_SIZE) {
+ err = fuse_try_move_page(cs, pagep);
+ if (err <= 0)
+ return err;
+ } else {
+ err = fuse_copy_fill(cs);
+ if (err)
+ return err;
+ }
}
if (page) {
void *mapaddr = kmap_atomic(page, KM_USER1);
@@ -627,8 +834,10 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
- struct page *page = req->pages[i];
- int err = fuse_copy_page(cs, page, offset, count, zeroing);
+ int err;
+
+ err = fuse_copy_page(cs, &req->pages[i], offset, count,
+ zeroing);
if (err)
return err;
@@ -705,11 +914,10 @@ __acquires(&fc->lock)
*
* Called with fc->lock held, releases it
*/
-static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
- const struct iovec *iov, unsigned long nr_segs)
+static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
+ size_t nbytes, struct fuse_req *req)
__releases(&fc->lock)
{
- struct fuse_copy_state cs;
struct fuse_in_header ih;
struct fuse_interrupt_in arg;
unsigned reqsize = sizeof(ih) + sizeof(arg);
@@ -725,14 +933,13 @@ __releases(&fc->lock)
arg.unique = req->in.h.unique;
spin_unlock(&fc->lock);
- if (iov_length(iov, nr_segs) < reqsize)
+ if (nbytes < reqsize)
return -EINVAL;
- fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
- err = fuse_copy_one(&cs, &ih, sizeof(ih));
+ err = fuse_copy_one(cs, &ih, sizeof(ih));
if (!err)
- err = fuse_copy_one(&cs, &arg, sizeof(arg));
- fuse_copy_finish(&cs);
+ err = fuse_copy_one(cs, &arg, sizeof(arg));
+ fuse_copy_finish(cs);
return err ? err : reqsize;
}
@@ -746,18 +953,13 @@ __releases(&fc->lock)
* request_end(). Otherwise add it to the processing list, and set
* the 'sent' flag.
*/
-static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
+ struct fuse_copy_state *cs, size_t nbytes)
{
int err;
struct fuse_req *req;
struct fuse_in *in;
- struct fuse_copy_state cs;
unsigned reqsize;
- struct file *file = iocb->ki_filp;
- struct fuse_conn *fc = fuse_get_conn(file);
- if (!fc)
- return -EPERM;
restart:
spin_lock(&fc->lock);
@@ -777,7 +979,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
if (!list_empty(&fc->interrupts)) {
req = list_entry(fc->interrupts.next, struct fuse_req,
intr_entry);
- return fuse_read_interrupt(fc, req, iov, nr_segs);
+ return fuse_read_interrupt(fc, cs, nbytes, req);
}
req = list_entry(fc->pending.next, struct fuse_req, list);
@@ -787,7 +989,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
in = &req->in;
reqsize = in->h.len;
/* If request is too large, reply with an error and restart the read */
- if (iov_length(iov, nr_segs) < reqsize) {
+ if (nbytes < reqsize) {
req->out.h.error = -EIO;
/* SETXATTR is special, since it may contain too large data */
if (in->h.opcode == FUSE_SETXATTR)
@@ -796,12 +998,12 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
goto restart;
}
spin_unlock(&fc->lock);
- fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
- err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+ cs->req = req;
+ err = fuse_copy_one(cs, &in->h, sizeof(in->h));
if (!err)
- err = fuse_copy_args(&cs, in->numargs, in->argpages,
+ err = fuse_copy_args(cs, in->numargs, in->argpages,
(struct fuse_arg *) in->args, 0);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
req->locked = 0;
if (req->aborted) {
@@ -829,6 +1031,110 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return err;
}
+static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct file *file = iocb->ki_filp;
+ struct fuse_conn *fc = fuse_get_conn(file);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 1, iov, nr_segs);
+
+ return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
+}
+
+static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = fuse_dev_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
+static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ int ret;
+ int page_nr = 0;
+ int do_wakeup = 0;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(in);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ fuse_copy_init(&cs, fc, 1, NULL, 0);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+ ret = fuse_dev_do_read(fc, in, &cs, len);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+ pipe_lock(pipe);
+
+ if (!pipe->readers) {
+ send_sig(SIGPIPE, current, 0);
+ if (!ret)
+ ret = -EPIPE;
+ goto out_unlock;
+ }
+
+ if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
+ while (page_nr < cs.nr_segs) {
+ int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
+ struct pipe_buffer *buf = pipe->bufs + newbuf;
+
+ buf->page = bufs[page_nr].page;
+ buf->offset = bufs[page_nr].offset;
+ buf->len = bufs[page_nr].len;
+ buf->ops = &fuse_dev_pipe_buf_ops;
+
+ pipe->nrbufs++;
+ page_nr++;
+ ret += buf->len;
+
+ if (pipe->inode)
+ do_wakeup = 1;
+ }
+
+out_unlock:
+ pipe_unlock(pipe);
+
+ if (do_wakeup) {
+ smp_mb();
+ if (waitqueue_active(&pipe->wait))
+ wake_up_interruptible(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ }
+
+out:
+ for (; page_nr < cs.nr_segs; page_nr++)
+ page_cache_release(bufs[page_nr].page);
+
+ kfree(bufs);
+ return ret;
+}
+
static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
struct fuse_copy_state *cs)
{
@@ -988,23 +1294,17 @@ static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
* it from the list and copy the rest of the buffer to the request.
* The request is finished by calling request_end()
*/
-static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
+ struct fuse_copy_state *cs, size_t nbytes)
{
int err;
- size_t nbytes = iov_length(iov, nr_segs);
struct fuse_req *req;
struct fuse_out_header oh;
- struct fuse_copy_state cs;
- struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
- if (!fc)
- return -EPERM;
- fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
if (nbytes < sizeof(struct fuse_out_header))
return -EINVAL;
- err = fuse_copy_one(&cs, &oh, sizeof(oh));
+ err = fuse_copy_one(cs, &oh, sizeof(oh));
if (err)
goto err_finish;
@@ -1017,7 +1317,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
* and error contains notification code.
*/
if (!oh.unique) {
- err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
+ err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
return err ? err : nbytes;
}
@@ -1036,7 +1336,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
if (req->aborted) {
spin_unlock(&fc->lock);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
request_end(fc, req);
return -ENOENT;
@@ -1053,7 +1353,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
queue_interrupt(fc, req);
spin_unlock(&fc->lock);
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
return nbytes;
}
@@ -1061,11 +1361,13 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
list_move(&req->list, &fc->io);
req->out.h = oh;
req->locked = 1;
- cs.req = req;
+ cs->req = req;
+ if (!req->out.page_replace)
+ cs->move_pages = 0;
spin_unlock(&fc->lock);
- err = copy_out_args(&cs, &req->out, nbytes);
- fuse_copy_finish(&cs);
+ err = copy_out_args(cs, &req->out, nbytes);
+ fuse_copy_finish(cs);
spin_lock(&fc->lock);
req->locked = 0;
@@ -1081,10 +1383,101 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
err_unlock:
spin_unlock(&fc->lock);
err_finish:
- fuse_copy_finish(&cs);
+ fuse_copy_finish(cs);
return err;
}
+static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
+ if (!fc)
+ return -EPERM;
+
+ fuse_copy_init(&cs, fc, 0, iov, nr_segs);
+
+ return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
+}
+
+static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags)
+{
+ unsigned nbuf;
+ unsigned idx;
+ struct pipe_buffer *bufs;
+ struct fuse_copy_state cs;
+ struct fuse_conn *fc;
+ size_t rem;
+ ssize_t ret;
+
+ fc = fuse_get_conn(out);
+ if (!fc)
+ return -EPERM;
+
+ bufs = kmalloc(pipe->buffers * sizeof (struct pipe_buffer), GFP_KERNEL);
+ if (!bufs)
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+ nbuf = 0;
+ rem = 0;
+ for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
+ rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
+
+ ret = -EINVAL;
+ if (rem < len) {
+ pipe_unlock(pipe);
+ goto out;
+ }
+
+ rem = len;
+ while (rem) {
+ struct pipe_buffer *ibuf;
+ struct pipe_buffer *obuf;
+
+ BUG_ON(nbuf >= pipe->buffers);
+ BUG_ON(!pipe->nrbufs);
+ ibuf = &pipe->bufs[pipe->curbuf];
+ obuf = &bufs[nbuf];
+
+ if (rem >= ibuf->len) {
+ *obuf = *ibuf;
+ ibuf->ops = NULL;
+ pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+ pipe->nrbufs--;
+ } else {
+ ibuf->ops->get(pipe, ibuf);
+ *obuf = *ibuf;
+ obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
+ obuf->len = rem;
+ ibuf->offset += obuf->len;
+ ibuf->len -= obuf->len;
+ }
+ nbuf++;
+ rem -= obuf->len;
+ }
+ pipe_unlock(pipe);
+
+ fuse_copy_init(&cs, fc, 0, NULL, nbuf);
+ cs.pipebufs = bufs;
+ cs.pipe = pipe;
+
+ if (flags & SPLICE_F_MOVE)
+ cs.move_pages = 1;
+
+ ret = fuse_dev_do_write(fc, &cs, len);
+
+ for (idx = 0; idx < nbuf; idx++) {
+ struct pipe_buffer *buf = &bufs[idx];
+ buf->ops->release(pipe, buf);
+ }
+out:
+ kfree(bufs);
+ return ret;
+}
+
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
unsigned mask = POLLOUT | POLLWRNORM;
@@ -1226,8 +1619,10 @@ const struct file_operations fuse_dev_operations = {
.llseek = no_llseek,
.read = do_sync_read,
.aio_read = fuse_dev_read,
+ .splice_read = fuse_dev_splice_read,
.write = do_sync_write,
.aio_write = fuse_dev_write,
+ .splice_write = fuse_dev_splice_write,
.poll = fuse_dev_poll,
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 4787ae6c5c1c..3cdc5f78a406 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1156,10 +1156,9 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
return 0;
}
-static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
+static int fuse_dir_fsync(struct file *file, int datasync)
{
- /* nfsd can call this with no file */
- return file ? fuse_fsync_common(file, de, datasync, 1) : 0;
+ return fuse_fsync_common(file, datasync, 1);
}
static bool update_mtime(unsigned ivalid)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a9f5e137f1d3..ada0adeb3bb5 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -351,10 +351,9 @@ static void fuse_sync_writes(struct inode *inode)
fuse_release_nowrite(inode);
}
-int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
- int isdir)
+int fuse_fsync_common(struct file *file, int datasync, int isdir)
{
- struct inode *inode = de->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_req *req;
@@ -403,9 +402,9 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
return err;
}
-static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
+static int fuse_fsync(struct file *file, int datasync)
{
- return fuse_fsync_common(file, de, datasync, 0);
+ return fuse_fsync_common(file, datasync, 0);
}
void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -517,17 +516,26 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
int i;
size_t count = req->misc.read.in.size;
size_t num_read = req->out.args[0].size;
- struct inode *inode = req->pages[0]->mapping->host;
+ struct address_space *mapping = NULL;
- /*
- * Short read means EOF. If file size is larger, truncate it
- */
- if (!req->out.h.error && num_read < count) {
- loff_t pos = page_offset(req->pages[0]) + num_read;
- fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
- }
+ for (i = 0; mapping == NULL && i < req->num_pages; i++)
+ mapping = req->pages[i]->mapping;
- fuse_invalidate_attr(inode); /* atime changed */
+ if (mapping) {
+ struct inode *inode = mapping->host;
+
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!req->out.h.error && num_read < count) {
+ loff_t pos;
+
+ pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos,
+ req->misc.read.attr_ver);
+ }
+ fuse_invalidate_attr(inode); /* atime changed */
+ }
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
@@ -536,6 +544,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
else
SetPageError(page);
unlock_page(page);
+ page_cache_release(page);
}
if (req->ff)
fuse_file_put(req->ff);
@@ -550,6 +559,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
req->out.argpages = 1;
req->out.page_zeroing = 1;
+ req->out.page_replace = 1;
fuse_read_fill(req, file, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
@@ -589,6 +599,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
return PTR_ERR(req);
}
}
+ page_cache_get(page);
req->pages[req->num_pages] = page;
req->num_pages++;
return 0;
@@ -994,10 +1005,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
- down_read(&current->mm->mmap_sem);
- npages = get_user_pages(current, current->mm, user_addr, npages, !write,
- 0, req->pages, NULL);
- up_read(&current->mm->mmap_sem);
+ npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
if (npages < 0)
return npages;
@@ -1580,9 +1588,9 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
while (iov_iter_count(&ii)) {
struct page *page = pages[page_idx++];
size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
- void *kaddr, *map;
+ void *kaddr;
- kaddr = map = kmap(page);
+ kaddr = kmap(page);
while (todo) {
char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 01cc462ff45d..8f309f04064e 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -177,6 +177,9 @@ struct fuse_out {
/** Zero partially or not copied pages */
unsigned page_zeroing:1;
+ /** Pages may be replaced with new ones */
+ unsigned page_replace:1;
+
/** Number or arguments */
unsigned numargs;
@@ -568,8 +571,7 @@ void fuse_release_common(struct file *file, int opcode);
/**
* Send FSYNC or FSYNCDIR request
*/
-int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
- int isdir);
+int fuse_fsync_common(struct file *file, int datasync, int isdir);
/**
* Notify poll wakeup
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index a739a0a48067..9f8b52500d63 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -700,8 +700,14 @@ out:
return 0;
page_cache_release(page);
+
+ /*
+ * XXX(hch): the call below should probably be replaced with
+ * a call to the gfs2-specific truncate blocks helper to actually
+ * release disk blocks..
+ */
if (pos + len > ip->i_inode.i_size)
- vmtruncate(&ip->i_inode, ip->i_inode.i_size);
+ simple_setsize(&ip->i_inode, ip->i_inode.i_size);
out_endtrans:
gfs2_trans_end(sdp);
out_trans_fail:
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
u64 dsize = size + sizeof(struct gfs2_inode);
+ ip->i_disksize = size;
ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..26ca3361a8bc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
unsigned totlen = be16_to_cpu(dent->de_rec_len);
if (gfs2_dirent_sentinel(dent))
- actual = GFS2_DIRENT_SIZE(0);
+ actual = 0;
if (totlen - actual >= required)
return 1;
return 0;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index b20bfcc9fa2d..ed9a94f0ef15 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -554,9 +554,9 @@ static int gfs2_close(struct inode *inode, struct file *file)
* Returns: errno
*/
-static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int gfs2_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
int ret = 0;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..dbab3fdc2582 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
{
unsigned long delay = 0;
struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
+ struct gfs2_holder *gh;
int drop_ref = 0;
+ if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
+ spin_lock(&gl->gl_spin);
+ gh = find_first_waiter(gl);
+ if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
+ test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
+ set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
+ spin_unlock(&gl->gl_spin);
+ }
+
if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
finish_xmote(gl, gl->gl_reply);
drop_ref = 1;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
{
struct inode *inode;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
if (inode->i_state & I_NEW)
ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
{
struct gfs2_sbd *sdp;
struct gfs2_inode *ip;
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error;
struct gfs2_holder gh;
struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
ip->i_iopen_gh.gh_gl->gl_object = ip;
gfs2_glock_put(io_gl);
+ io_gl = NULL;
inode->i_mode = DT2IF(DT_UNKNOWN);
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
fail_glock:
gfs2_glock_dq(&ip->i_iopen_gh);
fail_iopen:
- gfs2_glock_put(io_gl);
+ if (io_gl)
+ gfs2_glock_put(io_gl);
fail_put:
ip->i_gl->gl_object = NULL;
gfs2_glock_put(ip->i_gl);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 4e64352d49de..98cdd05f3316 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1071,6 +1071,9 @@ int gfs2_permission(struct inode *inode, int mask)
return error;
}
+/*
+ * XXX: should be changed to have proper ordering by opencoding simple_setsize
+ */
static int setattr_size(struct inode *inode, struct iattr *attr)
{
struct gfs2_inode *ip = GFS2_I(inode);
@@ -1081,7 +1084,7 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
return error;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
gfs2_trans_end(sdp);
if (error)
return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..b256d6f24288 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -694,10 +694,8 @@ get_a_page:
if (!buffer_mapped(bh))
goto unlock_out;
/* If it's a newly allocated disk block for quota, zero it */
- if (buffer_new(bh)) {
- memset(bh->b_data, 0, bh->b_size);
- set_buffer_uptodate(bh);
- }
+ if (buffer_new(bh))
+ zero_user(page, pos - blocksize, bh->b_size);
}
if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
/* If quota straddles page boundary, we need to update the rest of the
* quota at the beginning of the next page */
- if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */
+ if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
ptr = ptr + nbytes;
nbytes = sizeof(struct gfs2_quota) - nbytes;
offset = 0;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3a029d8f4cf1..87ac1891a185 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -411,9 +411,9 @@ int hostfs_file_open(struct inode *ino, struct file *file)
return 0;
}
-int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int hostfs_fsync(struct file *file, int datasync)
{
- return fsync_file(HOSTFS_I(dentry->d_inode)->fd, datasync);
+ return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync);
}
static const struct file_operations hostfs_file_fops = {
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3efabff00367..a9ae9bfa752f 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -19,9 +19,9 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
return 0;
}
-int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
+int hpfs_file_fsync(struct file *file, int datasync)
{
- /*return file_fsync(file, dentry);*/
+ /*return file_fsync(file, datasync);*/
return 0; /* Don't fsync :-) */
}
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 97bf738cd5d6..75f9d4324851 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -268,7 +268,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *,
/* file.c */
-int hpfs_file_fsync(struct file *, struct dentry *, int);
+int hpfs_file_fsync(struct file *, int);
extern const struct file_operations hpfs_file_ops;
extern const struct inode_operations hpfs_file_iops;
extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 2e4dfa8593da..826c3f9d29ac 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -587,7 +587,7 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
return err;
}
-static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int hppfs_fsync(struct file *file, int datasync)
{
return 0;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a0bbd3d1b41a..a4e9a7ec3691 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -688,7 +688,7 @@ static void init_once(void *foo)
const struct file_operations hugetlbfs_file_operations = {
.read = hugetlbfs_read,
.mmap = hugetlbfs_file_mmap,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.get_unmapped_area = hugetlb_get_unmapped_area,
};
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index a33aab6b5e68..54a92fd02bbd 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -234,8 +234,9 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (inode->i_mode != mode) {
struct iattr attr;
- attr.ia_valid = ATTR_MODE;
+ attr.ia_valid = ATTR_MODE | ATTR_CTIME;
attr.ia_mode = mode;
+ attr.ia_ctime = CURRENT_TIME_SEC;
rc = jffs2_do_setattr(inode, &attr);
if (rc < 0)
return rc;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 7aa4417e085f..166062a68230 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -222,15 +222,18 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
jffs2_free_raw_inode(ri);
- d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
inode->i_ino, inode->i_mode, inode->i_nlink,
f->inocache->pino_nlink, inode->i_mapping->nrpages));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
fail:
make_bad_inode(inode);
+ unlock_new_inode(inode);
iput(inode);
jffs2_free_raw_inode(ri);
return ret;
@@ -360,8 +363,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* We use f->target field to store the target path. */
@@ -370,8 +373,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
memcpy(f->target, target, targetlen + 1);
@@ -386,30 +389,24 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -437,8 +434,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -453,7 +450,14 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
@@ -519,8 +523,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -531,30 +535,24 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -582,8 +580,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -599,7 +597,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
@@ -693,8 +698,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
/* Eeek. Wave bye bye */
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return PTR_ERR(fn);
+ ret = PTR_ERR(fn);
+ goto fail;
}
/* No data here. Only a metadata node, which will be
obsoleted by the first data write
@@ -705,30 +710,24 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
ret = jffs2_init_security(inode, dir_i);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
+
ret = jffs2_init_acl_post(inode);
- if (ret) {
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen,
ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen));
- if (ret) {
- /* Eep. */
- jffs2_clear_inode(inode);
- return ret;
- }
+ if (ret)
+ goto fail;
rd = jffs2_alloc_raw_dirent();
if (!rd) {
/* Argh. Now we treat it like a normal delete */
jffs2_complete_reservation(c);
- jffs2_clear_inode(inode);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dir_f = JFFS2_INODE_INFO(dir_i);
@@ -759,8 +758,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
jffs2_free_raw_dirent(rd);
mutex_unlock(&dir_f->sem);
- jffs2_clear_inode(inode);
- return PTR_ERR(fd);
+ ret = PTR_ERR(fd);
+ goto fail;
}
dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
@@ -775,8 +774,14 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
jffs2_complete_reservation(c);
d_instantiate(dentry, inode);
-
+ unlock_new_inode(inode);
return 0;
+
+ fail:
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ret;
}
static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index e7291c161a19..813497024437 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -26,9 +26,9 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page **pagep, void **fsdata);
static int jffs2_readpage (struct file *filp, struct page *pg);
-int jffs2_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int jffs2_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
/* Trigger GC to flush any pending writes for this inode */
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 86e0821fc989..459d39d1ea0b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
mutex_unlock(&f->sem);
jffs2_complete_reservation(c);
- /* We have to do the vmtruncate() without f->sem held, since
+ /* We have to do the simple_setsize() without f->sem held, since
some pages may be locked and waiting for it in readpage().
We are protected from a simultaneous write() extending i_size
back past iattr->ia_size, because do_truncate() holds the
generic inode semaphore. */
if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
- vmtruncate(inode, iattr->ia_size);
+ simple_setsize(inode, iattr->ia_size);
inode->i_blocks = (inode->i_size + 511) >> 9;
}
@@ -465,7 +465,12 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
inode->i_blocks = 0;
inode->i_size = 0;
- insert_inode_hash(inode);
+ if (insert_inode_locked(inode) < 0) {
+ make_bad_inode(inode);
+ unlock_new_inode(inode);
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
return inode;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 035a767f958b..4791aacf3084 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations;
extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
-int jffs2_fsync(struct file *, struct dentry *, int);
+int jffs2_fsync(struct file *, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
/* ioctl.c */
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 85d9ec659225..127263cc8657 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -27,9 +27,9 @@
#include "jfs_acl.h"
#include "jfs_debug.h"
-int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int jfs_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int rc = 0;
if (!(inode->i_state & I_DIRTY) ||
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 9e6bda30a6e8..11042b1f44b5 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -21,7 +21,7 @@
struct fid;
extern struct inode *ialloc(struct inode *, umode_t);
-extern int jfs_fsync(struct file *, struct dentry *, int);
+extern int jfs_fsync(struct file *, int);
extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
extern struct inode *jfs_iget(struct super_block *, unsigned long);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b66832ac33ac..b38f96bef829 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -179,6 +179,8 @@ static void jfs_put_super(struct super_block *sb)
jfs_info("In jfs_put_super");
+ dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
lock_kernel();
rc = jfs_umount(sb);
@@ -396,10 +398,20 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
JFS_SBI(sb)->flag = flag;
ret = jfs_mount_rw(sb, 1);
+
+ /* mark the fs r/w for quota activity */
+ sb->s_flags &= ~MS_RDONLY;
+
unlock_kernel();
+ dquot_resume(sb, -1);
return ret;
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
+ rc = dquot_suspend(sb, -1);
+ if (rc < 0) {
+ unlock_kernel();
+ return rc;
+ }
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
unlock_kernel();
@@ -469,6 +481,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
*/
sb->s_op = &jfs_super_operations;
sb->s_export_op = &jfs_export_operations;
+#ifdef CONFIG_QUOTA
+ sb->dq_op = &dquot_operations;
+ sb->s_qcop = &dquot_quotactl_ops;
+#endif
/*
* Initialize direct-mapping inode/address-space
diff --git a/fs/libfs.c b/fs/libfs.c
index 232bea425b09..dcaf972cbf1b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -8,6 +8,7 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/vfs.h>
+#include <linux/quotaops.h>
#include <linux/mutex.h>
#include <linux/exportfs.h>
#include <linux/writeback.h>
@@ -58,11 +59,6 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
return NULL;
}
-int simple_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
- return 0;
-}
-
int dcache_dir_open(struct inode *inode, struct file *file)
{
static struct qstr cursor_name = {.len = 1, .name = "."};
@@ -190,7 +186,7 @@ const struct file_operations simple_dir_operations = {
.llseek = dcache_dir_lseek,
.read = generic_read_dir,
.readdir = dcache_readdir,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
};
const struct inode_operations simple_dir_inode_operations = {
@@ -330,6 +326,81 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
return 0;
}
+/**
+ * simple_setsize - handle core mm and vfs requirements for file size change
+ * @inode: inode
+ * @newsize: new file size
+ *
+ * Returns 0 on success, -error on failure.
+ *
+ * simple_setsize must be called with inode_mutex held.
+ *
+ * simple_setsize will check that the requested new size is OK (see
+ * inode_newsize_ok), and then will perform the necessary i_size update
+ * and pagecache truncation (if necessary). It will be typically be called
+ * from the filesystem's setattr function when ATTR_SIZE is passed in.
+ *
+ * The inode itself must have correct permissions and attributes to allow
+ * i_size to be changed, this function then just checks that the new size
+ * requested is valid.
+ *
+ * In the case of simple in-memory filesystems with inodes stored solely
+ * in the inode cache, and file data in the pagecache, nothing more needs
+ * to be done to satisfy a truncate request. Filesystems with on-disk
+ * blocks for example will need to free them in the case of truncate, in
+ * that case it may be easier not to use simple_setsize (but each of its
+ * components will likely be required at some point to update pagecache
+ * and inode etc).
+ */
+int simple_setsize(struct inode *inode, loff_t newsize)
+{
+ loff_t oldsize;
+ int error;
+
+ error = inode_newsize_ok(inode, newsize);
+ if (error)
+ return error;
+
+ oldsize = inode->i_size;
+ i_size_write(inode, newsize);
+ truncate_pagecache(inode, oldsize, newsize);
+
+ return error;
+}
+EXPORT_SYMBOL(simple_setsize);
+
+/**
+ * simple_setattr - setattr for simple in-memory filesystem
+ * @dentry: dentry
+ * @iattr: iattr structure
+ *
+ * Returns 0 on success, -error on failure.
+ *
+ * simple_setattr implements setattr for an in-memory filesystem which
+ * does not store its own file data or metadata (eg. uses the page cache
+ * and inode cache as its data store).
+ */
+int simple_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ error = inode_change_ok(inode, iattr);
+ if (error)
+ return error;
+
+ if (iattr->ia_valid & ATTR_SIZE) {
+ error = simple_setsize(inode, iattr->ia_size);
+ if (error)
+ return error;
+ }
+
+ generic_setattr(inode, iattr);
+
+ return error;
+}
+EXPORT_SYMBOL(simple_setattr);
+
int simple_readpage(struct file *file, struct page *page)
{
clear_highpage(page);
@@ -418,7 +489,8 @@ int simple_write_end(struct file *file, struct address_space *mapping,
* unique inode values later for this filesystem, then you must take care
* to pass it an appropriate max_reserved value to avoid collisions.
*/
-int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
+int simple_fill_super(struct super_block *s, unsigned long magic,
+ struct tree_descr *files)
{
struct inode *inode;
struct dentry *root;
@@ -851,13 +923,22 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
}
EXPORT_SYMBOL_GPL(generic_fh_to_parent);
-int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+/**
+ * generic_file_fsync - generic fsync implementation for simple filesystems
+ * @file: file to synchronize
+ * @datasync: only synchronize essential metadata if true
+ *
+ * This is a generic implementation of the fsync method for simple
+ * filesystems which track all non-inode metadata in the buffers list
+ * hanging off the address_space structure.
+ */
+int generic_file_fsync(struct file *file, int datasync)
{
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* metadata-only; caller takes care of data */
};
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int err;
int ret;
@@ -872,7 +953,15 @@ int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
ret = err;
return ret;
}
-EXPORT_SYMBOL(simple_fsync);
+EXPORT_SYMBOL(generic_file_fsync);
+
+/*
+ * No-op implementation of ->fsync for in-memory filesystems.
+ */
+int noop_fsync(struct file *file, int datasync)
+{
+ return 0;
+}
EXPORT_SYMBOL(dcache_dir_close);
EXPORT_SYMBOL(dcache_dir_lseek);
@@ -895,7 +984,7 @@ EXPORT_SYMBOL(simple_release_fs);
EXPORT_SYMBOL(simple_rename);
EXPORT_SYMBOL(simple_rmdir);
EXPORT_SYMBOL(simple_statfs);
-EXPORT_SYMBOL(simple_sync_file);
+EXPORT_SYMBOL(noop_fsync);
EXPORT_SYMBOL(simple_unlink);
EXPORT_SYMBOL(simple_read_from_buffer);
EXPORT_SYMBOL(simple_write_to_buffer);
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 0de524071870..abe1cafbd4c2 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,9 +219,9 @@ int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
}
}
-int logfs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int logfs_fsync(struct file *file, int datasync)
{
- struct super_block *sb = dentry->d_inode->i_sb;
+ struct super_block *sb = file->f_mapping->host->i_sb;
logfs_write_anchor(sb);
return 0;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 1a9db84f8d8f..c838c4d72111 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@ extern const struct address_space_operations logfs_reg_aops;
int logfs_readpage(struct file *file, struct page *page);
int logfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
unsigned long arg);
-int logfs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int logfs_fsync(struct file *file, int datasync);
/* gc.c */
u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 6198731d7fcd..1dbf921ca44b 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -22,7 +22,7 @@ const struct file_operations minix_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = minix_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static inline void dir_put_page(struct page *page)
@@ -72,16 +72,9 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n)
{
struct address_space *mapping = dir->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
- if (!IS_ERR(page)) {
+ if (!IS_ERR(page))
kmap(page);
- if (!PageUptodate(page))
- goto fail;
- }
return page;
-
-fail:
- dir_put_page(page);
- return ERR_PTR(-EIO);
}
static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi)
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 3eec3e607a87..d5320ff23faf 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -19,7 +19,7 @@ const struct file_operations minix_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index f23010969369..13487ad16894 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -20,6 +20,9 @@ static inline block_t *i_data(struct inode *inode)
return (block_t *)minix_i(inode)->u.i2_data;
}
+#define DIRCOUNT 7
+#define INDIRCOUNT(sb) (1 << ((sb)->s_blocksize_bits - 2))
+
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
@@ -34,21 +37,21 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
printk("MINIX-fs: block_to_path: "
"block %ld too big on dev %s\n",
block, bdevname(sb->s_bdev, b));
- } else if (block < 7) {
+ } else if (block < DIRCOUNT) {
offsets[n++] = block;
- } else if ((block -= 7) < 256) {
- offsets[n++] = 7;
+ } else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) {
+ offsets[n++] = DIRCOUNT;
offsets[n++] = block;
- } else if ((block -= 256) < 256*256) {
- offsets[n++] = 8;
- offsets[n++] = block>>8;
- offsets[n++] = block & 255;
+ } else if ((block -= INDIRCOUNT(sb)) < INDIRCOUNT(sb) * INDIRCOUNT(sb)) {
+ offsets[n++] = DIRCOUNT + 1;
+ offsets[n++] = block / INDIRCOUNT(sb);
+ offsets[n++] = block % INDIRCOUNT(sb);
} else {
- block -= 256*256;
- offsets[n++] = 9;
- offsets[n++] = block>>16;
- offsets[n++] = (block>>8) & 255;
- offsets[n++] = block & 255;
+ block -= INDIRCOUNT(sb) * INDIRCOUNT(sb);
+ offsets[n++] = DIRCOUNT + 2;
+ offsets[n++] = (block / INDIRCOUNT(sb)) / INDIRCOUNT(sb);
+ offsets[n++] = (block / INDIRCOUNT(sb)) % INDIRCOUNT(sb);
+ offsets[n++] = block % INDIRCOUNT(sb);
}
return n;
}
diff --git a/fs/namei.c b/fs/namei.c
index 48e1f60520ea..868d0cb9d473 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1621,6 +1621,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
case LAST_DOTDOT:
follow_dotdot(nd);
dir = nd->path.dentry;
+ case LAST_DOT:
if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {
if (!dir->d_op->d_revalidate(dir, nd)) {
error = -ESTALE;
@@ -1628,7 +1629,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
}
}
/* fallthrough */
- case LAST_DOT:
case LAST_ROOT:
if (open_flag & O_CREAT)
goto exit;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index b93870892892..3639cc5cbdae 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -22,7 +22,7 @@
#include <linux/ncp_fs.h>
#include "ncplib_kernel.h"
-static int ncp_fsync(struct file *file, struct dentry *dentry, int datasync)
+static int ncp_fsync(struct file *file, int datasync)
{
return 0;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
#endif /* CONFIG_NFS_V4_1 */
}
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ spin_lock(&nfs_client_lock);
+ list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+ list_add_tail(&server->master_link, &nfs_volume_list);
+ spin_unlock(&nfs_client_lock);
+
+ server->mount_time = jiffies;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
/*
* Create a version 4 volume record
*/
@@ -1346,7 +1395,6 @@ error:
struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
struct nfs_fh *mntfh)
{
- struct nfs_fattr *fattr;
struct nfs_server *server;
int error;
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- error = nfs4_init_session(server);
- if (error < 0)
- goto error;
-
- /* Probe the root fh to retrieve its FSID */
- error = nfs4_get_rootfh(server, mntfh);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
- if (error < 0)
- goto error;
-
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
dprintk("<-- nfs4_create_server() = %p\n", server);
- nfs_free_fattr(fattr);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_server() = error %d\n", error);
return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
- struct nfs_fattr *fattr;
int error;
dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (!server)
return ERR_PTR(-ENOMEM);
- error = -ENOMEM;
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- goto error;
-
parent_server = NFS_SB(data->sb);
parent_client = parent_server->nfs_client;
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
if (error < 0)
goto error;
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto error;
-
- /* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs4_server_common_setup(server, mntfh);
if (error < 0)
goto error;
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- dprintk("Referral FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
-
- spin_lock(&nfs_client_lock);
- list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
- list_add_tail(&server->master_link, &nfs_volume_list);
- spin_unlock(&nfs_client_lock);
-
- server->mount_time = jiffies;
-
- nfs_free_fattr(fattr);
dprintk("<-- nfs_create_referral_server() = %p\n", server);
return server;
error:
- nfs_free_fattr(fattr);
nfs_free_server(server);
dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index db64854b7b09..782b431ef91c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -53,7 +53,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *);
static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
static int nfs_rename(struct inode *, struct dentry *,
struct inode *, struct dentry *);
-static int nfs_fsync_dir(struct file *, struct dentry *, int);
+static int nfs_fsync_dir(struct file *, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
const struct file_operations nfs_dir_operations = {
@@ -641,8 +641,10 @@ out:
* All directory operations under NFS are synchronous, so fsync()
* is a dummy operation.
*/
-static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
+static int nfs_fsync_dir(struct file *filp, int datasync)
{
+ struct dentry *dentry = filp->f_path.dentry;
+
dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cac96bcc91e4..36a5e74f51b4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -53,7 +53,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *, fl_owner_t id);
-static int nfs_file_fsync(struct file *, struct dentry *dentry, int datasync);
+static int nfs_file_fsync(struct file *, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -322,8 +322,9 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
* whether any write errors occurred for this process.
*/
static int
-nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
+nfs_file_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
goto out;
}
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE)
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
|| !S_ISDIR(fsinfo.fattr->mode)) {
printk(KERN_ERR "nfs4_get_rootfh:"
" getroot encountered non-directory\n");
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
*p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
- *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
+ *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
}
else if (iap->ia_valid & ATTR_ATIME) {
bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
nfs_show_mountd_netid(m, nfss, showdefaults);
}
+#ifdef CONFIG_NFS_V4
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+ struct nfs_client *clp = nfss->nfs_client;
+
+ seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+ seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
+}
+#else
+static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+}
+#endif
+
/*
* Describe the mount options in force on this server representation
*/
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (version != 4)
nfs_show_mountd_options(m, nfss, showdefaults);
+ else
+ nfs_show_nfsv4_options(m, nfss, showdefaults);
-#ifdef CONFIG_NFS_V4
- if (clp->rpc_ops->version == 4)
- seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
-#endif
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 12f7109720c2..4a2734758778 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4122,8 +4122,8 @@ nfs4_state_shutdown(void)
nfs4_lock_state();
nfs4_release_reclaim();
__nfs4_state_shutdown();
- nfsd4_destroy_callback_queue();
nfs4_unlock_state();
+ nfsd4_destroy_callback_queue();
}
/*
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ebbf3b6b2457..3c111120b619 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (size_change)
put_write_access(inode);
if (!err)
- if (EX_ISSYNC(fhp->fh_export))
- write_inode_now(inode, 1);
+ commit_metadata(fhp);
out:
return err;
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index af638d59e3bf..43c8c5b541fd 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -75,8 +75,6 @@ struct nilfs_btree_path {
extern struct kmem_cache *nilfs_btree_path_cache;
-int nilfs_btree_path_cache_init(void);
-void nilfs_btree_path_cache_destroy(void);
int nilfs_btree_init(struct nilfs_bmap *);
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
const __u64 *, const __u64 *, int);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 30292df443ce..c9a30d7ff6fc 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -27,7 +27,7 @@
#include "nilfs.h"
#include "segment.h"
-int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+int nilfs_sync_file(struct file *file, int datasync)
{
/*
* Called from fsync() system call
@@ -37,7 +37,7 @@ int nilfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
* This function should be implemented when the writeback function
* will be implemented.
*/
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
int err;
if (!nilfs_inode_dirty(inode))
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 8723e5bfd071..47d6d7928122 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -228,7 +228,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
struct page *, struct inode *);
/* file.c */
-extern int nilfs_sync_file(struct file *, struct dentry *, int);
+extern int nilfs_sync_file(struct file *, int);
/* ioctl.c */
long nilfs_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index fdf1c3b6d673..85fbb66455e2 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -127,8 +127,6 @@ struct nilfs_segment_buffer {
extern struct kmem_cache *nilfs_segbuf_cachep;
-int __init nilfs_init_segbuf_cache(void);
-void nilfs_destroy_segbuf_cache(void);
struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *);
void nilfs_segbuf_free(struct nilfs_segment_buffer *);
void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long,
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index dca142361ccf..01e20dbb217d 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -221,8 +221,6 @@ enum {
extern struct kmem_cache *nilfs_transaction_cachep;
/* segment.c */
-extern int nilfs_init_transaction_cache(void);
-extern void nilfs_destroy_transaction_cache(void);
extern void nilfs_relax_pressure_in_lock(struct super_block *);
extern int nilfs_construct_segment(struct super_block *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 03b34b738993..414ef68931cf 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1130,13 +1130,13 @@ static void nilfs_segbuf_init_once(void *obj)
static void nilfs_destroy_cachep(void)
{
- if (nilfs_inode_cachep)
+ if (nilfs_inode_cachep)
kmem_cache_destroy(nilfs_inode_cachep);
- if (nilfs_transaction_cachep)
+ if (nilfs_transaction_cachep)
kmem_cache_destroy(nilfs_transaction_cachep);
- if (nilfs_segbuf_cachep)
+ if (nilfs_segbuf_cachep)
kmem_cache_destroy(nilfs_segbuf_cachep);
- if (nilfs_btree_path_cache)
+ if (nilfs_btree_path_cache)
kmem_cache_destroy(nilfs_btree_path_cache);
}
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index fe44d3feee4a..0f48e7c5d9e1 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,10 +1527,9 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
* this problem for now. We do write the $BITMAP attribute if it is present
* which is the important one for a directory so things are not too bad.
*/
-static int ntfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_dir_fsync(struct file *filp, int datasync)
{
- struct inode *bmp_vi, *vi = dentry->d_inode;
+ struct inode *bmp_vi, *vi = filp->f_mapping->host;
int err, ret;
ntfs_attr na;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index a1924a0d2ab0..113ebd9f25a4 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2133,7 +2133,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
/**
* ntfs_file_fsync - sync a file to disk
* @filp: file to be synced
- * @dentry: dentry describing the file to sync
* @datasync: if non-zero only flush user data and not metadata
*
* Data integrity sync of a file to disk. Used for fsync, fdatasync, and msync
@@ -2149,19 +2148,15 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
* Also, if @datasync is true, we do not wait on the inode to be written out
* but we always wait on the page cache pages to be written out.
*
- * Note: In the past @filp could be NULL so we ignore it as we don't need it
- * anyway.
- *
* Locking: Caller must hold i_mutex on the inode.
*
* TODO: We should probably also write all attribute/index inodes associated
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now.
*/
-static int ntfs_file_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int ntfs_file_fsync(struct file *filp, int datasync)
{
- struct inode *vi = dentry->d_inode;
+ struct inode *vi = filp->f_mapping->host;
int err, ret = 0;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 97e54b9e654b..6a13ea64c447 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file)
return 0;
}
-static int ocfs2_sync_file(struct file *file,
- struct dentry *dentry,
- int datasync)
+static int ocfs2_sync_file(struct file *file, int datasync)
{
int err = 0;
journal_t *journal;
- struct inode *inode = dentry->d_inode;
+ struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
@@ -1053,7 +1052,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
}
/*
- * This will intentionally not wind up calling vmtruncate(),
+ * This will intentionally not wind up calling simple_setsize(),
* since all the work for a size change has been done above.
* Otherwise, we could get into problems with truncate as
* ip_alloc_sem is used there to protect against i_size
@@ -2119,9 +2118,13 @@ relock:
* direct write may have instantiated a few
* blocks outside i_size. Trim these off again.
* Don't need i_size_read because we hold i_mutex.
+ *
+ * XXX(hch): this looks buggy because ocfs2 did not
+ * actually implement ->truncate. Take a look at
+ * the new truncate sequence and update this accordingly
*/
if (*ppos + count > inode->i_size)
- vmtruncate(inode, inode->i_size);
+ simple_setsize(inode, inode->i_size);
ret = written;
goto out_dio;
}
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
#include <linux/fs.h>
#include <linux/types.h>
-#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/bitops.h>
#include <linux/list.h>
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 2c26ce251cb3..0eaa929a4dbf 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -879,13 +879,15 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend)
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
continue;
if (unsuspend)
- status = vfs_quota_enable(
- sb_dqopt(sb)->files[type],
- type, QFMT_OCFS2,
- DQUOT_SUSPENDED);
- else
- status = vfs_quota_disable(sb, type,
- DQUOT_SUSPENDED);
+ status = dquot_resume(sb, type);
+ else {
+ struct ocfs2_mem_dqinfo *oinfo;
+
+ /* Cancel periodic syncing before suspending */
+ oinfo = sb_dqinfo(sb, type)->dqi_priv;
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
+ status = dquot_suspend(sb, type);
+ }
if (status < 0)
break;
}
@@ -916,8 +918,8 @@ static int ocfs2_enable_quotas(struct ocfs2_super *osb)
status = -ENOENT;
goto out_quota_off;
}
- status = vfs_quota_enable(inode[type], type, QFMT_OCFS2,
- DQUOT_USAGE_ENABLED);
+ status = dquot_enable(inode[type], type, QFMT_OCFS2,
+ DQUOT_USAGE_ENABLED);
if (status < 0)
goto out_quota_off;
}
@@ -952,8 +954,8 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
* quota files */
- vfs_quota_disable(sb, type, DQUOT_USAGE_ENABLED |
- DQUOT_LIMITS_ENABLED);
+ dquot_disable(sb, type, DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
if (!inode)
continue;
iput(inode);
@@ -962,7 +964,7 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
/* Handle quota on quotactl */
static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
- char *path, int remount)
+ char *path)
{
unsigned int feature[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA};
@@ -970,30 +972,24 @@ static int ocfs2_quota_on(struct super_block *sb, int type, int format_id,
if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, feature[type]))
return -EINVAL;
- if (remount)
- return 0; /* Just ignore it has been handled in
- * ocfs2_remount() */
- return vfs_quota_enable(sb_dqopt(sb)->files[type], type,
- format_id, DQUOT_LIMITS_ENABLED);
+ return dquot_enable(sb_dqopt(sb)->files[type], type,
+ format_id, DQUOT_LIMITS_ENABLED);
}
/* Handle quota off quotactl */
-static int ocfs2_quota_off(struct super_block *sb, int type, int remount)
+static int ocfs2_quota_off(struct super_block *sb, int type)
{
- if (remount)
- return 0; /* Ignore now and handle later in
- * ocfs2_remount() */
- return vfs_quota_disable(sb, type, DQUOT_LIMITS_ENABLED);
+ return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED);
}
static const struct quotactl_ops ocfs2_quotactl_ops = {
.quota_on = ocfs2_quota_on,
.quota_off = ocfs2_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
};
static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 399487c09364..6e7a3291bbe8 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -329,7 +329,7 @@ const struct file_operations omfs_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/pipe.c b/fs/pipe.c
index d79872eba09a..279eef96c51c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,9 +26,14 @@
/*
* The max size that a non-root user is allowed to grow the pipe. Can
- * be set by root in /proc/sys/fs/pipe-max-pages
+ * be set by root in /proc/sys/fs/pipe-max-size
*/
-unsigned int pipe_max_pages = PIPE_DEF_BUFFERS * 16;
+unsigned int pipe_max_size = 1048576;
+
+/*
+ * Minimum pipe size, as required by POSIX
+ */
+unsigned int pipe_min_size = PAGE_SIZE;
/*
* We use a start+len construction, which provides full use of the
@@ -230,6 +235,7 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
return kmap(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_map);
/**
* generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
@@ -249,6 +255,7 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
} else
kunmap(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_unmap);
/**
* generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
@@ -279,6 +286,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
return 1;
}
+EXPORT_SYMBOL(generic_pipe_buf_steal);
/**
* generic_pipe_buf_get - get a reference to a &struct pipe_buffer
@@ -294,6 +302,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
page_cache_get(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_get);
/**
* generic_pipe_buf_confirm - verify contents of the pipe buffer
@@ -309,6 +318,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *info,
{
return 0;
}
+EXPORT_SYMBOL(generic_pipe_buf_confirm);
/**
* generic_pipe_buf_release - put a reference to a &struct pipe_buffer
@@ -323,6 +333,7 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
{
page_cache_release(buf->page);
}
+EXPORT_SYMBOL(generic_pipe_buf_release);
static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1,
@@ -1112,26 +1123,20 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
* Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error.
*/
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
{
struct pipe_buffer *bufs;
/*
- * Must be a power-of-2 currently
- */
- if (!is_power_of_2(arg))
- return -EINVAL;
-
- /*
* We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
* expect a lot of shrink+grow operations, just free and allocate
* again like we would do for growing. If the pipe currently
* contains more buffers than arg, then return busy.
*/
- if (arg < pipe->nrbufs)
+ if (nr_pages < pipe->nrbufs)
return -EBUSY;
- bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+ bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
if (unlikely(!bufs))
return -ENOMEM;
@@ -1140,20 +1145,56 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
* and adjust the indexes.
*/
if (pipe->nrbufs) {
- const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
- const unsigned int head = pipe->nrbufs - tail;
+ unsigned int tail;
+ unsigned int head;
+ tail = pipe->curbuf + pipe->nrbufs;
+ if (tail < pipe->buffers)
+ tail = 0;
+ else
+ tail &= (pipe->buffers - 1);
+
+ head = pipe->nrbufs - tail;
if (head)
memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
if (tail)
- memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+ memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
- pipe->buffers = arg;
- return arg;
+ pipe->buffers = nr_pages;
+ return nr_pages * PAGE_SIZE;
+}
+
+/*
+ * Currently we rely on the pipe array holding a power-of-2 number
+ * of pages.
+ */
+static inline unsigned int round_pipe_size(unsigned int size)
+{
+ unsigned long nr_pages;
+
+ nr_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ return roundup_pow_of_two(nr_pages) << PAGE_SHIFT;
+}
+
+/*
+ * This should work even if CONFIG_PROC_FS isn't set, as proc_dointvec_minmax
+ * will return an error.
+ */
+int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
+ size_t *lenp, loff_t *ppos)
+{
+ int ret;
+
+ ret = proc_dointvec_minmax(table, write, buf, lenp, ppos);
+ if (ret < 0 || !write)
+ return ret;
+
+ pipe_max_size = round_pipe_size(pipe_max_size);
+ return ret;
}
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1168,25 +1209,32 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
mutex_lock(&pipe->inode->i_mutex);
switch (cmd) {
- case F_SETPIPE_SZ:
- if (!capable(CAP_SYS_ADMIN) && arg > pipe_max_pages)
- return -EINVAL;
- /*
- * The pipe needs to be at least 2 pages large to
- * guarantee POSIX behaviour.
- */
- if (arg < 2)
- return -EINVAL;
- ret = pipe_set_size(pipe, arg);
+ case F_SETPIPE_SZ: {
+ unsigned int size, nr_pages;
+
+ size = round_pipe_size(arg);
+ nr_pages = size >> PAGE_SHIFT;
+
+ ret = -EINVAL;
+ if (!nr_pages)
+ goto out;
+
+ if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
+ ret = -EPERM;
+ goto out;
+ }
+ ret = pipe_set_size(pipe, nr_pages);
break;
+ }
case F_GETPIPE_SZ:
- ret = pipe->buffers;
+ ret = pipe->buffers * PAGE_SIZE;
break;
default:
ret = -EINVAL;
break;
}
+out:
mutex_unlock(&pipe->inode->i_mutex);
return ret;
}
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
for (pp = np->properties; pp != NULL; pp = pp->next) {
p = pp->name;
+ if (strchr(p, '/'))
+ continue;
+
if (duplicate_name(de, p))
p = fixup_name(np, de, p);
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
+static void pad_len_spaces(struct seq_file *m, int len)
+{
+ len = 25 + sizeof(void*) * 6 - len;
+ if (len < 1)
+ len = 1;
+ seq_printf(m, "%*c", len, ' ');
+}
+
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
+ struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
- len = 25 + sizeof(void *) * 6 - len;
- if (len < 1)
- len = 1;
- seq_printf(m, "%*c", len, ' ');
+ pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
+ } else if (mm) {
+ if (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack) {
+ pad_len_spaces(m, len);
+ seq_puts(m, "[stack]");
+ }
}
seq_putc(m, '\n');
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index 3d3fd4692133..6e8fc62b40a8 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -80,7 +80,7 @@ const struct file_operations qnx4_dir_operations =
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = qnx4_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1ad8bf076cfc..12c233da1b6b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -228,10 +228,6 @@ static struct hlist_head *dquot_hash;
struct dqstats dqstats;
EXPORT_SYMBOL(dqstats);
-#ifdef CONFIG_SMP
-struct dqstats *dqstats_pcpu;
-EXPORT_SYMBOL(dqstats_pcpu);
-#endif
static qsize_t inode_get_rsv_space(struct inode *inode);
static void __dquot_initialize(struct inode *inode, int type);
@@ -584,7 +580,7 @@ out:
}
EXPORT_SYMBOL(dquot_scan_active);
-int vfs_quota_sync(struct super_block *sb, int type, int wait)
+int dquot_quota_sync(struct super_block *sb, int type, int wait)
{
struct list_head *dirty;
struct dquot *dquot;
@@ -656,7 +652,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
return 0;
}
-EXPORT_SYMBOL(vfs_quota_sync);
+EXPORT_SYMBOL(dquot_quota_sync);
/* Free unused dquots from cache */
static void prune_dqcache(int count)
@@ -676,27 +672,10 @@ static void prune_dqcache(int count)
}
}
-static int dqstats_read(unsigned int type)
-{
- int count = 0;
-#ifdef CONFIG_SMP
- int cpu;
- for_each_possible_cpu(cpu)
- count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
- /* Statistics reading is racy, but absolute accuracy isn't required */
- if (count < 0)
- count = 0;
-#else
- count = dqstats.stat[type];
-#endif
- return count;
-}
-
/*
* This is called from kswapd when we think we need some
* more memory
*/
-
static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
{
if (nr) {
@@ -704,7 +683,9 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
prune_dqcache(nr);
spin_unlock(&dq_list_lock);
}
- return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
+ return ((unsigned)
+ percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
+ /100) * sysctl_vfs_cache_pressure;
}
static struct shrinker dqcache_shrinker = {
@@ -1815,7 +1796,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
- transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_uid, GRPQUOTA);
+ transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
ret = __dquot_transfer(inode, transfer_to);
dqput_all(transfer_to);
@@ -1850,6 +1831,7 @@ const struct dquot_operations dquot_operations = {
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
};
+EXPORT_SYMBOL(dquot_operations);
/*
* Generic helper for ->open on filesystems supporting disk quotas.
@@ -1868,7 +1850,7 @@ EXPORT_SYMBOL(dquot_file_open);
/*
* Turn quota off on a device. type == -1 ==> quotaoff for all types (umount)
*/
-int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags)
+int dquot_disable(struct super_block *sb, int type, unsigned int flags)
{
int cnt, ret = 0;
struct quota_info *dqopt = sb_dqopt(sb);
@@ -1998,14 +1980,15 @@ put_inodes:
}
return ret;
}
-EXPORT_SYMBOL(vfs_quota_disable);
+EXPORT_SYMBOL(dquot_disable);
-int vfs_quota_off(struct super_block *sb, int type, int remount)
+int dquot_quota_off(struct super_block *sb, int type)
{
- return vfs_quota_disable(sb, type, remount ? DQUOT_SUSPENDED :
- (DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED));
+ return dquot_disable(sb, type,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
}
-EXPORT_SYMBOL(vfs_quota_off);
+EXPORT_SYMBOL(dquot_quota_off);
+
/*
* Turn quotas on on a device
*/
@@ -2123,36 +2106,43 @@ out_fmt:
}
/* Reenable quotas on remount RW */
-static int vfs_quota_on_remount(struct super_block *sb, int type)
+int dquot_resume(struct super_block *sb, int type)
{
struct quota_info *dqopt = sb_dqopt(sb);
struct inode *inode;
- int ret;
+ int ret = 0, cnt;
unsigned int flags;
- mutex_lock(&dqopt->dqonoff_mutex);
- if (!sb_has_quota_suspended(sb, type)) {
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (type != -1 && cnt != type)
+ continue;
+
+ mutex_lock(&dqopt->dqonoff_mutex);
+ if (!sb_has_quota_suspended(sb, cnt)) {
+ mutex_unlock(&dqopt->dqonoff_mutex);
+ continue;
+ }
+ inode = dqopt->files[cnt];
+ dqopt->files[cnt] = NULL;
+ spin_lock(&dq_state_lock);
+ flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED,
+ cnt);
+ dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, cnt);
+ spin_unlock(&dq_state_lock);
mutex_unlock(&dqopt->dqonoff_mutex);
- return 0;
- }
- inode = dqopt->files[type];
- dqopt->files[type] = NULL;
- spin_lock(&dq_state_lock);
- flags = dqopt->flags & dquot_state_flag(DQUOT_USAGE_ENABLED |
- DQUOT_LIMITS_ENABLED, type);
- dqopt->flags &= ~dquot_state_flag(DQUOT_STATE_FLAGS, type);
- spin_unlock(&dq_state_lock);
- mutex_unlock(&dqopt->dqonoff_mutex);
- flags = dquot_generic_flag(flags, type);
- ret = vfs_load_quota_inode(inode, type, dqopt->info[type].dqi_fmt_id,
- flags);
- iput(inode);
+ flags = dquot_generic_flag(flags, cnt);
+ ret = vfs_load_quota_inode(inode, cnt,
+ dqopt->info[cnt].dqi_fmt_id, flags);
+ iput(inode);
+ }
return ret;
}
+EXPORT_SYMBOL(dquot_resume);
-int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
+int dquot_quota_on_path(struct super_block *sb, int type, int format_id,
struct path *path)
{
int error = security_quota_on(path->dentry);
@@ -2167,40 +2157,36 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
DQUOT_LIMITS_ENABLED);
return error;
}
-EXPORT_SYMBOL(vfs_quota_on_path);
+EXPORT_SYMBOL(dquot_quota_on_path);
-int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
- int remount)
+int dquot_quota_on(struct super_block *sb, int type, int format_id, char *name)
{
struct path path;
int error;
- if (remount)
- return vfs_quota_on_remount(sb, type);
-
error = kern_path(name, LOOKUP_FOLLOW, &path);
if (!error) {
- error = vfs_quota_on_path(sb, type, format_id, &path);
+ error = dquot_quota_on_path(sb, type, format_id, &path);
path_put(&path);
}
return error;
}
-EXPORT_SYMBOL(vfs_quota_on);
+EXPORT_SYMBOL(dquot_quota_on);
/*
* More powerful function for turning on quotas allowing setting
* of individual quota flags
*/
-int vfs_quota_enable(struct inode *inode, int type, int format_id,
- unsigned int flags)
+int dquot_enable(struct inode *inode, int type, int format_id,
+ unsigned int flags)
{
int ret = 0;
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
/* Just unsuspend quotas? */
- if (flags & DQUOT_SUSPENDED)
- return vfs_quota_on_remount(sb, type);
+ BUG_ON(flags & DQUOT_SUSPENDED);
+
if (!flags)
return 0;
/* Just updating flags needed? */
@@ -2232,13 +2218,13 @@ out_lock:
load_quota:
return vfs_load_quota_inode(inode, type, format_id, flags);
}
-EXPORT_SYMBOL(vfs_quota_enable);
+EXPORT_SYMBOL(dquot_enable);
/*
* This function is used when filesystem needs to initialize quotas
* during mount time.
*/
-int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
int format_id, int type)
{
struct dentry *dentry;
@@ -2264,24 +2250,7 @@ out:
dput(dentry);
return error;
}
-EXPORT_SYMBOL(vfs_quota_on_mount);
-
-/* Wrapper to turn on quotas when remounting rw */
-int vfs_dq_quota_on_remount(struct super_block *sb)
-{
- int cnt;
- int ret = 0, err;
-
- if (!sb->s_qcop || !sb->s_qcop->quota_on)
- return -ENOSYS;
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
- if (err < 0 && !ret)
- ret = err;
- }
- return ret;
-}
-EXPORT_SYMBOL(vfs_dq_quota_on_remount);
+EXPORT_SYMBOL(dquot_quota_on_mount);
static inline qsize_t qbtos(qsize_t blocks)
{
@@ -2316,8 +2285,8 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
spin_unlock(&dq_data_lock);
}
-int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
- struct fs_disk_quota *di)
+int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
+ struct fs_disk_quota *di)
{
struct dquot *dquot;
@@ -2329,7 +2298,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
return 0;
}
-EXPORT_SYMBOL(vfs_get_dqblk);
+EXPORT_SYMBOL(dquot_get_dqblk);
#define VFS_FS_DQ_MASK \
(FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
@@ -2428,7 +2397,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
return 0;
}
-int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
+int dquot_set_dqblk(struct super_block *sb, int type, qid_t id,
struct fs_disk_quota *di)
{
struct dquot *dquot;
@@ -2444,10 +2413,10 @@ int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
out:
return rc;
}
-EXPORT_SYMBOL(vfs_set_dqblk);
+EXPORT_SYMBOL(dquot_set_dqblk);
/* Generic routine for getting common part of quota file information */
-int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
@@ -2466,10 +2435,10 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return 0;
}
-EXPORT_SYMBOL(vfs_get_dqinfo);
+EXPORT_SYMBOL(dquot_get_dqinfo);
/* Generic routine for setting common part of quota file information */
-int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
+int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
{
struct mem_dqinfo *mi;
int err = 0;
@@ -2496,27 +2465,27 @@ out:
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
return err;
}
-EXPORT_SYMBOL(vfs_set_dqinfo);
+EXPORT_SYMBOL(dquot_set_dqinfo);
-const struct quotactl_ops vfs_quotactl_ops = {
- .quota_on = vfs_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk
+const struct quotactl_ops dquot_quotactl_ops = {
+ .quota_on = dquot_quota_on,
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk
};
-
+EXPORT_SYMBOL(dquot_quotactl_ops);
static int do_proc_dqstats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
-#ifdef CONFIG_SMP
- /* Update global table */
unsigned int type = (int *)table->data - dqstats.stat;
- dqstats.stat[type] = dqstats_read(type);
-#endif
+
+ /* Update global table */
+ dqstats.stat[type] =
+ percpu_counter_sum_positive(&dqstats.counter[type]);
return proc_dointvec(table, write, buffer, lenp, ppos);
}
@@ -2609,7 +2578,7 @@ static ctl_table sys_table[] = {
static int __init dquot_init(void)
{
- int i;
+ int i, ret;
unsigned long nr_hash, order;
printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
@@ -2627,12 +2596,11 @@ static int __init dquot_init(void)
if (!dquot_hash)
panic("Cannot create dquot hash table");
-#ifdef CONFIG_SMP
- dqstats_pcpu = alloc_percpu(struct dqstats);
- if (!dqstats_pcpu)
- panic("Cannot create dquot stats table");
-#endif
- memset(&dqstats, 0, sizeof(struct dqstats));
+ for (i = 0; i < _DQST_DQSTAT_LAST; i++) {
+ ret = percpu_counter_init(&dqstats.counter[i], 0);
+ if (ret)
+ panic("Cannot create dquot stat counters");
+ }
/* Find power-of-two hlist_heads which can fit into allocation */
nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index ce3dfd066f59..b299961e1edb 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -73,7 +73,7 @@ static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
if (IS_ERR(pathname))
return PTR_ERR(pathname);
if (sb->s_qcop->quota_on)
- ret = sb->s_qcop->quota_on(sb, type, id, pathname, 0);
+ ret = sb->s_qcop->quota_on(sb, type, id, pathname);
putname(pathname);
return ret;
}
@@ -260,7 +260,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
case Q_QUOTAOFF:
if (!sb->s_qcop->quota_off)
return -ENOSYS;
- return sb->s_qcop->quota_off(sb, type, 0);
+ return sb->s_qcop->quota_off(sb, type);
case Q_GETFMT:
return quota_getfmt(sb, type, addr);
case Q_GETINFO:
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 78f613cb9c76..4884ac5ae9be 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -43,12 +43,13 @@ const struct file_operations ramfs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
const struct inode_operations ramfs_file_inode_operations = {
+ .setattr = simple_setattr,
.getattr = simple_getattr,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5ea4ad81a429..d532c20fc179 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
- .fsync = simple_sync_file,
+ .fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
@@ -146,7 +146,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
return ret;
}
- ret = vmtruncate(inode, newsize);
+ ret = simple_setsize(inode, newsize);
return ret;
}
@@ -169,7 +169,8 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
/* pick out size-changing events */
if (ia->ia_valid & ATTR_SIZE) {
- loff_t size = i_size_read(inode);
+ loff_t size = inode->i_size;
+
if (ia->ia_size != size) {
ret = ramfs_nommu_resize(inode, ia->ia_size, size);
if (ret < 0 || ia->ia_valid == ATTR_SIZE)
@@ -182,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
}
}
- ret = inode_setattr(inode, ia);
+ generic_setattr(inode, ia);
out:
ia->ia_valid = old_ia_valid;
return ret;
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 4455fbe269a3..198dabf1b2bb 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -14,8 +14,7 @@
extern const struct reiserfs_key MIN_KEY;
static int reiserfs_readdir(struct file *, void *, filldir_t);
-static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync);
+static int reiserfs_dir_fsync(struct file *filp, int datasync);
const struct file_operations reiserfs_dir_operations = {
.llseek = generic_file_llseek,
@@ -28,10 +27,9 @@ const struct file_operations reiserfs_dir_operations = {
#endif
};
-static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
- int datasync)
+static int reiserfs_dir_fsync(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int err;
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9977df9f3a54..b82cdd8a45dd 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -134,10 +134,9 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
* be removed...
*/
-static int reiserfs_sync_file(struct file *filp,
- struct dentry *dentry, int datasync)
+static int reiserfs_sync_file(struct file *filp, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
int err;
int barrier_done;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 59125fb36d42..9822fa15118b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -158,6 +158,7 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
int i;
int ms_active_set;
+ int quota_enabled[MAXQUOTAS];
#endif
/* compose key to look for "save" links */
@@ -179,8 +180,15 @@ static int finish_unfinished(struct super_block *s)
}
/* Turn on quotas so that they are updated correctly */
for (i = 0; i < MAXQUOTAS; i++) {
+ quota_enabled[i] = 1;
if (REISERFS_SB(s)->s_qf_names[i]) {
- int ret = reiserfs_quota_on_mount(s, i);
+ int ret;
+
+ if (sb_has_quota_active(s, i)) {
+ quota_enabled[i] = 0;
+ continue;
+ }
+ ret = reiserfs_quota_on_mount(s, i);
if (ret < 0)
reiserfs_warning(s, "reiserfs-2500",
"cannot turn on journaled "
@@ -304,8 +312,8 @@ static int finish_unfinished(struct super_block *s)
#ifdef CONFIG_QUOTA
/* Turn quotas off */
for (i = 0; i < MAXQUOTAS; i++) {
- if (sb_dqopt(s)->files[i])
- vfs_quota_off(s, i, 0);
+ if (sb_dqopt(s)->files[i] && quota_enabled[i])
+ dquot_quota_off(s, i);
}
if (ms_active_set)
/* Restore the flag back */
@@ -466,6 +474,8 @@ static void reiserfs_put_super(struct super_block *s)
struct reiserfs_transaction_handle th;
th.t_trans_id = 0;
+ dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
reiserfs_write_lock(s);
if (s->s_dirt)
@@ -620,7 +630,7 @@ static int reiserfs_acquire_dquot(struct dquot *);
static int reiserfs_release_dquot(struct dquot *);
static int reiserfs_mark_dquot_dirty(struct dquot *);
static int reiserfs_write_info(struct super_block *, int);
-static int reiserfs_quota_on(struct super_block *, int, int, char *, int);
+static int reiserfs_quota_on(struct super_block *, int, int, char *);
static const struct dquot_operations reiserfs_quota_operations = {
.write_dquot = reiserfs_write_dquot,
@@ -634,12 +644,12 @@ static const struct dquot_operations reiserfs_quota_operations = {
static const struct quotactl_ops reiserfs_qctl_operations = {
.quota_on = reiserfs_quota_on,
- .quota_off = vfs_quota_off,
- .quota_sync = vfs_quota_sync,
- .get_info = vfs_get_dqinfo,
- .set_info = vfs_set_dqinfo,
- .get_dqblk = vfs_get_dqblk,
- .set_dqblk = vfs_set_dqblk,
+ .quota_off = dquot_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_info = dquot_get_dqinfo,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
};
#endif
@@ -1242,6 +1252,11 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (s->s_flags & MS_RDONLY)
/* it is read-only already */
goto out_ok;
+
+ err = dquot_suspend(s, -1);
+ if (err < 0)
+ goto out_err;
+
/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
|| REISERFS_SB(s)->s_mount_state != REISERFS_VALID_FS) {
@@ -1295,6 +1310,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_dirt = 0;
if (!(*mount_flags & MS_RDONLY)) {
+ dquot_resume(s, -1);
finish_unfinished(s);
reiserfs_xattr_init(s, *mount_flags);
}
@@ -2022,15 +2038,15 @@ static int reiserfs_write_info(struct super_block *sb, int type)
*/
static int reiserfs_quota_on_mount(struct super_block *sb, int type)
{
- return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
- REISERFS_SB(sb)->s_jquota_fmt, type);
+ return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
+ REISERFS_SB(sb)->s_jquota_fmt, type);
}
/*
* Standard function to be called on quota_on
*/
static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
- char *name, int remount)
+ char *name)
{
int err;
struct path path;
@@ -2039,9 +2055,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
return -EINVAL;
- /* No more checks needed? Path and format_id are bogus anyway... */
- if (remount)
- return vfs_quota_on(sb, type, format_id, name, 1);
+
err = kern_path(name, LOOKUP_FOLLOW, &path);
if (err)
return err;
@@ -2085,7 +2099,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = vfs_quota_on_path(sb, type, format_id, &path);
+ err = dquot_quota_on_path(sb, type, format_id, &path);
out:
path_put(&path);
return err;
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 84ecf0e43f91..8e187a0f94bb 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -28,8 +28,9 @@
#include "proto.h"
static int
-smb_fsync(struct file *file, struct dentry * dentry, int datasync)
+smb_fsync(struct file *file, int datasync)
{
+ struct dentry *dentry = file->f_path.dentry;
struct smb_sb_info *server = server_from_dentry(dentry);
int result;
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index dfa1d67f8fca..9551cb6f7fe4 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -714,7 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
error = server->ops->truncate(inode, attr->ia_size);
if (error)
goto out;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
goto out;
refresh = 1;
diff --git a/fs/splice.c b/fs/splice.c
index ac22b00d86c3..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -354,7 +354,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
break;
error = add_to_page_cache_lru(page, mapping, index,
- mapping_gfp_mask(mapping));
+ GFP_KERNEL);
if (unlikely(error)) {
page_cache_release(page);
if (error == -EEXIST)
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
{
struct file *file = sd->u.file;
- return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
+ return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
+ sd->flags);
}
/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_in)
return -ESPIPE;
if (off_out) {
- if (!out->f_op || !out->f_op->llseek ||
- out->f_op->llseek == no_llseek)
+ if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t)))
return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
if (off_out)
return -ESPIPE;
if (off_in) {
- if (!in->f_op || !in->f_op->llseek ||
- in->f_op->llseek == no_llseek)
+ if (!(in->f_mode & FMODE_PREAD))
return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t)))
return -EFAULT;
diff --git a/fs/super.c b/fs/super.c
index 69688b15f1fa..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -24,7 +24,6 @@
#include <linux/slab.h>
#include <linux/acct.h>
#include <linux/blkdev.h>
-#include <linux/quotaops.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h> /* for the emergency remount stuff */
@@ -94,8 +93,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
init_rwsem(&s->s_dquot.dqptr_sem);
init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
- s->dq_op = sb_dquot_ops;
- s->s_qcop = sb_quotactl_ops;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
}
@@ -160,7 +157,6 @@ void deactivate_locked_super(struct super_block *s)
{
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
- vfs_dq_off(s, 0);
fs->kill_sb(s);
put_filesystem(fs);
put_super(s);
@@ -378,6 +374,8 @@ void sync_supers(void)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
}
@@ -409,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
up_read(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -524,7 +524,7 @@ rescan:
int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
{
int retval;
- int remount_rw, remount_ro;
+ int remount_ro;
if (sb->s_frozen != SB_UNFROZEN)
return -EBUSY;
@@ -540,7 +540,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
sync_filesystem(sb);
remount_ro = (flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY);
- remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
/* If we are remounting RDONLY and current sb is read/write,
make sure there are no rw files opened */
@@ -549,9 +548,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
mark_files_ro(sb);
else if (!fs_may_remount_ro(sb))
return -EBUSY;
- retval = vfs_dq_off(sb, 1);
- if (retval < 0 && retval != -ENOSYS)
- return -EBUSY;
}
if (sb->s_op->remount_fs) {
@@ -560,8 +556,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
return retval;
}
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
- if (remount_rw)
- vfs_dq_quota_on_remount(sb);
+
/*
* Some filesystems modify their metadata via some other path than the
* bdev buffer cache (eg. use a private mapping, or directories in
@@ -594,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
}
up_write(&sb->s_umount);
spin_lock(&sb_lock);
+ /* lock was dropped, must reset next */
+ list_safe_reset_next(sb, n, s_list);
__put_super(sb);
}
spin_unlock(&sb_lock);
@@ -946,8 +943,8 @@ out:
EXPORT_SYMBOL_GPL(vfs_kern_mount);
/**
- * freeze_super -- lock the filesystem and force it into a consistent state
- * @super: the super to lock
+ * freeze_super - lock the filesystem and force it into a consistent state
+ * @sb: the super to lock
*
* Syncs the super to make sure the filesystem is consistent and calls the fs's
* freeze_fs. Subsequent calls to this without first thawing the fs will return
diff --git a/fs/sync.c b/fs/sync.c
index e8cbd415e50a..15aa6f03b2da 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -42,7 +42,7 @@ static int __sync_filesystem(struct super_block *sb, int wait)
if (wait)
sync_inodes_sb(sb);
else
- writeback_inodes_sb_locked(sb);
+ writeback_inodes_sb(sb);
if (sb->s_op->sync_fs)
sb->s_op->sync_fs(sb, wait);
@@ -130,12 +130,10 @@ void emergency_sync(void)
/*
* Generic function to fsync a file.
- *
- * filp may be NULL if called via the msync of a vma.
*/
-int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+int file_fsync(struct file *filp, int datasync)
{
- struct inode * inode = dentry->d_inode;
+ struct inode *inode = filp->f_mapping->host;
struct super_block * sb;
int ret, err;
@@ -183,7 +181,7 @@ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
* livelocks in fsync_buffers_list().
*/
mutex_lock(&mapping->host->i_mutex);
- err = file->f_op->fsync(file, file->f_path.dentry, datasync);
+ err = file->f_op->fsync(file, datasync);
if (!ret)
ret = err;
mutex_unlock(&mapping->host->i_mutex);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index bbd77e95cf7f..0835a3b70e03 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -117,13 +117,13 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
if (error)
goto out;
- iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
-
- error = inode_setattr(inode, iattr);
+ error = sysfs_sd_setattr(sd, iattr);
if (error)
goto out;
- error = sysfs_sd_setattr(sd, iattr);
+ /* this ignores size changes */
+ generic_setattr(inode, iattr);
+
out:
mutex_unlock(&sysfs_mutex);
return error;
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 1dabed286b4c..79941e4964a4 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -24,7 +24,7 @@ const struct file_operations sysv_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = sysv_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
static inline void dir_put_page(struct page *page)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 96340c01f4a7..750cc22349bd 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
+#include <linux/writeback.h>
#include "sysv.h"
/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
struct inode *inode;
sysv_ino_t ino;
unsigned count;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE
+ };
inode = new_inode(sb);
if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- sysv_write_inode(inode, 0); /* ensure inode not allocated again */
+ sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
/* That's it. */
unlock_super(sb);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 4573734d723d..d4a5380b5669 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -43,6 +43,7 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
* then attach current time stamp.
* But if the filesystem was marked clean, keep it clean.
*/
+ sb->s_dirt = 0;
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
*/
static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
+ down_read(&c->vfs_sb->s_umount);
writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5692cf72b807..12f445cee9f7 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,12 +967,15 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'vmtruncate()', which first changes @inode->i_size, then
+ * we have to call 'simple_setsize()', which first changes @inode->i_size, then
* drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
+ * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with
* @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
* means that @inode->i_size is changed while @ui_mutex is unlocked.
*
+ * XXX: with the new truncate the above is not true anymore, the simple_setsize
+ * calls can be replaced with the individual components.
+ *
* But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
* inode size. How do we do this if @inode->i_size may became smaller while we
* are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
@@ -1125,7 +1128,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
budgeted = 0;
}
- err = vmtruncate(inode, new_size);
+ err = simple_setsize(inode, new_size);
if (err)
goto out_budg;
@@ -1214,7 +1217,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
dbg_gen("size %lld -> %lld", inode->i_size, new_size);
- err = vmtruncate(inode, new_size);
+ err = simple_setsize(inode, new_size);
if (err)
goto out;
}
@@ -1223,7 +1226,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* 'vmtruncate()' changed @i_size, update @ui_size */
+ /* 'simple_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -1304,9 +1307,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ubifs_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
int err;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index bd2542dad014..2eef553d50c8 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock
* with 'ubifs_writepage()' (see file.c). All the other inode fields are
* changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
@@ -1678,7 +1678,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int ubifs_fsync(struct file *file, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
/* dir.c */
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 9a9378b4eb5a..b608efaa4cee 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -21,7 +21,6 @@
#include "udfdecl.h"
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
@@ -159,8 +158,6 @@ static void udf_bitmap_free_blocks(struct super_block *sb,
udf_debug("byte=%2x\n",
((char *)bh->b_data)[(bit + i) >> 3]);
} else {
- if (inode)
- dquot_free_block(inode, 1);
udf_add_free_space(sb, sbi->s_partition, 1);
}
}
@@ -210,15 +207,8 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb,
bit = block % (sb->s_blocksize << 3);
while (bit < (sb->s_blocksize << 3) && block_count > 0) {
- if (!udf_test_bit(bit, bh->b_data))
+ if (!udf_clear_bit(bit, bh->b_data))
goto out;
- else if (dquot_prealloc_block(inode, 1))
- goto out;
- else if (!udf_clear_bit(bit, bh->b_data)) {
- udf_debug("bit already cleared for block %d\n", bit);
- dquot_free_block(inode, 1);
- goto out;
- }
block_count--;
alloc_count++;
bit++;
@@ -338,20 +328,6 @@ search_back:
}
got_block:
-
- /*
- * Check quota for allocation of this block.
- */
- if (inode) {
- int ret = dquot_alloc_block(inode, 1);
-
- if (ret) {
- mutex_unlock(&sbi->s_alloc_mutex);
- *err = ret;
- return 0;
- }
- }
-
newblock = bit + (block_group << (sb->s_blocksize_bits + 3)) -
(sizeof(struct spaceBitmapDesc) << 3);
@@ -401,10 +377,6 @@ static void udf_table_free_blocks(struct super_block *sb,
}
iinfo = UDF_I(table);
- /* We do this up front - There are some error conditions that
- could occure, but.. oh well */
- if (inode)
- dquot_free_block(inode, count);
udf_add_free_space(sb, sbi->s_partition, count);
start = bloc->logicalBlockNum + offset;
@@ -649,10 +621,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
epos.offset -= adsize;
alloc_count = (elen >> sb->s_blocksize_bits);
- if (inode && dquot_prealloc_block(inode,
- alloc_count > block_count ? block_count : alloc_count))
- alloc_count = 0;
- else if (alloc_count > block_count) {
+ if (alloc_count > block_count) {
alloc_count = block_count;
eloc.logicalBlockNum += alloc_count;
elen -= (alloc_count << sb->s_blocksize_bits);
@@ -752,14 +721,6 @@ static int udf_table_new_block(struct super_block *sb,
newblock = goal_eloc.logicalBlockNum;
goal_eloc.logicalBlockNum++;
goal_elen -= sb->s_blocksize;
- if (inode) {
- *err = dquot_alloc_block(inode, 1);
- if (*err) {
- brelse(goal_epos.bh);
- mutex_unlock(&sbi->s_alloc_mutex);
- return 0;
- }
- }
if (goal_elen)
udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 1660c81ffa3d..51552bf50225 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -211,5 +211,5 @@ const struct file_operations udf_dir_operations = {
.read = generic_read_dir,
.readdir = udf_readdir,
.unlocked_ioctl = udf_ioctl,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
};
diff --git a/fs/udf/file.c b/fs/udf/file.c
index baae3a723946..94e06d6bddbd 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,6 @@
#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/aio.h>
#include <linux/smp_lock.h>
@@ -219,39 +218,16 @@ const struct file_operations udf_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.unlocked_ioctl = udf_ioctl,
- .open = dquot_file_open,
+ .open = generic_file_open,
.mmap = generic_file_mmap,
.write = do_sync_write,
.aio_write = udf_file_aio_write,
.release = udf_release_file,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
.llseek = generic_file_llseek,
};
-int udf_setattr(struct dentry *dentry, struct iattr *iattr)
-{
- struct inode *inode = dentry->d_inode;
- int error;
-
- error = inode_change_ok(inode, iattr);
- if (error)
- return error;
-
- if (is_quota_modification(inode, iattr))
- dquot_initialize(inode);
-
- if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
- (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
- error = dquot_transfer(inode, iattr);
- if (error)
- return error;
- }
-
- return inode_setattr(inode, iattr);
-}
-
const struct inode_operations udf_file_inode_operations = {
.truncate = udf_truncate,
- .setattr = udf_setattr,
};
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 2b5586c7f02a..18cd7111185d 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -20,7 +20,6 @@
#include "udfdecl.h"
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -32,13 +31,6 @@ void udf_free_inode(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
- /*
- * Note: we must free any quota before locking the superblock,
- * as writing the quota to disk may need the lock as well.
- */
- dquot_free_inode(inode);
- dquot_drop(inode);
-
clear_inode(inode);
mutex_lock(&sbi->s_alloc_mutex);
@@ -61,7 +53,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
struct super_block *sb = dir->i_sb;
struct udf_sb_info *sbi = UDF_SB(sb);
struct inode *inode;
- int block, ret;
+ int block;
uint32_t start = UDF_I(dir)->i_location.logicalBlockNum;
struct udf_inode_info *iinfo;
struct udf_inode_info *dinfo = UDF_I(dir);
@@ -146,17 +138,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err)
insert_inode_hash(inode);
mark_inode_dirty(inode);
- dquot_initialize(inode);
- ret = dquot_alloc_inode(inode);
- if (ret) {
- dquot_drop(inode);
- inode->i_flags |= S_NOQUOTA;
- inode->i_nlink = 0;
- iput(inode);
- *err = ret;
- return NULL;
- }
-
*err = 0;
return inode;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 8a3fbd177cab..124852bcf6fe 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -36,7 +36,6 @@
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
@@ -71,9 +70,6 @@ static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
void udf_delete_inode(struct inode *inode)
{
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
-
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode))
@@ -113,7 +109,6 @@ void udf_clear_inode(struct inode *inode)
(unsigned long long)iinfo->i_lenExtents);
}
- dquot_drop(inode);
kfree(iinfo->i_ext.i_data);
iinfo->i_ext.i_data = NULL;
}
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 585f733615dc..bf5fc674193c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -27,7 +27,6 @@
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/slab.h>
-#include <linux/quotaops.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
@@ -563,8 +562,6 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
int err;
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
inode = udf_new_inode(dir, mode, &err);
if (!inode) {
@@ -617,8 +614,6 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode,
if (!old_valid_dev(rdev))
return -EINVAL;
- dquot_initialize(dir);
-
lock_kernel();
err = -EIO;
inode = udf_new_inode(dir, mode, &err);
@@ -664,8 +659,6 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
struct udf_inode_info *dinfo = UDF_I(dir);
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
err = -EMLINK;
if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1)
@@ -800,8 +793,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
struct fileIdentDesc *fi, cfi;
struct kernel_lb_addr tloc;
- dquot_initialize(dir);
-
retval = -ENOENT;
lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -848,8 +839,6 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
struct fileIdentDesc cfi;
struct kernel_lb_addr tloc;
- dquot_initialize(dir);
-
retval = -ENOENT;
lock_kernel();
fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
@@ -904,8 +893,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
struct buffer_head *bh;
struct udf_inode_info *iinfo;
- dquot_initialize(dir);
-
lock_kernel();
inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err);
if (!inode)
@@ -1075,8 +1062,6 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
int err;
struct buffer_head *bh;
- dquot_initialize(dir);
-
lock_kernel();
if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) {
unlock_kernel();
@@ -1139,9 +1124,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
struct kernel_lb_addr tloc;
struct udf_inode_info *old_iinfo = UDF_I(old_inode);
- dquot_initialize(old_dir);
- dquot_initialize(new_dir);
-
lock_kernel();
ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
if (ofi) {
@@ -1387,7 +1369,6 @@ const struct export_operations udf_export_ops = {
const struct inode_operations udf_dir_inode_operations = {
.lookup = udf_lookup,
.create = udf_create,
- .setattr = udf_setattr,
.link = udf_link,
.unlink = udf_unlink,
.symlink = udf_symlink,
@@ -1400,5 +1381,4 @@ const struct inode_operations udf_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
- .setattr = udf_setattr,
};
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1e4543cbcd27..612d1e2e285a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -557,6 +557,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
{
struct udf_options uopt;
struct udf_sb_info *sbi = UDF_SB(sb);
+ int error = 0;
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
@@ -582,17 +583,17 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
*flags |= MS_RDONLY;
}
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
- unlock_kernel();
- return 0;
- }
+ if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ goto out_unlock;
+
if (*flags & MS_RDONLY)
udf_close_lvid(sb);
else
udf_open_lvid(sb);
+out_unlock:
unlock_kernel();
- return 0;
+ return error;
}
/* Check Volume Structure Descriptors (ECMA 167 2/9.1) */
@@ -1939,7 +1940,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
/* Fill in the rest of the superblock */
sb->s_op = &udf_sb_ops;
sb->s_export_op = &udf_export_ops;
- sb->dq_op = NULL;
+
sb->s_dirt = 0;
sb->s_magic = UDF_SUPER_MAGIC;
sb->s_time_gran = 1000;
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 9079ff7d6255..2bac0354891f 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -131,7 +131,6 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
/* file.c */
extern long udf_ioctl(struct file *, unsigned int, unsigned long);
-extern int udf_setattr(struct dentry *dentry, struct iattr *iattr);
/* inode.c */
extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
extern int udf_sync_inode(struct inode *);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 5cfa4d85ccf2..048484fb10d2 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -12,7 +12,6 @@
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/string.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/bitops.h>
@@ -85,9 +84,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
"bit already cleared for fragment %u", i);
}
- dquot_free_block(inode, count);
-
-
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -195,7 +191,6 @@ do_more:
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
- dquot_free_block(inode, uspi->s_fpb);
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
@@ -511,7 +506,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
unsigned cgno, fragno, fragoff, count, fragsize, i;
- int ret;
UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
(unsigned long long)fragment, oldcount, newcount);
@@ -557,11 +551,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
fs32_add(sb, &ucg->cg_frsum[fragsize - count], 1);
for (i = oldcount; i < newcount; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i);
- ret = dquot_alloc_block(inode, count);
- if (ret) {
- *err = ret;
- return 0;
- }
fs32_sub(sb, &ucg->cg_cs.cs_nffree, count);
fs32_sub(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -598,7 +587,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
struct ufs_cylinder_group * ucg;
unsigned oldcg, i, j, k, allocsize;
u64 result;
- int ret;
UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
inode->i_ino, cgno, (unsigned long long)goal, count);
@@ -667,7 +655,6 @@ cg_found:
for (i = count; i < uspi->s_fpb; i++)
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
- dquot_free_block(inode, i);
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
@@ -679,11 +666,6 @@ cg_found:
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
- ret = dquot_alloc_block(inode, count);
- if (ret) {
- *err = ret;
- return 0;
- }
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
@@ -718,7 +700,6 @@ static u64 ufs_alloccg_block(struct inode *inode,
struct ufs_super_block_first * usb1;
struct ufs_cylinder_group * ucg;
u64 result, blkno;
- int ret;
UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
@@ -752,11 +733,6 @@ gotit:
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, -1);
- ret = dquot_alloc_block(inode, uspi->s_fpb);
- if (ret) {
- *err = ret;
- return INVBLOCK;
- }
fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree--;
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 317a0d444f6b..ec784756dc65 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@ not_empty:
const struct file_operations ufs_dir_operations = {
.read = generic_read_dir,
.readdir = ufs_readdir,
- .fsync = simple_fsync,
+ .fsync = generic_file_fsync,
.llseek = generic_file_llseek,
};
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index a8962cecde5b..33afa20d4509 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,7 +24,6 @@
*/
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -41,7 +40,7 @@ const struct file_operations ufs_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .open = dquot_file_open,
- .fsync = simple_fsync,
+ .open = generic_file_open,
+ .fsync = generic_file_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 3a959d55084d..594480e537d2 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -27,7 +27,6 @@
#include <linux/time.h>
#include <linux/stat.h>
#include <linux/string.h>
-#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/bitops.h>
@@ -95,9 +94,6 @@ void ufs_free_inode (struct inode * inode)
is_directory = S_ISDIR(inode->i_mode);
- dquot_free_inode(inode);
- dquot_drop(inode);
-
clear_inode (inode);
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
@@ -347,21 +343,12 @@ cg_found:
unlock_super (sb);
- dquot_initialize(inode);
- err = dquot_alloc_inode(inode);
- if (err) {
- dquot_drop(inode);
- goto fail_without_unlock;
- }
-
UFSD("allocating inode %lu\n", inode->i_ino);
UFSD("EXIT\n");
return inode;
fail_remove_inode:
unlock_super(sb);
-fail_without_unlock:
- inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
UFSD("EXIT (FAILED): err %d\n", err);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index cffa756f1047..73fe773aa034 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -37,7 +37,6 @@
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -910,9 +909,6 @@ void ufs_delete_inode (struct inode * inode)
{
loff_t old_i_size;
- if (!is_bad_inode(inode))
- dquot_initialize(inode);
-
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode))
goto no_delete;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index eabc02eb1294..b056f02b1fb3 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -30,7 +30,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -86,8 +85,6 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
UFSD("BEGIN\n");
- dquot_initialize(dir);
-
inode = ufs_new_inode(dir, mode);
err = PTR_ERR(inode);
@@ -112,8 +109,6 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
if (!old_valid_dev(rdev))
return -EINVAL;
- dquot_initialize(dir);
-
inode = ufs_new_inode(dir, mode);
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
@@ -138,8 +133,6 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
if (l > sb->s_blocksize)
goto out_notlocked;
- dquot_initialize(dir);
-
lock_kernel();
inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
err = PTR_ERR(inode);
@@ -185,8 +178,6 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
return -EMLINK;
}
- dquot_initialize(dir);
-
inode->i_ctime = CURRENT_TIME_SEC;
inode_inc_link_count(inode);
atomic_inc(&inode->i_count);
@@ -204,8 +195,6 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
if (dir->i_nlink >= UFS_LINK_MAX)
goto out;
- dquot_initialize(dir);
-
lock_kernel();
inode_inc_link_count(dir);
@@ -250,8 +239,6 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
struct page *page;
int err = -ENOENT;
- dquot_initialize(dir);
-
de = ufs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto out;
@@ -296,9 +283,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ufs_dir_entry *old_de;
int err = -ENOENT;
- dquot_initialize(old_dir);
- dquot_initialize(new_dir);
-
old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_de)
goto out;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index ad9bc1ebd3a6..3ec5a9eb6efb 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -77,7 +77,6 @@
#include <linux/errno.h>
#include <linux/fs.h>
-#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/stat.h>
@@ -1047,7 +1046,7 @@ magic_found:
*/
sb->s_op = &ufs_super_ops;
sb->s_export_op = &ufs_export_ops;
- sb->dq_op = NULL; /***/
+
sb->s_magic = fs32_to_cpu(sb, usb3->fs_magic);
uspi->s_sblkno = fs32_to_cpu(sb, usb1->fs_sblkno);
@@ -1437,126 +1436,19 @@ static void destroy_inodecache(void)
kmem_cache_destroy(ufs_inode_cachep);
}
-static void ufs_clear_inode(struct inode *inode)
-{
- dquot_drop(inode);
-}
-
-#ifdef CONFIG_QUOTA
-static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
-static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
-#endif
-
static const struct super_operations ufs_super_ops = {
.alloc_inode = ufs_alloc_inode,
.destroy_inode = ufs_destroy_inode,
.write_inode = ufs_write_inode,
.delete_inode = ufs_delete_inode,
- .clear_inode = ufs_clear_inode,
.put_super = ufs_put_super,
.write_super = ufs_write_super,
.sync_fs = ufs_sync_fs,
.statfs = ufs_statfs,
.remount_fs = ufs_remount,
.show_options = ufs_show_options,
-#ifdef CONFIG_QUOTA
- .quota_read = ufs_quota_read,
- .quota_write = ufs_quota_write,
-#endif
};
-#ifdef CONFIG_QUOTA
-
-/* Read data from quotafile - avoid pagecache and such because we cannot afford
- * acquiring the locks... As quota files are never truncated and quota code
- * itself serializes the operations (and noone else should touch the files)
- * we don't have to be afraid of races */
-static ssize_t ufs_quota_read(struct super_block *sb, int type, char *data,
- size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- sector_t blk = off >> sb->s_blocksize_bits;
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- int tocopy;
- size_t toread;
- struct buffer_head *bh;
- loff_t i_size = i_size_read(inode);
-
- if (off > i_size)
- return 0;
- if (off+len > i_size)
- len = i_size-off;
- toread = len;
- while (toread > 0) {
- tocopy = sb->s_blocksize - offset < toread ?
- sb->s_blocksize - offset : toread;
-
- bh = ufs_bread(inode, blk, 0, &err);
- if (err)
- return err;
- if (!bh) /* A hole? */
- memset(data, 0, tocopy);
- else {
- memcpy(data, bh->b_data+offset, tocopy);
- brelse(bh);
- }
- offset = 0;
- toread -= tocopy;
- data += tocopy;
- blk++;
- }
- return len;
-}
-
-/* Write to quotafile */
-static ssize_t ufs_quota_write(struct super_block *sb, int type,
- const char *data, size_t len, loff_t off)
-{
- struct inode *inode = sb_dqopt(sb)->files[type];
- sector_t blk = off >> sb->s_blocksize_bits;
- int err = 0;
- int offset = off & (sb->s_blocksize - 1);
- int tocopy;
- size_t towrite = len;
- struct buffer_head *bh;
-
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
- while (towrite > 0) {
- tocopy = sb->s_blocksize - offset < towrite ?
- sb->s_blocksize - offset : towrite;
-
- bh = ufs_bread(inode, blk, 1, &err);
- if (!bh)
- goto out;
- lock_buffer(bh);
- memcpy(bh->b_data+offset, data, tocopy);
- flush_dcache_page(bh->b_page);
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- unlock_buffer(bh);
- brelse(bh);
- offset = 0;
- towrite -= tocopy;
- data += tocopy;
- blk++;
- }
-out:
- if (len == towrite) {
- mutex_unlock(&inode->i_mutex);
- return err;
- }
- if (inode->i_size < off+len-towrite)
- i_size_write(inode, off+len-towrite);
- inode->i_version++;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- mutex_unlock(&inode->i_mutex);
- return len - towrite;
-}
-
-#endif
-
static int ufs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index f294c44577dc..589e01a465ba 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -44,7 +44,6 @@
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
-#include <linux/quotaops.h>
#include "ufs_fs.h"
#include "ufs.h"
@@ -501,12 +500,10 @@ out:
return err;
}
-
/*
- * We don't define our `inode->i_op->truncate', and call it here,
- * because of:
- * - there is no way to know old size
- * - there is no way inform user about error, if it happens in `truncate'
+ * TODO:
+ * - truncate case should use proper ordering instead of using
+ * simple_setsize
*/
int ufs_setattr(struct dentry *dentry, struct iattr *attr)
{
@@ -518,19 +515,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
- if (is_quota_modification(inode, attr))
- dquot_initialize(inode);
-
- if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
- error = dquot_transfer(inode, attr);
- if (error)
- return error;
- }
if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
loff_t old_i_size = inode->i_size;
- error = vmtruncate(inode, attr->ia_size);
+ error = simple_setsize(inode, attr->ia_size);
if (error)
return error;
error = ufs_truncate(inode, old_i_size);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 089eaca860b4..34640d6dbdcb 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1333,6 +1333,21 @@ xfs_vm_writepage(
trace_xfs_writepage(inode, page, 0);
/*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This is primarily to avoid stack overflows when called from deep
+ * used stacks in random callers for direct reclaim, but disabling
+ * reclaim for kswap is a nice side-effect as kswapd causes rather
+ * suboptimal I/O patters, too.
+ *
+ * This should really be done by the core VM, but until that happens
+ * filesystems like XFS, btrfs and ext4 have to take care of this
+ * by themselves.
+ */
+ if (current->flags & PF_MEMALLOC)
+ goto out_fail;
+
+ /*
* We need a transaction if:
* 1. There are delalloc buffers on the page
* 2. The page is uptodate and we have unmapped buffers
@@ -1366,14 +1381,6 @@ xfs_vm_writepage(
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * VM calculation for nr_to_write seems off. Bump it way
- * up, this gets simple streaming writes zippy again.
- * To be reviewed again after Jens' writeback changes.
- */
- wbc->nr_to_write *= 4;
-
/*
* Convert delayed allocate, unwritten or unmapped space
* to real space and flush out to disk.
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
return ERR_PTR(-ESTALE);
/*
- * The XFS_IGET_BULKSTAT means that an invalid inode number is just
- * fine and not an indication of a corrupted filesystem. Because
- * clients can send any kind of invalid file handle, e.g. after
- * a restore on the server we have to deal with this case gracefully.
+ * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
+ * fine and not an indication of a corrupted filesystem as clients can
+ * send invalid file handles and we have to handle it gracefully..
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT,
- XFS_ILOCK_SHARED, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
+ XFS_ILOCK_SHARED, &ip);
if (error) {
/*
* EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index d8fb1b5d6cb5..257a56b127cf 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -100,10 +100,10 @@ xfs_iozero(
STATIC int
xfs_file_fsync(
struct file *file,
- struct dentry *dentry,
int datasync)
{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
+ struct inode *inode = file->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
struct xfs_trans *tp;
int error = 0;
int log_flushed = 0;
@@ -140,8 +140,8 @@ xfs_file_fsync(
* might gets cleared when the inode gets written out via the AIL
* or xfs_iflush_cluster.
*/
- if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
- ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
+ if (((inode->i_state & I_DIRTY_DATASYNC) ||
+ ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
ip->i_update_core) {
/*
* Kick off a transaction to log the inode core to get the
@@ -868,7 +868,7 @@ write_retry:
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, iolock);
- error2 = -xfs_file_fsync(file, file->f_path.dentry,
+ error2 = -xfs_file_fsync(file,
(file->f_flags & __O_SYNC) ? 0 : 1);
if (!error)
error = error2;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
error = xfs_bulkstat_single(mp, &inlast,
bulkreq.ubuffer, &done);
else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count,
- (bulkstat_one_pf)xfs_bulkstat_one, NULL,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
+ sizeof(xfs_bstat_t), bulkreq.ubuffer,
+ &done);
if (error)
return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat, bno,
- ubused, dibuff, stat);
+ xfs_bulkstat_one_fmt_compat,
+ ubused, stat);
}
/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
int res;
error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ sizeof(compat_xfs_bstat_t), 0, &res);
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, NULL,
- sizeof(compat_xfs_bstat_t), bulkreq.ubuffer,
- BULKSTAT_FG_QUICK, &done);
+ xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
+ bulkreq.ubuffer, &done);
} else
error = XFS_ERROR(EINVAL);
if (error)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 9c8019c78c92..44f0b2de153e 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
bf.l_len = len;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ /* check the new inode size is valid before allocating */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
+
error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
0, XFS_ATTR_NOLOCK);
- if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode))
- new_size = offset + len;
+ if (error)
+ goto out_unlock;
/* Change file size if needed */
if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
}
+out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
out_error:
return error;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 9ac8aea91529..067cafbfc635 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -23,7 +23,6 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
-#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3884e20bc14e..ef7f0218bccb 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -164,10 +164,6 @@ xfs_inode_ag_iterator(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive, &nr);
xfs_perag_put(pag);
@@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink(
down_read(&xfs_mount_list_lock);
list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-
pag = xfs_perag_get(mp, ag);
- if (!pag->pag_ici_init) {
- xfs_perag_put(pag);
- continue;
- }
reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 207fa77f63ae..d12be8470cba 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -50,7 +50,6 @@
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
#include "xfs_log_recover.h"
-#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index ff6bc797baf2..73d5aa117384 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
)
)
-#define DEFINE_PERAG_REF_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
- unsigned long caller_ip), \
- TP_ARGS(mp, agno, refcount, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_agnumber_t, agno) \
- __field(int, refcount) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = mp->m_super->s_dev; \
- __entry->agno = agno; \
- __entry->refcount = refcount; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->agno, \
- __entry->refcount, \
- (char *)__entry->caller_ip) \
-);
-
-DEFINE_PERAG_REF_EVENT(xfs_perag_get)
-DEFINE_PERAG_REF_EVENT(xfs_perag_put)
-
#define DEFINE_ATTR_LIST_EVENT(name) \
DEFINE_EVENT(xfs_attr_list_class, name, \
TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
+DECLARE_EVENT_CLASS(xfs_perag_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+ unsigned long caller_ip),
+ TP_ARGS(mp, agno, refcount, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, refcount)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->refcount = refcount;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->refcount,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name) \
+DEFINE_EVENT(xfs_perag_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
+ unsigned long caller_ip), \
+ TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+
TRACE_EVENT(xfs_attr_list_node_descend,
TP_PROTO(struct xfs_attr_list_context *ctx,
struct xfs_da_node_entry *btree),
@@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-#define DEFINE_RW_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
- TP_ARGS(ip, count, offset, flags), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(xfs_fsize_t, size) \
- __field(xfs_fsize_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count 0x%zx ioflags %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
+DECLARE_EVENT_CLASS(xfs_file_class,
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+ TP_ARGS(ip, count, offset, flags),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_fsize_t, size)
+ __field(xfs_fsize_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count 0x%zx ioflags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
)
+
+#define DEFINE_RW_EVENT(name) \
+DEFINE_EVENT(xfs_file_class, name, \
+ TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+ TP_ARGS(ip, count, offset, flags))
DEFINE_RW_EVENT(xfs_file_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DEFINE_RW_EVENT(xfs_file_splice_write);
-
-#define DEFINE_PAGE_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
- TP_ARGS(inode, page, off), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(pgoff_t, pgoff) \
- __field(loff_t, size) \
- __field(unsigned long, offset) \
- __field(int, delalloc) \
- __field(int, unmapped) \
- __field(int, unwritten) \
- ), \
- TP_fast_assign( \
- int delalloc = -1, unmapped = -1, unwritten = -1; \
- \
- if (page_has_buffers(page)) \
- xfs_count_page_state(page, &delalloc, \
- &unmapped, &unwritten); \
- __entry->dev = inode->i_sb->s_dev; \
- __entry->ino = XFS_I(inode)->i_ino; \
- __entry->pgoff = page_offset(page); \
- __entry->size = i_size_read(inode); \
- __entry->offset = off; \
- __entry->delalloc = delalloc; \
- __entry->unmapped = unmapped; \
- __entry->unwritten = unwritten; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
- "delalloc %d unmapped %d unwritten %d", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->pgoff, \
- __entry->size, \
- __entry->offset, \
- __entry->delalloc, \
- __entry->unmapped, \
- __entry->unwritten) \
+DECLARE_EVENT_CLASS(xfs_page_class,
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+ TP_ARGS(inode, page, off),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(pgoff_t, pgoff)
+ __field(loff_t, size)
+ __field(unsigned long, offset)
+ __field(int, delalloc)
+ __field(int, unmapped)
+ __field(int, unwritten)
+ ),
+ TP_fast_assign(
+ int delalloc = -1, unmapped = -1, unwritten = -1;
+
+ if (page_has_buffers(page))
+ xfs_count_page_state(page, &delalloc,
+ &unmapped, &unwritten);
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->ino = XFS_I(inode)->i_ino;
+ __entry->pgoff = page_offset(page);
+ __entry->size = i_size_read(inode);
+ __entry->offset = off;
+ __entry->delalloc = delalloc;
+ __entry->unmapped = unmapped;
+ __entry->unwritten = unwritten;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+ "delalloc %d unmapped %d unwritten %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->pgoff,
+ __entry->size,
+ __entry->offset,
+ __entry->delalloc,
+ __entry->unmapped,
+ __entry->unwritten)
)
+
+#define DEFINE_PAGE_EVENT(name) \
+DEFINE_EVENT(xfs_page_class, name, \
+ TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
+ TP_ARGS(inode, page, off))
DEFINE_PAGE_EVENT(xfs_writepage);
DEFINE_PAGE_EVENT(xfs_releasepage);
DEFINE_PAGE_EVENT(xfs_invalidatepage);
-#define DEFINE_IOMAP_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int flags, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, flags, irec), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- __field(int, flags) \
- __field(xfs_fileoff_t, startoff) \
- __field(xfs_fsblock_t, startblock) \
- __field(xfs_filblks_t, blockcount) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- __entry->flags = flags; \
- __entry->startoff = irec ? irec->br_startoff : 0; \
- __entry->startblock = irec ? irec->br_startblock : 0; \
- __entry->blockcount = irec ? irec->br_blockcount : 0; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd flags %s " \
- "startoff 0x%llx startblock %lld blockcount 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count, \
- __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
- __entry->startoff, \
- (__int64_t)__entry->startblock, \
- __entry->blockcount) \
+DECLARE_EVENT_CLASS(xfs_iomap_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+ int flags, struct xfs_bmbt_irec *irec),
+ TP_ARGS(ip, offset, count, flags, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ __field(int, flags)
+ __field(xfs_fileoff_t, startoff)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ __entry->flags = flags;
+ __entry->startoff = irec ? irec->br_startoff : 0;
+ __entry->startblock = irec ? irec->br_startblock : 0;
+ __entry->blockcount = irec ? irec->br_blockcount : 0;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd flags %s "
+ "startoff 0x%llx startblock %lld blockcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count,
+ __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+ __entry->startoff,
+ (__int64_t)__entry->startblock,
+ __entry->blockcount)
)
+
+#define DEFINE_IOMAP_EVENT(name) \
+DEFINE_EVENT(xfs_iomap_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+ int flags, struct xfs_bmbt_irec *irec), \
+ TP_ARGS(ip, offset, count, flags, irec))
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
DEFINE_IOMAP_EVENT(xfs_iomap_found);
DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
-#define DEFINE_SIMPLE_IO_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
- TP_ARGS(ip, offset, count), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(loff_t, size) \
- __field(loff_t, new_size) \
- __field(loff_t, offset) \
- __field(size_t, count) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = ip->i_new_size; \
- __entry->offset = offset; \
- __entry->count = count; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
- "offset 0x%llx count %zd", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size, \
- __entry->offset, \
- __entry->count) \
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+ TP_ARGS(ip, offset, count),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(loff_t, size)
+ __field(loff_t, new_size)
+ __field(loff_t, offset)
+ __field(size_t, count)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = ip->i_new_size;
+ __entry->offset = offset;
+ __entry->count = count;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+ "offset 0x%llx count %zd",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size,
+ __entry->offset,
+ __entry->count)
);
+
+#define DEFINE_SIMPLE_IO_EVENT(name) \
+DEFINE_EVENT(xfs_simple_io_class, name, \
+ TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
+ TP_ARGS(ip, offset, count))
DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 38e764146644..8c117ff2e3ab 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
if (!xfs_Gqm) {
xfs_Gqm = xfs_Gqm_init();
- if (!xfs_Gqm)
+ if (!xfs_Gqm) {
+ mutex_unlock(&xfs_Gqm_lock);
return ENOMEM;
+ }
}
/*
@@ -1630,10 +1632,7 @@ xfs_qm_dqusage_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* on-disk inode pointer (not used) */
int *res) /* result code value */
{
xfs_inode_t *ip;
@@ -1658,7 +1657,7 @@ xfs_qm_dqusage_adjust(
* the case in all other instances. It's OK that we do this because
* quotacheck is done only at mount time.
*/
- if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return error;
}
@@ -1794,12 +1793,13 @@ xfs_qm_quotacheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters in core.
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust, NULL,
- structsz, NULL, BULKSTAT_FG_IGET, &done)))
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_dqusage_adjust,
+ structsz, NULL, &done);
+ if (error)
break;
- } while (! done);
+ } while (!done);
/*
* We've made all the changes that we need to make incore.
@@ -1887,14 +1887,14 @@ xfs_qm_init_quotainos(
mp->m_sb.sb_uquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_uquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0)))
+ 0, 0, &uip)))
return XFS_ERROR(error);
}
if (XFS_IS_OQUOTA_ON(mp) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
ASSERT(mp->m_sb.sb_gquotino > 0);
if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0))) {
+ 0, 0, &gip))) {
if (uip)
IRELE(uip);
return XFS_ERROR(error);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
}
if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
- error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
+ error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
if (!error) {
error = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
mp->m_sb.sb_gquotino != NULLFSINO) {
- error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
+ error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
if (!error2) {
error2 = xfs_truncate_file(mp, qip);
IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
}
if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip, 0) == 0)
+ 0, 0, &uip) == 0)
tempuqip = B_TRUE;
}
if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip, 0) == 0)
+ 0, 0, &gip) == 0)
tempgqip = B_TRUE;
}
if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* not used */
int ubsize, /* not used */
- void *private_data, /* not used */
- xfs_daddr_t bno, /* starting block of inode cluster */
int *ubused, /* not used */
- void *dip, /* not used */
int *res) /* bulkstat result code */
{
xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
ipreleased = B_FALSE;
again:
lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) {
+ if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
*res = BULKSTAT_RV_NOTHING;
return (error);
}
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
* Iterate thru all the inodes in the file system,
* adjusting the corresponding dquot counters
*/
- if ((error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust, NULL,
- 0, NULL, BULKSTAT_FG_IGET, &done))) {
+ error = xfs_bulkstat(mp, &lastino, &count,
+ xfs_qm_internalqcheck_adjust,
+ 0, NULL, &done);
+ if (error) {
+ cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
break;
}
- } while (! done);
- if (error) {
- cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
- }
+ } while (!done);
+
cmn_err(CE_DEBUG, "Checking results against system dquots");
for (i = 0; i < qmtest_hashmask; i++) {
xfs_dqtest_t *d, *n;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 401f364ad36c..4917d4eed4ed 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
- int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
goto out;
}
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) {
+ if (!(file->f_mode & FMODE_WRITE) ||
+ !(file->f_mode & FMODE_READ) ||
+ (file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_file;
}
@@ -81,6 +83,7 @@ xfs_swapext(
}
if (!(tmp_file->f_mode & FMODE_WRITE) ||
+ !(tmp_file->f_mode & FMODE_READ) ||
(tmp_file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
goto out_put_tmp_file;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
return error;
}
+STATIC int
+xfs_imap_lookup(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ xfs_agblock_t agbno,
+ xfs_agblock_t *chunk_agbno,
+ xfs_agblock_t *offset_agbno,
+ int flags)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp;
+ xfs_agino_t startino;
+ int error;
+ int i;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+ "xfs_ialloc_read_agi() returned "
+ "error %d, agno %d",
+ error, agno);
+ return error;
+ }
+
+ /*
+ * derive and lookup the exact inode record for the given agino. If the
+ * record cannot be found, then it's an invalid inode number and we
+ * should abort.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
+ startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
+ error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+ if (!error) {
+ if (i)
+ error = xfs_inobt_get_rec(cur, &rec, &i);
+ if (!error && i == 0)
+ error = EINVAL;
+ }
+
+ xfs_trans_brelse(tp, agbp);
+ xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ if (error)
+ return error;
+
+ /* for untrusted inodes check it is allocated first */
+ if ((flags & XFS_IGET_UNTRUSTED) &&
+ (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
+ return EINVAL;
+
+ *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
+ *offset_agbno = agbno - *chunk_agbno;
+ return 0;
+}
+
/*
* Return the location of the inode in imap, for mapping it into a buffer.
*/
@@ -1235,8 +1292,11 @@ xfs_imap(
if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
#ifdef DEBUG
- /* no diagnostics for bulkstat, ino comes from userspace */
- if (flags & XFS_IGET_BULKSTAT)
+ /*
+ * Don't output diagnostic information for untrusted inodes
+ * as they can be invalid without implying corruption.
+ */
+ if (flags & XFS_IGET_UNTRUSTED)
return XFS_ERROR(EINVAL);
if (agno >= mp->m_sb.sb_agcount) {
xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
return XFS_ERROR(EINVAL);
}
+ blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
+
+ /*
+ * For bulkstat and handle lookups, we have an untrusted inode number
+ * that we have to verify is valid. We cannot do this just by reading
+ * the inode buffer as it may have been unlinked and removed leaving
+ * inodes in stale state on disk. Hence we have to do a btree lookup
+ * in all cases where an untrusted inode number is passed.
+ */
+ if (flags & XFS_IGET_UNTRUSTED) {
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
+ if (error)
+ return error;
+ goto out_map;
+ }
+
/*
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
@@ -1277,24 +1354,6 @@ xfs_imap(
return 0;
}
- blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
-
- /*
- * If we get a block number passed from bulkstat we can use it to
- * find the buffer easily.
- */
- if (imap->im_blkno) {
- offset = XFS_INO_TO_OFFSET(mp, ino);
- ASSERT(offset < mp->m_sb.sb_inopblock);
-
- cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
- offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
-
- imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
- imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
- return 0;
- }
-
/*
* If the inode chunks are aligned then use simple maths to
* find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
offset_agbno = agbno & mp->m_inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
- xfs_btree_cur_t *cur; /* inode btree cursor */
- xfs_inobt_rec_incore_t chunk_rec;
- xfs_buf_t *agbp; /* agi buffer */
- int i; /* temp state */
-
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_ialloc_read_agi() returned "
- "error %d, agno %d",
- error, agno);
- return error;
- }
-
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_lookup() failed");
- goto error0;
- }
-
- error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
- if (error) {
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
- goto error0;
- }
- if (i == 0) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
- "xfs_inobt_get_rec() failed");
-#endif /* DEBUG */
- error = XFS_ERROR(EINVAL);
- }
- error0:
- xfs_trans_brelse(tp, agbp);
- xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+ error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
+ &chunk_agbno, &offset_agbno, flags);
if (error)
return error;
- chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
- offset_agbno = agbno - chunk_agbno;
}
+out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
((offset_agbno / blks_per_cluster) * blks_per_cluster);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 6845db90818f..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
xfs_trans_t *tp,
xfs_ino_t ino,
struct xfs_inode **ipp,
- xfs_daddr_t bno,
int flags,
int lock_flags)
{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
if (!ip)
return ENOMEM;
- error = xfs_iread(mp, tp, ip, bno, flags);
+ error = xfs_iread(mp, tp, ip, flags);
if (error)
goto out_destroy;
@@ -358,8 +357,6 @@ out_destroy:
* within the file system for the inode being requested.
* lock_flags -- flags indicating how to lock the inode. See the comment
* for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
xfs_ino_t ino,
uint flags,
uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+ xfs_inode_t **ipp)
{
xfs_inode_t *ip;
int error;
@@ -382,9 +378,6 @@ xfs_iget(
/* get the perag structure and ensure that it's inode capable */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
- if (!pag->pagi_inodeok)
- return EINVAL;
- ASSERT(pag->pag_ici_init);
agino = XFS_INO_TO_AGINO(mp, ino);
again:
@@ -400,7 +393,7 @@ again:
read_unlock(&pag->pag_ici_lock);
XFS_STATS_INC(xs_ig_missed);
- error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
flags, lock_flags);
if (error)
goto out_error_or_again;
@@ -744,30 +737,24 @@ xfs_ilock_demote(
}
#ifdef DEBUG
-/*
- * Debug-only routine, without additional rw_semaphore APIs, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- *
- * Note: this means !xfs_isilocked would give false positives, so don't do that.
- */
int
xfs_isilocked(
xfs_inode_t *ip,
uint lock_flags)
{
- if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
- XFS_ILOCK_EXCL) {
- if (!ip->i_lock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+ if (!(lock_flags & XFS_ILOCK_SHARED))
+ return !!ip->i_lock.mr_writer;
+ return rwsem_is_locked(&ip->i_lock.mr_lock);
}
- if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
- XFS_IOLOCK_EXCL) {
- if (!ip->i_iolock.mr_writer)
- return 0;
+ if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+ if (!(lock_flags & XFS_IOLOCK_SHARED))
+ return !!ip->i_iolock.mr_writer;
+ return rwsem_is_locked(&ip->i_iolock.mr_lock);
}
- return 1;
+ ASSERT(0);
+ return 0;
}
#endif
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8cd6e8d8fe9c..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
- if (iget_flags & XFS_IGET_BULKSTAT) {
+ if (iget_flags & XFS_IGET_UNTRUSTED) {
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
@@ -787,7 +787,6 @@ xfs_iread(
xfs_mount_t *mp,
xfs_trans_t *tp,
xfs_inode_t *ip,
- xfs_daddr_t bno,
uint iget_flags)
{
xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
/*
* Fill in the location information in the in-core inode.
*/
- ip->i_imap.im_blkno = bno;
error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
if (error)
return error;
- ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
/*
* Get pointers to the on-disk inode and the buffer containing it.
@@ -1940,10 +1937,10 @@ xfs_ifree_cluster(
int blks_per_cluster;
int nbufs;
int ninodes;
- int i, j, found, pre_flushed;
+ int i, j;
xfs_daddr_t blkno;
xfs_buf_t *bp;
- xfs_inode_t *ip, **ip_found;
+ xfs_inode_t *ip;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
struct xfs_perag *pag;
@@ -1960,114 +1957,97 @@ xfs_ifree_cluster(
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
- ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
for (j = 0; j < nbufs; j++, inum += ninodes) {
+ int found = 0;
+
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
+ /*
+ * We obtain and lock the backing buffer first in the process
+ * here, as we have to ensure that any dirty inode that we
+ * can't get the flush lock on is attached to the buffer.
+ * If we scan the in-memory inodes first, then buffer IO can
+ * complete before we get a lock on it, and hence we may fail
+ * to mark all the active inodes on the buffer stale.
+ */
+ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * blks_per_cluster,
+ XBF_LOCK);
+
+ /*
+ * Walk the inodes already attached to the buffer and mark them
+ * stale. These will all have the flush locks held, so an
+ * in-memory inode walk can't lock them.
+ */
+ lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ while (lip) {
+ if (lip->li_type == XFS_LI_INODE) {
+ iip = (xfs_inode_log_item_t *)lip;
+ ASSERT(iip->ili_logged == 1);
+ lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
+ xfs_trans_ail_copy_lsn(mp->m_ail,
+ &iip->ili_flush_lsn,
+ &iip->ili_item.li_lsn);
+ xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+ found++;
+ }
+ lip = lip->li_bio_list;
+ }
/*
- * Look for each inode in memory and attempt to lock it,
- * we can be racing with flush and tail pushing here.
- * any inode we get the locks on, add to an array of
- * inode items to process later.
+ * For each inode in memory attempt to add it to the inode
+ * buffer and set it up for being staled on buffer IO
+ * completion. This is safe as we've locked out tail pushing
+ * and flushing by locking the buffer.
*
- * The get the buffer lock, we could beat a flush
- * or tail pushing thread to the lock here, in which
- * case they will go looking for the inode buffer
- * and fail, we need some other form of interlock
- * here.
+ * We have already marked every inode that was part of a
+ * transaction stale above, which means there is no point in
+ * even trying to lock them.
*/
- found = 0;
for (i = 0; i < ninodes; i++) {
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
- /* Inode not in memory or we found it already,
- * nothing to do
- */
+ /* Inode not in memory or stale, nothing to do */
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
- if (xfs_inode_clean(ip)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- /* If we can get the locks then add it to the
- * list, otherwise by the time we get the bp lock
- * below it will already be attached to the
- * inode buffer.
- */
-
- /* This inode will already be locked - by us, lets
- * keep it that way.
- */
-
- if (ip == free_ip) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- } else {
- ip_found[found++] = ip;
- }
- }
+ /* don't try to lock/unlock the current inode */
+ if (ip != free_ip &&
+ !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
continue;
}
+ read_unlock(&pag->pag_ici_lock);
- if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- if (xfs_iflock_nowait(ip)) {
- xfs_iflags_set(ip, XFS_ISTALE);
-
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- ip_found[found++] = ip;
- }
- } else {
+ if (!xfs_iflock_nowait(ip)) {
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
+ continue;
}
- read_unlock(&pag->pag_ici_lock);
- }
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * blks_per_cluster,
- XBF_LOCK);
-
- pre_flushed = 0;
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- while (lip) {
- if (lip->li_type == XFS_LI_INODE) {
- iip = (xfs_inode_log_item_t *)lip;
- ASSERT(iip->ili_logged == 1);
- lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
- xfs_trans_ail_copy_lsn(mp->m_ail,
- &iip->ili_flush_lsn,
- &iip->ili_item.li_lsn);
- xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
- pre_flushed++;
+ xfs_iflags_set(ip, XFS_ISTALE);
+ if (xfs_inode_clean(ip)) {
+ ASSERT(ip != free_ip);
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ continue;
}
- lip = lip->li_bio_list;
- }
- for (i = 0; i < found; i++) {
- ip = ip_found[i];
iip = ip->i_itemp;
-
if (!iip) {
+ /* inode with unlogged changes only */
+ ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
+ found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2058,16 @@ xfs_ifree_cluster(
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_istale_done, (xfs_log_item_t *)iip);
- if (ip != free_ip) {
+
+ if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
}
- if (found || pre_flushed)
+ if (found)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
- kmem_free(ip_found);
xfs_perag_put(pag);
}
@@ -2649,8 +2628,6 @@ xfs_iflush_cluster(
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- ASSERT(pag->pagi_inodeok);
- ASSERT(pag->pag_ici_init);
inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
* xfs_iget.c prototypes.
*/
int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
- uint, uint, xfs_inode_t **, xfs_daddr_t);
+ uint, uint, xfs_inode_t **);
void xfs_iput(xfs_inode_t *, uint);
void xfs_iput_new(xfs_inode_t *, uint);
void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
* Flags for xfs_iget()
*/
#define XFS_IGET_CREATE 0x1
-#define XFS_IGET_BULKSTAT 0x2
+#define XFS_IGET_UNTRUSTED 0x2
int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, struct xfs_dinode **,
struct xfs_buf **, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
- struct xfs_inode *, xfs_daddr_t, uint);
+ struct xfs_inode *, uint);
void xfs_dinode_to_disk(struct xfs_dinode *,
struct xfs_icdinode *);
void xfs_idestroy_fork(struct xfs_inode *, int);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
(ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
}
-STATIC int
-xfs_bulkstat_one_iget(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- xfs_bstat_t *buf, /* return buffer */
- int *stat) /* BULKSTAT_RV_... */
+/*
+ * Return stat information for one inode.
+ * Return 0 if ok, else errno.
+ */
+int
+xfs_bulkstat_one_int(
+ struct xfs_mount *mp, /* mount point for filesystem */
+ xfs_ino_t ino, /* inode to get data for */
+ void __user *buffer, /* buffer to place output in */
+ int ubsize, /* size of buffer */
+ bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
+ int *ubused, /* bytes used by me */
+ int *stat) /* BULKSTAT_RV_... */
{
- xfs_icdinode_t *dic; /* dinode core info pointer */
- xfs_inode_t *ip; /* incore inode pointer */
- struct inode *inode;
- int error;
+ struct xfs_icdinode *dic; /* dinode core info pointer */
+ struct xfs_inode *ip; /* incore inode pointer */
+ struct inode *inode;
+ struct xfs_bstat *buf; /* return buffer */
+ int error = 0; /* error value */
+
+ *stat = BULKSTAT_RV_NOTHING;
+
+ if (!buffer || xfs_internal_inum(mp, ino))
+ return XFS_ERROR(EINVAL);
+
+ buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
+ if (!buf)
+ return XFS_ERROR(ENOMEM);
error = xfs_iget(mp, NULL, ino,
- XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
+ XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
if (error) {
*stat = BULKSTAT_RV_NOTHING;
- return error;
+ goto out_free;
}
ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
break;
}
-
xfs_iput(ip, XFS_ILOCK_SHARED);
- return error;
-}
-STATIC void
-xfs_bulkstat_one_dinode(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- xfs_dinode_t *dic, /* dinode inode pointer */
- xfs_bstat_t *buf) /* return buffer */
-{
- /*
- * The inode format changed when we moved the link count and
- * made it 32 bits long. If this is an old format inode,
- * convert it in memory to look like a new one. If it gets
- * flushed to disk we will convert back before flushing or
- * logging it. We zero out the new projid field and the old link
- * count field. We'll handle clearing the pad field (the remains
- * of the old uuid field) when we actually convert the inode to
- * the new format. We don't change the version number so that we
- * can distinguish this from a real new format inode.
- */
- if (dic->di_version == 1) {
- buf->bs_nlink = be16_to_cpu(dic->di_onlink);
- buf->bs_projid = 0;
- } else {
- buf->bs_nlink = be32_to_cpu(dic->di_nlink);
- buf->bs_projid = be16_to_cpu(dic->di_projid);
- }
+ error = formatter(buffer, ubsize, ubused, buf);
- buf->bs_ino = ino;
- buf->bs_mode = be16_to_cpu(dic->di_mode);
- buf->bs_uid = be32_to_cpu(dic->di_uid);
- buf->bs_gid = be32_to_cpu(dic->di_gid);
- buf->bs_size = be64_to_cpu(dic->di_size);
- buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
- buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
- buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
- buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
- buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
- buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
- buf->bs_xflags = xfs_dic2xflags(dic);
- buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
- buf->bs_extents = be32_to_cpu(dic->di_nextents);
- buf->bs_gen = be32_to_cpu(dic->di_gen);
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
- buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
- buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
- buf->bs_aextents = be16_to_cpu(dic->di_anextents);
- buf->bs_forkoff = XFS_DFORK_BOFF(dic);
+ if (!error)
+ *stat = BULKSTAT_RV_DIDONE;
- switch (dic->di_format) {
- case XFS_DINODE_FMT_DEV:
- buf->bs_rdev = xfs_dinode_get_rdev(dic);
- buf->bs_blksize = BLKDEV_IOSIZE;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_LOCAL:
- case XFS_DINODE_FMT_UUID:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = 0;
- break;
- case XFS_DINODE_FMT_EXTENTS:
- case XFS_DINODE_FMT_BTREE:
- buf->bs_rdev = 0;
- buf->bs_blksize = mp->m_sb.sb_blocksize;
- buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
- break;
- }
+ out_free:
+ kmem_free(buf);
+ return error;
}
/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
return 0;
}
-/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
- */
-int /* error status */
-xfs_bulkstat_one_int(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- xfs_daddr_t bno, /* starting bno of inode cluster */
- int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
- int *stat) /* BULKSTAT_RV_... */
-{
- xfs_bstat_t *buf; /* return buffer */
- int error = 0; /* error value */
- xfs_dinode_t *dip; /* dinode inode pointer */
-
- dip = (xfs_dinode_t *)dibuff;
- *stat = BULKSTAT_RV_NOTHING;
-
- if (!buffer || xfs_internal_inum(mp, ino))
- return XFS_ERROR(EINVAL);
-
- buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
-
- if (dip == NULL) {
- /* We're not being passed a pointer to a dinode. This happens
- * if BULKSTAT_FG_IGET is selected. Do the iget.
- */
- error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
- if (error)
- goto out_free;
- } else {
- xfs_bulkstat_one_dinode(mp, ino, dip, buf);
- }
-
- error = formatter(buffer, ubsize, ubused, buf);
- if (error)
- goto out_free;
-
- *stat = BULKSTAT_RV_DIDONE;
-
- out_free:
- kmem_free(buf);
- return error;
-}
-
int
xfs_bulkstat_one(
xfs_mount_t *mp, /* mount point for filesystem */
xfs_ino_t ino, /* inode number to get data for */
void __user *buffer, /* buffer to place output in */
int ubsize, /* size of buffer */
- void *private_data, /* my private data */
- xfs_daddr_t bno, /* starting bno of inode cluster */
int *ubused, /* bytes used by me */
- void *dibuff, /* on-disk inode buffer */
int *stat) /* BULKSTAT_RV_... */
{
return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, bno,
- ubused, dibuff, stat);
-}
-
-/*
- * Test to see whether we can use the ondisk inode directly, based
- * on the given bulkstat flags, filling in dipp accordingly.
- * Returns zero if the inode is dodgey.
- */
-STATIC int
-xfs_bulkstat_use_dinode(
- xfs_mount_t *mp,
- int flags,
- xfs_buf_t *bp,
- int clustidx,
- xfs_dinode_t **dipp)
-{
- xfs_dinode_t *dip;
- unsigned int aformat;
-
- *dipp = NULL;
- if (!bp || (flags & BULKSTAT_FG_IGET))
- return 1;
- dip = (xfs_dinode_t *)
- xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
- /*
- * Check the buffer containing the on-disk inode for di_mode == 0.
- * This is to prevent xfs_bulkstat from picking up just reclaimed
- * inodes that have their in-core state initialized but not flushed
- * to disk yet. This is a temporary hack that would require a proper
- * fix in the future.
- */
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
- !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
- !dip->di_mode)
- return 0;
- if (flags & BULKSTAT_FG_QUICK) {
- *dipp = dip;
- return 1;
- }
- /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
- aformat = dip->di_aformat;
- if ((XFS_DFORK_Q(dip) == 0) ||
- (aformat == XFS_DINODE_FMT_LOCAL) ||
- (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
- *dipp = dip;
- return 1;
- }
- return 1;
+ xfs_bulkstat_one_fmt, ubused, stat);
}
#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
xfs_ino_t *lastinop, /* last inode returned */
int *ubcountp, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data,/* private data for formatter */
size_t statstruct_size, /* sizeof struct filling */
char __user *ubuffer, /* buffer with inode stats */
- int flags, /* defined in xfs_itable.h */
int *done) /* 1 if there are more stats to get */
{
xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
int ubelem; /* spaces used in user's buffer */
int ubused; /* bytes used by formatter */
xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
- xfs_dinode_t *dip; /* ptr into bp for specific inode */
/*
* Get the last inode value, see if there's nothing to do.
*/
ino = (xfs_ino_t)*lastinop;
lastino = ino;
- dip = NULL;
agno = XFS_INO_TO_AGNO(mp, ino);
agino = XFS_INO_TO_AGINO(mp, ino);
if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
irbp->ir_startino) +
((chunkidx & nimask) >>
mp->m_sb.sb_inopblog);
-
- if (flags & (BULKSTAT_FG_QUICK |
- BULKSTAT_FG_INLINE)) {
- int offset;
-
- ino = XFS_AGINO_TO_INO(mp, agno,
- agino);
- bno = XFS_AGB_TO_DADDR(mp, agno,
- agbno);
-
- /*
- * Get the inode cluster buffer
- */
- if (bp)
- xfs_buf_relse(bp);
-
- error = xfs_inotobp(mp, NULL, ino, &dip,
- &bp, &offset,
- XFS_IGET_BULKSTAT);
-
- if (!error)
- clustidx = offset / mp->m_sb.sb_inodesize;
- if (XFS_TEST_ERROR(error != 0,
- mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
- XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
- bp = NULL;
- ubleft = 0;
- rval = error;
- break;
- }
- }
}
ino = XFS_AGINO_TO_INO(mp, agno, agino);
bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
* when the chunk is used up.
*/
irbp->ir_freecount++;
- if (!xfs_bulkstat_use_dinode(mp, flags, bp,
- clustidx, &dip)) {
- lastino = ino;
- continue;
- }
- /*
- * If we need to do an iget, cannot hold bp.
- * Drop it, until starting the next cluster.
- */
- if ((flags & BULKSTAT_FG_INLINE) && !dip) {
- if (bp)
- xfs_buf_relse(bp);
- bp = NULL;
- }
/*
* Get the inode and fill in a single buffer.
- * BULKSTAT_FG_QUICK uses dip to fill it in.
- * BULKSTAT_FG_IGET uses igets.
- * BULKSTAT_FG_INLINE uses dip if we have an
- * inline attr fork, else igets.
- * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
- * This is also used to count inodes/blks, etc
- * in xfs_qm_quotacheck.
*/
ubused = statstruct_size;
- error = formatter(mp, ino, ubufp,
- ubleft, private_data,
- bno, &ubused, dip, &fmterror);
+ error = formatter(mp, ino, ubufp, ubleft,
+ &ubused, &fmterror);
if (fmterror == BULKSTAT_RV_NOTHING) {
if (error && error != ENOENT &&
error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
*/
ino = (xfs_ino_t)*lastinop;
- error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
- NULL, 0, NULL, NULL, &res);
+ error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
if (error) {
/*
* Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
(*lastinop)--;
count = 1;
if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
- NULL, sizeof(xfs_bstat_t), buffer,
- BULKSTAT_FG_IGET, done))
+ sizeof(xfs_bstat_t), buffer, done))
return error;
if (count == 0 || (xfs_ino_t)*lastinop != ino)
return error == EFSCORRUPTED ?
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dip,
int *stat);
/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
#define BULKSTAT_RV_GIVEUP 2
/*
- * Values for bulkstat flag argument.
- */
-#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
-#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
-#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
-
-/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
xfs_ino_t *lastino, /* last inode returned */
int *count, /* size of buffer/count returned */
bulkstat_one_pf formatter, /* func that'd fill a single buf */
- void *private_data, /* private data for formatter */
size_t statstruct_size,/* sizeof struct that we're filling */
char __user *ubuffer,/* buffer with inode stats */
- int flags, /* flag to control access method */
int *done); /* 1 if there are more stats to get */
int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
void __user *buffer,
int ubsize,
bulkstat_one_fmt_pf formatter,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
xfs_ino_t ino,
void __user *buffer,
int ubsize,
- void *private_data,
- xfs_daddr_t bno,
int *ubused,
- void *dibuff,
int *stat);
typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 14a69aec2c0b..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -132,15 +132,10 @@ xlog_align(
int nbblks,
xfs_buf_t *bp)
{
- xfs_daddr_t offset;
- xfs_caddr_t ptr;
+ xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
- offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
- ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
-
- ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
-
- return ptr;
+ ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+ return XFS_BUF_PTR(bp) + BBTOB(offset);
}
@@ -3203,7 +3198,7 @@ xlog_recover_process_one_iunlink(
int error;
ino = XFS_AGINO_TO_INO(mp, agno, agino);
- error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0);
+ error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
if (error)
goto fail;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d7bf38c8cd1c..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
#if XFS_BIG_BLKNOS /* Limited by ULONG_MAX of page cache index */
if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
- return E2BIG;
+ return EFBIG;
#else /* Limited by UINT_MAX of sectors */
if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
- return E2BIG;
+ return EFBIG;
#endif
return 0;
}
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
xfs_fs_mount_cmn_err(flags,
"file system too large to be mounted on this system.");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
return 0;
}
-STATIC void
-xfs_initialize_perag_icache(
- xfs_perag_t *pag)
-{
- if (!pag->pag_ici_init) {
- rwlock_init(&pag->pag_ici_lock);
- INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- pag->pag_ici_init = 1;
- }
-}
-
int
xfs_initialize_perag(
xfs_mount_t *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
xfs_agino_t agino;
xfs_ino_t ino;
xfs_sb_t *sbp = &mp->m_sb;
- xfs_ino_t max_inum = XFS_MAXINUMBER_32;
int error = -ENOMEM;
- /* Check to see if the filesystem can overflow 32 bit inodes */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
- ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
/*
* Walk the current per-ag tree so we don't try to initialise AGs
* that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
}
if (!first_initialised)
first_initialised = index;
+
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
goto out_unwind;
+ pag->pag_agno = index;
+ pag->pag_mount = mp;
+ rwlock_init(&pag->pag_ici_lock);
+ INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+
if (radix_tree_preload(GFP_NOFS))
goto out_unwind;
+
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
error = -EEXIST;
goto out_unwind;
}
- pag->pag_agno = index;
- pag->pag_mount = mp;
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
}
- /* Clear the mount flag if no inode can overflow 32 bits
- * on this filesystem, or if specifically requested..
+ /*
+ * If we mount with the inode64 option, or no inode overflows
+ * the legacy 32-bit address space clear the inode32 option.
*/
- if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
+ agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+ if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
mp->m_flags |= XFS_MOUNT_32BITINODES;
- } else {
+ else
mp->m_flags &= ~XFS_MOUNT_32BITINODES;
- }
- /* If we can overflow then setup the ag headers accordingly */
if (mp->m_flags & XFS_MOUNT_32BITINODES) {
- /* Calculate how much should be reserved for inodes to
- * meet the max inode percentage.
+ /*
+ * Calculate how much should be reserved for inodes to meet
+ * the max inode percentage.
*/
if (mp->m_maxicount) {
__uint64_t icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
} else {
max_metadata = agcount;
}
+
for (index = 0; index < agcount; index++) {
ino = XFS_AGINO_TO_INO(mp, index, agino);
- if (ino > max_inum) {
+ if (ino > XFS_MAXINUMBER_32) {
index++;
break;
}
- /* This ag is preferred for inodes */
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
if (index < max_metadata)
pag->pagf_metadata = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
} else {
- /* Setup default behavior for smaller filesystems */
for (index = 0; index < agcount; index++) {
pag = xfs_perag_get(mp, index);
pag->pagi_inodeok = 1;
- xfs_initialize_perag_icache(pag);
xfs_perag_put(pag);
}
}
+
if (maxagi)
*maxagi = index;
return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
cmn_err(CE_WARN, "XFS: size check 1 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_ddev_targp,
d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 2 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
cmn_err(CE_WARN, "XFS: size check 3 failed");
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_logdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
} else {
cmn_err(CE_WARN, "XFS: size check 3 failed");
if (error == ENOSPC)
- error = XFS_ERROR(E2BIG);
+ error = XFS_ERROR(EFBIG);
return error;
}
}
@@ -1254,7 +1244,7 @@ xfs_mountfs(
* Allocate and initialize the per-ag data.
*/
spin_lock_init(&mp->m_perag_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
if (error) {
cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
@@ -1310,7 +1300,7 @@ xfs_mountfs(
* Get and sanity-check the root inode.
* Save the pointer to it in the mount structure.
*/
- error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
if (error) {
cmn_err(CE_WARN, "XFS: failed to read root inode");
goto out_log_dealloc;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 6be05f756d59..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
(unsigned long long) XFS_BB_TO_FSB(mp, d),
(unsigned long long) mp->m_sb.sb_rblocks);
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
}
error = xfs_read_buf(mp, mp->m_rtdev_targp,
d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
cmn_err(CE_WARN,
"XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
if (error == ENOSPC)
- return XFS_ERROR(E2BIG);
+ return XFS_ERROR(EFBIG);
return error;
}
xfs_buf_relse(bp);
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
sbp = &mp->m_sb;
if (sbp->sb_rbmino == NULLFSINO)
return 0;
- error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
if (error)
return error;
ASSERT(mp->m_rbmip != NULL);
ASSERT(sbp->sb_rsumino != NULLFSINO);
- error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0);
+ error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
if (error) {
IRELE(mp->m_rbmip);
return error;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index b2d67adb6a08..ff614c29b441 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -147,7 +147,16 @@ xfs_growfs_rt(
# define xfs_rtfree_extent(t,b,l) (ENOSYS)
# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
# define xfs_growfs_rt(mp,in) (ENOSYS)
-# define xfs_rtmount_init(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+static inline int /* error */
+xfs_rtmount_init(
+ xfs_mount_t *mp) /* file system mount structure */
+{
+ if (mp->m_sb.sb_rblocks == 0)
+ return 0;
+
+ cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+ return ENOSYS;
+}
# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
# define xfs_rtunmount_inodes(m)
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ce558efa2ea0..28547dfce037 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -48,134 +48,489 @@
kmem_zone_t *xfs_trans_zone;
+
/*
- * Reservation functions here avoid a huge stack in xfs_trans_init
- * due to register overflow from temporaries in the calculations.
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate. Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note: Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual. In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time. This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction. See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents. This gives:
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ * the agfs of the ags containing the blocks: 2 * sector size
+ * the agfls of the ags containing the blocks: 2 * sector size
+ * the super block free block counter: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
-xfs_calc_write_reservation(xfs_mount_t *mp)
+xfs_calc_write_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * In truncating a file we free up to two extents at once. We can modify:
+ * the inode being truncated: inode size
+ * the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+xfs_calc_itruncate_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
+ 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * In renaming a files we can modify:
+ * the four inodes involved: 4 * inode size
+ * the two directory btrees: 2 * (max depth + v2) * dir block size
+ * the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ * of bmap blocks) giving:
+ * the agf for the ags in which the blocks live: 3 * sector size
+ * the agfl for the ags in which the blocks live: 3 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_rename_reservation(xfs_mount_t *mp)
+xfs_calc_rename_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((4 * mp->m_sb.sb_inodesize +
+ 2 * XFS_DIROP_LOG_RES(mp) +
+ 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ 3 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 3) +
+ 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
}
+/*
+ * For creating a link to an inode:
+ * the parent directory inode: inode size
+ * the linked inode: inode size
+ * the directory btree could split: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ * the agf for the ag in which the blocks live: sector size
+ * the agfl for the ag in which the blocks live: sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_link_reservation(xfs_mount_t *mp)
+xfs_calc_link_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For removing a directory entry we can modify:
+ * the parent directory inode: inode size
+ * the removed inode: inode size
+ * the directory btree could join: (max depth + v2) * dir block size
+ * the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * For symlink we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: 1 block
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 1024 +
+ 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+ (2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * For create we can modify:
+ * the parent directory inode: inode size
+ * the new inode: inode size
+ * the inode btree entry: block size
+ * the superblock for the nlink flag: sector size
+ * the directory btree: (max depth + v2) * dir block size
+ * the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ * the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ * the superblock for the nlink flag: sector size
+ * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ XFS_DIROP_LOG_RES(mp) +
+ 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+ (3 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+ XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
}
+/*
+ * Making a new directory is the same as creating a new file.
+ */
STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return xfs_calc_create_reservation(mp);
}
+/*
+ * In freeing an inode we can modify:
+ * the inode being freed: inode size
+ * the super block free inode counter: sector size
+ * the agi hash list and counters: sector size
+ * the inode btree entry: block size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ * the inode btree: max depth * blocksize
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, 1) +
+ MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+ XFS_INODE_CLUSTER_SIZE(mp)) +
+ 128 * 5 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * We also add a bit of slop for the transaction stuff.
+ */
STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ 512;
+
}
+/*
+ * Growing the data section of the filesystem.
+ * superblock
+ * agi and agf
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWDATA_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize * 3 +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ * superblock: sector size
+ * agf of the ag from which the extent is allocated: sector size
+ * bmap btree for bitmap/summary inode: max depth * blocksize
+ * bitmap/summary inode: inode size
+ * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+ return 2 * mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+ mp->m_sb.sb_inodesize +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ * one bitmap/summary block: blocksize
+ */
STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+ return mp->m_sb.sb_blocksize + 128;
}
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ * superblock: sector size
+ * bitmap inode: inode size
+ * summary inode: inode size
+ * one bitmap block: blocksize
+ * summary blocks: new summary size
+ */
STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_blocksize +
+ mp->m_rsumsize +
+ 128 * 5;
}
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ * inode
+ */
STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_SWRITE_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ * inode
+ */
STATIC uint
xfs_calc_writeid_reservation(xfs_mount_t *mp)
{
- return XFS_CALC_WRITEID_LOG_RES(mp);
+ return mp->m_sb.sb_inodesize + 128;
}
+/*
+ * Converting the inode from non-attributed to attributed.
+ * the inode being converted: inode size
+ * agf block and superblock (for block allocation)
+ * the new block (directory sized)
+ * bmap blocks for the new directory block
+ * allocation btrees
+ */
STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize * 2 +
+ mp->m_dirblksize +
+ XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+ XFS_ALLOCFREE_LOG_RES(mp, 1) +
+ 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+ XFS_ALLOCFREE_LOG_COUNT(mp, 1));
}
+/*
+ * Removing the attribute fork of a file
+ * the inode being truncated: inode size
+ * the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * the agf for each of the ags: 4 * sector size
+ * the agfl for each of the ags: 4 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * worst case split in allocation btrees per extent assuming 4 extents:
+ * 4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+ return MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+ (4 * mp->m_sb.sb_sectsize +
+ 4 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 4) +
+ 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
}
+/*
+ * Setting an attribute.
+ * the inode getting the attribute
+ * the superblock for allocations
+ * the agfs extents are allocated from
+ * the attribute btree * max depth
+ * the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ mp->m_sb.sb_inodesize +
+ mp->m_sb.sb_sectsize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ 128 * (2 + XFS_DA_NODE_MAXDEPTH);
}
+/*
+ * Removing an attribute.
+ * the inode: inode size
+ * the attribute btree could join: max depth * block size
+ * the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ * the agf for the ag in which the blocks live: 2 * sector size
+ * the agfl for the ag in which the blocks live: 2 * sector size
+ * the superblock for the free block count: sector size
+ * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+ return XFS_DQUOT_LOGRES(mp) +
+ MAX((mp->m_sb.sb_inodesize +
+ XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+ XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+ 128 * (1 + XFS_DA_NODE_MAXDEPTH +
+ XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+ (2 * mp->m_sb.sb_sectsize +
+ 2 * mp->m_sb.sb_sectsize +
+ mp->m_sb.sb_sectsize +
+ XFS_ALLOCFREE_LOG_RES(mp, 2) +
+ 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
}
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+ struct xfs_mount *mp)
{
- return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+ return mp->m_sb.sb_sectsize + 128;
}
/*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
*/
void
xfs_trans_init(
- xfs_mount_t *mp)
+ struct xfs_mount *mp)
{
- xfs_trans_reservations_t *resp;
+ struct xfs_trans_reservations *resp = &mp->m_reservations;
- resp = &(mp->m_reservations);
resp->tr_write = xfs_calc_write_reservation(mp);
resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
resp->tr_rename = xfs_calc_rename_reservation(mp);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8c69e7824f68..e639e8e9a2a9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -300,24 +300,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
/*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call. This is because the number in the worst case is quite high
- * and quite unusual. In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
-/*
* Per-extent log reservation for the allocation btree changes
* involved in freeing or allocating an extent.
* 2 trees * (2 blocks/level * max depth - 1) * block size
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents. This gives:
- * the inode getting the new extents: inode size
- * the inode's bmap btree: max depth * block size
- * the agfs of the ags from which the extents are allocated: 2 * sector
- * the superblock free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- * the agfs of the ags containing the blocks: 2 * sector size
- * the agfls of the ags containing the blocks: 2 * sector size
- * the super block free block counter: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once. We can modify:
- * the inode being truncated: inode size
- * the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
- (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- * the four inodes involved: 4 * inode size
- * the two directory btrees: 2 * (max depth + v2) * dir block size
- * the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- * of bmap blocks) giving:
- * the agf for the ags in which the blocks live: 3 * sector size
- * the agfl for the ags in which the blocks live: 3 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_RENAME_LOG_RES(mp) \
- (MAX( \
- ((4 * (mp)->m_sb.sb_inodesize) + \
- (2 * XFS_DIROP_LOG_RES(mp)) + \
- (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
- ((3 * (mp)->m_sb.sb_sectsize) + \
- (3 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 3) + \
- (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- * the parent directory inode: inode size
- * the linked inode: inode size
- * the directory btree could split: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- * the agf for the ag in which the blocks live: sector size
- * the agfl for the ag in which the blocks live: sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_LINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- * the parent directory inode: inode size
- * the removed inode: inode size
- * the directory btree could join: (max depth + v2) * dir block size
- * the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_REMOVE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: 1 block
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_SYMLINK_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- 1024 + \
- (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- * the parent directory inode: inode size
- * the new inode: inode size
- * the inode btree entry: block size
- * the superblock for the nlink flag: sector size
- * the directory btree: (max depth + v2) * dir block size
- * the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- * the agi and agf of the ag getting the new inodes: 2 * sectorsize
- * the superblock for the nlink flag: sector size
- * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_CREATE_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B(mp, 1) + \
- XFS_DIROP_LOG_RES(mp) + \
- (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
- (3 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
- XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
-
#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- * the inode being freed: inode size
- * the super block free inode counter: sector size
- * the agi hash list and counters: sector size
- * the inode btree entry: block size
- * the on disk inode before ours in the agi hash list: inode cluster size
- * the inode btree: max depth * blocksize
- * the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define XFS_CALC_IFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), 1) + \
- MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
- (128 * 5) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + 512)
-
#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- * superblock
- * agi and agf
- * allocation btrees
- */
-#define XFS_CALC_GROWDATA_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize * 3 + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- * superblock: sector size
- * agf of the ag from which the extent is allocated: sector size
- * bmap btree for bitmap/summary inode: max depth * blocksize
- * bitmap/summary inode: inode size
- * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
- (2 * (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
- (mp)->m_sb.sb_inodesize + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * \
- (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- * one bitmap/summary block: blocksize
- */
-#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
- ((mp)->m_sb.sb_blocksize + 128)
-
#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- * superblock: sector size
- * bitmap inode: inode size
- * summary inode: inode size
- * one bitmap block: blocksize
- * summary blocks: new summary size
- */
-#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + \
- 2 * (mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_blocksize + \
- (mp)->m_rsumsize + \
- (128 * 5))
-
#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- * inode
- */
-#define XFS_CALC_SWRITE_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
/*
* Logging the inode timestamps on an fsync -- same as SWRITE
* as long as SWRITE logs the entire inode core
*/
#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- * inode
- */
-#define XFS_CALC_WRITEID_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + 128)
-
#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- * the inode being converted: inode size
- * agf block and superblock (for block allocation)
- * the new block (directory sized)
- * bmap blocks for the new directory block
- * allocation btrees
- */
-#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize * 2 + \
- (mp)->m_dirblksize + \
- XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
- XFS_ALLOCFREE_LOG_RES(mp, 1) + \
- (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
- XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- * the inode being truncated: inode size
- * the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- * the agf for each of the ags: 4 * sector size
- * the agfl for each of the ags: 4 * sector size
- * the super block to reflect the freed blocks: sector size
- * worst case split in allocation btrees per extent assuming 4 extents:
- * 4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
- ((4 * (mp)->m_sb.sb_sectsize) + \
- (4 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 4) + \
- (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- * the inode getting the attribute
- * the superblock for allocations
- * the agfs extents are allocated from
- * the attribute btree * max depth
- * the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define XFS_CALC_ATTRSET_LOG_RES(mp) \
- ((mp)->m_sb.sb_inodesize + \
- (mp)->m_sb.sb_sectsize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
#define XFS_ATTRSET_LOG_RES(mp, ext) \
((mp)->m_reservations.tr_attrset + \
(ext * (mp)->m_sb.sb_sectsize) + \
(ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
(128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- * the inode: inode size
- * the attribute btree could join: max depth * block size
- * the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- * the agf for the ag in which the blocks live: 2 * sector size
- * the agfl for the ag in which the blocks live: 2 * sector size
- * the superblock for the free block count: sector size
- * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_ATTRRM_LOG_RES(mp) \
- (MAX( \
- ((mp)->m_sb.sb_inodesize + \
- XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
- XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
- (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
- ((2 * (mp)->m_sb.sb_sectsize) + \
- (2 * (mp)->m_sb.sb_sectsize) + \
- (mp)->m_sb.sb_sectsize + \
- XFS_ALLOCFREE_LOG_RES(mp, 2) + \
- (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
- ((mp)->m_sb.sb_sectsize + 128)
-
#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
{
int error;
- error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0);
+ error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
if (!error && tp)
xfs_trans_ijoin(tp, *ipp, lock_flags);
return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 9d376be0ea38..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -267,7 +267,7 @@ xfs_setattr(
if (code) {
ASSERT(tp == NULL);
lock_flags &= ~XFS_ILOCK_EXCL;
- ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+ ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
goto error_return;
}
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
@@ -1269,7 +1269,7 @@ xfs_lookup(
if (error)
goto out;
- error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+ error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
if (error)
goto out_free_name;